def __store_article(self, title, url, category, content, date_published, author_name, feed_link, feed):
    """Create a new FeedsResult row for an article, deduplicated by title.

    Returns False when an entry with the same title already exists,
    True after constructing a new entry.

    NOTE: date_published is accepted but currently unused -- the row is
    stamped with datetime.now() (the strptime-based parsing was disabled).
    """
    try:
        # Deduplicate on title; only a genuine "not found" should fall
        # through to creation (the original bare except also swallowed
        # real DB errors into the create path).
        FeedsResult.objects.get(title=title)
        return False
    except FeedsResult.DoesNotExist:
        entry = FeedsResult(
            title=htmllib.decoding(title),
            link=url,
            excerpt=content,
            author_name=htmllib.decoding(author_name),
            category=category,
            feed=self.model.objects.get(feedurl=feed_link),
            date=datetime.now(),
        )
        # NOTE(review): entry.save() was commented out in the original, so
        # the constructed row is never persisted here -- confirm whether
        # saving happens elsewhere before re-enabling it.
        return True
def __parse_feed(self, feed_content, feed_url, stop_target, category, feed_latest, start_target, mid_target, end_target, allow_target):
    """Parse a fetched feed document and hand each unseen article to
    __store_article.

    feed_content is the raw feed payload given to feedparser; articles
    whose title is already known (per __feedslist_check) are skipped.
    stop_target is forwarded to htmllib.GetFeedclean for content cleanup.
    The remaining *_target parameters and feed_latest are accepted for
    interface compatibility but are not used here.
    """
    feed = feedparser.parse(feed_content)
    i = 0
    dead_i = 0
    for entry in feed.entries:
        logger.info('start parse feed,the dead_i is %s', dead_i)
        title = htmllib.decoding(entry.title)
        categorie_keys = []
        content = ''
        date_published = datetime.now()
        author_name = ''
        Mystat = True
        if not self.__feedslist_check(title):
            try:
                i += 1
                url = ''
                logger.info('beging to add new article No. %s', i)
                # Prefer the publisher's original URL when FeedBurner
                # rewrote the link.  FeedParserDict is a dict, so use
                # membership tests instead of the Py2-only has_key().
                if 'feedburner_origlink' in entry:
                    url = entry.feedburner_origlink
                else:
                    url = entry.link
                if 'content' in entry:
                    content = entry.content[0].value
                else:
                    content = entry.description
                if 'author' in entry:
                    author_name = entry.author
                else:
                    author_name = "转载"
                # Strip HTML tags from the title before storing it.
                stripper = HTMLStripper()
                stripper.feed(title)
                title = stripper.get_data()
                content = htmllib.decoding(content)
                content = htmllib.GetFeedclean(url, content, stop_target)
                if 'updated_parsed' in entry:
                    date_published = datetime(*entry.updated_parsed[:6])
                else:
                    date_published = datetime.now()
            except Exception as data:
                # Best-effort: a malformed entry is logged and stored
                # with whatever fields were extracted before the error.
                logger.warn('this like something happened,the error is %s', data)
            try:
                feedresult = self.__store_article(title, url, category, content, date_published, author_name, feed_url, feed)
                if feedresult:
                    logger.info('The No.%s is fetched to the db', i)
                else:
                    logger.error('The No.%s is fetched Fail', i)
                    Mystat = False
            except Exception as data:
                logger.warning('the error is %s', data)
                Mystat = False
def __store_entry(self, feed):
    """Publish a fetched FeedsResult row as a site Entry.

    Copies title/excerpt/author/date/content from *feed* into an Entry
    (created if absent), attaches the feed's category and the current
    site, then marks feed.fetch_stat = 4 on success or 3 on any failure.
    """
    try:
        entry, result = Entry.published.get_or_create(title=feed.title)
        # Excerpt is plain-text-filtered and truncated to 80 chars.
        entry.excerpt = htmllib.Filter_html(feed.excerpt).strip()[:80] + u'……'
        entry.status = 2
        entry.author_name = htmllib.decoding(feed.author_name)
        entry.date = feed.date
        entry.slug = htmllib.sid()
        # NOTE(review): this consumes __Parse_image's return value --
        # that helper must return the rewritten content string.
        entry.content = htmllib.decoding(self.__Parse_image(feed.content))
        entry.categories.add(feed.feed.category)
        entry.sites = [Site.objects.get_current(), ]
        entry.save()
        feed.fetch_stat = 4
        feed.save()
    except Exception as data:
        # Any failure marks the source row as errored (fetch_stat = 3)
        # so it can be retried/inspected later.
        logger.error('the db saved error is: %s', data)
        feed.fetch_stat = 3
        feed.save()
def __store_article(self, contenthtml, feed):
    """Attach fetched full-page HTML to an existing FeedsResult row and
    queue its images for download.

    Sets entry.fetch_stat = 1 on success and records a TempImages row per
    discovered image URL; any failure sets fetch_stat = 2.

    NOTE(review): another __store_article with a different signature
    appears earlier in this file -- if both live on the same class, only
    the last definition survives; confirm they belong to different classes.
    """
    entry = FeedsResult.objects.get(pk=feed.pk)
    try:
        entry.content = htmllib.decoding(contenthtml)
        entry.fetch_stat = 1
        images = htmllib.Parse_images_url(contenthtml)
        for image in images:
            # get_or_create keeps image rows unique per (oldurl, entry).
            obj, result = TempImages.objects.get_or_create(oldurl=image, entry=entry)
    except Exception as data:
        entry.fetch_stat = 2
        logger.info('the db saved error is: %s', data)
    # TODO(review): entry is mutated but never saved here -- confirm the
    # caller persists it, otherwise the fetch_stat/content changes are lost.
def __Parse_image(self, content):
    """Rewrite image URLs in *content* to their locally mirrored copies.

    For every image URL found, looks up the corresponding TempImages row
    and substitutes its newurl.  Returns the (possibly rewritten) content.

    Bug fix: the original never returned content, so callers (e.g.
    __store_entry) received None instead of the rewritten HTML.
    """
    images = htmllib.Parse_images_url(content)
    if images:
        try:
            for image in images:
                tmpimage = TempImages.objects.get(oldurl=image)
                if tmpimage is not None:
                    content = gbtools.stringQ2B(content)
                    content = htmllib.decoding(content).replace(image, tmpimage.newurl)
        except Exception as data:
            # A missing TempImages row (DoesNotExist) aborts the loop;
            # remaining images are left pointing at their original URLs.
            logger.info(data)
    return content