def getFeed(self, request, queryset, *arg1, **arg2):
    """Download and parse every feed in ``queryset`` that is due for a refresh.

    A feed is skipped when its ``last_retrieved`` timestamp is newer than
    1200 minutes ago.  Every other feed is fetched with ``getpage``; on an
    HTTP 200 response the body is passed to ``self.__parse_feed`` and the
    feed's ``last_retrieved`` is set to now and saved.  Any other status code
    is only logged and the feed is left unchanged.

    If ``getpage`` raises, the failure is logged and the loop stops, so the
    remaining feeds in ``queryset`` are not processed in this run.

    Args:
        request: the current request (unused here).
        queryset: iterable of feed objects exposing ``feedurl``,
            ``last_retrieved``, ``category``, ``latest``, the ``*_target``
            fields and ``save()``.
        *arg1, **arg2: accepted and ignored.
    """
    logger.info(u'开始采集Feed')
    feed_retrieval_deadline = datetime.now() - timedelta(minutes=1200)

    for feed in queryset:
        if feed.last_retrieved > feed_retrieval_deadline:
            logger.info('Skipping feed %s.', feed.feedurl)
            continue

        logger.info('Getting feed %s.', feed.feedurl)
        try:
            # NOTE(review): 30 is presumably a timeout in seconds - confirm
            # against getpage's definition.
            result = getpage(feed.feedurl, 30)
        except Exception:
            # exc_info keeps the underlying cause in the log instead of
            # discarding it.
            logger.warning(
                'Could not get feed %s ,and the fetch is restart now',
                feed.feedurl,
                exc_info=True,
            )
            # In-memory only: save() is intentionally not called on this
            # path, so the timestamp is not persisted.
            feed.last_retrieved = datetime.now()
            break

        status = result.code
        if status == 200:
            self.__parse_feed(
                result.read(),
                feed.feedurl,
                feed.stop_target,
                feed.category,
                feed.latest,
                feed.start_target,
                feed.mid_target,
                feed.end_target,
                feed.allow_target,
            )
            feed.last_retrieved = datetime.now()
            feed.save()
        elif status == 500:
            logger.error('Feed %s returned with status code 500.', feed.feedurl)
        elif status == 404:
            logger.error('Error 404: Nothing found at %s.', feed.feedurl)
        else:
            # Previously any other status was dropped without a trace.
            logger.error(
                'Feed %s returned with unexpected status code %s.',
                feed.feedurl,
                status,
            )
def getArticle(self, request, queryset, *arg1, **arg2):
    """Fetch the full content for each entry in ``queryset`` and store it.

    For an entry whose parent feed has ``start_target == 'nohtml'`` the
    entry's ``excerpt`` is stored directly, without any download.  Otherwise
    the entry's ``link`` is fetched with ``getpage``:

    * HTTP 200 and a non-empty ``start_target``: the page is run through
      ``htmllib.parsehtml`` and the result is stored.
    * HTTP 200 and an empty ``start_target``: the ``excerpt`` is stored.
    * any other status: ``fetch_stat`` is set to 2 and the entry is saved.

    Args:
        request: the current request (unused here).
        queryset: iterable of entry objects exposing ``title``, ``link``,
            ``excerpt``, ``fetch_stat``, ``save()`` and a ``feed`` relation.
        *arg1, **arg2: accepted and ignored.

    Returns:
        ``False`` as soon as fetching/parsing/storing one entry raises (the
        remaining entries are then not processed); ``None`` otherwise.
    """
    for feed in queryset:
        logger.info('start to fetch article,The title is %s', feed.title)
        start_target = feed.feed.start_target

        if start_target == 'nohtml':
            # Feed is configured to use the excerpt as-is: nothing to download.
            self.__store_article(feed.excerpt, feed)
            continue

        logger.info('fetch new article %s,at %s', feed.link, datetime.now())
        try:
            result = getpage(feed.link, 30)
            if result.code == 200:
                if start_target:
                    contenthtml = htmllib.parsehtml(
                        result.read(), feed.feed, feed.link, feed.feed.feedurl
                    )
                else:
                    # No extraction markers configured: fall back to the excerpt.
                    contenthtml = feed.excerpt
                self.__store_article(contenthtml, feed)
            else:
                # NOTE(review): 2 presumably marks the entry as failed -
                # confirm against the model's fetch_stat values.
                feed.fetch_stat = 2
                feed.save()
        except Exception as data:
            logger.info('DownloadError in get %s.the error is %s', feed.link, data)
            return False
def test_feed(self, request, queryset): """Set the entries to the user""" for feed in queryset: result = getpage(feed.feedurl, 30) if result.code == 200: self.message_user(request, "测试成功!") else: self.message_user(request, "测试失败!")
def getImages(self, request, queryset, *arg1, **arg2):
    """Download and store every image in ``queryset`` not yet processed.

    Images whose ``stat`` equals 1 are skipped.  For the others a new
    ``.jpg`` file name is generated with ``htmllib.sid()``, the image's
    ``oldurl`` is fetched with ``getpage`` and, on HTTP 200, the body is
    handed to ``self.__store_images``.  The outcome of each image is logged.
    Any exception raised while handling one image is logged and processing
    continues with the next image.

    Args:
        request: the current request (unused here).
        queryset: iterable of image objects exposing ``oldurl`` and ``stat``.
        *arg1, **arg2: accepted and ignored.
    """
    for image in queryset:
        logger.info('start to fetch images,The url is %s', image.oldurl)
        try:
            if image.stat == 1:
                # NOTE(review): stat == 1 presumably means "already
                # fetched" - confirm against the model.
                continue

            name = htmllib.sid() + '.jpg'
            response = getpage(htmllib.encoding(image.oldurl), 30)
            if response.code == 200:
                stored = self.__store_images(response.read(), name, image)
            else:
                stored = False

            if stored:
                logger.info('Success!')
            else:
                logger.info('this one was Fail!')
        except Exception as data:
            logger.info(data)