def getArticle(self, request, queryset, *arg1, **arg2):
    for feed in queryset:
        logging.info('Start fetching article, the title is %s', feed.title)
        if feed.feed.start_target != 'nohtml':
            logging.info('Fetching new article %s at %s' % (feed.link, datetime.now()))
            contenthtml = ''
            try:
                result = getpage(feed.link, 30)
                if result.code == 200:
                    if len(feed.feed.start_target) != 0 and feed.feed.start_target != 'nohtml':
                        # Extract the article body from the downloaded page.
                        contenthtml = htmllib.parsehtml(result.read(), feed.feed,
                                                        feed.link, feed.feed.feedurl)
                    else:
                        contenthtml = feed.excerpt
                    self.__store_article(contenthtml, feed)
                    return True
                return False
            except Exception as data:
                logging.info('DownloadError in get %s, the error is %s', feed.link, data)
                return False
        else:
            # No HTML target is configured, so store the feed excerpt as-is.
            self.__store_article(feed.excerpt, feed)
def getFeed(self, request, queryset, *arg1, **arg2):
    logging.info('Start fetching feeds')
    # Only refetch feeds that have not been retrieved in the last 1200 minutes (20 hours).
    feed_retrieval_deadline = datetime.now() - timedelta(minutes=1200)
    for feed in queryset:
        if feed.last_retrieved > feed_retrieval_deadline:
            logging.info('Skipping feed %s.', feed.feedurl)
            continue
        logging.info('Getting feed %s.', feed.feedurl)
        try:
            result = getpage(feed.feedurl, 30)
        except Exception:
            logging.warning('Could not get feed %s; fetching will be restarted.' % feed.feedurl)
            feed.last_retrieved = datetime.now()
            #feed.save()
            break
        if result.code == 200:
            self.__parse_feed(result.read(), feed.feedurl, feed.stop_target,
                              feed.category, feed.latest, feed.start_target,
                              feed.mid_target, feed.end_target, feed.allow_target)
            feed.last_retrieved = datetime.now()
            feed.save()
        elif result.code == 500:
            logging.error('Feed %s returned with status code 500.' % feed.feedurl)
        elif result.code == 404:
            logging.error('Error 404: Nothing found at %s.' % feed.feedurl)
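# ``getpage(url, timeout)`` is imported from elsewhere in the project; the
# actions in this module only rely on it returning an object that exposes a
# ``code`` attribute and a ``read()`` method. Below is a minimal sketch of such
# a helper, assuming a plain urllib2 fetch; the real helper may add headers,
# retries, or a different HTTP client, and this stand-in is purely illustrative.
def _getpage_sketch(url, timeout):
    """Hypothetical stand-in for the project's getpage() helper."""
    import urllib2
    try:
        return urllib2.urlopen(url, timeout=timeout)
    except urllib2.HTTPError as err:
        # HTTPError also exposes .code and .read(), which lets callers branch
        # on 404/500 responses the way getFeed() does.
        return err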
def test_feed(self, request, queryset):
    """Fetch each selected feed once and report the result to the admin user."""
    for feed in queryset:
        result = getpage(feed.feedurl, 30)
        if result.code == 200:
            self.message_user(request, "Test succeeded!")
        else:
            self.message_user(request, "Test failed!")
def getImages(self, request, queryset, *arg1, **arg2):
    for image in queryset:
        logging.info('Start fetching image, the url is %s', image.oldurl)
        try:
            # Build a new file name for the stored copy of the image.
            name = htmllib.sid() + '.jpg'
            result = getpage(htmllib.encoding(image.oldurl), 30)
            if result.code == 200:
                result = self.__store_images(result.read(), name, image)
            else:
                result = False
            if result:
                logging.info('Success!')
            else:
                logging.info('This one failed!')
        except Exception as data:
            logging.info(data)
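# The action methods above use the Django ModelAdmin action signature
# (self, request, queryset), so they are presumably declared inside ModelAdmin
# subclasses and listed in ``actions``. A minimal sketch of that wiring for the
# feed-level actions, assuming a hypothetical Feed model and import path that
# are not shown in this module; getArticle and getImages would be registered
# the same way on their own ModelAdmin classes.
from django.contrib import admin

from models import Feed  # hypothetical model name / import path


class FeedAdmin(admin.ModelAdmin):
    # getFeed and test_feed (defined above) would live on this class.
    actions = ['getFeed', 'test_feed']


admin.site.register(Feed, FeedAdmin)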