Beispiel #1
0
    def getFeed(self, request, queryset, *arg1, **arg2):
        """Fetch and parse every feed in ``queryset`` that is due for a refresh.

        A feed is skipped when its ``last_retrieved`` timestamp is newer than
        1200 minutes (20 hours) ago.  Each remaining feed is downloaded with
        ``getpage``; on HTTP 200 the body is handed to ``self.__parse_feed``
        and the feed's ``last_retrieved`` is updated and saved.  Any other
        status code is logged as an error.

        If the download itself raises, the loop stops: the remaining feeds
        in ``queryset`` are not processed during this call.

        ``request`` and the extra positional/keyword arguments are accepted
        but not used by this method.
        """
        logger.info(u'开始采集Feed')
        feed_retrieval_deadline = datetime.now() - timedelta(minutes=1200)

        for feed in queryset:
            if feed.last_retrieved > feed_retrieval_deadline:
                logger.info('Skipping feed %s.', feed.feedurl)
                continue

            logger.info('Getting feed %s.', feed.feedurl)
            try:
                result = getpage(feed.feedurl, 30)
            except Exception:
                # exc_info keeps the original message text but attaches the
                # traceback, so the cause of the failure is not lost.
                logger.warning(
                    'Could not get feed %s ,and the fetch is restart now',
                    feed.feedurl,
                    exc_info=True)
                # Only the in-memory object is stamped; it is not saved here.
                feed.last_retrieved = datetime.now()
                # NOTE(review): aborting the whole run on one failed feed is
                # the original behaviour and is preserved -- confirm intent.
                break

            if result.code == 200:
                self.__parse_feed(
                    result.read(),
                    feed.feedurl,
                    feed.stop_target,
                    feed.category,
                    feed.latest,
                    feed.start_target,
                    feed.mid_target,
                    feed.end_target,
                    feed.allow_target)
                feed.last_retrieved = datetime.now()
                feed.save()
            elif result.code == 500:
                logger.error('Feed %s returned with status code 500.', feed.feedurl)
            elif result.code == 404:
                logger.error('Error 404: Nothing found at %s.', feed.feedurl)
            else:
                # Previously any other status was ignored without a trace.
                logger.error('Feed %s returned with unexpected status code %s.',
                             feed.feedurl, result.code)
Beispiel #2
0
    def getArticle(self, request, queryset, *arg1, **arg2):
                """Fetch and store the content of every item in ``queryset``.

                Each item is expected to expose ``title``, ``link``, ``excerpt``,
                ``fetch_stat`` and a related ``feed`` object (with
                ``start_target`` and ``feedurl``).

                * When ``feed.start_target`` is ``'nohtml'`` the item's excerpt is
                  stored directly, without any download.
                * Otherwise the page at ``link`` is downloaded with ``getpage``.
                  On HTTP 200 the content is extracted with ``htmllib.parsehtml``
                  (or the excerpt is used when ``start_target`` is empty) and
                  passed to ``self.__store_article``; on any other status the
                  item's ``fetch_stat`` is set to 2 and saved.

                If the download or parsing raises, the error is logged and the
                method returns ``False`` immediately, leaving the remaining items
                unprocessed.  ``request`` and the extra arguments are not used.
                """
                for feed in queryset:
                        logger.info('start to fetch article,The title is %s', feed.title)
                        # NOTE(review): the except/finally clause belonging to this
                        # outer try is not visible in this excerpt.
                        try:
                                if feed.feed.start_target != 'nohtml':

                                        logger.info('fetch new article %s,at %s' % (feed.link, datetime.now()))
                                        contenthtml = ''
                                        try:
                                                # 30 is presumably a timeout in seconds -- confirm against getpage.
                                                result = getpage(feed.link, 30)
                                                if result.code == 200:
                                                        # The 'nohtml' comparison repeats the enclosing check;
                                                        # effectively this tests for a non-empty start_target.
                                                        if len(feed.feed.start_target) != 0 and feed.feed.start_target != 'nohtml':
                                                                contenthtml = htmllib.parsehtml(result.read(), feed.feed, feed.link, feed.feed.feedurl)
                                                        else:
                                                                # No start marker configured: fall back to the excerpt.
                                                                contenthtml = feed.excerpt
                        
                                                        self.__store_article(contenthtml, feed)
                        
                                                else:
                                                    # Non-200 response; 2 presumably marks a failed fetch -- TODO confirm.
                                                    feed.fetch_stat = 2
                                                    feed.save()
                                        except Exception, data:
                                                logger.info('DownloadError in get %s.the error is %s', feed.link, data)
                                                # Returning here stops the loop; later items are not fetched.
                                                return False
                                else:
                                    # 'nohtml' feeds: store the excerpt as the article body.
                                    self.__store_article(feed.excerpt, feed)
Beispiel #3
0
 def test_feed(self, request, queryset):
     """Check that every selected feed URL can be fetched.

     For each feed in ``queryset`` the feed URL is requested through
     ``getpage`` and the outcome is reported to the user via
     ``self.message_user``: success for an HTTP 200 response, failure for
     any other status code or when the request itself raises.
     """
     for feed in queryset:
         try:
             reachable = getpage(feed.feedurl, 30).code == 200
         except Exception:
             # An unreachable feed is a failed test, not a crashed action;
             # the remaining feeds are still checked.
             reachable = False

         if reachable:
             self.message_user(request, "测试成功!")
         else:
             self.message_user(request, "测试失败!")
Beispiel #4
0
    def getImages(self, request, queryset, *arg1, **arg2):
        """Download and store every image in ``queryset`` not yet handled.

        Images whose ``stat`` equals 1 are skipped.  For every other image a
        new ``.jpg`` file name is generated with ``htmllib.sid()``, the
        encoded ``oldurl`` is downloaded with ``getpage`` and, on HTTP 200,
        the body is passed to ``self.__store_images``.  The outcome of each
        image is logged.

        An exception raised while handling one image is logged and does not
        stop the remaining images from being processed.  ``request`` and the
        extra arguments are not used.
        """
        for image in queryset:
            logger.info('start to fetch images,The url is %s', image.oldurl)
            try:
                if image.stat == 1:
                    continue

                name = htmllib.sid() + '.jpg'
                response = getpage(htmllib.encoding(image.oldurl), 30)

                # Kept separate from the HTTP response: this holds only the
                # outcome of the store step (False for a non-200 answer).
                stored = False
                if response.code == 200:
                    stored = self.__store_images(response.read(), name, image)

                if stored:
                    logger.info('Success!')
                else:
                    logger.info('this one was Fail!')
            except Exception as data:
                # "as" form: valid on Python 2.6+ and 3.x, unlike the
                # comma form it replaces.
                logger.info(data)