Example #1
0
    def fetch(self):
        """Download the feed with feedparser and store the result on ``self.fpf``.

        The etag / last-modified values read from ``self.feed`` are sent with
        the request. Both are dropped when ``self.options['force']`` is set,
        on a random ~1% of calls, or when the feed has not been fetched once
        or is not marked ``known_good``. In the forced/random case the
        address is additionally passed through ``cache_bust_url``.

        When ``self.options`` carries a truthy ``'fpf'`` value, that object is
        stored on ``self.fpf`` and ``(FEED_OK, self.fpf)`` is returned without
        calling feedparser. On the network path the parsed result is stored on
        ``self.fpf``; no return statement follows that path in this block.

        A ``TypeError`` / ``ValueError`` from the first parse triggers one
        retry with ``feedparser.PARSE_MICROFORMATS`` switched off. An error
        raised by the retry propagates to the caller.
        """
        # NOTE(review): `start` is not read anywhere in this block; it is kept
        # because the timestamp may be consumed by code outside this view.
        start = time.time()
        identity = self.get_identity()
        title = self.feed.title[:30]
        logging.debug(
            u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (
                identity, title, self.feed.id,
                datetime.datetime.now() - self.feed.last_update))

        etag = self.feed.etag
        modified = None
        if self.feed.last_modified:
            modified = self.feed.last_modified.utctimetuple()[:7]
        address = self.feed.feed_address

        if self.options.get('force') or random.random() <= .01:
            # Forced (or randomly sampled) fetch: send no conditional headers
            # and rewrite the address via cache_bust_url.
            modified = None
            etag = None
            address = cache_bust_url(address)
            logging.debug(u'   ---> [%-30s] ~FBForcing fetch: %s' %
                          (title, address))
        elif not self.feed.fetched_once or not self.feed.known_good:
            # Never fetched / not known good: send no conditional headers.
            modified = None
            etag = None

        subscribers = self.feed.num_subscribers
        USER_AGENT = ('NewsBlur Feed Fetcher - %s subscriber%s - %s '
                      '(Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) '
                      'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 '
                      'Safari/534.48.3)' % (
                          subscribers,
                          's' if subscribers != 1 else '',
                          self.feed.permalink,
                      ))

        feed_xml = self.options.get('feed_xml')
        if feed_xml:
            # The pushed XML is only logged here; it is not parsed.
            logging.debug(
                u'   ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s'
                % (title, len(feed_xml)))

        pushed_fpf = self.options.get('fpf')
        if pushed_fpf:
            # A pre-parsed feed object was supplied: use it, skip the network.
            self.fpf = pushed_fpf
            logging.debug(
                u'   ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.'
                % title)
            return FEED_OK, self.fpf

        try:
            self.fpf = feedparser.parse(address,
                                        agent=USER_AGENT,
                                        etag=etag,
                                        modified=modified)
        except (TypeError, ValueError) as e:
            logging.debug(u'   ***> [%-30s] ~FR%s, turning off microformats.' %
                          (title, e))
            feedparser.PARSE_MICROFORMATS = False
            try:
                self.fpf = feedparser.parse(address,
                                            agent=USER_AGENT,
                                            etag=etag,
                                            modified=modified)
            finally:
                # PARSE_MICROFORMATS is module-level state shared by every
                # caller of feedparser; restore it even if the retry raises.
                feedparser.PARSE_MICROFORMATS = True
Example #2
0
    def fetch(self):
        """Fetch the feed through feedparser; the result lands on ``self.fpf``.

        Request setup:
          * ``etag`` and ``modified`` come from ``self.feed`` and are cleared
            when the ``'force'`` option is set, when ``random.random()`` is
            <= .01, or when the feed is not ``fetched_once`` / ``known_good``.
          * In the forced/random case the address is also rewritten with
            ``cache_bust_url``.

        Returns ``(FEED_OK, self.fpf)`` immediately when the ``'fpf'`` option
        holds a truthy value (no feedparser call is made). The network path
        assigns ``self.fpf`` and has no return statement in this block.

        ``TypeError`` / ``ValueError`` raised by the first parse cause a single
        retry with ``feedparser.PARSE_MICROFORMATS`` set to ``False``. The flag
        is set back to ``True`` afterwards whether or not the retry succeeds,
        and any exception from the retry is re-raised unchanged.
        """
        # NOTE(review): unused within this block; possibly read by code that
        # lies outside the visible portion of the method.
        start = time.time()
        identity = self.get_identity()
        log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (
            identity,
            self.feed.title[:30],
            self.feed.id,
            datetime.datetime.now() - self.feed.last_update)
        logging.debug(log_msg)

        etag = self.feed.etag
        last_modified = self.feed.last_modified
        modified = last_modified.utctimetuple()[:7] if last_modified else None
        address = self.feed.feed_address

        if self.options.get('force') or random.random() <= .01:
            modified = None
            etag = None
            address = cache_bust_url(address)
            logging.debug(u'   ---> [%-30s] ~FBForcing fetch: %s' % (
                          self.feed.title[:30], address))
        elif not (self.feed.fetched_once and self.feed.known_good):
            modified = None
            etag = None

        USER_AGENT = ('NewsBlur Feed Fetcher - %s subscriber%s - %s '
                      '(Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) '
                      'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 '
                      'Safari/534.48.3)' % (
                          self.feed.num_subscribers,
                          's' if self.feed.num_subscribers != 1 else '',
                          self.feed.permalink,
                      ))

        if self.options.get('feed_xml'):
            # Only the length of the pushed XML is logged; it is not parsed.
            logging.debug(u'   ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s' % (
                          self.feed.title[:30], len(self.options.get('feed_xml'))))

        if self.options.get('fpf'):
            # Pre-parsed feed supplied by the caller: short-circuit.
            self.fpf = self.options.get('fpf')
            logging.debug(u'   ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.' % (
                          self.feed.title[:30]))
            return FEED_OK, self.fpf

        # Same keyword arguments are used for the first attempt and the retry.
        parse_kwargs = dict(agent=USER_AGENT, etag=etag, modified=modified)
        try:
            self.fpf = feedparser.parse(address, **parse_kwargs)
        except (TypeError, ValueError) as e:
            logging.debug(u'   ***> [%-30s] ~FR%s, turning off microformats.' %
                          (self.feed.title[:30], e))
            feedparser.PARSE_MICROFORMATS = False
            try:
                self.fpf = feedparser.parse(address, **parse_kwargs)
            finally:
                # Module-level flag: must not stay disabled if the retry fails.
                feedparser.PARSE_MICROFORMATS = True
Example #3
0
    def fetch(self):
        """Run feedparser against the feed address and keep the result on ``self.fpf``.

        Conditional headers: ``etag`` / ``modified`` are read from
        ``self.feed``. They are reset to ``None`` for a forced fetch (option
        ``"force"``), for a random ~1% of calls, and for feeds that are not
        ``fetched_once`` or not ``known_good``. A forced/random fetch also
        routes the address through ``cache_bust_url``.

        The user agent string embeds the subscriber count and
        ``settings.NEWSBLUR_URL``.

        Returns:
            ``(FEED_OK, self.fpf)`` when ``self.options["fpf"]`` is truthy; in
            that case feedparser is not called. The network path sets
            ``self.fpf`` and contains no return statement in this block.

        Raises:
            Whatever the retry raises. The first ``TypeError`` /
            ``ValueError`` is caught and the parse is repeated once with
            ``feedparser.PARSE_MICROFORMATS`` disabled.
        """
        # NOTE(review): `start` is never read in this block — kept on the
        # assumption that later code (not visible here) uses it; confirm.
        start = time.time()
        identity = self.get_identity()
        short_title = self.feed.title[:30]
        log_msg = u"%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s" % (
            identity,
            short_title,
            self.feed.id,
            datetime.datetime.now() - self.feed.last_update,
        )
        logging.debug(log_msg)

        etag = self.feed.etag
        modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None
        address = self.feed.feed_address

        if self.options.get("force") or random.random() <= 0.01:
            modified = etag = None
            address = cache_bust_url(address)
            logging.debug(u"   ---> [%-30s] ~FBForcing fetch: %s" % (short_title, address))
        elif not self.feed.fetched_once or not self.feed.known_good:
            modified = etag = None

        plural = "s" if self.feed.num_subscribers != 1 else ""
        USER_AGENT = (
            "NewsBlur Feed Fetcher - %s subscriber%s - %s (Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/536.2.3 (KHTML, like Gecko) Version/5.2)"
            % (self.feed.num_subscribers, plural, settings.NEWSBLUR_URL)
        )

        if self.options.get("feed_xml"):
            # The pushed XML is deliberately not parsed; only its size is logged.
            logging.debug(
                u"   ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s"
                % (short_title, len(self.options.get("feed_xml")))
            )

        if self.options.get("fpf"):
            self.fpf = self.options.get("fpf")
            logging.debug(u"   ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping." % (short_title))
            return FEED_OK, self.fpf

        try:
            self.fpf = feedparser.parse(address, agent=USER_AGENT, etag=etag, modified=modified)
        except (TypeError, ValueError) as e:
            logging.debug(u"   ***> [%-30s] ~FR%s, turning off microformats." % (short_title, e))
            feedparser.PARSE_MICROFORMATS = False
            try:
                self.fpf = feedparser.parse(address, agent=USER_AGENT, etag=etag, modified=modified)
            finally:
                # The flag lives on the feedparser module; re-enable it even
                # when the retry raises so other fetches are unaffected.
                feedparser.PARSE_MICROFORMATS = True
Example #4
0
    def fetch(self):
        """Download the feed with feedparser and store the result on ``self.fpf``.

        ``etag`` and ``modified`` are read from ``self.feed`` and sent with the
        request. They are cleared when the ``'force'`` option is set, on a
        random ~1% of calls, or when the feed is not ``fetched_once`` / not
        ``known_good``. A forced/random fetch also passes the address through
        ``cache_bust_url``.

        If the first parse raises ``TypeError``, ``ValueError`` or
        ``KeyError``, the feed is parsed once more with only the user agent
        (no etag / modified). Errors from that second attempt propagate.

        No return statement is present in this block; the outcome is the
        ``self.fpf`` attribute.
        """
        # NOTE(review): `start` is not read in this block; kept in case code
        # outside the visible portion uses it.
        start = time.time()
        identity = self.get_identity()
        title = self.feed.title[:30]
        log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY)' % (
            identity, title, self.feed.id)
        logging.debug(log_msg)

        etag = self.feed.etag
        modified = None
        if self.feed.last_modified:
            modified = self.feed.last_modified.utctimetuple()[:7]
        address = self.feed.feed_address

        if self.options.get('force') or random.random() <= .01:
            # Forced, or picked at random (~1% of calls): drop the
            # conditional headers so the request is unconditional.
            modified = None
            etag = None
            address = cache_bust_url(address)
            logging.debug(u'   ---> [%-30s] ~FBForcing fetch: %s' % (
                          title, address))
        elif not self.feed.fetched_once or not self.feed.known_good:
            # Feed has never been fetched, or is not marked known_good.
            modified = None
            etag = None

        USER_AGENT = ('NewsBlur Feed Fetcher - %s '
                      '(Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) '
                      'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 '
                      'Safari/534.48.3)' % (
                          self.feed.permalink,
                      ))

        try:
            self.fpf = feedparser.parse(address,
                                        agent=USER_AGENT,
                                        etag=etag,
                                        modified=modified)
        except (TypeError, ValueError, KeyError) as e:
            logging.debug(u'   ***> [%-30s] ~FR%s, turning off headers.' %
                          (title, e))
            # Retry without the etag / modified arguments.
            self.fpf = feedparser.parse(address, agent=USER_AGENT)
Example #5
0
    def fetch(self):
        """Download the feed and store the feedparser result on ``self.fpf``.

        Steps, in order:
          1. Read ``etag`` / ``modified`` from ``self.feed``; clear them for a
             forced fetch (option ``"force"``), a random ~1% of calls, or a
             feed that is not ``fetched_once`` / not ``known_good``. A
             forced/random fetch also runs the address through
             ``cache_bust_url``.
          2. If option ``"fpf"`` is truthy, adopt it and return
             ``(FEED_OK, self.fpf)`` without any request.
          3. If the address contains ``"youtube.com"``, obtain the feed via
             ``self.fetch_youtube`` and parse what it returns.
          4. If ``self.fpf`` is still falsy, parse the address with the
             conditional headers; on ``TypeError`` / ``ValueError`` /
             ``KeyError`` / ``EOFError`` retry once without them.

        Returns:
            ``(FEED_OK, self.fpf)`` for step 2.
            ``(FEED_ERRHTTP, None)`` when the YouTube fetch raises
            ``requests.adapters.ConnectionError`` or yields a falsy value, or
            when both parse attempts in step 4 fail.
            Otherwise no return statement is reached in this block.
        """
        # NOTE(review): `start` is not read in this block; kept in case code
        # outside the visible portion uses it.
        start = time.time()
        identity = self.get_identity()
        title = self.feed.title[:30]
        log_msg = u"%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s" % (
            identity,
            title,
            self.feed.id,
            datetime.datetime.now() - self.feed.last_update,
        )
        logging.debug(log_msg)

        etag = self.feed.etag
        modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None
        address = self.feed.feed_address

        if self.options.get("force") or random.random() <= 0.01:
            modified = None
            etag = None
            address = cache_bust_url(address)
            logging.debug(u"   ---> [%-30s] ~FBForcing fetch: %s" % (title, address))
        elif not self.feed.fetched_once or not self.feed.known_good:
            modified = None
            etag = None

        USER_AGENT = (
            "NewsBlur Feed Fetcher - %s subscriber%s - %s "
            "(Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) "
            "AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 "
            "Safari/534.48.3)"
            % (self.feed.num_subscribers, "s" if self.feed.num_subscribers != 1 else "", self.feed.permalink)
        )
        if self.options.get("feed_xml"):
            # The pushed XML is only measured for the log line, not parsed.
            logging.debug(
                u"   ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s"
                % (title, len(self.options.get("feed_xml")))
            )

        if self.options.get("fpf"):
            self.fpf = self.options.get("fpf")
            logging.debug(u"   ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping." % (title))
            return FEED_OK, self.fpf

        if "youtube.com" in address:
            try:
                youtube_feed = self.fetch_youtube(address)
            except (requests.adapters.ConnectionError):
                youtube_feed = None
            if not youtube_feed:
                logging.debug(u"   ***> [%-30s] ~FRYouTube fetch failed: %s." % (title, address))
                return FEED_ERRHTTP, None
            self.fpf = feedparser.parse(youtube_feed)

        # NOTE(review): relies on `self.fpf` already existing as an attribute
        # (presumably initialised elsewhere in the class) — confirm.
        if not self.fpf:
            parse_errors = (TypeError, ValueError, KeyError, EOFError)
            try:
                self.fpf = feedparser.parse(address, agent=USER_AGENT, etag=etag, modified=modified)
            except parse_errors as header_err:
                logging.debug(u"   ***> [%-30s] ~FR%s, turning off headers." % (title, header_err))
                # The header-less retry needs its own try block: a second
                # `except` clause on the outer try with the same exception
                # tuple can never run, which left this call unguarded.
                try:
                    self.fpf = feedparser.parse(address, agent=USER_AGENT)
                except parse_errors as retry_err:
                    logging.debug(u"   ***> [%-30s] ~FRFeed fetch failed: %s." % (title, retry_err))
                    return FEED_ERRHTTP, None