Example #1
    def process_feed_wrapper(self, feed_queue):
        delta = None
        current_process = multiprocessing.current_process()
        identity = "X"
        if current_process._identity:
            identity = current_process._identity[0]

        for feed_id in feed_queue:
            ret_entries = {
                ENTRY_NEW: 0,
                ENTRY_UPDATED: 0,
                ENTRY_SAME: 0,
                ENTRY_ERR: 0
            }
            start_time = datetime.datetime.utcnow()

            try:
                feed = self.refresh_feed(feed_id)

                ffeed = FetchFeed(feed_id, self.options)
                ret_feed, fetched_feed = ffeed.fetch()

                if ((fetched_feed and ret_feed == FEED_OK)
                        or self.options['force']):
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()

                    feed = self.refresh_feed(feed_id)

                    if ret_entries.get(ENTRY_NEW) or self.options[
                            'force'] or not feed.fetched_once:
                        if not feed.fetched_once:
                            feed.fetched_once = True
                            feed.save()
                        MUserStory.delete_old_stories(feed_id=feed.pk)
                        try:
                            self.count_unreads_for_subscribers(feed)
                        except TimeoutError:
                            logging.debug(
                                '   ---> [%-30s] Unread count took too long...'
                                % (unicode(feed)[:30], ))
                    cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25))
                    # if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED) or self.options['force']:
                    #     feed.get_stories(force=True)
            except KeyboardInterrupt:
                break
            except urllib2.HTTPError, e:
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Feed.DoesNotExist, e:
                logging.debug('   ---> [%-30s] Feed is now gone...' %
                              (unicode(feed_id)[:30]))
                continue
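All of the variants above tag their work with a per-worker identity taken from multiprocessing.current_process()._identity. The standalone sketch below shows just that mechanism; note that _identity is a private attribute, an empty tuple in the main process and a tuple such as (1,) inside a worker.

import multiprocessing

def worker_identity():
    # _identity is private API: empty tuple in the main process, a tuple
    # like (1,) or (2,) inside a spawned worker process.
    current_process = multiprocessing.current_process()
    if current_process._identity:
        return current_process._identity[0]
    return "X"

def work(feed_ids):
    identity = worker_identity()
    for feed_id in feed_ids:
        print("[worker %s] would fetch feed %s" % (identity, feed_id))

if __name__ == "__main__":
    # The parent reports "X"; each child reports its worker number.
    work([1])
    procs = [multiprocessing.Process(target=work, args=([i],)) for i in (2, 3)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()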
Example #2
    def process_feed_wrapper(self, feed_queue):
        """ wrapper for ProcessFeed
        """
        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        delta = None
        current_process = multiprocessing.current_process()
        identity = "X"
        if current_process._identity:
            identity = current_process._identity[0]
        for feed_id in feed_queue:
            ret_entries = {ENTRY_NEW: 0, ENTRY_UPDATED: 0, ENTRY_SAME: 0, ENTRY_ERR: 0}
            start_time = datetime.datetime.utcnow()

            feed = self.refresh_feed(feed_id)

            try:
                ffeed = FetchFeed(feed_id, self.options)
                ret_feed, fetched_feed = ffeed.fetch()

                if (fetched_feed and ret_feed == FEED_OK) or self.options["force"]:
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()

                    feed = self.refresh_feed(feed_id)

                    if ret_entries.get(ENTRY_NEW) or self.options["force"] or not feed.fetched_once:
                        if not feed.fetched_once:
                            feed.fetched_once = True
                            feed.save()
                        MUserStory.delete_old_stories(feed_id=feed.pk)
                        user_subs = UserSubscription.objects.filter(feed=feed)
                        logging.debug(
                            u"   ---> [%-30s] Computing scores for all feed subscribers: %s subscribers"
                            % (unicode(feed)[:30], user_subs.count())
                        )
                        stories_db = MStory.objects(story_feed_id=feed.pk, story_date__gte=UNREAD_CUTOFF)
                        for sub in user_subs:
                            cache.delete("usersub:%s" % sub.user_id)
                            silent = False if self.options["verbose"] >= 2 else True
                            sub.calculate_feed_scores(silent=silent, stories_db=stories_db)
                    cache.delete("feed_stories:%s-%s-%s" % (feed.id, 0, 25))
                    # if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED) or self.options['force']:
                    #     feed.get_stories(force=True)
            except KeyboardInterrupt:
                break
            except urllib2.HTTPError, e:
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Feed.DoesNotExist, e:
                logging.debug("   ---> [%-30s] Feed is now gone..." % (unicode(feed)[:30]))
                return
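Example #2 bounds the scoring work with an UNREAD_CUTOFF window derived from settings.DAYS_OF_UNREAD. Below is a minimal sketch of that window; the 14-day value is an assumption for illustration, the real one comes from Django settings.

import datetime

DAYS_OF_UNREAD = 14  # assumed value; the code above reads settings.DAYS_OF_UNREAD

def unread_cutoff(now=None):
    # Stories older than this datetime no longer count toward unread totals.
    now = now or datetime.datetime.utcnow()
    return now - datetime.timedelta(days=DAYS_OF_UNREAD)

# Only stories newer than the cutoff are loaded for scoring, e.g.:
#   MStory.objects(story_feed_id=feed.pk, story_date__gte=unread_cutoff())
print(unread_cutoff())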
Example #3
    def process_feed_wrapper(self, feed_queue):
        delta = None
        current_process = multiprocessing.current_process()
        identity = "X"
        if current_process._identity:
            identity = current_process._identity[0]
            
        for feed_id in feed_queue:
            ret_entries = {
                ENTRY_NEW: 0,
                ENTRY_UPDATED: 0,
                ENTRY_SAME: 0,
                ENTRY_ERR: 0
            }
            start_time = time.time()
            ret_feed = FEED_ERREXC
            try:
                feed = self.refresh_feed(feed_id)

                if self.options.get('fake'):
                    logging.debug('   ---> [%-30s] ~BGFaking fetch, skipping...' % (unicode(feed)[:30],))
                    continue
                
                ffeed = FetchFeed(feed_id, self.options)
                ret_feed, fetched_feed = ffeed.fetch()
                
                if ((fetched_feed and ret_feed == FEED_OK) or self.options['force']):
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()
                    
                    feed = self.refresh_feed(feed_id)
                    
                    if ret_entries.get(ENTRY_NEW) or self.options['force']:
                        if not feed.known_good:
                            feed.known_good = True
                            feed.save()
                        MUserStory.delete_old_stories(feed_id=feed.pk)
                        try:
                            self.count_unreads_for_subscribers(feed)
                        except TimeoutError:
                            logging.debug('   ---> [%-30s] Unread count took too long...' % (unicode(feed)[:30],))
                    cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25))
                    # if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED) or self.options['force']:
                    #     feed.get_stories(force=True)
            except KeyboardInterrupt:
                break
            except urllib2.HTTPError, e:
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Feed.DoesNotExist, e:
                logging.debug('   ---> [%-30s] Feed is now gone...' % (unicode(feed_id)[:30]))
                continue
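Examples #1 and #3 guard count_unreads_for_subscribers with a TimeoutError, which in the real project comes from its own timeout helper. The sketch below shows one way such a limit could be enforced with signal.alarm; it is an illustration only (Unix, main thread), not the helper this code actually uses.

import signal

class TimeoutError(Exception):
    pass

def call_with_timeout(seconds, func, *args, **kwargs):
    # Raise TimeoutError if func() runs longer than `seconds`.
    def _handler(signum, frame):
        raise TimeoutError("call exceeded %s seconds" % seconds)
    old_handler = signal.signal(signal.SIGALRM, _handler)
    signal.alarm(seconds)
    try:
        return func(*args, **kwargs)
    finally:
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)

# Usage mirroring the try/except in the examples:
#     try:
#         call_with_timeout(30, self.count_unreads_for_subscribers, feed)
#     except TimeoutError:
#         logging.debug('   ---> [%-30s] Unread count took too long...' % ...)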
Example #4
    def process_feed_wrapper(self, feed_queue):
        delta = None
        current_process = multiprocessing.current_process()
        identity = "X"
        feed = None

        if current_process._identity:
            identity = current_process._identity[0]

        for feed_id in feed_queue:
            start_duration = time.time()
            feed_fetch_duration = None
            feed_process_duration = None
            page_duration = None
            icon_duration = None
            feed_code = None
            ret_entries = None
            start_time = time.time()
            ret_feed = FEED_ERREXC
            try:
                feed = self.refresh_feed(feed_id)

                skip = False
                if self.options.get('fake'):
                    skip = True
                    weight = "-"
                    quick = "-"
                    rand = "-"
                elif (self.options.get('quick') and not self.options['force'] and
                      feed.known_good and feed.fetched_once and not feed.is_push):
                    weight = feed.stories_last_month * feed.num_subscribers
                    random_weight = random.randint(1, max(weight, 1))
                    quick = float(self.options.get('quick', 0))
                    rand = random.random()
                    if random_weight < 100 and rand < quick:
                        skip = True
                if skip:
                    logging.debug('   ---> [%-30s] ~BGFaking fetch, skipping (%s/month, %s subs, %s < %s)...' % (
                        feed.title[:30],
                        weight,
                        feed.num_subscribers,
                        rand, quick))
                    continue

                ffeed = FetchFeed(feed_id, self.options)
                ret_feed, fetched_feed = ffeed.fetch()
                feed_fetch_duration = time.time() - start_duration

                if ((fetched_feed and ret_feed == FEED_OK) or self.options['force']):
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()
                    feed = pfeed.feed
                    feed_process_duration = time.time() - start_duration

                    if ret_entries['new'] or self.options['force']:
                        start = time.time()
                        if not feed.known_good or not feed.fetched_once:
                            feed.known_good = True
                            feed.fetched_once = True
                            feed = feed.save()
                        if random.random() <= 0.02:
                            feed.sync_redis()
                            MUserStory.delete_old_stories(feed_id=feed.pk)
                            MUserStory.sync_all_redis(feed_id=feed.pk)
                        try:
                            self.count_unreads_for_subscribers(feed)
                        except TimeoutError:
                            logging.debug('   ---> [%-30s] Unread count took too long...' % (feed.title[:30],))
                        if self.options['verbose']:
                            logging.debug(u'   ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss' % (
                                          feed.title[:30], time.time() - start))
            except KeyboardInterrupt:
                break
            except urllib2.HTTPError, e:
                logging.debug('   ---> [%-30s] ~FRFeed throws HTTP error: ~SB%s' % (unicode(feed_id)[:30], e.fp.read()))
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Feed.DoesNotExist, e:
                logging.debug('   ---> [%-30s] ~FRFeed is now gone...' % (unicode(feed_id)[:30]))
                continue
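Examples #4 and #5 add a probabilistic skip for --quick runs: small, rarely-updated feeds are fetched less often. Here is a standalone sketch of that heuristic, with a namedtuple standing in for the Feed model.

import random
from collections import namedtuple

Feed = namedtuple("Feed", "stories_last_month num_subscribers known_good fetched_once is_push")

def should_skip(feed, quick, force=False):
    # Mirrors the elif branch above: only known-good, already-fetched,
    # non-push feeds are candidates for skipping.
    if force or not quick:
        return False
    if not (feed.known_good and feed.fetched_once) or feed.is_push:
        return False
    weight = feed.stories_last_month * feed.num_subscribers
    random_weight = random.randint(1, max(weight, 1))
    # Lightweight feeds (random_weight < 100) are skipped on roughly a
    # `quick` fraction of fetch attempts.
    return random_weight < 100 and random.random() < quick

feed = Feed(stories_last_month=3, num_subscribers=2,
            known_good=True, fetched_once=True, is_push=False)
print(should_skip(feed, quick=0.5))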
Example #5
    def process_feed_wrapper(self, feed_queue):
        delta = None
        current_process = multiprocessing.current_process()
        identity = "X"
        feed = None

        if current_process._identity:
            identity = current_process._identity[0]

        for feed_id in feed_queue:
            start_duration = time.time()
            feed_fetch_duration = None
            feed_process_duration = None
            page_duration = None
            icon_duration = None
            feed_code = None
            ret_entries = None
            start_time = time.time()
            ret_feed = FEED_ERREXC
            try:
                feed = self.refresh_feed(feed_id)

                skip = False
                if self.options.get('fake'):
                    skip = True
                    weight = "-"
                    quick = "-"
                    rand = "-"
                elif (self.options.get('quick') and not self.options['force']
                      and feed.known_good and feed.fetched_once
                      and not feed.is_push):
                    weight = feed.stories_last_month * feed.num_subscribers
                    random_weight = random.randint(1, max(weight, 1))
                    quick = float(self.options.get('quick', 0))
                    rand = random.random()
                    if random_weight < 100 and rand < quick:
                        skip = True
                if skip:
                    logging.debug(
                        '   ---> [%-30s] ~BGFaking fetch, skipping (%s/month, %s subs, %s < %s)...'
                        % (feed.title[:30], weight, feed.num_subscribers, rand,
                           quick))
                    continue

                ffeed = FetchFeed(feed_id, self.options)
                ret_feed, fetched_feed = ffeed.fetch()
                feed_fetch_duration = time.time() - start_duration

                if ((fetched_feed and ret_feed == FEED_OK)
                        or self.options['force']):
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()
                    feed = pfeed.feed
                    feed_process_duration = time.time() - start_duration

                    if ret_entries['new'] or self.options['force']:
                        start = time.time()
                        if not feed.known_good or not feed.fetched_once:
                            feed.known_good = True
                            feed.fetched_once = True
                            feed = feed.save()
                        if random.random() <= 0.02:
                            feed.sync_redis()
                            MUserStory.delete_old_stories(feed_id=feed.pk)
                            MUserStory.sync_all_redis(feed_id=feed.pk)
                        try:
                            self.count_unreads_for_subscribers(feed)
                        except TimeoutError:
                            logging.debug(
                                '   ---> [%-30s] Unread count took too long...'
                                % (feed.title[:30], ))
                        if self.options['verbose']:
                            logging.debug(
                                u'   ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss'
                                % (feed.title[:30], time.time() - start))
            except KeyboardInterrupt:
                break
            except urllib2.HTTPError, e:
                logging.debug(
                    '   ---> [%-30s] ~FRFeed throws HTTP error: ~SB%s' %
                    (unicode(feed_id)[:30], e.fp.read()))
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Feed.DoesNotExist, e:
                logging.debug('   ---> [%-30s] ~FRFeed is now gone...' %
                              (unicode(feed_id)[:30]))
                continue
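The random.random() <= 0.02 branch in Examples #4 and #5 spreads expensive maintenance (Redis syncing, old-story deletion) over roughly 2% of successful fetches instead of running it every time. A small illustration of that pattern, with the maintenance callback as a placeholder:

import random

MAINTENANCE_PROBABILITY = 0.02  # matches the 0.02 threshold used above

def maybe_run_maintenance(feed_pk, maintenance, probability=MAINTENANCE_PROBABILITY):
    # Run maintenance(feed_pk) on roughly `probability` of the calls.
    if random.random() <= probability:
        maintenance(feed_pk)
        return True
    return False

# Over many fetches, maintenance runs on about 1 in 50 of them:
runs = sum(maybe_run_maintenance(42, lambda pk: None) for _ in range(10000))
print("maintenance ran %d times out of 10000" % runs)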
Example #6
    def process_feed_wrapper(self, feed_queue):
        """ wrapper for ProcessFeed
        """
        if not self.options['single_threaded']:
            # Close the DB so the connection can be re-opened on a per-process basis
            from django.db import connection
            connection.close()
        delta = None
        
        MONGO_DB = settings.MONGO_DB
        db = mongoengine.connection.connect(db=MONGO_DB['NAME'], host=MONGO_DB['HOST'], port=MONGO_DB['PORT'])
        
        current_process = multiprocessing.current_process()
        
        identity = "X"
        if current_process._identity:
            identity = current_process._identity[0]
        for feed in feed_queue:
            ret_entries = {
                ENTRY_NEW: 0,
                ENTRY_UPDATED: 0,
                ENTRY_SAME: 0,
                ENTRY_ERR: 0
            }
            start_time = datetime.datetime.now()
                    
            ### Uncomment to test feed fetcher
            # from random import randint
            # if randint(0,10) < 10:
            #     continue
            
            try:
                ffeed = FetchFeed(feed, self.options)
                ret_feed, fetched_feed = ffeed.fetch()
                
                if ((fetched_feed and ret_feed == FEED_OK) or self.options['force']):
                    pfeed = ProcessFeed(feed, fetched_feed, db, self.options)
                    ret_feed, ret_entries = pfeed.process()

                    if ret_entries.get(ENTRY_NEW) or self.options['force'] or not feed.fetched_once:
                        if not feed.fetched_once:
                            feed.fetched_once = True
                            feed.save()
                        MUserStory.delete_old_stories(feed_id=feed.pk)
                        user_subs = UserSubscription.objects.filter(feed=feed)
                        logging.debug(u'   ---> [%-30s] Computing scores for all feed subscribers: %s subscribers' % (unicode(feed)[:30], user_subs.count()))
                        stories_db = MStory.objects(story_feed_id=feed.pk,
                                                    story_date__gte=UNREAD_CUTOFF)
                        for sub in user_subs:
                            cache.delete('usersub:%s' % sub.user_id)
                            silent = False if self.options['verbose'] >= 2 else True
                            sub.calculate_feed_scores(silent=silent, stories_db=stories_db)
                    cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25))
                    # if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED) or self.options['force']:
                    #     feed.get_stories(force=True)
            except KeyboardInterrupt:
                break
            except urllib2.HTTPError, e:
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Exception, e:
                logging.debug('[%d] ! -------------------------' % (feed.id,))
                tb = traceback.format_exc()
                logging.debug(tb)
                logging.debug('[%d] ! -------------------------' % (feed.id,))
                ret_feed = FEED_ERREXC 
                feed.save_feed_history(500, "Error", tb)
                fetched_feed = None
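Example #6 closes the shared Django database connection before per-process work begins, so each worker lazily re-opens its own connection rather than reusing the parent's socket, and it opens its own MongoDB connection for the same reason. A minimal sketch of that pattern, assuming a configured Django project (the worker body is a placeholder):

import multiprocessing

def fetch_worker(feed_ids):
    # Closing here forces Django to open a fresh connection in this child
    # process on the first query it runs.
    from django.db import connection
    connection.close()
    for feed_id in feed_ids:
        pass  # fetch and process feed_id

def spawn_workers(feed_queues):
    from django.db import connection
    connection.close()  # drop the shared connection before forking
    workers = [multiprocessing.Process(target=fetch_worker, args=(queue,))
               for queue in feed_queues]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()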
Example #7
                
                if ((fetched_feed and ret_feed == FEED_OK) or self.options['force']):
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()
                    feed = pfeed.feed
                    feed_process_duration = time.time() - start_duration
                    
                    if ret_entries['new'] or self.options['force']:
                        start = time.time()
                        if not feed.known_good or not feed.fetched_once:
                            feed.known_good = True
                            feed.fetched_once = True
                            feed = feed.save()
                        if random.random() <= 0.02:
                            feed.sync_redis()
                            MUserStory.delete_old_stories(feed_id=feed.pk)
                            MUserStory.sync_all_redis(feed_id=feed.pk)
                        try:
                            self.count_unreads_for_subscribers(feed)
                        except TimeoutError:
                            logging.debug('   ---> [%-30s] Unread count took too long...' % (feed.title[:30],))
                        if self.options['verbose']:
                            logging.debug(u'   ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss' % (
                                          feed.title[:30], time.time() - start))
                    cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25))
                    # if ret_entries['new'] or ret_entries['updated'] or self.options['force']:
                    #     feed.get_stories(force=True)