def process_feed_wrapper(self, feed_queue):
    """Fetch and process every feed in ``feed_queue``, one feed at a time.

    For each feed id: fetch the raw feed, parse it, and — when new entries
    arrived, a refresh was forced, or the feed was never fetched before —
    recompute subscriber unread counts and invalidate the cached first page
    of stories.  A failure on one feed never aborts the rest of the queue.

    :param feed_queue: iterable of feed ids to refresh.
    :returns: None (all effects are side effects on the DB/cache).
    """
    # NOTE(review): delta/identity/start_time are never read in this
    # revision; retained only in case truncated trailing code used them.
    delta = None
    current_process = multiprocessing.current_process()
    identity = "X"
    if current_process._identity:
        identity = current_process._identity[0]
    for feed_id in feed_queue:
        # Defaults in case fetching fails before ProcessFeed runs.
        ret_entries = {
            ENTRY_NEW: 0,
            ENTRY_UPDATED: 0,
            ENTRY_SAME: 0,
            ENTRY_ERR: 0,
        }
        start_time = datetime.datetime.utcnow()
        try:
            feed = self.refresh_feed(feed_id)
            ffeed = FetchFeed(feed_id, self.options)
            ret_feed, fetched_feed = ffeed.fetch()

            if (fetched_feed and ret_feed == FEED_OK) or self.options['force']:
                pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                ret_feed, ret_entries = pfeed.process()
                # Re-read the feed: processing may have mutated it.
                feed = self.refresh_feed(feed_id)

                if (ret_entries.get(ENTRY_NEW) or self.options['force']
                        or not feed.fetched_once):
                    if not feed.fetched_once:
                        feed.fetched_once = True
                        feed.save()
                    MUserStory.delete_old_stories(feed_id=feed.pk)
                    try:
                        self.count_unreads_for_subscribers(feed)
                    except TimeoutError:
                        # Best effort only: a slow count must not block the queue.
                        logging.debug(' ---> [%-30s] Unread count took too long...'
                                      % (unicode(feed)[:30],))
                # Drop the cached first page of stories so readers see updates.
                cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25))
        except KeyboardInterrupt:
            break
        except urllib2.HTTPError as e:  # was Py2-only "except X, e" syntax
            feed.save_feed_history(e.code, e.msg, e.fp.read())
            fetched_feed = None
        except Feed.DoesNotExist:
            logging.debug(' ---> [%-30s] Feed is now gone...'
                          % (unicode(feed_id)[:30]))
            continue
def process_feed_wrapper(self, feed_queue):
    """Wrapper for ProcessFeed: fetch, parse, and re-score each queued feed.

    After a successful parse that yields new entries (or a forced refresh,
    or a never-before-fetched feed), old user stories are purged and every
    subscriber's feed scores are recomputed against stories newer than
    ``UNREAD_CUTOFF``.

    Bug fix: the ``Feed.DoesNotExist`` handler previously ended with
    ``return``, so one deleted feed silently aborted the whole remaining
    queue; it now ``continue``s to the next feed, matching the intent of
    the per-feed error handling elsewhere in this method.
    """
    UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    # NOTE(review): delta/identity/start_time are never read in this
    # revision; kept for parity with the surrounding variants.
    delta = None
    current_process = multiprocessing.current_process()
    identity = "X"
    if current_process._identity:
        identity = current_process._identity[0]
    for feed_id in feed_queue:
        ret_entries = {ENTRY_NEW: 0, ENTRY_UPDATED: 0, ENTRY_SAME: 0, ENTRY_ERR: 0}
        start_time = datetime.datetime.utcnow()
        feed = self.refresh_feed(feed_id)
        try:
            ffeed = FetchFeed(feed_id, self.options)
            ret_feed, fetched_feed = ffeed.fetch()

            if (fetched_feed and ret_feed == FEED_OK) or self.options["force"]:
                pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                ret_feed, ret_entries = pfeed.process()
                # Re-read the feed: processing may have mutated it.
                feed = self.refresh_feed(feed_id)

                if (ret_entries.get(ENTRY_NEW) or self.options["force"]
                        or not feed.fetched_once):
                    if not feed.fetched_once:
                        feed.fetched_once = True
                        feed.save()
                    MUserStory.delete_old_stories(feed_id=feed.pk)
                    user_subs = UserSubscription.objects.filter(feed=feed)
                    logging.debug(
                        u" ---> [%-30s] Computing scores for all feed subscribers: %s subscribers"
                        % (unicode(feed)[:30], user_subs.count())
                    )
                    stories_db = MStory.objects(story_feed_id=feed.pk,
                                                story_date__gte=UNREAD_CUTOFF)
                    for sub in user_subs:
                        cache.delete("usersub:%s" % sub.user_id)
                        # Only chatter at verbosity >= 2 (was "False if x else True").
                        silent = self.options["verbose"] < 2
                        sub.calculate_feed_scores(silent=silent, stories_db=stories_db)
                # Drop the cached first page of stories so readers see updates.
                cache.delete("feed_stories:%s-%s-%s" % (feed.id, 0, 25))
        except KeyboardInterrupt:
            break
        except urllib2.HTTPError as e:  # was Py2-only "except X, e" syntax
            feed.save_feed_history(e.code, e.msg, e.fp.read())
            fetched_feed = None
        except Feed.DoesNotExist:
            logging.debug(" ---> [%-30s] Feed is now gone..." % (unicode(feed)[:30]))
            continue  # was "return": one missing feed must not abort the queue
def process_feed_wrapper(self, feed_queue):
    """Fetch and process every feed in ``feed_queue``.

    Supports a ``fake`` option that logs and skips each feed without
    touching the network.  When processing produces new entries (or a
    refresh is forced), the feed is flagged ``known_good``, stale user
    stories are purged, and subscriber unread counts are recomputed.

    :param feed_queue: iterable of feed ids to refresh.
    """
    # NOTE(review): delta/identity/start_time are never read in this
    # revision; kept for parity with the surrounding variants.
    delta = None
    current_process = multiprocessing.current_process()
    identity = "X"
    if current_process._identity:
        identity = current_process._identity[0]
    for feed_id in feed_queue:
        # Defaults in case fetching fails before ProcessFeed runs.
        ret_entries = {ENTRY_NEW: 0, ENTRY_UPDATED: 0, ENTRY_SAME: 0, ENTRY_ERR: 0}
        start_time = time.time()
        ret_feed = FEED_ERREXC
        try:
            feed = self.refresh_feed(feed_id)
            if self.options.get('fake'):
                # Dry-run mode: pretend to fetch, do nothing.
                logging.debug(' ---> [%-30s] ~BGFaking fetch, skipping...'
                              % (unicode(feed)[:30],))
                continue

            ffeed = FetchFeed(feed_id, self.options)
            ret_feed, fetched_feed = ffeed.fetch()

            if (fetched_feed and ret_feed == FEED_OK) or self.options['force']:
                pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                ret_feed, ret_entries = pfeed.process()
                # Re-read the feed: processing may have mutated it.
                feed = self.refresh_feed(feed_id)

                if ret_entries.get(ENTRY_NEW) or self.options['force']:
                    if not feed.known_good:
                        # A parse that yields entries proves the feed works.
                        feed.known_good = True
                        feed.save()
                    MUserStory.delete_old_stories(feed_id=feed.pk)
                    try:
                        self.count_unreads_for_subscribers(feed)
                    except TimeoutError:
                        # Best effort only: a slow count must not block the queue.
                        logging.debug(' ---> [%-30s] Unread count took too long...'
                                      % (unicode(feed)[:30],))
                # Drop the cached first page of stories so readers see updates.
                cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25))
        except KeyboardInterrupt:
            break
        except urllib2.HTTPError as e:  # was Py2-only "except X, e" syntax
            feed.save_feed_history(e.code, e.msg, e.fp.read())
            fetched_feed = None
        except Feed.DoesNotExist:
            logging.debug(' ---> [%-30s] Feed is now gone...'
                          % (unicode(feed_id)[:30]))
            continue
def process_feed_wrapper(self, feed_queue):
    """Fetch and process every feed in ``feed_queue``, with duration tracking.

    Adds two skip paths: ``fake`` (dry run) and ``quick`` (probabilistically
    skip low-traffic, already-known-good feeds).  On new entries the feed is
    marked known-good/fetched, a 2% random sample additionally resyncs redis
    and purges old user stories, and unread counts are recomputed.

    Bug fix: the ``HTTPError`` handler called ``e.fp.read()`` twice — the
    second read of the already-consumed response body returns an empty
    string, so ``save_feed_history`` recorded an empty error body.  The body
    is now read once and reused.
    """
    # NOTE(review): delta/identity and the *_duration/feed_code locals are
    # never read in the visible span; presumably consumed by trailing stats
    # code outside this excerpt — retained, TODO confirm.
    delta = None
    current_process = multiprocessing.current_process()
    identity = "X"
    feed = None
    if current_process._identity:
        identity = current_process._identity[0]
    for feed_id in feed_queue:
        start_duration = time.time()
        feed_fetch_duration = None
        feed_process_duration = None
        page_duration = None
        icon_duration = None
        feed_code = None
        ret_entries = None
        start_time = time.time()
        ret_feed = FEED_ERREXC
        try:
            feed = self.refresh_feed(feed_id)

            # Decide whether to skip this feed entirely.
            skip = False
            if self.options.get('fake'):
                skip = True
                weight = "-"
                quick = "-"
                rand = "-"
            elif (self.options.get('quick') and not self.options['force']
                  and feed.known_good and feed.fetched_once and not feed.is_push):
                # Low-weight (few stories * few subscribers) feeds are skipped
                # with probability `quick` during quick passes.
                weight = feed.stories_last_month * feed.num_subscribers
                random_weight = random.randint(1, max(weight, 1))
                quick = float(self.options.get('quick', 0))
                rand = random.random()
                if random_weight < 100 and rand < quick:
                    skip = True
            if skip:
                logging.debug(' ---> [%-30s] ~BGFaking fetch, skipping (%s/month, %s subs, %s < %s)...' % (
                    feed.title[:30], weight, feed.num_subscribers, rand, quick))
                continue

            ffeed = FetchFeed(feed_id, self.options)
            ret_feed, fetched_feed = ffeed.fetch()
            feed_fetch_duration = time.time() - start_duration

            if (fetched_feed and ret_feed == FEED_OK) or self.options['force']:
                pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                ret_feed, ret_entries = pfeed.process()
                feed = pfeed.feed
                feed_process_duration = time.time() - start_duration

                if ret_entries['new'] or self.options['force']:
                    start = time.time()
                    if not feed.known_good or not feed.fetched_once:
                        feed.known_good = True
                        feed.fetched_once = True
                        feed = feed.save()
                    # Sampled maintenance: redis resync + old-story purge on ~2%
                    # of refreshes to spread the cost.
                    if random.random() <= 0.02:
                        feed.sync_redis()
                        MUserStory.delete_old_stories(feed_id=feed.pk)
                        MUserStory.sync_all_redis(feed_id=feed.pk)
                    try:
                        self.count_unreads_for_subscribers(feed)
                    except TimeoutError:
                        # Best effort only: a slow count must not block the queue.
                        logging.debug(' ---> [%-30s] Unread count took too long...'
                                      % (feed.title[:30],))
                    if self.options['verbose']:
                        logging.debug(u' ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss' % (
                            feed.title[:30], time.time() - start))
        except KeyboardInterrupt:
            break
        except urllib2.HTTPError as e:  # was Py2-only "except X, e" syntax
            # Read the error body ONCE: a second e.fp.read() yields ''.
            error_body = e.fp.read()
            logging.debug(' ---> [%-30s] ~FRFeed throws HTTP error: ~SB%s'
                          % (unicode(feed_id)[:30], error_body))
            feed.save_feed_history(e.code, e.msg, error_body)
            fetched_feed = None
        except Feed.DoesNotExist:
            logging.debug(' ---> [%-30s] ~FRFeed is now gone...'
                          % (unicode(feed_id)[:30]))
            continue
def process_feed_wrapper(self, feed_queue):
    """Fetch and process every feed in ``feed_queue``, with duration tracking.

    Same contract as the sibling revision above: ``fake``/``quick`` skip
    paths, known-good promotion on new entries, 2%-sampled redis resync and
    old-story purge, and per-subscriber unread recount.

    Bug fix: the ``HTTPError`` handler called ``e.fp.read()`` twice; the
    second read of the consumed response returns an empty string, so the
    saved feed history had an empty error body.  The body is read once.
    """
    # NOTE(review): delta/identity and the *_duration/feed_code locals are
    # never read in the visible span; presumably consumed by trailing stats
    # code outside this excerpt — retained, TODO confirm.
    delta = None
    current_process = multiprocessing.current_process()
    identity = "X"
    feed = None
    if current_process._identity:
        identity = current_process._identity[0]
    for feed_id in feed_queue:
        start_duration = time.time()
        feed_fetch_duration = None
        feed_process_duration = None
        page_duration = None
        icon_duration = None
        feed_code = None
        ret_entries = None
        start_time = time.time()
        ret_feed = FEED_ERREXC
        try:
            feed = self.refresh_feed(feed_id)

            # Decide whether to skip this feed entirely.
            skip = False
            if self.options.get('fake'):
                skip = True
                weight = "-"
                quick = "-"
                rand = "-"
            elif (self.options.get('quick') and not self.options['force']
                  and feed.known_good and feed.fetched_once and not feed.is_push):
                # Low-weight feeds are skipped with probability `quick`.
                weight = feed.stories_last_month * feed.num_subscribers
                random_weight = random.randint(1, max(weight, 1))
                quick = float(self.options.get('quick', 0))
                rand = random.random()
                if random_weight < 100 and rand < quick:
                    skip = True
            if skip:
                logging.debug(
                    ' ---> [%-30s] ~BGFaking fetch, skipping (%s/month, %s subs, %s < %s)...'
                    % (feed.title[:30], weight, feed.num_subscribers, rand, quick))
                continue

            ffeed = FetchFeed(feed_id, self.options)
            ret_feed, fetched_feed = ffeed.fetch()
            feed_fetch_duration = time.time() - start_duration

            if (fetched_feed and ret_feed == FEED_OK) or self.options['force']:
                pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                ret_feed, ret_entries = pfeed.process()
                feed = pfeed.feed
                feed_process_duration = time.time() - start_duration

                if ret_entries['new'] or self.options['force']:
                    start = time.time()
                    if not feed.known_good or not feed.fetched_once:
                        feed.known_good = True
                        feed.fetched_once = True
                        feed = feed.save()
                    # Sampled maintenance on ~2% of refreshes.
                    if random.random() <= 0.02:
                        feed.sync_redis()
                        MUserStory.delete_old_stories(feed_id=feed.pk)
                        MUserStory.sync_all_redis(feed_id=feed.pk)
                    try:
                        self.count_unreads_for_subscribers(feed)
                    except TimeoutError:
                        # Best effort only: a slow count must not block the queue.
                        logging.debug(
                            ' ---> [%-30s] Unread count took too long...'
                            % (feed.title[:30],))
                    if self.options['verbose']:
                        logging.debug(
                            u' ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss'
                            % (feed.title[:30], time.time() - start))
        except KeyboardInterrupt:
            break
        except urllib2.HTTPError as e:  # was Py2-only "except X, e" syntax
            # Read the error body ONCE: a second e.fp.read() yields ''.
            error_body = e.fp.read()
            logging.debug(
                ' ---> [%-30s] ~FRFeed throws HTTP error: ~SB%s'
                % (unicode(feed_id)[:30], error_body))
            feed.save_feed_history(e.code, e.msg, error_body)
            fetched_feed = None
        except Feed.DoesNotExist:
            logging.debug(' ---> [%-30s] ~FRFeed is now gone...'
                          % (unicode(feed_id)[:30]))
            continue
def process_feed_wrapper(self, feed_queue):
    """Wrapper for ProcessFeed: per-process fetch/parse/score loop.

    In multi-process mode the Django DB connection is closed first so each
    worker re-opens its own, and a fresh mongoengine connection is made per
    worker.  ``feed_queue`` here yields Feed objects (not ids).  Any
    unexpected exception is logged with a traceback and recorded as a
    500 in the feed's fetch history without aborting the queue.
    """
    if not self.options['single_threaded']:
        # Close the DB so the connection can be re-opened on a per-process basis
        from django.db import connection
        connection.close()
    # NOTE(review): delta/identity/start_time are never read in this
    # revision; kept for parity with the surrounding variants.
    delta = None
    MONGO_DB = settings.MONGO_DB
    db = mongoengine.connection.connect(db=MONGO_DB['NAME'],
                                        host=MONGO_DB['HOST'],
                                        port=MONGO_DB['PORT'])
    current_process = multiprocessing.current_process()
    identity = "X"
    if current_process._identity:
        identity = current_process._identity[0]
    for feed in feed_queue:
        # Defaults in case fetching fails before ProcessFeed runs.
        ret_entries = {ENTRY_NEW: 0, ENTRY_UPDATED: 0, ENTRY_SAME: 0, ENTRY_ERR: 0}
        start_time = datetime.datetime.now()
        ### Uncomment to test feed fetcher
        # from random import randint
        # if randint(0,10) < 10:
        #     continue
        try:
            ffeed = FetchFeed(feed, self.options)
            ret_feed, fetched_feed = ffeed.fetch()

            if (fetched_feed and ret_feed == FEED_OK) or self.options['force']:
                pfeed = ProcessFeed(feed, fetched_feed, db, self.options)
                ret_feed, ret_entries = pfeed.process()

                if (ret_entries.get(ENTRY_NEW) or self.options['force']
                        or not feed.fetched_once):
                    if not feed.fetched_once:
                        feed.fetched_once = True
                        feed.save()
                    MUserStory.delete_old_stories(feed_id=feed.pk)
                    user_subs = UserSubscription.objects.filter(feed=feed)
                    logging.debug(u' ---> [%-30s] Computing scores for all feed subscribers: %s subscribers'
                                  % (unicode(feed)[:30], user_subs.count()))
                    stories_db = MStory.objects(story_feed_id=feed.pk,
                                                story_date__gte=UNREAD_CUTOFF)
                    for sub in user_subs:
                        cache.delete('usersub:%s' % sub.user_id)
                        # Only chatter at verbosity >= 2 (was "False if x else True").
                        silent = self.options['verbose'] < 2
                        sub.calculate_feed_scores(silent=silent, stories_db=stories_db)
                # Drop the cached first page of stories so readers see updates.
                cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25))
        except KeyboardInterrupt:
            break
        except urllib2.HTTPError as e:  # was Py2-only "except X, e" syntax
            feed.save_feed_history(e.code, e.msg, e.fp.read())
            fetched_feed = None
        except Exception:
            # Top-level boundary: log the full traceback and record the
            # failure on the feed, then move on to the next feed.
            logging.debug('[%d] ! -------------------------' % (feed.id,))
            tb = traceback.format_exc()
            logging.debug(tb)
            logging.debug('[%d] ! -------------------------' % (feed.id,))
            ret_feed = FEED_ERREXC
            feed.save_feed_history(500, "Error", tb)
            fetched_feed = None
if ((fetched_feed and ret_feed == FEED_OK) or self.options['force']): pfeed = ProcessFeed(feed_id, fetched_feed, self.options) ret_feed, ret_entries = pfeed.process() feed = pfeed.feed feed_process_duration = time.time() - start_duration if ret_entries['new'] or self.options['force']: start = time.time() if not feed.known_good or not feed.fetched_once: feed.known_good = True feed.fetched_once = True feed = feed.save() if random.random() <= 0.02: feed.sync_redis() MUserStory.delete_old_stories(feed_id=feed.pk) MUserStory.sync_all_redis(feed_id=feed.pk) try: self.count_unreads_for_subscribers(feed) except TimeoutError: logging.debug(' ---> [%-30s] Unread count took too long...' % (feed.title[:30],)) if self.options['verbose']: logging.debug(u' ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss' % ( feed.title[:30], time.time() - start)) <<<<<<< HEAD cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25)) # if ret_entries['new'] or ret_entries['updated'] or self.options['force']: ======= # if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED) or self.options['force']: >>>>>>> Cleaning up RSS feed header for shared stories feeds. # feed.get_stories(force=True)