def handle(self, *args, **options):
    if options['deleteall']:
        MImage.drop()
        return
    if options['daemonize']:
        daemonize()
    settings.LOG_TO_STREAM = True
    # Added by Xinyan Lu: domain-based feed refresh
    if options['domain']:
        feeds = Feed.objects.filter(feed_address__contains=options['domain'])
    elif options['id']:
        feeds = Feed.objects.filter(id=options['id'])
    else:
        feeds = Feed.objects.filter(num_subscribers__gte=1)
    # feeds = feeds.order_by('?')
    num_feeds = len(feeds)
    print 'num feeds', num_feeds
    for feed in feeds:
        UpdateFeedImages.apply_async(args=(feed.pk,), queue='update_feed_images')
    print 'dispatch done'
def handle(self, *args, **options):
    if options['daemonize']:
        daemonize()
    settings.LOG_TO_STREAM = True
    r = redis.Redis(connection_pool=settings.REDIS_FEED_POOL)
    if options['initialize']:
        feeds = Feed.objects.filter(num_subscribers__gte=1).order_by('?')
        print 'Query feeds done with num of feeds', len(feeds)
        # Empty the queue before repopulating it.
        r.ltrim('freeze_feeds', 1, 0)
        pipeline = r.pipeline()
        for feed in feeds:
            pipeline.rpush('freeze_feeds', feed.pk)
        pipeline.execute()
        print 'Initialize freeze_feeds done'
    feed_id = r.lpop('freeze_feeds')
    while feed_id:
        try:
            frozen_num = MStory.freeze_feed(int(feed_id))
            if frozen_num > 0:
                # Feed still had stories to freeze: re-queue it for another pass.
                r.rpush('freeze_feeds', feed_id)
        except Exception, e:
            logging.error(str(e) + traceback.format_exc() + '\n' +
                          'Error from: freeze_feeds\n')
        feed_id = r.lpop('freeze_feeds')
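# A minimal, self-contained sketch of the Redis retry-queue pattern used in the
# handler above, assuming a local Redis server; drain_queue() and process_item
# are hypothetical names, not part of the original code. Items are popped with
# LPOP and pushed back with RPUSH while they report remaining work, so the
# queue drains only once every item is fully processed.
import redis

def drain_queue(r, key, process_item):
    item = r.lpop(key)
    while item:
        if process_item(item) > 0:
            # Still had work to do: re-queue for another pass.
            r.rpush(key, item)
        item = r.lpop(key)

if __name__ == '__main__':
    r = redis.Redis()
    r.rpush('demo_queue', 1, 2, 3)
    # A worker that returns 0 means "done"; each item is processed exactly once.
    drain_queue(r, 'demo_queue', lambda item: 0)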
def handle(self, *args, **options):
    if options['daemonize']:
        daemonize()
    settings.LOG_TO_STREAM = True
    now = datetime.datetime.utcnow()
    if options['skip']:
        feeds = Feed.objects.filter(next_scheduled_update__lte=now,
                                    average_stories_per_month__lt=options['skip'],
                                    active=True)
        print " ---> Skipping %s feeds" % feeds.count()
        for feed in feeds:
            feed.set_next_scheduled_update()
            print '.',
        return
    socket.setdefaulttimeout(options['timeout'])
    if options['force']:
        feeds = Feed.objects.all()
    elif options['username']:
        feeds = Feed.objects.filter(subscribers__user=User.objects.get(username=options['username']))
    elif options['feed']:
        feeds = Feed.objects.filter(pk=options['feed'])
    else:
        feeds = Feed.objects.filter(next_scheduled_update__lte=now, active=True)
    feeds = feeds.order_by('?')
    for f in feeds:
        f.set_next_scheduled_update()
    num_workers = min(len(feeds), options['workerthreads'])
    if options['single_threaded']:
        num_workers = 1
    options['compute_scores'] = True
    options['quick'] = float(MStatistics.get('quick_fetch', 0))
    options['updates_off'] = MStatistics.get('updates_off', False)
    disp = feed_fetcher.Dispatcher(options, num_workers)
    feeds_queue = []
    for _ in range(num_workers):
        feeds_queue.append([])
    i = 0
    for feed in feeds:
        feeds_queue[i % num_workers].append(feed.pk)
        i += 1
    disp.add_jobs(feeds_queue, i)
    django.db.connection.close()
    print " ---> Fetching %s feeds..." % feeds.count()
    disp.run_jobs()
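# A standalone sketch of the round-robin sharding the dispatcher setup above
# uses to spread feed PKs across worker queues: item i lands in queue
# i % num_workers, so queue sizes differ by at most one. shard() is a
# hypothetical helper and the PKs are made-up values, shown only to
# illustrate the pattern.
def shard(items, num_workers):
    queues = [[] for _ in range(num_workers)]
    for i, item in enumerate(items):
        queues[i % num_workers].append(item)
    return queues

print shard([101, 102, 103, 104, 105], 2)
# -> [[101, 103, 105], [102, 104]]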
def handle(self, *args, **options):
    if options['daemonize']:
        daemonize()
    if options['title']:
        feed = Feed.objects.get(feed_title__icontains=options['title'])
    else:
        feed = Feed.objects.get(pk=options['feed'])
    self._refresh_feeds([feed], force=options['force'])
def handle(self, *args, **options):
    if options['daemonize']:
        daemonize()
    if options['title']:
        feed = Feed.objects.get(feed_title__icontains=options['title'])
    else:
        feed = Feed.objects.get(pk=options['feed'])
    feed.update(force=options['force'], single_threaded=True, verbose=2)
def handle(self, *args, **options):
    if options['daemonize']:
        daemonize()
    settings.LOG_TO_STREAM = True
    now = datetime.datetime.utcnow()
    if options['skip']:
        feeds = Feed.objects.filter(next_scheduled_update__lte=now,
                                    average_stories_per_month__lt=options['skip'],
                                    active=True)
        print " ---> Skipping %s feeds" % feeds.count()
        for feed in feeds:
            feed.set_next_scheduled_update()
            print '.',
        return
    socket.setdefaulttimeout(options['timeout'])
    if options['force']:
        feeds = Feed.objects.all()
    elif options['username']:
        feeds = Feed.objects.filter(subscribers__user=User.objects.get(username=options['username']))
    else:
        feeds = Feed.objects.filter(next_scheduled_update__lte=now, active=True)
    feeds = feeds.order_by('?')
    for f in feeds:
        f.queued_date = datetime.datetime.utcnow()
        f.set_next_scheduled_update()
    num_workers = min(len(feeds), options['workerthreads'])
    if options['single_threaded']:
        num_workers = 1
    options['compute_scores'] = True
    import pymongo
    db = pymongo.Connection(settings.MONGODB_SLAVE['host'],
                            slave_okay=True, replicaset='nbset').newsblur
    options['slave_db'] = db
    disp = feed_fetcher.Dispatcher(options, num_workers)
    feeds_queue = []
    for _ in range(num_workers):
        feeds_queue.append([])
    i = 0
    for feed in feeds:
        feeds_queue[i % num_workers].append(feed.pk)
        i += 1
    disp.add_jobs(feeds_queue, i)
    print " ---> Fetching %s feeds..." % feeds.count()
    disp.run_jobs()
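# The handler above routes heavy fetch-time reads to a MongoDB slave via
# pymongo.Connection with slave_okay=True, both of which were removed in
# pymongo 3.x. A minimal sketch of the modern equivalent, assuming a
# placeholder host and the 'nbset' replica set named above; a read
# preference replaces slave_okay so reads may be served by a secondary.
import pymongo

client = pymongo.MongoClient('mongodb://localhost/?replicaSet=nbset',
                             readPreference='secondaryPreferred')
db = client.newsblur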
def handle(self, *args, **options): if options['daemonize']: daemonize() settings.LOG_TO_STREAM = True now = datetime.datetime.utcnow() if options['skip']: feeds = Feed.objects.filter(next_scheduled_update__lte=now, average_stories_per_month__lt=options['skip'], active=True) print " ---> Skipping %s feeds" % feeds.count() for feed in feeds: feed.set_next_scheduled_update() print '.', return socket.setdefaulttimeout(options['timeout']) if options['force']: feeds = Feed.objects.all() elif options['username']: feeds = Feed.objects.filter(subscribers__user=User.objects.get(username=options['username'])) elif options['feed']: feeds = Feed.objects.filter(pk=options['feed']) else: feeds = Feed.objects.filter(next_scheduled_update__lte=now, active=True) feeds = feeds.order_by('?') for f in feeds: f.set_next_scheduled_update() num_workers = min(len(feeds), options['workerthreads']) if options['single_threaded']: num_workers = 1 options['compute_scores'] = True options['quick'] = float(MStatistics.get('quick_fetch', 0)) options['updates_off'] = MStatistics.get('updates_off', False) disp = feed_fetcher.Dispatcher(options, num_workers) feeds_queue = [] for _ in range(num_workers): feeds_queue.append([]) i = 0 for feed in feeds: feeds_queue[i%num_workers].append(feed.pk) i += 1 disp.add_jobs(feeds_queue, i) django.db.connection.close() print " ---> Fetching %s feeds..." % feeds.count() disp.run_jobs()
def handle(self, *args, **options):
    if options['daemonize']:
        daemonize()
    settings.LOG_TO_STREAM = True
    try:
        user = User.objects.get(username__icontains=options['username'])
    except User.MultipleObjectsReturned:
        # A substring match hit several users; fall back to an exact match.
        user = User.objects.get(username=options['username'])
    reader_importer = GoogleReaderImporter(user)
    reader_importer.import_starred_items(count=options['count'])
def handle(self, *args, **options):
    if options['daemonize']:
        daemonize()
    settings.LOG_TO_STREAM = True
    now = datetime.datetime.now()
    if options['skip']:
        feeds = Feed.objects.filter(next_scheduled_update__lte=now,
                                    average_stories_per_month__lt=options['skip'],
                                    active=True)
        print " ---> Skipping %s feeds" % feeds.count()
        for feed in feeds:
            feed.set_next_scheduled_update()
            print '.',
        return
    socket.setdefaulttimeout(options['timeout'])
    feeds = Feed.objects.filter(next_scheduled_update__lte=now, active=True).order_by('?')
    if options['force']:
        feeds = Feed.objects.all()
    num_workers = min(len(feeds), options['workerthreads'])
    if options['single_threaded']:
        num_workers = 1
    disp = feed_fetcher.Dispatcher(options, num_workers)
    feeds_queue = []
    for _ in range(num_workers):
        feeds_queue.append([])
    i = 0
    for feed in feeds:
        feeds_queue[i % num_workers].append(feed)
        i += 1
    disp.add_jobs(feeds_queue, i)
    print " ---> Fetching %s feeds..." % feeds.count()
    disp.run_jobs()
    disp.poll()
def handle(self, *args, **options):
    if options['daemonize']:
        daemonize()
    settings.LOG_TO_STREAM = True
    now = datetime.datetime.utcnow()  # was missing in the original; needed by the default branch below
    # Added by Xinyan Lu: domain-based feed refresh
    if options['domain']:
        feeds = Feed.objects.filter(feed_address__contains=options['domain'])
    elif options['id']:
        feeds = Feed.objects.filter(id=options['id'])
    elif options['force']:
        # feeds = Feed.objects.all()
        feeds = Feed.objects.filter(num_subscribers__gt=2)
    else:
        feeds = Feed.objects.filter(next_scheduled_update__lte=now, active=True)
    feeds = feeds.order_by('?')
    num_feeds = len(feeds)
    i = 0
    for feed in feeds:
        start = time.time()
        i += 1
        stories = MStory.objects(story_feed_id=feed.pk)
        for story in stories:
            if story.story_content_z:
                story_content = zlib.decompress(story.story_content_z)
            else:
                story_content = ''
            SearchStory.index(story_id=story.story_guid,
                              story_title=story.story_title,
                              story_content=story_content,
                              story_author=story.story_author_name,
                              story_date=story.story_date,
                              db_id=str(story.id))
        delta = time.time() - start
        done_msg = (u'---> [%-30s] ~FYProcessed in ~FM~SB%.4ss~FY~SN ~FB%d~FY[%d]' % (
            feed.feed_title[:30], delta, num_feeds, i))
        logging.debug(done_msg)
    print 'Index fetch done!'
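# A small sketch of the zlib round trip behind story_content_z in the indexer
# above: stored story bodies are zlib-compressed blobs that must be
# decompressed before indexing. The sample string is made up.
import zlib

story_content = '<p>Example story body</p>'
story_content_z = zlib.compress(story_content)
assert zlib.decompress(story_content_z) == story_content
print 'round trip ok: %d bytes compressed to %d' % (len(story_content), len(story_content_z))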
def handle(self, *args, **options):
    if options['daemonize']:
        daemonize()
    settings.LOG_TO_STREAM = True
    now = datetime.datetime.utcnow()
    if options['skip']:
        feeds = Feed.objects.filter(next_scheduled_update__lte=now,
                                    average_stories_per_month__lt=options['skip'],
                                    active=True)
        print " ---> Skipping %s feeds" % feeds.count()
        for feed in feeds:
            feed.set_next_scheduled_update()
            print '.',
        return
    socket.setdefaulttimeout(options['timeout'])
    if options['force']:
        feeds = Feed.objects.all()
    elif options['username']:
        feeds = Feed.objects.filter(subscribers__user=User.objects.get(username=options['username']))
    # Added by Xinyan Lu: domain-based feed refresh
    elif options['domain']:
        feeds = Feed.objects.filter(feed_address__contains=options['domain'])
    elif options['id']:
        feeds = Feed.objects.filter(id=options['id'])
    elif options['all']:
        # feeds = Feed.objects.all()
        feeds = Feed.objects.filter(num_subscribers__gte=2)
    else:
        # feeds = Feed.objects.filter(next_scheduled_update__lte=now, active=True)
        feeds = Feed.objects.filter(next_scheduled_update__lte=now, num_subscribers__gte=2)
    if options['has_feed_exception']:
        feeds = feeds.filter(has_feed_exception=True)
    feeds = feeds.order_by('?')
    if options['verbose']:
        print 'num of feeds:', len(feeds)
    for f in feeds:
        f.set_next_scheduled_update()
    if options['verbose']:
        print 'set_next_scheduled_update done'
    num_workers = min(len(feeds), options['workerthreads'])
    if options['single_threaded']:
        num_workers = 1
    if options['verbose']:
        print 'num_workers', num_workers
    options['compute_scores'] = True
    # Modified by Xinyan Lu ([email protected]): no MStatistics data available
    # options['quick'] = float(MStatistics.get('quick_fetch', 0))
    options['quick'] = 0
    print options
    disp = feed_fetcher.Dispatcher(options, num_workers)
    feeds_queue = []
    for _ in range(num_workers):
        feeds_queue.append([])
    i = 0
    for feed in feeds:
        feeds_queue[i % num_workers].append(feed.pk)
        i += 1
    disp.add_jobs(feeds_queue, i)
    django.db.connection.close()
    print " ---> Fetching %s feeds..." % feeds.count()
    disp.run_jobs()
def handle(self, *args, **options):
    if options['daemonize']:
        daemonize()
    settings.LOG_TO_STREAM = True
    now = datetime.datetime.utcnow()
    if options['skip']:
        feeds = Feed.objects.filter(next_scheduled_update__lte=now,
                                    average_stories_per_month__lt=options['skip'])
        print " ---> Skipping %s feeds" % feeds.count()
        for feed in feeds:
            feed.set_next_scheduled_update()
            print '.',
        return
    socket.setdefaulttimeout(options['timeout'])
    if options['force']:
        feeds = Feed.objects.all()
    elif options['username']:
        feeds = Feed.objects.filter(subscribers__user=User.objects.get(username=options['username']))
    # Added by Xinyan Lu: domain-based feed refresh
    elif options['domain']:
        feeds = Feed.objects.filter(feed_address__contains=options['domain'])
    elif options['id']:
        feeds = Feed.objects.filter(id=options['id'])
    elif options['all']:
        # feeds = Feed.objects.all()
        feeds = Feed.objects.filter(num_subscribers__gte=2)
    else:
        # feeds = Feed.objects.filter(next_scheduled_update__lte=now)
        feeds = Feed.objects.filter(next_scheduled_update__lte=now, num_subscribers__gte=2)
    if options['has_feed_exception']:
        feeds = feeds.filter(has_feed_exception=True)
    feeds = feeds.order_by('?')
    if options['verbose']:
        print 'num of feeds:', len(feeds)
    for f in feeds:
        f.set_next_scheduled_update()
    if options['verbose']:
        print 'set_next_scheduled_update done'
    num_workers = min(len(feeds), options['workerthreads'])
    if options['single_threaded']:
        num_workers = 1
    if options['verbose']:
        print 'num_workers', num_workers
    options['compute_scores'] = True
    # Modified by Xinyan Lu ([email protected]): no MStatistics data available
    # options['quick'] = float(MStatistics.get('quick_fetch', 0))
    options['quick'] = 0
    disp = feed_fetcher.Dispatcher(options, num_workers)
    feeds_queue = []
    for _ in range(num_workers):
        feeds_queue.append([])
    i = 0
    for feed in feeds:
        feeds_queue[i % num_workers].append(feed.pk)
        i += 1
    disp.add_jobs(feeds_queue, i)
    django.db.connection.close()
    print " ---> Fetching %s feeds..." % feeds.count()
    disp.run_jobs()