Example #1
0
    def handle(self, *args, **options):
        if options['deleteall']:
            MImage.drop()
            return

        if options['daemonize']:
            daemonize()

        settings.LOG_TO_STREAM = True

        # Added by Xinyan Lu: domain based feed refresh
        if options['domain']:
            feeds = Feed.objects.filter(
                feed_address__contains=options['domain'])
        elif options['id']:
            feeds = Feed.objects.filter(id=options['id'])
        else:
            feeds = Feed.objects.filter(num_subscribers__gte=1)

        # feeds = feeds.order_by('?')

        num_feeds = len(feeds)
        print 'num feeds', num_feeds
        for feed in feeds:
            UpdateFeedImages.apply_async(args=(feed.pk, ),
                                         queue='update_feed_images')
        print 'dispatch done'
Example #2
0
    def handle(self, *args, **options):
        
        if options['daemonize']:
            daemonize()
        
        settings.LOG_TO_STREAM = True        
            
        r = redis.Redis(connection_pool=settings.REDIS_FEED_POOL)
        
        if options['initialize']:
            feeds = Feed.objects.filter(num_subscribers__gte=1).order_by('?')
            print 'Query feeds done with num of feeds',len(feeds)
            r.ltrim('freeze_feeds',1,0)

            pipeline = r.pipeline()
            for feed in feeds:
                pipeline.rpush('freeze_feeds',feed.pk)
            pipeline.execute()
            print 'Initialize freeze_feeds done'

        feed_id = r.lpop('freeze_feeds')
        while feed_id:
            try:
                frozen_num = MStory.freeze_feed(int(feed_id))
                if frozen_num > 0:
                    r.rpush('freeze_feeds',feed_id)
            except Exception, e:
                logging.error(str(e)+\
                            traceback.format_exc()+'\n'+\
                            'Error from:  freeze_feeds\n')
            feed_id = r.lpop('freeze_feeds')
Example #3
0
    def handle(self, *args, **options):

        if options['daemonize']:
            daemonize()

        settings.LOG_TO_STREAM = True

        r = redis.Redis(connection_pool=settings.REDIS_FEED_POOL)

        if options['initialize']:
            feeds = Feed.objects.filter(num_subscribers__gte=1).order_by('?')
            print 'Query feeds done with num of feeds', len(feeds)
            r.ltrim('freeze_feeds', 1, 0)

            pipeline = r.pipeline()
            for feed in feeds:
                pipeline.rpush('freeze_feeds', feed.pk)
            pipeline.execute()
            print 'Initialize freeze_feeds done'

        feed_id = r.lpop('freeze_feeds')
        while feed_id:
            try:
                frozen_num = MStory.freeze_feed(int(feed_id))
                if frozen_num > 0:
                    r.rpush('freeze_feeds', feed_id)
            except Exception, e:
                logging.error(str(e)+\
                            traceback.format_exc()+'\n'+\
                            'Error from:  freeze_feeds\n')
            feed_id = r.lpop('freeze_feeds')
Example #4
0
    def handle(self, *args, **options):
        if options['deleteall']:
            MImage.drop()
            return

        if options['daemonize']:
            daemonize()
        
        settings.LOG_TO_STREAM = True        
            
        # Added by Xinyan Lu: domain based feed refresh
        if options['domain']:
            feeds = Feed.objects.filter(feed_address__contains=options['domain'])
        elif options['id']:
            feeds = Feed.objects.filter(id=options['id'])
        else:
            feeds = Feed.objects.filter(num_subscribers__gte=1)
        
        # feeds = feeds.order_by('?')

        num_feeds = len(feeds)
        print 'num feeds',num_feeds
        for feed in feeds:
            UpdateFeedImages.apply_async(args=(feed.pk,), queue='update_feed_images')
        print 'dispatch done'
    def handle(self, *args, **options):
        if options['daemonize']:
            daemonize()

        settings.LOG_TO_STREAM = True
        now = datetime.datetime.utcnow()

        if options['skip']:
            feeds = Feed.objects.filter(
                next_scheduled_update__lte=now,
                average_stories_per_month__lt=options['skip'],
                active=True)
            print " ---> Skipping %s feeds" % feeds.count()
            for feed in feeds:
                feed.set_next_scheduled_update()
                print '.',
            return

        socket.setdefaulttimeout(options['timeout'])
        if options['force']:
            feeds = Feed.objects.all()
        elif options['username']:
            feeds = Feed.objects.filter(subscribers__user=User.objects.get(
                username=options['username']))
        elif options['feed']:
            feeds = Feed.objects.filter(pk=options['feed'])
        else:
            feeds = Feed.objects.filter(next_scheduled_update__lte=now,
                                        active=True)

        feeds = feeds.order_by('?')

        for f in feeds:
            f.set_next_scheduled_update()

        num_workers = min(len(feeds), options['workerthreads'])
        if options['single_threaded']:
            num_workers = 1

        options['compute_scores'] = True
        options['quick'] = float(MStatistics.get('quick_fetch', 0))
        options['updates_off'] = MStatistics.get('updates_off', False)

        disp = feed_fetcher.Dispatcher(options, num_workers)

        feeds_queue = []
        for _ in range(num_workers):
            feeds_queue.append([])

        i = 0
        for feed in feeds:
            feeds_queue[i % num_workers].append(feed.pk)
            i += 1
        disp.add_jobs(feeds_queue, i)

        django.db.connection.close()

        print " ---> Fetching %s feeds..." % feeds.count()
        disp.run_jobs()
Example #6
0
    def handle(self, *args, **options):
        """Refresh a single feed looked up by title or by primary key.

        Options read from ``options``:
            daemonize: when truthy, call ``daemonize()`` first.
            title:     when truthy, fetch the feed whose ``feed_title``
                       contains this value (case-insensitive lookup).
            feed:      primary key used when ``title`` is falsy.
            force:     forwarded to ``self._refresh_feeds``.
        """
        if options['daemonize']:
            daemonize()

        if options['title']:
            lookup = {'feed_title__icontains': options['title']}
        else:
            lookup = {'pk': options['feed']}
        target = Feed.objects.get(**lookup)

        self._refresh_feeds([target], force=options['force'])
Example #7
0
 def handle(self, *args, **options):
     """Update a single feed looked up by title or by primary key.

     Options read from ``options``:
         daemonize: when truthy, call ``daemonize()`` first.
         title:     when truthy, fetch the feed whose ``feed_title``
                    contains this value (case-insensitive lookup).
         feed:      primary key used when ``title`` is falsy.
         force:     forwarded to ``feed.update``.
     """
     if options['daemonize']:
         daemonize()

     if options['title']:
         lookup = {'feed_title__icontains': options['title']}
     else:
         lookup = {'pk': options['feed']}
     target = Feed.objects.get(**lookup)

     target.update(force=options['force'], single_threaded=True, verbose=2)
Example #8
0
 def handle(self, *args, **options):
     """Refresh one feed, chosen by title when given, else by primary key.

     Options read from ``options``:
         daemonize: when truthy, call ``daemonize()`` first.
         title:     when truthy, fetch the feed whose ``feed_title``
                    contains this value (case-insensitive lookup).
         feed:      primary key used when ``title`` is falsy.
         force:     forwarded to ``self._refresh_feeds``.
     """
     if options['daemonize']:
         daemonize()

     title = options['title']
     if title:
         chosen = Feed.objects.get(feed_title__icontains=title)
     else:
         chosen = Feed.objects.get(pk=options['feed'])

     self._refresh_feeds([chosen], force=options['force'])
Example #9
0
 def handle(self, *args, **options):
     if options['daemonize']:
         daemonize()
         
     settings.LOG_TO_STREAM = True
     now = datetime.datetime.utcnow()
     
     if options['skip']:
         feeds = Feed.objects.filter(next_scheduled_update__lte=now,
                                     average_stories_per_month__lt=options['skip'],
                                     active=True)
         print " ---> Skipping %s feeds" % feeds.count()
         for feed in feeds:
             feed.set_next_scheduled_update()
             print '.',
         return
         
     socket.setdefaulttimeout(options['timeout'])
     if options['force']:
         feeds = Feed.objects.all()
     elif options['username']:
         feeds = Feed.objects.filter(subscribers__user=User.objects.get(username=options['username']))
     else:
         feeds = Feed.objects.filter(next_scheduled_update__lte=now, active=True)
     
     feeds = feeds.order_by('?')
     
     for f in feeds:
         f.queued_date = datetime.datetime.utcnow()
         f.set_next_scheduled_update()
     
     num_workers = min(len(feeds), options['workerthreads'])
     if options['single_threaded']:
         num_workers = 1
     
     options['compute_scores'] = True
     
         
     import pymongo
     db = pymongo.Connection(settings.MONGODB_SLAVE['host'], slave_okay=True, replicaset='nbset').newsblur
     
     options['slave_db'] = db
     
     disp = feed_fetcher.Dispatcher(options, num_workers)        
     
     feeds_queue = []
     for _ in range(num_workers):
         feeds_queue.append([])
         
     i = 0
     for feed in feeds:
         feeds_queue[i%num_workers].append(feed.pk)
         i += 1
     disp.add_jobs(feeds_queue, i)
     
     print " ---> Fetching %s feeds..." % feeds.count()
     disp.run_jobs()
Example #10
0
 def handle(self, *args, **options):
     if options['daemonize']:
         daemonize()
     
     settings.LOG_TO_STREAM = True
     now = datetime.datetime.utcnow()
     
     if options['skip']:
         feeds = Feed.objects.filter(next_scheduled_update__lte=now,
                                     average_stories_per_month__lt=options['skip'],
                                     active=True)
         print " ---> Skipping %s feeds" % feeds.count()
         for feed in feeds:
             feed.set_next_scheduled_update()
             print '.',
         return
         
     socket.setdefaulttimeout(options['timeout'])
     if options['force']:
         feeds = Feed.objects.all()
     elif options['username']:
         feeds = Feed.objects.filter(subscribers__user=User.objects.get(username=options['username']))
     elif options['feed']:
         feeds = Feed.objects.filter(pk=options['feed'])
     else:
         feeds = Feed.objects.filter(next_scheduled_update__lte=now, active=True)
     
     feeds = feeds.order_by('?')
     
     for f in feeds:
         f.set_next_scheduled_update()
     
     num_workers = min(len(feeds), options['workerthreads'])
     if options['single_threaded']:
         num_workers = 1
     
     options['compute_scores'] = True
     options['quick'] = float(MStatistics.get('quick_fetch', 0))
     options['updates_off'] = MStatistics.get('updates_off', False)
     
     disp = feed_fetcher.Dispatcher(options, num_workers)        
     
     feeds_queue = []
     for _ in range(num_workers):
         feeds_queue.append([])
     
     i = 0
     for feed in feeds:
         feeds_queue[i%num_workers].append(feed.pk)
         i += 1
     disp.add_jobs(feeds_queue, i)
     
     django.db.connection.close()
     
     print " ---> Fetching %s feeds..." % feeds.count()
     disp.run_jobs()
Example #11
0
 def handle(self, *args, **options):
     """Import starred items for one user via ``GoogleReaderImporter``.

     Options read from ``options``:
         daemonize: when truthy, call ``daemonize()`` first.
         username:  matched case-insensitively as a substring first; if
                    that matches more than one user, an exact match on
                    the same value is used instead.
         count:     forwarded to ``import_starred_items``.
     """
     if options['daemonize']:
         daemonize()

     settings.LOG_TO_STREAM = True

     try:
         account = User.objects.get(username__icontains=options['username'])
     except User.MultipleObjectsReturned:
         # Ambiguous substring match: retry with the exact username.
         account = User.objects.get(username=options['username'])

     GoogleReaderImporter(account).import_starred_items(
         count=options['count'])
    def handle(self, *args, **options):
        """Import starred items for one user via ``GoogleReaderImporter``.

        Options read from ``options``:
            daemonize: when truthy, call ``daemonize()`` first.
            username:  matched case-insensitively as a substring first; if
                       that matches more than one user, an exact match on
                       the same value is used instead.
            count:     forwarded to ``import_starred_items``.
        """
        if options['daemonize']:
            daemonize()

        settings.LOG_TO_STREAM = True

        try:
            account = User.objects.get(username__icontains=options['username'])
        except User.MultipleObjectsReturned:
            # Ambiguous substring match: retry with the exact username.
            account = User.objects.get(username=options['username'])

        importer = GoogleReaderImporter(account)
        importer.import_starred_items(count=options['count'])
Example #13
0
 def handle(self, *args, **options):
     if options['daemonize']:
         daemonize()
         
     settings.LOG_TO_STREAM = True
     now = datetime.datetime.now()
     
     if options['skip']:
         feeds = Feed.objects.filter(next_scheduled_update__lte=now,
                                     average_stories_per_month__lt=options['skip'],
                                     active=True)
         print " ---> Skipping %s feeds" % feeds.count()
         for feed in feeds:
             feed.set_next_scheduled_update()
             print '.',
         return
         
     socket.setdefaulttimeout(options['timeout'])
     feeds = Feed.objects.filter(next_scheduled_update__lte=now, active=True).order_by('?')
     
     if options['force']:
         feeds = Feed.objects.all()
     
     num_workers = min(len(feeds), options['workerthreads'])
     if options['single_threaded']:
         num_workers = 1
     
     disp = feed_fetcher.Dispatcher(options, num_workers)        
     
     feeds_queue = []
     for _ in range(num_workers):
         feeds_queue.append([])
         
     i = 0
     for feed in feeds:
         feeds_queue[i%num_workers].append(feed)
         i += 1
     disp.add_jobs(feeds_queue, i)
     
     print " ---> Fetching %s feeds..." % feeds.count()
     disp.run_jobs()
     disp.poll()
Example #14
0
    def handle(self, *args, **options):
        if options['daemonize']:
            daemonize()
        
        settings.LOG_TO_STREAM = True        
            
        # Added by Xinyan Lu: domain based feed refresh
        if options['domain']:
            feeds = Feed.objects.filter(feed_address__contains=options['domain'])
        elif options['id']:
            feeds = Feed.objects.filter(id=options['id'])
        elif options['force']:
            # feeds = Feed.objects.all()
            feeds = Feed.objects.filter(num_subscribers__gt=2)
        else:
            feeds = Feed.objects.filter(next_scheduled_update__lte=now, active=True)
        
        feeds = feeds.order_by('?')

        num_feeds = len(feeds)
        i=0
        for feed in feeds:
            start = time.time()
            i += 1
            stories = MStory.objects(story_feed_id=feed.pk)
            for story in stories:
                if story.story_content_z:
                    story_content = zlib.decompress(story.story_content_z)
                else:
                    story_content = ''
                SearchStory.index(story_id=story.story_guid,
                                 story_title=story.story_title,
                                 story_content=story_content,
                                 story_author=story.story_author_name,
                                 story_date=story.story_date,
                                 db_id=str(story.id))
            delta = time.time() - start
            done_msg = (u'---> [%-30s] ~FYProcessed in ~FM~SB%.4ss~FY~SN ~FB%d~FY[%d]' % (
                feed.feed_title[:30],delta, num_feeds,i,))
            logging.debug(done_msg)
        print 'Index fetch done!'
Example #15
0
    def handle(self, *args, **options):
        if options['daemonize']:
            daemonize()

        settings.LOG_TO_STREAM = True
        now = datetime.datetime.utcnow()

        if options['skip']:
            feeds = Feed.objects.filter(
                next_scheduled_update__lte=now,
                average_stories_per_month__lt=options['skip'],
                active=True)
            print " ---> Skipping %s feeds" % feeds.count()
            for feed in feeds:
                feed.set_next_scheduled_update()
                print '.',
            return

        socket.setdefaulttimeout(options['timeout'])
        if options['force']:
            feeds = Feed.objects.all()
        elif options['username']:
            feeds = Feed.objects.filter(subscribers__user=User.objects.get(
                username=options['username']))
        # Added by Xinyan Lu: domain based feed refresh
        elif options['domain']:
            feeds = Feed.objects.filter(
                feed_address__contains=options['domain'])
        elif options['id']:
            feeds = Feed.objects.filter(id=options['id'])
        elif options['all']:
            # feeds = Feed.objects.all()
            feeds = Feed.objects.filter(num_subscribers__gte=2)
        else:
            # feeds = Feed.objects.filter(next_scheduled_update__lte=now, active=True)
            feeds = Feed.objects.filter(next_scheduled_update__lte=now,
                                        num_subscribers__gte=2)

        if options['has_feed_exception']:
            feeds = feeds.filter(has_feed_exception=True)

        feeds = feeds.order_by('?')
        if options['verbose']:
            print 'num of feeds:', len(feeds)

        for f in feeds:
            f.set_next_scheduled_update()

        if options['verbose']:
            print 'set_next_scheduled_update done'

        num_workers = min(len(feeds), options['workerthreads'])
        if options['single_threaded']:
            num_workers = 1

        if options['verbose']:
            print 'num_workers', num_workers

        options['compute_scores'] = True
        # Modified by Xinyan Lu ([email protected]): No MStatistics data available
        # options['quick'] = float(MStatistics.get('quick_fetch', 0))
        options['quick'] = 0
        print options
        disp = feed_fetcher.Dispatcher(options, num_workers)

        feeds_queue = []
        for _ in range(num_workers):
            feeds_queue.append([])

        i = 0
        for feed in feeds:
            feeds_queue[i % num_workers].append(feed.pk)
            i += 1
        disp.add_jobs(feeds_queue, i)

        django.db.connection.close()

        print " ---> Fetching %s feeds..." % feeds.count()
        disp.run_jobs()
Example #16
0
    def handle(self, *args, **options):
        if options['daemonize']:
            daemonize()
        
        settings.LOG_TO_STREAM = True
        now = datetime.datetime.utcnow()
        
        if options['skip']:
            feeds = Feed.objects.filter(next_scheduled_update__lte=now,
                                        average_stories_per_month__lt=options['skip'])
            print " ---> Skipping %s feeds" % feeds.count()
            for feed in feeds:
                feed.set_next_scheduled_update()
                print '.',
            return
            
        socket.setdefaulttimeout(options['timeout'])
        if options['force']:
            feeds = Feed.objects.all()
        elif options['username']:
            feeds = Feed.objects.filter(subscribers__user=User.objects.get(username=options['username']))
        # Added by Xinyan Lu: domain based feed refresh
        elif options['domain']:
            feeds = Feed.objects.filter(feed_address__contains=options['domain'])
        elif options['id']:
            feeds = Feed.objects.filter(id=options['id'])
        elif options['all']:
            # feeds = Feed.objects.all()
            feeds = Feed.objects.filter(num_subscribers__gte=2)
        else:
            # feeds = Feed.objects.filter(next_scheduled_update__lte=now)
            feeds = Feed.objects.filter(next_scheduled_update__lte=now, num_subscribers__gte=2)
        
        if options['has_feed_exception']:
            feeds = feeds.filter(has_feed_exception=True)
        
        feeds = feeds.order_by('?')
        if options['verbose']:
            print 'num of feeds:',len(feeds)
        
        for f in feeds:
            f.set_next_scheduled_update()

        if options['verbose']:
            print 'set_next_scheduled_update done'
        
        num_workers = min(len(feeds), options['workerthreads'])
        if options['single_threaded']:
            num_workers = 1

        if options['verbose']:
            print 'num_workers',num_workers
        
        options['compute_scores'] = True
        # Modified by Xinyan Lu ([email protected]): No MStatistics data available
        # options['quick'] = float(MStatistics.get('quick_fetch', 0))
        options['quick'] = 0
        
        disp = feed_fetcher.Dispatcher(options, num_workers)        
        
        feeds_queue = []
        for _ in range(num_workers):
            feeds_queue.append([])
        
        i = 0
        for feed in feeds:
            feeds_queue[i%num_workers].append(feed.pk)
            i += 1
        disp.add_jobs(feeds_queue, i)
        
        django.db.connection.close()
        
        print " ---> Fetching %s feeds..." % feeds.count()
        disp.run_jobs()