Example #1
    def feed_stories(cls, user_id, feed_ids=None, offset=0, limit=6, order="newest", read_filter="all", usersubs=None):
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)

        if order == "oldest":
            range_func = r.zrange
        else:
            range_func = r.zrevrange

        ranked_stories_keys = "zU:%s:feeds" % (user_id)
        unread_ranked_stories_keys = "zhU:%s:feeds" % (user_id)
        stories_cached = r.exists(ranked_stories_keys)
        unreads_cached = True if read_filter == "unread" else r.exists(unread_ranked_stories_keys)
        if offset and stories_cached and unreads_cached:
            story_hashes = range_func(ranked_stories_keys, offset, limit)
            if read_filter == "unread":
                unread_story_hashes = story_hashes
            else:
                unread_story_hashes = range_func(unread_ranked_stories_keys, 0, offset + limit)
            return story_hashes, unread_story_hashes
        else:
            r.delete(ranked_stories_keys)
            r.delete(unread_ranked_stories_keys)

        story_hashes = cls.story_hashes(
            user_id,
            feed_ids=feed_ids,
            read_filter=read_filter,
            order=order,
            include_timestamps=True,
            group_by_feed=False,
            usersubs=usersubs,
        )
        if not story_hashes:
            return [], []

        for story_hash_group in chunks(story_hashes, 100):
            r.zadd(ranked_stories_keys, **dict(story_hash_group))
        story_hashes = range_func(ranked_stories_keys, offset, limit)

        if read_filter == "unread":
            unread_feed_story_hashes = story_hashes
        else:
            unread_story_hashes = cls.story_hashes(
                user_id,
                feed_ids=feed_ids,
                read_filter="unread",
                order=order,
                include_timestamps=True,
                group_by_feed=False,
            )
            if unread_story_hashes:
                for unread_story_hash_group in chunks(unread_story_hashes, 100):
                    r.zadd(unread_ranked_stories_keys, **dict(unread_story_hash_group))
            unread_feed_story_hashes = range_func(unread_ranked_stories_keys, offset, limit)

        r.expire(ranked_stories_keys, 60 * 60)
        r.expire(unread_ranked_stories_keys, 60 * 60)

        return story_hashes, unread_feed_story_hashes
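
Example #1 builds a per-user cache in a Redis sorted set: (story hash, timestamp) pairs are written in batches of 100, paged with zrange/zrevrange depending on order, and expired after an hour. Below is a minimal standalone sketch of that pattern, assuming a local Redis, redis-py 3.x (where zadd takes a mapping rather than the keyword-argument form used above), and a chunks() helper like the one the example relies on; the key and data are illustrative.

    import redis

    def chunks(items, size):
        """Yield successive size-sized slices of items."""
        for i in range(0, len(items), size):
            yield items[i:i + size]

    r = redis.Redis()  # stand-in for the REDIS_STORY_HASH_POOL connection
    key = "zU:42:feeds"  # illustrative key; 42 is a made-up user id
    story_hashes = [("1:%03x" % i, 1700000000 + i) for i in range(250)]

    # Fill the ranked set in batches, as the example does.
    for group in chunks(story_hashes, 100):
        r.zadd(key, dict(group))  # redis-py 3.x takes a mapping here
    r.expire(key, 60 * 60)        # keep the cache for an hour

    # "newest" order reads from the high-score end; note that the second
    # index to zrevrange is an inclusive end position, not a count.
    page = r.zrevrange(key, 0, 5)  # six newest hashes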
Example #2
    def feed_stories(cls, user_id, feed_ids=None, offset=0, limit=6, 
                     order='newest', read_filter='all', usersubs=None, cutoff_date=None):
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        
        if order == 'oldest':
            range_func = r.zrange
        else:
            range_func = r.zrevrange
        
        if not feed_ids:
            feed_ids = []
        
        # feeds_string = ""
        feeds_string = ','.join(str(f) for f in sorted(feed_ids))[:30]
        ranked_stories_keys  = 'zU:%s:feeds:%s' % (user_id, feeds_string)
        unread_ranked_stories_keys  = 'zhU:%s:feeds:%s' % (user_id, feeds_string)
        stories_cached = r.exists(ranked_stories_keys)
        unreads_cached = True if read_filter == "unread" else r.exists(unread_ranked_stories_keys)
        if offset and stories_cached and unreads_cached:
            story_hashes = range_func(ranked_stories_keys, offset, limit)
            if read_filter == "unread":
                unread_story_hashes = story_hashes
            else:
                unread_story_hashes = range_func(unread_ranked_stories_keys, 0, offset+limit)
            return story_hashes, unread_story_hashes
        else:
            r.delete(ranked_stories_keys)
            r.delete(unread_ranked_stories_keys)
        
        story_hashes = cls.story_hashes(user_id, feed_ids=feed_ids, 
                                        read_filter=read_filter, order=order, 
                                        include_timestamps=True,
                                        group_by_feed=False, usersubs=usersubs,
                                        cutoff_date=cutoff_date)
        if not story_hashes:
            return [], []
        
        for story_hash_group in chunks(story_hashes, 100):
            r.zadd(ranked_stories_keys, **dict(story_hash_group))
        story_hashes = range_func(ranked_stories_keys, offset, limit)

        if read_filter == "unread":
            unread_feed_story_hashes = story_hashes
        else:
            unread_story_hashes = cls.story_hashes(user_id, feed_ids=feed_ids, 
                                                   read_filter="unread", order=order, 
                                                   include_timestamps=True,
                                                   group_by_feed=False,
                                                   cutoff_date=cutoff_date)
            if unread_story_hashes:
                for unread_story_hash_group in chunks(unread_story_hashes, 100):
                    r.zadd(unread_ranked_stories_keys, **dict(unread_story_hash_group))
            unread_feed_story_hashes = range_func(unread_ranked_stories_keys, offset, limit)
        
        r.expire(ranked_stories_keys, 60*60)
        r.expire(unread_ranked_stories_keys, 60*60)
        
        return story_hashes, unread_feed_story_hashes
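
Example #2 differs from #1 mainly in two ways: it threads a cutoff_date through to story_hashes(), and it scopes the cache keys to the requested feed set by joining the sorted feed ids and truncating to 30 characters. A quick sketch of that key construction (the user id 42 is illustrative):

    feed_ids = [101, 7, 2024, 33]
    feeds_string = ",".join(str(f) for f in sorted(feed_ids))[:30]
    ranked_stories_key = "zU:%s:feeds:%s" % (42, feeds_string)
    # -> 'zU:42:feeds:7,33,101,2024'

Sorting keeps the key stable regardless of the order the ids arrive in; the 30-character cap bounds key length, at the cost that very large feed sets sharing the same prefix would map to the same cache key.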
Example #3
    def index_subscriptions_for_search(self):
        from apps.rss_feeds.models import Feed
        from apps.reader.models import UserSubscription

        SearchStory.create_elasticsearch_mapping()

        start = time.time()
        user = User.objects.get(pk=self.user_id)
        r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
        r.publish(user.username, 'search_index_complete:start')

        subscriptions = UserSubscription.objects.filter(user=user).only('feed')
        total = subscriptions.count()

        feed_ids = []
        for sub in subscriptions:
            try:
                feed_ids.append(sub.feed.pk)
            except Feed.DoesNotExist:
                continue

        feed_id_chunks = [c for c in chunks(feed_ids, 6)]
        logging.user(
            user, "~FCIndexing ~SB%s feeds~SN in %s chunks..." %
            (total, len(feed_id_chunks)))

        search_chunks = [
            IndexSubscriptionsChunkForSearch.s(
                feed_ids=feed_id_chunk,
                user_id=self.user_id).set(queue='search_indexer')
            for feed_id_chunk in feed_id_chunks
        ]
        callback = FinishIndexSubscriptionsForSearch.s(
            user_id=self.user_id, start=start).set(queue='search_indexer')
        celery.chord(search_chunks)(callback)
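
Example #3 fans the per-chunk indexing out as a celery chord: the header tasks run in parallel on the search_indexer queue, and the callback fires only after all of them have finished. A self-contained sketch of that wiring, with a local Redis broker/backend and placeholder tasks standing in for IndexSubscriptionsChunkForSearch and FinishIndexSubscriptionsForSearch (all names and settings here are illustrative):

    import time

    from celery import Celery, chord

    # Illustrative app; the real project configures its own Celery instance.
    app = Celery("search", broker="redis://localhost:6379/0",
                 backend="redis://localhost:6379/1")

    @app.task
    def index_chunk(feed_ids, user_id):
        # placeholder for IndexSubscriptionsChunkForSearch
        return len(feed_ids)

    @app.task
    def finish(results, user_id, start):
        # placeholder for FinishIndexSubscriptionsForSearch; the chord passes
        # the list of header results in as the first argument
        return "indexed %s feeds in %.2fs" % (sum(results), time.time() - start)

    feed_id_chunks = [[1, 2, 3], [4, 5, 6], [7]]
    header = [index_chunk.s(feed_ids=c, user_id=42).set(queue="search_indexer")
              for c in feed_id_chunks]
    callback = finish.s(user_id=42, start=time.time()).set(queue="search_indexer")
    chord(header)(callback)  # callback runs only after every chunk completes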
Example #4
    def index_subscriptions_for_search(self):
        from apps.rss_feeds.models import Feed
        from apps.reader.models import UserSubscription

        SearchStory.create_elasticsearch_mapping()

        start = time.time()
        user = User.objects.get(pk=self.user_id)
        r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
        r.publish(user.username, 'search_index_complete:start')

        subscriptions = UserSubscription.objects.filter(user=user).only('feed')
        total = subscriptions.count()

        feed_ids = []
        for sub in subscriptions:
            try:
                feed_ids.append(sub.feed.pk)
            except Feed.DoesNotExist:
                continue

        feed_id_chunks = [c for c in chunks(feed_ids, 6)]
        logging.user(
            user, "~FCIndexing ~SB%s feeds~SN in %s chunks..." %
            (total, len(feed_id_chunks)))

        tasks = [
            IndexSubscriptionsChunkForSearch().s(
                feed_ids=feed_id_chunk,
                user_id=self.user_id).set(queue='search_indexer')
            for feed_id_chunk in feed_id_chunks
        ]
        group = celery.group(*tasks)
        res = group.apply_async(queue='search_indexer')
        res.join_native()

        duration = time.time() - start
        logging.user(
            user, "~FCIndexed ~SB%s feeds~SN in ~FM~SB%s~FC~SN sec." %
            (total, round(duration, 2)))
        r.publish(user.username, 'search_index_complete:done')

        self.subscriptions_indexed = True
        self.subscriptions_indexing = False
        self.save()
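
Examples #4 and #5 replace the chord with a celery group that the caller waits on: res.join_native() blocks the indexing task itself until every chunk has finished, after which it logs the duration, publishes search_index_complete:done, and flips the flags on the model. A reduced sketch of just the group/join part, under the same illustrative broker and placeholder task assumptions as the previous sketch:

    from celery import Celery, group

    app = Celery("search", broker="redis://localhost:6379/0",
                 backend="redis://localhost:6379/1")  # illustrative settings

    @app.task
    def index_chunk(feed_ids, user_id):
        # placeholder for IndexSubscriptionsChunkForSearch
        return len(feed_ids)

    tasks = [index_chunk.s(feed_ids=c, user_id=42).set(queue="search_indexer")
             for c in [[1, 2, 3], [4, 5, 6], [7]]]
    res = group(tasks).apply_async(queue="search_indexer")
    res.join_native()  # blocks the caller until every chunk task has finished

The trade-off versus Example #3 is that the caller here occupies a worker while it waits, whereas the chord hands completion off to the callback task.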
Example #5
    def index_subscriptions_for_search(self):
        from apps.rss_feeds.models import Feed
        from apps.reader.models import UserSubscription

        SearchStory.create_elasticsearch_mapping()

        start = time.time()
        user = User.objects.get(pk=self.user_id)
        r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
        r.publish(user.username, 'search_index_complete:start')

        subscriptions = UserSubscription.objects.filter(user=user).only('feed')
        total = subscriptions.count()

        feed_ids = []
        for sub in subscriptions:
            try:
                feed_ids.append(sub.feed.pk)
            except Feed.DoesNotExist:
                continue

        feed_id_chunks = [c for c in chunks(feed_ids, 6)]
        logging.user(user, "~FCIndexing ~SB%s feeds~SN in %s chunks..." %
                     (total, len(feed_id_chunks)))

        tasks = [IndexSubscriptionsChunkForSearch().s(feed_ids=feed_id_chunk,
                                                      user_id=self.user_id
                                                      ).set(queue='search_indexer')
                 for feed_id_chunk in feed_id_chunks]
        group = celery.group(*tasks)
        res = group.apply_async(queue='search_indexer')
        res.join_native()

        duration = time.time() - start
        logging.user(user, "~FCIndexed ~SB%s feeds~SN in ~FM~SB%s~FC~SN sec." %
                     (total, round(duration, 2)))
        r.publish(user.username, 'search_index_complete:done')

        self.subscriptions_indexed = True
        self.subscriptions_indexing = False
        self.save()
Example #6
    def story_hashes(cls, user_id, feed_ids=None, usersubs=None, read_filter="unread", order="newest", 
                     include_timestamps=False, group_by_feed=True, cutoff_date=None):
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        pipeline = r.pipeline()
        story_hashes = {} if group_by_feed else []
        
        if not usersubs:
            usersubs = cls.subs_for_feeds(user_id, feed_ids=feed_ids, read_filter=read_filter)
            feed_ids = [sub.feed_id for sub in usersubs]
            if not feed_ids:
                return story_hashes

        read_dates = dict((us.feed_id, int(us.mark_read_date.strftime('%s'))) for us in usersubs)
        current_time = int(time.time() + 60*60*24)
        if not cutoff_date:
            cutoff_date = datetime.datetime.now() - datetime.timedelta(days=settings.DAYS_OF_STORY_HASHES)
        unread_timestamp = int(time.mktime(cutoff_date.timetuple()))-1000
        feed_counter = 0

        for feed_id_group in chunks(feed_ids, 20):
            pipeline = r.pipeline()
            for feed_id in feed_id_group:
                stories_key               = 'F:%s' % feed_id
                sorted_stories_key        = 'zF:%s' % feed_id
                read_stories_key          = 'RS:%s:%s' % (user_id, feed_id)
                unread_stories_key        = 'U:%s:%s' % (user_id, feed_id)
                unread_ranked_stories_key = 'zU:%s:%s' % (user_id, feed_id)
                expire_unread_stories_key = False
            
                max_score = current_time
                if read_filter == 'unread':
                    # +1 for the intersection b/w zF and F, which carries an implicit score of 1.
                    min_score = read_dates[feed_id] + 1
                    pipeline.sdiffstore(unread_stories_key, stories_key, read_stories_key)
                    expire_unread_stories_key = True
                else:
                    min_score = unread_timestamp
                    unread_stories_key = stories_key

                if order == 'oldest':
                    byscorefunc = pipeline.zrangebyscore
                else:
                    byscorefunc = pipeline.zrevrangebyscore
                    min_score, max_score = max_score, min_score
            
                pipeline.zinterstore(unread_ranked_stories_key, [sorted_stories_key, unread_stories_key])
                byscorefunc(unread_ranked_stories_key, min_score, max_score, withscores=include_timestamps)
                pipeline.delete(unread_ranked_stories_key)
                if expire_unread_stories_key:
                    pipeline.delete(unread_stories_key)

        
            results = pipeline.execute()
        
            for hashes in results:
                if not isinstance(hashes, list): continue
                if group_by_feed:
                    story_hashes[feed_ids[feed_counter]] = hashes
                    feed_counter += 1
                else:
                    story_hashes.extend(hashes)
        
        return story_hashes
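
Example #6 computes unread stories per feed with plain Redis set algebra: SDIFFSTORE subtracts the user's read hashes from the feed's story set, ZINTERSTORE re-attaches timestamps from the feed's sorted set, and a score-bounded range applies the mark-read date or cutoff window. A standalone sketch of that sequence for a single feed, with made-up keys and data and redis-py 3.x calls (the real code batches 20 feeds per pipeline):

    import time

    import redis

    r = redis.Redis()  # stand-in for the REDIS_STORY_HASH_POOL connection
    user_id, feed_id = 42, 7  # illustrative ids
    stories_key = "F:%s" % feed_id            # set of all story hashes
    sorted_stories_key = "zF:%s" % feed_id    # zset: hash -> story timestamp
    read_stories_key = "RS:%s:%s" % (user_id, feed_id)
    unread_stories_key = "U:%s:%s" % (user_id, feed_id)
    unread_ranked_key = "zU:%s:%s" % (user_id, feed_id)

    # Seed a tiny feed: three stories, one of them already read.
    now = int(time.time())
    r.zadd(sorted_stories_key, {"7:aaa": now - 300, "7:bbb": now - 200, "7:ccc": now - 100})
    r.sadd(stories_key, "7:aaa", "7:bbb", "7:ccc")
    r.sadd(read_stories_key, "7:bbb")

    # unread = all stories minus read stories
    r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
    # intersect with the timestamped zset; plain-set members carry an implicit
    # score of 1, so the summed score is timestamp + 1, which is why the
    # example adds 1 to the mark-read date when building min_score
    r.zinterstore(unread_ranked_key, [sorted_stories_key, unread_stories_key])
    # newest first, bounded by the mark-read date / cutoff window
    unread = r.zrevrangebyscore(unread_ranked_key, now + 86400, now - 86400,
                                withscores=True)

    r.delete(unread_ranked_key, unread_stories_key)  # temp keys, as the pipeline does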
Example #7
    def feed_stories(
        cls,
        user_id,
        feed_ids=None,
        offset=0,
        limit=6,
        order="newest",
        read_filter="all",
        usersubs=None,
        cutoff_date=None,
        all_feed_ids=None,
    ):
        rt = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_TEMP_POOL)

        if order == "oldest":
            range_func = rt.zrange
        else:
            range_func = rt.zrevrange

        if not feed_ids:
            feed_ids = []
        if not all_feed_ids:
            all_feed_ids = [f for f in feed_ids]

        # feeds_string = ""
        feeds_string = ",".join(str(f) for f in sorted(all_feed_ids))[:30]
        ranked_stories_keys = "zU:%s:feeds:%s" % (user_id, feeds_string)
        unread_ranked_stories_keys = "zhU:%s:feeds:%s" % (user_id, feeds_string)
        stories_cached = rt.exists(ranked_stories_keys)
        unreads_cached = True if read_filter == "unread" else rt.exists(unread_ranked_stories_keys)
        if offset and stories_cached and unreads_cached:
            story_hashes = range_func(ranked_stories_keys, offset, limit)
            if read_filter == "unread":
                unread_story_hashes = story_hashes
            else:
                unread_story_hashes = range_func(unread_ranked_stories_keys, 0, offset + limit)
            return story_hashes, unread_story_hashes
        else:
            rt.delete(ranked_stories_keys)
            rt.delete(unread_ranked_stories_keys)

        story_hashes = cls.story_hashes(
            user_id,
            feed_ids=feed_ids,
            read_filter=read_filter,
            order=order,
            include_timestamps=True,
            group_by_feed=False,
            usersubs=usersubs,
            cutoff_date=cutoff_date,
        )
        if not story_hashes:
            return [], []

        pipeline = rt.pipeline()
        for story_hash_group in chunks(story_hashes, 100):
            pipeline.zadd(ranked_stories_keys, **dict(story_hash_group))
        pipeline.execute()
        story_hashes = range_func(ranked_stories_keys, offset, limit)

        if read_filter == "unread":
            unread_feed_story_hashes = story_hashes
            rt.zunionstore(unread_ranked_stories_keys, [ranked_stories_keys])
        else:
            unread_story_hashes = cls.story_hashes(
                user_id,
                feed_ids=feed_ids,
                read_filter="unread",
                order=order,
                include_timestamps=True,
                group_by_feed=False,
                cutoff_date=cutoff_date,
            )
            if unread_story_hashes:
                for unread_story_hash_group in chunks(unread_story_hashes, 100):
                    rt.zadd(unread_ranked_stories_keys, **dict(unread_story_hash_group))
            unread_feed_story_hashes = range_func(unread_ranked_stories_keys, offset, limit)

        rt.expire(ranked_stories_keys, 60 * 60)
        rt.expire(unread_ranked_stories_keys, 60 * 60)

        return story_hashes, unread_feed_story_hashes
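
Example #7 moves the cache onto a temporary Redis pool, batches the zadd calls through a single pipeline, and, when read_filter is "unread" and the ranked set therefore already contains only unread stories, copies it into the unread key with a one-source ZUNIONSTORE instead of recomputing it. A condensed sketch of those two details, with illustrative keys and data and the redis-py 3.x mapping form of zadd:

    import redis

    rt = redis.Redis()  # stand-in for the REDIS_STORY_HASH_TEMP_POOL connection
    ranked_key = "zU:42:feeds:7,33"   # illustrative keys
    unread_key = "zhU:42:feeds:7,33"
    pairs = [("7:%03x" % i, 1700000000 + i) for i in range(250)]

    # One round trip for all the batched writes instead of one per chunk.
    pipeline = rt.pipeline()
    for i in range(0, len(pairs), 100):
        pipeline.zadd(ranked_key, dict(pairs[i:i + 100]))
    pipeline.execute()

    # With read_filter == "unread" every ranked story is unread, so a
    # single-source ZUNIONSTORE simply copies the sorted set.
    rt.zunionstore(unread_key, [ranked_key])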