def feed_stories(cls, user_id, feed_ids=None, offset=0, limit=6, order="newest",
                 read_filter="all", usersubs=None):
    r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)

    if order == "oldest":
        range_func = r.zrange
    else:
        range_func = r.zrevrange

    # Per-user sorted sets caching the ranked story hashes (all stories vs. unread only).
    ranked_stories_keys = "zU:%s:feeds" % (user_id)
    unread_ranked_stories_keys = "zhU:%s:feeds" % (user_id)
    stories_cached = r.exists(ranked_stories_keys)
    unreads_cached = True if read_filter == "unread" else r.exists(unread_ranked_stories_keys)
    if offset and stories_cached and unreads_cached:
        # Beyond the first page with warm caches: page straight out of Redis.
        story_hashes = range_func(ranked_stories_keys, offset, limit)
        if read_filter == "unread":
            unread_story_hashes = story_hashes
        else:
            unread_story_hashes = range_func(unread_ranked_stories_keys, 0, offset + limit)
        return story_hashes, unread_story_hashes
    else:
        # First page or cold cache: rebuild the ranked sets from scratch.
        r.delete(ranked_stories_keys)
        r.delete(unread_ranked_stories_keys)

    story_hashes = cls.story_hashes(
        user_id,
        feed_ids=feed_ids,
        read_filter=read_filter,
        order=order,
        include_timestamps=True,
        group_by_feed=False,
        usersubs=usersubs,
    )
    if not story_hashes:
        return [], []

    # Load the (hash, timestamp) pairs into the ranked sorted set in batches of 100.
    for story_hash_group in chunks(story_hashes, 100):
        r.zadd(ranked_stories_keys, **dict(story_hash_group))
    story_hashes = range_func(ranked_stories_keys, offset, limit)

    if read_filter == "unread":
        unread_feed_story_hashes = story_hashes
    else:
        unread_story_hashes = cls.story_hashes(
            user_id,
            feed_ids=feed_ids,
            read_filter="unread",
            order=order,
            include_timestamps=True,
            group_by_feed=False,
        )
        if unread_story_hashes:
            for unread_story_hash_group in chunks(unread_story_hashes, 100):
                r.zadd(unread_ranked_stories_keys, **dict(unread_story_hash_group))
        unread_feed_story_hashes = range_func(unread_ranked_stories_keys, offset, limit)

    # Keep both cached rankings around for an hour.
    r.expire(ranked_stories_keys, 60 * 60)
    r.expire(unread_ranked_stories_keys, 60 * 60)

    return story_hashes, unread_feed_story_hashes
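# The chunks() helper used throughout these methods is not defined in this
# listing. A minimal sketch, assuming it simply yields fixed-size slices of a
# sequence (the name and call sites come from the code above; the body here is
# an assumption):
def chunks(items, chunk_size):
    """Yield successive chunk_size-sized slices of items."""
    for i in range(0, len(items), chunk_size):
        yield items[i:i + chunk_size]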
def feed_stories(cls, user_id, feed_ids=None, offset=0, limit=6, order='newest',
                 read_filter='all', usersubs=None, cutoff_date=None):
    r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)

    if order == 'oldest':
        range_func = r.zrange
    else:
        range_func = r.zrevrange

    if not feed_ids:
        feed_ids = []

    # Cache keys are scoped to the (truncated) list of requested feed ids, so
    # different feed selections get separate cached rankings.
    # feeds_string = ""
    feeds_string = ','.join(str(f) for f in sorted(feed_ids))[:30]
    ranked_stories_keys = 'zU:%s:feeds:%s' % (user_id, feeds_string)
    unread_ranked_stories_keys = 'zhU:%s:feeds:%s' % (user_id, feeds_string)
    stories_cached = r.exists(ranked_stories_keys)
    unreads_cached = True if read_filter == "unread" else r.exists(unread_ranked_stories_keys)
    if offset and stories_cached and unreads_cached:
        # Beyond the first page with warm caches: page straight out of Redis.
        story_hashes = range_func(ranked_stories_keys, offset, limit)
        if read_filter == "unread":
            unread_story_hashes = story_hashes
        else:
            unread_story_hashes = range_func(unread_ranked_stories_keys, 0, offset + limit)
        return story_hashes, unread_story_hashes
    else:
        # First page or cold cache: rebuild both ranked sets.
        r.delete(ranked_stories_keys)
        r.delete(unread_ranked_stories_keys)

    story_hashes = cls.story_hashes(user_id, feed_ids=feed_ids,
                                    read_filter=read_filter, order=order,
                                    include_timestamps=True,
                                    group_by_feed=False,
                                    usersubs=usersubs,
                                    cutoff_date=cutoff_date)
    if not story_hashes:
        return [], []

    for story_hash_group in chunks(story_hashes, 100):
        r.zadd(ranked_stories_keys, **dict(story_hash_group))
    story_hashes = range_func(ranked_stories_keys, offset, limit)

    if read_filter == "unread":
        unread_feed_story_hashes = story_hashes
    else:
        unread_story_hashes = cls.story_hashes(user_id, feed_ids=feed_ids,
                                               read_filter="unread", order=order,
                                               include_timestamps=True,
                                               group_by_feed=False,
                                               cutoff_date=cutoff_date)
        if unread_story_hashes:
            for unread_story_hash_group in chunks(unread_story_hashes, 100):
                r.zadd(unread_ranked_stories_keys, **dict(unread_story_hash_group))
        unread_feed_story_hashes = range_func(unread_ranked_stories_keys, offset, limit)

    r.expire(ranked_stories_keys, 60 * 60)
    r.expire(unread_ranked_stories_keys, 60 * 60)

    return story_hashes, unread_feed_story_hashes
def index_subscriptions_for_search(self):
    from apps.rss_feeds.models import Feed
    from apps.reader.models import UserSubscription

    SearchStory.create_elasticsearch_mapping()

    start = time.time()
    user = User.objects.get(pk=self.user_id)
    r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
    r.publish(user.username, 'search_index_complete:start')

    subscriptions = UserSubscription.objects.filter(user=user).only('feed')
    total = subscriptions.count()

    feed_ids = []
    for sub in subscriptions:
        try:
            feed_ids.append(sub.feed.pk)
        except Feed.DoesNotExist:
            continue

    feed_id_chunks = [c for c in chunks(feed_ids, 6)]
    logging.user(user, "~FCIndexing ~SB%s feeds~SN in %s chunks..." %
                 (total, len(feed_id_chunks)))

    search_chunks = [
        IndexSubscriptionsChunkForSearch.s(
            feed_ids=feed_id_chunk, user_id=self.user_id
        ).set(queue='search_indexer')
        for feed_id_chunk in feed_id_chunks
    ]
    callback = FinishIndexSubscriptionsForSearch.s(
        user_id=self.user_id, start=start
    ).set(queue='search_indexer')
    celery.chord(search_chunks)(callback)
def index_subscriptions_for_search(self):
    from apps.rss_feeds.models import Feed
    from apps.reader.models import UserSubscription

    SearchStory.create_elasticsearch_mapping()

    start = time.time()
    user = User.objects.get(pk=self.user_id)
    r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
    r.publish(user.username, 'search_index_complete:start')

    subscriptions = UserSubscription.objects.filter(user=user).only('feed')
    total = subscriptions.count()

    feed_ids = []
    for sub in subscriptions:
        try:
            feed_ids.append(sub.feed.pk)
        except Feed.DoesNotExist:
            continue

    feed_id_chunks = [c for c in chunks(feed_ids, 6)]
    logging.user(user, "~FCIndexing ~SB%s feeds~SN in %s chunks..." %
                 (total, len(feed_id_chunks)))

    tasks = [
        IndexSubscriptionsChunkForSearch().s(
            feed_ids=feed_id_chunk, user_id=self.user_id
        ).set(queue='search_indexer')
        for feed_id_chunk in feed_id_chunks
    ]
    group = celery.group(*tasks)
    res = group.apply_async(queue='search_indexer')
    res.join_native()

    duration = time.time() - start
    logging.user(user, "~FCIndexed ~SB%s feeds~SN in ~FM~SB%s~FC~SN sec." %
                 (total, round(duration, 2)))
    r.publish(user.username, 'search_index_complete:done')

    self.subscriptions_indexed = True
    self.subscriptions_indexing = False
    self.save()
def story_hashes(cls, user_id, feed_ids=None, usersubs=None, read_filter="unread",
                 order="newest", include_timestamps=False, group_by_feed=True,
                 cutoff_date=None):
    r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
    pipeline = r.pipeline()
    story_hashes = {} if group_by_feed else []

    if not usersubs:
        usersubs = cls.subs_for_feeds(user_id, feed_ids=feed_ids, read_filter=read_filter)
        feed_ids = [sub.feed_id for sub in usersubs]
        if not feed_ids:
            return story_hashes

    read_dates = dict((us.feed_id, int(us.mark_read_date.strftime('%s'))) for us in usersubs)
    current_time = int(time.time() + 60 * 60 * 24)
    if not cutoff_date:
        cutoff_date = datetime.datetime.now() - datetime.timedelta(days=settings.DAYS_OF_STORY_HASHES)
    unread_timestamp = int(time.mktime(cutoff_date.timetuple())) - 1000
    feed_counter = 0

    # Work through the feeds 20 at a time, batching all Redis commands for each
    # group into a single pipeline round-trip.
    for feed_id_group in chunks(feed_ids, 20):
        pipeline = r.pipeline()
        for feed_id in feed_id_group:
            stories_key = 'F:%s' % feed_id           # set of story hashes in the feed
            sorted_stories_key = 'zF:%s' % feed_id    # story hashes scored by timestamp
            read_stories_key = 'RS:%s:%s' % (user_id, feed_id)  # hashes this user has read
            unread_stories_key = 'U:%s:%s' % (user_id, feed_id)
            unread_ranked_stories_key = 'zU:%s:%s' % (user_id, feed_id)
            expire_unread_stories_key = False

            max_score = current_time
            if read_filter == 'unread':
                # +1 for the intersection b/w zF and F, which carries an implicit score of 1.
                min_score = read_dates[feed_id] + 1
                # Unread = every story in the feed minus the ones this user has read.
                pipeline.sdiffstore(unread_stories_key, stories_key, read_stories_key)
                expire_unread_stories_key = True
            else:
                min_score = unread_timestamp
                unread_stories_key = stories_key

            if order == 'oldest':
                byscorefunc = pipeline.zrangebyscore
            else:
                byscorefunc = pipeline.zrevrangebyscore
                # zrevrangebyscore takes the max bound first, so swap.
                min_score, max_score = max_score, min_score

            # Intersect with the time-sorted set so each hash carries its timestamp,
            # then pull the hashes inside the score window.
            pipeline.zinterstore(unread_ranked_stories_key, [sorted_stories_key, unread_stories_key])
            byscorefunc(unread_ranked_stories_key, min_score, max_score, withscores=include_timestamps)
            pipeline.delete(unread_ranked_stories_key)
            if expire_unread_stories_key:
                pipeline.delete(unread_stories_key)

        results = pipeline.execute()

        # Only the byscorefunc calls return lists; skip the results of the other
        # pipelined commands.
        for hashes in results:
            if not isinstance(hashes, list):
                continue
            if group_by_feed:
                story_hashes[feed_ids[feed_counter]] = hashes
                feed_counter += 1
            else:
                story_hashes.extend(hashes)

    return story_hashes
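# Illustrative (hypothetical) call showing how story_hashes() might be invoked
# on its own; the owning class name UserSubscription and the concrete ids are
# assumptions, not taken from this listing:
#
#     unread = UserSubscription.story_hashes(
#         user_id=42,
#         feed_ids=[101, 102],
#         read_filter="unread",
#         order="newest",
#         group_by_feed=True,
#     )
#
# With group_by_feed=True the result is a dict keyed by feed id, e.g.
# {101: ["101:abc123", ...], 102: [...]}; with group_by_feed=False it is a flat
# list (of (hash, timestamp) pairs when include_timestamps=True).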
def feed_stories(cls, user_id, feed_ids=None, offset=0, limit=6, order="newest",
                 read_filter="all", usersubs=None, cutoff_date=None, all_feed_ids=None):
    rt = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_TEMP_POOL)

    if order == "oldest":
        range_func = rt.zrange
    else:
        range_func = rt.zrevrange

    if not feed_ids:
        feed_ids = []
    if not all_feed_ids:
        all_feed_ids = [f for f in feed_ids]

    # Cache keys are scoped to the (truncated) full feed selection, not just the
    # feeds being fetched on this call.
    # feeds_string = ""
    feeds_string = ",".join(str(f) for f in sorted(all_feed_ids))[:30]
    ranked_stories_keys = "zU:%s:feeds:%s" % (user_id, feeds_string)
    unread_ranked_stories_keys = "zhU:%s:feeds:%s" % (user_id, feeds_string)
    stories_cached = rt.exists(ranked_stories_keys)
    unreads_cached = True if read_filter == "unread" else rt.exists(unread_ranked_stories_keys)
    if offset and stories_cached and unreads_cached:
        # Beyond the first page with warm caches: page straight out of Redis.
        story_hashes = range_func(ranked_stories_keys, offset, limit)
        if read_filter == "unread":
            unread_story_hashes = story_hashes
        else:
            unread_story_hashes = range_func(unread_ranked_stories_keys, 0, offset + limit)
        return story_hashes, unread_story_hashes
    else:
        # First page or cold cache: rebuild both ranked sets.
        rt.delete(ranked_stories_keys)
        rt.delete(unread_ranked_stories_keys)

    story_hashes = cls.story_hashes(
        user_id,
        feed_ids=feed_ids,
        read_filter=read_filter,
        order=order,
        include_timestamps=True,
        group_by_feed=False,
        usersubs=usersubs,
        cutoff_date=cutoff_date,
    )
    if not story_hashes:
        return [], []

    # Load the (hash, timestamp) pairs into the ranked set, 100 at a time, in one pipeline.
    pipeline = rt.pipeline()
    for story_hash_group in chunks(story_hashes, 100):
        pipeline.zadd(ranked_stories_keys, **dict(story_hash_group))
    pipeline.execute()
    story_hashes = range_func(ranked_stories_keys, offset, limit)

    if read_filter == "unread":
        unread_feed_story_hashes = story_hashes
        # Mirror the ranked set into the unread key so later pages find both caches.
        rt.zunionstore(unread_ranked_stories_keys, [ranked_stories_keys])
    else:
        unread_story_hashes = cls.story_hashes(
            user_id,
            feed_ids=feed_ids,
            read_filter="unread",
            order=order,
            include_timestamps=True,
            group_by_feed=False,
            cutoff_date=cutoff_date,
        )
        if unread_story_hashes:
            for unread_story_hash_group in chunks(unread_story_hashes, 100):
                rt.zadd(unread_ranked_stories_keys, **dict(unread_story_hash_group))
        unread_feed_story_hashes = range_func(unread_ranked_stories_keys, offset, limit)

    rt.expire(ranked_stories_keys, 60 * 60)
    rt.expire(unread_ranked_stories_keys, 60 * 60)

    return story_hashes, unread_feed_story_hashes
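# Hypothetical paging example for the feed_stories() variant above; the owning
# class name UserSubscription and the argument values are assumptions:
#
#     page1, unread1 = UserSubscription.feed_stories(
#         user_id=42, feed_ids=[101, 102, 103], offset=0, limit=6,
#         order="newest", read_filter="unread",
#     )
#     page2, unread2 = UserSubscription.feed_stories(
#         user_id=42, feed_ids=[101, 102, 103], offset=6, limit=6,
#         order="newest", read_filter="unread",
#     )
#
# The first call rebuilds the zU:/zhU: sorted sets; the second finds them
# cached (they expire after an hour) and pages directly out of Redis.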