def load_starred_stories(request): user = get_user(request) offset = int(request.REQUEST.get('offset', 0)) limit = int(request.REQUEST.get('limit', 10)) page = int(request.REQUEST.get('page', 0)) if page: offset = limit * page mstories = MStarredStory.objects(user_id=user.pk).order_by('-starred_date')[offset:offset+limit] stories = Feed.format_stories(mstories) for story in stories: story_date = localtime_for_timezone(story['story_date'], user.profile.timezone) now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone) story['short_parsed_date'] = format_story_link_date__short(story_date, now) story['long_parsed_date'] = format_story_link_date__long(story_date, now) starred_date = localtime_for_timezone(story['starred_date'], user.profile.timezone) story['starred_date'] = format_story_link_date__long(starred_date, now) story['read_status'] = 1 story['starred'] = True story['intelligence'] = { 'feed': 0, 'author': 0, 'tags': 0, 'title': 0, } logging.user(request.user, "~FCLoading starred stories: ~SB%s stories" % (len(stories))) return dict(stories=stories)
def load_starred_stories(request): user = get_user(request) offset = int(request.REQUEST.get('offset', 0)) limit = int(request.REQUEST.get('limit', 10)) page = int(request.REQUEST.get('page', 0)) if page: offset = limit * (page - 1) mstories = MStarredStory.objects(user_id=user.pk).order_by('-starred_date')[offset:offset+limit] stories = Feed.format_stories(mstories) for story in stories: story_date = localtime_for_timezone(story['story_date'], user.profile.timezone) now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone) story['short_parsed_date'] = format_story_link_date__short(story_date, now) story['long_parsed_date'] = format_story_link_date__long(story_date, now) starred_date = localtime_for_timezone(story['starred_date'], user.profile.timezone) story['starred_date'] = format_story_link_date__long(starred_date, now) story['read_status'] = 1 story['starred'] = True story['intelligence'] = { 'feed': 0, 'author': 0, 'tags': 0, 'title': 0, } logging.user(request, "~FCLoading starred stories: ~SB%s stories" % (len(stories))) return dict(stories=stories)
def load_river_stories(request): user = get_user(request) feed_ids = [int(feed_id) for feed_id in request.POST.getlist("feeds")] offset = int(request.REQUEST.get("offset", 0)) limit = int(request.REQUEST.get("limit", 25)) page = int(request.REQUEST.get("page", 0)) + 1 read_stories = int(request.REQUEST.get("read_stories", 0)) # if page: offset = limit * page if page: limit = limit * page - read_stories def feed_qvalues(feed_id): feed = UserSubscription.objects.get(feed__pk=feed_id, user=user) return Q(story_feed_id=feed_id) & Q(story_date__gte=feed.mark_read_date) feed_last_reads = map(feed_qvalues, feed_ids) qs = reduce(lambda q1, q2: q1 | q2, feed_last_reads) read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only("story") read_stories = [rs.story.id for rs in read_stories] mstories = MStory.objects(Q(id__nin=read_stories) & qs)[offset : offset + limit] stories = Feed.format_stories(mstories) starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id__in=feed_ids).only( "story_guid", "starred_date" ) starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories]) for story in stories: story_date = localtime_for_timezone(story["story_date"], user.profile.timezone) story["short_parsed_date"] = format_story_link_date__short(story_date) story["long_parsed_date"] = format_story_link_date__long(story_date) story["read_status"] = 0 if story["id"] in starred_stories: story["starred"] = True starred_date = localtime_for_timezone(starred_stories[story["id"]], user.profile.timezone) story["starred_date"] = format_story_link_date__long(starred_date) story["intelligence"] = {"feed": 0, "author": 0, "tags": 0, "title": 0} logging.info( " ---> [%s] ~FCLoading river stories: ~SB%s stories ~SN(%s feeds)" % (request.user, len(stories), len(feed_ids)) ) return dict(stories=stories)
def format_stories(self, stories_db): stories = [] for story_db in stories_db: story = {} story['story_tags'] = story_db.story_tags or [] story['short_parsed_date'] = format_story_link_date__short(story_db.story_date) story['long_parsed_date'] = format_story_link_date__long(story_db.story_date) story['story_date'] = story_db.story_date story['story_authors'] = story_db.story_author_name story['story_title'] = story_db.story_title story['story_content'] = story_db.story_content_z and zlib.decompress(story_db.story_content_z) story['story_permalink'] = story_db.story_permalink story['story_feed_id'] = self.pk story['id'] = story_db.story_guid stories.append(story) return stories
def format_stories(self, stories_db): stories = [] # from django.db import connection # print "Formatting Stories: %s" % stories_db.count() for story_db in stories_db: story = {} story['story_tags'] = story_db.story_tags or [] story['short_parsed_date'] = format_story_link_date__short(story_db.story_date) story['long_parsed_date'] = format_story_link_date__long(story_db.story_date) story['story_date'] = story_db.story_date story['story_authors'] = story_db.story_author_name story['story_title'] = story_db.story_title story['story_content'] = story_db.story_content_z and zlib.decompress(story_db.story_content_z) story['story_permalink'] = story_db.story_permalink story['story_feed_id'] = self.pk story['id'] = story_db.story_guid stories.append(story) return stories
def load_starred_stories(request): user = get_user(request) offset = int(request.REQUEST.get("offset", 0)) limit = int(request.REQUEST.get("limit", 10)) page = int(request.REQUEST.get("page", 0)) if page: offset = limit * page mstories = MStarredStory.objects(user_id=user.pk).order_by("-starred_date")[offset : offset + limit] stories = Feed.format_stories(mstories) for story in stories: story_date = localtime_for_timezone(story["story_date"], user.profile.timezone) story["short_parsed_date"] = format_story_link_date__short(story_date) story["long_parsed_date"] = format_story_link_date__long(story_date) starred_date = localtime_for_timezone(story["starred_date"], user.profile.timezone) story["starred_date"] = format_story_link_date__long(starred_date) story["read_status"] = 1 story["starred"] = True story["intelligence"] = {"feed": 0, "author": 0, "tags": 0, "title": 0} logging.info(" ---> [%s] ~FCLoading starred stories: ~SB%s stories" % (request.user, len(stories))) return dict(stories=stories)
def load_river_stories(request): limit = 18 offset = 0 start = datetime.datetime.utcnow() user = get_user(request) feed_ids = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id] original_feed_ids = list(feed_ids) page = int(request.REQUEST.get('page', 0))+1 read_stories_count = int(request.REQUEST.get('read_stories_count', 0)) bottom_delta = datetime.timedelta(days=settings.DAYS_OF_UNREAD) if not feed_ids: logging.user(request.user, "~FCLoading empty river stories: page %s" % (page)) return dict(stories=[]) # Fetch all stories at and before the page number. # Not a single page, because reading stories can move them up in the unread order. # `read_stories_count` is an optimization, works best when all 25 stories before have been read. limit = limit * page - read_stories_count # Read stories to exclude read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story') read_stories = [rs.story.id for rs in read_stories] # Determine mark_as_read dates for all feeds to ignore all stories before this date. # max_feed_count = 0 feed_counts = {} feed_last_reads = {} for feed_id in feed_ids: try: usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user) except UserSubscription.DoesNotExist: continue if not usersub: continue feed_counts[feed_id] = (usersub.unread_count_negative * 1 + usersub.unread_count_neutral * 10 + usersub.unread_count_positive * 20) # if feed_counts[feed_id] > max_feed_count: # max_feed_count = feed_counts[feed_id] feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple())) feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:50] feed_ids = [f[0] for f in feed_counts] feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids]) feed_counts = dict(feed_counts) # After excluding read stories, all that's left are stories # past the mark_read_date. Everything returned is guaranteed to be unread. mstories = MStory.objects( id__nin=read_stories, story_feed_id__in=feed_ids, story_date__gte=start - bottom_delta ).map_reduce("""function() { var d = feed_last_reads[this[~story_feed_id]]; if (this[~story_date].getTime()/1000 > d) { emit(this[~id], this); } }""", """function(key, values) { return values[0]; }""", output='inline', scope={ 'feed_last_reads': feed_last_reads } ) mstories = [story.value for story in mstories] mstories = sorted(mstories, cmp=lambda x, y: cmp(story_score(y, bottom_delta), story_score(x, bottom_delta))) # story_feed_counts = defaultdict(int) # mstories_pruned = [] # for story in mstories: # print story['story_title'], story_feed_counts[story['story_feed_id']] # if story_feed_counts[story['story_feed_id']] >= 3: continue # mstories_pruned.append(story) # story_feed_counts[story['story_feed_id']] += 1 stories = [] for i, story in enumerate(mstories): if i < offset: continue if i >= offset + limit: break stories.append(bunch(story)) stories = Feed.format_stories(stories) found_feed_ids = list(set([story['story_feed_id'] for story in stories])) # Find starred stories starred_stories = MStarredStory.objects( user_id=user.pk, story_feed_id__in=found_feed_ids ).only('story_guid', 'starred_date') starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories]) # Intelligence classifiers for all feeds involved def sort_by_feed(classifiers): feed_classifiers = defaultdict(list) for classifier in classifiers: feed_classifiers[classifier.feed_id].append(classifier) return feed_classifiers classifier_feeds = sort_by_feed(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_authors = sort_by_feed(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_titles = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_tags = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids)) # Just need to format stories for story in stories: story_date = localtime_for_timezone(story['story_date'], user.profile.timezone) now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone) story['short_parsed_date'] = format_story_link_date__short(story_date, now) story['long_parsed_date'] = format_story_link_date__long(story_date, now) story['read_status'] = 0 if story['id'] in starred_stories: story['starred'] = True starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone) story['starred_date'] = format_story_link_date__long(starred_date, now) story['intelligence'] = { 'feed': apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']), 'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story), 'tags': apply_classifier_tags(classifier_tags[story['story_feed_id']], story), 'title': apply_classifier_titles(classifier_titles[story['story_feed_id']], story), } diff = datetime.datetime.utcnow() - start timediff = float("%s.%.2s" % (diff.seconds, (diff.microseconds / 1000))) logging.user(request.user, "~FCLoading river stories: page %s - ~SB%s/%s " "stories ~SN(%s/%s/%s feeds) ~FB(%s seconds)" % (page, len(stories), len(mstories), len(found_feed_ids), len(feed_ids), len(original_feed_ids), timediff)) return dict(stories=stories)
def load_single_feed(request, feed_id): start = time.time() user = get_user(request) offset = int(request.REQUEST.get('offset', 0)) limit = int(request.REQUEST.get('limit', 12)) page = int(request.REQUEST.get('page', 1)) dupe_feed_id = None userstories_db = None if page: offset = limit * (page-1) if not feed_id: raise Http404 try: feed = Feed.objects.get(id=feed_id) except Feed.DoesNotExist: feed_address = request.REQUEST.get('feed_address') dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address) if dupe_feed: feed = dupe_feed[0].feed dupe_feed_id = feed_id else: raise Http404 stories = feed.get_stories(offset, limit) # Get intelligence classifier for user classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id)) classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id)) classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id)) classifier_tags = list(MClassifierTag.objects(user_id=user.pk, feed_id=feed_id)) checkpoint1 = time.time() usersub = UserSubscription.objects.get(user=user, feed=feed) userstories = [] if usersub: userstories_db = MUserStory.objects(user_id=user.pk, feed_id=feed.pk, read_date__gte=usersub.mark_read_date) starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id=feed_id).only('story_guid', 'starred_date') starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories]) for us in userstories_db: if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode): userstories.append(us.story.story_guid) elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode): userstories.append(us.story.id) # TODO: Remove me after migration from story.id->guid checkpoint2 = time.time() for story in stories: story_date = localtime_for_timezone(story['story_date'], user.profile.timezone) now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone) story['short_parsed_date'] = format_story_link_date__short(story_date, now) story['long_parsed_date'] = format_story_link_date__long(story_date, now) if usersub: if story['id'] in userstories: story['read_status'] = 1 elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date: story['read_status'] = 1 elif not story.get('read_status') and story['story_date'] > usersub.last_read_date: story['read_status'] = 0 if story['id'] in starred_stories: story['starred'] = True starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone) story['starred_date'] = format_story_link_date__long(starred_date, now) else: story['read_status'] = 1 story['intelligence'] = { 'feed': apply_classifier_feeds(classifier_feeds, feed), 'author': apply_classifier_authors(classifier_authors, story), 'tags': apply_classifier_tags(classifier_tags, story), 'title': apply_classifier_titles(classifier_titles, story), } checkpoint3 = time.time() # Intelligence feed_tags = json.decode(feed.data.popular_tags) if feed.data.popular_tags else [] feed_authors = json.decode(feed.data.popular_authors) if feed.data.popular_authors else [] classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, classifier_authors, classifier_titles, classifier_tags) if usersub: usersub.feed_opens += 1 usersub.save() timediff = time.time()-start last_update = relative_timesince(feed.last_update) logging.user(request.user, "~FYLoading feed: ~SB%s%s ~SN(%.4s seconds)" % ( feed, ('~SN/p%s' % page) if page > 1 else '', timediff)) FeedLoadtime.objects.create(feed=feed, loadtime=timediff) if timediff >= 1: diff1 = checkpoint1-start diff2 = checkpoint2-start diff3 = checkpoint3-start logging.user(request.user, "~FYSlow feed load: ~SB%.4s/%.4s(%s)/%.4s" % ( diff1, diff2, userstories_db and userstories_db.count(), diff3)) data = dict(stories=stories, feed_tags=feed_tags, feed_authors=feed_authors, classifiers=classifiers, last_update=last_update, feed_id=feed.pk) if dupe_feed_id: data['dupe_feed_id'] = dupe_feed_id if not usersub: data.update(feed.canonical()) return data
classifier_titles = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_tags = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids)) except OperationFailure: logging.info(" ***> Classifiers failure") else: for feed_id in found_feed_ids: classifiers[feed_id] = get_classifiers_for_user(user, feed_id, classifier_feeds[feed_id], classifier_authors[feed_id], classifier_titles[feed_id], classifier_tags[feed_id]) # Just need to format stories for story in stories: story_date = localtime_for_timezone(story['story_date'], user.profile.timezone) now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone) story['short_parsed_date'] = format_story_link_date__short(story_date, now) story['long_parsed_date'] = format_story_link_date__long(story_date, now) story['read_status'] = 0 if story['id'] in starred_stories: story['starred'] = True starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone) story['starred_date'] = format_story_link_date__long(starred_date, now) story['intelligence'] = { 'feed': apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']), 'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story), 'tags': apply_classifier_tags(classifier_tags[story['story_feed_id']], story), 'title': apply_classifier_titles(classifier_titles[story['story_feed_id']], story), } diff = time.time() - start timediff = round(float(diff), 2)