def bootstrap_userstories(): print "Mongo DB userstories: %s" % MUserStory.objects().count() # db.userstories.drop() print "Dropped! Mongo DB userstories: %s" % MUserStory.objects().count() print "UserStories: %s" % UserStory.objects.all().count() pprint(db.userstories.index_information()) userstories = UserStory.objects.all().values() for userstory in userstories: try: story = Story.objects.get(pk=userstory['story_id']) except Story.DoesNotExist: continue try: userstory['story'] = MStory.objects( story_feed_id=story.story_feed.pk, story_guid=story.story_guid)[0] except: print '!', continue print '.', del userstory['id'] del userstory['opinion'] del userstory['story_id'] try: MUserStory(**userstory).save() except: print '\n\n!\n\n' continue print "\nMongo DB userstories: %s" % MUserStory.objects().count()
def delete_feed(request): feed_id = int(request.POST['feed_id']) user_sub = get_object_or_404(UserSubscription, user=request.user, feed=feed_id) user_sub.delete() MUserStory.objects(user_id=request.user.pk, feed_id=feed_id).delete() def _find_feed_in_folders(old_folders): new_folders = [] for k, folder in enumerate(old_folders): if isinstance(folder, int): if folder == feed_id: logging.info(" ---> [%s] Delete folder: %s'th item: %s folders/feeds" % ( request.user, k, len(old_folders) )) # folders.remove(folder) else: new_folders.append(folder) elif isinstance(folder, dict): for f_k, f_v in folder.items(): new_folders.append({f_k: _find_feed_in_folders(f_v)}) return new_folders user_sub_folders_object = UserSubscriptionFolders.objects.get(user=request.user) user_sub_folders = json.decode(user_sub_folders_object.folders) user_sub_folders = _find_feed_in_folders(user_sub_folders) user_sub_folders_object.folders = json.encode(user_sub_folders) user_sub_folders_object.save() return dict(code=1)
def bootstrap_userstories(): print "Mongo DB userstories: %s" % MUserStory.objects().count() # db.userstories.drop() print "Dropped! Mongo DB userstories: %s" % MUserStory.objects().count() print "UserStories: %s" % UserStory.objects.all().count() pprint(db.userstories.index_information()) userstories = UserStory.objects.all().values() for userstory in userstories: try: story = Story.objects.get(pk=userstory['story_id']) except Story.DoesNotExist: continue try: userstory['story'] = MStory.objects(story_feed_id=story.story_feed.pk, story_guid=story.story_guid)[0] except: print '!', continue print '.', del userstory['id'] del userstory['opinion'] del userstory['story_id'] try: MUserStory(**userstory).save() except: print '\n\n!\n\n' continue print "\nMongo DB userstories: %s" % MUserStory.objects().count()
def mark_story_as_unread(request): story_id = request.POST['story_id'] feed_id = int(request.POST['feed_id']) try: usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id) except Feed.DoesNotExist: duplicate_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id) if duplicate_feed: try: usersub = UserSubscription.objects.get(user=request.user, feed=duplicate_feed[0].feed) except Feed.DoesNotExist: return dict(code=-1) if not usersub.needs_unread_recalc: usersub.needs_unread_recalc = True usersub.save() data = dict(code=0, payload=dict(story_id=story_id)) logging.user(request.user, "~FY~SBUnread~SN story in feed: %s" % (usersub.feed)) story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0] m = MUserStory.objects(story=story, user_id=request.user.pk, feed_id=feed_id) m.delete() return data
def trim_feed(self, verbose=False): from apps.reader.models import MUserStory trim_cutoff = 500 if self.active_subscribers <= 1: trim_cutoff = 50 elif self.active_subscribers <= 3: trim_cutoff = 100 elif self.active_subscribers <= 5: trim_cutoff = 150 elif self.active_subscribers <= 10: trim_cutoff = 250 elif self.active_subscribers <= 25: trim_cutoff = 350 stories = MStory.objects( story_feed_id=self.pk, ).order_by('-story_date') if stories.count() > trim_cutoff: if verbose: print 'Found %s stories in %s. Trimming to %s...' % ( stories.count(), self, trim_cutoff) story_trim_date = stories[trim_cutoff].story_date extra_stories = MStory.objects(story_feed_id=self.pk, story_date__lte=story_trim_date) extra_stories.delete() # print "Deleted stories, %s left." % MStory.objects(story_feed_id=self.pk).count() userstories = MUserStory.objects(feed_id=self.pk, read_date__lte=story_trim_date) if userstories.count(): # print "Found %s user stories. Deleting..." % userstories.count() userstories.delete()
def mark_feed_as_read(request): feed_id = int(request.REQUEST['feed_id']) feed = Feed.objects.get(id=feed_id) code = 0 us = UserSubscription.objects.get(feed=feed, user=request.user) try: us.mark_feed_read() except IntegrityError: code = -1 else: code = 1 logging.info(" ---> [%s] Marking feed as read: %s" % (request.user, feed,)) MUserStory.objects(user_id=request.user.pk, feed_id=feed_id).delete() return dict(code=code)
def mark_story_as_unread(request): story_id = request.POST['story_id'] feed_id = int(request.POST['feed_id']) usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id) if not usersub.needs_unread_recalc: usersub.needs_unread_recalc = True usersub.save() data = dict(code=0, payload=dict(story_id=story_id)) logging.user(request, "~FY~SBUnread~SN story in feed: %s" % (usersub.feed)) story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0] if story.story_date < usersub.mark_read_date: # Story is outside the mark as read range, so invert all stories before. newer_stories = MStory.objects(story_feed_id=story.story_feed_id, story_date__gte=story.story_date, story_date__lte=usersub.mark_read_date ).only('story_guid') newer_stories = [s.story_guid for s in newer_stories] usersub.mark_read_date = story.story_date - datetime.timedelta(minutes=1) usersub.needs_unread_recalc = True usersub.save() # Mark stories as read only after the mark_read_date has been moved, otherwise # these would be ignored. data = usersub.mark_story_ids_as_read(newer_stories, request=request) m = MUserStory.objects(story_id=story_id, user_id=request.user.pk, feed_id=feed_id) m.delete() return data
def mark_story_as_unread(request): story_id = request.POST['story_id'] feed_id = int(request.POST['feed_id']) try: usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id) except Feed.DoesNotExist: duplicate_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id) if duplicate_feed: try: usersub = UserSubscription.objects.get(user=request.user, feed=duplicate_feed[0].feed) except Feed.DoesNotExist: return dict(code=-1) if not usersub.needs_unread_recalc: usersub.needs_unread_recalc = True usersub.save() data = dict(code=0, payload=dict(story_id=story_id)) logging.user(request, "~FY~SBUnread~SN story in feed: %s" % (usersub.feed)) story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0] m = MUserStory.objects(story=story, user_id=request.user.pk, feed_id=feed_id) m.delete() return data
def trim_feed(self): from apps.reader.models import MUserStory trim_cutoff = 500 if self.active_subscribers <= 1: trim_cutoff = 50 elif self.active_subscribers <= 3: trim_cutoff = 100 elif self.active_subscribers <= 5: trim_cutoff = 150 elif self.active_subscribers <= 10: trim_cutoff = 250 elif self.active_subscribers <= 25: trim_cutoff = 350 stories = MStory.objects(story_feed_id=self.pk).order_by("-story_date") if stories.count() > trim_cutoff: # print 'Found %s stories in %s. Trimming...' % (stories.count(), self), story_trim_date = stories[trim_cutoff].story_date extra_stories = MStory.objects(story_feed_id=self.pk, story_date__lte=story_trim_date) extra_stories.delete() # print "Deleted stories, %s left." % MStory.objects(story_feed_id=self.pk).count() userstories = MUserStory.objects(feed_id=self.pk, read_date__lte=story_trim_date) if userstories.count(): # print "Found %s user stories. Deleting..." % userstories.count() userstories.delete()
def calculate_metrics(): from apps.rss_feeds.models import MStory from apps.reader.models import MUserStory return { 'stories': MStory.objects().count(), 'read_stories': MUserStory.objects().count(), }
def calculate_metrics(self): from apps.rss_feeds.models import MStory from apps.reader.models import MUserStory return { 'stories': MStory.objects().count(), 'read_stories': MUserStory.objects().count(), }
def mark_feed_as_read(request): feed_ids = [int(f) for f in request.REQUEST.getlist('feed_id') if f] code = 0 for feed_id in feed_ids: try: feed = Feed.objects.get(id=feed_id) except Feed.DoesNotExist: continue code = 0 us = UserSubscription.objects.get(feed=feed, user=request.user) try: us.mark_feed_read() except IntegrityError: code = -1 else: code = 1 logging.user(request.user, "~FMMarking feed as read: ~SB%s" % (feed,)) MUserStory.objects(user_id=request.user.pk, feed_id=feed_id).delete() return dict(code=code)
def load_river_stories(request): user = get_user(request) feed_ids = [int(feed_id) for feed_id in request.POST.getlist("feeds")] offset = int(request.REQUEST.get("offset", 0)) limit = int(request.REQUEST.get("limit", 25)) page = int(request.REQUEST.get("page", 0)) + 1 read_stories = int(request.REQUEST.get("read_stories", 0)) # if page: offset = limit * page if page: limit = limit * page - read_stories def feed_qvalues(feed_id): feed = UserSubscription.objects.get(feed__pk=feed_id, user=user) return Q(story_feed_id=feed_id) & Q(story_date__gte=feed.mark_read_date) feed_last_reads = map(feed_qvalues, feed_ids) qs = reduce(lambda q1, q2: q1 | q2, feed_last_reads) read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only("story") read_stories = [rs.story.id for rs in read_stories] mstories = MStory.objects(Q(id__nin=read_stories) & qs)[offset : offset + limit] stories = Feed.format_stories(mstories) starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id__in=feed_ids).only( "story_guid", "starred_date" ) starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories]) for story in stories: story_date = localtime_for_timezone(story["story_date"], user.profile.timezone) story["short_parsed_date"] = format_story_link_date__short(story_date) story["long_parsed_date"] = format_story_link_date__long(story_date) story["read_status"] = 0 if story["id"] in starred_stories: story["starred"] = True starred_date = localtime_for_timezone(starred_stories[story["id"]], user.profile.timezone) story["starred_date"] = format_story_link_date__long(starred_date) story["intelligence"] = {"feed": 0, "author": 0, "tags": 0, "title": 0} logging.info( " ---> [%s] ~FCLoading river stories: ~SB%s stories ~SN(%s feeds)" % (request.user, len(stories), len(feed_ids)) ) return dict(stories=stories)
#!/usr/bin/env python from utils.munin.base import MuninGraph from apps.rss_feeds.models import MStory from apps.reader.models import MUserStory graph_config = { 'graph_category' : 'NewsBlur', 'graph_title' : 'NewsBlur Stories', 'graph_vlabel' : 'Stories', 'stories.label': 'stories', 'tags.label': 'tags', 'authors.label': 'authors', 'read_stories.label': 'read_stories', } metrics = { 'stories': MStory.objects().count(), 'read_stories': MUserStory.objects().count(), } if __name__ == '__main__': MuninGraph(graph_config, metrics).run()
#!/usr/bin/env python from utils.munin.base import MuninGraph from apps.rss_feeds.models import MStory from apps.reader.models import MUserStory graph_config = { 'graph_category': 'NewsBlur', 'graph_title': 'NewsBlur Stories', 'graph_vlabel': 'Stories', 'stories.label': 'stories', 'tags.label': 'tags', 'authors.label': 'authors', 'read_stories.label': 'read_stories', } metrics = { 'stories': MStory.objects().count(), 'read_stories': MUserStory.objects().count(), } if __name__ == '__main__': MuninGraph(graph_config, metrics).run()
def load_river_blurblog(request): limit = 10 start = time.time() user = get_user(request) social_user_ids = [int(uid) for uid in request.REQUEST.getlist("social_user_ids") if uid] original_user_ids = list(social_user_ids) page = int(request.REQUEST.get("page", 1)) order = request.REQUEST.get("order", "newest") read_filter = request.REQUEST.get("read_filter", "unread") relative_user_id = request.REQUEST.get("relative_user_id", None) now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone) UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD) if not relative_user_id: relative_user_id = get_user(request).pk if not social_user_ids: socialsubs = MSocialSubscription.objects.filter(user_id=user.pk) social_user_ids = [s.subscription_user_id for s in socialsubs] offset = (page - 1) * limit limit = page * limit - 1 story_ids, story_dates = MSocialSubscription.feed_stories( user.pk, social_user_ids, offset=offset, limit=limit, order=order, read_filter=read_filter ) mstories = MStory.objects(id__in=story_ids) story_id_to_dates = dict(zip(story_ids, story_dates)) def sort_stories_by_id(a, b): return int(story_id_to_dates[str(b.id)]) - int(story_id_to_dates[str(a.id)]) sorted_mstories = sorted(mstories, cmp=sort_stories_by_id) stories = Feed.format_stories(sorted_mstories) for s, story in enumerate(stories): story["story_date"] = datetime.datetime.fromtimestamp(story_dates[s]) stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, relative_user_id, check_all=True) story_feed_ids = list(set(s["story_feed_id"] for s in stories)) usersubs = UserSubscription.objects.filter(user__pk=user.pk, feed__pk__in=story_feed_ids) usersubs_map = dict((sub.feed_id, sub) for sub in usersubs) unsub_feed_ids = list(set(story_feed_ids).difference(set(usersubs_map.keys()))) unsub_feeds = Feed.objects.filter(pk__in=unsub_feed_ids) unsub_feeds = [feed.canonical(include_favicon=False) for feed in unsub_feeds] # Find starred stories if story_feed_ids: story_ids = [story["id"] for story in stories] starred_stories = MStarredStory.objects(user_id=user.pk, story_guid__in=story_ids).only( "story_guid", "starred_date" ) starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories]) shared_stories = MSharedStory.objects(user_id=user.pk, story_guid__in=story_ids).only( "story_guid", "shared_date", "comments" ) shared_stories = dict( [ (story.story_guid, dict(shared_date=story.shared_date, comments=story.comments)) for story in shared_stories ] ) userstories_db = MUserStory.objects(user_id=user.pk, feed_id__in=story_feed_ids, story_id__in=story_ids).only( "story_id" ) userstories = set(us.story_id for us in userstories_db) else: starred_stories = {} shared_stories = {} userstories = [] # Intelligence classifiers for all feeds involved if story_feed_ids: classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, feed_id__in=story_feed_ids)) classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=story_feed_ids)) classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=story_feed_ids)) classifier_tags = list(MClassifierTag.objects(user_id=user.pk, feed_id__in=story_feed_ids)) else: classifier_feeds = [] classifier_authors = [] classifier_titles = [] classifier_tags = [] classifiers = sort_classifiers_by_feed( user=user, feed_ids=story_feed_ids, classifier_feeds=classifier_feeds, classifier_authors=classifier_authors, classifier_titles=classifier_titles, classifier_tags=classifier_tags, ) # Just need to format stories for story in stories: if story["id"] in userstories: story["read_status"] = 1 elif story["story_date"] < UNREAD_CUTOFF: story["read_status"] = 1 else: story["read_status"] = 0 story_date = localtime_for_timezone(story["story_date"], user.profile.timezone) story["short_parsed_date"] = format_story_link_date__short(story_date, now) story["long_parsed_date"] = format_story_link_date__long(story_date, now) if story["id"] in starred_stories: story["starred"] = True starred_date = localtime_for_timezone(starred_stories[story["id"]], user.profile.timezone) story["starred_date"] = format_story_link_date__long(starred_date, now) story["intelligence"] = { "feed": apply_classifier_feeds(classifier_feeds, story["story_feed_id"]), "author": apply_classifier_authors(classifier_authors, story), "tags": apply_classifier_tags(classifier_tags, story), "title": apply_classifier_titles(classifier_titles, story), } if story["id"] in shared_stories: story["shared"] = True shared_date = localtime_for_timezone(shared_stories[story["id"]]["shared_date"], user.profile.timezone) story["shared_date"] = format_story_link_date__long(shared_date, now) story["shared_comments"] = strip_tags(shared_stories[story["id"]]["comments"]) diff = time.time() - start timediff = round(float(diff), 2) logging.user( request, "~FYLoading ~FCriver blurblogs stories~FY: ~SBp%s~SN (%s/%s " "stories, ~SN%s/%s/%s feeds)" % (page, len(stories), len(mstories), len(story_feed_ids), len(social_user_ids), len(original_user_ids)), ) return { "stories": stories, "user_profiles": user_profiles, "feeds": unsub_feeds, "classifiers": classifiers, "elapsed_time": timediff, }
def load_river_stories(request): limit = 18 offset = int(request.REQUEST.get('offset', 0)) start = time.time() user = get_user(request) feed_ids = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id] original_feed_ids = list(feed_ids) page = int(request.REQUEST.get('page', 1)) read_stories_count = int(request.REQUEST.get('read_stories_count', 0)) days_to_keep_unreads = datetime.timedelta(days=settings.DAYS_OF_UNREAD) if not feed_ids: logging.user(request, "~FCLoading empty river stories: page %s" % (page)) return dict(stories=[]) # Fetch all stories at and before the page number. # Not a single page, because reading stories can move them up in the unread order. # `read_stories_count` is an optimization, works best when all 25 stories before have been read. offset = (page-1) * limit - read_stories_count limit = page * limit - read_stories_count # Read stories to exclude read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story_id') read_stories = [rs.story_id for rs in read_stories] # Determine mark_as_read dates for all feeds to ignore all stories before this date. feed_counts = {} feed_last_reads = {} for feed_id in feed_ids: try: usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user) except UserSubscription.DoesNotExist: continue if not usersub: continue feed_counts[feed_id] = (usersub.unread_count_negative * 1 + usersub.unread_count_neutral * 10 + usersub.unread_count_positive * 20) feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple())) feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:40] feed_ids = [f[0] for f in feed_counts] feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids if feed_id in feed_last_reads]) feed_counts = dict(feed_counts) # After excluding read stories, all that's left are stories # past the mark_read_date. Everything returned is guaranteed to be unread. mstories = MStory.objects( story_guid__nin=read_stories, story_feed_id__in=feed_ids, # story_date__gte=start - days_to_keep_unreads ).map_reduce("""function() { var d = feed_last_reads[this[~story_feed_id]]; if (this[~story_date].getTime()/1000 > d) { emit(this[~id], this); } }""", """function(key, values) { return values[0]; }""", output='inline', scope={ 'feed_last_reads': feed_last_reads } ) try: mstories = [story.value for story in mstories if story and story.value] except OperationFailure, e: raise e
# Switch to original feed for the user subscription logging.info(" ===> %s " % user_sub.user) user_sub.feed = original_feed user_sub.needs_unread_recalc = True try: user_sub.save() folders = json.decode(user_sub_folders.folders) folders = rewrite_folders(folders, original_feed, duplicate_feed) user_sub_folders.folders = json.encode(folders) user_sub_folders.save() except (IntegrityError, OperationError): logging.info(" !!!!> %s already subscribed" % user_sub.user) user_sub.delete() # Switch read stories user_stories = MUserStory.objects(feed_id=duplicate_feed.pk) logging.info(" ---> %s read stories" % user_stories.count()) for user_story in user_stories: user_story.feed_id = original_feed.pk duplicate_story = user_story.story story_guid = duplicate_story.story_guid if hasattr(duplicate_story, "story_guid") else duplicate_story.id original_story = MStory.objects(story_feed_id=original_feed.pk, story_guid=story_guid) if original_story: user_story.story = original_story[0] try: user_story.save() except OperationError: # User read the story in the original feed, too. Ugh, just ignore it. pass else:
def load_single_feed(request, feed_id): start = time.time() user = get_user(request) offset = int(request.REQUEST.get('offset', 0)) limit = int(request.REQUEST.get('limit', 12)) page = int(request.REQUEST.get('page', 1)) dupe_feed_id = None userstories_db = None if page: offset = limit * (page-1) if not feed_id: raise Http404 try: feed = Feed.objects.get(id=feed_id) except Feed.DoesNotExist: feed_address = request.REQUEST.get('feed_address') dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address) if dupe_feed: feed = dupe_feed[0].feed dupe_feed_id = feed_id else: raise Http404 stories = feed.get_stories(offset, limit) # Get intelligence classifier for user classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id)) classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id)) classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id)) classifier_tags = list(MClassifierTag.objects(user_id=user.pk, feed_id=feed_id)) checkpoint1 = time.time() usersub = UserSubscription.objects.get(user=user, feed=feed) userstories = [] if usersub and stories: story_ids = [story['id'] for story in stories] userstories_db = MUserStory.objects(user_id=user.pk, feed_id=feed.pk, story_id__in=story_ids).only('story_id') starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id=feed_id, story_guid__in=story_ids).only('story_guid', 'starred_date') starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories]) userstories = set(us.story_id for us in userstories_db) checkpoint2 = time.time() for story in stories: story_date = localtime_for_timezone(story['story_date'], user.profile.timezone) now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone) story['short_parsed_date'] = format_story_link_date__short(story_date, now) story['long_parsed_date'] = format_story_link_date__long(story_date, now) if usersub: if story['id'] in userstories: story['read_status'] = 1 elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date: story['read_status'] = 1 elif not story.get('read_status') and story['story_date'] > usersub.last_read_date: story['read_status'] = 0 if story['id'] in starred_stories: story['starred'] = True starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone) story['starred_date'] = format_story_link_date__long(starred_date, now) else: story['read_status'] = 1 story['intelligence'] = { 'feed': apply_classifier_feeds(classifier_feeds, feed), 'author': apply_classifier_authors(classifier_authors, story), 'tags': apply_classifier_tags(classifier_tags, story), 'title': apply_classifier_titles(classifier_titles, story), } checkpoint3 = time.time() # Intelligence feed_tags = json.decode(feed.data.popular_tags) if feed.data.popular_tags else [] feed_authors = json.decode(feed.data.popular_authors) if feed.data.popular_authors else [] classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, classifier_authors, classifier_titles, classifier_tags) if usersub: usersub.feed_opens += 1 usersub.save() diff1 = checkpoint1-start diff2 = checkpoint2-start diff3 = checkpoint3-start timediff = time.time()-start last_update = relative_timesince(feed.last_update) logging.user(request, "~FYLoading feed: ~SB%s%s ~SN(%.4s seconds, ~SB%.4s/%.4s(%s)/%.4s~SN)" % ( feed.feed_title[:32], ('~SN/p%s' % page) if page > 1 else '', timediff, diff1, diff2, userstories_db and userstories_db.count() or '~SN0~SB', diff3)) FeedLoadtime.objects.create(feed=feed, loadtime=timediff) data = dict(stories=stories, feed_tags=feed_tags, feed_authors=feed_authors, classifiers=classifiers, last_update=last_update, feed_id=feed.pk) if dupe_feed_id: data['dupe_feed_id'] = dupe_feed_id if not usersub: data.update(feed.canonical()) return data
def load_river_stories(request): limit = 18 offset = 0 start = datetime.datetime.utcnow() user = get_user(request) feed_ids = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id] original_feed_ids = list(feed_ids) page = int(request.REQUEST.get('page', 1)) read_stories_count = int(request.REQUEST.get('read_stories_count', 0)) new_flag = request.REQUEST.get('new_flag', False) bottom_delta = datetime.timedelta(days=settings.DAYS_OF_UNREAD) if not feed_ids: logging.user(request, "~FCLoading empty river stories: page %s" % (page)) return dict(stories=[]) # Fetch all stories at and before the page number. # Not a single page, because reading stories can move them up in the unread order. # `read_stories_count` is an optimization, works best when all 25 stories before have been read. limit = limit * page - read_stories_count # Read stories to exclude read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story_id') read_stories = [rs.story_id for rs in read_stories] # Determine mark_as_read dates for all feeds to ignore all stories before this date. # max_feed_count = 0 feed_counts = {} feed_last_reads = {} for feed_id in feed_ids: try: usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user) except UserSubscription.DoesNotExist: continue if not usersub: continue feed_counts[feed_id] = (usersub.unread_count_negative * 1 + usersub.unread_count_neutral * 10 + usersub.unread_count_positive * 20) # if feed_counts[feed_id] > max_feed_count: # max_feed_count = feed_counts[feed_id] feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple())) feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:50] feed_ids = [f[0] for f in feed_counts] feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids if feed_id in feed_last_reads]) feed_counts = dict(feed_counts) # After excluding read stories, all that's left are stories # past the mark_read_date. Everything returned is guaranteed to be unread. mstories = MStory.objects( story_guid__nin=read_stories, story_feed_id__in=feed_ids, # story_date__gte=start - bottom_delta ).map_reduce("""function() { var d = feed_last_reads[this[~story_feed_id]]; if (this[~story_date].getTime()/1000 > d) { emit(this[~id], this); } }""", """function(key, values) { return values[0]; }""", output='inline', scope={ 'feed_last_reads': feed_last_reads } ) mstories = [story.value for story in mstories if story and story.value] mstories = sorted(mstories, cmp=lambda x, y: cmp(story_score(y, bottom_delta), story_score(x, bottom_delta))) # story_feed_counts = defaultdict(int) # mstories_pruned = [] # for story in mstories: # print story['story_title'], story_feed_counts[story['story_feed_id']] # if story_feed_counts[story['story_feed_id']] >= 3: continue # mstories_pruned.append(story) # story_feed_counts[story['story_feed_id']] += 1 stories = [] for i, story in enumerate(mstories): if i < offset: continue if i >= offset + limit: break stories.append(bunch(story)) stories = Feed.format_stories(stories) found_feed_ids = list(set([story['story_feed_id'] for story in stories])) # Find starred stories starred_stories = MStarredStory.objects( user_id=user.pk, story_feed_id__in=found_feed_ids ).only('story_guid', 'starred_date') starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories]) # Intelligence classifiers for all feeds involved def sort_by_feed(classifiers): feed_classifiers = defaultdict(list) for classifier in classifiers: feed_classifiers[classifier.feed_id].append(classifier) return feed_classifiers classifier_feeds = sort_by_feed(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_authors = sort_by_feed(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_titles = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_tags = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifiers = {} for feed_id in found_feed_ids: classifiers[feed_id] = get_classifiers_for_user(user, feed_id, classifier_feeds[feed_id], classifier_authors[feed_id], classifier_titles[feed_id], classifier_tags[feed_id]) # Just need to format stories for story in stories: story_date = localtime_for_timezone(story['story_date'], user.profile.timezone) now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone) story['short_parsed_date'] = format_story_link_date__short(story_date, now) story['long_parsed_date'] = format_story_link_date__long(story_date, now) story['read_status'] = 0 if story['id'] in starred_stories: story['starred'] = True starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone) story['starred_date'] = format_story_link_date__long(starred_date, now) story['intelligence'] = { 'feed': apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']), 'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story), 'tags': apply_classifier_tags(classifier_tags[story['story_feed_id']], story), 'title': apply_classifier_titles(classifier_titles[story['story_feed_id']], story), } diff = datetime.datetime.utcnow() - start timediff = float("%s.%.2s" % (diff.seconds, (diff.microseconds / 1000))) logging.user(request, "~FCLoading river stories: page %s - ~SB%s/%s " "stories ~SN(%s/%s/%s feeds) ~FB(%s seconds)" % (page, len(stories), len(mstories), len(found_feed_ids), len(feed_ids), len(original_feed_ids), timediff)) if new_flag: return dict(stories=stories, classifiers=classifiers) else: logging.user(request, "~BR~FCNo new flag on river") return dict(stories=stories)
def load_river_stories(request): limit = 18 offset = 0 start = time.time() user = get_user(request) feed_ids = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id] original_feed_ids = list(feed_ids) page = int(request.REQUEST.get('page', 1)) read_stories_count = int(request.REQUEST.get('read_stories_count', 0)) bottom_delta = datetime.timedelta(days=settings.DAYS_OF_UNREAD) if not feed_ids: logging.user(request, "~FCLoading empty river stories: page %s" % (page)) return dict(stories=[]) # Fetch all stories at and before the page number. # Not a single page, because reading stories can move them up in the unread order. # `read_stories_count` is an optimization, works best when all 25 stories before have been read. limit = limit * page - read_stories_count # Read stories to exclude read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story_id') read_stories = [rs.story_id for rs in read_stories] # Determine mark_as_read dates for all feeds to ignore all stories before this date. # max_feed_count = 0 feed_counts = {} feed_last_reads = {} for feed_id in feed_ids: try: usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user) except UserSubscription.DoesNotExist: continue if not usersub: continue feed_counts[feed_id] = (usersub.unread_count_negative * 1 + usersub.unread_count_neutral * 10 + usersub.unread_count_positive * 20) # if feed_counts[feed_id] > max_feed_count: # max_feed_count = feed_counts[feed_id] feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple())) feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:40] feed_ids = [f[0] for f in feed_counts] feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids if feed_id in feed_last_reads]) feed_counts = dict(feed_counts) # After excluding read stories, all that's left are stories # past the mark_read_date. Everything returned is guaranteed to be unread. mstories = MStory.objects( story_guid__nin=read_stories, story_feed_id__in=feed_ids, # story_date__gte=start - bottom_delta ).map_reduce("""function() { var d = feed_last_reads[this[~story_feed_id]]; if (this[~story_date].getTime()/1000 > d) { emit(this[~id], this); } }""", """function(key, values) { return values[0]; }""", output='inline', scope={ 'feed_last_reads': feed_last_reads } ) try: mstories = [story.value for story in mstories if story and story.value] except OperationFailure, e: raise e
def load_single_feed(request): user = get_user(request) offset = int(request.REQUEST.get('offset', 0)) limit = int(request.REQUEST.get('limit', 30)) page = int(request.REQUEST.get('page', 0)) if page: offset = limit * page feed_id = int(request.REQUEST['feed_id']) feed = Feed.objects.get(id=feed_id) force_update = request.GET.get('force_update', False) now = datetime.datetime.now() stories = feed.get_stories(offset, limit) if force_update: feed.update(force_update) # Get intelligence classifier for user classifier_feeds = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id) classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id) classifier_titles = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id) classifier_tags = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id) try: usersub = UserSubscription.objects.get(user=user, feed=feed) except UserSubscription.DoesNotExist: # FIXME: Why is this happening for `conesus` when logged into another account?! logging.info(" ***> [%s] UserSub DNE, creating: %s" % (user, feed)) usersub = UserSubscription.objects.create(user=user, feed=feed) userstories = MUserStory.objects(user_id=user.pk, feed_id=feed.pk, read_date__gte=usersub.mark_read_date) userstories = [us.story.id for us in userstories] for story in stories: classifier_feeds.rewind() classifier_authors.rewind() classifier_tags.rewind() classifier_titles.rewind() if story.get('id') in userstories: story['read_status'] = 1 elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date: story['read_status'] = 1 elif not story.get('read_status') and story['story_date'] > usersub.last_read_date: story['read_status'] = 0 story['intelligence'] = { 'feed': apply_classifier_feeds(classifier_feeds, feed), 'author': apply_classifier_authors(classifier_authors, story), 'tags': apply_classifier_tags(classifier_tags, story), 'title': apply_classifier_titles(classifier_titles, story), } # Intelligence feed_tags = json.decode(feed.popular_tags) if feed.popular_tags else [] feed_authors = json.decode(feed.popular_authors) if feed.popular_authors else [] classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, classifier_authors, classifier_titles, classifier_tags) usersub.feed_opens += 1 usersub.save() diff = datetime.datetime.now()-now logging.info(" ---> [%s] Loading feed: %s (%s.%s seconds)" % (request.user, feed, diff.seconds, diff.microseconds / 1000)) data = dict(stories=stories, feed_tags=feed_tags, feed_authors=feed_authors, classifiers=classifiers) return data
def load_social_stories(request, user_id, username=None): start = time.time() user = get_user(request) social_user_id = int(user_id) social_user = get_object_or_404(User, pk=social_user_id) offset = int(request.REQUEST.get('offset', 0)) limit = int(request.REQUEST.get('limit', 6)) page = request.REQUEST.get('page') order = request.REQUEST.get('order', 'newest') read_filter = request.REQUEST.get('read_filter', 'all') stories = [] if page: offset = limit * (int(page) - 1) now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone) UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD) social_profile = MSocialProfile.get_user(social_user.pk) try: socialsub = MSocialSubscription.objects.get(user_id=user.pk, subscription_user_id=social_user_id) except MSocialSubscription.DoesNotExist: socialsub = None mstories = MSharedStory.objects(user_id=social_user.pk).order_by('-shared_date')[offset:offset+limit] stories = Feed.format_stories(mstories) if socialsub and (read_filter == 'unread' or order == 'oldest'): story_ids = socialsub.get_stories(order=order, read_filter=read_filter, offset=offset, limit=limit) story_date_order = "%sshared_date" % ('' if order == 'oldest' else '-') if story_ids: mstories = MSharedStory.objects(user_id=social_user.pk, story_db_id__in=story_ids).order_by(story_date_order) stories = Feed.format_stories(mstories) else: mstories = MSharedStory.objects(user_id=social_user.pk).order_by('-shared_date')[offset:offset+limit] stories = Feed.format_stories(mstories) if not stories: return dict(stories=[]) checkpoint1 = time.time() stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, user.pk, check_all=True) story_feed_ids = list(set(s['story_feed_id'] for s in stories)) usersubs = UserSubscription.objects.filter(user__pk=user.pk, feed__pk__in=story_feed_ids) usersubs_map = dict((sub.feed_id, sub) for sub in usersubs) unsub_feed_ids = list(set(story_feed_ids).difference(set(usersubs_map.keys()))) unsub_feeds = Feed.objects.filter(pk__in=unsub_feed_ids) unsub_feeds = [feed.canonical(include_favicon=False) for feed in unsub_feeds] date_delta = UNREAD_CUTOFF if socialsub and date_delta < socialsub.mark_read_date: date_delta = socialsub.mark_read_date # Get intelligence classifier for user classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, social_user_id=social_user_id)) classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, social_user_id=social_user_id)) classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, social_user_id=social_user_id)) classifier_tags = list(MClassifierTag.objects(user_id=user.pk, social_user_id=social_user_id)) # Merge with feed specific classifiers classifier_feeds = classifier_feeds + list(MClassifierFeed.objects(user_id=user.pk, feed_id__in=story_feed_ids)) classifier_authors = classifier_authors + list(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=story_feed_ids)) classifier_titles = classifier_titles + list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=story_feed_ids)) classifier_tags = classifier_tags + list(MClassifierTag.objects(user_id=user.pk, feed_id__in=story_feed_ids)) checkpoint2 = time.time() story_ids = [story['id'] for story in stories] userstories_db = MUserStory.objects(user_id=user.pk, feed_id__in=story_feed_ids, story_id__in=story_ids).only('story_id') userstories = set(us.story_id for us in userstories_db) starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id__in=story_feed_ids, story_guid__in=story_ids).only('story_guid', 'starred_date') shared_stories = MSharedStory.objects(user_id=user.pk, story_feed_id__in=story_feed_ids, story_guid__in=story_ids)\ .only('story_guid', 'shared_date', 'comments') starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories]) shared_stories = dict([(story.story_guid, dict(shared_date=story.shared_date, comments=story.comments)) for story in shared_stories]) for story in stories: story['social_user_id'] = social_user_id story_feed_id = story['story_feed_id'] # story_date = localtime_for_timezone(story['story_date'], user.profile.timezone) shared_date = localtime_for_timezone(story['shared_date'], user.profile.timezone) story['short_parsed_date'] = format_story_link_date__short(shared_date, now) story['long_parsed_date'] = format_story_link_date__long(shared_date, now) if not socialsub: story['read_status'] = 1 elif story['id'] in userstories: story['read_status'] = 1 elif story['shared_date'] < date_delta: story['read_status'] = 1 elif not usersubs_map.get(story_feed_id): story['read_status'] = 0 elif not story.get('read_status') and story['story_date'] < usersubs_map[story_feed_id].mark_read_date: story['read_status'] = 1 elif not story.get('read_status') and story['shared_date'] < date_delta: story['read_status'] = 1 # elif not story.get('read_status') and socialsub and story['shared_date'] > socialsub.last_read_date: # story['read_status'] = 0 else: story['read_status'] = 0 if story['id'] in starred_stories: story['starred'] = True starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone) story['starred_date'] = format_story_link_date__long(starred_date, now) if story['id'] in shared_stories: story['shared'] = True shared_date = localtime_for_timezone(shared_stories[story['id']]['shared_date'], user.profile.timezone) story['shared_date'] = format_story_link_date__long(shared_date, now) story['shared_comments'] = strip_tags(shared_stories[story['id']]['comments']) story['intelligence'] = { 'feed': apply_classifier_feeds(classifier_feeds, story['story_feed_id'], social_user_id=social_user_id), 'author': apply_classifier_authors(classifier_authors, story), 'tags': apply_classifier_tags(classifier_tags, story), 'title': apply_classifier_titles(classifier_titles, story), } classifiers = sort_classifiers_by_feed(user=user, feed_ids=story_feed_ids, classifier_feeds=classifier_feeds, classifier_authors=classifier_authors, classifier_titles=classifier_titles, classifier_tags=classifier_tags) if socialsub: socialsub.feed_opens += 1 socialsub.save() diff1 = checkpoint1-start diff2 = checkpoint2-start logging.user(request, "~FYLoading ~FMshared stories~FY: ~SB%s%s ~SN(~SB%.4ss/%.4ss~SN)" % ( social_profile.title[:22], ('~SN/p%s' % page) if page > 1 else '', diff1, diff2)) return { "stories": stories, "user_profiles": user_profiles, "feeds": unsub_feeds, "classifiers": classifiers, }
# Switch to original feed for the user subscription logging.info(" ===> %s " % user_sub.user) user_sub.feed = original_feed user_sub.needs_unread_recalc = True try: user_sub.save() folders = json.decode(user_sub_folders.folders) folders = rewrite_folders(folders, original_feed, duplicate_feed) user_sub_folders.folders = json.encode(folders) user_sub_folders.save() except (IntegrityError, OperationError): logging.info(" !!!!> %s already subscribed" % user_sub.user) user_sub.delete() # Switch read stories user_stories = MUserStory.objects(feed_id=duplicate_feed.pk) logging.info(" ---> %s read stories" % user_stories.count()) for user_story in user_stories: user_story.feed_id = original_feed.pk duplicate_story = user_story.story story_guid = duplicate_story.story_guid if hasattr( duplicate_story, 'story_guid') else duplicate_story.id original_story = MStory.objects(story_feed_id=original_feed.pk, story_guid=story_guid) if original_story: user_story.story = original_story[0] try: user_story.save() except OperationError: # User read the story in the original feed, too. Ugh, just ignore it.
def load_river_stories(request): limit = 18 offset = 0 start = datetime.datetime.utcnow() user = get_user(request) feed_ids = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id] original_feed_ids = list(feed_ids) page = int(request.REQUEST.get('page', 0))+1 read_stories_count = int(request.REQUEST.get('read_stories_count', 0)) bottom_delta = datetime.timedelta(days=settings.DAYS_OF_UNREAD) if not feed_ids: logging.user(request.user, "~FCLoading empty river stories: page %s" % (page)) return dict(stories=[]) # Fetch all stories at and before the page number. # Not a single page, because reading stories can move them up in the unread order. # `read_stories_count` is an optimization, works best when all 25 stories before have been read. limit = limit * page - read_stories_count # Read stories to exclude read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story') read_stories = [rs.story.id for rs in read_stories] # Determine mark_as_read dates for all feeds to ignore all stories before this date. # max_feed_count = 0 feed_counts = {} feed_last_reads = {} for feed_id in feed_ids: try: usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user) except UserSubscription.DoesNotExist: continue if not usersub: continue feed_counts[feed_id] = (usersub.unread_count_negative * 1 + usersub.unread_count_neutral * 10 + usersub.unread_count_positive * 20) # if feed_counts[feed_id] > max_feed_count: # max_feed_count = feed_counts[feed_id] feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple())) feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:50] feed_ids = [f[0] for f in feed_counts] feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids]) feed_counts = dict(feed_counts) # After excluding read stories, all that's left are stories # past the mark_read_date. Everything returned is guaranteed to be unread. mstories = MStory.objects( id__nin=read_stories, story_feed_id__in=feed_ids, story_date__gte=start - bottom_delta ).map_reduce("""function() { var d = feed_last_reads[this[~story_feed_id]]; if (this[~story_date].getTime()/1000 > d) { emit(this[~id], this); } }""", """function(key, values) { return values[0]; }""", output='inline', scope={ 'feed_last_reads': feed_last_reads } ) mstories = [story.value for story in mstories] mstories = sorted(mstories, cmp=lambda x, y: cmp(story_score(y, bottom_delta), story_score(x, bottom_delta))) # story_feed_counts = defaultdict(int) # mstories_pruned = [] # for story in mstories: # print story['story_title'], story_feed_counts[story['story_feed_id']] # if story_feed_counts[story['story_feed_id']] >= 3: continue # mstories_pruned.append(story) # story_feed_counts[story['story_feed_id']] += 1 stories = [] for i, story in enumerate(mstories): if i < offset: continue if i >= offset + limit: break stories.append(bunch(story)) stories = Feed.format_stories(stories) found_feed_ids = list(set([story['story_feed_id'] for story in stories])) # Find starred stories starred_stories = MStarredStory.objects( user_id=user.pk, story_feed_id__in=found_feed_ids ).only('story_guid', 'starred_date') starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories]) # Intelligence classifiers for all feeds involved def sort_by_feed(classifiers): feed_classifiers = defaultdict(list) for classifier in classifiers: feed_classifiers[classifier.feed_id].append(classifier) return feed_classifiers classifier_feeds = sort_by_feed(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_authors = sort_by_feed(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_titles = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_tags = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids)) # Just need to format stories for story in stories: story_date = localtime_for_timezone(story['story_date'], user.profile.timezone) now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone) story['short_parsed_date'] = format_story_link_date__short(story_date, now) story['long_parsed_date'] = format_story_link_date__long(story_date, now) story['read_status'] = 0 if story['id'] in starred_stories: story['starred'] = True starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone) story['starred_date'] = format_story_link_date__long(starred_date, now) story['intelligence'] = { 'feed': apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']), 'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story), 'tags': apply_classifier_tags(classifier_tags[story['story_feed_id']], story), 'title': apply_classifier_titles(classifier_titles[story['story_feed_id']], story), } diff = datetime.datetime.utcnow() - start timediff = float("%s.%.2s" % (diff.seconds, (diff.microseconds / 1000))) logging.user(request.user, "~FCLoading river stories: page %s - ~SB%s/%s " "stories ~SN(%s/%s/%s feeds) ~FB(%s seconds)" % (page, len(stories), len(mstories), len(found_feed_ids), len(feed_ids), len(original_feed_ids), timediff)) return dict(stories=stories)
def load_single_feed(request, feed_id): start = time.time() user = get_user(request) offset = int(request.REQUEST.get('offset', 0)) limit = int(request.REQUEST.get('limit', 12)) page = int(request.REQUEST.get('page', 1)) dupe_feed_id = None userstories_db = None if page: offset = limit * (page-1) if not feed_id: raise Http404 try: feed = Feed.objects.get(id=feed_id) except Feed.DoesNotExist: feed_address = request.REQUEST.get('feed_address') dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address) if dupe_feed: feed = dupe_feed[0].feed dupe_feed_id = feed_id else: raise Http404 stories = feed.get_stories(offset, limit) # Get intelligence classifier for user classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id)) classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id)) classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id)) classifier_tags = list(MClassifierTag.objects(user_id=user.pk, feed_id=feed_id)) checkpoint1 = time.time() usersub = UserSubscription.objects.get(user=user, feed=feed) userstories = [] if usersub: userstories_db = MUserStory.objects(user_id=user.pk, feed_id=feed.pk, read_date__gte=usersub.mark_read_date) starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id=feed_id).only('story_guid', 'starred_date') starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories]) for us in userstories_db: if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode): userstories.append(us.story.story_guid) elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode): userstories.append(us.story.id) # TODO: Remove me after migration from story.id->guid checkpoint2 = time.time() for story in stories: story_date = localtime_for_timezone(story['story_date'], user.profile.timezone) now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone) story['short_parsed_date'] = format_story_link_date__short(story_date, now) story['long_parsed_date'] = format_story_link_date__long(story_date, now) if usersub: if story['id'] in userstories: story['read_status'] = 1 elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date: story['read_status'] = 1 elif not story.get('read_status') and story['story_date'] > usersub.last_read_date: story['read_status'] = 0 if story['id'] in starred_stories: story['starred'] = True starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone) story['starred_date'] = format_story_link_date__long(starred_date, now) else: story['read_status'] = 1 story['intelligence'] = { 'feed': apply_classifier_feeds(classifier_feeds, feed), 'author': apply_classifier_authors(classifier_authors, story), 'tags': apply_classifier_tags(classifier_tags, story), 'title': apply_classifier_titles(classifier_titles, story), } checkpoint3 = time.time() # Intelligence feed_tags = json.decode(feed.data.popular_tags) if feed.data.popular_tags else [] feed_authors = json.decode(feed.data.popular_authors) if feed.data.popular_authors else [] classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, classifier_authors, classifier_titles, classifier_tags) if usersub: usersub.feed_opens += 1 usersub.save() timediff = time.time()-start last_update = relative_timesince(feed.last_update) logging.user(request.user, "~FYLoading feed: ~SB%s%s ~SN(%.4s seconds)" % ( feed, ('~SN/p%s' % page) if page > 1 else '', timediff)) FeedLoadtime.objects.create(feed=feed, loadtime=timediff) if timediff >= 1: diff1 = checkpoint1-start diff2 = checkpoint2-start diff3 = checkpoint3-start logging.user(request.user, "~FYSlow feed load: ~SB%.4s/%.4s(%s)/%.4s" % ( diff1, diff2, userstories_db and userstories_db.count(), diff3)) data = dict(stories=stories, feed_tags=feed_tags, feed_authors=feed_authors, classifiers=classifiers, last_update=last_update, feed_id=feed.pk) if dupe_feed_id: data['dupe_feed_id'] = dupe_feed_id if not usersub: data.update(feed.canonical()) return data
def load_single_feed(request): user = get_user(request) offset = int(request.REQUEST.get('offset', 0)) limit = int(request.REQUEST.get('limit', 30)) page = int(request.REQUEST.get('page', 0)) if page: offset = limit * page feed_id = int(request.REQUEST.get('feed_id', 0)) if feed_id == 0: raise Http404 try: feed = Feed.objects.get(id=feed_id) except Feed.DoesNotExist: feed_address = request.REQUEST.get('feed_address') dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address) if dupe_feed: feed = dupe_feed[0].feed else: raise Http404 force_update = request.GET.get('force_update', False) now = datetime.datetime.utcnow() stories = feed.get_stories(offset, limit) if force_update: feed.update(force_update) # Get intelligence classifier for user classifier_feeds = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id) classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id) classifier_titles = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id) classifier_tags = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id) try: usersub = UserSubscription.objects.get(user=user, feed=feed) except UserSubscription.DoesNotExist: # FIXME: Why is this happening for `conesus` when logged into another account?! logging.info(" ***> [%s] UserSub DNE, creating: %s" % (user, feed)) usersub = UserSubscription.objects.create(user=user, feed=feed) userstories = [] userstories_db = MUserStory.objects(user_id=user.pk, feed_id=feed.pk, read_date__gte=usersub.mark_read_date) for us in userstories_db: if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode): userstories.append(us.story.story_guid) elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode): userstories.append(us.story.id) # TODO: Remove me after migration from story.id->guid for story in stories: classifier_feeds.rewind() classifier_authors.rewind() classifier_tags.rewind() classifier_titles.rewind() if story['id'] in userstories: story['read_status'] = 1 elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date: story['read_status'] = 1 elif not story.get('read_status') and story['story_date'] > usersub.last_read_date: story['read_status'] = 0 story['intelligence'] = { 'feed': apply_classifier_feeds(classifier_feeds, feed), 'author': apply_classifier_authors(classifier_authors, story), 'tags': apply_classifier_tags(classifier_tags, story), 'title': apply_classifier_titles(classifier_titles, story), } # Intelligence feed_tags = json.decode(feed.popular_tags) if feed.popular_tags else [] feed_authors = json.decode(feed.popular_authors) if feed.popular_authors else [] classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, classifier_authors, classifier_titles, classifier_tags) usersub.feed_opens += 1 usersub.save() diff = datetime.datetime.utcnow()-now timediff = float("%s.%s" % (diff.seconds, (diff.microseconds / 1000))) logging.info(" ---> [%s] Loading feed: %s (%s seconds)" % (request.user, feed, timediff)) FeedLoadtime.objects.create(feed=feed, loadtime=timediff) last_update = relative_timesince(feed.last_update) data = dict(stories=stories, feed_tags=feed_tags, feed_authors=feed_authors, classifiers=classifiers, last_update=last_update, feed_id=feed.pk) return data
def load_single_feed(request, feed_id): start = datetime.datetime.utcnow() user = get_user(request) offset = int(request.REQUEST.get('offset', 0)) limit = int(request.REQUEST.get('limit', 12)) page = int(request.REQUEST.get('page', 1)) if page: offset = limit * (page-1) dupe_feed_id = None if not feed_id: raise Http404 try: feed = Feed.objects.get(id=feed_id) except Feed.DoesNotExist: feed_address = request.REQUEST.get('feed_address') dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address) if dupe_feed: feed = dupe_feed[0].feed dupe_feed_id = feed_id else: raise Http404 stories = feed.get_stories(offset, limit) # Get intelligence classifier for user classifier_feeds = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id) classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id) classifier_titles = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id) classifier_tags = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id) usersub = UserSubscription.objects.get(user=user, feed=feed) userstories = [] if usersub: userstories_db = MUserStory.objects(user_id=user.pk, feed_id=feed.pk, read_date__gte=usersub.mark_read_date) starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id=feed_id).only('story_guid', 'starred_date') starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories]) for us in userstories_db: if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode): userstories.append(us.story.story_guid) elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode): userstories.append(us.story.id) # TODO: Remove me after migration from story.id->guid for story in stories: [x.rewind() for x in [classifier_feeds, classifier_authors, classifier_tags, classifier_titles]] story_date = localtime_for_timezone(story['story_date'], user.profile.timezone) now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone) story['short_parsed_date'] = format_story_link_date__short(story_date, now) story['long_parsed_date'] = format_story_link_date__long(story_date, now) if usersub: if story['id'] in userstories: story['read_status'] = 1 elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date: story['read_status'] = 1 elif not story.get('read_status') and story['story_date'] > usersub.last_read_date: story['read_status'] = 0 if story['id'] in starred_stories: story['starred'] = True starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone) story['starred_date'] = format_story_link_date__long(starred_date, now) else: story['read_status'] = 1 story['intelligence'] = { 'feed': apply_classifier_feeds(classifier_feeds, feed), 'author': apply_classifier_authors(classifier_authors, story), 'tags': apply_classifier_tags(classifier_tags, story), 'title': apply_classifier_titles(classifier_titles, story), } # Intelligence feed_tags = json.decode(feed.data.popular_tags) if feed.data.popular_tags else [] feed_authors = json.decode(feed.data.popular_authors) if feed.data.popular_authors else [] classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, classifier_authors, classifier_titles, classifier_tags) if usersub: usersub.feed_opens += 1 usersub.save() diff = datetime.datetime.utcnow()-start timediff = float("%s.%.2s" % (diff.seconds, (diff.microseconds / 1000))) last_update = relative_timesince(feed.last_update) logging.user(request.user, "~FYLoading feed: ~SB%s%s ~SN(%s seconds)" % ( feed, ('~SN/p%s' % page) if page > 1 else '', timediff)) FeedLoadtime.objects.create(feed=feed, loadtime=timediff) data = dict(stories=stories, feed_tags=feed_tags, feed_authors=feed_authors, classifiers=classifiers, last_update=last_update, feed_id=feed.pk) if dupe_feed_id: data['dupe_feed_id'] = dupe_feed_id if not usersub: data.update(feed.canonical()) return data
def load_single_feed(request): user = get_user(request) offset = int(request.REQUEST.get("offset", 0)) limit = int(request.REQUEST.get("limit", 30)) page = int(request.REQUEST.get("page", 0)) if page: offset = limit * page feed_id = int(request.REQUEST.get("feed_id", 0)) if feed_id == 0: raise Http404 try: feed = Feed.objects.get(id=feed_id) except Feed.DoesNotExist: feed_address = request.REQUEST.get("feed_address") dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address) if dupe_feed: feed = dupe_feed[0].feed else: raise Http404 force_update = request.GET.get("force_update", False) now = datetime.datetime.utcnow() stories = feed.get_stories(offset, limit) if force_update: feed.update(force_update) # Get intelligence classifier for user classifier_feeds = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id) classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id) classifier_titles = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id) classifier_tags = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id) usersub = UserSubscription.objects.get(user=user, feed=feed) userstories = [] userstories_db = MUserStory.objects(user_id=user.pk, feed_id=feed.pk, read_date__gte=usersub.mark_read_date) starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id=feed_id).only("story_guid", "starred_date") starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories]) for us in userstories_db: if hasattr(us.story, "story_guid") and isinstance(us.story.story_guid, unicode): userstories.append(us.story.story_guid) elif hasattr(us.story, "id") and isinstance(us.story.id, unicode): userstories.append(us.story.id) # TODO: Remove me after migration from story.id->guid for story in stories: classifier_feeds.rewind() classifier_authors.rewind() classifier_tags.rewind() classifier_titles.rewind() story_date = localtime_for_timezone(story["story_date"], user.profile.timezone) story["short_parsed_date"] = format_story_link_date__short(story_date) story["long_parsed_date"] = format_story_link_date__long(story_date) if story["id"] in userstories: story["read_status"] = 1 elif not story.get("read_status") and story["story_date"] < usersub.mark_read_date: story["read_status"] = 1 elif not story.get("read_status") and story["story_date"] > usersub.last_read_date: story["read_status"] = 0 if story["id"] in starred_stories: story["starred"] = True starred_date = localtime_for_timezone(starred_stories[story["id"]], user.profile.timezone) story["starred_date"] = format_story_link_date__long(starred_date) story["intelligence"] = { "feed": apply_classifier_feeds(classifier_feeds, feed), "author": apply_classifier_authors(classifier_authors, story), "tags": apply_classifier_tags(classifier_tags, story), "title": apply_classifier_titles(classifier_titles, story), } # Intelligence feed_tags = json.decode(feed.popular_tags) if feed.popular_tags else [] feed_authors = json.decode(feed.popular_authors) if feed.popular_authors else [] classifiers = get_classifiers_for_user( user, feed_id, classifier_feeds, classifier_authors, classifier_titles, classifier_tags ) usersub.feed_opens += 1 usersub.save() diff = datetime.datetime.utcnow() - now timediff = float("%s.%s" % (diff.seconds, (diff.microseconds / 1000))) last_update = relative_timesince(feed.last_update) logging.info(" ---> [%s] ~FYLoading feed: ~SB%s ~SN(%s seconds)" % (request.user, feed, timediff)) FeedLoadtime.objects.create(feed=feed, loadtime=timediff) data = dict( stories=stories, feed_tags=feed_tags, feed_authors=feed_authors, classifiers=classifiers, last_update=last_update, feed_id=feed.pk, ) return data