Example #1
0
def load_river_stories(request):
    limit              = 18
    offset             = 0
    start              = datetime.datetime.utcnow()
    user               = get_user(request)
    feed_ids           = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
    original_feed_ids  = list(feed_ids)
    page               = int(request.REQUEST.get('page', 0))+1
    read_stories_count = int(request.REQUEST.get('read_stories_count', 0))
    bottom_delta       = datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    
    if not feed_ids: 
        logging.user(request.user, "~FCLoading empty river stories: page %s" % (page))
        return dict(stories=[])
    
    # Fetch all stories at and before the page number.
    # Not a single page, because reading stories can move them up in the unread order.
    # `read_stories_count` is an optimization, works best when all 25 stories before have been read.
    limit = limit * page - read_stories_count
    
    # Read stories to exclude
    read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story')
    read_stories = [rs.story.id for rs in read_stories]
    
    # Determine mark_as_read dates for all feeds to ignore all stories before this date.
    # max_feed_count     = 0
    feed_counts     = {}
    feed_last_reads = {}
    for feed_id in feed_ids:
        try:
            usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        except UserSubscription.DoesNotExist:
            continue
        if not usersub: continue
        feed_counts[feed_id] = (usersub.unread_count_negative * 1 + 
                                usersub.unread_count_neutral * 10 +
                                usersub.unread_count_positive * 20)
        # if feed_counts[feed_id] > max_feed_count:
        #     max_feed_count = feed_counts[feed_id]
        feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple()))
    feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:50]
    feed_ids = [f[0] for f in feed_counts]
    feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids])
    feed_counts = dict(feed_counts)
    
    # After excluding read stories, all that's left are stories 
    # past the mark_read_date. Everything returned is guaranteed to be unread.
    mstories = MStory.objects(
        id__nin=read_stories,
        story_feed_id__in=feed_ids,
        story_date__gte=start - bottom_delta
    ).map_reduce("""function() {
            var d = feed_last_reads[this[~story_feed_id]];
            if (this[~story_date].getTime()/1000 > d) {
                emit(this[~id], this);
            }
        }""",
        """function(key, values) {
            return values[0];
        }""",
        output='inline',
        scope={
            'feed_last_reads': feed_last_reads
        }
    )
    mstories = [story.value for story in mstories]

    mstories = sorted(mstories, cmp=lambda x, y: cmp(story_score(y, bottom_delta), story_score(x, bottom_delta)))

    # story_feed_counts = defaultdict(int)
    # mstories_pruned = []
    # for story in mstories:
    #     print story['story_title'], story_feed_counts[story['story_feed_id']]
    #     if story_feed_counts[story['story_feed_id']] >= 3: continue
    #     mstories_pruned.append(story)
    #     story_feed_counts[story['story_feed_id']] += 1
    stories = []
    for i, story in enumerate(mstories):
        if i < offset: continue
        if i >= offset + limit: break
        stories.append(bunch(story))
    stories = Feed.format_stories(stories)
    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    
    # Find starred stories
    starred_stories = MStarredStory.objects(
        user_id=user.pk,
        story_feed_id__in=found_feed_ids
    ).only('story_guid', 'starred_date')
    starred_stories = dict([(story.story_guid, story.starred_date) 
                            for story in starred_stories])
    
    # Intelligence classifiers for all feeds involved
    def sort_by_feed(classifiers):
        feed_classifiers = defaultdict(list)
        for classifier in classifiers:
            feed_classifiers[classifier.feed_id].append(classifier)
        return feed_classifiers
    classifier_feeds   = sort_by_feed(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_authors = sort_by_feed(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_titles  = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_tags    = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    
    # Just need to format stories
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date']  = format_story_link_date__long(story_date, now)
        story['read_status'] = 0
        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        story['intelligence'] = {
            'feed':   apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']),
            'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story),
            'tags':   apply_classifier_tags(classifier_tags[story['story_feed_id']], story),
            'title':  apply_classifier_titles(classifier_titles[story['story_feed_id']], story),
        }
    
    diff = datetime.datetime.utcnow() - start
    timediff = float("%s.%.2s" % (diff.seconds, (diff.microseconds / 1000)))
    logging.user(request.user, "~FCLoading river stories: page %s - ~SB%s/%s "
                               "stories ~SN(%s/%s/%s feeds) ~FB(%s seconds)" % 
                               (page, len(stories), len(mstories), len(found_feed_ids), 
                               len(feed_ids), len(original_feed_ids), timediff))
    
    return dict(stories=stories)
Example #2
0
    def calculate_feed_scores(self, silent=False, stories=None, force=False):
        # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        if self.user.profile.last_seen_on < UNREAD_CUTOFF and not force:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return

        # if not self.feed.fetched_once:
        #     if not silent:
        #         logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
        #     self.needs_unread_recalc = False
        #     self.save()
        #     return

        feed_scores = dict(negative=0, neutral=0, positive=0)

        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        if not stories:
            stories = cache.get('S:%s' % self.feed_id)

        unread_story_hashes = self.get_stories(read_filter='unread',
                                               limit=500,
                                               hashes_only=True)

        if not stories:
            stories_db = MStory.objects(story_hash__in=unread_story_hashes)
            stories = Feed.format_stories(stories_db, self.feed_id)

        oldest_unread_story_date = now
        unread_stories = []
        for story in stories:
            if story['story_date'] < date_delta:
                continue
            if story['story_hash'] in unread_story_hashes:
                unread_stories.append(story)
                if story['story_date'] < oldest_unread_story_date:
                    oldest_unread_story_date = story['story_date']

        # if not silent:
        #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))

        classifier_feeds = list(
            MClassifierFeed.objects(user_id=self.user_id,
                                    feed_id=self.feed_id,
                                    social_user_id=0))
        classifier_authors = list(
            MClassifierAuthor.objects(user_id=self.user_id,
                                      feed_id=self.feed_id))
        classifier_titles = list(
            MClassifierTitle.objects(user_id=self.user_id,
                                     feed_id=self.feed_id))
        classifier_tags = list(
            MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

        # if not silent:
        #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }

        for story in unread_stories:
            scores.update({
                'author':
                apply_classifier_authors(classifier_authors, story),
                'tags':
                apply_classifier_tags(classifier_tags, story),
                'title':
                apply_classifier_titles(classifier_titles, story),
            })

            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1

        # if not silent:
        #     logging.info(' ---> [%s]    End classifiers: %s' % (self.user, datetime.datetime.now() - now))

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False

        self.save()

        if (self.unread_count_positive == 0
                and self.unread_count_neutral == 0):
            self.mark_feed_read()

        if not silent:
            logging.user(
                self.user,
                '~FC~SNComputing scores: %s (~SB%s~SN/~SB%s~SN/~SB%s~SN)' %
                (self.feed, feed_scores['negative'], feed_scores['neutral'],
                 feed_scores['positive']))

        return self
Example #3
0
def load_single_feed(request, feed_id):
    start        = time.time()
    user         = get_user(request)
    offset       = int(request.REQUEST.get('offset', 0))
    limit        = int(request.REQUEST.get('limit', 12))
    page         = int(request.REQUEST.get('page', 1))
    dupe_feed_id = None
    userstories_db = None
    
    if page: offset = limit * (page-1)
    if not feed_id: raise Http404
        
    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        feed_address = request.REQUEST.get('feed_address')
        dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if dupe_feed:
            feed = dupe_feed[0].feed
            dupe_feed_id = feed_id
        else:
            raise Http404
        
    stories = feed.get_stories(offset, limit) 
        
    # Get intelligence classifier for user
    classifier_feeds   = list(MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id))
    classifier_titles  = list(MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id))
    classifier_tags    = list(MClassifierTag.objects(user_id=user.pk, feed_id=feed_id))
    
    checkpoint1 = time.time()
    
    usersub = UserSubscription.objects.get(user=user, feed=feed)
    userstories = []
    if usersub:
        userstories_db = MUserStory.objects(user_id=user.pk,
                                            feed_id=feed.pk,
                                            read_date__gte=usersub.mark_read_date)
        starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id=feed_id).only('story_guid', 'starred_date')
        starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])

        for us in userstories_db:
            if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
                userstories.append(us.story.story_guid)
            elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
                userstories.append(us.story.id) # TODO: Remove me after migration from story.id->guid
            
    checkpoint2 = time.time()
    
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date'] = format_story_link_date__long(story_date, now)
        if usersub:
            if story['id'] in userstories:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
                story['read_status'] = 0
            if story['id'] in starred_stories:
                story['starred'] = True
                starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
                story['starred_date'] = format_story_link_date__long(starred_date, now)
        else:
            story['read_status'] = 1
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }

    checkpoint3 = time.time()
    
    # Intelligence
    feed_tags = json.decode(feed.data.popular_tags) if feed.data.popular_tags else []
    feed_authors = json.decode(feed.data.popular_authors) if feed.data.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, 
                                           classifier_authors, classifier_titles, classifier_tags)
    
    if usersub:
        usersub.feed_opens += 1
        usersub.save()
    timediff = time.time()-start
    last_update = relative_timesince(feed.last_update)
    logging.user(request.user, "~FYLoading feed: ~SB%s%s ~SN(%.4s seconds)" % (
        feed, ('~SN/p%s' % page) if page > 1 else '', timediff))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)
    
    if timediff >= 1:
        diff1 = checkpoint1-start
        diff2 = checkpoint2-start
        diff3 = checkpoint3-start
        logging.user(request.user, "~FYSlow feed load: ~SB%.4s/%.4s(%s)/%.4s" % (
            diff1, diff2, userstories_db and userstories_db.count(), diff3))
    
    data = dict(stories=stories, 
                feed_tags=feed_tags, 
                feed_authors=feed_authors, 
                classifiers=classifiers,
                last_update=last_update,
                feed_id=feed.pk)
    
    if dupe_feed_id: data['dupe_feed_id'] = dupe_feed_id
    if not usersub:
        data.update(feed.canonical())
        
    return data
Example #4
0
def load_single_feed(request, feed_id):
    start = datetime.datetime.utcnow()
    user = get_user(request)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 12))
    page = int(request.REQUEST.get('page', 1))
    if page:
        offset = limit * (page-1)
    dupe_feed_id = None
    if not feed_id:
        raise Http404
        
    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        feed_address = request.REQUEST.get('feed_address')
        dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if dupe_feed:
            feed = dupe_feed[0].feed
            dupe_feed_id = feed_id
        else:
            raise Http404
        
    stories = feed.get_stories(offset, limit) 
        
    # Get intelligence classifier for user
    classifier_feeds   = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id)
    classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id)
    classifier_titles  = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id)
    classifier_tags    = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id)
    
    usersub = UserSubscription.objects.get(user=user, feed=feed)
    userstories = []
    if usersub:
        userstories_db = MUserStory.objects(user_id=user.pk,
                                            feed_id=feed.pk,
                                            read_date__gte=usersub.mark_read_date)
        starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id=feed_id).only('story_guid', 'starred_date')
        starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])

        for us in userstories_db:
            if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
                userstories.append(us.story.story_guid)
            elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
                userstories.append(us.story.id) # TODO: Remove me after migration from story.id->guid
            
    for story in stories:
        [x.rewind() for x in [classifier_feeds, classifier_authors, classifier_tags, classifier_titles]]
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date'] = format_story_link_date__long(story_date, now)
        if usersub:
            if story['id'] in userstories:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
                story['read_status'] = 0
            if story['id'] in starred_stories:
                story['starred'] = True
                starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
                story['starred_date'] = format_story_link_date__long(starred_date, now)
        else:
            story['read_status'] = 1
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }
    
    # Intelligence
    feed_tags = json.decode(feed.data.popular_tags) if feed.data.popular_tags else []
    feed_authors = json.decode(feed.data.popular_authors) if feed.data.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, 
                                           classifier_authors, classifier_titles, classifier_tags)
    
    if usersub:
        usersub.feed_opens += 1
        usersub.save()
    
    diff = datetime.datetime.utcnow()-start
    timediff = float("%s.%.2s" % (diff.seconds, (diff.microseconds / 1000)))
    last_update = relative_timesince(feed.last_update)
    logging.user(request.user, "~FYLoading feed: ~SB%s%s ~SN(%s seconds)" % (
        feed, ('~SN/p%s' % page) if page > 1 else '', timediff))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)
    
    data = dict(stories=stories, 
                feed_tags=feed_tags, 
                feed_authors=feed_authors, 
                classifiers=classifiers,
                last_update=last_update,
                feed_id=feed.pk)
    
    if dupe_feed_id: data['dupe_feed_id'] = dupe_feed_id
    if not usersub:
        data.update(feed.canonical())
        
    return data
Example #5
0
def load_single_feed(request):
    user = get_user(request)
    offset = int(request.REQUEST.get("offset", 0))
    limit = int(request.REQUEST.get("limit", 30))
    page = int(request.REQUEST.get("page", 0))
    if page:
        offset = limit * page
    feed_id = int(request.REQUEST.get("feed_id", 0))
    if feed_id == 0:
        raise Http404

    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        feed_address = request.REQUEST.get("feed_address")
        dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if dupe_feed:
            feed = dupe_feed[0].feed
        else:
            raise Http404

    force_update = request.GET.get("force_update", False)

    now = datetime.datetime.utcnow()
    stories = feed.get_stories(offset, limit)

    if force_update:
        feed.update(force_update)

    # Get intelligence classifier for user
    classifier_feeds = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id)
    classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id)
    classifier_titles = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id)
    classifier_tags = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id)

    usersub = UserSubscription.objects.get(user=user, feed=feed)
    userstories = []
    userstories_db = MUserStory.objects(user_id=user.pk, feed_id=feed.pk, read_date__gte=usersub.mark_read_date)
    starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id=feed_id).only("story_guid", "starred_date")
    starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])

    for us in userstories_db:
        if hasattr(us.story, "story_guid") and isinstance(us.story.story_guid, unicode):
            userstories.append(us.story.story_guid)
        elif hasattr(us.story, "id") and isinstance(us.story.id, unicode):
            userstories.append(us.story.id)  # TODO: Remove me after migration from story.id->guid

    for story in stories:
        classifier_feeds.rewind()
        classifier_authors.rewind()
        classifier_tags.rewind()
        classifier_titles.rewind()
        story_date = localtime_for_timezone(story["story_date"], user.profile.timezone)
        story["short_parsed_date"] = format_story_link_date__short(story_date)
        story["long_parsed_date"] = format_story_link_date__long(story_date)
        if story["id"] in userstories:
            story["read_status"] = 1
        elif not story.get("read_status") and story["story_date"] < usersub.mark_read_date:
            story["read_status"] = 1
        elif not story.get("read_status") and story["story_date"] > usersub.last_read_date:
            story["read_status"] = 0
        if story["id"] in starred_stories:
            story["starred"] = True
            starred_date = localtime_for_timezone(starred_stories[story["id"]], user.profile.timezone)
            story["starred_date"] = format_story_link_date__long(starred_date)
        story["intelligence"] = {
            "feed": apply_classifier_feeds(classifier_feeds, feed),
            "author": apply_classifier_authors(classifier_authors, story),
            "tags": apply_classifier_tags(classifier_tags, story),
            "title": apply_classifier_titles(classifier_titles, story),
        }

    # Intelligence
    feed_tags = json.decode(feed.popular_tags) if feed.popular_tags else []
    feed_authors = json.decode(feed.popular_authors) if feed.popular_authors else []
    classifiers = get_classifiers_for_user(
        user, feed_id, classifier_feeds, classifier_authors, classifier_titles, classifier_tags
    )

    usersub.feed_opens += 1
    usersub.save()

    diff = datetime.datetime.utcnow() - now
    timediff = float("%s.%s" % (diff.seconds, (diff.microseconds / 1000)))
    last_update = relative_timesince(feed.last_update)
    logging.info(" ---> [%s] ~FYLoading feed: ~SB%s ~SN(%s seconds)" % (request.user, feed, timediff))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)

    data = dict(
        stories=stories,
        feed_tags=feed_tags,
        feed_authors=feed_authors,
        classifiers=classifiers,
        last_update=last_update,
        feed_id=feed.pk,
    )
    return data
Example #6
0
                                                        classifier_titles[feed_id],
                                                        classifier_tags[feed_id])
    
    # Just need to format stories
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date']  = format_story_link_date__long(story_date, now)
        story['read_status'] = 0
        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        story['intelligence'] = {
            'feed':   apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']),
            'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story),
            'tags':   apply_classifier_tags(classifier_tags[story['story_feed_id']], story),
            'title':  apply_classifier_titles(classifier_titles[story['story_feed_id']], story),
        }

    diff = time.time() - start
    timediff = round(float(diff), 2)
    logging.user(request, "~FCLoading river stories: page %s - ~SB%s/%s "
                               "stories ~SN(%s/%s/%s feeds) ~FB(%s seconds)" % 
                               (page, len(stories), len(mstories), len(found_feed_ids), 
                               len(feed_ids), len(original_feed_ids), timediff))
    
    return dict(stories=stories, classifiers=classifiers, elapsed_time=timediff)
    
    
Example #7
0
def load_river_stories(request):
    limit              = 18
    offset             = 0
    start              = datetime.datetime.utcnow()
    user               = get_user(request)
    feed_ids           = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
    original_feed_ids  = list(feed_ids)
    page               = int(request.REQUEST.get('page', 1))
    read_stories_count = int(request.REQUEST.get('read_stories_count', 0))
    new_flag           = request.REQUEST.get('new_flag', False)
    bottom_delta       = datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    
    if not feed_ids: 
        logging.user(request, "~FCLoading empty river stories: page %s" % (page))
        return dict(stories=[])
    
    # Fetch all stories at and before the page number.
    # Not a single page, because reading stories can move them up in the unread order.
    # `read_stories_count` is an optimization, works best when all 25 stories before have been read.
    limit = limit * page - read_stories_count
    
    # Read stories to exclude
    read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story_id')
    read_stories = [rs.story_id for rs in read_stories]
    
    # Determine mark_as_read dates for all feeds to ignore all stories before this date.
    # max_feed_count     = 0
    feed_counts     = {}
    feed_last_reads = {}
    for feed_id in feed_ids:
        try:
            usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        except UserSubscription.DoesNotExist:
            continue
        if not usersub: continue
        feed_counts[feed_id] = (usersub.unread_count_negative * 1 + 
                                usersub.unread_count_neutral * 10 +
                                usersub.unread_count_positive * 20)
        # if feed_counts[feed_id] > max_feed_count:
        #     max_feed_count = feed_counts[feed_id]
        feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple()))
    feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:50]
    feed_ids = [f[0] for f in feed_counts]
    feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids
                            if feed_id in feed_last_reads])
    feed_counts = dict(feed_counts)

    # After excluding read stories, all that's left are stories 
    # past the mark_read_date. Everything returned is guaranteed to be unread.
    mstories = MStory.objects(
        story_guid__nin=read_stories,
        story_feed_id__in=feed_ids,
        # story_date__gte=start - bottom_delta
    ).map_reduce("""function() {
            var d = feed_last_reads[this[~story_feed_id]];
            if (this[~story_date].getTime()/1000 > d) {
                emit(this[~id], this);
            }
        }""",
        """function(key, values) {
            return values[0];
        }""",
        output='inline',
        scope={
            'feed_last_reads': feed_last_reads
        }
    )
    mstories = [story.value for story in mstories if story and story.value]

    mstories = sorted(mstories, cmp=lambda x, y: cmp(story_score(y, bottom_delta), story_score(x, bottom_delta)))

    # story_feed_counts = defaultdict(int)
    # mstories_pruned = []
    # for story in mstories:
    #     print story['story_title'], story_feed_counts[story['story_feed_id']]
    #     if story_feed_counts[story['story_feed_id']] >= 3: continue
    #     mstories_pruned.append(story)
    #     story_feed_counts[story['story_feed_id']] += 1
    stories = []
    for i, story in enumerate(mstories):
        if i < offset: continue
        if i >= offset + limit: break
        stories.append(bunch(story))
    stories = Feed.format_stories(stories)
    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    
    # Find starred stories
    starred_stories = MStarredStory.objects(
        user_id=user.pk,
        story_feed_id__in=found_feed_ids
    ).only('story_guid', 'starred_date')
    starred_stories = dict([(story.story_guid, story.starred_date) 
                            for story in starred_stories])
    
    # Intelligence classifiers for all feeds involved
    def sort_by_feed(classifiers):
        feed_classifiers = defaultdict(list)
        for classifier in classifiers:
            feed_classifiers[classifier.feed_id].append(classifier)
        return feed_classifiers
    classifier_feeds   = sort_by_feed(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_authors = sort_by_feed(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_titles  = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_tags    = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    
    classifiers = {}
    for feed_id in found_feed_ids:
        classifiers[feed_id] = get_classifiers_for_user(user, feed_id, classifier_feeds[feed_id], 
                                                        classifier_authors[feed_id],
                                                        classifier_titles[feed_id],
                                                        classifier_tags[feed_id])
    
    # Just need to format stories
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date']  = format_story_link_date__long(story_date, now)
        story['read_status'] = 0
        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        story['intelligence'] = {
            'feed':   apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']),
            'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story),
            'tags':   apply_classifier_tags(classifier_tags[story['story_feed_id']], story),
            'title':  apply_classifier_titles(classifier_titles[story['story_feed_id']], story),
        }
    
    diff = datetime.datetime.utcnow() - start
    timediff = float("%s.%.2s" % (diff.seconds, (diff.microseconds / 1000)))
    logging.user(request, "~FCLoading river stories: page %s - ~SB%s/%s "
                               "stories ~SN(%s/%s/%s feeds) ~FB(%s seconds)" % 
                               (page, len(stories), len(mstories), len(found_feed_ids), 
                               len(feed_ids), len(original_feed_ids), timediff))
    
    if new_flag:
        return dict(stories=stories, classifiers=classifiers)
    else:
        logging.user(request, "~BR~FCNo new flag on river")
        return dict(stories=stories)
Example #8
0
    def calculate_feed_scores(self, silent=False, stories_db=None):
        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        if self.user.profile.last_seen_on < UNREAD_CUTOFF:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return
        
        if not self.feed.fetched_once:
            if not silent:
                logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return

        if not silent:
            logging.info(' ---> [%s] Computing scores: %s' % (self.user, self.feed))
        feed_scores = dict(negative=0, neutral=0, positive=0)
        
        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta
            
        read_stories = MUserStory.objects(user_id=self.user.pk,
                                          feed_id=self.feed.pk,
                                          read_date__gte=self.mark_read_date)
        # if not silent:
        #     logging.info(' ---> [%s]    Read stories: %s' % (self.user, datetime.datetime.now() - now))
        read_stories_ids = []
        for us in read_stories:
            if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
                read_stories_ids.append(us.story.story_guid)
            elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
                read_stories_ids.append(us.story.id) # TODO: Remove me after migration from story.id->guid
        stories_db = stories_db or MStory.objects(story_feed_id=self.feed.pk,
                                                  story_date__gte=date_delta)
        # if not silent:
        #     logging.info(' ---> [%s]    MStory: %s' % (self.user, datetime.datetime.now() - now))
        unread_stories_db = []
        for story in stories_db:
            if story.story_date < date_delta:
                continue
            if hasattr(story, 'story_guid') and story.story_guid not in read_stories_ids:
                unread_stories_db.append(story)
            elif isinstance(story.id, unicode) and story.id not in read_stories_ids:
                unread_stories_db.append(story)
        stories = Feed.format_stories(unread_stories_db, self.feed.pk)
        # if not silent:
        #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))
        
        classifier_feeds = MClassifierFeed.objects(user_id=self.user.pk, feed_id=self.feed.pk)
        classifier_authors = MClassifierAuthor.objects(user_id=self.user.pk, feed_id=self.feed.pk)
        classifier_titles = MClassifierTitle.objects(user_id=self.user.pk, feed_id=self.feed.pk)
        classifier_tags = MClassifierTag.objects(user_id=self.user.pk, feed_id=self.feed.pk)
        
        # if not silent:
        #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))
            
        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }
        
        for story in stories:
            classifier_authors.rewind()
            classifier_tags.rewind()
            classifier_titles.rewind()
            scores.update({
                'author': apply_classifier_authors(classifier_authors, story),
                'tags': apply_classifier_tags(classifier_tags, story),
                'title': apply_classifier_titles(classifier_titles, story),
            })
            
            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1
                
        
        # if not silent:
        #     logging.info(' ---> [%s]    End classifiers: %s' % (self.user, datetime.datetime.now() - now))
            
        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.needs_unread_recalc = False
        
        self.save()
        
        if (self.unread_count_positive == 0 and 
            self.unread_count_neutral == 0):
            self.mark_feed_read()
        
        cache.delete('usersub:%s' % self.user.id)
        
        return
Example #9
0
    def calculate_feed_scores(self, silent=False):
        if self.user.profile.last_seen_on < MONTH_AGO:
            if not silent:
                logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 month+)' % (self.user, self.feed))
            return
        
        if not self.feed.fetched_once:
            if not silent:
                logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return

        if not silent:
            logging.info(' ---> [%s] Computing scores: %s' % (self.user, self.feed))
        feed_scores = dict(negative=0, neutral=0, positive=0)
        
        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = datetime.datetime.now()-datetime.timedelta(days=DAYS_OF_UNREAD)
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta
            
        read_stories = MUserStory.objects(user_id=self.user.pk,
                                          feed_id=self.feed.pk)
        read_stories_ids = [rs.story.id for rs in read_stories]
        from django.db import connection
        connection.queries = []
        stories_db = MStory.objects(story_feed_id=self.feed.pk,
                                    story_date__gte=date_delta)
        stories_db = [story for story in stories_db if story.id not in read_stories_ids]
        stories = self.feed.format_stories(stories_db)
        
        classifier_feeds = MClassifierFeed.objects(user_id=self.user.pk, feed_id=self.feed.pk)
        classifier_authors = MClassifierAuthor.objects(user_id=self.user.pk, feed_id=self.feed.pk)
        classifier_titles = MClassifierTitle.objects(user_id=self.user.pk, feed_id=self.feed.pk)
        classifier_tags = MClassifierTag.objects(user_id=self.user.pk, feed_id=self.feed.pk)
        
        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }
        
        for story in stories:
            classifier_authors.rewind()
            classifier_tags.rewind()
            classifier_titles.rewind()
            scores.update({
                'author': apply_classifier_authors(classifier_authors, story),
                'tags': apply_classifier_tags(classifier_tags, story),
                'title': apply_classifier_titles(classifier_titles, story),
            })
            
            max_score = max(scores['feed'], scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['feed'], scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            if min_score < 0:
                feed_scores['negative'] += 1
            if max_score == 0 and min_score == 0:
                feed_scores['neutral'] += 1
        
        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.needs_unread_recalc = False
        
        self.save()
        
        if (self.unread_count_positive == 0 and 
            self.unread_count_neutral == 0):
            self.mark_feed_read()
        
        cache.delete('usersub:%s' % self.user.id)
        
        return
Example #10
0
    def calculate_feed_scores(self, silent=False, stories=None, force=False):
        # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        oldest_unread_story_date = now

        if self.user.profile.last_seen_on < UNREAD_CUTOFF and not force:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return self
        ong = self.unread_count_negative
        ont = self.unread_count_neutral
        ops = self.unread_count_positive

        # if not self.feed.fetched_once:
        #     if not silent:
        #         logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
        #     self.needs_unread_recalc = False
        #     self.save()
        #     return

        feed_scores = dict(negative=0, neutral=0, positive=0)

        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        if self.is_trained:
            if not stories:
                stories = cache.get("S:%s" % self.feed_id)

            unread_story_hashes = self.get_stories(read_filter="unread", limit=500, hashes_only=True)

            if not stories:
                stories_db = MStory.objects(story_hash__in=unread_story_hashes)
                stories = Feed.format_stories(stories_db, self.feed_id)

            unread_stories = []
            for story in stories:
                if story["story_date"] < date_delta:
                    continue
                if story["story_hash"] in unread_story_hashes:
                    unread_stories.append(story)
                    if story["story_date"] < oldest_unread_story_date:
                        oldest_unread_story_date = story["story_date"]

            # if not silent:
            #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))

            classifier_feeds = list(
                MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0)
            )
            classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
            classifier_titles = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
            classifier_tags = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

            if (
                not len(classifier_feeds)
                and not len(classifier_authors)
                and not len(classifier_titles)
                and not len(classifier_tags)
            ):
                self.is_trained = False

            # if not silent:
            #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

            scores = {"feed": apply_classifier_feeds(classifier_feeds, self.feed)}

            for story in unread_stories:
                scores.update(
                    {
                        "author": apply_classifier_authors(classifier_authors, story),
                        "tags": apply_classifier_tags(classifier_tags, story),
                        "title": apply_classifier_titles(classifier_titles, story),
                    }
                )

                max_score = max(scores["author"], scores["tags"], scores["title"])
                min_score = min(scores["author"], scores["tags"], scores["title"])
                if max_score > 0:
                    feed_scores["positive"] += 1
                elif min_score < 0:
                    feed_scores["negative"] += 1
                else:
                    if scores["feed"] > 0:
                        feed_scores["positive"] += 1
                    elif scores["feed"] < 0:
                        feed_scores["negative"] += 1
                    else:
                        feed_scores["neutral"] += 1
        else:
            unread_story_hashes = self.get_stories(read_filter="unread", limit=500, hashes_only=True, withscores=True)
            feed_scores["neutral"] = len(unread_story_hashes)
            if feed_scores["neutral"]:
                oldest_unread_story_date = datetime.datetime.fromtimestamp(unread_story_hashes[-1][1])

        if not silent:
            logging.user(
                self.user,
                "~FBUnread count (~SB%s~SN%s): ~SN(~FC%s~FB/~FC%s~FB/~FC%s~FB) ~SBto~SN (~FC%s~FB/~FC%s~FB/~FC%s~FB)"
                % (
                    self.feed_id,
                    "/~FMtrained~FB" if self.is_trained else "",
                    ong,
                    ont,
                    ops,
                    feed_scores["negative"],
                    feed_scores["neutral"],
                    feed_scores["positive"],
                ),
            )

        self.unread_count_positive = feed_scores["positive"]
        self.unread_count_neutral = feed_scores["neutral"]
        self.unread_count_negative = feed_scores["negative"]
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False

        self.save()

        if self.unread_count_positive == 0 and self.unread_count_neutral == 0:
            self.mark_feed_read()

        if not silent:
            logging.user(
                self.user,
                "~FC~SNComputing scores: %s (~SB%s~SN/~SB%s~SN/~SB%s~SN)"
                % (self.feed, feed_scores["negative"], feed_scores["neutral"], feed_scores["positive"]),
            )

        return self
Example #11
0
def load_single_feed(request):
    user = get_user(request)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 30))
    page = int(request.REQUEST.get('page', 0))
    if page:
        offset = limit * page
    feed_id = int(request.REQUEST['feed_id'])
    feed = Feed.objects.get(id=feed_id)
    force_update = request.GET.get('force_update', False)
    
    now = datetime.datetime.now()
    stories = feed.get_stories(offset, limit) 
        
    if force_update:
        feed.update(force_update)
    
    # Get intelligence classifier for user
    classifier_feeds = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id)
    classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id)
    classifier_titles = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id)
    classifier_tags = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id)
    
    try:
        usersub = UserSubscription.objects.get(user=user, feed=feed)
    except UserSubscription.DoesNotExist:
        # FIXME: Why is this happening for `conesus` when logged into another account?!
        logging.info(" ***> [%s] UserSub DNE, creating: %s" % (user, feed))
        usersub = UserSubscription.objects.create(user=user, feed=feed)
            

    userstories = MUserStory.objects(user_id=user.pk, 
                                     feed_id=feed.pk,
                                     read_date__gte=usersub.mark_read_date)
    userstories = [us.story.id for us in userstories]
    for story in stories:
        classifier_feeds.rewind()
        classifier_authors.rewind()
        classifier_tags.rewind()
        classifier_titles.rewind()
        if story.get('id') in userstories:
            story['read_status'] = 1
        elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
            story['read_status'] = 1
        elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
            story['read_status'] = 0
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }
    
    # Intelligence
    feed_tags = json.decode(feed.popular_tags) if feed.popular_tags else []
    feed_authors = json.decode(feed.popular_authors) if feed.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, 
                                           classifier_authors, classifier_titles, classifier_tags)
    
    usersub.feed_opens += 1
    usersub.save()
    
    diff = datetime.datetime.now()-now
    logging.info(" ---> [%s] Loading feed: %s (%s.%s seconds)" % (request.user, feed, 
                                                                  diff.seconds, 
                                                                  diff.microseconds / 1000))
    
    data = dict(stories=stories, 
                feed_tags=feed_tags, 
                feed_authors=feed_authors, 
                classifiers=classifiers)
    return data
Example #12
0
                                                            classifier_titles[feed_id],
                                                            classifier_tags[feed_id])
    
    # Just need to format stories
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date']  = format_story_link_date__long(story_date, now)
        story['read_status'] = 0
        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        story['intelligence'] = {
            'feed':   apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']),
            'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story),
            'tags':   apply_classifier_tags(classifier_tags[story['story_feed_id']], story),
            'title':  apply_classifier_titles(classifier_titles[story['story_feed_id']], story),
        }

    diff = time.time() - start
    timediff = round(float(diff), 2)
    logging.user(request, "~FCLoading river stories: page %s - ~SB%s/%s "
                               "stories ~SN(%s/%s/%s feeds) ~FB(%s seconds)" % 
                               (page, len(stories), len(mstories), len(found_feed_ids), 
                               len(feed_ids), len(original_feed_ids), timediff))
    
    return dict(stories=stories, classifiers=classifiers, elapsed_time=timediff)
    
    
Example #13
0
    def calculate_feed_scores(self, silent=False, stories_db=None):
        # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        if self.user.profile.last_seen_on < UNREAD_CUTOFF:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return

        if not self.feed.fetched_once:
            if not silent:
                logging.info(' ---> [%s] NOT Computing scores: %s' %
                             (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return

        feed_scores = dict(negative=0, neutral=0, positive=0)

        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        read_stories = MUserStory.objects(user_id=self.user_id,
                                          feed_id=self.feed_id,
                                          read_date__gte=self.mark_read_date)
        # if not silent:
        #     logging.info(' ---> [%s]    Read stories: %s' % (self.user, datetime.datetime.now() - now))
        read_stories_ids = [us.story_id for us in read_stories]
        stories_db = stories_db or MStory.objects(story_feed_id=self.feed_id,
                                                  story_date__gte=date_delta)
        # if not silent:
        #     logging.info(' ---> [%s]    MStory: %s' % (self.user, datetime.datetime.now() - now))
        oldest_unread_story_date = now
        unread_stories_db = []
        for story in stories_db:
            if story.story_date < date_delta:
                continue
            if hasattr(
                    story,
                    'story_guid') and story.story_guid not in read_stories_ids:
                unread_stories_db.append(story)
                if story.story_date < oldest_unread_story_date:
                    oldest_unread_story_date = story.story_date
        stories = Feed.format_stories(unread_stories_db, self.feed_id)
        # if not silent:
        #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))

        classifier_feeds = list(
            MClassifierFeed.objects(user_id=self.user_id,
                                    feed_id=self.feed_id,
                                    social_user_id=0))
        classifier_authors = list(
            MClassifierAuthor.objects(user_id=self.user_id,
                                      feed_id=self.feed_id))
        classifier_titles = list(
            MClassifierTitle.objects(user_id=self.user_id,
                                     feed_id=self.feed_id))
        classifier_tags = list(
            MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

        # if not silent:
        #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }

        for story in stories:
            scores.update({
                'author':
                apply_classifier_authors(classifier_authors, story),
                'tags':
                apply_classifier_tags(classifier_tags, story),
                'title':
                apply_classifier_titles(classifier_titles, story),
            })

            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1

        # if not silent:
        #     logging.info(' ---> [%s]    End classifiers: %s' % (self.user, datetime.datetime.now() - now))

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False

        self.save()

        if (self.unread_count_positive == 0 and self.unread_count_neutral == 0
                and self.unread_count_negative == 0):
            self.mark_feed_read()

        if not silent:
            logging.info(' ---> [%s] Computing scores: %s (%s/%s/%s)' %
                         (self.user, self.feed, feed_scores['negative'],
                          feed_scores['neutral'], feed_scores['positive']))

        return self
Example #14
0
def load_river_blurblog(request):
    limit = 10
    start = time.time()
    user = get_user(request)
    social_user_ids = [int(uid) for uid in request.REQUEST.getlist("social_user_ids") if uid]
    original_user_ids = list(social_user_ids)
    page = int(request.REQUEST.get("page", 1))
    order = request.REQUEST.get("order", "newest")
    read_filter = request.REQUEST.get("read_filter", "unread")
    relative_user_id = request.REQUEST.get("relative_user_id", None)
    now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
    UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if not relative_user_id:
        relative_user_id = get_user(request).pk

    if not social_user_ids:
        socialsubs = MSocialSubscription.objects.filter(user_id=user.pk)
        social_user_ids = [s.subscription_user_id for s in socialsubs]

    offset = (page - 1) * limit
    limit = page * limit - 1

    story_ids, story_dates = MSocialSubscription.feed_stories(
        user.pk, social_user_ids, offset=offset, limit=limit, order=order, read_filter=read_filter
    )
    mstories = MStory.objects(id__in=story_ids)
    story_id_to_dates = dict(zip(story_ids, story_dates))

    def sort_stories_by_id(a, b):
        return int(story_id_to_dates[str(b.id)]) - int(story_id_to_dates[str(a.id)])

    sorted_mstories = sorted(mstories, cmp=sort_stories_by_id)
    stories = Feed.format_stories(sorted_mstories)
    for s, story in enumerate(stories):
        story["story_date"] = datetime.datetime.fromtimestamp(story_dates[s])
    stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, relative_user_id, check_all=True)

    story_feed_ids = list(set(s["story_feed_id"] for s in stories))
    usersubs = UserSubscription.objects.filter(user__pk=user.pk, feed__pk__in=story_feed_ids)
    usersubs_map = dict((sub.feed_id, sub) for sub in usersubs)
    unsub_feed_ids = list(set(story_feed_ids).difference(set(usersubs_map.keys())))
    unsub_feeds = Feed.objects.filter(pk__in=unsub_feed_ids)
    unsub_feeds = [feed.canonical(include_favicon=False) for feed in unsub_feeds]

    # Find starred stories
    if story_feed_ids:
        story_ids = [story["id"] for story in stories]
        starred_stories = MStarredStory.objects(user_id=user.pk, story_guid__in=story_ids).only(
            "story_guid", "starred_date"
        )
        starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])
        shared_stories = MSharedStory.objects(user_id=user.pk, story_guid__in=story_ids).only(
            "story_guid", "shared_date", "comments"
        )
        shared_stories = dict(
            [
                (story.story_guid, dict(shared_date=story.shared_date, comments=story.comments))
                for story in shared_stories
            ]
        )

        userstories_db = MUserStory.objects(user_id=user.pk, feed_id__in=story_feed_ids, story_id__in=story_ids).only(
            "story_id"
        )
        userstories = set(us.story_id for us in userstories_db)

    else:
        starred_stories = {}
        shared_stories = {}
        userstories = []

    # Intelligence classifiers for all feeds involved
    if story_feed_ids:
        classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_tags = list(MClassifierTag.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    else:
        classifier_feeds = []
        classifier_authors = []
        classifier_titles = []
        classifier_tags = []
    classifiers = sort_classifiers_by_feed(
        user=user,
        feed_ids=story_feed_ids,
        classifier_feeds=classifier_feeds,
        classifier_authors=classifier_authors,
        classifier_titles=classifier_titles,
        classifier_tags=classifier_tags,
    )

    # Just need to format stories
    for story in stories:
        if story["id"] in userstories:
            story["read_status"] = 1
        elif story["story_date"] < UNREAD_CUTOFF:
            story["read_status"] = 1
        else:
            story["read_status"] = 0
        story_date = localtime_for_timezone(story["story_date"], user.profile.timezone)
        story["short_parsed_date"] = format_story_link_date__short(story_date, now)
        story["long_parsed_date"] = format_story_link_date__long(story_date, now)
        if story["id"] in starred_stories:
            story["starred"] = True
            starred_date = localtime_for_timezone(starred_stories[story["id"]], user.profile.timezone)
            story["starred_date"] = format_story_link_date__long(starred_date, now)
        story["intelligence"] = {
            "feed": apply_classifier_feeds(classifier_feeds, story["story_feed_id"]),
            "author": apply_classifier_authors(classifier_authors, story),
            "tags": apply_classifier_tags(classifier_tags, story),
            "title": apply_classifier_titles(classifier_titles, story),
        }
        if story["id"] in shared_stories:
            story["shared"] = True
            shared_date = localtime_for_timezone(shared_stories[story["id"]]["shared_date"], user.profile.timezone)
            story["shared_date"] = format_story_link_date__long(shared_date, now)
            story["shared_comments"] = strip_tags(shared_stories[story["id"]]["comments"])

    diff = time.time() - start
    timediff = round(float(diff), 2)
    logging.user(
        request,
        "~FYLoading ~FCriver blurblogs stories~FY: ~SBp%s~SN (%s/%s "
        "stories, ~SN%s/%s/%s feeds)"
        % (page, len(stories), len(mstories), len(story_feed_ids), len(social_user_ids), len(original_user_ids)),
    )

    return {
        "stories": stories,
        "user_profiles": user_profiles,
        "feeds": unsub_feeds,
        "classifiers": classifiers,
        "elapsed_time": timediff,
    }
Example #15
0
    def calculate_feed_scores(self, silent=False, stories=None, force=False):
        # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        
        if self.user.profile.last_seen_on < UNREAD_CUTOFF and not force:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return
        
        # if not self.feed.fetched_once:
        #     if not silent:
        #         logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
        #     self.needs_unread_recalc = False
        #     self.save()
        #     return
            
        feed_scores = dict(negative=0, neutral=0, positive=0)
        
        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta
        
        if not stories:
            stories = cache.get('S:%s' % self.feed_id)
            
        unread_story_hashes = self.get_stories(read_filter='unread', limit=500, hashes_only=True)
        
        if not stories:
            stories_db = MStory.objects(story_hash__in=unread_story_hashes)
            stories = Feed.format_stories(stories_db, self.feed_id)
        
        oldest_unread_story_date = now
        unread_stories = []
        for story in stories:
            if story['story_date'] < date_delta:
                continue
            if story['story_hash'] in unread_story_hashes:
                unread_stories.append(story)
                if story['story_date'] < oldest_unread_story_date:
                    oldest_unread_story_date = story['story_date']

        # if not silent:
        #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))
        
        classifier_feeds   = list(MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0))
        classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_titles  = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_tags    = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

        # if not silent:
        #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))
            
        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }
        
        for story in unread_stories:
            scores.update({
                'author' : apply_classifier_authors(classifier_authors, story),
                'tags'   : apply_classifier_tags(classifier_tags, story),
                'title'  : apply_classifier_titles(classifier_titles, story),
            })
            
            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1
                
        
        # if not silent:
        #     logging.info(' ---> [%s]    End classifiers: %s' % (self.user, datetime.datetime.now() - now))

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False
        
        self.save()

        if (self.unread_count_positive == 0 and 
            self.unread_count_neutral == 0):
            self.mark_feed_read()
        
        if not silent:
            logging.user(self.user, '~FC~SNComputing scores: %s (~SB%s~SN/~SB%s~SN/~SB%s~SN)' % (self.feed, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))
            
        return self
Example #16
0
def load_social_stories(request, user_id, username=None):
    start          = time.time()
    user           = get_user(request)
    social_user_id = int(user_id)
    social_user    = get_object_or_404(User, pk=social_user_id)
    offset         = int(request.REQUEST.get('offset', 0))
    limit          = int(request.REQUEST.get('limit', 6))
    page           = request.REQUEST.get('page')
    order          = request.REQUEST.get('order', 'newest')
    read_filter    = request.REQUEST.get('read_filter', 'all')
    stories        = []
    
    if page: offset = limit * (int(page) - 1)
    now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
    UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    
    social_profile = MSocialProfile.get_user(social_user.pk)
    try:
        socialsub = MSocialSubscription.objects.get(user_id=user.pk, subscription_user_id=social_user_id)
    except MSocialSubscription.DoesNotExist:
        socialsub = None
    mstories = MSharedStory.objects(user_id=social_user.pk).order_by('-shared_date')[offset:offset+limit]
    stories = Feed.format_stories(mstories)
    
    if socialsub and (read_filter == 'unread' or order == 'oldest'):
        story_ids = socialsub.get_stories(order=order, read_filter=read_filter, offset=offset, limit=limit)
        story_date_order = "%sshared_date" % ('' if order == 'oldest' else '-')
        if story_ids:
            mstories = MSharedStory.objects(user_id=social_user.pk,
                                            story_db_id__in=story_ids).order_by(story_date_order)
            stories = Feed.format_stories(mstories)
    else:
        mstories = MSharedStory.objects(user_id=social_user.pk).order_by('-shared_date')[offset:offset+limit]
        stories = Feed.format_stories(mstories)

    if not stories:
        return dict(stories=[])
    
    checkpoint1 = time.time()
    
    stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, user.pk, check_all=True)

    story_feed_ids = list(set(s['story_feed_id'] for s in stories))
    usersubs = UserSubscription.objects.filter(user__pk=user.pk, feed__pk__in=story_feed_ids)
    usersubs_map = dict((sub.feed_id, sub) for sub in usersubs)
    unsub_feed_ids = list(set(story_feed_ids).difference(set(usersubs_map.keys())))
    unsub_feeds = Feed.objects.filter(pk__in=unsub_feed_ids)
    unsub_feeds = [feed.canonical(include_favicon=False) for feed in unsub_feeds]
    date_delta = UNREAD_CUTOFF
    if socialsub and date_delta < socialsub.mark_read_date:
        date_delta = socialsub.mark_read_date
    
    # Get intelligence classifier for user
    classifier_feeds   = list(MClassifierFeed.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_titles  = list(MClassifierTitle.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_tags    = list(MClassifierTag.objects(user_id=user.pk, social_user_id=social_user_id))
    # Merge with feed specific classifiers
    classifier_feeds   = classifier_feeds + list(MClassifierFeed.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_authors = classifier_authors + list(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_titles  = classifier_titles + list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_tags    = classifier_tags + list(MClassifierTag.objects(user_id=user.pk, feed_id__in=story_feed_ids))

    checkpoint2 = time.time()
    
    story_ids = [story['id'] for story in stories]
    userstories_db = MUserStory.objects(user_id=user.pk,
                                        feed_id__in=story_feed_ids,
                                        story_id__in=story_ids).only('story_id')
    userstories = set(us.story_id for us in userstories_db)

    starred_stories = MStarredStory.objects(user_id=user.pk, 
                                            story_feed_id__in=story_feed_ids, 
                                            story_guid__in=story_ids).only('story_guid', 'starred_date')
    shared_stories = MSharedStory.objects(user_id=user.pk, 
                                          story_feed_id__in=story_feed_ids, 
                                          story_guid__in=story_ids)\
                                 .only('story_guid', 'shared_date', 'comments')
    starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])
    shared_stories = dict([(story.story_guid, dict(shared_date=story.shared_date, comments=story.comments))
                           for story in shared_stories])
    
    for story in stories:
        story['social_user_id'] = social_user_id
        story_feed_id = story['story_feed_id']
        # story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        shared_date = localtime_for_timezone(story['shared_date'], user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(shared_date, now)
        story['long_parsed_date'] = format_story_link_date__long(shared_date, now)
        
        if not socialsub:
            story['read_status'] = 1
        elif story['id'] in userstories:
            story['read_status'] = 1
        elif story['shared_date'] < date_delta:
            story['read_status'] = 1
        elif not usersubs_map.get(story_feed_id):
            story['read_status'] = 0
        elif not story.get('read_status') and story['story_date'] < usersubs_map[story_feed_id].mark_read_date:
            story['read_status'] = 1
        elif not story.get('read_status') and story['shared_date'] < date_delta:
            story['read_status'] = 1
        # elif not story.get('read_status') and socialsub and story['shared_date'] > socialsub.last_read_date:
        #     story['read_status'] = 0
        else:
            story['read_status'] = 0

        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        if story['id'] in shared_stories:
            story['shared'] = True
            shared_date = localtime_for_timezone(shared_stories[story['id']]['shared_date'],
                                                 user.profile.timezone)
            story['shared_date'] = format_story_link_date__long(shared_date, now)
            story['shared_comments'] = strip_tags(shared_stories[story['id']]['comments'])

        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, story['story_feed_id'],
                                           social_user_id=social_user_id),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }
    
    
    classifiers = sort_classifiers_by_feed(user=user, feed_ids=story_feed_ids,
                                           classifier_feeds=classifier_feeds,
                                           classifier_authors=classifier_authors,
                                           classifier_titles=classifier_titles,
                                           classifier_tags=classifier_tags)
                                           
    if socialsub:
        socialsub.feed_opens += 1
        socialsub.save()
    
    diff1 = checkpoint1-start
    diff2 = checkpoint2-start
    logging.user(request, "~FYLoading ~FMshared stories~FY: ~SB%s%s ~SN(~SB%.4ss/%.4ss~SN)" % (
    social_profile.title[:22], ('~SN/p%s' % page) if page > 1 else '', diff1, diff2))

    return {
        "stories": stories, 
        "user_profiles": user_profiles, 
        "feeds": unsub_feeds, 
        "classifiers": classifiers,
    }
Example #17
0
    def calculate_feed_scores(self, silent=False, stories_db=None):
        # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        if self.user.profile.last_seen_on < UNREAD_CUTOFF:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return
        
        if not self.feed.fetched_once:
            if not silent:
                logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return
            
        feed_scores = dict(negative=0, neutral=0, positive=0)
        
        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        read_stories = MUserStory.objects(user_id=self.user.pk,
                                          feed_id=self.feed.pk,
                                          read_date__gte=self.mark_read_date)
        # if not silent:
        #     logging.info(' ---> [%s]    Read stories: %s' % (self.user, datetime.datetime.now() - now))
        read_stories_ids = []
        for us in read_stories:
            read_stories_ids.append(us.story_id)
        stories_db = stories_db or MStory.objects(story_feed_id=self.feed.pk,
                                                  story_date__gte=date_delta)
        # if not silent:
        #     logging.info(' ---> [%s]    MStory: %s' % (self.user, datetime.datetime.now() - now))
        oldest_unread_story_date = now
        unread_stories_db = []
        for story in stories_db:
            if story.story_date < date_delta:
                continue
            if hasattr(story, 'story_guid') and story.story_guid not in read_stories_ids:
                unread_stories_db.append(story)
                if story.story_date < oldest_unread_story_date:
                    oldest_unread_story_date = story.story_date
        stories = Feed.format_stories(unread_stories_db, self.feed.pk)
        # if not silent:
        #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))
        
        classifier_feeds   = list(MClassifierFeed.objects(user_id=self.user.pk, feed_id=self.feed.pk))
        classifier_authors = list(MClassifierAuthor.objects(user_id=self.user.pk, feed_id=self.feed.pk))
        classifier_titles  = list(MClassifierTitle.objects(user_id=self.user.pk, feed_id=self.feed.pk))
        classifier_tags    = list(MClassifierTag.objects(user_id=self.user.pk, feed_id=self.feed.pk))

        # if not silent:
        #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))
            
        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }
        
        for story in stories:
            scores.update({
                'author' : apply_classifier_authors(classifier_authors, story),
                'tags'   : apply_classifier_tags(classifier_tags, story),
                'title'  : apply_classifier_titles(classifier_titles, story),
            })
            
            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1
                
        
        # if not silent:
        #     logging.info(' ---> [%s]    End classifiers: %s' % (self.user, datetime.datetime.now() - now))
            
        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False
        
        self.save()

        # if (self.unread_count_positive == 0 and 
        #     self.unread_count_neutral == 0):
        #     self.mark_feed_read()
        
        cache.delete('usersub:%s' % self.user.id)
        
        if not silent:
            logging.info(' ---> [%s] Computing scores: %s (%s/%s/%s)' % (self.user, self.feed, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))
            
        return
Example #18
0
def load_single_feed(request):
    user = get_user(request)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 30))
    page = int(request.REQUEST.get('page', 0))
    if page:
        offset = limit * page
    feed_id = int(request.REQUEST.get('feed_id', 0))
    if feed_id == 0:
        raise Http404
        
    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        feed_address = request.REQUEST.get('feed_address')
        dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if dupe_feed:
            feed = dupe_feed[0].feed
        else:
            raise Http404
        
    force_update = request.GET.get('force_update', False)
    
    now = datetime.datetime.utcnow()
    stories = feed.get_stories(offset, limit) 
        
    if force_update:
        feed.update(force_update)
    
    # Get intelligence classifier for user
    classifier_feeds = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id)
    classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id)
    classifier_titles = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id)
    classifier_tags = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id)
    
    try:
        usersub = UserSubscription.objects.get(user=user, feed=feed)
    except UserSubscription.DoesNotExist:
        # FIXME: Why is this happening for `conesus` when logged into another account?!
        logging.info(" ***> [%s] UserSub DNE, creating: %s" % (user, feed))
        usersub = UserSubscription.objects.create(user=user, feed=feed)
            
    userstories = []
    userstories_db = MUserStory.objects(user_id=user.pk, 
                                        feed_id=feed.pk,
                                        read_date__gte=usersub.mark_read_date)
    for us in userstories_db:
        if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
            userstories.append(us.story.story_guid)
        elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
            userstories.append(us.story.id) # TODO: Remove me after migration from story.id->guid
            
    for story in stories:
        classifier_feeds.rewind()
        classifier_authors.rewind()
        classifier_tags.rewind()
        classifier_titles.rewind()
        if story['id'] in userstories:
            story['read_status'] = 1
        elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
            story['read_status'] = 1
        elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
            story['read_status'] = 0
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }
    
    # Intelligence
    feed_tags = json.decode(feed.popular_tags) if feed.popular_tags else []
    feed_authors = json.decode(feed.popular_authors) if feed.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, 
                                           classifier_authors, classifier_titles, classifier_tags)
    
    usersub.feed_opens += 1
    usersub.save()
    
    diff = datetime.datetime.utcnow()-now
    timediff = float("%s.%s" % (diff.seconds, (diff.microseconds / 1000)))
    logging.info(" ---> [%s] Loading feed: %s (%s seconds)" % (request.user, feed, timediff))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)
    
    last_update = relative_timesince(feed.last_update)
    data = dict(stories=stories, 
                feed_tags=feed_tags, 
                feed_authors=feed_authors, 
                classifiers=classifiers,
                last_update=last_update,
                feed_id=feed.pk)
    return data
Example #19
0
def load_single_feed(request, feed_id):
    start        = time.time()
    user         = get_user(request)
    offset       = int(request.REQUEST.get('offset', 0))
    limit        = int(request.REQUEST.get('limit', 12))
    page         = int(request.REQUEST.get('page', 1))
    dupe_feed_id = None
    userstories_db = None
    
    if page: offset = limit * (page-1)
    if not feed_id: raise Http404
        
    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        feed_address = request.REQUEST.get('feed_address')
        dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if dupe_feed:
            feed = dupe_feed[0].feed
            dupe_feed_id = feed_id
        else:
            raise Http404
        
    stories = feed.get_stories(offset, limit) 
        
    # Get intelligence classifier for user
    classifier_feeds   = list(MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id))
    classifier_titles  = list(MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id))
    classifier_tags    = list(MClassifierTag.objects(user_id=user.pk, feed_id=feed_id))
    
    checkpoint1 = time.time()
    
    usersub = UserSubscription.objects.get(user=user, feed=feed)
    userstories = []
    if usersub and stories:
        story_ids = [story['id'] for story in stories]
        userstories_db = MUserStory.objects(user_id=user.pk,
                                            feed_id=feed.pk,
                                            story_id__in=story_ids).only('story_id')
        starred_stories = MStarredStory.objects(user_id=user.pk, 
                                                story_feed_id=feed_id, 
                                                story_guid__in=story_ids).only('story_guid', 'starred_date')
        starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])
        userstories = set(us.story_id for us in userstories_db)
            
    checkpoint2 = time.time()
    
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date'] = format_story_link_date__long(story_date, now)
        if usersub:
            if story['id'] in userstories:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
                story['read_status'] = 0
            if story['id'] in starred_stories:
                story['starred'] = True
                starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
                story['starred_date'] = format_story_link_date__long(starred_date, now)
        else:
            story['read_status'] = 1
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }

    checkpoint3 = time.time()
    
    # Intelligence
    feed_tags = json.decode(feed.data.popular_tags) if feed.data.popular_tags else []
    feed_authors = json.decode(feed.data.popular_authors) if feed.data.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, 
                                           classifier_authors, classifier_titles, classifier_tags)
    
    if usersub:
        usersub.feed_opens += 1
        usersub.save()
    diff1 = checkpoint1-start
    diff2 = checkpoint2-start
    diff3 = checkpoint3-start
    timediff = time.time()-start
    last_update = relative_timesince(feed.last_update)
    logging.user(request, "~FYLoading feed: ~SB%s%s ~SN(%.4s seconds, ~SB%.4s/%.4s(%s)/%.4s~SN)" % (
        feed.feed_title[:32], ('~SN/p%s' % page) if page > 1 else '', timediff,
        diff1, diff2, userstories_db and userstories_db.count() or '~SN0~SB', diff3))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)
    
    data = dict(stories=stories, 
                feed_tags=feed_tags, 
                feed_authors=feed_authors, 
                classifiers=classifiers,
                last_update=last_update,
                feed_id=feed.pk)
    
    if dupe_feed_id: data['dupe_feed_id'] = dupe_feed_id
    if not usersub:
        data.update(feed.canonical())
        
    return data
Example #20
0
    def calculate_feed_scores(self, silent=False, stories_db=None):
        now = datetime.datetime.utcnow()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        if self.user.profile.last_seen_on < UNREAD_CUTOFF:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return

        if not self.feed.fetched_once:
            if not silent:
                logging.info(' ---> [%s] NOT Computing scores: %s' %
                             (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return

        if not silent:
            logging.info(' ---> [%s] Computing scores: %s' %
                         (self.user, self.feed))
        feed_scores = dict(negative=0, neutral=0, positive=0)

        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        read_stories = MUserStory.objects(user_id=self.user.pk,
                                          feed_id=self.feed.pk,
                                          read_date__gte=self.mark_read_date)
        # if not silent:
        #     logging.info(' ---> [%s]    Read stories: %s' % (self.user, datetime.datetime.now() - now))
        read_stories_ids = []
        for us in read_stories:
            if hasattr(us.story, 'story_guid') and isinstance(
                    us.story.story_guid, unicode):
                read_stories_ids.append(us.story.story_guid)
            elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
                read_stories_ids.append(
                    us.story.id
                )  # TODO: Remove me after migration from story.id->guid
        stories_db = stories_db or MStory.objects(story_feed_id=self.feed.pk,
                                                  story_date__gte=date_delta)
        # if not silent:
        #     logging.info(' ---> [%s]    MStory: %s' % (self.user, datetime.datetime.now() - now))
        oldest_unread_story_date = now
        unread_stories_db = []
        for story in stories_db:
            if story.story_date < date_delta:
                continue
            if hasattr(
                    story,
                    'story_guid') and story.story_guid not in read_stories_ids:
                unread_stories_db.append(story)
                if story.story_date < oldest_unread_story_date:
                    oldest_unread_story_date = story.story_date
        stories = Feed.format_stories(unread_stories_db, self.feed.pk)
        # if not silent:
        #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))

        classifier_feeds = MClassifierFeed.objects(user_id=self.user.pk,
                                                   feed_id=self.feed.pk)
        classifier_authors = MClassifierAuthor.objects(user_id=self.user.pk,
                                                       feed_id=self.feed.pk)
        classifier_titles = MClassifierTitle.objects(user_id=self.user.pk,
                                                     feed_id=self.feed.pk)
        classifier_tags = MClassifierTag.objects(user_id=self.user.pk,
                                                 feed_id=self.feed.pk)

        # if not silent:
        #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }

        for story in stories:
            classifier_authors.rewind()
            classifier_tags.rewind()
            classifier_titles.rewind()
            scores.update({
                'author':
                apply_classifier_authors(classifier_authors, story),
                'tags':
                apply_classifier_tags(classifier_tags, story),
                'title':
                apply_classifier_titles(classifier_titles, story),
            })

            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1

        # if not silent:
        #     logging.info(' ---> [%s]    End classifiers: %s' % (self.user, datetime.datetime.now() - now))

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False

        self.save()

        if (self.unread_count_positive == 0
                and self.unread_count_neutral == 0):
            self.mark_feed_read()

        cache.delete('usersub:%s' % self.user.id)

        return
Example #21
0
    def calculate_feed_scores(self, silent=False, stories=None, force=False):
        # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
        now = datetime.datetime.now()
        oldest_unread_story_date = now
        
        if self.user.profile.last_seen_on < self.user.profile.unread_cutoff and not force:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return self
        ong = self.unread_count_negative
        ont = self.unread_count_neutral
        ops = self.unread_count_positive
        
        # if not self.feed.fetched_once:
        #     if not silent:
        #         logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
        #     self.needs_unread_recalc = False
        #     self.save()
        #     return
            
        feed_scores = dict(negative=0, neutral=0, positive=0)
        
        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = self.user.profile.unread_cutoff
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta
        
        if self.is_trained:
            if not stories:
                stories = cache.get('S:%s' % self.feed_id)
            
            unread_story_hashes = self.story_hashes(user_id=self.user_id, feed_ids=[self.feed_id],
                                                    usersubs=[self],
                                                    read_filter='unread', group_by_feed=False,
                                                    cutoff_date=self.user.profile.unread_cutoff)
        
            if not stories:
                stories_db = MStory.objects(story_hash__in=unread_story_hashes)
                stories = Feed.format_stories(stories_db, self.feed_id)
        
            unread_stories = []
            for story in stories:
                if story['story_date'] < date_delta:
                    continue
                if story['story_hash'] in unread_story_hashes:
                    unread_stories.append(story)
                    if story['story_date'] < oldest_unread_story_date:
                        oldest_unread_story_date = story['story_date']

            # if not silent:
            #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))
        
            classifier_feeds   = list(MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0))
            classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
            classifier_titles  = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
            classifier_tags    = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))
            
            if (not len(classifier_feeds) and 
                not len(classifier_authors) and 
                not len(classifier_titles) and 
                not len(classifier_tags)):
                self.is_trained = False
            
            # if not silent:
            #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))
            
            scores = {
                'feed': apply_classifier_feeds(classifier_feeds, self.feed),
            }
        
            for story in unread_stories:
                scores.update({
                    'author' : apply_classifier_authors(classifier_authors, story),
                    'tags'   : apply_classifier_tags(classifier_tags, story),
                    'title'  : apply_classifier_titles(classifier_titles, story),
                })
            
                max_score = max(scores['author'], scores['tags'], scores['title'])
                min_score = min(scores['author'], scores['tags'], scores['title'])
                if max_score > 0:
                    feed_scores['positive'] += 1
                elif min_score < 0:
                    feed_scores['negative'] += 1
                else:
                    if scores['feed'] > 0:
                        feed_scores['positive'] += 1
                    elif scores['feed'] < 0:
                        feed_scores['negative'] += 1
                    else:
                        feed_scores['neutral'] += 1
        else:
            unread_story_hashes = self.story_hashes(user_id=self.user_id, feed_ids=[self.feed_id],
                                                    usersubs=[self],
                                                    read_filter='unread', group_by_feed=False,
                                                    include_timestamps=True,
                                                    cutoff_date=self.user.profile.unread_cutoff)

            feed_scores['neutral'] = len(unread_story_hashes)
            if feed_scores['neutral']:
                oldest_unread_story_date = datetime.datetime.fromtimestamp(unread_story_hashes[-1][1])
        
        if not silent:
            logging.user(self.user, '~FBUnread count (~SB%s~SN%s): ~SN(~FC%s~FB/~FC%s~FB/~FC%s~FB) ~SBto~SN (~FC%s~FB/~FC%s~FB/~FC%s~FB)' % (self.feed_id, '/~FMtrained~FB' if self.is_trained else '', ong, ont, ops, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False
        
        self.save()

        if (self.unread_count_positive == 0 and 
            self.unread_count_neutral == 0):
            self.mark_feed_read()
        
        if not silent:
            logging.user(self.user, '~FC~SNComputing scores: %s (~SB%s~SN/~SB%s~SN/~SB%s~SN)' % (self.feed, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))
        
        self.trim_read_stories()
        
        return self
Example #22
0
    def calculate_feed_scores(self, silent=False, stories=None):
        # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        if self.user.profile.last_seen_on < UNREAD_CUTOFF:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return

        if not self.feed.fetched_once:
            if not silent:
                logging.info(" ---> [%s] NOT Computing scores: %s" % (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return

        feed_scores = dict(negative=0, neutral=0, positive=0)

        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        read_stories = MUserStory.objects(
            user_id=self.user_id, feed_id=self.feed_id, read_date__gte=self.mark_read_date
        )
        read_stories_ids = [us.story_id for us in read_stories]

        if not stories:
            stories_db = MStory.objects(story_feed_id=self.feed_id, story_date__gte=date_delta)
            stories = Feed.format_stories(stories_db, self.feed_id)

        oldest_unread_story_date = now
        unread_stories = []
        for story in stories:
            if story["story_date"] < date_delta:
                continue
            if story["id"] not in read_stories_ids:
                unread_stories.append(story)
                if story["story_date"] < oldest_unread_story_date:
                    oldest_unread_story_date = story["story_date"]

        # if not silent:
        #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))

        classifier_feeds = list(MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0))
        classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_titles = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_tags = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

        # if not silent:
        #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

        scores = {"feed": apply_classifier_feeds(classifier_feeds, self.feed)}

        for story in unread_stories:
            scores.update(
                {
                    "author": apply_classifier_authors(classifier_authors, story),
                    "tags": apply_classifier_tags(classifier_tags, story),
                    "title": apply_classifier_titles(classifier_titles, story),
                }
            )

            max_score = max(scores["author"], scores["tags"], scores["title"])
            min_score = min(scores["author"], scores["tags"], scores["title"])
            if max_score > 0:
                feed_scores["positive"] += 1
            elif min_score < 0:
                feed_scores["negative"] += 1
            else:
                if scores["feed"] > 0:
                    feed_scores["positive"] += 1
                elif scores["feed"] < 0:
                    feed_scores["negative"] += 1
                else:
                    feed_scores["neutral"] += 1

        # if not silent:
        #     logging.info(' ---> [%s]    End classifiers: %s' % (self.user, datetime.datetime.now() - now))

        self.unread_count_positive = feed_scores["positive"]
        self.unread_count_neutral = feed_scores["neutral"]
        self.unread_count_negative = feed_scores["negative"]
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False

        self.save()

        if self.unread_count_positive == 0 and self.unread_count_neutral == 0 and self.unread_count_negative == 0:
            self.mark_feed_read()

        if not silent:
            logging.info(
                " ---> [%s] Computing scores: %s (%s/%s/%s)"
                % (self.user, self.feed, feed_scores["negative"], feed_scores["neutral"], feed_scores["positive"])
            )

        return self