Example #1
0
    def count_unreads_for_subscribers(self, feed):
        """Flag recently seen subscribers of ``feed`` for an unread recount, then score.

        Only active subscriptions whose user's profile ``last_seen_on`` is at or
        after ``feed.unread_cutoff`` are considered; nothing happens when there
        are none. When the ``compute_scores`` option is set, the feed's stories
        since the cutoff are loaded, topped up with any story hashes found in
        the ``zF:<feed pk>`` redis sorted set but absent from that result,
        cached under ``S:<feed pk>`` for 60 seconds and passed to
        ``calculate_feed_scores_with_stories``. Otherwise a debug line is
        emitted if a ``mongodb_replication_lag`` option is present.
        """
        subscriptions = UserSubscription.objects.filter(
            feed=feed,
            active=True,
            user__profile__last_seen_on__gte=feed.unread_cutoff,
        ).order_by("-last_read_date")

        if not subscriptions.count():
            return

        for subscription in subscriptions:
            if subscription.needs_unread_recalc:
                continue
            subscription.needs_unread_recalc = True
            subscription.save()

        if not self.options["compute_scores"]:
            if self.options.get("mongodb_replication_lag"):
                logging.debug(
                    u"   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag"
                    % (feed.title[:30], self.options.get("mongodb_replication_lag"))
                )
            return

        hash_store = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        recent_stories = MStory.objects(story_feed_id=feed.pk, story_date__gte=feed.unread_cutoff)
        stories = Feed.format_stories(recent_stories, feed.pk)

        # Hashes scored between the unread cutoff and one day from now.
        hashes_in_redis = hash_store.zrangebyscore(
            "zF:%s" % feed.pk,
            int(feed.unread_cutoff.strftime("%s")),
            int(time.time() + 60 * 60 * 24),
        )
        hashes_loaded = {story["story_hash"] for story in stories}
        missing_story_hashes = set(hashes_in_redis) - hashes_loaded

        if missing_story_hashes:
            # Re-read the stories the first query did not return, this time
            # explicitly from the primary.
            primary_query = MStory.objects(
                story_feed_id=feed.pk, story_hash__in=missing_story_hashes
            ).read_preference(pymongo.ReadPreference.PRIMARY)
            missing_stories = Feed.format_stories(primary_query, feed.pk)
            stories = missing_stories + stories
            found_summary = (
                feed.title[:30],
                len(missing_stories),
                len(missing_story_hashes),
                len(stories),
            )
            logging.debug(
                u"   ---> [%-30s] ~FYFound ~SB~FC%s(of %s)/%s~FY~SN un-secondaried stories while computing scores"
                % found_summary
            )

        cache.set("S:%s" % feed.pk, stories, 60)
        score_summary = (
            feed.title[:30],
            len(stories),
            subscriptions.count(),
            feed.num_subscribers,
            feed.active_subscribers,
            feed.premium_subscribers,
        )
        logging.debug(
            u"   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)"
            % score_summary
        )
        self.calculate_feed_scores_with_stories(subscriptions, stories)
Example #2
0
def more_like_this(request):
    """Return stories similar to the story identified by ``story_hash``.

    Parameters are read from the request attribute named after its HTTP
    method (``request.GET`` / ``request.POST``):

    * ``order`` -- ``'newest'`` (default) sorts by descending story date,
      anything else by ascending story date.
    * ``page`` -- 1-based page number, default 1.
    * ``limit`` -- page size, default 10.
    * ``story_hash`` -- hash of the reference story.

    Returns:
        dict: ``{"stories": [...]}`` as produced by ``Feed.format_stories``.
    """
    # NOTE(review): the resolved user is not needed below; the call is kept in
    # case get_user() has side effects -- confirm and drop it if it has none.
    get_user(request)

    params = getattr(request, request.method)
    order = params.get('order', 'newest')
    page = int(params.get('page', 1))
    limit = int(params.get('limit', 10))
    offset = limit * (page - 1)
    story_hash = params.get('story_hash')

    # The search is scoped to the single value split out of the story hash
    # (presumably the story's feed id). A previous query for every feed the
    # user subscribes to was overwritten before being used, so it is gone.
    feed_id, _ = MStory.split_story_hash(story_hash)
    story_ids = SearchStory.more_like_this([feed_id],
                                           story_hash,
                                           order,
                                           offset=offset,
                                           limit=limit)

    date_ordering = '-story_date' if order == "newest" else 'story_date'
    stories_db = MStory.objects(story_hash__in=story_ids).order_by(date_ordering)
    stories = Feed.format_stories(stories_db)

    return {
        "stories": stories,
    }
Example #3
0
    def count_unreads_for_subscribers(self, feed):
        """Mark recently seen subscribers of ``feed`` for recount and optionally rescore.

        Active subscriptions whose user's profile ``last_seen_on`` falls within
        the last ``settings.DAYS_OF_UNREAD`` days get ``needs_unread_recalc``
        set. With the ``compute_scores`` option on, stories of the feed dated
        within the same window are read with primary read preference and
        passed to ``calculate_feed_scores_with_stories``.
        """
        now = datetime.datetime.utcnow()
        cutoff = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        subscriptions = UserSubscription.objects.filter(
            feed=feed,
            active=True,
            user__profile__last_seen_on__gte=cutoff,
        ).order_by('-last_read_date')

        if not subscriptions.count():
            return

        for subscription in subscriptions:
            if subscription.needs_unread_recalc:
                continue
            subscription.needs_unread_recalc = True
            subscription.save()

        if not self.options['compute_scores']:
            if self.options.get('mongodb_replication_lag'):
                lag_details = (feed.title[:30],
                               self.options.get('mongodb_replication_lag'))
                logging.debug(
                    u'   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag'
                    % lag_details)
            return

        story_query = MStory.objects(story_feed_id=feed.pk,
                                     story_date__gte=cutoff)
        story_query = story_query.read_preference(
            pymongo.ReadPreference.PRIMARY)
        stories = Feed.format_stories(story_query, feed.pk)

        score_details = (feed.title[:30], len(stories), subscriptions.count(),
                         feed.num_subscribers, feed.active_subscribers,
                         feed.premium_subscribers)
        logging.debug(
            u'   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)'
            % score_details)
        self.calculate_feed_scores_with_stories(subscriptions, stories)
Example #4
0
def load_starred_stories(request):
    """Return one page of the user's starred stories, newest star first.

    Reads ``offset`` (default 0), ``limit`` (default 10) and ``page``
    (default 0) from the request; a non-zero ``page`` replaces the offset with
    ``limit * page``. Each story gets localized date strings, is marked read
    and starred, and receives a zeroed ``intelligence`` dict.

    Returns:
        dict: ``{'stories': [...]}``.
    """
    user = get_user(request)
    params = request.REQUEST
    offset = int(params.get('offset', 0))
    limit = int(params.get('limit', 10))
    page = int(params.get('page', 0))
    if page:
        offset = limit * page

    window = slice(offset, offset + limit)
    starred_query = MStarredStory.objects(user_id=user.pk).order_by('-starred_date')
    stories = Feed.format_stories(starred_query[window])

    def in_user_timezone(moment):
        # Convert a datetime into the timezone stored on the user's profile.
        return localtime_for_timezone(moment, user.profile.timezone)

    for story in stories:
        published_at = in_user_timezone(story['story_date'])
        current_time = in_user_timezone(datetime.datetime.now())
        short_date = format_story_link_date__short(published_at, current_time)
        long_date = format_story_link_date__long(published_at, current_time)
        starred_at = in_user_timezone(story['starred_date'])
        story.update({
            'short_parsed_date': short_date,
            'long_parsed_date': long_date,
            'starred_date': format_story_link_date__long(starred_at, current_time),
            'read_status': 1,
            'starred': True,
            'intelligence': {'feed': 0, 'author': 0, 'tags': 0, 'title': 0},
        })

    logging.user(request.user, "~FCLoading starred stories: ~SB%s stories" % len(stories))

    return {'stories': stories}
Example #5
0
def load_starred_stories(request):
    """Return one page of the user's starred stories, newest star first.

    Reads ``offset`` (default 0), ``limit`` (default 10) and ``page``
    (default 0) from the request; a non-zero ``page`` is treated as 1-based
    and replaces the offset with ``limit * (page - 1)``. Every story is
    decorated with localized date strings, read/starred markers and a zeroed
    ``intelligence`` dict.

    Returns:
        dict: ``{'stories': [...]}``.
    """
    user = get_user(request)
    query = request.REQUEST
    offset = int(query.get('offset', 0))
    limit = int(query.get('limit', 10))
    page = int(query.get('page', 0))
    if page:
        offset = limit * (page - 1)

    newest_first = MStarredStory.objects(user_id=user.pk).order_by('-starred_date')
    page_of_stories = newest_first[offset:offset + limit]
    stories = Feed.format_stories(page_of_stories)

    def localize(moment):
        # Express a datetime in the timezone stored on the user's profile.
        return localtime_for_timezone(moment, user.profile.timezone)

    for story in stories:
        story_local = localize(story['story_date'])
        now_local = localize(datetime.datetime.now())
        story['short_parsed_date'] = format_story_link_date__short(story_local, now_local)
        story['long_parsed_date'] = format_story_link_date__long(story_local, now_local)
        starred_local = localize(story['starred_date'])
        story['starred_date'] = format_story_link_date__long(starred_local, now_local)
        story['read_status'] = 1
        story['starred'] = True
        story['intelligence'] = dict(feed=0, author=0, tags=0, title=0)

    story_total = len(stories)
    logging.user(request, "~FCLoading starred stories: ~SB%s stories" % story_total)

    return {'stories': stories}
Example #6
0
def story_public_comments(request):
    """Return the public comments on one shared story as JSON or rendered HTML.

    Request parameters: ``format`` (``'json'`` by default, ``'html'`` renders
    ``social/story_comments.xhtml``), ``user_id`` (falls back to the pk of the
    requesting user), and the required ``feed_id`` and ``story_id``.
    """
    params = request.REQUEST
    response_format = params.get('format', 'json')
    requested_user_id = params.get('user_id', None)
    feed_id = int(params['feed_id'])
    story_id = params['story_id']

    # Comments are resolved relative to the given user, or the caller if none.
    relative_user_id = requested_user_id or get_user(request).pk

    shared = MSharedStory.objects.filter(story_feed_id=feed_id, story_guid=story_id).limit(1)
    formatted = Feed.format_stories(shared)
    stories, profiles = MSharedStory.stories_with_comments_and_profiles(
        formatted, relative_user_id, check_all=True, public=True)

    if response_format != 'html':
        payload = {
            'comments': stories[0]['public_comments'],
            'user_profiles': profiles,
        }
        return json.json_response(request, payload)

    stories = MSharedStory.attach_users_to_stories(stories, profiles)
    template_context = {'story': stories[0]}
    return render_to_response('social/story_comments.xhtml', template_context,
                              context_instance=RequestContext(request))
Example #7
0
    def count_unreads_for_subscribers(self, feed):
        """Queue an unread recount for recent subscribers of ``feed`` and maybe rescore.

        Subscriptions must be active and belong to a user whose profile
        ``last_seen_on`` is within the last ``settings.DAYS_OF_UNREAD`` days.
        When the ``compute_scores`` option is truthy, the feed's stories from
        that same window are read with primary read preference and handed to
        ``calculate_feed_scores_with_stories``.
        """
        oldest_unread = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        recent_subs = UserSubscription.objects.filter(
            feed=feed,
            active=True,
            user__profile__last_seen_on__gte=oldest_unread,
        )
        recent_subs = recent_subs.order_by('-last_read_date')

        if not recent_subs.count():
            return

        # Only touch (and save) the rows that are not flagged yet.
        unflagged = (sub for sub in recent_subs if not sub.needs_unread_recalc)
        for sub in unflagged:
            sub.needs_unread_recalc = True
            sub.save()

        if self.options['compute_scores']:
            fresh_stories = Feed.format_stories(
                MStory.objects(story_feed_id=feed.pk, story_date__gte=oldest_unread)
                      .read_preference(pymongo.ReadPreference.PRIMARY),
                feed.pk,
            )
            summary = (
                feed.title[:30],
                len(fresh_stories),
                recent_subs.count(),
                feed.num_subscribers,
                feed.active_subscribers,
                feed.premium_subscribers,
            )
            logging.debug(
                u'   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)' % summary
            )
            self.calculate_feed_scores_with_stories(recent_subs, fresh_stories)
        elif self.options.get('mongodb_replication_lag'):
            lag_summary = (feed.title[:30], self.options.get('mongodb_replication_lag'))
            logging.debug(
                u'   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag' % lag_summary
            )
Example #8
0
    def count_unreads_for_subscribers(self, feed):
        """Flag recent subscribers of ``feed`` for an unread recount and optionally rescore.

        Considers active subscriptions whose user's profile ``last_seen_on`` is
        at or after ``feed.unread_cutoff``. With ``compute_scores`` enabled,
        stories since the cutoff are loaded, any hashes present in the
        ``zF:<feed pk>`` redis sorted set but missing from that result are
        re-read with primary read preference, the combined list is cached as
        ``S:<feed pk>`` for 60 seconds and then passed to
        ``calculate_feed_scores_with_stories``.
        """
        active_subs = UserSubscription.objects.filter(
            feed=feed,
            active=True,
            user__profile__last_seen_on__gte=feed.unread_cutoff,
        ).order_by('-last_read_date')

        if not active_subs.count():
            return

        for active_sub in active_subs:
            if active_sub.needs_unread_recalc:
                continue
            active_sub.needs_unread_recalc = True
            active_sub.save()

        if self.options['compute_scores']:
            hash_store = redis.Redis(
                connection_pool=settings.REDIS_STORY_HASH_POOL)
            stories = Feed.format_stories(
                MStory.objects(story_feed_id=feed.pk,
                               story_date__gte=feed.unread_cutoff),
                feed.pk,
            )

            # Score range: from the unread cutoff up to one day from now.
            hashes_in_redis = hash_store.zrangebyscore(
                'zF:%s' % feed.pk,
                int(feed.unread_cutoff.strftime('%s')),
                int(time.time() + 60 * 60 * 24),
            )
            hashes_loaded = {story['story_hash'] for story in stories}
            missing_story_hashes = set(hashes_in_redis).difference(
                hashes_loaded)

            if missing_story_hashes:
                primary_query = MStory.objects(
                    story_feed_id=feed.pk,
                    story_hash__in=missing_story_hashes,
                ).read_preference(pymongo.ReadPreference.PRIMARY)
                missing_stories = Feed.format_stories(primary_query, feed.pk)
                stories = missing_stories + stories
                found_details = (feed.title[:30], len(missing_stories),
                                 len(missing_story_hashes), len(stories))
                logging.debug(
                    u'   ---> [%-30s] ~FYFound ~SB~FC%s(of %s)/%s~FY~SN un-secondaried stories while computing scores'
                    % found_details)

            cache.set("S:%s" % feed.pk, stories, 60)
            score_details = (feed.title[:30], len(stories),
                             active_subs.count(), feed.num_subscribers,
                             feed.active_subscribers,
                             feed.premium_subscribers)
            logging.debug(
                u'   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)'
                % score_details)
            self.calculate_feed_scores_with_stories(active_subs, stories)
        elif self.options.get('mongodb_replication_lag'):
            lag_details = (feed.title[:30],
                           self.options.get('mongodb_replication_lag'))
            logging.debug(
                u'   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag'
                % lag_details)
Example #9
0
    def push_feed_notifications(cls, feed_id, new_stories, force=False):
        """Push notifications for a feed's newest stories to its notified users.

        Args:
            feed_id: Identifier passed to ``Feed.get_by_id``.
            new_stories: How many entries to take from the end of the
                ``zF:<feed pk>`` redis sorted set.
            force: When true, stories are pushed even if they are not newer
                than the user's ``last_notification_date``.

        Returns:
            tuple: ``(total_sent_count, len(notifications))`` -- the number of
            notifications reported as sent and the number of notification
            records examined.
        """
        feed = Feed.get_by_id(feed_id)
        notifications = MUserFeedNotification.users_for_feed(feed.pk)
        logging.debug(
            "   ---> [%-30s] ~FCPushing out notifications to ~SB%s users~SN for ~FB~SB%s stories"
            % (feed, len(notifications), new_stories))
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)

        latest_story_hashes = r.zrange("zF:%s" % feed.pk, -1 * new_stories, -1)
        mstories = MStory.objects.filter(
            story_hash__in=latest_story_hashes).order_by('-story_date')
        stories = Feed.format_stories(mstories)
        total_sent_count = 0

        # The same story dicts are reused for every user below. Track which
        # ones already had their content unescaped so it happens only once;
        # unescaping again would turn e.g. "&amp;lt;" into "<" for later users.
        unescaped_indexes = set()

        for user_feed_notification in notifications:
            sent_count = 0
            last_notification_date = user_feed_notification.last_notification_date
            try:
                usersub = UserSubscription.objects.get(
                    user=user_feed_notification.user_id,
                    feed=user_feed_notification.feed_id)
            except UserSubscription.DoesNotExist:
                continue
            classifiers = user_feed_notification.classifiers(usersub)

            if classifiers is None:
                if settings.DEBUG:
                    logging.debug("Has no usersubs")
                continue

            for index, story in enumerate(stories):
                if sent_count >= 3:
                    # sent_count never decreases, so no later story can be
                    # sent to this user: stop instead of skipping one by one.
                    if settings.DEBUG:
                        logging.debug("Sent too many, ignoring...")
                    break
                if story['story_date'] <= last_notification_date and not force:
                    if settings.DEBUG:
                        logging.debug(
                            "Story date older than last notification date: %s <= %s"
                            % (story['story_date'], last_notification_date))
                    continue

                # Advance the stored high-water mark before attempting the push.
                if story['story_date'] > user_feed_notification.last_notification_date:
                    user_feed_notification.last_notification_date = story['story_date']
                    user_feed_notification.save()

                if index not in unescaped_indexes:
                    story['story_content'] = html.unescape(story['story_content'])
                    unescaped_indexes.add(index)

                sent = user_feed_notification.push_story_notification(
                    story, classifiers, usersub)
                if sent:
                    sent_count += 1
                    total_sent_count += 1
        return total_sent_count, len(notifications)
Example #10
0
File: views.py Project: 76/NewsBlur
def api_saved_story(request):
    """List the requesting user's starred stories as trigger entries.

    Reads ``after``, ``before``, ``limit`` (default 50) and
    ``triggerFields['story_tag']`` from ``request.body_json``. The tag
    ``"all"`` means no tag filter. Stories whose story date falls outside the
    ``before``/``after`` epoch bounds are skipped, and entries are sorted by
    ascending starred timestamp when ``after`` is given.

    Returns:
        dict: ``{"data": [entry, ...]}``.
    """
    user = request.user
    payload = request.body_json
    after = payload.get('after', None)
    before = payload.get('before', None)
    limit = payload.get('limit', 50)
    story_tag = payload.get('triggerFields')['story_tag']

    if story_tag == "all":
        story_tag = ""

    query = {'user_id': user.pk}
    if story_tag:
        query['user_tags__contains'] = story_tag
    starred = MStarredStory.objects(**query).order_by('-starred_date')[:limit]
    stories = Feed.format_stories(starred)

    found_feed_ids = list({story['story_feed_id'] for story in stories})
    feeds = {
        f.pk: {
            "title": f.feed_title,
            "website": f.feed_link,
            "address": f.feed_address,
        }
        for f in Feed.objects.filter(pk__in=found_feed_ids)
    }

    def epoch(moment):
        # Seconds since the epoch as rendered by strftime("%s").
        return int(moment.strftime("%s"))

    iso_pattern = "%Y-%m-%dT%H:%M:%SZ"
    entries = []
    for story in stories:
        if ((before and epoch(story['story_date']) > before) or
                (after and epoch(story['story_date']) < after)):
            continue
        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime(iso_pattern),
            "SavedAt": story['starred_date'].strftime(iso_pattern),
            "Tags": ', '.join(story['user_tags']),
            "Site": feed['title'] if feed else None,
            "SiteURL": feed['website'] if feed else None,
            "SiteRSS": feed['address'] if feed else None,
            "ifttt": {
                "id": story['story_hash'],
                "timestamp": epoch(story['starred_date']),
            },
        })

    if after:
        entries.sort(key=lambda entry: entry['ifttt']['timestamp'])

    logging.user(request, "~FCChecking saved stories from ~SBIFTTT~SB: ~SB%s~SN - ~SB%s~SN stories" % (story_tag or "[All stories]", len(entries)))

    return {"data": entries}
Example #11
0
def api_saved_story(request):
    """Return the requesting user's starred stories as a list of entries.

    ``request.body_json`` supplies ``after``, ``before``, ``limit``
    (default 50) and ``triggerFields['story_tag']``; the tag ``"all"``
    disables tag filtering. Stories with a story date outside the
    ``before``/``after`` epoch bounds are left out. When ``after`` is set the
    entries are ordered by ascending ``meta.timestamp``.

    Returns:
        dict: ``{"data": [entry, ...]}``.
    """
    user = request.user
    body = request.body_json
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    trigger_fields = body.get('triggerFields')
    story_tag = trigger_fields['story_tag']
    if story_tag == "all":
        story_tag = ""

    filters = {'user_id': user.pk}
    if story_tag:
        filters['user_tags__contains'] = story_tag
    newest_starred = MStarredStory.objects(**filters).order_by('-starred_date')
    stories = Feed.format_stories(newest_starred[:limit])

    # Look up display details once for every feed referenced by the stories.
    feed_pks = list(set(story['story_feed_id'] for story in stories))
    feeds = {}
    for f in Feed.objects.filter(pk__in=feed_pks):
        feeds[f.pk] = {
            "title": f.feed_title,
            "website": f.feed_link,
            "address": f.feed_address,
        }

    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    entries = []
    for story in stories:
        published = story['story_date']
        if before and int(published.strftime("%s")) > before:
            continue
        if after and int(published.strftime("%s")) < after:
            continue

        feed = feeds.get(story['story_feed_id'], None)
        saved = story['starred_date']
        entry = {
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": published.strftime(timestamp_format),
            "SavedAt": saved.strftime(timestamp_format),
            "Tags": ', '.join(story['user_tags']),
            "Site": feed['title'] if feed else None,
            "SiteURL": feed['website'] if feed else None,
            "SiteRSS": feed['address'] if feed else None,
            "meta": {
                "id": story['story_hash'],
                "timestamp": int(saved.strftime("%s")),
            },
        }
        entries.append(entry)

    if after:
        entries.sort(key=lambda entry: entry['meta']['timestamp'])

    tag_label = story_tag or "[All stories]"
    logging.user(request, "~FCChecking saved stories from ~SBIFTTT~SB: ~SB%s~SN - ~SB%s~SN stories" % (tag_label, len(entries)))

    return {"data": entries}
Example #12
0
def api_saved_story(request):
    """Return the requesting user's starred stories as a list of entries.

    The JSON request body supplies ``after``, ``before``, ``limit``
    (default 50) and ``triggerFields['story_tag']``. The tag ``"all"`` selects
    every starred story; any other tag restricts the result to stories whose
    ``user_tags`` contain it. Stories whose story date lies outside the
    ``before``/``after`` epoch bounds are skipped.

    Returns:
        dict: ``{"data": [entry, ...]}``, newest star first.
    """
    user = request.user
    body = json.decode(request.body)
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    story_tag = fields['story_tag']
    entries = []

    if story_tag == "all":
        story_tag = ""

    # Only filter by tag when one was actually requested: an empty tag means
    # "all starred stories", so no tag condition is added to the query.
    params = dict(user_id=user.pk)
    if story_tag:
        params.update(dict(user_tags__contains=story_tag))
    mstories = MStarredStory.objects(**params).order_by('-starred_date')[:limit]
    stories = Feed.format_stories(mstories)

    # Fetch display details once for every feed referenced by the stories.
    found_feed_ids = list(set(story['story_feed_id'] for story in stories))
    feeds = dict((f.pk, {
        "title": f.feed_title,
        "website": f.feed_link,
        "address": f.feed_address,
    }) for f in Feed.objects.filter(pk__in=found_feed_ids))

    for story in stories:
        if before and int(story['story_date'].strftime("%s")) > before:
            continue
        if after and int(story['story_date'].strftime("%s")) < after:
            continue
        # feed is None when the story's feed was not found; the Site* values
        # are then None as well.
        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryUrl": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "StoryDate": story['story_date'].isoformat(),
            "SavedDate": story['starred_date'].isoformat(),
            "SavedTags": ', '.join(story['user_tags']),
            "SiteTitle": feed and feed['title'],
            "SiteWebsite": feed and feed['website'],
            "SiteFeedAddress": feed and feed['address'],
            "ifttt": {
                "id": story['story_hash'],
                "timestamp": int(story['starred_date'].strftime("%s"))
            },
        })

    logging.user(request, "~FCChecking saved stories from ~SBIFTTT~SB: ~SB%s~SN - ~SB%s~SN stories" % (story_tag if story_tag else "[All stories]", len(entries)))

    return {"data": entries}
Example #13
0
    def push_feed_notifications(cls, feed_id, new_stories, force=False):
        """Push notifications for a feed's newest stories to its notified users.

        Args:
            feed_id: Identifier passed to ``Feed.get_by_id``.
            new_stories: How many entries to take from the end of the
                ``zF:<feed pk>`` redis sorted set.
            force: When true, stories are pushed even if they are not newer
                than the user's ``last_notification_date``.

        Returns:
            tuple: ``(total_sent_count, len(notifications))`` -- the number of
            notifications reported as sent and the number of notification
            records examined.
        """
        feed = Feed.get_by_id(feed_id)
        notifications = MUserFeedNotification.users_for_feed(feed.pk)
        logging.debug("   ---> [%-30s] ~FCPushing out notifications to ~SB%s users~SN for ~FB~SB%s stories" % (
                      feed, len(notifications), new_stories))
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)

        latest_story_hashes = r.zrange("zF:%s" % feed.pk, -1 * new_stories, -1)
        mstories = MStory.objects.filter(story_hash__in=latest_story_hashes).order_by('-story_date')
        stories = Feed.format_stories(mstories)
        total_sent_count = 0

        # The same story dicts are reused for every user below. Track which
        # ones already had their content unescaped so it happens only once;
        # unescaping again would turn e.g. "&amp;lt;" into "<" for later users.
        unescaped_indexes = set()

        for user_feed_notification in notifications:
            sent_count = 0
            last_notification_date = user_feed_notification.last_notification_date
            try:
                usersub = UserSubscription.objects.get(user=user_feed_notification.user_id,
                                                       feed=user_feed_notification.feed_id)
            except UserSubscription.DoesNotExist:
                continue
            classifiers = user_feed_notification.classifiers(usersub)

            if classifiers is None:
                logging.debug("Has no usersubs")
                continue

            for index, story in enumerate(stories):
                if sent_count >= 3:
                    # sent_count never decreases, so no later story can be
                    # sent to this user: stop instead of skipping one by one.
                    logging.debug("Sent too many, ignoring...")
                    break
                if story['story_date'] <= last_notification_date and not force:
                    logging.debug("Story date older than last notification date: %s <= %s" % (story['story_date'], last_notification_date))
                    continue

                # Advance the stored high-water mark before attempting the push.
                if story['story_date'] > user_feed_notification.last_notification_date:
                    user_feed_notification.last_notification_date = story['story_date']
                    user_feed_notification.save()

                if index not in unescaped_indexes:
                    story['story_content'] = HTMLParser().unescape(story['story_content'])
                    unescaped_indexes.add(index)

                sent = user_feed_notification.push_story_notification(story, classifiers, usersub)
                if sent:
                    sent_count += 1
                    total_sent_count += 1
        return total_sent_count, len(notifications)
Example #14
0
def load_river_stories(request):
    """Load unread stories across several feeds for the requesting user.

    Request parameters:
        feeds (POST list): ids of the feeds to include.
        offset (default 0): index of the first story to return.
        limit (default 25), page (default 0), read_stories (default 0):
            the effective slice size is
            ``limit * (page + 1) - read_stories``.

    For each feed only stories dated at or after the subscription's
    ``mark_read_date`` are eligible, and stories the user already has a read
    record for are excluded. Each returned story is decorated with localized
    date strings, ``read_status`` 0, starred information and a zeroed
    ``intelligence`` dict.

    Returns:
        dict: ``{'stories': [...]}``; an empty list when no feeds were given.
    """
    user = get_user(request)
    feed_ids = [int(feed_id) for feed_id in request.POST.getlist("feeds")]
    offset = int(request.REQUEST.get("offset", 0))
    limit = int(request.REQUEST.get("limit", 25))
    page = int(request.REQUEST.get("page", 0)) + 1
    read_stories_count = int(request.REQUEST.get("read_stories", 0))
    # if page: offset = limit * page
    if page:
        limit = limit * page - read_stories_count

    if not feed_ids:
        # reduce() below raises TypeError on an empty sequence, and with no
        # feeds requested there is nothing to load.
        return dict(stories=[])

    def feed_qvalues(feed_id):
        # One condition per feed: stories of that feed not older than the
        # user's mark-read date for it.
        subscription = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        return Q(story_feed_id=feed_id) & Q(story_date__gte=subscription.mark_read_date)

    feed_last_reads = [feed_qvalues(feed_id) for feed_id in feed_ids]
    qs = reduce(lambda q1, q2: q1 | q2, feed_last_reads)

    read_records = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only("story")
    read_story_ids = [record.story.id for record in read_records]
    mstories = MStory.objects(Q(id__nin=read_story_ids) & qs)[offset : offset + limit]
    stories = Feed.format_stories(mstories)

    starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id__in=feed_ids).only(
        "story_guid", "starred_date"
    )
    # NOTE(review): keyed by story_guid but looked up with story["id"] below;
    # presumably the formatted story id is the guid -- confirm.
    starred_stories = dict((story.story_guid, story.starred_date) for story in starred_stories)

    for story in stories:
        story_date = localtime_for_timezone(story["story_date"], user.profile.timezone)
        story["short_parsed_date"] = format_story_link_date__short(story_date)
        story["long_parsed_date"] = format_story_link_date__long(story_date)
        story["read_status"] = 0
        if story["id"] in starred_stories:
            story["starred"] = True
            starred_date = localtime_for_timezone(starred_stories[story["id"]], user.profile.timezone)
            story["starred_date"] = format_story_link_date__long(starred_date)
        story["intelligence"] = {"feed": 0, "author": 0, "tags": 0, "title": 0}

    logging.info(
        " ---> [%s] ~FCLoading river stories: ~SB%s stories ~SN(%s feeds)" % (request.user, len(stories), len(feed_ids))
    )

    return dict(stories=stories)
Example #15
0
def story_public_comments(request):
    """Return the public comments on one shared story as JSON or rendered HTML.

    Request parameters: ``format`` (``"json"`` by default; ``"html"`` renders
    ``social/story_comments.xhtml``), ``user_id`` (defaults to the pk of the
    requesting user), plus the required ``feed_id`` and ``story_id``.
    """
    query = request.REQUEST
    output_format = query.get("format", "json")
    given_user_id = query.get("user_id", None)
    feed_id = int(query["feed_id"])
    story_id = query["story_id"]

    # Fall back to the caller when no explicit user was supplied.
    relative_user_id = given_user_id if given_user_id else get_user(request).pk

    matching = MSharedStory.objects.filter(story_feed_id=feed_id, story_guid=story_id)
    formatted = Feed.format_stories(matching.limit(1))
    stories, profiles = MSharedStory.stories_with_comments_and_profiles(
        formatted,
        relative_user_id,
        check_all=True,
        public=True,
    )

    if output_format == "html":
        stories = MSharedStory.attach_users_to_stories(stories, profiles)
        context = {"story": stories[0]}
        return render_to_response(
            "social/story_comments.xhtml",
            context,
            context_instance=RequestContext(request),
        )

    response_data = {
        "comments": stories[0]["public_comments"],
        "user_profiles": profiles,
    }
    return json.json_response(request, response_data)
Example #16
0
def load_starred_stories(request):
    """Return one page of the requesting user's starred stories.

    Request parameters (read from ``request.REQUEST``, all converted to int):
        offset: Index of the first story (default 0).
        limit: Page size (default 10).
        page: When non-zero, overrides ``offset`` with ``limit * page``.

    Each story gets display dates localized to the user's profile timezone,
    ``read_status`` 1, ``starred`` True and an all-zero ``intelligence`` dict.

    Returns:
        ``{"stories": [...]}`` with the formatted stories, most recently
        starred first.
    """
    user = get_user(request)
    params = request.REQUEST
    offset = int(params.get("offset", 0))
    limit = int(params.get("limit", 10))
    page = int(params.get("page", 0))
    if page:
        offset = limit * page

    newest_starred = MStarredStory.objects(user_id=user.pk).order_by("-starred_date")
    stories = Feed.format_stories(newest_starred[offset : offset + limit])

    for story in stories:
        published = localtime_for_timezone(story["story_date"], user.profile.timezone)
        story["short_parsed_date"] = format_story_link_date__short(published)
        story["long_parsed_date"] = format_story_link_date__long(published)
        # The raw starred date is replaced by its localized, formatted form.
        starred_at = localtime_for_timezone(story["starred_date"], user.profile.timezone)
        story["starred_date"] = format_story_link_date__long(starred_at)
        story["read_status"] = 1
        story["starred"] = True
        story["intelligence"] = {"feed": 0, "author": 0, "tags": 0, "title": 0}

    logging.info(" ---> [%s] ~FCLoading starred stories: ~SB%s stories" % (request.user, len(stories)))

    return {"stories": stories}
Example #17
0
    def get_stories(self, offset=0, limit=6, order='newest', read_filter='all', withscores=False):
        """Fetch a page of this subscription's stories, ranked through Redis.

        Builds (or reuses) the per-user sorted set ``zU:<user_id>:<feed_id>``
        by intersecting the feed's ranked set ``zF:<feed_id>`` with either all
        of the feed's stories or only those missing from the user's read set,
        then pages through it by score. Score bounds are derived from Unix
        timestamps (``mark_read_date`` and "now + 1 day").

        Args:
            offset: Index of the first ranked entry to return. A non-zero
                offset (without ``withscores``) reuses an existing ranked set
                instead of rebuilding it.
            limit: Maximum number of entries kept from the Redis result
                (Redis itself is asked for at most 500).
            order: ``'oldest'`` pages in ascending score order; any other
                value pages in descending order.
            read_filter: ``'unread'`` restricts the ranked set to stories not
                in the user's read set (when that set exists); any other value
                ranks every story of the feed.
            withscores: When true, return the filtered Redis result (with
                scores) instead of loading the stories.

        Returns:
            ``[]`` when the feed has no story set in Redis or nothing matched;
            the filtered Redis result when ``withscores`` is set; otherwise the
            output of ``Feed.format_stories`` for the matching ``MStory`` rows.
        """
        r = redis.Redis(connection_pool=settings.REDIS_STORY_POOL)
        ignore_user_stories = False

        stories_key               = 'F:%s' % (self.feed_id)
        read_stories_key          = 'RS:%s:%s' % (self.user_id, self.feed_id)
        unread_stories_key        = 'U:%s:%s' % (self.user_id, self.feed_id)
        unread_ranked_stories_key = 'zU:%s:%s' % (self.user_id, self.feed_id)

        # Only a paged, score-less request may reuse a previously built ranking;
        # every other request rebuilds it from scratch.
        reuse_ranking = offset and not withscores and r.exists(unread_ranked_stories_key)
        if not reuse_ranking:
            r.delete(unread_ranked_stories_key)
            if not r.exists(stories_key):
                print(" ---> No stories on feed: %s" % self)
                return []
            elif read_filter != 'unread' or not r.exists(read_stories_key):
                # Nothing to subtract: rank the feed's full story set directly.
                ignore_user_stories = True
                unread_stories_key = stories_key
            else:
                r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
            sorted_stories_key = 'zF:%s' % (self.feed_id)
            r.zinterstore(unread_ranked_stories_key, [sorted_stories_key, unread_stories_key])

        current_time = int(time.time() + 60*60*24)
        if order == 'oldest':
            byscorefunc = r.zrangebyscore
            # The original condition was `read_filter == 'unread' or True`, so the
            # mark_read_date bound always applied; the unreachable alternative
            # (a fixed DAYS_OF_UNREAD window) has been removed.
            min_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
            max_score = current_time
        else:
            # zrevrangebyscore takes (max, min): the first bound passed below is
            # the upper one, so the names read "swapped" in this branch.
            byscorefunc = r.zrevrangebyscore
            min_score = current_time
            if read_filter == 'unread':
                # +1 for the intersection b/w zF and F, which carries an implicit score of 1.
                max_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
            else:
                max_score = 0

        if settings.DEBUG:
            debug_stories = r.zrevrange(unread_ranked_stories_key, 0, -1, withscores=True)
            print(" ---> Unread all stories (%s - %s) %s stories: %s" % (
                min_score,
                max_score,
                len(debug_stories),
                debug_stories))
        story_ids = byscorefunc(unread_ranked_stories_key, min_score,
                                max_score, start=offset, num=500,
                                withscores=withscores)[:limit]
        r.expire(unread_ranked_stories_key, 24*60*60)
        if not ignore_user_stories:
            # Drop the temporary per-user difference set.
            r.delete(unread_stories_key)

        # XXX TODO: Remove below line after combing redis for these None's.
        story_ids = [s for s in story_ids if s and s != 'None'] # ugh, hack

        if withscores:
            return story_ids
        elif story_ids:
            story_date_order = "%sstory_date" % ('' if order == 'oldest' else '-')
            mstories = MStory.objects(id__in=story_ids).order_by(story_date_order)
            stories = Feed.format_stories(mstories)
            return stories
        else:
            return []
Example #18
0
    def calculate_feed_scores(self, silent=False, stories=None, force=False):
        """Recompute and persist this subscription's unread counts.

        Classifies each unread story as positive, neutral or negative using
        the user's author/tag/title classifiers (falling back to the feed
        classifier when none of those match), stores the three counts on the
        subscription and saves it.

        Args:
            silent: When true, skip the summary log line at the end.
            stories: Optional formatted stories (dicts with ``story_date`` and
                ``story_hash``). When empty, the cache key ``S:<feed_id>`` is
                tried, then ``MStory`` is queried by the unread story hashes.
            force: When true, compute even if the user was last seen before
                the unread cutoff.

        Returns:
            ``None`` when skipped because the user has not been seen within
            ``settings.DAYS_OF_UNREAD`` days (and ``force`` is false),
            otherwise ``self``.
        """
        # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        
        if self.user.profile.last_seen_on < UNREAD_CUTOFF and not force:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return
        
        # if not self.feed.fetched_once:
        #     if not silent:
        #         logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
        #     self.needs_unread_recalc = False
        #     self.save()
        #     return
            
        feed_scores = dict(negative=0, neutral=0, positive=0)
        
        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        # date_delta ends up as the later of the cutoff and mark_read_date;
        # mark_read_date is pulled forward to the cutoff when it lags behind.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta
        
        if not stories:
            stories = cache.get('S:%s' % self.feed_id)
            
        # NOTE(review): relies on a get_stories() variant that accepts
        # hashes_only; presumably it returns story hashes - confirm.
        unread_story_hashes = self.get_stories(read_filter='unread', limit=500, hashes_only=True)
        
        if not stories:
            stories_db = MStory.objects(story_hash__in=unread_story_hashes)
            stories = Feed.format_stories(stories_db, self.feed_id)
        
        # Keep only stories newer than date_delta whose hash is unread, and
        # track the oldest such story date (starts at `now`).
        oldest_unread_story_date = now
        unread_stories = []
        for story in stories:
            if story['story_date'] < date_delta:
                continue
            if story['story_hash'] in unread_story_hashes:
                unread_stories.append(story)
                if story['story_date'] < oldest_unread_story_date:
                    oldest_unread_story_date = story['story_date']

        # if not silent:
        #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))
        
        # Materialize the classifier querysets once so they can be reused
        # for every story in the loop below.
        classifier_feeds   = list(MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0))
        classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_titles  = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_tags    = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

        # if not silent:
        #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))
            
        # The feed-level score is the same for every story, so it is computed once.
        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }
        
        for story in unread_stories:
            scores.update({
                'author' : apply_classifier_authors(classifier_authors, story),
                'tags'   : apply_classifier_tags(classifier_tags, story),
                'title'  : apply_classifier_titles(classifier_titles, story),
            })
            
            # Any positive author/tag/title score wins over a negative one;
            # the feed score only decides when all three are zero.
            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1
                
        
        # if not silent:
        #     logging.info(' ---> [%s]    End classifiers: %s' % (self.user, datetime.datetime.now() - now))

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False
        
        self.save()

        # With nothing positive or neutral left, the feed is marked read
        # (negative-only stories do not keep it unread).
        if (self.unread_count_positive == 0 and 
            self.unread_count_neutral == 0):
            self.mark_feed_read()
        
        if not silent:
            logging.user(self.user, '~FC~SNComputing scores: %s (~SB%s~SN/~SB%s~SN/~SB%s~SN)' % (self.feed, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))
            
        return self
Example #19
0
def load_river_stories(request):
    """Load unread stories merged across several feeds (a "river").

    Request parameters (read from ``request.REQUEST``):
        feeds: Repeated feed ids; empty values are ignored.
        page: Zero-based page index (default 0).
        read_stories_count: Number of already-read stories to subtract from
            the amount fetched (default 0).

    All stories up to and including the requested page are returned, not a
    single page, because reading stories can move them up in the unread order.

    Returns:
        ``{"stories": [...]}`` where each story carries localized display
        dates, ``read_status`` 0, starred information and per-classifier
        ``intelligence`` scores. An empty list is returned when no feed ids
        were supplied.
    """
    limit              = 18
    offset             = 0
    start              = datetime.datetime.utcnow()
    user               = get_user(request)
    feed_ids           = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
    original_feed_ids  = list(feed_ids)
    page               = int(request.REQUEST.get('page', 0))+1
    read_stories_count = int(request.REQUEST.get('read_stories_count', 0))
    bottom_delta       = datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if not feed_ids:
        logging.user(request.user, "~FCLoading empty river stories: page %s" % (page))
        return dict(stories=[])

    # Fetch all stories at and before the page number.
    # Not a single page, because reading stories can move them up in the unread order.
    # `read_stories_count` is an optimization, works best when all 25 stories before have been read.
    limit = limit * page - read_stories_count

    # Read stories to exclude
    read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story')
    read_stories = [rs.story.id for rs in read_stories]

    # Determine mark_as_read dates for all feeds to ignore all stories before this date.
    feed_counts     = {}
    feed_last_reads = {}
    for feed_id in feed_ids:
        try:
            usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        except UserSubscription.DoesNotExist:
            # Feeds the user is not subscribed to are silently dropped.
            continue
        # Weight each feed by its unread counts (negative*1, neutral*10, positive*20).
        feed_counts[feed_id] = (usersub.unread_count_negative * 1 +
                                usersub.unread_count_neutral * 10 +
                                usersub.unread_count_positive * 20)
        feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple()))
    # NOTE(review): the ascending sort keeps the 50 *lowest*-weighted feeds;
    # confirm that this is the intended selection.
    feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:50]
    feed_ids = [f[0] for f in feed_counts]
    # Keys are stringified because the dict is handed to the JavaScript map function as scope.
    feed_last_reads = dict((str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids)

    # After excluding read stories, all that's left are stories
    # past the mark_read_date. Everything returned is guaranteed to be unread.
    mstories = MStory.objects(
        id__nin=read_stories,
        story_feed_id__in=feed_ids,
        story_date__gte=start - bottom_delta
    ).map_reduce("""function() {
            var d = feed_last_reads[this[~story_feed_id]];
            if (this[~story_date].getTime()/1000 > d) {
                emit(this[~id], this);
            }
        }""",
        """function(key, values) {
            return values[0];
        }""",
        output='inline',
        scope={
            'feed_last_reads': feed_last_reads
        }
    )
    mstories = [story.value for story in mstories]

    # Highest score first. A stable key/reverse sort keeps ties in their
    # original order, exactly like the former cmp-based descending sort, while
    # calling story_score once per story.
    mstories.sort(key=lambda story: story_score(story, bottom_delta), reverse=True)

    # `limit` can be zero or negative when read_stories_count is large; clamp
    # the end index so that case yields no stories instead of a negative slice.
    last_index = max(offset, offset + limit)
    stories = [bunch(story) for story in mstories[offset:last_index]]
    stories = Feed.format_stories(stories)
    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))

    # Find starred stories
    starred_stories = MStarredStory.objects(
        user_id=user.pk,
        story_feed_id__in=found_feed_ids
    ).only('story_guid', 'starred_date')
    starred_stories = dict((story.story_guid, story.starred_date)
                           for story in starred_stories)

    # Intelligence classifiers for all feeds involved
    def sort_by_feed(classifiers):
        """Group classifiers into a feed_id -> list mapping."""
        feed_classifiers = defaultdict(list)
        for classifier in classifiers:
            feed_classifiers[classifier.feed_id].append(classifier)
        return feed_classifiers
    classifier_feeds   = sort_by_feed(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_authors = sort_by_feed(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_titles  = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_tags    = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids))

    # Just need to format stories
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date']  = format_story_link_date__long(story_date, now)
        story['read_status'] = 0
        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        story['intelligence'] = {
            'feed':   apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']),
            'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story),
            'tags':   apply_classifier_tags(classifier_tags[story['story_feed_id']], story),
            'title':  apply_classifier_titles(classifier_titles[story['story_feed_id']], story),
        }

    # Elapsed seconds, rounded to hundredths. The previous string-built value
    # dropped leading zeros of the millisecond part (5 ms was reported as 0.5 s).
    diff = datetime.datetime.utcnow() - start
    timediff = round(diff.seconds + diff.microseconds / 1000000.0, 2)
    logging.user(request.user, "~FCLoading river stories: page %s - ~SB%s/%s "
                               "stories ~SN(%s/%s/%s feeds) ~FB(%s seconds)" %
                               (page, len(stories), len(mstories), len(found_feed_ids),
                               len(feed_ids), len(original_feed_ids), timediff))

    return dict(stories=stories)
Example #20
0
def load_social_page(request, user_id, username=None, **kwargs):
    """Render a user's page of shared stories (their blurblog).

    Args:
        request: The current request. ``offset``, ``limit``, ``page``,
            ``format`` and ``feed_id`` are read from ``request.REQUEST``.
        user_id: Primary key of the user whose shared stories are shown;
            a 404 is raised when no such ``User`` exists.
        username: Accepted but not used by this function.
        **kwargs: May carry ``feed_id`` to restrict stories to one feed
            (takes precedence over the request parameter).

    Returns:
        The rendered ``social/social_stories.xhtml`` template when
        ``format == "html"``, otherwise ``social/social_page.xhtml``. When the
        user has no matching shared stories, ``social/social_page.xhtml`` is
        rendered with an empty story list.
    """
    start = time.time()
    user = request.user
    social_user_id = int(user_id)
    social_user = get_object_or_404(User, pk=social_user_id)
    offset = int(request.REQUEST.get("offset", 0))
    limit = int(request.REQUEST.get("limit", 6))
    page = request.REQUEST.get("page")
    response_format = request.REQUEST.get("format", None)
    has_next_page = False
    feed_id = kwargs.get("feed_id") or request.REQUEST.get("feed_id")
    # `page` arrives as a string. Parse it once so it can be compared
    # numerically below (comparing the raw string with an int is always true
    # on Python 2 and a TypeError on Python 3).
    page_number = int(page) if page else 0
    if page:
        offset = limit * (page_number - 1)

    user_social_profile = None
    user_social_services = None
    if user.is_authenticated():
        user_social_profile = MSocialProfile.get_user(user.pk)
        user_social_services = MSocialServices.get_user(user.pk)
    social_profile = MSocialProfile.get_user(social_user_id)

    params = dict(user_id=social_user.pk)
    if feed_id:
        params["story_feed_id"] = feed_id
    # Fetch one story more than requested to learn whether a next page exists.
    mstories = MSharedStory.objects(**params).order_by("-shared_date")[offset : offset + limit + 1]
    stories = Feed.format_stories(mstories)
    if len(stories) > limit:
        has_next_page = True
        stories = stories[:-1]

    checkpoint1 = time.time()

    if not stories:
        params = {
            "user": user,
            "stories": [],
            "feeds": {},
            "social_user": social_user,
            "social_profile": social_profile,
            "user_social_services": user_social_services,
            "user_social_profile": json.encode(user_social_profile and user_social_profile.page()),
        }
        template = "social/social_page.xhtml"
        return render_to_response(template, params, context_instance=RequestContext(request))

    story_feed_ids = list(set(s["story_feed_id"] for s in stories))
    feeds = Feed.objects.filter(pk__in=story_feed_ids)
    feeds = dict((feed.pk, feed.canonical(include_favicon=False)) for feed in feeds)
    for story in stories:
        if story["story_feed_id"] in feeds:
            # Feed could have been deleted.
            story["feed"] = feeds[story["story_feed_id"]]
        # Shared dates are shown in the blurblog owner's timezone.
        shared_date = localtime_for_timezone(story["shared_date"], social_user.profile.timezone)
        story["shared_date"] = shared_date

    stories, profiles = MSharedStory.stories_with_comments_and_profiles(stories, social_user.pk, check_all=True)

    checkpoint2 = time.time()

    if user.is_authenticated():
        for story in stories:
            if user.pk in story["share_user_ids"]:
                # The viewer shared this story too: attach their own comments.
                story["shared_by_user"] = True
                shared_story = MSharedStory.objects.get(
                    user_id=user.pk, story_feed_id=story["story_feed_id"], story_guid=story["id"]
                )
                story["user_comments"] = shared_story.comments

    stories = MSharedStory.attach_users_to_stories(stories, profiles)

    params = {
        "social_user": social_user,
        "stories": stories,
        "user_social_profile": user_social_profile,
        "user_social_profile_page": json.encode(user_social_profile and user_social_profile.page()),
        "user_social_services": user_social_services,
        "user_social_services_page": json.encode(user_social_services and user_social_services.to_json()),
        "social_profile": social_profile,
        "feeds": feeds,
        "user_profile": hasattr(user, "profile") and user.profile,
        "has_next_page": has_next_page,
        "holzer_truism": random.choice(jennyholzer.TRUISMS),  # if not has_next_page else None
    }

    diff1 = checkpoint1 - start
    diff2 = checkpoint2 - start
    timediff = time.time() - start
    # Only pages after the first are tagged with their page number in the log.
    page_suffix = ("~SN/p%s" % page) if page_number > 1 else ""
    logging.user(
        request,
        "~FYLoading ~FMsocial page~FY: ~SB%s%s ~SN(%.4s seconds, ~SB%.4s/%.4s~SN)"
        % (social_profile.title[:22], page_suffix, timediff, diff1, diff2),
    )
    if response_format == "html":
        template = "social/social_stories.xhtml"
    else:
        template = "social/social_page.xhtml"

    return render_to_response(template, params, context_instance=RequestContext(request))
Example #21
0
    def calculate_feed_scores(self, silent=False, stories_db=None):
        """Recompute and persist this subscription's unread counts.

        Stories newer than the effective cutoff that the user has not read are
        classified as positive, neutral or negative with the user's
        author/tag/title classifiers (the feed classifier decides when all
        three are zero). The counts are stored on the subscription, which is
        then saved, and the ``usersub:<user id>`` cache key is deleted.

        Args:
            silent: When true, suppress the informational log lines.
            stories_db: Optional iterable of ``MStory`` documents to score;
                when falsy, the feed's stories since the cutoff are queried.

        Returns:
            None. The method returns early, without scoring, when the user was
            last seen before the ``settings.DAYS_OF_UNREAD`` cutoff, or when
            the feed has never been fetched (in which case
            ``needs_unread_recalc`` is cleared and saved).
        """
        now = datetime.datetime.utcnow()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        if self.user.profile.last_seen_on < UNREAD_CUTOFF:
            return

        if not self.feed.fetched_once:
            if not silent:
                logging.info(' ---> [%s] NOT Computing scores: %s' %
                             (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return

        if not silent:
            logging.info(' ---> [%s] Computing scores: %s' %
                         (self.user, self.feed))
        feed_scores = dict(negative=0, neutral=0, positive=0)

        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        read_stories = MUserStory.objects(user_id=self.user.pk,
                                          feed_id=self.feed.pk,
                                          read_date__gte=self.mark_read_date)
        # Collected in a set: it is probed once per candidate story below, so
        # a list would make that loop quadratic.
        read_stories_ids = set()
        for us in read_stories:
            if hasattr(us.story, 'story_guid') and isinstance(
                    us.story.story_guid, unicode):
                read_stories_ids.add(us.story.story_guid)
            elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
                # TODO: Remove me after migration from story.id->guid
                read_stories_ids.add(us.story.id)
        stories_db = stories_db or MStory.objects(story_feed_id=self.feed.pk,
                                                  story_date__gte=date_delta)

        # Keep unread stories newer than the cutoff and remember the oldest one.
        oldest_unread_story_date = now
        unread_stories_db = []
        for story in stories_db:
            if story.story_date < date_delta:
                continue
            if hasattr(
                    story,
                    'story_guid') and story.story_guid not in read_stories_ids:
                unread_stories_db.append(story)
                if story.story_date < oldest_unread_story_date:
                    oldest_unread_story_date = story.story_date
        stories = Feed.format_stories(unread_stories_db, self.feed.pk)

        classifier_feeds = MClassifierFeed.objects(user_id=self.user.pk,
                                                   feed_id=self.feed.pk)
        classifier_authors = MClassifierAuthor.objects(user_id=self.user.pk,
                                                       feed_id=self.feed.pk)
        classifier_titles = MClassifierTitle.objects(user_id=self.user.pk,
                                                     feed_id=self.feed.pk)
        classifier_tags = MClassifierTag.objects(user_id=self.user.pk,
                                                 feed_id=self.feed.pk)

        # The feed-level score is identical for every story; compute it once.
        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }

        for story in stories:
            # The classifier querysets are iterated once per story, so they
            # are rewound before each pass.
            classifier_authors.rewind()
            classifier_tags.rewind()
            classifier_titles.rewind()
            scores.update({
                'author':
                apply_classifier_authors(classifier_authors, story),
                'tags':
                apply_classifier_tags(classifier_tags, story),
                'title':
                apply_classifier_titles(classifier_titles, story),
            })

            # A positive author/tag/title score wins over a negative one; the
            # feed score only decides when all three are zero.
            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False

        self.save()

        # With nothing positive or neutral left, the whole feed is marked read.
        if (self.unread_count_positive == 0
                and self.unread_count_neutral == 0):
            self.mark_feed_read()

        cache.delete('usersub:%s' % self.user.id)

        return
Example #22
0
File: views.py Project: 76/NewsBlur
def api_shared_story(request):
    """Return recently shared stories for an IFTTT trigger.

    Reads ``after``, ``before``, ``limit`` (default 50) and ``triggerFields``
    from ``request.body_json``. ``triggerFields['blurblog_user']`` is either a
    numeric user id or ``"all"`` (every user the requester follows through
    ``MSocialSubscription``). Any other value selects no users and therefore
    yields an empty result.

    Stories with a negative classifier score are dropped, as are stories
    shared outside the ``before``/``after`` timestamps when those are given.

    Returns:
        ``{"data": [...]}`` with one entry per story. Entries are sorted by
        ascending timestamp when ``after`` is supplied, otherwise they stay in
        newest-shared-first order.
    """
    user = request.user
    body = request.body_json
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    blurblog_user = fields['blurblog_user']
    entries = []

    if isinstance(blurblog_user, int) or blurblog_user.isdigit():
        social_user_ids = [int(blurblog_user)]
    elif blurblog_user == "all":
        socialsubs = MSocialSubscription.objects.filter(user_id=user.pk)
        social_user_ids = [ss.subscription_user_id for ss in socialsubs]
    else:
        # Unrecognized selector: match nobody instead of failing with an
        # unbound local on the query below.
        social_user_ids = []

    # NOTE(review): `limit` is applied before the before/after and score
    # filters, so fewer than `limit` entries may be returned - confirm intended.
    mstories = MSharedStory.objects(
        user_id__in=social_user_ids
    ).order_by('-shared_date')[:limit]
    stories = Feed.format_stories(mstories)

    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    share_user_ids = list(set([story['user_id'] for story in stories]))
    users = dict([(u.pk, u.username)
                 for u in User.objects.filter(pk__in=share_user_ids).only('pk', 'username')])
    feeds = dict([(f.pk, {
        "title": f.feed_title,
        "website": f.feed_link,
        "address": f.feed_address,
    }) for f in Feed.objects.filter(pk__in=found_feed_ids)])

    classifier_feeds   = list(MClassifierFeed.objects(user_id=user.pk,
                                                      social_user_id__in=social_user_ids))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk,
                                                        social_user_id__in=social_user_ids))
    classifier_titles  = list(MClassifierTitle.objects(user_id=user.pk,
                                                       social_user_id__in=social_user_ids))
    classifier_tags    = list(MClassifierTag.objects(user_id=user.pk,
                                                     social_user_id__in=social_user_ids))
    # Merge with feed specific classifiers
    classifier_feeds   = classifier_feeds + list(MClassifierFeed.objects(user_id=user.pk,
                                                                         feed_id__in=found_feed_ids))
    classifier_authors = classifier_authors + list(MClassifierAuthor.objects(user_id=user.pk,
                                                                             feed_id__in=found_feed_ids))
    classifier_titles  = classifier_titles + list(MClassifierTitle.objects(user_id=user.pk,
                                                                           feed_id__in=found_feed_ids))
    classifier_tags    = classifier_tags + list(MClassifierTag.objects(user_id=user.pk,
                                                                       feed_id__in=found_feed_ids))

    for story in stories:
        # Computed once per story; it is needed for both range checks and for
        # the emitted entry. NOTE(review): "%s" is a platform-specific
        # strftime directive (epoch seconds) - confirm target platforms.
        shared_timestamp = int(story['shared_date'].strftime("%s"))
        if before and shared_timestamp > before: continue
        if after and shared_timestamp < after: continue
        score = compute_story_score(story, classifier_titles=classifier_titles,
                                    classifier_authors=classifier_authors,
                                    classifier_tags=classifier_tags,
                                    classifier_feeds=classifier_feeds)
        if score < 0: continue
        # The feed may be missing from `feeds`; site fields are then None.
        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "StoryScore": score,
            "Comments": story['comments'],
            "Username": users.get(story['user_id']),
            "SharedAt": story['shared_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "Site": feed and feed['title'],
            "SiteURL": feed and feed['website'],
            "SiteRSS": feed and feed['address'],
            "ifttt": {
                "id": story['story_hash'],
                "timestamp": shared_timestamp
            },
        })

    if after:
        entries = sorted(entries, key=lambda s: s['ifttt']['timestamp'])

    logging.user(request, "~FMChecking shared stories from ~SB~FCIFTTT~SN~FM: ~SB~FM%s~FM~SN - ~SB%s~SN stories" % (blurblog_user, len(entries)))

    return {"data": entries}
Example #23
0
File: views.py Project: 76/NewsBlur
def api_unread_story(request, trigger_slug=None):
    """Return unread stories of a feed or folder for an IFTTT trigger.

    Reads ``after``, ``before``, ``limit`` (default 50) and ``triggerFields``
    from ``request.body_json``. ``triggerFields['feed_or_folder']`` is either
    a numeric feed id or a folder title (``"Top Level"`` is looked up as
    ``" "``; ``"all"`` passes no feed filter).

    Stories from trained feeds are scored with the user's classifiers:
    negative scores are dropped, and for the ``"new-unread-focus-story"``
    trigger any score below 1 is dropped as well. Stories from untrained
    feeds keep a score of 0.

    Returns:
        ``{"data": [...]}`` with at most ``limit`` entries, sorted by
        ascending timestamp when ``after`` is supplied.
    """
    user = request.user
    payload = request.body_json
    after = payload.get('after', None)
    before = payload.get('before', None)
    limit = payload.get('limit', 50)
    trigger_fields = payload.get('triggerFields')
    feed_or_folder = trigger_fields['feed_or_folder']
    focus_only = trigger_slug == "new-unread-focus-story"

    def story_timestamp(item):
        # Epoch seconds of the story's publication date.
        return int(item['story_date'].strftime("%s"))

    names_single_feed = isinstance(feed_or_folder, int) or feed_or_folder.isdigit()
    if not names_single_feed:
        # Folder lookup: resolve the folder to feed ids, then to unread stories.
        folder_title = " " if feed_or_folder == "Top Level" else feed_or_folder
        folders = UserSubscriptionFolders.objects.get(user=user)
        flat_folders = folders.flatten_folders()
        feed_ids = None
        if folder_title != "all":
            feed_ids = flat_folders.get(folder_title)
        usersubs = UserSubscription.subs_for_feeds(user.pk, feed_ids=feed_ids,
                                                   read_filter="unread")
        feed_ids = [sub.feed_id for sub in usersubs]
        story_hashes, _unread_feed_story_hashes = UserSubscription.feed_stories(
            user_id=user.pk,
            feed_ids=feed_ids,
            offset=0,
            limit=limit,
            order="newest",
            read_filter="unread",
            usersubs=usersubs,
            cutoff_date=user.profile.unread_cutoff,
        )
        newest_first = MStory.objects(story_hash__in=story_hashes).order_by('-story_date')
        stories = Feed.format_stories(newest_first)
        found_feed_ids = list(set(story['story_feed_id'] for story in stories))
        trained_feed_ids = [sub.feed_id for sub in usersubs if sub.is_trained]
        found_trained_feed_ids = list(set(trained_feed_ids) & set(found_feed_ids))
    else:
        feed_id = int(feed_or_folder)
        usersub = UserSubscription.objects.get(user=user, feed_id=feed_id)
        found_feed_ids = [feed_id]
        found_trained_feed_ids = []
        if usersub.is_trained:
            found_trained_feed_ids = [feed_id]
        stories = usersub.get_stories(order="newest", read_filter="unread",
                                      offset=0, limit=limit,
                                      default_cutoff_date=user.profile.unread_cutoff)

    # Classifiers are only loaded (and only consulted) when a trained feed is involved.
    if found_trained_feed_ids:
        classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk,
                                                        feed_id__in=found_trained_feed_ids))
        classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk,
                                                            feed_id__in=found_trained_feed_ids))
        classifier_titles = list(MClassifierTitle.objects(user_id=user.pk,
                                                          feed_id__in=found_trained_feed_ids))
        classifier_tags = list(MClassifierTag.objects(user_id=user.pk,
                                                      feed_id__in=found_trained_feed_ids))

    feeds = dict(
        (f.pk, {"title": f.feed_title, "website": f.feed_link, "address": f.feed_address})
        for f in Feed.objects.filter(pk__in=found_feed_ids)
    )

    entries = []
    for story in stories:
        if before and story_timestamp(story) > before:
            continue
        if after and story_timestamp(story) < after:
            continue

        score = 0
        if found_trained_feed_ids and story['story_feed_id'] in found_trained_feed_ids:
            score = compute_story_score(story, classifier_titles=classifier_titles,
                                        classifier_authors=classifier_authors,
                                        classifier_tags=classifier_tags,
                                        classifier_feeds=classifier_feeds)
            if score < 0 or (focus_only and score < 1):
                continue

        # The feed may be missing from `feeds`; site fields are then None.
        feed = feeds.get(story['story_feed_id'], None)
        entry = {
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "StoryScore": score,
            "Site": feed and feed['title'],
            "SiteURL": feed and feed['website'],
            "SiteRSS": feed and feed['address'],
            "ifttt": {
                "id": story['story_hash'],
                "timestamp": story_timestamp(story)
            },
        }
        entries.append(entry)

    if after:
        entries.sort(key=lambda entry: entry['ifttt']['timestamp'])

    logging.user(request, "~FYChecking unread%s stories with ~SB~FCIFTTT~SN~FY: ~SB%s~SN - ~SB%s~SN stories" % (" ~SBfocus~SN" if focus_only else "", feed_or_folder, len(entries)))

    return {"data": entries[:limit]}
Example #24
0
def load_river_stories(request):
    """Return a page of unread stories merged across several feeds ("river").

    Request parameters (read from ``request.REQUEST``):
        feeds: repeated feed ids to include.
        page: 1-based page number (default 1).
        read_stories_count: number of stories already read on earlier pages;
            subtracted from the amount fetched.
        new_flag: when truthy, classifiers are included in the response.

    Returns:
        ``dict(stories=[...])`` and, when ``new_flag`` is set, also a
        ``classifiers`` mapping keyed by feed id. An empty ``stories`` list
        is returned immediately when no feed ids were supplied.
    """
    limit              = 18
    offset             = 0
    start              = datetime.datetime.utcnow()
    user               = get_user(request)
    feed_ids           = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
    original_feed_ids  = list(feed_ids)
    page               = int(request.REQUEST.get('page', 1))
    read_stories_count = int(request.REQUEST.get('read_stories_count', 0))
    new_flag           = request.REQUEST.get('new_flag', False)
    bottom_delta       = datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    
    if not feed_ids: 
        logging.user(request, "~FCLoading empty river stories: page %s" % (page))
        return dict(stories=[])
    
    # Fetch all stories at and before the page number.
    # Not a single page, because reading stories can move them up in the unread order.
    # `read_stories_count` is an optimization, works best when every story on
    # the earlier pages has been read.
    limit = limit * page - read_stories_count
    
    # Read stories to exclude
    read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story_id')
    read_stories = [rs.story_id for rs in read_stories]
    
    # Determine mark_as_read dates for all feeds to ignore all stories before this date.
    # Feeds the user is not subscribed to are dropped here.
    feed_counts     = {}
    feed_last_reads = {}
    for feed_id in feed_ids:
        try:
            usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        except UserSubscription.DoesNotExist:
            continue
        if not usersub: continue
        # Weighted unread count: positive stories weigh most, negative least.
        feed_counts[feed_id] = (usersub.unread_count_negative * 1 + 
                                usersub.unread_count_neutral * 10 +
                                usersub.unread_count_positive * 20)
        feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple()))
    # Keep at most 50 feeds, taken in ascending order of weighted unread count.
    feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:50]
    feed_ids = [f[0] for f in feed_counts]
    # Keys are stringified because the mapping is handed to the map function as scope.
    feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids
                            if feed_id in feed_last_reads])
    feed_counts = dict(feed_counts)

    # After excluding read stories, all that's left are stories 
    # past the mark_read_date. Everything returned is guaranteed to be unread.
    mstories = MStory.objects(
        story_guid__nin=read_stories,
        story_feed_id__in=feed_ids,
        # story_date__gte=start - bottom_delta
    ).map_reduce("""function() {
            var d = feed_last_reads[this[~story_feed_id]];
            if (this[~story_date].getTime()/1000 > d) {
                emit(this[~id], this);
            }
        }""",
        """function(key, values) {
            return values[0];
        }""",
        output='inline',
        scope={
            'feed_last_reads': feed_last_reads
        }
    )
    mstories = [story.value for story in mstories if story and story.value]

    # Highest story_score first.
    mstories = sorted(mstories, cmp=lambda x, y: cmp(story_score(y, bottom_delta), story_score(x, bottom_delta)))

    stories = []
    for i, story in enumerate(mstories):
        if i < offset: continue
        if i >= offset + limit: break
        stories.append(bunch(story))
    stories = Feed.format_stories(stories)
    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    
    # Find starred stories
    starred_stories = MStarredStory.objects(
        user_id=user.pk,
        story_feed_id__in=found_feed_ids
    ).only('story_guid', 'starred_date')
    starred_stories = dict([(story.story_guid, story.starred_date) 
                            for story in starred_stories])
    
    # Intelligence classifiers for all feeds involved
    def sort_by_feed(classifiers):
        """Group classifier documents into a defaultdict(list) keyed by feed_id."""
        feed_classifiers = defaultdict(list)
        for classifier in classifiers:
            feed_classifiers[classifier.feed_id].append(classifier)
        return feed_classifiers
    classifier_feeds   = sort_by_feed(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_authors = sort_by_feed(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_titles  = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_tags    = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    
    classifiers = {}
    for feed_id in found_feed_ids:
        classifiers[feed_id] = get_classifiers_for_user(user, feed_id, classifier_feeds[feed_id], 
                                                        classifier_authors[feed_id],
                                                        classifier_titles[feed_id],
                                                        classifier_tags[feed_id])
    
    # Just need to format stories
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date']  = format_story_link_date__long(story_date, now)
        story['read_status'] = 0
        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        story['intelligence'] = {
            'feed':   apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']),
            'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story),
            'tags':   apply_classifier_tags(classifier_tags[story['story_feed_id']], story),
            'title':  apply_classifier_titles(classifier_titles[story['story_feed_id']], story),
        }
    
    diff = datetime.datetime.utcnow() - start
    # Elapsed seconds to two decimals. Computed arithmetically: gluing the
    # millisecond count after a "." as text turns e.g. 45ms into 0.45.
    timediff = round(diff.seconds + diff.microseconds / 1000000.0, 2)
    logging.user(request, "~FCLoading river stories: page %s - ~SB%s/%s "
                               "stories ~SN(%s/%s/%s feeds) ~FB(%s seconds)" % 
                               (page, len(stories), len(mstories), len(found_feed_ids), 
                               len(feed_ids), len(original_feed_ids), timediff))
    
    if new_flag:
        return dict(stories=stories, classifiers=classifiers)
    else:
        logging.user(request, "~BR~FCNo new flag on river")
        return dict(stories=stories)
Example #25
0
    def get_stories(self,
                    offset=0,
                    limit=6,
                    order='newest',
                    read_filter='all',
                    withscores=False,
                    hashes_only=False):
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        ignore_user_stories = False

        stories_key = 'F:%s' % (self.feed_id)
        read_stories_key = 'RS:%s:%s' % (self.user_id, self.feed_id)
        unread_stories_key = 'U:%s:%s' % (self.user_id, self.feed_id)

        unread_ranked_stories_key = 'z%sU:%s:%s' % ('h' if hashes_only else '',
                                                    self.user_id, self.feed_id)
        if offset and not withscores and r.exists(unread_ranked_stories_key):
            pass
        else:
            r.delete(unread_ranked_stories_key)
            if not r.exists(stories_key):
                print " ---> No stories on feed: %s" % self
                return []
            elif read_filter != 'unread' or not r.exists(read_stories_key):
                ignore_user_stories = True
                unread_stories_key = stories_key
            else:
                r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
            sorted_stories_key = 'zF:%s' % (self.feed_id)
            r.zinterstore(unread_ranked_stories_key,
                          [sorted_stories_key, unread_stories_key])

        current_time = int(time.time() + 60 * 60 * 24)
        if order == 'oldest':
            byscorefunc = r.zrangebyscore
            if read_filter == 'unread':
                min_score = int(time.mktime(
                    self.mark_read_date.timetuple())) + 1
            else:
                now = datetime.datetime.now()
                two_weeks_ago = now - datetime.timedelta(
                    days=settings.DAYS_OF_UNREAD)
                min_score = int(time.mktime(two_weeks_ago.timetuple())) - 1000
            max_score = current_time
        else:
            byscorefunc = r.zrevrangebyscore
            min_score = current_time
            if read_filter == 'unread':
                # +1 for the intersection b/w zF and F, which carries an implicit score of 1.
                max_score = int(time.mktime(
                    self.mark_read_date.timetuple())) + 1
            else:
                max_score = 0

        if settings.DEBUG:
            debug_stories = r.zrevrange(unread_ranked_stories_key,
                                        0,
                                        -1,
                                        withscores=True)
            print " ---> Unread all stories (%s - %s) %s stories: %s" % (
                min_score, max_score, len(debug_stories), debug_stories)
        story_ids = byscorefunc(unread_ranked_stories_key,
                                min_score,
                                max_score,
                                start=offset,
                                num=500,
                                withscores=withscores)[:limit]
        r.expire(unread_ranked_stories_key, 1 * 60 * 60)
        if not ignore_user_stories:
            r.delete(unread_stories_key)

        if withscores or hashes_only:
            return story_ids
        elif story_ids:
            story_date_order = "%sstory_date" % (''
                                                 if order == 'oldest' else '-')
            mstories = MStory.objects(
                story_hash__in=story_ids).order_by(story_date_order)
            stories = Feed.format_stories(mstories)
            return stories
        else:
            return []
Example #26
0
    def calculate_feed_scores(self, silent=False, stories=None, force=False):
        """Recount this subscription's unread stories per classifier bucket.

        Args:
            silent: when true, the summary log line is suppressed.
            stories: optional pre-formatted story dicts for the feed. When
                empty, the 'S:<feed_id>' cache entry is tried, then MStory.
            force: recompute even if the user's profile ``last_seen_on`` is
                older than the unread cutoff.

        Returns:
            ``self`` after saving the new counts, or ``None`` when skipped
            because the user has not been seen since the cutoff.
        """
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        if self.user.profile.last_seen_on < UNREAD_CUTOFF and not force:
            return

        feed_scores = dict(negative=0, neutral=0, positive=0)

        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        if not stories:
            stories = cache.get('S:%s' % self.feed_id)

        unread_story_hashes = self.get_stories(read_filter='unread',
                                               limit=500,
                                               hashes_only=True)

        if not stories:
            stories_db = MStory.objects(story_hash__in=unread_story_hashes)
            stories = Feed.format_stories(stories_db, self.feed_id)

        # Set for constant-time membership tests inside the loop below.
        unread_hash_lookup = set(unread_story_hashes)

        oldest_unread_story_date = now
        unread_stories = []
        for story in stories:
            if story['story_date'] < date_delta:
                continue
            if story['story_hash'] in unread_hash_lookup:
                unread_stories.append(story)
                if story['story_date'] < oldest_unread_story_date:
                    oldest_unread_story_date = story['story_date']

        classifier_feeds = list(
            MClassifierFeed.objects(user_id=self.user_id,
                                    feed_id=self.feed_id,
                                    social_user_id=0))
        classifier_authors = list(
            MClassifierAuthor.objects(user_id=self.user_id,
                                      feed_id=self.feed_id))
        classifier_titles = list(
            MClassifierTitle.objects(user_id=self.user_id,
                                     feed_id=self.feed_id))
        classifier_tags = list(
            MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

        # The feed-level score is the same for every story, so compute it once.
        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }

        for story in unread_stories:
            scores.update({
                'author':
                apply_classifier_authors(classifier_authors, story),
                'tags':
                apply_classifier_tags(classifier_tags, story),
                'title':
                apply_classifier_titles(classifier_titles, story),
            })

            # Any positive author/tag/title match wins, then any negative one;
            # the feed-level score only decides when all three are neutral.
            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False

        self.save()

        # Negative-only unreads do not keep the feed marked as unread.
        if (self.unread_count_positive == 0
                and self.unread_count_neutral == 0):
            self.mark_feed_read()

        if not silent:
            logging.user(
                self.user,
                '~FC~SNComputing scores: %s (~SB%s~SN/~SB%s~SN/~SB%s~SN)' %
                (self.feed, feed_scores['negative'], feed_scores['neutral'],
                 feed_scores['positive']))

        return self
Example #27
0
def load_social_stories(request, user_id, username=None):
    """Return the stories shared by one user, annotated for the requester.

    Args:
        request: the HTTP request; ``offset``, ``limit``, ``page``, ``order``
            and ``read_filter`` are read from ``request.REQUEST``. A ``page``
            value overrides ``offset``.
        user_id: id of the user whose shared stories are loaded (404 if no
            such user).
        username: unused in this function.

    Returns:
        ``dict(stories=[])`` when nothing was found; otherwise a dict with
        ``stories``, ``user_profiles``, ``feeds`` (canonical form of story
        feeds the requester is not subscribed to) and ``classifiers``.
    """
    start          = time.time()
    user           = get_user(request)
    social_user_id = int(user_id)
    social_user    = get_object_or_404(User, pk=social_user_id)
    offset         = int(request.REQUEST.get('offset', 0))
    limit          = int(request.REQUEST.get('limit', 6))
    page           = request.REQUEST.get('page')
    order          = request.REQUEST.get('order', 'newest')
    read_filter    = request.REQUEST.get('read_filter', 'all')
    
    if page: offset = limit * (int(page) - 1)
    now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
    UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    
    social_profile = MSocialProfile.get_user(social_user.pk)
    try:
        socialsub = MSocialSubscription.objects.get(user_id=user.pk, subscription_user_id=social_user_id)
    except MSocialSubscription.DoesNotExist:
        socialsub = None

    # Use the subscription's own ordering/filtering when it applies and yields
    # ids; in every other case fall back to the newest shared stories.
    mstories = None
    if socialsub and (read_filter == 'unread' or order == 'oldest'):
        story_ids = socialsub.get_stories(order=order, read_filter=read_filter, offset=offset, limit=limit)
        if story_ids:
            story_date_order = "%sshared_date" % ('' if order == 'oldest' else '-')
            mstories = MSharedStory.objects(user_id=social_user.pk,
                                            story_db_id__in=story_ids).order_by(story_date_order)
    if mstories is None:
        mstories = MSharedStory.objects(user_id=social_user.pk).order_by('-shared_date')[offset:offset+limit]
    stories = Feed.format_stories(mstories)

    if not stories:
        return dict(stories=[])
    
    checkpoint1 = time.time()
    
    stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, user.pk, check_all=True)

    story_feed_ids = list(set(s['story_feed_id'] for s in stories))
    usersubs = UserSubscription.objects.filter(user__pk=user.pk, feed__pk__in=story_feed_ids)
    usersubs_map = dict((sub.feed_id, sub) for sub in usersubs)
    unsub_feed_ids = list(set(story_feed_ids).difference(set(usersubs_map.keys())))
    unsub_feeds = Feed.objects.filter(pk__in=unsub_feed_ids)
    unsub_feeds = [feed.canonical(include_favicon=False) for feed in unsub_feeds]
    # Shares older than this are treated as read.
    date_delta = UNREAD_CUTOFF
    if socialsub and date_delta < socialsub.mark_read_date:
        date_delta = socialsub.mark_read_date
    
    # Get intelligence classifier for user
    classifier_feeds   = list(MClassifierFeed.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_titles  = list(MClassifierTitle.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_tags    = list(MClassifierTag.objects(user_id=user.pk, social_user_id=social_user_id))
    # Merge with feed specific classifiers
    classifier_feeds   = classifier_feeds + list(MClassifierFeed.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_authors = classifier_authors + list(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_titles  = classifier_titles + list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_tags    = classifier_tags + list(MClassifierTag.objects(user_id=user.pk, feed_id__in=story_feed_ids))

    checkpoint2 = time.time()
    
    story_ids = [story['id'] for story in stories]
    userstories_db = MUserStory.objects(user_id=user.pk,
                                        feed_id__in=story_feed_ids,
                                        story_id__in=story_ids).only('story_id')
    userstories = set(us.story_id for us in userstories_db)

    starred_stories = MStarredStory.objects(user_id=user.pk, 
                                            story_feed_id__in=story_feed_ids, 
                                            story_guid__in=story_ids).only('story_guid', 'starred_date')
    shared_stories = MSharedStory.objects(user_id=user.pk, 
                                          story_feed_id__in=story_feed_ids, 
                                          story_guid__in=story_ids)\
                                 .only('story_guid', 'shared_date', 'comments')
    starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])
    shared_stories = dict([(story.story_guid, dict(shared_date=story.shared_date, comments=story.comments))
                           for story in shared_stories])
    
    for story in stories:
        story['social_user_id'] = social_user_id
        story_feed_id = story['story_feed_id']
        shared_date = localtime_for_timezone(story['shared_date'], user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(shared_date, now)
        story['long_parsed_date'] = format_story_link_date__long(shared_date, now)
        
        # Read status: read unless the requester follows this user, has not
        # read the story, and the share is newer than every applicable cutoff.
        if not socialsub:
            story['read_status'] = 1
        elif story['id'] in userstories:
            story['read_status'] = 1
        elif story['shared_date'] < date_delta:
            story['read_status'] = 1
        elif not usersubs_map.get(story_feed_id):
            story['read_status'] = 0
        elif not story.get('read_status') and story['story_date'] < usersubs_map[story_feed_id].mark_read_date:
            story['read_status'] = 1
        else:
            story['read_status'] = 0

        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        if story['id'] in shared_stories:
            # The requester has shared this story too: replace shared_date with
            # the formatted date of their own share.
            story['shared'] = True
            shared_date = localtime_for_timezone(shared_stories[story['id']]['shared_date'],
                                                 user.profile.timezone)
            story['shared_date'] = format_story_link_date__long(shared_date, now)
            story['shared_comments'] = strip_tags(shared_stories[story['id']]['comments'])

        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, story['story_feed_id'],
                                           social_user_id=social_user_id),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }
    
    
    classifiers = sort_classifiers_by_feed(user=user, feed_ids=story_feed_ids,
                                           classifier_feeds=classifier_feeds,
                                           classifier_authors=classifier_authors,
                                           classifier_titles=classifier_titles,
                                           classifier_tags=classifier_tags)
                                           
    if socialsub:
        socialsub.feed_opens += 1
        socialsub.save()
    
    diff1 = checkpoint1-start
    diff2 = checkpoint2-start
    # `page` is the raw request string; compare it numerically so that page 1
    # is not tagged in the log line.
    page_tag = ('~SN/p%s' % page) if page and int(page) > 1 else ''
    logging.user(request, "~FYLoading ~FMshared stories~FY: ~SB%s%s ~SN(~SB%.4ss/%.4ss~SN)" % (
    social_profile.title[:22], page_tag, diff1, diff2))

    return {
        "stories": stories, 
        "user_profiles": user_profiles, 
        "feeds": unsub_feeds, 
        "classifiers": classifiers,
    }
Example #28
0
    def calculate_feed_scores(self, silent=False, stories_db=None):
        """Recount this subscription's unread stories per classifier bucket.

        Args:
            silent: when true, log lines are suppressed.
            stories_db: optional iterable of story documents for the feed;
                when falsy, MStory is queried from the read cutoff onwards.

        Returns:
            ``self`` after saving the new counts, or ``None`` when skipped
            (user not seen since the unread cutoff, or feed never fetched).
        """
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        if self.user.profile.last_seen_on < UNREAD_CUTOFF:
            return

        if not self.feed.fetched_once:
            if not silent:
                logging.info(' ---> [%s] NOT Computing scores: %s' %
                             (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return

        feed_scores = dict(negative=0, neutral=0, positive=0)

        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        read_stories = MUserStory.objects(user_id=self.user_id,
                                          feed_id=self.feed_id,
                                          read_date__gte=self.mark_read_date)
        # Set for constant-time membership tests inside the story loop.
        read_stories_ids = set(us.story_id for us in read_stories)
        stories_db = stories_db or MStory.objects(story_feed_id=self.feed_id,
                                                  story_date__gte=date_delta)
        oldest_unread_story_date = now
        unread_stories_db = []
        for story in stories_db:
            if story.story_date < date_delta:
                continue
            if hasattr(
                    story,
                    'story_guid') and story.story_guid not in read_stories_ids:
                unread_stories_db.append(story)
                if story.story_date < oldest_unread_story_date:
                    oldest_unread_story_date = story.story_date
        stories = Feed.format_stories(unread_stories_db, self.feed_id)

        classifier_feeds = list(
            MClassifierFeed.objects(user_id=self.user_id,
                                    feed_id=self.feed_id,
                                    social_user_id=0))
        classifier_authors = list(
            MClassifierAuthor.objects(user_id=self.user_id,
                                      feed_id=self.feed_id))
        classifier_titles = list(
            MClassifierTitle.objects(user_id=self.user_id,
                                     feed_id=self.feed_id))
        classifier_tags = list(
            MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

        # The feed-level score is the same for every story, so compute it once.
        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }

        for story in stories:
            scores.update({
                'author':
                apply_classifier_authors(classifier_authors, story),
                'tags':
                apply_classifier_tags(classifier_tags, story),
                'title':
                apply_classifier_titles(classifier_titles, story),
            })

            # Any positive author/tag/title match wins, then any negative one;
            # the feed-level score only decides when all three are neutral.
            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False

        self.save()

        # Only a feed with no unread stories in any bucket is marked read.
        if (self.unread_count_positive == 0 and self.unread_count_neutral == 0
                and self.unread_count_negative == 0):
            self.mark_feed_read()

        if not silent:
            logging.info(' ---> [%s] Computing scores: %s (%s/%s/%s)' %
                         (self.user, self.feed, feed_scores['negative'],
                          feed_scores['neutral'], feed_scores['positive']))

        return self
Example #29
0
    def get_stories(self, offset=0, limit=6, order="newest", read_filter="all", withscores=False, hashes_only=False):
        """Fetch this subscription's stories through the Redis ranked index.

        The ranked key ("zU:..." or "zhU:...") is rebuilt unless ``offset``
        is non-zero, ``withscores`` is false and the key already exists.

        Args:
            offset: index of the first entry to return from the score range.
            limit: maximum number of entries returned.
            order: "oldest" walks scores ascending; anything else descending.
            read_filter: "unread" excludes the user's read stories and bounds
                the range by ``mark_read_date``.
            withscores: return ``(member, int(score))`` pairs.
            hashes_only: selects the "zhU" key variant and returns the raw
                members instead of story dicts.

        Returns:
            A list of pairs when ``withscores`` is set, the raw members when
            only ``hashes_only`` is set, otherwise ``Feed.format_stories``
            output for the matching ``MStory`` documents. ``[]`` when nothing
            matched or the feed has no stories key.
        """
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        ignore_user_stories = False

        stories_key = "F:%s" % (self.feed_id)
        read_stories_key = "RS:%s:%s" % (self.user_id, self.feed_id)
        unread_stories_key = "U:%s:%s" % (self.user_id, self.feed_id)

        unread_ranked_stories_key = "z%sU:%s:%s" % ("h" if hashes_only else "", self.user_id, self.feed_id)
        if offset and not withscores and r.exists(unread_ranked_stories_key):
            # Reuse the ranking stored by an earlier call.
            pass
        else:
            r.delete(unread_ranked_stories_key)
            if not r.exists(stories_key):
                return []
            elif read_filter != "unread" or not r.exists(read_stories_key):
                # Nothing to subtract: rank the feed's own key directly.
                ignore_user_stories = True
                unread_stories_key = stories_key
            else:
                r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
            sorted_stories_key = "zF:%s" % (self.feed_id)
            r.zinterstore(unread_ranked_stories_key, [sorted_stories_key, unread_stories_key])

        # Upper time bound is a day ahead of the current time.
        current_time = int(time.time() + 60 * 60 * 24)
        if order == "oldest":
            byscorefunc = r.zrangebyscore
            if read_filter == "unread":
                min_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
            else:
                now = datetime.datetime.now()
                two_weeks_ago = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
                min_score = int(time.mktime(two_weeks_ago.timetuple())) - 1000
            max_score = current_time
        else:
            # NOTE(review): redis-py's zrevrangebyscore takes (name, max, min),
            # so in this branch `min_score` carries the upper bound and
            # `max_score` the lower one; the positional call below relies on it.
            byscorefunc = r.zrevrangebyscore
            min_score = current_time
            if read_filter == "unread":
                # +1 for the intersection b/w zF and F, which carries an implicit score of 1.
                max_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
            else:
                max_score = 0

        story_ids = byscorefunc(
            unread_ranked_stories_key, min_score, max_score, start=offset, num=500, withscores=withscores
        )[:limit]
        r.expire(unread_ranked_stories_key, 1 * 60 * 60)
        if not ignore_user_stories:
            # Remove the per-user scratch key.
            r.delete(unread_stories_key)

        if withscores:
            story_ids = [(s[0], int(s[1])) for s in story_ids]

        if withscores or hashes_only:
            return story_ids
        elif story_ids:
            story_date_order = "%sstory_date" % ("" if order == "oldest" else "-")
            mstories = MStory.objects(story_hash__in=story_ids).order_by(story_date_order)
            stories = Feed.format_stories(mstories)
            return stories
        else:
            return []
Example #30
0
    def calculate_feed_scores(self, silent=False, stories_db=None):
        """Recount this subscription's unread stories, bucketed by classifier score.

        Every unread story newer than the effective cutoff is counted as
        positive, neutral or negative according to the user's author, tag,
        title and feed classifiers. The three counts, the oldest unread story
        date and a cleared ``needs_unread_recalc`` flag are then saved on the
        subscription, and the ``usersub:<user id>`` cache entry is deleted.

        Args:
            silent: When true, suppress the informational log lines.
            stories_db: Optional iterable of story documents to score. When
                falsy, ``MStory`` is queried for this feed's stories dated on
                or after the cutoff.

        Returns:
            None. Returns early (without recounting) when the user was last
            seen before the unread cutoff, or when the feed has never been
            fetched (in which case only ``needs_unread_recalc`` is cleared).
        """
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        # Users not seen since the cutoff are skipped without touching the counts.
        if self.user.profile.last_seen_on < UNREAD_CUTOFF:
            return

        if not self.feed.fetched_once:
            if not silent:
                logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return

        feed_scores = dict(negative=0, neutral=0, positive=0)

        # The effective cutoff is the later of UNREAD_CUTOFF and mark_read_date.
        # If mark_read_date is the older one it is advanced to the cutoff, so
        # stories older than the cutoff are treated as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        read_stories = MUserStory.objects(user_id=self.user.pk,
                                          feed_id=self.feed.pk,
                                          read_date__gte=self.mark_read_date)
        # A set keeps the per-story "already read?" test below O(1) instead of
        # scanning a list once for every story in the feed.
        read_stories_ids = set(us.story_id for us in read_stories)

        stories_db = stories_db or MStory.objects(story_feed_id=self.feed.pk,
                                                  story_date__gte=date_delta)

        oldest_unread_story_date = now
        unread_stories_db = []
        for story in stories_db:
            # Caller-supplied stories may include ones older than the cutoff.
            if story.story_date < date_delta:
                continue
            if hasattr(story, 'story_guid') and story.story_guid not in read_stories_ids:
                unread_stories_db.append(story)
                if story.story_date < oldest_unread_story_date:
                    oldest_unread_story_date = story.story_date
        stories = Feed.format_stories(unread_stories_db, self.feed.pk)

        classifier_feeds   = list(MClassifierFeed.objects(user_id=self.user.pk, feed_id=self.feed.pk))
        classifier_authors = list(MClassifierAuthor.objects(user_id=self.user.pk, feed_id=self.feed.pk))
        classifier_titles  = list(MClassifierTitle.objects(user_id=self.user.pk, feed_id=self.feed.pk))
        classifier_tags    = list(MClassifierTag.objects(user_id=self.user.pk, feed_id=self.feed.pk))

        # The feed-level score is the same for every story, so compute it once.
        feed_score = apply_classifier_feeds(classifier_feeds, self.feed)

        for story in stories:
            author_score = apply_classifier_authors(classifier_authors, story)
            tags_score = apply_classifier_tags(classifier_tags, story)
            title_score = apply_classifier_titles(classifier_titles, story)

            max_score = max(author_score, tags_score, title_score)
            min_score = min(author_score, tags_score, title_score)
            # Precedence: any positive story-level classifier wins, then any
            # negative one; the feed-level score only breaks an all-neutral tie.
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            elif feed_score > 0:
                feed_scores['positive'] += 1
            elif feed_score < 0:
                feed_scores['negative'] += 1
            else:
                feed_scores['neutral'] += 1

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False

        self.save()

        cache.delete('usersub:%s' % self.user.id)

        if not silent:
            logging.info(' ---> [%s] Computing scores: %s (%s/%s/%s)' % (self.user, self.feed, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))
Example #31
0
    def calculate_feed_scores(self, silent=False, stories=None, force=False):
        """Recount this subscription's unread stories and persist the totals.

        For a trained subscription (``self.is_trained``) each unread story is
        bucketed as positive, neutral or negative using the user's author, tag,
        title and feed classifiers. For an untrained subscription every unread
        story hash is counted as neutral. The counts, the oldest unread story
        date and a cleared ``needs_unread_recalc`` flag are saved; if no
        positive or neutral unreads remain the feed is marked read, and
        finally ``trim_read_stories`` is called.

        Args:
            silent: When true, suppress the log lines.
            stories: Optional pre-formatted story dicts (needing at least
                ``story_date`` and ``story_hash``). When falsy, the ``S:<feed id>``
                cache entry is tried, then ``MStory`` is queried.
            force: Recount even if the user was last seen before their
                unread cutoff.

        Returns:
            This subscription (``self``), also on the early-exit path.
        """
        now = datetime.datetime.now()
        oldest_unread_story_date = now

        # Users not seen since their cutoff are skipped unless forced.
        if self.user.profile.last_seen_on < self.user.profile.unread_cutoff and not force:
            return self

        # Previous counts, kept only for the before/after log line.
        ong = self.unread_count_negative
        ont = self.unread_count_neutral
        ops = self.unread_count_positive

        feed_scores = dict(negative=0, neutral=0, positive=0)

        # The effective cutoff is the later of the profile's unread cutoff and
        # mark_read_date; a lagging mark_read_date is advanced to the cutoff.
        date_delta = self.user.profile.unread_cutoff
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        if self.is_trained:
            if not stories:
                stories = cache.get('S:%s' % self.feed_id)

            unread_story_hashes = self.story_hashes(user_id=self.user_id, feed_ids=[self.feed_id],
                                                    usersubs=[self],
                                                    read_filter='unread', group_by_feed=False,
                                                    cutoff_date=self.user.profile.unread_cutoff)

            if not stories:
                stories_db = MStory.objects(story_hash__in=unread_story_hashes)
                stories = Feed.format_stories(stories_db, self.feed_id)

            # Membership is tested once per story; a set makes each test O(1)
            # instead of scanning the hash sequence every time.
            unread_hash_lookup = set(unread_story_hashes)
            unread_stories = []
            for story in stories:
                if story['story_date'] < date_delta:
                    continue
                if story['story_hash'] in unread_hash_lookup:
                    unread_stories.append(story)
                    if story['story_date'] < oldest_unread_story_date:
                        oldest_unread_story_date = story['story_date']

            classifier_feeds   = list(MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0))
            classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
            classifier_titles  = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
            classifier_tags    = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

            # No classifiers left: flag the subscription as untrained. This
            # pass still goes through the (all-neutral) scoring loop below.
            if not (classifier_feeds or classifier_authors or
                    classifier_titles or classifier_tags):
                self.is_trained = False

            # The feed-level score is the same for every story; compute it once.
            feed_score = apply_classifier_feeds(classifier_feeds, self.feed)

            for story in unread_stories:
                author_score = apply_classifier_authors(classifier_authors, story)
                tags_score = apply_classifier_tags(classifier_tags, story)
                title_score = apply_classifier_titles(classifier_titles, story)

                max_score = max(author_score, tags_score, title_score)
                min_score = min(author_score, tags_score, title_score)
                # Precedence: any positive story-level classifier wins, then
                # any negative one; the feed score only breaks a neutral tie.
                if max_score > 0:
                    feed_scores['positive'] += 1
                elif min_score < 0:
                    feed_scores['negative'] += 1
                elif feed_score > 0:
                    feed_scores['positive'] += 1
                elif feed_score < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1
        else:
            unread_story_hashes = self.story_hashes(user_id=self.user_id, feed_ids=[self.feed_id],
                                                    usersubs=[self],
                                                    read_filter='unread', group_by_feed=False,
                                                    include_timestamps=True,
                                                    cutoff_date=self.user.profile.unread_cutoff)

            feed_scores['neutral'] = len(unread_story_hashes)
            if feed_scores['neutral']:
                # Each entry is indexed as (hash, timestamp). NOTE(review): taking
                # the last entry as the oldest assumes newest-first ordering
                # from story_hashes -- confirm.
                oldest_unread_story_date = datetime.datetime.fromtimestamp(unread_story_hashes[-1][1])

        if not silent:
            logging.user(self.user, '~FBUnread count (~SB%s~SN%s): ~SN(~FC%s~FB/~FC%s~FB/~FC%s~FB) ~SBto~SN (~FC%s~FB/~FC%s~FB/~FC%s~FB)' % (self.feed_id, '/~FMtrained~FB' if self.is_trained else '', ong, ont, ops, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False

        self.save()

        # Only negatively-scored stories (or none) remain: mark the feed read.
        if (self.unread_count_positive == 0 and
            self.unread_count_neutral == 0):
            self.mark_feed_read()

        if not silent:
            logging.user(self.user, '~FC~SNComputing scores: %s (~SB%s~SN/~SB%s~SN/~SB%s~SN)' % (self.feed, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))

        self.trim_read_stories()

        return self
Example #32
0
    mstories = sorted(mstories, cmp=lambda x, y: cmp(story_score(y, bottom_delta), story_score(x, bottom_delta)))

    # story_feed_counts = defaultdict(int)
    # mstories_pruned = []
    # for story in mstories:
    #     print story['story_title'], story_feed_counts[story['story_feed_id']]
    #     if story_feed_counts[story['story_feed_id']] >= 3: continue
    #     mstories_pruned.append(story)
    #     story_feed_counts[story['story_feed_id']] += 1
    stories = []
    for i, story in enumerate(mstories):
        if i < offset: continue
        if i >= offset + limit: break
        stories.append(bunch(story))
    stories = Feed.format_stories(stories)
    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    
    # Find starred stories
    starred_stories = MStarredStory.objects(
        user_id=user.pk,
        story_feed_id__in=found_feed_ids
    ).only('story_guid', 'starred_date')
    starred_stories = dict([(story.story_guid, story.starred_date) 
                            for story in starred_stories])
    
    # Intelligence classifiers for all feeds involved
    def sort_by_feed(classifiers):
        feed_classifiers = defaultdict(list)
        for classifier in classifiers:
            feed_classifiers[classifier.feed_id].append(classifier)
Example #33
0
    def calculate_feed_scores(self, silent=False, stories=None, force=False):
        """Recount this subscription's unread stories and persist the totals.

        For a trained subscription (``self.is_trained``) each unread story is
        bucketed as positive, neutral or negative using the user's author, tag,
        title and feed classifiers. For an untrained subscription every unread
        story hash is counted as neutral. The counts, the oldest unread story
        date and a cleared ``needs_unread_recalc`` flag are saved, and the feed
        is marked read if no positive or neutral unreads remain.

        At most 500 unread hashes are requested from ``get_stories``.

        Args:
            silent: When true, suppress the log lines.
            stories: Optional pre-formatted story dicts (needing at least
                ``story_date`` and ``story_hash``). When falsy, the ``S:<feed id>``
                cache entry is tried, then ``MStory`` is queried.
            force: Recount even if the user was last seen before the
                unread cutoff.

        Returns:
            This subscription (``self``), also on the early-exit path.
        """
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        oldest_unread_story_date = now

        # Users not seen since the cutoff are skipped unless forced.
        if self.user.profile.last_seen_on < UNREAD_CUTOFF and not force:
            return self

        # Previous counts, kept only for the before/after log line.
        ong = self.unread_count_negative
        ont = self.unread_count_neutral
        ops = self.unread_count_positive

        feed_scores = dict(negative=0, neutral=0, positive=0)

        # The effective cutoff is the later of UNREAD_CUTOFF and mark_read_date;
        # a lagging mark_read_date is advanced to the cutoff.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        if self.is_trained:
            if not stories:
                stories = cache.get("S:%s" % self.feed_id)

            unread_story_hashes = self.get_stories(read_filter="unread", limit=500, hashes_only=True)

            if not stories:
                stories_db = MStory.objects(story_hash__in=unread_story_hashes)
                stories = Feed.format_stories(stories_db, self.feed_id)

            # Membership is tested once per story; a set makes each test O(1)
            # instead of scanning the hash sequence every time.
            unread_hash_lookup = set(unread_story_hashes)
            unread_stories = []
            for story in stories:
                if story["story_date"] < date_delta:
                    continue
                if story["story_hash"] in unread_hash_lookup:
                    unread_stories.append(story)
                    if story["story_date"] < oldest_unread_story_date:
                        oldest_unread_story_date = story["story_date"]

            classifier_feeds = list(
                MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0)
            )
            classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
            classifier_titles = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
            classifier_tags = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

            # No classifiers left: flag the subscription as untrained. This
            # pass still goes through the (all-neutral) scoring loop below.
            if not (classifier_feeds or classifier_authors or classifier_titles or classifier_tags):
                self.is_trained = False

            # The feed-level score is the same for every story; compute it once.
            feed_score = apply_classifier_feeds(classifier_feeds, self.feed)

            for story in unread_stories:
                author_score = apply_classifier_authors(classifier_authors, story)
                tags_score = apply_classifier_tags(classifier_tags, story)
                title_score = apply_classifier_titles(classifier_titles, story)

                max_score = max(author_score, tags_score, title_score)
                min_score = min(author_score, tags_score, title_score)
                # Precedence: any positive story-level classifier wins, then
                # any negative one; the feed score only breaks a neutral tie.
                if max_score > 0:
                    feed_scores["positive"] += 1
                elif min_score < 0:
                    feed_scores["negative"] += 1
                elif feed_score > 0:
                    feed_scores["positive"] += 1
                elif feed_score < 0:
                    feed_scores["negative"] += 1
                else:
                    feed_scores["neutral"] += 1
        else:
            unread_story_hashes = self.get_stories(read_filter="unread", limit=500, hashes_only=True, withscores=True)
            feed_scores["neutral"] = len(unread_story_hashes)
            if feed_scores["neutral"]:
                # Each entry is indexed as (hash, score) with the score used as
                # a timestamp. NOTE(review): taking the last entry as the oldest
                # assumes get_stories returns newest first -- confirm.
                oldest_unread_story_date = datetime.datetime.fromtimestamp(unread_story_hashes[-1][1])

        if not silent:
            logging.user(
                self.user,
                "~FBUnread count (~SB%s~SN%s): ~SN(~FC%s~FB/~FC%s~FB/~FC%s~FB) ~SBto~SN (~FC%s~FB/~FC%s~FB/~FC%s~FB)"
                % (
                    self.feed_id,
                    "/~FMtrained~FB" if self.is_trained else "",
                    ong,
                    ont,
                    ops,
                    feed_scores["negative"],
                    feed_scores["neutral"],
                    feed_scores["positive"],
                ),
            )

        self.unread_count_positive = feed_scores["positive"]
        self.unread_count_neutral = feed_scores["neutral"]
        self.unread_count_negative = feed_scores["negative"]
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False

        self.save()

        # Only negatively-scored stories (or none) remain: mark the feed read.
        if self.unread_count_positive == 0 and self.unread_count_neutral == 0:
            self.mark_feed_read()

        if not silent:
            logging.user(
                self.user,
                "~FC~SNComputing scores: %s (~SB%s~SN/~SB%s~SN/~SB%s~SN)"
                % (self.feed, feed_scores["negative"], feed_scores["neutral"], feed_scores["positive"]),
            )

        return self
Example #34
0
def load_social_page(request, user_id, username=None, **kwargs):
    """Render the page of stories shared by one user.

    Loads a window of the user's ``MSharedStory`` documents (newest shared
    first), attaches feed data, comments and profiles, and renders either the
    full social page or, for ``format=html``, just the stories template.

    Request parameters read from ``request.REQUEST``: ``offset`` (default 0),
    ``limit`` (default 6), ``page`` (1-based; overrides ``offset`` when given),
    ``format`` and ``feed_id``.

    Args:
        request: The incoming HTTP request.
        user_id: Primary key of the user whose shared stories are shown;
            a 404 is raised if no such user exists.
        username: Unused here; accepted for the URL pattern.
        **kwargs: May carry ``feed_id`` to restrict stories to one feed.

    Returns:
        The rendered template response.
    """
    start = time.time()
    user = request.user
    social_user_id = int(user_id)
    social_user = get_object_or_404(User, pk=social_user_id)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 6))
    page = request.REQUEST.get('page')
    response_format = request.REQUEST.get('format', None)
    has_next_page = False
    feed_id = kwargs.get('feed_id') or request.REQUEST.get('feed_id')

    # ``page`` arrives as a string. Parse it once so it can be used both for
    # the offset and for the numeric ``> 1`` check in the log line below
    # (comparing the raw string with an int is always true on Python 2 and a
    # TypeError on Python 3).
    page_number = int(page) if page else None
    if page_number is not None:
        offset = limit * (page_number - 1)

    user_social_profile = None
    if user.is_authenticated():
        user_social_profile = MSocialProfile.get_user(user.pk)
    social_profile = MSocialProfile.get_user(social_user_id)
    params = dict(user_id=social_user.pk)
    if feed_id:
        params['story_feed_id'] = feed_id
    # Fetch one story beyond the limit purely to detect whether a next page exists.
    mstories = MSharedStory.objects(**params).order_by('-shared_date')[offset:offset+limit+1]
    stories = Feed.format_stories(mstories)
    if len(stories) > limit:
        has_next_page = True
        stories = stories[:-1]

    checkpoint1 = time.time()

    if not stories:
        # Nothing shared in this window: render the full page with empty data.
        params = {
            "user": user,
            "stories": [],
            "feeds": {},
            "social_user": social_user,
            "social_profile": social_profile,
            'user_social_profile' : json.encode(user_social_profile and user_social_profile.page()),
        }
        template = 'social/social_page.xhtml'
        return render_to_response(template, params, context_instance=RequestContext(request))

    story_feed_ids = list(set(s['story_feed_id'] for s in stories))
    feeds = Feed.objects.filter(pk__in=story_feed_ids)
    feeds = dict((feed.pk, feed.canonical(include_favicon=False)) for feed in feeds)
    for story in stories:
        if story['story_feed_id'] in feeds:
            # Feed could have been deleted.
            story['feed'] = feeds[story['story_feed_id']]
        # Show share times in the page owner's timezone.
        shared_date = localtime_for_timezone(story['shared_date'], social_user.profile.timezone)
        story['shared_date'] = shared_date

    stories, profiles = MSharedStory.stories_with_comments_and_profiles(stories, social_user.pk,
                                                                        check_all=True)

    checkpoint2 = time.time()

    if user.is_authenticated():
        for story in stories:
            # If the viewer also shared this story, attach their own comments.
            if user.pk in story['shared_by_friends'] or user.pk in story['shared_by_public']:
                story['shared_by_user'] = True
                shared_story = MSharedStory.objects.get(user_id=user.pk,
                                                        story_feed_id=story['story_feed_id'],
                                                        story_guid=story['id'])
                story['user_comments'] = shared_story.comments

    stories = MSharedStory.attach_users_to_stories(stories, profiles)

    params = {
        'social_user'   : social_user,
        'stories'       : stories,
        'user_social_profile' : json.encode(user_social_profile and user_social_profile.page()),
        'social_profile': social_profile,
        'feeds'         : feeds,
        'user_profile'  : hasattr(user, 'profile') and user.profile,
        'has_next_page' : has_next_page,
        'holzer_truism' : random.choice(jennyholzer.TRUISMS) #if not has_next_page else None
    }

    diff1 = checkpoint1-start
    diff2 = checkpoint2-start
    timediff = time.time()-start
    show_page_suffix = page_number is not None and page_number > 1
    logging.user(request, "~FYLoading ~FMsocial page~FY: ~SB%s%s ~SN(%.4s seconds, ~SB%.4s/%.4s~SN)" % (
        social_profile.title[:22], ('~SN/p%s' % page_number) if show_page_suffix else '', timediff,
        diff1, diff2))
    if response_format == 'html':
        template = 'social/social_stories.xhtml'
    else:
        template = 'social/social_page.xhtml'

    return render_to_response(template, params, context_instance=RequestContext(request))
Example #35
0
def api_shared_story(request):
    """Return recently shared stories as trigger entries (logged as an IFTTT check).

    Reads a JSON body (``request.body_json``) with optional ``after`` /
    ``before`` epoch bounds, ``limit`` (default 50) and a ``triggerFields``
    mapping whose ``blurblog_user`` is either a user id (int or digit string)
    or ``"all"`` for every user the requester has a social subscription to.

    Stories the requester's classifiers score below zero are dropped.

    Args:
        request: The incoming request; ``request.user`` is the requester.

    Returns:
        ``{"data": entries}`` where each entry describes one shared story.
        Entries are in newest-shared-first order, or oldest-first when
        ``after`` is given. An unrecognized ``blurblog_user`` value yields an
        empty list.
    """
    user = request.user
    body = request.body_json
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    blurblog_user = fields['blurblog_user']
    entries = []

    # Default to "no users" so an unrecognized blurblog_user value produces an
    # empty result instead of a NameError on the queries below.
    social_user_ids = []
    if isinstance(blurblog_user, int) or blurblog_user.isdigit():
        social_user_ids = [int(blurblog_user)]
    elif blurblog_user == "all":
        socialsubs = MSocialSubscription.objects.filter(user_id=user.pk)
        social_user_ids = [ss.subscription_user_id for ss in socialsubs]

    mstories = MSharedStory.objects(
        user_id__in=social_user_ids
    ).order_by('-shared_date')[:limit]
    stories = Feed.format_stories(mstories)

    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    share_user_ids = list(set([story['user_id'] for story in stories]))
    users = dict([(u.pk, u.username)
                 for u in User.objects.filter(pk__in=share_user_ids).only('pk', 'username')])
    feeds = dict([(f.pk, {
        "title": f.feed_title,
        "website": f.feed_link,
        "address": f.feed_address,
    }) for f in Feed.objects.filter(pk__in=found_feed_ids)])

    classifier_feeds   = list(MClassifierFeed.objects(user_id=user.pk,
                                                      social_user_id__in=social_user_ids))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk,
                                                        social_user_id__in=social_user_ids))
    classifier_titles  = list(MClassifierTitle.objects(user_id=user.pk,
                                                       social_user_id__in=social_user_ids))
    classifier_tags    = list(MClassifierTag.objects(user_id=user.pk,
                                                     social_user_id__in=social_user_ids))
    # Merge with feed specific classifiers
    classifier_feeds   = classifier_feeds + list(MClassifierFeed.objects(user_id=user.pk,
                                                                         feed_id__in=found_feed_ids))
    classifier_authors = classifier_authors + list(MClassifierAuthor.objects(user_id=user.pk,
                                                                             feed_id__in=found_feed_ids))
    classifier_titles  = classifier_titles + list(MClassifierTitle.objects(user_id=user.pk,
                                                                           feed_id__in=found_feed_ids))
    classifier_tags    = classifier_tags + list(MClassifierTag.objects(user_id=user.pk,
                                                                       feed_id__in=found_feed_ids))

    for story in stories:
        # Epoch seconds of the share time, computed once and reused for the
        # before/after window and for the entry's meta timestamp.
        shared_timestamp = int(story['shared_date'].strftime("%s"))
        if before and shared_timestamp > before:
            continue
        if after and shared_timestamp < after:
            continue
        score = compute_story_score(story, classifier_titles=classifier_titles,
                                    classifier_authors=classifier_authors,
                                    classifier_tags=classifier_tags,
                                    classifier_feeds=classifier_feeds)
        # Skip stories the requester has trained against.
        if score < 0:
            continue
        # The feed may no longer exist; the Site* fields are then None.
        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "StoryScore": score,
            "Comments": story['comments'],
            "Username": users.get(story['user_id']),
            "SharedAt": story['shared_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "Site": feed and feed['title'],
            "SiteURL": feed and feed['website'],
            "SiteRSS": feed and feed['address'],
            "meta": {
                "id": story['story_hash'],
                "timestamp": shared_timestamp
            },
        })

    # With a lower bound, callers get entries oldest-first.
    if after:
        entries = sorted(entries, key=lambda s: s['meta']['timestamp'])

    logging.user(request, "~FMChecking shared stories from ~SB~FCIFTTT~SN~FM: ~SB~FM%s~FM~SN - ~SB%s~SN stories" % (blurblog_user, len(entries)))

    return {"data": entries}
Example #36
0
def load_river_blurblog(request):
    """Load one page of the combined "river" of blurblog (shared) stories.

    Request parameters (read from ``request.REQUEST``):
        social_user_ids: optional list of blurblog owner ids. When empty, the
            ids come from the requesting user's ``MSocialSubscription`` rows.
        page: 1-based page number (default 1); a page is 10 stories.
        order: passed through to ``MSocialSubscription.feed_stories``
            (default "newest").
        read_filter: passed through as well (default "unread").
        relative_user_id: passed to
            ``MSharedStory.stories_with_comments_and_profiles``; defaults to
            the requesting user's pk.

    Returns:
        dict with ``stories`` (formatted and annotated with read/starred/
        shared state and classifier scores), ``user_profiles``, ``feeds``
        (canonical form of the story feeds the user is not subscribed to),
        ``classifiers`` and ``elapsed_time`` (seconds, rounded to 2 places).
    """
    per_page = 10
    start = time.time()
    user = get_user(request)
    social_user_ids = [int(uid) for uid in request.REQUEST.getlist("social_user_ids") if uid]
    original_user_ids = list(social_user_ids)
    page = int(request.REQUEST.get("page", 1))
    order = request.REQUEST.get("order", "newest")
    read_filter = request.REQUEST.get("read_filter", "unread")
    relative_user_id = request.REQUEST.get("relative_user_id", None)
    now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
    UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if not relative_user_id:
        # The user was already resolved above; no need to look it up again.
        relative_user_id = user.pk

    if not social_user_ids:
        socialsubs = MSocialSubscription.objects.filter(user_id=user.pk)
        social_user_ids = [s.subscription_user_id for s in socialsubs]

    offset = (page - 1) * per_page
    # NOTE(review): "limit" looks like an inclusive end index rather than a
    # count -- confirm against MSocialSubscription.feed_stories.
    limit = page * per_page - 1

    story_ids, story_dates = MSocialSubscription.feed_stories(
        user.pk, social_user_ids, offset=offset, limit=limit, order=order, read_filter=read_filter
    )
    mstories = MStory.objects(id__in=story_ids)
    story_id_to_dates = dict(zip(story_ids, story_dates))

    # Most recent date first. A key function with reverse=True is a stable
    # sort equivalent to the old cmp-based one, and also works where the
    # ``cmp`` argument no longer exists.
    sorted_mstories = sorted(
        mstories,
        key=lambda mstory: int(story_id_to_dates[str(mstory.id)]),
        reverse=True,
    )
    stories = Feed.format_stories(sorted_mstories)
    # NOTE(review): this assumes story_dates is in the same order as the
    # sorted stories and that no story id was missing from MStory -- confirm,
    # especially for order="oldest".
    for s, story in enumerate(stories):
        story["story_date"] = datetime.datetime.fromtimestamp(story_dates[s])
    stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, relative_user_id, check_all=True)

    story_feed_ids = list(set(s["story_feed_id"] for s in stories))
    usersubs = UserSubscription.objects.filter(user__pk=user.pk, feed__pk__in=story_feed_ids)
    usersubs_map = dict((sub.feed_id, sub) for sub in usersubs)
    unsub_feed_ids = list(set(story_feed_ids).difference(set(usersubs_map.keys())))
    unsub_feeds = Feed.objects.filter(pk__in=unsub_feed_ids)
    unsub_feeds = [feed.canonical(include_favicon=False) for feed in unsub_feeds]

    if story_feed_ids:
        # Starred, shared and already-read state for the stories on this page.
        story_ids = [story["id"] for story in stories]
        starred_stories = MStarredStory.objects(user_id=user.pk, story_guid__in=story_ids).only(
            "story_guid", "starred_date"
        )
        starred_stories = dict((story.story_guid, story.starred_date) for story in starred_stories)
        shared_stories = MSharedStory.objects(user_id=user.pk, story_guid__in=story_ids).only(
            "story_guid", "shared_date", "comments"
        )
        shared_stories = dict(
            (story.story_guid, dict(shared_date=story.shared_date, comments=story.comments))
            for story in shared_stories
        )

        userstories_db = MUserStory.objects(user_id=user.pk, feed_id__in=story_feed_ids, story_id__in=story_ids).only(
            "story_id"
        )
        userstories = set(us.story_id for us in userstories_db)

        # Intelligence classifiers for all feeds involved
        classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_tags = list(MClassifierTag.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    else:
        starred_stories = {}
        shared_stories = {}
        userstories = set()
        classifier_feeds = []
        classifier_authors = []
        classifier_titles = []
        classifier_tags = []

    classifiers = sort_classifiers_by_feed(
        user=user,
        feed_ids=story_feed_ids,
        classifier_feeds=classifier_feeds,
        classifier_authors=classifier_authors,
        classifier_titles=classifier_titles,
        classifier_tags=classifier_tags,
    )

    # Annotate every story with read status, display dates and scores.
    for story in stories:
        # Read if explicitly marked read, or older than the unread cutoff.
        if story["id"] in userstories or story["story_date"] < UNREAD_CUTOFF:
            story["read_status"] = 1
        else:
            story["read_status"] = 0
        story_date = localtime_for_timezone(story["story_date"], user.profile.timezone)
        story["short_parsed_date"] = format_story_link_date__short(story_date, now)
        story["long_parsed_date"] = format_story_link_date__long(story_date, now)
        if story["id"] in starred_stories:
            story["starred"] = True
            starred_date = localtime_for_timezone(starred_stories[story["id"]], user.profile.timezone)
            story["starred_date"] = format_story_link_date__long(starred_date, now)
        story["intelligence"] = {
            "feed": apply_classifier_feeds(classifier_feeds, story["story_feed_id"]),
            "author": apply_classifier_authors(classifier_authors, story),
            "tags": apply_classifier_tags(classifier_tags, story),
            "title": apply_classifier_titles(classifier_titles, story),
        }
        if story["id"] in shared_stories:
            story["shared"] = True
            shared_date = localtime_for_timezone(shared_stories[story["id"]]["shared_date"], user.profile.timezone)
            story["shared_date"] = format_story_link_date__long(shared_date, now)
            story["shared_comments"] = strip_tags(shared_stories[story["id"]]["comments"])

    diff = time.time() - start
    timediff = round(float(diff), 2)
    logging.user(
        request,
        "~FYLoading ~FCriver blurblogs stories~FY: ~SBp%s~SN (%s/%s "
        "stories, ~SN%s/%s/%s feeds)"
        % (page, len(stories), len(mstories), len(story_feed_ids), len(social_user_ids), len(original_user_ids)),
    )

    return {
        "stories": stories,
        "user_profiles": user_profiles,
        "feeds": unsub_feeds,
        "classifiers": classifiers,
        "elapsed_time": timediff,
    }
Example #37
0
    # Prune the river to only include a set number of stories per feed
    # story_feed_counts = defaultdict(int)
    # mstories_pruned = []
    # for story in mstories:
    #     print story['story_title'], story_feed_counts[story['story_feed_id']]
    #     if story_feed_counts[story['story_feed_id']] >= 3: continue
    #     mstories_pruned.append(story)
    #     story_feed_counts[story['story_feed_id']] += 1
    
    stories = []
    for i, story in enumerate(mstories):
        if i < offset: continue
        if i >= limit: break
        stories.append(bunch(story))
    stories = Feed.format_stories(stories)
    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    
    # Find starred stories
    try:
        starred_stories = MStarredStory.objects(
            user_id=user.pk,
            story_feed_id__in=found_feed_ids
        ).only('story_guid', 'starred_date')
        starred_stories = dict([(story.story_guid, story.starred_date) 
                                for story in starred_stories])
    except OperationFailure:
        logging.info(" ***> Starred stories failure")
        starred_stories = {}
    
    # Intelligence classifiers for all feeds involved
Example #38
0
    def get_stories(self, offset=0, limit=6, order='newest', read_filter='all', withscores=False,
                    hashes_only=False, cutoff_date=None):
        """Fetch this subscription's stories through Redis-ranked story keys.

        A ranked sorted set is (re)built in the story-hash pool and moved to
        the temp pool whenever ``withscores`` is set, ``offset`` is falsy, or
        the ranked key is missing from the temp pool. The ranked key is then
        read by score range, forwards for ``order == 'oldest'`` and in
        reverse otherwise.

        Returns:
            * ``[]`` when the feed has no stories key or nothing is in range;
            * a list of ``(member, int(score))`` pairs when ``withscores``;
            * the raw members when ``hashes_only``;
            * otherwise the result of ``Feed.format_stories`` over the
              matching ``MStory`` documents, ordered by story date.
        """
        story_redis = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        temp_redis = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_TEMP_POOL)

        feed_key = 'F:%s' % (self.feed_id)
        read_key = 'RS:%s:%s' % (self.user_id, self.feed_id)
        unread_key = 'U:%s:%s' % (self.user_id, self.feed_id)
        ranked_key = 'z%sU:%s:%s' % ('h' if hashes_only else '',
                                     self.user_id, self.feed_id)

        if withscores or not offset or not temp_redis.exists(ranked_key):
            temp_redis.delete(ranked_key)
            if not story_redis.exists(feed_key):
                return []

            # Without a read filter (or without any read stories recorded),
            # every story of the feed is a candidate.
            use_whole_feed = read_filter == 'all' or not story_redis.exists(read_key)
            if use_whole_feed:
                unread_key = feed_key
            else:
                story_redis.sdiffstore(unread_key, feed_key, read_key)

            dated_feed_key = 'zF:%s' % (self.feed_id)
            story_redis.zinterstore(ranked_key, [dated_feed_key, unread_key])
            if not use_whole_feed:
                # The set difference was only scratch space for the intersect.
                story_redis.delete(unread_key)

            # Move the ranked set into the temp pool with a one hour TTL (ms).
            serialized = story_redis.dump(ranked_key)
            if serialized:
                transfer = temp_redis.pipeline()
                transfer.delete(ranked_key)
                transfer.restore(ranked_key, 60 * 60 * 1000, serialized)
                transfer.execute()
                story_redis.delete(ranked_key)

        a_day_from_now = int(time.time() + 60*60*24)
        if not cutoff_date:
            cutoff_date = datetime.datetime.now() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        unread_only = read_filter == 'unread'
        if order == 'oldest':
            range_by_score = temp_redis.zrangebyscore
            if unread_only:
                min_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
            else:
                min_score = int(time.mktime(cutoff_date.timetuple())) - 1000
            max_score = a_day_from_now
        else:
            # Reverse range: the "min" argument carries the upper bound.
            range_by_score = temp_redis.zrevrangebyscore
            min_score = a_day_from_now
            if unread_only:
                # +1 for the intersection b/w zF and F, which carries an implicit score of 1.
                max_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
            else:
                max_score = 0

        # Deliberately disabled debugging aid.
        if settings.DEBUG and False:
            debug_stories = temp_redis.zrevrange(ranked_key, 0, -1, withscores=True)
            print(" ---> Unread all stories (%s - %s) %s stories: %s" % (
                min_score,
                max_score,
                len(debug_stories),
                debug_stories))

        ranged = range_by_score(ranked_key, min_score, max_score,
                                start=offset, num=500, withscores=withscores)
        story_ids = ranged[:limit]

        if withscores:
            story_ids = [(member, int(score)) for member, score in story_ids]

        if withscores or hashes_only:
            return story_ids
        if not story_ids:
            return []

        date_ordering = 'story_date' if order == 'oldest' else '-story_date'
        mstories = MStory.objects(story_hash__in=story_ids).order_by(date_ordering)
        return Feed.format_stories(mstories)
Example #39
0
def api_unread_story(request, trigger_slug=None):
    """Return unread stories of one feed or one folder as trigger entries.

    The JSON body (``request.body_json``) supplies ``after``/``before``
    (epoch-second bounds on the story date), ``limit`` (default 50) and
    ``triggerFields['feed_or_folder']``: either a feed id (int or digit
    string) or a folder title, where "Top Level" maps to the " " folder and
    "all" lifts the folder restriction.

    Stories from trained feeds are scored with ``compute_story_score``;
    negatively scored ones are dropped, and for the
    "new-unread-focus-story" trigger so are those scoring below 1.

    Returns:
        ``{"data": entries}`` capped at ``limit`` entries, sorted by
        ascending timestamp when ``after`` is given.
    """
    user = request.user
    body = request.body_json
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    feed_or_folder = fields['feed_or_folder']
    focus_only = trigger_slug == "new-unread-focus-story"

    def epoch_seconds(story):
        # Story date as integer epoch seconds, via the "%s" strftime format.
        return int(story['story_date'].strftime("%s"))

    if isinstance(feed_or_folder, int) or feed_or_folder.isdigit():
        # Single feed: the user has to be subscribed to it.
        feed_id = int(feed_or_folder)
        try:
            usersub = UserSubscription.objects.get(user=user, feed_id=feed_id)
        except UserSubscription.DoesNotExist:
            return dict(data=[])
        found_feed_ids = [feed_id]
        found_trained_feed_ids = [feed_id] if usersub.is_trained else []
        stories = usersub.get_stories(order="newest", read_filter="unread",
                                      offset=0, limit=limit,
                                      default_cutoff_date=user.profile.unread_cutoff)
    else:
        # Folder: resolve it to feed ids, then read across all of them.
        folder_title = " " if feed_or_folder == "Top Level" else feed_or_folder
        usf = UserSubscriptionFolders.objects.get(user=user)
        flat_folders = usf.flatten_folders()
        feed_ids = flat_folders.get(folder_title) if folder_title != "all" else None
        usersubs = UserSubscription.subs_for_feeds(user.pk, feed_ids=feed_ids,
                                                   read_filter="unread")
        feed_ids = [sub.feed_id for sub in usersubs]
        story_hashes, _unread_feed_story_hashes = UserSubscription.feed_stories(
            user_id=user.pk,
            feed_ids=feed_ids,
            offset=0,
            limit=limit,
            order="newest",
            read_filter="unread",
            usersubs=usersubs,
            cutoff_date=user.profile.unread_cutoff,
        )
        mstories = MStory.objects(story_hash__in=story_hashes).order_by('-story_date')
        stories = Feed.format_stories(mstories)
        found_feed_ids = list(set(story['story_feed_id'] for story in stories))
        trained_feed_ids = [sub.feed_id for sub in usersubs if sub.is_trained]
        found_trained_feed_ids = list(set(trained_feed_ids) & set(found_feed_ids))

    # Classifiers are only needed (and only loaded) for trained feeds.
    if found_trained_feed_ids:
        classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk,
                                                        feed_id__in=found_trained_feed_ids))
        classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk,
                                                            feed_id__in=found_trained_feed_ids))
        classifier_titles = list(MClassifierTitle.objects(user_id=user.pk,
                                                          feed_id__in=found_trained_feed_ids))
        classifier_tags = list(MClassifierTag.objects(user_id=user.pk,
                                                      feed_id__in=found_trained_feed_ids))

    feeds = dict(
        (f.pk, {
            "title": f.feed_title,
            "website": f.feed_link,
            "address": f.feed_address,
        })
        for f in Feed.objects.filter(pk__in=found_feed_ids)
    )

    entries = []
    for story in stories:
        if before and epoch_seconds(story) > before:
            continue
        if after and epoch_seconds(story) < after:
            continue

        score = 0
        if found_trained_feed_ids and story['story_feed_id'] in found_trained_feed_ids:
            score = compute_story_score(story, classifier_titles=classifier_titles,
                                        classifier_authors=classifier_authors,
                                        classifier_tags=classifier_tags,
                                        classifier_feeds=classifier_feeds)
            if score < 0 or (focus_only and score < 1):
                continue

        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "StoryScore": score,
            "Site": feed and feed['title'],
            "SiteURL": feed and feed['website'],
            "SiteRSS": feed and feed['address'],
            "meta": {
                "id": story['story_hash'],
                "timestamp": epoch_seconds(story)
            },
        })

    if after:
        entries.sort(key=lambda entry: entry['meta']['timestamp'])

    focus_label = " ~SBfocus~SN" if focus_only else ""
    logging.user(request, "~FYChecking unread%s stories with ~SB~FCIFTTT~SN~FY: ~SB%s~SN - ~SB%s~SN stories" % (focus_label, feed_or_folder, len(entries)))

    return {"data": entries[:limit]}
Example #40
0
    def calculate_feed_scores(self, silent=False, stories=None):
        """Recount this subscription's positive/neutral/negative unread stories.

        Args:
            silent: when true, suppress the informational log lines.
            stories: optional pre-formatted stories for this feed. When falsy,
                stories dated on or after the effective cutoff are loaded from
                ``MStory`` and formatted with ``Feed.format_stories``.

        Returns:
            ``self`` after saving the new counts, or ``None`` when the
            computation is skipped: the user was last seen before the unread
            cutoff, or the feed has never been fetched (in which case
            ``needs_unread_recalc`` is cleared and saved first).
        """
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        if self.user.profile.last_seen_on < UNREAD_CUTOFF:
            return

        if not self.feed.fetched_once:
            if not silent:
                logging.info(" ---> [%s] NOT Computing scores: %s" % (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return

        feed_scores = dict(negative=0, neutral=0, positive=0)

        # Stories older than settings.DAYS_OF_UNREAD days never count as
        # unread: use the later of the cutoff and mark_read_date, and pull
        # mark_read_date forward when it is older than the cutoff.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        read_stories = MUserStory.objects(
            user_id=self.user_id, feed_id=self.feed_id, read_date__gte=self.mark_read_date
        )
        # A set keeps the per-story membership test below O(1) instead of
        # rescanning a list for every story.
        read_stories_ids = set(us.story_id for us in read_stories)

        if not stories:
            stories_db = MStory.objects(story_feed_id=self.feed_id, story_date__gte=date_delta)
            stories = Feed.format_stories(stories_db, self.feed_id)

        oldest_unread_story_date = now
        unread_stories = []
        for story in stories:
            if story["story_date"] < date_delta:
                continue
            if story["id"] not in read_stories_ids:
                unread_stories.append(story)
                if story["story_date"] < oldest_unread_story_date:
                    oldest_unread_story_date = story["story_date"]

        classifier_feeds = list(MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0))
        classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_titles = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_tags = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

        # The feed-level score is the same for every story; compute it once.
        scores = {"feed": apply_classifier_feeds(classifier_feeds, self.feed)}

        for story in unread_stories:
            scores.update(
                {
                    "author": apply_classifier_authors(classifier_authors, story),
                    "tags": apply_classifier_tags(classifier_tags, story),
                    "title": apply_classifier_titles(classifier_titles, story),
                }
            )

            # Any positive author/tag/title score wins over a negative one;
            # the feed-level score only decides when all three are neutral.
            max_score = max(scores["author"], scores["tags"], scores["title"])
            min_score = min(scores["author"], scores["tags"], scores["title"])
            if max_score > 0:
                feed_scores["positive"] += 1
            elif min_score < 0:
                feed_scores["negative"] += 1
            elif scores["feed"] > 0:
                feed_scores["positive"] += 1
            elif scores["feed"] < 0:
                feed_scores["negative"] += 1
            else:
                feed_scores["neutral"] += 1

        self.unread_count_positive = feed_scores["positive"]
        self.unread_count_neutral = feed_scores["neutral"]
        self.unread_count_negative = feed_scores["negative"]
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False

        self.save()

        if self.unread_count_positive == 0 and self.unread_count_neutral == 0 and self.unread_count_negative == 0:
            self.mark_feed_read()

        if not silent:
            logging.info(
                " ---> [%s] Computing scores: %s (%s/%s/%s)"
                % (self.user, self.feed, feed_scores["negative"], feed_scores["neutral"], feed_scores["positive"])
            )

        return self
Example #41
0
    def calculate_feed_scores(self, silent=False, stories_db=None):
        """Recount this subscription's positive/neutral/negative unread stories.

        Args:
            silent: when true, suppress the informational log lines.
            stories_db: optional iterable of story documents for this feed.
                When falsy, stories dated on or after the effective cutoff
                are loaded from ``MStory``.

        Returns:
            Always ``None``. The new counts are saved on the subscription,
            the feed is marked read when no positive or neutral unread
            stories remain, and the ``usersub:<user id>`` cache key is
            deleted. Nothing is computed when the user was last seen before
            the unread cutoff or the feed has never been fetched.
        """
        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        if self.user.profile.last_seen_on < UNREAD_CUTOFF:
            return

        if not self.feed.fetched_once:
            if not silent:
                logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return

        if not silent:
            logging.info(' ---> [%s] Computing scores: %s' % (self.user, self.feed))
        feed_scores = dict(negative=0, neutral=0, positive=0)

        # Stories older than settings.DAYS_OF_UNREAD days never count as
        # unread: use the later of the cutoff and mark_read_date, and pull
        # mark_read_date forward when it is older than the cutoff.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        read_stories = MUserStory.objects(user_id=self.user.pk,
                                          feed_id=self.feed.pk,
                                          read_date__gte=self.mark_read_date)
        # Only unicode ids are collected, so a set is safe here and keeps the
        # per-story membership tests below O(1) instead of list scans.
        read_stories_ids = set()
        for us in read_stories:
            if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
                read_stories_ids.add(us.story.story_guid)
            elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
                read_stories_ids.add(us.story.id) # TODO: Remove me after migration from story.id->guid

        stories_db = stories_db or MStory.objects(story_feed_id=self.feed.pk,
                                                  story_date__gte=date_delta)
        unread_stories_db = []
        for story in stories_db:
            if story.story_date < date_delta:
                continue
            if hasattr(story, 'story_guid') and story.story_guid not in read_stories_ids:
                unread_stories_db.append(story)
            elif isinstance(story.id, unicode) and story.id not in read_stories_ids:
                unread_stories_db.append(story)
        stories = Feed.format_stories(unread_stories_db, self.feed.pk)

        classifier_feeds = MClassifierFeed.objects(user_id=self.user.pk, feed_id=self.feed.pk)
        classifier_authors = MClassifierAuthor.objects(user_id=self.user.pk, feed_id=self.feed.pk)
        classifier_titles = MClassifierTitle.objects(user_id=self.user.pk, feed_id=self.feed.pk)
        classifier_tags = MClassifierTag.objects(user_id=self.user.pk, feed_id=self.feed.pk)

        # The feed-level score is the same for every story; compute it once.
        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }

        for story in stories:
            # The classifier querysets are reused for every story, so they
            # are rewound before each pass.
            classifier_authors.rewind()
            classifier_tags.rewind()
            classifier_titles.rewind()
            scores.update({
                'author': apply_classifier_authors(classifier_authors, story),
                'tags': apply_classifier_tags(classifier_tags, story),
                'title': apply_classifier_titles(classifier_titles, story),
            })

            # Any positive author/tag/title score wins over a negative one;
            # the feed-level score only decides when all three are neutral.
            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            elif scores['feed'] > 0:
                feed_scores['positive'] += 1
            elif scores['feed'] < 0:
                feed_scores['negative'] += 1
            else:
                feed_scores['neutral'] += 1

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.needs_unread_recalc = False

        self.save()

        # Negative-only leftovers do not keep the feed unread.
        if (self.unread_count_positive == 0 and
            self.unread_count_neutral == 0):
            self.mark_feed_read()

        cache.delete('usersub:%s' % self.user.id)

        return