Example #1
 def test_load_feeds__gawker(self):
     self.client.login(username='******', password='******')
     
     management.call_command('loaddata', 'gawker1.json', verbosity=0)
     
     feed = Feed.objects.get(feed_link__contains='gawker')
     stories = MStory.objects(story_feed_id=feed.pk)
     self.assertEquals(stories.count(), 0)
     
     feed.update(force=True)
     
     stories = MStory.objects(story_feed_id=feed.pk)
     self.assertEquals(stories.count(), 38)
     
     management.call_command('loaddata', 'gawker2.json', verbosity=0)
     
     feed.update(force=True)
     
     # Test: 1 changed char in content
     stories = MStory.objects(story_feed_id=feed.pk)
     self.assertEquals(stories.count(), 38)
     
     url = reverse('load-single-feed', kwargs=dict(feed_id=1))
     response = self.client.get(url)
     content = json.decode(response.content)
     self.assertEquals(len(content['stories']), 6)
Example #2
def original_text(request):
    story_id = request.REQUEST.get('story_id')
    feed_id = request.REQUEST.get('feed_id')
    story_hash = request.REQUEST.get('story_hash', None)
    force = request.REQUEST.get('force', False)
    debug = request.REQUEST.get('debug', False)

    if story_hash:
        story, _ = MStory.find_story(story_hash=story_hash)
    else:
        story, _ = MStory.find_story(story_id=story_id, story_feed_id=feed_id)

    if not story:
        logging.user(request, "~FYFetching ~FGoriginal~FY story text: ~FRstory not found")
        return {'code': -1, 'message': 'Story not found.', 'original_text': None, 'failed': True}
    
    original_text = story.fetch_original_text(force=force, request=request, debug=debug)

    return {
        'feed_id': story.story_feed_id,
        'story_hash': story.story_hash,
        'story_id': story.story_guid,
        'image_urls': story.image_urls,
        'secure_image_urls': Feed.secure_image_urls(story.image_urls),
        'original_text': original_text,
        'failed': not original_text or len(original_text) < 100,
    }
Example #3
 def test_load_feeds__gothamist(self):
     self.client.login(username='******', password='******')
     
     management.call_command('loaddata', 'gothamist_aug_2009_1.json', verbosity=0)
     feed = Feed.objects.get(feed_link__contains='gothamist')
     stories = MStory.objects(story_feed_id=feed.pk)
     self.assertEquals(stories.count(), 0)
     
     management.call_command('refresh_feed', force=1, feed=4, single_threaded=True, daemonize=False)
     
     stories = MStory.objects(story_feed_id=feed.pk)
     self.assertEquals(stories.count(), 42)
     
     url = reverse('load-single-feed', kwargs=dict(feed_id=4))
     response = self.client.get(url)
     content = json.decode(response.content)
     self.assertEquals(len(content['stories']), 6)
     
     management.call_command('loaddata', 'gothamist_aug_2009_2.json', verbosity=0)
     management.call_command('refresh_feed', force=1, feed=4, single_threaded=True, daemonize=False)
     
     stories = MStory.objects(story_feed_id=feed.pk)
     self.assertEquals(stories.count(), 42)
     
     url = reverse('load-single-feed', kwargs=dict(feed_id=4))
     response = self.client.get(url)
     # print [c['story_title'] for c in json.decode(response.content)]
     content = json.decode(response.content)
     # Test: 1 changed char in title
     self.assertEquals(len(content['stories']), 6)
Example #4
def reindex_stories():
    count = MStory.objects().count()
    print "Mongo DB stories: %s" % count
    p = 0.0
    i = 0

    feeds = Feed.objects.all().order_by('-average_stories_per_month')
    feed_count = feeds.count()
    f = 0
    for feed in feeds:
        f += 1
        print "%s/%s: %s" % (f, feed_count, feed,)
        sys.stdout.flush()
    
        for story in MStory.objects(story_feed_id=feed.pk):
            i += 1.0
            if round(i / count * 100) != p:
                p = round(i / count * 100)
                print '%s%%' % p
            if isinstance(story.id, unicode) and story.id:
                story.story_guid = story.id
                story.id = pymongo.objectid.ObjectId()
                try:
                    story.save()
                except mongoengine.queryset.OperationError:
                    print 'Dupe!'
                    continue
Example #5
 def test_load_feeds__slashdot(self):
     self.client.login(username='******', password='******')
     
     management.call_command('loaddata', 'slashdot1.json', verbosity=0)
     
     feed = Feed.objects.get(feed_link__contains='slashdot')
     stories = MStory.objects(story_feed_id=feed.pk)
     self.assertEquals(stories.count(), 0)
     
     management.call_command('refresh_feed', force=1, feed=5, single_threaded=True, daemonize=False)
     
     stories = MStory.objects(story_feed_id=feed.pk)
     self.assertEquals(stories.count(), 38)
     
     management.call_command('loaddata', 'slashdot2.json', verbosity=0)
     management.call_command('refresh_feed', force=1, feed=5, single_threaded=True, daemonize=False)
     
     stories = MStory.objects(story_feed_id=feed.pk)
     self.assertEquals(stories.count(), 38)
     
     response = self.client.post('/reader/feed', { "feed_id": 5 })
     
     # pprint([c['story_title'] for c in json.decode(response.content)])
     content = json.decode(response.content)
     
     # Test: 1 changed char in title
     self.assertEquals(len(content['stories']), 30)
Example #6
def mark_story_as_unread(request):
    story_id = request.POST['story_id']
    feed_id = int(request.POST['feed_id'])

    usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id)
                
    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()
        
    data = dict(code=0, payload=dict(story_id=story_id))
    logging.user(request, "~FY~SBUnread~SN story in feed: %s" % (usersub.feed))
    
    story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
    
    if story.story_date < usersub.mark_read_date:
        # Story is outside the mark as read range, so invert all stories before.
        newer_stories = MStory.objects(story_feed_id=story.story_feed_id,
                                       story_date__gte=story.story_date,
                                       story_date__lte=usersub.mark_read_date
                                       ).only('story_guid')
        newer_stories = [s.story_guid for s in newer_stories]
        usersub.mark_read_date = story.story_date - datetime.timedelta(minutes=1)
        usersub.needs_unread_recalc = True
        usersub.save()
        
        # Mark stories as read only after the mark_read_date has been moved, otherwise
        # these would be ignored.
        data = usersub.mark_story_ids_as_read(newer_stories, request=request)
        
    m = MUserStory.objects(story_id=story_id, user_id=request.user.pk, feed_id=feed_id)
    m.delete()
    
    return data
Example #7
def bootstrap_stories():
    print "Mongo DB stories: %s" % MStory.objects().count()
    # db.stories.drop()
    print "Dropped! Mongo DB stories: %s" % MStory.objects().count()

    print "Stories: %s" % Story.objects.all().count()
    pprint(db.stories.index_information())

    feeds = Feed.objects.all().order_by('-average_stories_per_month')
    feed_count = feeds.count()
    i = 0
    for feed in feeds:
        i += 1
        print "%s/%s: %s (%s stories)" % (i, feed_count,
                            feed, Story.objects.filter(story_feed=feed).count())
        sys.stdout.flush()
    
        stories = Story.objects.filter(story_feed=feed).values()
        for story in stories:
            # story['story_tags'] = [tag.name for tag in Tag.objects.filter(story=story['id'])]
            try:
                story['story_tags'] = json.decode(story['story_tags'])
            except:
                continue
            del story['id']
            del story['story_author_id']
            try:
                MStory(**story).save()
            except:
                continue

    print "\nMongo DB stories: %s" % MStory.objects().count()
Example #8
def reindex_stories():
    db = pymongo.Connection().newsblur
    count = MStory.objects().count()
    print "Mongo DB stories: %s" % count
    p = 0.0
    i = 0

    feeds = Feed.objects.all().order_by('-average_stories_per_month')
    feed_count = feeds.count()
    f = 0
    for feed in feeds:
        f += 1
        print "%s/%s: %s" % (f, feed_count, feed,)
        sys.stdout.flush()
        for story in MStory.objects(story_feed_id=feed.pk):
            i += 1.0
            if round(i / count * 100) != p:
                p = round(i / count * 100)
                print '%s%%' % p
            if isinstance(story.id, unicode):
                story.story_guid = story.id
                story.id = pymongo.objectid.ObjectId()
                try:
                    story.save()
                except OperationError, e:
                    print " ***> OperationError: %s" % e
                except Exception, e:
                    print ' ***> Unknown Error: %s' % e
                db.stories.remove({"_id": story.story_guid})
Example #9
 def receive_newsletter(self, params):
     user = self.user_from_email(params['recipient'])
     if not user:
         return
     
     sender_name, sender_username, sender_domain = self.split_sender(params['from'])
     feed_address = self.feed_address(user, "%s@%s" % (sender_username, sender_domain))
     
     usf = UserSubscriptionFolders.objects.get(user=user)
     usf.add_folder('', 'Newsletters')
     
     try:
         feed = Feed.objects.get(feed_address=feed_address)
     except Feed.DoesNotExist:
         feed = Feed.objects.create(feed_address=feed_address, 
                                    feed_link='http://' + sender_domain,
                                    feed_title=sender_name,
                                    fetched_once=True,
                                    known_good=True)
         feed.update()
         logging.user(user, "~FCCreating newsletter feed: ~SB%s" % (feed))
         r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
         r.publish(user.username, 'reload:%s' % feed.pk)
     try:
         usersub = UserSubscription.objects.get(user=user, feed=feed)
     except UserSubscription.DoesNotExist:
         _, _, usersub = UserSubscription.add_subscription(
             user=user, 
             feed_address=feed_address,
             folder='Newsletters'
         )
     
     story_hash = MStory.ensure_story_hash(params['signature'], feed.pk)
     story_params = {
         "story_feed_id": feed.pk,
         "story_date": datetime.datetime.fromtimestamp(int(params['timestamp'])),
         "story_title": params['subject'],
         "story_content": self.get_content(params),
         "story_author_name": escape(params['from']),
         "story_permalink": reverse('newsletter-story', 
                                    kwargs={'story_hash': story_hash}),
         "story_guid": params['signature'],
     }
     try:
         story = MStory.objects.get(story_hash=story_hash)
     except MStory.DoesNotExist:
         story = MStory(**story_params)
         story.save()
     
     usersub.needs_unread_recalc = True
     usersub.save()
     
     self.publish_to_subscribers(feed)
     
     MFetchHistory.add(feed_id=feed.pk, fetch_type='push')
     logging.user(user, "~FCNewsletter feed story: ~SB%s~SN / ~SB%s" % (story.story_title, feed))
     
     return story
Example #10
    def count_unreads_for_subscribers(self, feed):
        user_subs = UserSubscription.objects.filter(
            feed=feed, active=True, user__profile__last_seen_on__gte=feed.unread_cutoff
        ).order_by("-last_read_date")

        if not user_subs.count():
            return

        for sub in user_subs:
            if not sub.needs_unread_recalc:
                sub.needs_unread_recalc = True
                sub.save()

        if self.options["compute_scores"]:
            r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
            stories = MStory.objects(story_feed_id=feed.pk, story_date__gte=feed.unread_cutoff)
            stories = Feed.format_stories(stories, feed.pk)
            story_hashes = r.zrangebyscore(
                "zF:%s" % feed.pk, int(feed.unread_cutoff.strftime("%s")), int(time.time() + 60 * 60 * 24)
            )
            missing_story_hashes = set(story_hashes) - set([s["story_hash"] for s in stories])
            if missing_story_hashes:
                missing_stories = MStory.objects(
                    story_feed_id=feed.pk, story_hash__in=missing_story_hashes
                ).read_preference(pymongo.ReadPreference.PRIMARY)
                missing_stories = Feed.format_stories(missing_stories, feed.pk)
                stories = missing_stories + stories
                logging.debug(
                    u"   ---> [%-30s] ~FYFound ~SB~FC%s(of %s)/%s~FY~SN un-secondaried stories while computing scores"
                    % (feed.title[:30], len(missing_stories), len(missing_story_hashes), len(stories))
                )
            cache.set("S:%s" % feed.pk, stories, 60)
            logging.debug(
                u"   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)"
                % (
                    feed.title[:30],
                    len(stories),
                    user_subs.count(),
                    feed.num_subscribers,
                    feed.active_subscribers,
                    feed.premium_subscribers,
                )
            )
            self.calculate_feed_scores_with_stories(user_subs, stories)
        elif self.options.get("mongodb_replication_lag"):
            logging.debug(
                u"   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag"
                % (feed.title[:30], self.options.get("mongodb_replication_lag"))
            )
Example #11
 def mark_feed_read(self):
     now = datetime.datetime.now()
     if MStory.objects(story_feed_id=self.feed.pk).first():
         latest_story_date = MStory.objects(story_feed_id=self.feed.pk).order_by('-story_date')[0].story_date\
                             + datetime.timedelta(minutes=1)
     else:
         latest_story_date = now
     self.last_read_date = max(now, latest_story_date)
     self.mark_read_date = max(now, latest_story_date)
     self.unread_count_negative = 0
     self.unread_count_positive = 0
     self.unread_count_neutral = 0
     self.unread_count_updated = max(now, latest_story_date)
     self.needs_unread_recalc = False
     self.save()
Example #12
 def mark_read(cls, user_id, story_feed_id, story_hash, r=None, r2=None):
     if not r:
         r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
     if not r2:
         r2 = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL2)
     
     story_hash = MStory.ensure_story_hash(story_hash, story_feed_id=story_feed_id)
     
     if not story_hash: return
     
     now = int(time.time())
     all_read_stories_key = 'RS:%s' % (user_id)
     r.sadd(all_read_stories_key, story_hash)
     r2.sadd(all_read_stories_key, story_hash)
     r2.zadd('z' + all_read_stories_key, story_hash, now)
     r.expire(all_read_stories_key, settings.DAYS_OF_UNREAD*24*60*60)
     r2.expire(all_read_stories_key, settings.DAYS_OF_UNREAD*24*60*60)
     r2.expire('z' + all_read_stories_key, settings.DAYS_OF_UNREAD*24*60*60)
     
     read_story_key = 'RS:%s:%s' % (user_id, story_feed_id)
     r.sadd(read_story_key, story_hash)
     r2.sadd(read_story_key, story_hash)
     r2.zadd('z' + read_story_key, story_hash, now)
     r.expire(read_story_key, settings.DAYS_OF_UNREAD*24*60*60)
     r2.expire(read_story_key, settings.DAYS_OF_UNREAD*24*60*60)
     r2.expire('z' + read_story_key, settings.DAYS_OF_UNREAD*24*60*60)
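
A note on the paired writes in mark_read() above: every set membership is written to both Redis pools (r and r2), while only r2 additionally keeps a z-prefixed sorted set scored by read time. A comment sketch of the keys touched, for a hypothetical user 12 reading a story in feed 42 (key names are exactly those built in the code):

# RS:12       on r and r2  -- set of every story hash user 12 has read
# zRS:12      on r2 only   -- same members, scored by read timestamp
# RS:12:42    on r and r2  -- story hashes read within feed 42
# zRS:12:42   on r2 only   -- same members, scored by read timestamp
# All six keys are re-expired to DAYS_OF_UNREAD days on every write.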
Example #13
 def story_hash(cls, story_id, story_feed_id):
     if not cls.RE_STORY_HASH.match(story_id):
         story, _ = MStory.find_story(story_feed_id=story_feed_id, story_id=story_id)
         if not story: return
         story_id = story.story_hash
     
     return story_id
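
A minimal usage sketch of the normalizer above (hypothetical values, and assuming the enclosing class is NewsBlur's RUserStory with its usual feed_id:hash story-hash format):

# Input already matching RE_STORY_HASH passes through unchanged:
RUserStory.story_hash('42:6a9c5f', story_feed_id=42)        # -> '42:6a9c5f'
# A legacy story id is resolved through MStory.find_story();
# if no story matches, the method returns None:
RUserStory.story_hash('http://example.com/post/1', story_feed_id=42)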
Example #14
    def count_unreads_for_subscribers(self, feed):
        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        user_subs = UserSubscription.objects.filter(
            feed=feed, active=True, user__profile__last_seen_on__gte=UNREAD_CUTOFF
        ).order_by("-last_read_date")
        logging.debug(
            u"   ---> [%-30s] Computing scores: %s (%s/%s/%s) subscribers"
            % (
                unicode(feed)[:30],
                user_subs.count(),
                feed.num_subscribers,
                feed.active_subscribers,
                feed.premium_subscribers,
            )
        )

        stories_db = MStory.objects(story_feed_id=feed.pk, story_date__gte=UNREAD_CUTOFF)
        for sub in user_subs:
            cache.delete("usersub:%s" % sub.user_id)
            sub.needs_unread_recalc = True
            sub.save()

        if self.options["compute_scores"]:
            for sub in user_subs:
                silent = False if self.options["verbose"] >= 2 else True
                sub.calculate_feed_scores(silent=silent, stories_db=stories_db)
Example #15
    def count_unreads_for_subscribers(self, feed):
        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        user_subs = UserSubscription.objects.filter(
            feed=feed, active=True, user__profile__last_seen_on__gte=UNREAD_CUTOFF
        ).order_by("-last_read_date")

        for sub in user_subs:
            if not sub.needs_unread_recalc:
                sub.needs_unread_recalc = True
                sub.save()

        if self.options["compute_scores"]:
            stories_db = MStory.objects(story_feed_id=feed.pk, story_date__gte=UNREAD_CUTOFF)
            logging.debug(
                u"   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)"
                % (
                    feed.title[:30],
                    stories_db.count(),
                    user_subs.count(),
                    feed.num_subscribers,
                    feed.active_subscribers,
                    feed.premium_subscribers,
                )
            )
            self.calculate_feed_scores_with_stories(user_subs, stories_db)
        elif self.options.get("mongodb_replication_lag"):
            logging.debug(
                u"   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag"
                % (feed.title[:30], self.options.get("mongodb_replication_lag"))
            )
Example #16
    def handle(self, *args, **options):
        
        if options['daemonize']:
            daemonize()
        
        settings.LOG_TO_STREAM = True        
            
        r = redis.Redis(connection_pool=settings.REDIS_FEED_POOL)
        
        if options['initialize']:
            feeds = Feed.objects.filter(num_subscribers__gte=1).order_by('?')
            print 'Query feeds done, number of feeds:', len(feeds)
            r.ltrim('freeze_feeds', 1, 0)

            pipeline = r.pipeline()
            for feed in feeds:
                pipeline.rpush('freeze_feeds', feed.pk)
            pipeline.execute()
            print 'Initialized freeze_feeds'

        feed_id = r.lpop('freeze_feeds')
        while feed_id:
            try:
                frozen_num = MStory.freeze_feed(int(feed_id))
                if frozen_num > 0:
                    r.rpush('freeze_feeds', feed_id)
            except Exception, e:
                logging.error(str(e) + traceback.format_exc() + '\n' +
                              'Error from: freeze_feeds\n')
            feed_id = r.lpop('freeze_feeds')
Example #17
def mark_story_as_read(request):
    story_ids = request.REQUEST.getlist("story_id")
    feed_id = int(request.REQUEST["feed_id"])

    try:
        usersub = UserSubscription.objects.select_related("feed").get(user=request.user, feed=feed_id)
    except UserSubscription.DoesNotExist:
        duplicate_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
        if duplicate_feed:
            try:
                usersub = UserSubscription.objects.get(user=request.user, feed=duplicate_feed[0].feed)
            except UserSubscription.DoesNotExist:
                return dict(code=-1)
        else:
            return dict(code=-1)

    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()

    data = dict(code=0, payload=story_ids)

    if len(story_ids) > 1:
        logging.info(" ---> [%s] ~FYRead %s stories in feed: %s" % (request.user, len(story_ids), usersub.feed))
    else:
        logging.info(" ---> [%s] ~FYRead story in feed: %s" % (request.user, usersub.feed))

    for story_id in story_ids:
        story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
        now = datetime.datetime.utcnow()
        m = MUserStory(story=story, user_id=request.user.pk, feed_id=feed_id, read_date=now)
        try:
            m.save()
        except OperationError:
            logging.info(" ---> [%s] ~BRMarked story as read: Duplicate Story -> %s" % (request.user, story_id))

    return data
Example #18
    def count_unreads_for_subscribers(self, feed):
        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        user_subs = UserSubscription.objects.filter(feed=feed, 
                                                    active=True,
                                                    user__profile__last_seen_on__gte=UNREAD_CUTOFF)\
                                            .order_by('-last_read_date')
        
        if not user_subs.count():
            return
            
        for sub in user_subs:
            if not sub.needs_unread_recalc:
                sub.needs_unread_recalc = True
                sub.save()

        if self.options['compute_scores']:
            stories = MStory.objects(story_feed_id=feed.pk,
                                     story_date__gte=UNREAD_CUTOFF)\
                            .read_preference(pymongo.ReadPreference.PRIMARY)
            stories = Feed.format_stories(stories, feed.pk)
            logging.debug(u'   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)' % (
                          feed.title[:30], len(stories), user_subs.count(),
                          feed.num_subscribers, feed.active_subscribers, feed.premium_subscribers))        
            self.calculate_feed_scores_with_stories(user_subs, stories)
        elif self.options.get('mongodb_replication_lag'):
            logging.debug(u'   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag' % (
              feed.title[:30], self.options.get('mongodb_replication_lag')))
Example #19
def bootstrap_userstories():
    print "Mongo DB userstories: %s" % MUserStory.objects().count()
    # db.userstories.drop()
    print "Dropped! Mongo DB userstories: %s" % MUserStory.objects().count()

    print "UserStories: %s" % UserStory.objects.all().count()
    pprint(db.userstories.index_information())

    userstories = UserStory.objects.all().values()
    for userstory in userstories:
        try:
            story = Story.objects.get(pk=userstory['story_id'])
        except Story.DoesNotExist:
            continue
        try:
            userstory['story'] = MStory.objects(story_feed_id=story.story_feed.pk, story_guid=story.story_guid)[0]
        except:
            print '!',
            continue
        print '.',
        del userstory['id']
        del userstory['opinion']
        del userstory['story_id']
        try:
            MUserStory(**userstory).save()
        except:
            print '\n\n!\n\n'
            continue

    print "\nMongo DB userstories: %s" % MUserStory.objects().count()
Example #20
 def mark_feed_read(self):
     if (self.unread_count_negative == 0
         and self.unread_count_neutral == 0
         and self.unread_count_positive == 0
         and not self.needs_unread_recalc):
         return
     
     now = datetime.datetime.utcnow()
     
     # Use the latest story to get last read time.
     latest_story = MStory.objects(story_feed_id=self.feed.pk).order_by('-story_date').only('story_date').limit(1)
     if latest_story and len(latest_story) >= 1:
         latest_story_date = latest_story[0]['story_date']\
                             + datetime.timedelta(seconds=1)
     else:
         latest_story_date = now
     
     self.last_read_date = latest_story_date
     self.mark_read_date = latest_story_date
     self.unread_count_negative = 0
     self.unread_count_positive = 0
     self.unread_count_neutral = 0
     self.unread_count_updated = now
     self.oldest_unread_story_date = now
     self.needs_unread_recalc = False
     
     self.save()
     
     return True
Example #21
 def mark_feed_read(self, cutoff_date=None):
     if (self.unread_count_negative == 0
         and self.unread_count_neutral == 0
         and self.unread_count_positive == 0
         and not self.needs_unread_recalc):
         return
     
     recount = True
     # Use the latest story to get last read time.
     if cutoff_date:
         cutoff_date = cutoff_date + datetime.timedelta(seconds=1)
     else:
         latest_story = MStory.objects(story_feed_id=self.feed.pk).order_by('-story_date').only('story_date').limit(1)
         if latest_story and len(latest_story) >= 1:
             cutoff_date = (latest_story[0]['story_date']
                            + datetime.timedelta(seconds=1))
         else:
             cutoff_date = datetime.datetime.utcnow()
             recount = False
     
     self.last_read_date = cutoff_date
     self.mark_read_date = cutoff_date
     self.oldest_unread_story_date = cutoff_date
     if not recount:
         self.unread_count_negative = 0
         self.unread_count_positive = 0
         self.unread_count_neutral = 0
         self.unread_count_updated = datetime.datetime.utcnow()
         self.needs_unread_recalc = False
     else:
         self.needs_unread_recalc = True
     
     self.save()
     
     return True
Example #22
    def mark_read(cls, user_id, story_feed_id, story_hash, social_user_ids=None, r=None):
        if not r:
            r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        # if not r2:
        #     r2 = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL2)
        
        story_hash = MStory.ensure_story_hash(story_hash, story_feed_id=story_feed_id)
        
        if not story_hash: return
        
        def redis_commands(key):
            r.sadd(key, story_hash)
            # r2.sadd(key, story_hash)
            r.expire(key, settings.DAYS_OF_STORY_HASHES*24*60*60)
            # r2.expire(key, settings.DAYS_OF_STORY_HASHES*24*60*60)

        all_read_stories_key = 'RS:%s' % (user_id)
        redis_commands(all_read_stories_key)
        
        read_story_key = 'RS:%s:%s' % (user_id, story_feed_id)
        redis_commands(read_story_key)
        
        if social_user_ids:
            for social_user_id in social_user_ids:
                social_read_story_key = 'RS:%s:B:%s' % (user_id, social_user_id)
                redis_commands(social_read_story_key)
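
For reference, a comment sketch of the fan-out performed by redis_commands() above, for a hypothetical user 12 marking a story read in feed 42 while followers 7 and 9 have shared it (key names are exactly those built in the code):

# RS:12       -- every story hash user 12 has read
# RS:12:42    -- story hashes user 12 has read in feed 42
# RS:12:B:7   -- read state mirrored into the social feed of user 7
# RS:12:B:9   -- ... and of user 9
# Each key is SADD'ed with the story hash and re-expired to
# DAYS_OF_STORY_HASHES days.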
Example #23
 def mark_feed_read(self):
     now = datetime.datetime.utcnow()
     
     # Use the latest story to get last read time.
     latest_story = MStory.objects(story_feed_id=self.feed.pk).order_by('-story_date').only('story_date').limit(1)
     if latest_story and len(latest_story) >= 1:
         latest_story_date = latest_story[0]['story_date']\
                             + datetime.timedelta(seconds=1)
     else:
         latest_story_date = now
     
     self.last_read_date = latest_story_date
     self.mark_read_date = latest_story_date
     self.unread_count_negative = 0
     self.unread_count_positive = 0
     self.unread_count_neutral = 0
     self.unread_count_updated = now
     self.oldest_unread_story_date = now
     self.needs_unread_recalc = False
     
     # No longer removing old user read stories, since they're needed for social,
     # and they get cleaned up automatically when new stories come in.
     # MUserStory.delete_old_stories(self.user_id, self.feed_id)
     
     self.save()
Example #24
    def count_unreads_for_subscribers(self, feed):
        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        user_subs = UserSubscription.objects.filter(feed=feed, 
                                                    active=True,
                                                    user__profile__last_seen_on__gte=UNREAD_CUTOFF)\
                                            .order_by('-last_read_date')
        logging.debug(u'   ---> [%-30s] Computing scores: %s (%s/%s/%s) subscribers' % (
                      unicode(feed)[:30], user_subs.count(),
                      feed.num_subscribers, feed.active_subscribers, feed.premium_subscribers))
        
        if self.options['slave_db']:
            slave_db = self.options['slave_db']

            stories_db_orig = slave_db.stories.find({
                "story_feed_id": feed.pk,
                "story_date": {
                    "$gte": UNREAD_CUTOFF,
                },
            })
            stories_db = []
            for story in stories_db_orig:
                stories_db.append(bunch(story))
        else:
            stories_db = MStory.objects(story_feed_id=feed.pk,
                                        story_date__gte=UNREAD_CUTOFF)
        for sub in user_subs:
            cache.delete('usersub:%s' % sub.user_id)
            sub.needs_unread_recalc = True
            sub.save()
            
        if self.options['compute_scores']:
            for sub in user_subs:
                silent = False if self.options['verbose'] >= 2 else True
                sub.calculate_feed_scores(silent=silent, stories_db=stories_db)
Example #25
    def mark_story_hashes_read(cls, user_id, story_hashes, r=None, s=None):
        if not r:
            r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        if not s:
            s = redis.Redis(connection_pool=settings.REDIS_POOL)
        # if not r2:
        #     r2 = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL2)
        
        p = r.pipeline()
        # p2 = r2.pipeline()
        feed_ids = set()
        friend_ids = set()
        
        if not isinstance(story_hashes, list):
            story_hashes = [story_hashes]
        
        for story_hash in story_hashes:
            feed_id, _ = MStory.split_story_hash(story_hash)
            feed_ids.add(feed_id)

            # Find other social feeds with this story to update their counts
            friend_key = "F:%s:F" % (user_id)
            share_key = "S:%s" % (story_hash)
            friends_with_shares = [int(f) for f in s.sinter(share_key, friend_key)]
            friend_ids.update(friends_with_shares)
            cls.mark_read(user_id, feed_id, story_hash, social_user_ids=friends_with_shares, r=p)
        
        p.execute()
        # p2.execute()
        
        return list(feed_ids), list(friend_ids)
Example #26
def imagesearch(request, page_num):
    if page_num == '':
        page_num = '1'
    page_num = int(page_num)
    num_per_page = 15

    q = request.GET.get('q', None)
    if q:
        image_server = settings.FDFS_HTTP_SERVER
        index_stories = SearchStory.query(q)[:500]
        response_images = []
        for index_story in index_stories:
            story = MStory.objects(id=index_story['db_id']).first()
            if story and story.image_ids:
                for image_id in story.image_ids:
                    if len(image_id) > 20:
                        # print image_id
                        image = MImage.objects(id=image_id).first()
                        if not image:
                            continue
                        imagedict = dict(
                            image_url=image_server + image.image_remote_id,
                            story_url=story.story_guid,
                            story_title=story.story_title,
                        )
                        response_images.append(imagedict)
                        if len(response_images) >= 50:
                            return render(request, 'imagesearch.html', locals())
    return render(request, 'imagesearch.html', locals())
Example #27
def mark_story_as_read(request):
    story_ids = request.REQUEST.getlist('story_id')
    feed_id = int(request.REQUEST['feed_id'])
    
    usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id)
    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()
        
    data = dict(code=0, payload=story_ids)
    
    if len(story_ids) > 1:
        logging.debug(" ---> [%s] Read %s stories in feed: %s" % (request.user, len(story_ids), usersub.feed))
    else:
        logging.debug(" ---> [%s] Read story in feed: %s" % (request.user, usersub.feed))
        
    for story_id in story_ids:
        story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
        now = datetime.datetime.utcnow()
        m = MUserStory(story=story, user_id=request.user.pk, feed_id=feed_id, read_date=now)
        try:
            m.save()
        except OperationError:
            logging.info(' ---> [%s] *** Marked story as read: Duplicate Story -> %s' % (request.user, story_id))
    
    return data
Example #28
 def mark_story_ids_as_read(self, story_ids, request=None):
     data = dict(code=0, payload=story_ids)
     
     if not request:
         request = self.user
 
     if not self.needs_unread_recalc:
         self.needs_unread_recalc = True
         self.save()
 
     if len(story_ids) > 1:
         logging.user(request, "~FYRead %s stories in feed: %s" % (len(story_ids), self.feed))
     else:
         logging.user(request, "~FYRead story in feed: %s" % (self.feed))
     
     for story_id in set(story_ids):
         story, _ = MStory.find_story(story_feed_id=self.feed_id, story_id=story_id)
         if not story: continue
         now = datetime.datetime.utcnow()
         date = now if now > story.story_date else story.story_date # For handling future stories
         m, _ = MUserStory.objects.get_or_create(story_id=story_id, user_id=self.user_id, 
                                                 feed_id=self.feed_id, defaults={
             'read_date': date, 
             'story': story, 
             'story_date': story.story_date,
         })
             
     return data
Example #29
def mark_story_as_unread(request):
    story_id = request.POST['story_id']
    feed_id = int(request.POST['feed_id'])

    try:
        usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id)
    except UserSubscription.DoesNotExist:
        duplicate_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
        if duplicate_feed:
            try:
                usersub = UserSubscription.objects.get(user=request.user, 
                                                       feed=duplicate_feed[0].feed)
            except UserSubscription.DoesNotExist:
                return dict(code=-1)
        else:
            return dict(code=-1)
                
    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()
        
    data = dict(code=0, payload=dict(story_id=story_id))
    logging.user(request, "~FY~SBUnread~SN story in feed: %s" % (usersub.feed))
        
    story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
    m = MUserStory.objects(story=story, user_id=request.user.pk, feed_id=feed_id)
    m.delete()
    
    return data
Example #30
    def switch_feed(cls, user_id, old_feed_id, new_feed_id):
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        # r2 = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL2)
        p = r.pipeline()
        # p2 = r2.pipeline()
        story_hashes = cls.get_stories(user_id, old_feed_id, r=r)
        
        for story_hash in story_hashes:
            _, hash_story = MStory.split_story_hash(story_hash)
            new_story_hash = "%s:%s" % (new_feed_id, hash_story)
            read_feed_key = "RS:%s:%s" % (user_id, new_feed_id)
            p.sadd(read_feed_key, new_story_hash)
            # p2.sadd(read_feed_key, new_story_hash)
            p.expire(read_feed_key, settings.DAYS_OF_STORY_HASHES*24*60*60)
            # p2.expire(read_feed_key, settings.DAYS_OF_STORY_HASHES*24*60*60)

            read_user_key = "RS:%s" % (user_id)
            p.sadd(read_user_key, new_story_hash)
            # p2.sadd(read_user_key, new_story_hash)
            p.expire(read_user_key, settings.DAYS_OF_STORY_HASHES*24*60*60)
            # p2.expire(read_user_key, settings.DAYS_OF_STORY_HASHES*24*60*60)
        
        p.execute()
        # p2.execute()
        
        if len(story_hashes) > 0:
            logging.info(" ---> %s read stories" % len(story_hashes))
Example #31
    def process(self, first_run=True):
        """ Downloads and parses a feed.
        """
        self.refresh_feed()

        ret_values = {
            ENTRY_NEW: 0,
            ENTRY_UPDATED: 0,
            ENTRY_SAME: 0,
            ENTRY_ERR: 0
        }

        # logging.debug(u' ---> [%d] Processing %s' % (self.feed.id, self.feed.feed_title))

        self.feed.fetched_once = True
        self.feed.last_update = datetime.datetime.utcnow()

        if hasattr(self.fpf, 'status'):
            if self.options['verbose']:
                logging.debug(
                    u'   ---> [%-30s] Fetched feed, HTTP status %d: %s (bozo: %s)'
                    % (unicode(self.feed)[:30], self.fpf.status,
                       self.feed.feed_address, self.fpf.bozo))
                if self.fpf.bozo and self.fpf.status != 304:
                    logging.debug(
                        u'   ---> [%-30s] BOZO exception: %s (%s entries)' %
                        (unicode(self.feed)[:30], self.fpf.bozo_exception,
                         len(self.fpf.entries)))
            if self.fpf.status == 304:
                self.feed.save()
                self.feed.save_feed_history(304, "Not modified")
                return FEED_SAME, ret_values

            if self.fpf.status in (302, 301):
                if not self.fpf.href.endswith('feedburner.com/atom.xml'):
                    self.feed.feed_address = self.fpf.href
                if first_run:
                    self.feed.schedule_feed_fetch_immediately()
                if not self.fpf.entries:
                    self.feed.save()
                    self.feed.save_feed_history(self.fpf.status,
                                                "HTTP Redirect")
                    return FEED_ERRHTTP, ret_values

            if self.fpf.status >= 400:
                logging.debug(
                    "   ---> [%-30s] HTTP Status code: %s. Checking address..."
                    % (unicode(self.feed)[:30], self.fpf.status))
                fixed_feed = self.feed.check_feed_address_for_feed_link()
                if not fixed_feed:
                    self.feed.save_feed_history(self.fpf.status, "HTTP Error")
                else:
                    self.feed.schedule_feed_fetch_immediately()
                self.feed.save()
                return FEED_ERRHTTP, ret_values

        if self.fpf.bozo and isinstance(self.fpf.bozo_exception,
                                        feedparser.NonXMLContentType):
            if not self.fpf.entries:
                logging.debug(
                    "   ---> [%-30s] Feed is Non-XML. %s entries. Checking address..."
                    % (unicode(self.feed)[:30], len(self.fpf.entries)))
                fixed_feed = self.feed.check_feed_address_for_feed_link()
                if not fixed_feed:
                    self.feed.save_feed_history(502, 'Non-xml feed',
                                                self.fpf.bozo_exception)
                else:
                    self.feed.schedule_feed_fetch_immediately()
                self.feed.save()
                return FEED_ERRPARSE, ret_values
        elif self.fpf.bozo and isinstance(self.fpf.bozo_exception,
                                          xml.sax._exceptions.SAXException):
            logging.debug(
                "   ---> [%-30s] Feed is Bad XML (SAX). %s entries. Checking address..."
                % (unicode(self.feed)[:30], len(self.fpf.entries)))
            if not self.fpf.entries:
                fixed_feed = self.feed.check_feed_address_for_feed_link()
                if not fixed_feed:
                    self.feed.save_feed_history(503, 'SAX Exception',
                                                self.fpf.bozo_exception)
                else:
                    self.feed.schedule_feed_fetch_immediately()
                self.feed.save()
                return FEED_ERRPARSE, ret_values

        # the feed has changed (or it is the first time we parse it)
        # saving the etag and last_modified fields
        self.feed.etag = self.fpf.get('etag')
        if self.feed.etag:
            self.feed.etag = self.feed.etag[:255]
        # sometimes this is None (it never should be) *sigh*
        if self.feed.etag is None:
            self.feed.etag = ''

        try:
            self.feed.last_modified = mtime(self.fpf.modified)
        except:
            pass

        self.fpf.entries = self.fpf.entries[:50]

        self.feed.feed_title = self.fpf.feed.get('title', self.feed.feed_title)
        tagline = self.fpf.feed.get('tagline', self.feed.data.feed_tagline)
        if tagline:
            self.feed.data.feed_tagline = utf8encode(tagline)
            self.feed.data.save()
        self.feed.feed_link = self.fpf.feed.get('link') or self.fpf.feed.get(
            'id') or self.feed.feed_link

        self.feed.last_update = datetime.datetime.utcnow()

        guids = []
        for entry in self.fpf.entries:
            if entry.get('id', ''):
                guids.append(entry.get('id', ''))
            elif entry.get('link'):
                guids.append(entry.link)
            elif entry.get('title'):
                guids.append(entry.title)
        self.feed.save()

        # Compare new stories to existing stories, adding and updating
        start_date = datetime.datetime.utcnow()
        # end_date = datetime.datetime.utcnow()
        story_guids = []
        for entry in self.fpf.entries:
            story = pre_process_story(entry)
            if story.get('published') < start_date:
                start_date = story.get('published')
            # if story.get('published') > end_date:
            #     end_date = story.get('published')
            story_guids.append(story.get('guid') or story.get('link'))
        existing_stories = list(
            MStory.objects(
                # story_guid__in=story_guids,
                story_date__gte=start_date,
                story_feed_id=self.feed.pk).limit(len(story_guids)))

        # MStory.objects(
        #     (Q(story_date__gte=start_date) & Q(story_date__lte=end_date))
        #     | (Q(story_guid__in=story_guids)),
        #     story_feed=self.feed
        # ).order_by('-story_date')
        ret_values = self.feed.add_update_stories(self.fpf.entries,
                                                  existing_stories)

        logging.debug(u'   ---> [%-30s] Parsed Feed: %s' % (
            unicode(self.feed)[:30],
            u' '.join(u'%s=%d' % (self.entry_trans[key], ret_values[key])
                      for key in self.entry_keys),
        ))
        self.feed.update_all_statistics()
        self.feed.trim_feed()
        self.feed.save_feed_history(200, "OK")

        return FEED_OK, ret_values
Example #32
    def calculate_feed_scores(self, silent=False, stories=None, force=False):
        # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        
        if self.user.profile.last_seen_on < UNREAD_CUTOFF and not force:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return
        
        # if not self.feed.fetched_once:
        #     if not silent:
        #         logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
        #     self.needs_unread_recalc = False
        #     self.save()
        #     return
            
        feed_scores = dict(negative=0, neutral=0, positive=0)
        
        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta
        
        if not stories:
            stories = cache.get('S:%s' % self.feed_id)
            
        unread_story_hashes = self.get_stories(read_filter='unread', limit=500, hashes_only=True)
        
        if not stories:
            stories_db = MStory.objects(story_hash__in=unread_story_hashes)
            stories = Feed.format_stories(stories_db, self.feed_id)
        
        oldest_unread_story_date = now
        unread_stories = []
        for story in stories:
            if story['story_date'] < date_delta:
                continue
            if story['story_hash'] in unread_story_hashes:
                unread_stories.append(story)
                if story['story_date'] < oldest_unread_story_date:
                    oldest_unread_story_date = story['story_date']

        # if not silent:
        #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))
        
        classifier_feeds   = list(MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0))
        classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_titles  = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_tags    = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

        # if not silent:
        #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))
            
        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }
        
        for story in unread_stories:
            scores.update({
                'author' : apply_classifier_authors(classifier_authors, story),
                'tags'   : apply_classifier_tags(classifier_tags, story),
                'title'  : apply_classifier_titles(classifier_titles, story),
            })
            
            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1
                
        
        # if not silent:
        #     logging.info(' ---> [%s]    End classifiers: %s' % (self.user, datetime.datetime.now() - now))

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False
        
        self.save()

        if (self.unread_count_positive == 0 and 
            self.unread_count_neutral == 0):
            self.mark_feed_read()
        
        if not silent:
            logging.user(self.user, '~FC~SNComputing scores: %s (~SB%s~SN/~SB%s~SN/~SB%s~SN)' % (self.feed, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))
            
        return self
Example #33
    def get_stories(self, offset=0, limit=6, order='newest', read_filter='all', withscores=False, hashes_only=False):
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        ignore_user_stories = False
        
        stories_key         = 'F:%s' % (self.feed_id)
        read_stories_key    = 'RS:%s:%s' % (self.user_id, self.feed_id)
        unread_stories_key  = 'U:%s:%s' % (self.user_id, self.feed_id)

        unread_ranked_stories_key  = 'z%sU:%s:%s' % ('h' if hashes_only else '', 
                                                     self.user_id, self.feed_id)
        if offset and not withscores and r.exists(unread_ranked_stories_key):
            pass
        else:
            r.delete(unread_ranked_stories_key)
            if not r.exists(stories_key):
                print " ---> No stories on feed: %s" % self
                return []
            elif read_filter != 'unread' or not r.exists(read_stories_key):
                ignore_user_stories = True
                unread_stories_key = stories_key
            else:
                r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
            sorted_stories_key          = 'zF:%s' % (self.feed_id)
            r.zinterstore(unread_ranked_stories_key, [sorted_stories_key, unread_stories_key])
        
        current_time    = int(time.time() + 60*60*24)
        if order == 'oldest':
            byscorefunc = r.zrangebyscore
            if read_filter == 'unread':
                min_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
            else:
                now = datetime.datetime.now()
                two_weeks_ago = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
                min_score = int(time.mktime(two_weeks_ago.timetuple()))-1000
            max_score = current_time
        else:
            byscorefunc = r.zrevrangebyscore
            min_score = current_time
            if read_filter == 'unread':
                # +1 for the intersection b/w zF and F, which carries an implicit score of 1.
                max_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
            else:
                max_score = 0

        if settings.DEBUG:
            debug_stories = r.zrevrange(unread_ranked_stories_key, 0, -1, withscores=True)
            print " ---> Unread all stories (%s - %s) %s stories: %s" % (
                min_score,
                max_score,
                len(debug_stories),
                debug_stories)
        story_ids = byscorefunc(unread_ranked_stories_key, min_score, 
                                  max_score, start=offset, num=500,
                                  withscores=withscores)[:limit]
        r.expire(unread_ranked_stories_key, 24*60*60)
        if not ignore_user_stories:
            r.delete(unread_stories_key)
        
        if withscores or hashes_only:
            return story_ids
        elif story_ids:
            story_date_order = "%sstory_date" % ('' if order == 'oldest' else '-')
            mstories = MStory.objects(story_hash__in=story_ids).order_by(story_date_order)
            stories = Feed.format_stories(mstories)
            return stories
        else:
            return []
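
The Redis set algebra in get_stories() above is compact enough to deserve a summary; a comment sketch using the key names from the code (F = the feed's story hashes, RS = the user's read hashes, zF = story hashes scored by story date):

# U:<user>:<feed>   = F:<feed> - RS:<user>:<feed>    (SDIFFSTORE: unread set)
# zU:<user>:<feed>  = zF:<feed> ZINTERSTORE U        ('zhU:' when hashes_only;
#                     scores are story-date timestamps, and set members of F
#                     carry an implicit score of 1, hence the +1 on the
#                     mark_read_date boundaries)
# The page is then a ZRANGEBYSCORE / ZREVRANGEBYSCORE slice of zU,
# hydrated from Mongo via MStory.objects(story_hash__in=...).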
Example #34
#!/usr/bin/env python

from utils.munin.base import MuninGraph
from apps.rss_feeds.models import MStory
from apps.reader.models import MUserStory

graph_config = {
    'graph_category': 'NewsBlur',
    'graph_title': 'NewsBlur Stories',
    'graph_vlabel': 'Stories',
    'stories.label': 'stories',
    'tags.label': 'tags',
    'authors.label': 'authors',
    'read_stories.label': 'read_stories',
}

metrics = {
    'stories': MStory.objects().count(),
    'read_stories': MUserStory.objects().count(),
}

if __name__ == '__main__':
    MuninGraph(graph_config, metrics).run()
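
As a usage note, munin invokes a plugin like this on a schedule and reads label.value lines from stdout; a hedged sketch of a fetch run (values illustrative, file name hypothetical, and assuming MuninGraph implements the standard munin plugin protocol):

# $ python stories_munin.py
# stories.value 1240932
# read_stories.value 403211
#
# $ python stories_munin.py config
# ...would instead print the graph_config declarations (graph_title,
# graph_vlabel, the *.label fields, and so on).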
Example #35
class UserSubscription(models.Model):
    """
    A feed which a user has subscribed to. Carries all of the cached information
    about the subscription, including unread counts of the three primary scores.
    
    Also has a dirty flag (needs_unread_recalc) which means that the unread counts
    are not accurate and need to be calculated with `self.calculate_feed_scores()`.
    """
    UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(
        days=settings.DAYS_OF_UNREAD)

    user = models.ForeignKey(User, related_name='subscriptions')
    feed = models.ForeignKey(Feed, related_name='subscribers')
    user_title = models.CharField(max_length=255, null=True, blank=True)
    active = models.BooleanField(default=False)
    last_read_date = models.DateTimeField(default=UNREAD_CUTOFF)
    mark_read_date = models.DateTimeField(default=UNREAD_CUTOFF)
    unread_count_neutral = models.IntegerField(default=0)
    unread_count_positive = models.IntegerField(default=0)
    unread_count_negative = models.IntegerField(default=0)
    unread_count_updated = models.DateTimeField(default=datetime.datetime.now)
    oldest_unread_story_date = models.DateTimeField(
        default=datetime.datetime.now)
    needs_unread_recalc = models.BooleanField(default=False)
    feed_opens = models.IntegerField(default=0)
    is_trained = models.BooleanField(default=False)

    objects = UserSubscriptionManager()

    def __unicode__(self):
        return '[%s (%s): %s (%s)] ' % (self.user.username, self.user.pk,
                                        self.feed.feed_title, self.feed.pk)

    class Meta:
        unique_together = ("user", "feed")

    def canonical(self, full=False, include_favicon=True, classifiers=None):
        feed = self.feed.canonical(full=full, include_favicon=include_favicon)
        feed['feed_title'] = self.user_title or feed['feed_title']
        feed['ps'] = self.unread_count_positive
        feed['nt'] = self.unread_count_neutral
        feed['ng'] = self.unread_count_negative
        feed['active'] = self.active
        feed['feed_opens'] = self.feed_opens
        feed['subscribed'] = True
        if classifiers:
            feed['classifiers'] = classifiers
        if not self.active and self.user.profile.is_premium:
            feed['active'] = True
            self.active = True
            self.save()

        return feed

    def save(self, *args, **kwargs):
        user_title_max = self._meta.get_field('user_title').max_length
        if self.user_title and len(self.user_title) > user_title_max:
            self.user_title = self.user_title[:user_title_max]
        if not self.active and self.user.profile.is_premium:
            self.active = True
        try:
            super(UserSubscription, self).save(*args, **kwargs)
        except IntegrityError:
            duplicate_feeds = DuplicateFeed.objects.filter(
                duplicate_feed_id=self.feed_id)
            for duplicate_feed in duplicate_feeds:
                already_subscribed = UserSubscription.objects.filter(
                    user=self.user, feed=duplicate_feed.feed)
                if not already_subscribed:
                    self.feed = duplicate_feed.feed
                    super(UserSubscription, self).save(*args, **kwargs)
                    break
            else:
                self.delete()

    @classmethod
    def sync_all_redis(cls, user_id, skip_feed=False):
        us = cls.objects.filter(user=user_id)

        for sub in us:
            print " ---> Syncing usersub: %s" % sub
            sub.sync_redis(skip_feed=skip_feed)

    def sync_redis(self, skip_feed=False):
        r = redis.Redis(connection_pool=settings.REDIS_STORY_POOL)

        if not skip_feed:
            self.feed.sync_redis()

        userstories = MUserStory.objects.filter(feed_id=self.feed_id,
                                                user_id=self.user_id)
        for userstory in userstories:
            userstory.sync_redis(r=r)

    def get_stories(self,
                    offset=0,
                    limit=6,
                    order='newest',
                    read_filter='all',
                    withscores=False):
        r = redis.Redis(connection_pool=settings.REDIS_STORY_POOL)
        ignore_user_stories = False

        stories_key = 'F:%s' % (self.feed_id)
        read_stories_key = 'RS:%s:%s' % (self.user_id, self.feed_id)
        unread_stories_key = 'U:%s:%s' % (self.user_id, self.feed_id)

        if not r.exists(stories_key):
            print " ---> No stories on feed: %s" % self
            return []
        elif read_filter != 'unread' or not r.exists(read_stories_key):
            ignore_user_stories = True
            unread_stories_key = stories_key
        else:
            r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
        sorted_stories_key = 'zF:%s' % (self.feed_id)
        unread_ranked_stories_key = 'zU:%s:%s' % (self.user_id, self.feed_id)
        r.zinterstore(unread_ranked_stories_key,
                      [sorted_stories_key, unread_stories_key])

        current_time = int(time.time() + 60 * 60 * 24)
        if order == 'oldest':
            byscorefunc = r.zrangebyscore
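            # NOTE: the "or True" below always takes the mark_read_date cutoff,
            # leaving the two-weeks-ago fallback in the else branch dead.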
            if read_filter == 'unread' or True:
                min_score = int(time.mktime(
                    self.mark_read_date.timetuple())) + 1
            else:
                now = datetime.datetime.now()
                two_weeks_ago = now - datetime.timedelta(
                    days=settings.DAYS_OF_UNREAD)
                min_score = int(time.mktime(two_weeks_ago.timetuple())) - 1000
            max_score = current_time
        else:
            byscorefunc = r.zrevrangebyscore
            min_score = current_time
            # +1 for the intersection b/w zF and F, which carries an implicit score of 1.
            max_score = int(time.mktime(self.mark_read_date.timetuple())) + 1

        if settings.DEBUG:
            print " ---> Unread all stories: %s" % r.zrevrange(
                unread_ranked_stories_key, 0, -1)
        story_ids = byscorefunc(unread_ranked_stories_key,
                                min_score,
                                max_score,
                                start=offset,
                                num=limit,
                                withscores=withscores)

        r.expire(unread_ranked_stories_key, 24 * 60 * 60)
        if not ignore_user_stories:
            r.delete(unread_stories_key)

        # XXX TODO: Remove below line after combing redis for these None's.
        story_ids = [s for s in story_ids if s and s != 'None']  # ugh, hack

        return story_ids

    @classmethod
    def feed_stories(cls,
                     user_id,
                     feed_ids,
                     offset=0,
                     limit=6,
                     order='newest',
                     read_filter='all'):
        r = redis.Redis(connection_pool=settings.REDIS_STORY_POOL)

        if order == 'oldest':
            range_func = r.zrange
        else:
            range_func = r.zrevrange

        if not isinstance(feed_ids, list):
            feed_ids = [feed_ids]

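        # zU:<user_id> caches the ranked union of unreads across these feeds;
        # reuse it while paging (offset > 0), otherwise rebuild it from scratch.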
        unread_ranked_stories_keys = 'zU:%s' % (user_id)
        if offset and r.exists(unread_ranked_stories_keys):
            story_guids = range_func(unread_ranked_stories_keys, offset, limit)
            return story_guids
        else:
            r.delete(unread_ranked_stories_keys)

        for feed_id in feed_ids:
            try:
                us = cls.objects.get(user=user_id, feed=feed_id)
            except cls.DoesNotExist:
                continue
            story_guids = us.get_stories(offset=0,
                                         limit=200,
                                         order=order,
                                         read_filter=read_filter,
                                         withscores=True)

            if story_guids:
                r.zadd(unread_ranked_stories_keys, **dict(story_guids))

        story_guids = range_func(unread_ranked_stories_keys, offset, limit)
        r.expire(unread_ranked_stories_keys, 24 * 60 * 60)

        return story_guids

    @classmethod
    def add_subscription(cls,
                         user,
                         feed_address,
                         folder=None,
                         bookmarklet=False,
                         auto_active=True,
                         skip_fetch=False):
        feed = None
        us = None

        logging.user(
            user, "~FRAdding URL: ~SB%s (in %s) %s" %
            (feed_address, folder, "~FCAUTO-ADD" if not auto_active else ""))

        feed = Feed.get_feed_from_url(feed_address)

        if not feed:
            code = -1
            if bookmarklet:
                message = "This site does not have an RSS feed. Nothing is linked to from this page."
            else:
                message = "This address does not point to an RSS feed or a website with an RSS feed."
        else:
            us, subscription_created = cls.objects.get_or_create(
                feed=feed,
                user=user,
                defaults={
                    'needs_unread_recalc': True,
                    'active': auto_active,
                })
            code = 1
            message = ""

        if us:
            user_sub_folders_object, created = UserSubscriptionFolders.objects.get_or_create(
                user=user, defaults={'folders': '[]'})
            if created:
                user_sub_folders = []
            else:
                user_sub_folders = json.decode(user_sub_folders_object.folders)
            user_sub_folders = add_object_to_folder(feed.pk, folder,
                                                    user_sub_folders)
            user_sub_folders_object.folders = json.encode(user_sub_folders)
            user_sub_folders_object.save()

            if auto_active or user.profile.is_premium:
                us.active = True
                us.save()

            one_day_ago = datetime.datetime.utcnow() - datetime.timedelta(days=1)
            if not skip_fetch and feed.last_update < one_day_ago:
                feed = feed.update()

            from apps.social.models import MActivity
            MActivity.new_feed_subscription(user_id=user.pk,
                                            feed_id=feed.pk,
                                            feed_title=feed.title)
            feed.setup_feed_for_premium_subscribers()

        return code, message, us

    @classmethod
    def feeds_with_updated_counts(cls,
                                  user,
                                  feed_ids=None,
                                  check_fetch_status=False):
        feeds = {}

        # Get subscriptions for user
        user_subs = cls.objects.select_related('feed').filter(user=user,
                                                              active=True)
        # feed_ids defaults to None, so guard before filtering out river keys.
        if feed_ids:
            feed_ids = [f for f in feed_ids if f and not f.startswith('river')]
        if feed_ids:
            user_subs = user_subs.filter(feed__in=feed_ids)

        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(
            days=settings.DAYS_OF_UNREAD)

        for i, sub in enumerate(user_subs):
            # Count unreads if subscription is stale.
            if (sub.needs_unread_recalc
                    or sub.unread_count_updated < UNREAD_CUTOFF
                    or sub.oldest_unread_story_date < UNREAD_CUTOFF):
                sub = sub.calculate_feed_scores(silent=True)
            if not sub:
                continue  # TODO: Figure out the correct sub and give it a new feed_id

            feed_id = sub.feed_id
            feeds[feed_id] = {
                'ps': sub.unread_count_positive,
                'nt': sub.unread_count_neutral,
                'ng': sub.unread_count_negative,
                'id': feed_id,
            }
            if not sub.feed.fetched_once or check_fetch_status:
                feeds[feed_id]['fetched_once'] = sub.feed.fetched_once
                feeds[feed_id]['not_yet_fetched'] = not sub.feed.fetched_once  # Legacy. Dammit.
            if sub.feed.favicon_fetching:
                feeds[feed_id]['favicon_fetching'] = True
            if sub.feed.has_feed_exception or sub.feed.has_page_exception:
                feeds[feed_id]['has_exception'] = True
                feeds[feed_id]['exception_type'] = 'feed' if sub.feed.has_feed_exception else 'page'
                feeds[feed_id]['feed_address'] = sub.feed.feed_address
                feeds[feed_id]['exception_code'] = sub.feed.exception_code

        return feeds

    def mark_feed_read(self):
        now = datetime.datetime.utcnow()

        # Use the latest story to get last read time.
        latest_story = MStory.objects(story_feed_id=self.feed.pk).order_by(
            '-story_date').only('story_date').limit(1)
        if latest_story and len(latest_story) >= 1:
            latest_story_date = latest_story[0]['story_date']\
                                + datetime.timedelta(seconds=1)
        else:
            latest_story_date = now

        self.last_read_date = latest_story_date
        self.mark_read_date = latest_story_date
        self.unread_count_negative = 0
        self.unread_count_positive = 0
        self.unread_count_neutral = 0
        self.unread_count_updated = now
        self.oldest_unread_story_date = now
        self.needs_unread_recalc = False

        # No longer removing old user read stories, since they're needed for social,
        # and they get cleaned up automatically when new stories come in.
        # MUserStory.delete_old_stories(self.user_id, self.feed_id)

        self.save()

    def mark_story_ids_as_read(self, story_ids, request=None):
        data = dict(code=0, payload=story_ids)

        if not request:
            request = self.user

        if not self.needs_unread_recalc:
            self.needs_unread_recalc = True
            self.save()

        if len(story_ids) > 1:
            logging.user(
                request,
                "~FYRead %s stories in feed: %s" % (len(story_ids), self.feed))
        else:
            logging.user(request, "~FYRead story in feed: %s" % (self.feed))

        for story_id in set(story_ids):
            try:
                story = MStory.objects.get(story_feed_id=self.feed_id,
                                           story_guid=story_id)
            except MStory.DoesNotExist:
                # Story has been deleted, probably by feed_fetcher.
                continue
            except MStory.MultipleObjectsReturned:
                story = MStory.objects.filter(story_feed_id=self.feed_id,
                                              story_guid=story_id)[0]
            now = datetime.datetime.utcnow()
            date = now if now > story.story_date else story.story_date  # For handling future stories
            m, _ = MUserStory.objects.get_or_create(story=story,
                                                    user_id=self.user_id,
                                                    feed_id=self.feed_id,
                                                    defaults={
                                                        'read_date': date,
                                                        'story_id': story_id,
                                                        'story_date': story.story_date,
                                                    })

        return data

    def calculate_feed_scores(self, silent=False, stories_db=None):
        # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        if self.user.profile.last_seen_on < UNREAD_CUTOFF:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return

        if not self.feed.fetched_once:
            if not silent:
                logging.info(' ---> [%s] NOT Computing scores: %s' %
                             (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return

        feed_scores = dict(negative=0, neutral=0, positive=0)

        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta

        read_stories = MUserStory.objects(user_id=self.user_id,
                                          feed_id=self.feed_id,
                                          read_date__gte=self.mark_read_date)
        # if not silent:
        #     logging.info(' ---> [%s]    Read stories: %s' % (self.user, datetime.datetime.now() - now))
        read_stories_ids = [us.story_id for us in read_stories]
        stories_db = stories_db or MStory.objects(story_feed_id=self.feed_id,
                                                  story_date__gte=date_delta)
        # if not silent:
        #     logging.info(' ---> [%s]    MStory: %s' % (self.user, datetime.datetime.now() - now))
        oldest_unread_story_date = now
        unread_stories_db = []
        for story in stories_db:
            if story.story_date < date_delta:
                continue
            if hasattr(story, 'story_guid') and story.story_guid not in read_stories_ids:
                unread_stories_db.append(story)
                if story.story_date < oldest_unread_story_date:
                    oldest_unread_story_date = story.story_date
        stories = Feed.format_stories(unread_stories_db, self.feed_id)
        # if not silent:
        #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))

        classifier_feeds = list(
            MClassifierFeed.objects(user_id=self.user_id,
                                    feed_id=self.feed_id,
                                    social_user_id=0))
        classifier_authors = list(
            MClassifierAuthor.objects(user_id=self.user_id,
                                      feed_id=self.feed_id))
        classifier_titles = list(
            MClassifierTitle.objects(user_id=self.user_id,
                                     feed_id=self.feed_id))
        classifier_tags = list(
            MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

        # if not silent:
        #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }

        for story in stories:
            scores.update({
                'author': apply_classifier_authors(classifier_authors, story),
                'tags': apply_classifier_tags(classifier_tags, story),
                'title': apply_classifier_titles(classifier_titles, story),
            })

            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1

        # if not silent:
        #     logging.info(' ---> [%s]    End classifiers: %s' % (self.user, datetime.datetime.now() - now))

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False

        self.save()

        if (self.unread_count_positive == 0 and self.unread_count_neutral == 0
                and self.unread_count_negative == 0):
            self.mark_feed_read()

        if not silent:
            logging.info(' ---> [%s] Computing scores: %s (%s/%s/%s)' %
                         (self.user, self.feed, feed_scores['negative'],
                          feed_scores['neutral'], feed_scores['positive']))

        return self

    def switch_feed(self, new_feed, old_feed):
        # Rewrite feed in subscription folders
        try:
            user_sub_folders = UserSubscriptionFolders.objects.get(
                user=self.user)
        except Exception, e:
            logging.info(" *** ---> UserSubscriptionFolders error: %s" % e)
            return

        # Switch to original feed for the user subscription
        logging.info("      ===> %s " % self.user)
        self.feed = new_feed
        self.needs_unread_recalc = True
        try:
            self.save()
            user_sub_folders.rewrite_feed(new_feed, old_feed)
        except (IntegrityError, OperationError):
            logging.info("      !!!!> %s already subscribed" % self.user)
            self.delete()
            return

        # Switch read stories
        user_stories = MUserStory.objects(user_id=self.user_id,
                                          feed_id=old_feed.pk)
        if user_stories.count() > 0:
            logging.info(" ---> %s read stories" % user_stories.count())

        for user_story in user_stories:
            user_story.feed_id = new_feed.pk
            duplicate_story = user_story.story
            if hasattr(duplicate_story, 'story_guid'):
                story_guid = duplicate_story.story_guid
            else:
                story_guid = duplicate_story.id
            original_story = MStory.objects(story_feed_id=new_feed.pk,
                                            story_guid=story_guid)

            if original_story:
                user_story.story = original_story[0]
                try:
                    user_story.save()
                except OperationError:
                    # User read the story in the original feed, too. Ugh, just ignore it.
                    pass
            else:
                logging.info(" ***> Can't find original story: %s" %
                             duplicate_story.id)
                user_story.delete()

        def switch_feed_for_classifier(model):
            duplicates = model.objects(feed_id=old_feed.pk,
                                       user_id=self.user_id)
            if duplicates.count():
                logging.info(" ---> Switching %s %s" %
                             (duplicates.count(), model))
            for duplicate in duplicates:
                duplicate.feed_id = new_feed.pk
                try:
                    duplicate.save()
                except (IntegrityError, OperationError):
                    logging.info("      !!!!> %s already exists" % duplicate)
                    duplicate.delete()

        switch_feed_for_classifier(MClassifierTitle)
        switch_feed_for_classifier(MClassifierAuthor)
        switch_feed_for_classifier(MClassifierFeed)
        switch_feed_for_classifier(MClassifierTag)
Example #36
0
    def calculate_feed_scores(self, silent=False, stories_db=None):
        now = datetime.datetime.utcnow()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        if self.user.profile.last_seen_on < UNREAD_CUTOFF:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return
        
        if not self.feed.fetched_once:
            if not silent:
                logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return

        if not silent:
            logging.info(' ---> [%s] Computing scores: %s' % (self.user, self.feed))
        feed_scores = dict(negative=0, neutral=0, positive=0)
        
        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta
            
        read_stories = MUserStory.objects(user_id=self.user.pk,
                                          feed_id=self.feed.pk,
                                          read_date__gte=self.mark_read_date)
        # if not silent:
        #     logging.info(' ---> [%s]    Read stories: %s' % (self.user, datetime.datetime.now() - now))
        read_stories_ids = []
        for us in read_stories:
            if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
                read_stories_ids.append(us.story.story_guid)
            elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
                read_stories_ids.append(us.story.id) # TODO: Remove me after migration from story.id->guid
        stories_db = stories_db or MStory.objects(story_feed_id=self.feed.pk,
                                                  story_date__gte=date_delta)
        # if not silent:
        #     logging.info(' ---> [%s]    MStory: %s' % (self.user, datetime.datetime.now() - now))
        oldest_unread_story_date = now
        unread_stories_db = []
        for story in stories_db:
            if story.story_date < date_delta:
                continue
            if hasattr(story, 'story_guid') and story.story_guid not in read_stories_ids:
                unread_stories_db.append(story)
                if story.story_date < oldest_unread_story_date:
                    oldest_unread_story_date = story.story_date
        stories = Feed.format_stories(unread_stories_db, self.feed.pk)
        # if not silent:
        #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))
        
        classifier_feeds   = list(MClassifierFeed.objects(user_id=self.user.pk, feed_id=self.feed.pk))
        classifier_authors = list(MClassifierAuthor.objects(user_id=self.user.pk, feed_id=self.feed.pk))
        classifier_titles  = list(MClassifierTitle.objects(user_id=self.user.pk, feed_id=self.feed.pk))
        classifier_tags    = list(MClassifierTag.objects(user_id=self.user.pk, feed_id=self.feed.pk))

        # if not silent:
        #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))
            
        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }
        
        for story in stories:
            scores.update({
                'author' : apply_classifier_authors(classifier_authors, story),
                'tags'   : apply_classifier_tags(classifier_tags, story),
                'title'  : apply_classifier_titles(classifier_titles, story),
            })
            
            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1
                
        
        # if not silent:
        #     logging.info(' ---> [%s]    End classifiers: %s' % (self.user, datetime.datetime.now() - now))
            
        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False
        
        self.save()

        # if (self.unread_count_positive == 0 and 
        #     self.unread_count_neutral == 0):
        #     self.mark_feed_read()
        
        cache.delete('usersub:%s' % self.user.id)
        
        return
Example #37
0
def api_unread_story(request, trigger_slug=None):
    user = request.user
    body = request.body_json
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    feed_or_folder = fields['feed_or_folder']
    entries = []

    if isinstance(feed_or_folder, int) or feed_or_folder.isdigit():
        feed_id = int(feed_or_folder)
        try:
            usersub = UserSubscription.objects.get(user=user, feed_id=feed_id)
        except UserSubscription.DoesNotExist:
            return dict(data=[])
        found_feed_ids = [feed_id]
        found_trained_feed_ids = [feed_id] if usersub.is_trained else []
        stories = usersub.get_stories(order="newest", read_filter="unread", 
                                      offset=0, limit=limit,
                                      default_cutoff_date=user.profile.unread_cutoff)
    else:
        folder_title = feed_or_folder
        if folder_title == "Top Level":
            folder_title = " "
        usf = UserSubscriptionFolders.objects.get(user=user)
        flat_folders = usf.flatten_folders()
        feed_ids = None
        if folder_title != "all":
            feed_ids = flat_folders.get(folder_title)
        usersubs = UserSubscription.subs_for_feeds(user.pk, feed_ids=feed_ids,
                                                   read_filter="unread")
        feed_ids = [sub.feed_id for sub in usersubs]
        params = {
            "user_id": user.pk, 
            "feed_ids": feed_ids,
            "offset": 0,
            "limit": limit,
            "order": "newest",
            "read_filter": "unread",
            "usersubs": usersubs,
            "cutoff_date": user.profile.unread_cutoff,
        }
        story_hashes, unread_feed_story_hashes = UserSubscription.feed_stories(**params)
        mstories = MStory.objects(story_hash__in=story_hashes).order_by('-story_date')
        stories = Feed.format_stories(mstories)
        found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
        trained_feed_ids = [sub.feed_id for sub in usersubs if sub.is_trained]
        found_trained_feed_ids = list(set(trained_feed_ids) & set(found_feed_ids))
    
    if found_trained_feed_ids:
        classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk,
                                                        feed_id__in=found_trained_feed_ids))
        classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, 
                                                            feed_id__in=found_trained_feed_ids))
        classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, 
                                                          feed_id__in=found_trained_feed_ids))
        classifier_tags = list(MClassifierTag.objects(user_id=user.pk, 
                                                      feed_id__in=found_trained_feed_ids))
    feeds = dict([(f.pk, {
        "title": f.feed_title,
        "website": f.feed_link,
        "address": f.feed_address,
    }) for f in Feed.objects.filter(pk__in=found_feed_ids)])

    for story in stories:
        if before and int(story['story_date'].strftime("%s")) > before: continue
        if after and int(story['story_date'].strftime("%s")) < after: continue
        score = 0
        if found_trained_feed_ids and story['story_feed_id'] in found_trained_feed_ids:
            score = compute_story_score(story, classifier_titles=classifier_titles, 
                                        classifier_authors=classifier_authors, 
                                        classifier_tags=classifier_tags,
                                        classifier_feeds=classifier_feeds)
            if score < 0: continue
            if trigger_slug == "new-unread-focus-story" and score < 1: continue
        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "StoryScore": score,
            "Site": feed and feed['title'],
            "SiteURL": feed and feed['website'],
            "SiteRSS": feed and feed['address'],
            "meta": {
                "id": story['story_hash'],
                "timestamp": int(story['story_date'].strftime("%s"))
            },
        })
    
    if after:
        entries = sorted(entries, key=lambda s: s['meta']['timestamp'])
        
    logging.user(request, "~FYChecking unread%s stories with ~SB~FCIFTTT~SN~FY: ~SB%s~SN - ~SB%s~SN stories" % (" ~SBfocus~SN" if trigger_slug == "new-unread-focus-story" else "", feed_or_folder, len(entries)))
    
    return {"data": entries[:limit]}
Example #38
0
        self._state[doc][position] = new_topic

    def change_count(self, doc, word, topic, delta):
        self._docs[doc].inc(topic, delta)
        self._topics[topic].inc(word, delta)

    def sample(self, iterations=100, hyper_delay=10):
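        """One Gibbs sweep per iteration: resample the topic assignment of
        every word in every document, log the model likelihood, and
        periodically re-optimize the hyperparameters."""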
        assert self._state
        for ii in xrange(iterations):
            for dd in self._data:
                for ww in xrange(len(self._data[dd])):
                    self.sample_word(dd, ww)
            print("Iteration %i %f" %
                  (ii, self.lhood(self._alpha, self._lambda)))
            if hyper_delay >= 0 and ii % hyper_delay == 0:
                self.optimize_hyperparameters()

    def print_topics(self, num_words=15):
        for ii in self._topics:
            print("%i:%s\n" %
                  (ii, "\t".join(self._topics[ii].keys()[:num_words])))


if __name__ == "__main__":
    stories = MStory.objects(story_feed_id=199)
    d = create_data(stories, doc_limit=250, delimiter="")
    lda = LdaSampler(5)
    lda.initialize(d)

    lda.sample(50)
    lda.print_topics()
Example #39
0
def api_share_new_story(request):
    user = request.user
    body = request.body_json
    fields = body.get('actionFields')
    story_url = urlnorm.normalize(fields['story_url'])
    story_content = fields.get('story_content', "")
    story_title = fields.get('story_title', "")
    story_author = fields.get('story_author', "")
    comments = fields.get('comments', None)
        
    logging.user(request.user, "~FBFinding feed (api_share_new_story): %s" % story_url)
    original_feed = Feed.get_feed_from_url(story_url, create=True, fetch=True)
    story_hash = MStory.guid_hash_unsaved(story_url)
    feed_id = (original_feed and original_feed.pk or 0)
    if not user.profile.is_premium and MSharedStory.feed_quota(user.pk, story_hash, feed_id=feed_id):
        return {"errors": [{
            'message': 'Only premium users can share multiple stories per day from the same site.'
        }]}
    
    quota = 3
    if MSharedStory.feed_quota(user.pk, story_hash, quota=quota):
        logging.user(request, "~BM~FRNOT ~FYSharing story from ~SB~FCIFTTT~FY, over quota: ~SB%s: %s" % (story_url, comments))        
        return {"errors": [{
            'message': 'You can only share %s stories per day.' % quota
        }]}
        
    if not story_content or not story_title:
        ti = TextImporter(feed=original_feed, story_url=story_url, request=request)
        original_story = ti.fetch(return_document=True)
        if original_story:
            story_url = original_story['url']
            if not story_content:
                story_content = original_story['content']
            if not story_title:
                story_title = original_story['title']
    
    if story_content:
        story_content = lxml.html.fromstring(story_content)
        story_content.make_links_absolute(story_url)
        story_content = lxml.html.tostring(story_content)
    
    shared_story = MSharedStory.objects.filter(user_id=user.pk,
                                               story_feed_id=original_feed and original_feed.pk or 0,
                                               story_guid=story_url).limit(1).first()
    if not shared_story:
        title_max = MSharedStory._fields['story_title'].max_length
        story_db = {
            "story_guid": story_url,
            "story_permalink": story_url,
            "story_title": story_title and story_title[:title_max] or "[Untitled]",
            "story_feed_id": original_feed and original_feed.pk or 0,
            "story_content": story_content,
            "story_author_name": story_author,
            "story_date": datetime.datetime.now(),
            "user_id": user.pk,
            "comments": comments,
            "has_comments": bool(comments),
        }
        try:
            shared_story = MSharedStory.objects.create(**story_db)
            socialsubs = MSocialSubscription.objects.filter(subscription_user_id=user.pk)
            for socialsub in socialsubs:
                socialsub.needs_unread_recalc = True
                socialsub.save()
            logging.user(request, "~BM~FYSharing story from ~SB~FCIFTTT~FY: ~SB%s: %s" % (story_url, comments))
        except NotUniqueError:
            logging.user(request, "~BM~FY~SBAlready~SN shared story from ~SB~FCIFTTT~FY: ~SB%s: %s" % (story_url, comments))
    else:
        logging.user(request, "~BM~FY~SBAlready~SN shared story from ~SB~FCIFTTT~FY: ~SB%s: %s" % (story_url, comments))
    
    try:
        socialsub = MSocialSubscription.objects.get(user_id=user.pk, 
                                                    subscription_user_id=user.pk)
    except MSocialSubscription.DoesNotExist:
        socialsub = None
    
    if socialsub and shared_story:
        socialsub.mark_story_ids_as_read([shared_story.story_hash], 
                                          shared_story.story_feed_id, 
                                          request=request)
    elif shared_story:
        RUserStory.mark_read(user.pk, shared_story.story_feed_id, shared_story.story_hash)
    
    if shared_story:
        shared_story.publish_update_to_subscribers()
    
    return {"data": [{
        "id": shared_story and shared_story.story_guid,
        "url": shared_story and shared_story.blurblog_permalink()
    }]}
Example #40
0
    def receive_newsletter(self, params):
        user = self._user_from_email(params['recipient'])
        if not user:
            return

        sender_name, sender_username, sender_domain = self._split_sender(
            params['from'])
        feed_address = self._feed_address(
            user, "%s@%s" % (sender_username, sender_domain))

        try:
            usf = UserSubscriptionFolders.objects.get(user=user)
        except UserSubscriptionFolders.DoesNotExist:
            logging.user(user,
                         "~FRUser does not have a USF, ignoring newsletter.")
            return
        usf.add_folder('', 'Newsletters')

        # First look for the email address
        try:
            feed = Feed.objects.get(feed_address=feed_address)
        except Feed.MultipleObjectsReturned:
            feeds = Feed.objects.filter(feed_address=feed_address)[:1]
            if feeds.count():
                feed = feeds[0]
        except Feed.DoesNotExist:
            feed = None

        # If not found, check among titles user has subscribed to
        if not feed:
            newsletter_subs = UserSubscription.objects.filter(
                user=user,
                feed__feed_address__contains="newsletter:").only('feed')
            newsletter_feed_ids = [us.feed.pk for us in newsletter_subs]
            feeds = Feed.objects.filter(feed_title__iexact=sender_name,
                                        pk__in=newsletter_feed_ids)
            if feeds.count():
                feed = feeds[0]

        # Create a new feed if it doesn't exist by sender name or email
        if not feed:
            feed = Feed.objects.create(feed_address=feed_address,
                                       feed_link='http://' + sender_domain,
                                       feed_title=sender_name,
                                       fetched_once=True,
                                       known_good=True)
            feed.update()
            logging.user(user, "~FCCreating newsletter feed: ~SB%s" % (feed))
            r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
            r.publish(user.username, 'reload:%s' % feed.pk)
            self._check_if_first_newsletter(user)

        feed.last_update = datetime.datetime.now()
        feed.last_story_date = datetime.datetime.now()
        feed.save()

        if feed.feed_title != sender_name:
            feed.feed_title = sender_name
            feed.save()

        try:
            usersub = UserSubscription.objects.get(user=user, feed=feed)
        except UserSubscription.DoesNotExist:
            _, _, usersub = UserSubscription.add_subscription(
                user=user, feed_address=feed_address, folder='Newsletters')
            r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
            r.publish(user.username, 'reload:feeds')

        story_hash = MStory.ensure_story_hash(params['signature'], feed.pk)
        story_content = self._get_content(params)
        plain_story_content = self._get_content(params, force_plain=True)
        if len(plain_story_content) > len(story_content):
            story_content = plain_story_content
        story_content = self._clean_content(story_content)
        story_params = {
            "story_feed_id": feed.pk,
            "story_date": datetime.datetime.fromtimestamp(int(params['timestamp'])),
            "story_title": params['subject'],
            "story_content": story_content,
            "story_author_name": params['from'],
            "story_permalink": "https://%s%s" % (
                Site.objects.get_current().domain,
                reverse('newsletter-story', kwargs={'story_hash': story_hash})),
            "story_guid": params['signature'],
        }

        try:
            story = MStory.objects.get(story_hash=story_hash)
        except MStory.DoesNotExist:
            story = MStory(**story_params)
            story.save()

        usersub.needs_unread_recalc = True
        usersub.save()

        self._publish_to_subscribers(feed, story.story_hash)

        MFetchHistory.add(feed_id=feed.pk, fetch_type='push')
        logging.user(
            user, "~FCNewsletter feed story: ~SB%s~SN / ~SB%s" %
            (story.story_title, feed))

        return story
Example #41
0
def load_river_stories(request):
    limit                = 18
    offset               = int(request.REQUEST.get('offset', 0))
    start                = time.time()
    user                 = get_user(request)
    feed_ids             = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
    original_feed_ids    = list(feed_ids)
    page                 = int(request.REQUEST.get('page', 1))
    read_stories_count   = int(request.REQUEST.get('read_stories_count', 0))
    days_to_keep_unreads = datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    
    if not feed_ids: 
        logging.user(request, "~FCLoading empty river stories: page %s" % (page))
        return dict(stories=[])
    
    # Fetch all stories at and before the page number.
    # Not a single page, because reading stories can move them up in the unread order.
    # `read_stories_count` is an optimization, works best when all 25 stories before have been read.
    offset = (page-1) * limit - read_stories_count
    limit = page * limit - read_stories_count
    
    # Read stories to exclude
    read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story_id')
    read_stories = [rs.story_id for rs in read_stories]
    
    # Determine mark_as_read dates for all feeds to ignore all stories before this date.
    feed_counts     = {}
    feed_last_reads = {}
    for feed_id in feed_ids:
        try:
            usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        except UserSubscription.DoesNotExist:
            continue
        if not usersub: continue
        feed_counts[feed_id] = (usersub.unread_count_negative * 1 + 
                                usersub.unread_count_neutral * 10 +
                                usersub.unread_count_positive * 20)
        feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple()))

    feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:40]
    feed_ids = [f[0] for f in feed_counts]
    feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids
                            if feed_id in feed_last_reads])
    feed_counts = dict(feed_counts)

    # After excluding read stories, all that's left are stories 
    # past the mark_read_date. Everything returned is guaranteed to be unread.
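    # The map function runs inside Mongo with feed_last_reads injected via
    # `scope`: it emits only stories newer than each feed's mark_read time,
    # and the reduce step keeps a single copy per story id.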
    mstories = MStory.objects(
        story_guid__nin=read_stories,
        story_feed_id__in=feed_ids,
        # story_date__gte=start - days_to_keep_unreads
    ).map_reduce("""function() {
            var d = feed_last_reads[this[~story_feed_id]];
            if (this[~story_date].getTime()/1000 > d) {
                emit(this[~id], this);
            }
        }""",
        """function(key, values) {
            return values[0];
        }""",
        output='inline',
        scope={
            'feed_last_reads': feed_last_reads
        }
    )
    try:
        mstories = [story.value for story in mstories if story and story.value]
    except OperationFailure:
        raise  # re-raise with the original traceback intact
Example #42
0
class ProcessFeed:
    def __init__(self, feed_id, fpf, options, raw_feed=None):
        self.feed_id = feed_id
        self.options = options
        self.fpf = fpf
        self.raw_feed = raw_feed

    def refresh_feed(self):
        self.feed = Feed.get_by_id(self.feed_id)
        if self.feed_id != self.feed.pk:
            logging.debug(" ***> Feed has changed: from %s to %s" %
                          (self.feed_id, self.feed.pk))
            self.feed_id = self.feed.pk

    def process(self):
        """ Downloads and parses a feed.
        """
        start = time.time()
        self.refresh_feed()

        ret_values = dict(new=0, updated=0, same=0, error=0)

        if hasattr(self.fpf, 'status'):
            if self.options['verbose']:
                if self.fpf.bozo and self.fpf.status != 304:
                    logging.debug(
                        u'   ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)'
                        % (self.feed.log_title[:30], self.fpf.bozo_exception,
                           len(self.fpf.entries)))

            if self.fpf.status == 304:
                self.feed = self.feed.save()
                self.feed.save_feed_history(304, "Not modified")
                return FEED_SAME, ret_values

            # 302 and 307: Temporary redirect: ignore
            # 301 and 308: Permanent redirect: save it (after 10 tries)
            if self.fpf.status == 301 or self.fpf.status == 308:
                if self.fpf.href.endswith('feedburner.com/atom.xml'):
                    return FEED_ERRHTTP, ret_values
                redirects, non_redirects = self.feed.count_redirects_in_history('feed')
                self.feed.save_feed_history(
                    self.fpf.status,
                    "HTTP Redirect (%d to go)" % (10 - len(redirects)))
                if len(redirects) >= 10 or len(non_redirects) == 0:
                    address = self.fpf.href
                    if self.options['force'] and address:
                        address = qurl(address, remove=['_'])
                    self.feed.feed_address = address
                if not self.feed.known_good:
                    self.feed.fetched_once = True
                    logging.debug(
                        "   ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..."
                        % (self.feed.log_title[:30], self.fpf.status))
                    self.feed = self.feed.schedule_feed_fetch_immediately()
                if not self.fpf.entries:
                    self.feed = self.feed.save()
                    self.feed.save_feed_history(self.fpf.status,
                                                "HTTP Redirect")
                    return FEED_ERRHTTP, ret_values
            if self.fpf.status >= 400:
                logging.debug(
                    "   ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..."
                    % (self.feed.log_title[:30], self.fpf.status))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(self.fpf.status, "HTTP Error")
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRHTTP, ret_values

        if not self.fpf:
            logging.debug(
                "   ---> [%-30s] ~SB~FRFeed is Non-XML. No feedparser feed either!"
                % (self.feed.log_title[:30]))
            self.feed.save_feed_history(551, "Broken feed")
            return FEED_ERRHTTP, ret_values

        if self.fpf and not self.fpf.entries:
            if self.fpf.bozo and isinstance(self.fpf.bozo_exception,
                                            feedparser.NonXMLContentType):
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..."
                    % (self.feed.log_title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(552, 'Non-xml feed',
                                                self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
            elif self.fpf.bozo and isinstance(
                    self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..."
                    % (self.feed.log_title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(553, 'Not an RSS feed',
                                                self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values

        # the feed has changed (or it is the first time we parse it)
        # saving the etag and last_modified fields
        original_etag = self.feed.etag
        self.feed.etag = self.fpf.get('etag')
        if self.feed.etag:
            self.feed.etag = self.feed.etag[:255]
        # sometimes this is None (it never should be) *sigh*
        if self.feed.etag is None:
            self.feed.etag = ''
        if self.feed.etag != original_etag:
            self.feed.save(update_fields=['etag'])

        original_last_modified = self.feed.last_modified
        if hasattr(self.fpf, 'modified') and self.fpf.modified:
            try:
                self.feed.last_modified = datetime.datetime.strptime(
                    self.fpf.modified, '%a, %d %b %Y %H:%M:%S %Z')
            except Exception, e:
                self.feed.last_modified = None
                logging.debug("Broken mtime %s: %s" %
                              (self.feed.last_modified, e))
                pass
        if self.feed.last_modified != original_last_modified:
            self.feed.save(update_fields=['last_modified'])

        self.fpf.entries = self.fpf.entries[:100]

        original_title = self.feed.feed_title
        if self.fpf.feed.get('title'):
            self.feed.feed_title = strip_tags(self.fpf.feed.get('title'))
        if self.feed.feed_title != original_title:
            self.feed.save(update_fields=['feed_title'])

        tagline = self.fpf.feed.get('tagline', self.feed.data.feed_tagline)
        if tagline:
            original_tagline = self.feed.data.feed_tagline
            self.feed.data.feed_tagline = smart_unicode(tagline)
            if self.feed.data.feed_tagline != original_tagline:
                self.feed.data.save(update_fields=['feed_tagline'])

        if not self.feed.feed_link_locked:
            new_feed_link = self.fpf.feed.get('link') or self.fpf.feed.get(
                'id') or self.feed.feed_link
            if self.options['force'] and new_feed_link:
                new_feed_link = qurl(new_feed_link, remove=['_'])
            if new_feed_link != self.feed.feed_link:
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed's page is different: %s to %s"
                    % (self.feed.log_title[:30], self.feed.feed_link,
                       new_feed_link))
                redirects, non_redirects = self.feed.count_redirects_in_history('page')
                self.feed.save_page_history(
                    301, "HTTP Redirect (%s to go)" % (10 - len(redirects)))
                if len(redirects) >= 10 or len(non_redirects) == 0:
                    self.feed.feed_link = new_feed_link
                    self.feed.save(update_fields=['feed_link'])

        # Determine if stories aren't valid and replace broken guids
        guids_seen = set()
        permalinks_seen = set()
        for entry in self.fpf.entries:
            guids_seen.add(entry.get('guid'))
            permalinks_seen.add(Feed.get_permalink(entry))
        guid_difference = len(guids_seen) != len(self.fpf.entries)
        single_guid = len(guids_seen) == 1
        replace_guids = single_guid and guid_difference
        permalink_difference = len(permalinks_seen) != len(self.fpf.entries)
        single_permalink = len(permalinks_seen) == 1
        replace_permalinks = single_permalink and permalink_difference
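        # If every entry shares a single guid, the guid is useless for dedup;
        # the loop below swaps in the permalink, or the published timestamp
        # when the permalinks all collapse too.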

        # Compare new stories to existing stories, adding and updating
        start_date = datetime.datetime.utcnow()
        story_hashes = []
        stories = []
        for entry in self.fpf.entries:
            story = pre_process_story(entry, self.fpf.encoding)
            if story.get('published') < start_date:
                start_date = story.get('published')
            if replace_guids:
                if replace_permalinks:
                    new_story_guid = unicode(story.get('published'))
                    if self.options['verbose']:
                        logging.debug(
                            u'   ---> [%-30s] ~FBReplacing guid (%s) with timestamp: %s'
                            % (self.feed.log_title[:30], story.get('guid'),
                               new_story_guid))
                    story['guid'] = new_story_guid
                else:
                    new_story_guid = Feed.get_permalink(story)
                    if self.options['verbose']:
                        logging.debug(
                            u'   ---> [%-30s] ~FBReplacing guid (%s) with permalink: %s'
                            % (self.feed.log_title[:30], story.get('guid'),
                               new_story_guid))
                    story['guid'] = new_story_guid
            story['story_hash'] = MStory.feed_guid_hash_unsaved(
                self.feed.pk, story.get('guid'))
            stories.append(story)
            story_hashes.append(story.get('story_hash'))

        original_story_hash_count = len(story_hashes)
        story_hashes_in_unread_cutoff = \
            self.feed.story_hashes_in_unread_cutoff[:original_story_hash_count]
        story_hashes.extend(story_hashes_in_unread_cutoff)
        story_hashes = list(set(story_hashes))
        if self.options['verbose'] or settings.DEBUG:
            logging.debug(
                u'   ---> [%-30s] ~FBFound ~SB%s~SN guids, adding ~SB%s~SN/%s guids from db'
                % (self.feed.log_title[:30], original_story_hash_count,
                   len(story_hashes) - original_story_hash_count,
                   len(story_hashes_in_unread_cutoff)))

        existing_stories = dict((s.story_hash, s) for s in MStory.objects(
            story_hash__in=story_hashes,
            # story_date__gte=start_date,
            # story_feed_id=self.feed.pk
        ))
        # if len(existing_stories) == 0:
        #     existing_stories = dict((s.story_hash, s) for s in MStory.objects(
        #         story_date__gte=start_date,
        #         story_feed_id=self.feed.pk
        #     ))

        ret_values = self.feed.add_update_stories(
            stories,
            existing_stories,
            verbose=self.options['verbose'],
            updates_off=self.options['updates_off'])

        # PubSubHubbub
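        # Scan the feed's links for rel="hub" and rel="self", then
        # (re)subscribe over PuSH when the lease expired, the feed isn't
        # push-enabled yet, or the fetch was forced.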
        if (hasattr(self.fpf, 'feed') and hasattr(self.fpf.feed, 'links')
                and self.fpf.feed.links):
            hub_url = None
            self_url = self.feed.feed_address
            for link in self.fpf.feed.links:
                if link['rel'] == 'hub' and not hub_url:
                    hub_url = link['href']
                elif link['rel'] == 'self':
                    self_url = link['href']
            push_expired = False
            if self.feed.is_push:
                try:
                    push_expired = self.feed.push.lease_expires < datetime.datetime.now()
                except PushSubscription.DoesNotExist:
                    self.feed.is_push = False
            if (hub_url and self_url and not settings.DEBUG
                    and self.feed.active_subscribers > 0
                    and (push_expired or not self.feed.is_push
                         or self.options.get('force'))):
                logging.debug(
                    u'   ---> [%-30s] ~BB~FW%sSubscribing to PuSH hub: %s' %
                    (self.feed.log_title[:30],
                     "~SKRe-~SN" if push_expired else "", hub_url))
                try:
                    PushSubscription.objects.subscribe(self_url,
                                                       feed=self.feed,
                                                       hub=hub_url)
                except TimeoutError:
                    logging.debug(
                        u'   ---> [%-30s] ~BB~FW~FRTimed out~FW subscribing to PuSH hub: %s'
                        % (self.feed.log_title[:30], hub_url))
            elif (self.feed.is_push
                  and (self.feed.active_subscribers <= 0 or not hub_url)):
                logging.debug(
                    u'   ---> [%-30s] ~BB~FWTurning off PuSH, no hub found' %
                    (self.feed.log_title[:30]))
                self.feed.is_push = False
                self.feed = self.feed.save()

        # Push notifications
        if ret_values['new'] > 0 and MUserFeedNotification.feed_has_users(
                self.feed.pk) > 0:
            QueueNotifications.delay(self.feed.pk, ret_values['new'])

        # All Done
        logging.debug(
            u'   ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s'
            % (self.feed.log_title[:30], '~FG~SB' if ret_values['new'] else '',
               ret_values['new'], '~FY~SB' if ret_values['updated'] else '',
               ret_values['updated'], '~SB' if ret_values['same'] else '',
               ret_values['same'], '~FR~SB' if ret_values['error'] else '',
               ret_values['error'], len(self.fpf.entries)))
        self.feed.update_all_statistics(has_new_stories=bool(ret_values['new']),
                                        force=self.options['force'])
        fetch_date = datetime.datetime.now()
        if ret_values['new']:
            if not getattr(settings, 'TEST_DEBUG', False):
                self.feed.trim_feed()
                self.feed.expire_redis()
            if MStatistics.get('raw_feed', None) == self.feed.pk:
                self.feed.save_raw_feed(self.raw_feed, fetch_date)
        self.feed.save_feed_history(200, "OK", date=fetch_date)

        if self.options['verbose']:
            logging.debug(u'   ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' %
                          (self.feed.log_title[:30], time.time() - start))

        return FEED_OK, ret_values
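
The PuSH branch above hinges on scanning the parsed feed's links for rel="hub" and rel="self" entries. A minimal standalone sketch of that discovery step, assuming a feedparser result; the discover_push_urls helper and default_self_url parameter are illustrative, not part of the original code:

import feedparser

def discover_push_urls(parsed, default_self_url):
    """Return (hub_url, self_url) from a feedparser result, mirroring the
    link-scanning loop above. default_self_url stands in for the feed's
    stored address when no rel="self" link is present."""
    hub_url = None
    self_url = default_self_url
    for link in parsed.feed.get('links', []):
        if link.get('rel') == 'hub' and not hub_url:
            hub_url = link.get('href')
        elif link.get('rel') == 'self':
            self_url = link.get('href')
    return hub_url, self_url

# Usage sketch:
#   parsed = feedparser.parse('http://example.com/feed.xml')
#   hub, self_url = discover_push_urls(parsed, 'http://example.com/feed.xml')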
Example #43
0
    def process(self):
        """ Downloads and parses a feed.
        """
        start = time.time()
        self.refresh_feed()

        ret_values = dict(new=0, updated=0, same=0, error=0)

        # logging.debug(u' ---> [%d] Processing %s' % (self.feed.id, self.feed.feed_title))

        if hasattr(self.fpf, 'status'):
            if self.options['verbose']:
                if self.fpf.bozo and self.fpf.status != 304:
                    logging.debug(
                        u'   ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)'
                        % (self.feed.title[:30], self.fpf.bozo_exception,
                           len(self.fpf.entries)))

            if self.fpf.status == 304:
                self.feed = self.feed.save()
                self.feed.save_feed_history(304, "Not modified")
                return FEED_SAME, ret_values

            # 302: Temporary redirect: ignore
            # 301: Permanent redirect: save it
            if self.fpf.status == 301:
                if not self.fpf.href.endswith('feedburner.com/atom.xml'):
                    self.feed.feed_address = self.fpf.href
                if not self.feed.known_good:
                    self.feed.fetched_once = True
                    logging.debug(
                        "   ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..."
                        % (self.feed.title[:30], self.fpf.status))
                    self.feed = self.feed.schedule_feed_fetch_immediately()
                if not self.fpf.entries:
                    self.feed = self.feed.save()
                    self.feed.save_feed_history(self.fpf.status,
                                                "HTTP Redirect")
                    return FEED_ERRHTTP, ret_values
            if self.fpf.status >= 400:
                logging.debug(
                    "   ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..."
                    % (self.feed.title[:30], self.fpf.status))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(self.fpf.status, "HTTP Error")
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRHTTP, ret_values

        if not self.fpf.entries:
            if self.fpf.bozo and isinstance(self.fpf.bozo_exception,
                                            feedparser.NonXMLContentType):
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..."
                    % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(552, 'Non-xml feed',
                                                self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
            elif self.fpf.bozo and isinstance(
                    self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..."
                    % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(553, 'SAX Exception',
                                                self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values

        # The feed has changed (or this is the first time we've parsed it),
        # so save the etag and last_modified fields.
        self.feed.etag = self.fpf.get('etag')
        if self.feed.etag:
            self.feed.etag = self.feed.etag[:255]
        # sometimes this is None (it never should be) *sigh*
        if self.feed.etag is None:
            self.feed.etag = ''

        try:
            self.feed.last_modified = mtime(self.fpf.modified)
        except Exception:
            self.feed.last_modified = None

        self.fpf.entries = self.fpf.entries[:100]

        if self.fpf.feed.get('title'):
            self.feed.feed_title = strip_tags(self.fpf.feed.get('title'))
        tagline = self.fpf.feed.get('tagline', self.feed.data.feed_tagline)
        if tagline:
            self.feed.data.feed_tagline = utf8encode(tagline)
            self.feed.data.save()
        if not self.feed.feed_link_locked:
            self.feed.feed_link = self.fpf.feed.get(
                'link') or self.fpf.feed.get('id') or self.feed.feed_link

        self.feed = self.feed.save()

        # Compare new stories to existing stories, adding and updating
        start_date = datetime.datetime.utcnow()
        story_guids = []
        stories = []
        for entry in self.fpf.entries:
            story = pre_process_story(entry)
            if story.get('published') < start_date:
                start_date = story.get('published')
            stories.append(story)
            story_guids.append(story.get('guid'))

        existing_stories = dict((s.story_guid, s) for s in MStory.objects(
            # story_guid__in=story_guids,
            story_date__gte=start_date,
            story_feed_id=self.feed.pk).limit(
                max(int(len(story_guids) * 1.5), 10)))

        ret_values = self.feed.add_update_stories(
            stories, existing_stories, verbose=self.options['verbose'])

        if (hasattr(self.fpf, 'feed') and hasattr(self.fpf.feed, 'links')
                and self.fpf.feed.links):
            hub_url = None
            self_url = self.feed.feed_address
            for link in self.fpf.feed.links:
                if link['rel'] == 'hub' and not hub_url:
                    hub_url = link['href']
                elif link['rel'] == 'self':
                    self_url = link['href']
            push_expired = False
            if self.feed.is_push:
                try:
                    push_expired = self.feed.push.lease_expires < datetime.datetime.now()
                except PushSubscription.DoesNotExist:
                    self.feed.is_push = False
            if (hub_url and self_url and not settings.DEBUG
                    and self.feed.active_subscribers > 0
                    and (push_expired or not self.feed.is_push
                         or self.options.get('force'))):
                logging.debug(
                    u'   ---> [%-30s] ~BB~FW%sSubscribing to PuSH hub: %s' %
                    (self.feed.title[:30], "~SKRe-~SN" if push_expired else "",
                     hub_url))
                try:
                    PushSubscription.objects.subscribe(self_url,
                                                       feed=self.feed,
                                                       hub=hub_url)
                except TimeoutError:
                    logging.debug(
                        u'   ---> [%-30s] ~BB~FW~FRTimed out~FW subscribing to PuSH hub: %s'
                        % (self.feed.title[:30], hub_url))
            elif (self.feed.is_push
                  and (self.feed.active_subscribers <= 0 or not hub_url)):
                logging.debug(
                    u'   ---> [%-30s] ~BB~FWTurning off PuSH, no hub found' %
                    (self.feed.title[:30]))
                self.feed.is_push = False
                self.feed = self.feed.save()

        logging.debug(
            u'   ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s'
            % (self.feed.title[:30], '~FG~SB' if ret_values['new'] else '',
               ret_values['new'], '~FY~SB' if ret_values['updated'] else '',
               ret_values['updated'], '~SB' if ret_values['same'] else '',
               ret_values['same'], '~FR~SB' if ret_values['error'] else '',
               ret_values['error'], len(self.fpf.entries)))
        self.feed.update_all_statistics(full=bool(ret_values['new']),
                                        force=self.options['force'])
        if ret_values['new']:
            self.feed.trim_feed()
            self.feed.expire_redis()
        self.feed.save_feed_history(200, "OK")

        if self.options['verbose']:
            logging.debug(u'   ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' %
                          (self.feed.title[:30], time.time() - start))

        return FEED_OK, ret_values
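
The existing-story lookup above bounds its Mongo query at 1.5x the number of incoming guids, floored at 10, so that even tiny fetches compare against a reasonable window of stored stories. The arithmetic in isolation (existing_story_limit is an illustrative name):

def existing_story_limit(num_guids, floor=10):
    # 1.5x the incoming guid count, but never fewer than `floor` stories.
    return max(int(num_guids * 1.5), floor)

assert existing_story_limit(100) == 150
assert existing_story_limit(4) == 10  # floored for small fetches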
Example #44
0
    def process(self):
        """ Downloads and parses a feed.
        """
        start = time.time()
        self.refresh_feed()

        ret_values = {
            ENTRY_NEW: 0,
            ENTRY_UPDATED: 0,
            ENTRY_SAME: 0,
            ENTRY_ERR: 0
        }

        # logging.debug(u' ---> [%d] Processing %s' % (self.feed.id, self.feed.feed_title))

        if hasattr(self.fpf, 'status'):
            if self.options['verbose']:
                if self.fpf.bozo and self.fpf.status != 304:
                    logging.debug(
                        u'   ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)'
                        % (self.feed.title[:30], self.fpf.bozo_exception,
                           len(self.fpf.entries)))

            if self.fpf.status == 304:
                self.feed = self.feed.save()
                self.feed.save_feed_history(304, "Not modified")
                return FEED_SAME, ret_values

            if self.fpf.status in (302, 301):
                if not self.fpf.href.endswith('feedburner.com/atom.xml'):
                    self.feed.feed_address = self.fpf.href
                if not self.feed.known_good:
                    self.feed.fetched_once = True
                    logging.debug(
                        "   ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..."
                        % (self.feed.title[:30], self.fpf.status))
                    self.feed = self.feed.schedule_feed_fetch_immediately()
                if not self.fpf.entries:
                    self.feed = self.feed.save()
                    self.feed.save_feed_history(self.fpf.status,
                                                "HTTP Redirect")
                    return FEED_ERRHTTP, ret_values
            if self.fpf.status >= 400:
                logging.debug(
                    "   ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..."
                    % (self.feed.title[:30], self.fpf.status))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(self.fpf.status, "HTTP Error")
                self.feed = self.feed.save()
                return FEED_ERRHTTP, ret_values

        if not self.fpf.entries:
            if self.fpf.bozo and isinstance(self.fpf.bozo_exception,
                                            feedparser.NonXMLContentType):
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..."
                    % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(552, 'Non-xml feed',
                                                self.fpf.bozo_exception)
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
            elif self.fpf.bozo and isinstance(
                    self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..."
                    % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(553, 'SAX Exception',
                                                self.fpf.bozo_exception)
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values

        # The feed has changed (or this is the first time we've parsed it),
        # so save the etag and last_modified fields.
        self.feed.etag = self.fpf.get('etag')
        if self.feed.etag:
            self.feed.etag = self.feed.etag[:255]
        # sometimes this is None (it never should be) *sigh*
        if self.feed.etag is None:
            self.feed.etag = ''

        try:
            self.feed.last_modified = mtime(self.fpf.modified)
        except Exception:
            pass

        self.fpf.entries = self.fpf.entries[:50]

        if self.fpf.feed.get('title'):
            self.feed.feed_title = self.fpf.feed.get('title')
        tagline = self.fpf.feed.get('tagline', self.feed.data.feed_tagline)
        if tagline:
            self.feed.data.feed_tagline = utf8encode(tagline)
            self.feed.data.save()
        if not self.feed.feed_link_locked:
            self.feed.feed_link = self.fpf.feed.get(
                'link') or self.fpf.feed.get('id') or self.feed.feed_link

        guids = []
        for entry in self.fpf.entries:
            if entry.get('id', ''):
                guids.append(entry.get('id', ''))
            elif entry.get('link'):
                guids.append(entry.link)
            elif entry.get('title'):
                guids.append(entry.title)
        self.feed = self.feed.save()

        # Compare new stories to existing stories, adding and updating
        start_date = datetime.datetime.utcnow()
        # end_date = datetime.datetime.utcnow()
        story_guids = []
        stories = []
        for entry in self.fpf.entries:
            story = pre_process_story(entry)
            if story.get('published') < start_date:
                start_date = story.get('published')
            # if story.get('published') > end_date:
            #     end_date = story.get('published')
            stories.append(story)
            story_guids.append(story.get('guid') or story.get('link'))

        existing_stories = list(
            MStory.objects(
                # story_guid__in=story_guids,
                story_date__gte=start_date,
                story_feed_id=self.feed_id).limit(
                    min(int(len(story_guids) * 1.5), 10)))
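        # NOTE: min() here caps the lookup at 10 existing stories regardless
        # of feed size; the later revision in Example #43 uses max(..., 10)
        # as a floor instead.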

        # MStory.objects(
        #     (Q(story_date__gte=start_date) & Q(story_date__lte=end_date))
        #     | (Q(story_guid__in=story_guids)),
        #     story_feed=self.feed
        # ).order_by('-story_date')
        ret_values = self.feed.add_update_stories(
            stories, existing_stories, verbose=self.options['verbose'])

        if ((not self.feed.is_push or self.options.get('force'))
                and hasattr(self.fpf, 'feed')
                and hasattr(self.fpf.feed, 'links') and self.fpf.feed.links):
            hub_url = None
            self_url = self.feed.feed_address
            for link in self.fpf.feed.links:
                if link['rel'] == 'hub':
                    hub_url = link['href']
                elif link['rel'] == 'self':
                    self_url = link['href']
            if hub_url and self_url and not settings.DEBUG:
                logging.debug(
                    u'   ---> [%-30s] ~BB~FWSubscribing to PuSH hub: %s' %
                    (self.feed.title[:30], hub_url))
                PushSubscription.objects.subscribe(self_url,
                                                   feed=self.feed,
                                                   hub=hub_url)

        logging.debug(
            u'   ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s'
            % (self.feed.title[:30], '~FG~SB' if ret_values[ENTRY_NEW] else '',
               ret_values[ENTRY_NEW], '~FY~SB'
               if ret_values[ENTRY_UPDATED] else '', ret_values[ENTRY_UPDATED],
               '~SB' if ret_values[ENTRY_SAME] else '', ret_values[ENTRY_SAME],
               '~FR~SB' if ret_values[ENTRY_ERR] else '',
               ret_values[ENTRY_ERR], len(self.fpf.entries)))
        self.feed.update_all_statistics(full=bool(ret_values[ENTRY_NEW]),
                                        force=self.options['force'])
        self.feed.trim_feed()
        self.feed.save_feed_history(200, "OK")

        if self.options['verbose']:
            logging.debug(u'   ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' %
                          (self.feed.title[:30], time.time() - start))

        return FEED_OK, ret_values
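
The guid-collection loop in this version falls back from an entry's id to its link and finally its title when no stable identifier is present. The same fallback as a compact helper (entry_guid is an illustrative name):

def entry_guid(entry):
    # Prefer the entry's stable id; fall back to its link, then its title.
    return entry.get('id') or entry.get('link') or entry.get('title') or ''

# entry_guid({'link': 'http://example.com/post/1'}) -> 'http://example.com/post/1'
# entry_guid({'title': 'Hello'})                    -> 'Hello'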
Example #45
0
    def test_train(self):
        user = User.objects.all()
        feed = Feed.objects.all()

        management.call_command('loaddata', 'brownstoner.json', verbosity=0)
        management.call_command('refresh_feed',
                                force=1,
                                feed=1,
                                single_threaded=True,
                                daemonize=False)
        management.call_command('loaddata', 'brownstoner2.json', verbosity=0)
        management.call_command('refresh_feed',
                                force=1,
                                feed=1,
                                single_threaded=True,
                                daemonize=False)

        stories = MStory.objects(story_feed_id=1)[:53]

        phrasefilter = PhraseFilter()
        for story in stories:
            # print story.story_title, story.id
            phrasefilter.run(story.story_title, story.id)

        phrasefilter.pare_phrases()
        phrases = phrasefilter.get_phrases()
        print phrases

        tokenizer = Tokenizer(phrases)
        classifier = Bayes(tokenizer)  # FisherClassifier(user[0], feed[0], phrases)

        classifier.train('good', 'House of the Day: 393 Pacific St.')
        classifier.train('good', 'House of the Day: 393 Pacific St.')
        classifier.train('good', 'Condo of the Day: 393 Pacific St.')
        classifier.train('good', 'Co-op of the Day: 393 Pacific St. #3')
        classifier.train('good', 'Co-op of the Day: 393 Pacific St. #3')
        classifier.train('good', 'Development Watch: 393 Pacific St. #3')
        classifier.train('bad', 'Development Watch: 393 Pacific St. #3')
        classifier.train('bad', 'Development Watch: 393 Pacific St. #3')
        classifier.train('bad', 'Development Watch: 393 Pacific St. #3')
        classifier.train('bad', 'Streetlevel: 393 Pacific St. #3')

        guess = dict(classifier.guess('Co-op of the Day: 413 Atlantic'))
        self.assertTrue(guess['good'] > .99)
        self.assertTrue('bad' not in guess)

        guess = dict(classifier.guess('House of the Day: 413 Atlantic'))
        self.assertTrue(guess['good'] > .99)
        self.assertTrue('bad' not in guess)

        guess = dict(classifier.guess('Development Watch: Yatta'))
        self.assertTrue(guess['bad'] > .7)
        self.assertTrue(guess['good'] < .3)

        guess = dict(classifier.guess('Development Watch: 393 Pacific St.'))
        self.assertTrue(guess['bad'] > .7)
        self.assertTrue(guess['good'] < .3)

        guess = dict(classifier.guess('Streetlevel: 123 Carlton St.'))
        self.assertTrue(guess['bad'] > .99)
        self.assertTrue('good' not in guess)

        guess = classifier.guess('Extra, Extra')
        self.assertTrue('bad' not in guess)
        self.assertTrue('good' not in guess)

        guess = classifier.guess('Nothing doing: 393 Pacific St.')
        self.assertTrue('bad' not in guess)
        self.assertTrue('good' not in guess)
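
The assertions above depend on classifier.guess() returning (label, probability) pairs, with a label simply absent when the classifier has seen no evidence for it. A small sketch of how a caller might act on that contract (score_title and the threshold value are illustrative):

def score_title(classifier, title, threshold=0.7):
    # guess() yields (label, probability) pairs; a missing label means the
    # classifier has no evidence for that class at all.
    guess = dict(classifier.guess(title))
    if guess.get('good', 0) >= threshold:
        return 1   # likely a liked story
    if guess.get('bad', 0) >= threshold:
        return -1  # likely a disliked story
    return 0       # no signal either way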
Example #46
0
    def test_load_feeds__slashdot(self):
        self.client.login(username='******', password='******')

        old_story_guid = "tag:google.com,2005:reader/item/4528442633bc7b2b"

        management.call_command('loaddata',
                                'slashdot1.json',
                                verbosity=0,
                                commit=False)

        feed = Feed.objects.get(feed_link__contains='slashdot')
        stories = MStory.objects(story_feed_id=feed.pk)
        self.assertEquals(stories.count(), 0)

        management.call_command('refresh_feed',
                                force=1,
                                feed=5,
                                single_threaded=True,
                                daemonize=False)

        stories = MStory.objects(story_feed_id=feed.pk)
        self.assertEquals(stories.count(), 38)

        response = self.client.get(reverse('load-feeds'))
        content = json.decode(response.content)
        self.assertEquals(content['feeds']['5']['nt'], 38)

        self.client.post(reverse('mark-story-as-read'), {
            'story_id': old_story_guid,
            'feed_id': 5
        })

        response = self.client.get(reverse('refresh-feeds'))
        content = json.decode(response.content)
        self.assertEquals(content['feeds']['5']['nt'], 37)

        management.call_command('loaddata',
                                'slashdot2.json',
                                verbosity=0,
                                commit=False)
        management.call_command('refresh_feed',
                                force=1,
                                feed=5,
                                single_threaded=True,
                                daemonize=False)

        stories = MStory.objects(story_feed_id=feed.pk)
        self.assertEquals(stories.count(), 38)

        url = reverse('load-single-feed', kwargs=dict(feed_id=5))
        response = self.client.get(url)

        # pprint([c['story_title'] for c in json.decode(response.content)])
        feed = json.decode(response.content)

        # Test: 1 changed char in title
        self.assertEquals(len(feed['stories']), 6)

        response = self.client.get(reverse('refresh-feeds'))
        content = json.decode(response.content)
        self.assertEquals(content['feeds']['5']['nt'], 37)
Example #47
0
    def receive_newsletter(self, params):
        user = self.user_from_email(params['recipient'])
        if not user:
            return

        sender_name, sender_username, sender_domain = self.split_sender(
            params['from'])
        feed_address = self.feed_address(
            user, "%s@%s" % (sender_username, sender_domain))

        usf = UserSubscriptionFolders.objects.get(user=user)
        usf.add_folder('', 'Newsletters')

        try:
            feed = Feed.objects.get(feed_address=feed_address)
        except Feed.DoesNotExist:
            feed = Feed.objects.create(feed_address=feed_address,
                                       feed_link='http://' + sender_domain,
                                       feed_title=sender_name,
                                       fetched_once=True,
                                       known_good=True)
            feed.update()
            logging.user(user, "~FCCreating newsletter feed: ~SB%s" % (feed))
            r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
            r.publish(user.username, 'reload:%s' % feed.pk)

        if feed.feed_title != sender_name:
            feed.feed_title = sender_name
            feed.save()

        try:
            usersub = UserSubscription.objects.get(user=user, feed=feed)
        except UserSubscription.DoesNotExist:
            _, _, usersub = UserSubscription.add_subscription(
                user=user, feed_address=feed_address, folder='Newsletters')

        story_hash = MStory.ensure_story_hash(params['signature'], feed.pk)
        story_content = self.get_content(params)
        story_content = self.clean_content(story_content)
        story_params = {
            "story_feed_id": feed.pk,
            "story_date": datetime.datetime.fromtimestamp(int(params['timestamp'])),
            "story_title": params['subject'],
            "story_content": story_content,
            "story_author_name": params['from'],
            "story_permalink": "https://%s%s" % (
                Site.objects.get_current().domain,
                reverse('newsletter-story', kwargs={'story_hash': story_hash})),
            "story_guid": params['signature'],
        }
        try:
            story = MStory.objects.get(story_hash=story_hash)
        except MStory.DoesNotExist:
            story = MStory(**story_params)
            story.save()

        usersub.needs_unread_recalc = True
        usersub.save()

        self.publish_to_subscribers(feed)

        MFetchHistory.add(feed_id=feed.pk, fetch_type='push')
        logging.user(
            user, "~FCNewsletter feed story: ~SB%s~SN / ~SB%s" %
            (story.story_title, feed))

        return story
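
The get-or-create at the end makes newsletter ingestion idempotent: redelivering the same email (same signature) resolves to the same story_hash and therefore the same MStory. A minimal sketch of that hashing pattern; the exact format produced by MStory.ensure_story_hash is not shown in this snippet, so make_story_hash below is an illustrative stand-in:

import hashlib

def make_story_hash(guid, feed_id):
    # Illustrative stand-in: derive a short, stable digest from the guid and
    # scope it to the feed so the same email always maps to the same story.
    return "%s:%s" % (feed_id, hashlib.sha1(guid).hexdigest()[:6])

# make_story_hash('abc123signature', 42) always returns the same value,
# so a second delivery finds the existing story instead of creating one.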