def reindex_stories():
    """Give every MStory that still has a unicode ``id`` a fresh ObjectId.

    Feeds are visited in ``-average_stories_per_month`` order.  For each story
    whose ``id`` is a ``unicode`` value, that value is copied into
    ``story_guid``, a new ``ObjectId`` becomes the ``id``, the story is saved
    and the document stored under the old string ``_id`` is removed.

    Progress is printed once per feed and each time the rounded percentage of
    processed stories changes.
    """
    db = pymongo.Connection().newsblur
    count = MStory.objects().count()
    print("Mongo DB stories: %s" % count)
    p = 0.0
    i = 0

    feeds = Feed.objects.all().order_by('-average_stories_per_month')
    feed_count = feeds.count()
    f = 0
    for feed in feeds:
        f += 1
        print("%s/%s: %s" % (f, feed_count, feed))
        sys.stdout.flush()
        for story in MStory.objects(story_feed_id=feed.pk):
            i += 1.0
            # Guard the division: a story may show up after a zero count was taken.
            percent = round(i / count * 100) if count else 0.0
            if percent != p:
                p = percent
                print('%s%%' % p)
            if not isinstance(story.id, unicode):
                continue
            story.story_guid = story.id
            story.id = pymongo.objectid.ObjectId()
            try:
                story.save()
            except OperationError as e:
                # Reported only; the old string-keyed document is still removed below.
                print(" ***> OperationError: %s" % e)
            except Exception as e:
                # `except e:` named an unbound variable rather than an exception
                # class, so unknown errors never reached this handler.  Keep the
                # old document when the save failed for an unexplained reason.
                print(' ***> Unknown Error: %s' % e)
                continue
            db.stories.remove({"_id": story.story_guid})
예제 #2
0
파일: tests.py 프로젝트: ichthyos/NewsBlur
 def test_load_feeds__gawker(self):
     # Loads the two gawker fixtures in turn, force-updates the feed after
     # each, and checks the stored story count and the single-feed response.
     self.client.login(username='******', password='******')

     management.call_command('loaddata', 'gawker1.json', verbosity=0)
     gawker = Feed.objects.get(feed_link__contains='gawker')
     self.assertEquals(MStory.objects(story_feed_id=gawker.pk).count(), 0)

     gawker.update(force=True)
     self.assertEquals(MStory.objects(story_feed_id=gawker.pk).count(), 38)

     management.call_command('loaddata', 'gawker2.json', verbosity=0)
     gawker.update(force=True)

     # Test: 1 changed char in content
     self.assertEquals(MStory.objects(story_feed_id=gawker.pk).count(), 38)

     single_feed_url = reverse('load-single-feed', kwargs={'feed_id': 1})
     response = self.client.get(single_feed_url)
     payload = json.decode(response.content)
     self.assertEquals(len(payload['stories']), 6)
예제 #3
0
    def test_load_feeds__gawker(self):
        # Loads the two gawker fixtures in turn, force-updates feed 10 after
        # each, and checks the stored story count and the single-feed response.
        self.client.login(username='******', password='******')

        loaddata_options = dict(verbosity=0, skip_checks=False)
        management.call_command('loaddata', 'gawker1.json', **loaddata_options)

        gawker = Feed.objects.get(pk=10)
        self.assertEqual(MStory.objects(story_feed_id=gawker.pk).count(), 0)

        gawker.update(force=True)
        self.assertEqual(MStory.objects(story_feed_id=gawker.pk).count(), 38)

        management.call_command('loaddata', 'gawker2.json', **loaddata_options)
        gawker.update(force=True)

        # Test: 1 changed char in content
        self.assertEqual(MStory.objects(story_feed_id=gawker.pk).count(), 38)

        single_feed_url = reverse('load-single-feed', kwargs={'feed_id': 10})
        response = self.client.get(single_feed_url)
        payload = json.decode(response.content)
        self.assertEqual(len(payload['stories']), 6)
예제 #4
0
파일: tests.py 프로젝트: ichthyos/NewsBlur
 def test_load_feeds__gothamist(self):
     # Loads the two gothamist fixtures in turn, refreshes feed 4 after each,
     # and checks the stored story count and the single-feed response.
     self.client.login(username='******', password='******')

     refresh_options = dict(force=1, feed=4, single_threaded=True, daemonize=False)

     def fetch_single_feed():
         url = reverse('load-single-feed', kwargs=dict(feed_id=4))
         response = self.client.get(url)
         return json.decode(response.content)

     management.call_command('loaddata', 'gothamist_aug_2009_1.json', verbosity=0)
     gothamist = Feed.objects.get(feed_link__contains='gothamist')
     self.assertEquals(MStory.objects(story_feed_id=gothamist.pk).count(), 0)

     management.call_command('refresh_feed', **refresh_options)
     self.assertEquals(MStory.objects(story_feed_id=gothamist.pk).count(), 42)

     first_load = fetch_single_feed()
     self.assertEquals(len(first_load['stories']), 6)

     management.call_command('loaddata', 'gothamist_aug_2009_2.json', verbosity=0)
     management.call_command('refresh_feed', **refresh_options)
     self.assertEquals(MStory.objects(story_feed_id=gothamist.pk).count(), 42)

     second_load = fetch_single_feed()
     # Test: 1 changed char in title
     self.assertEquals(len(second_load['stories']), 6)
예제 #5
0
def mark_story_as_unread(request):
    """Mark one story as unread for the requesting user.

    Reads ``story_id`` and ``feed_id`` from ``request.POST``, flags the user's
    subscription for an unread recalculation and deletes the matching
    ``MUserStory`` read marker.  When the story is older than the
    subscription's ``mark_read_date``, that date is moved to one minute before
    the story and the stories between the two dates are re-marked as read.

    Returns ``dict(code=0, payload=dict(story_id=...))``, or the result of
    ``usersub.mark_story_ids_as_read`` when the date range had to be inverted.
    Raises ``KeyError``/``ValueError`` for missing or malformed POST values and
    ``UserSubscription.DoesNotExist`` when the user has no such subscription.
    """
    story_id = request.POST['story_id']
    feed_id = int(request.POST['feed_id'])

    usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id)

    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()

    data = dict(code=0, payload=dict(story_id=story_id))
    logging.user(request, "~FY~SBUnread~SN story in feed: %s" % (usersub.feed))

    try:
        story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
    except IndexError:
        # The story is no longer stored.  There is no date to compare against,
        # but the read marker below can still be removed.
        story = None

    if story is not None and story.story_date < usersub.mark_read_date:
        # Story is outside the mark as read range, so invert all stories before.
        newer_stories = MStory.objects(story_feed_id=story.story_feed_id,
                                       story_date__gte=story.story_date,
                                       story_date__lte=usersub.mark_read_date
                                       ).only('story_guid')
        newer_stories = [s.story_guid for s in newer_stories]
        usersub.mark_read_date = story.story_date - datetime.timedelta(minutes=1)
        usersub.needs_unread_recalc = True
        usersub.save()

        # Mark stories as read only after the mark_read_date has been moved, otherwise
        # these would be ignored.
        data = usersub.mark_story_ids_as_read(newer_stories, request=request)

    m = MUserStory.objects(story_id=story_id, user_id=request.user.pk, feed_id=feed_id)
    m.delete()

    return data
예제 #6
0
    def test_load_feeds__gawker(self):
        # Loads the two gawker fixtures in turn, refreshes feed 1 after each,
        # and checks the stored story count and the load_single_feed response.
        self.client.login(username='******', password='******')

        refresh_options = dict(force=1, feed=1, single_threaded=True, daemonize=False)

        management.call_command('loaddata', 'gawker1.json', verbosity=0)
        gawker = Feed.objects.get(feed_link__contains='gawker')
        self.assertEquals(MStory.objects(story_feed_id=gawker.pk).count(), 0)

        management.call_command('refresh_feed', **refresh_options)
        self.assertEquals(MStory.objects(story_feed_id=gawker.pk).count(), 38)

        management.call_command('loaddata', 'gawker2.json', verbosity=0)
        management.call_command('refresh_feed', **refresh_options)

        # Test: 1 changed char in content
        self.assertEquals(MStory.objects(story_feed_id=gawker.pk).count(), 38)

        response = self.client.post('/reader/load_single_feed', {"feed_id": 1})
        payload = json.decode(response.content)
        self.assertEquals(len(payload['stories']), 30)
예제 #7
0
def reindex_stories():
    """Give every MStory with a non-empty unicode ``id`` a fresh ObjectId.

    Feeds are walked in ``-average_stories_per_month`` order.  For each
    qualifying story the old ``id`` is copied into ``story_guid``, a new
    ``ObjectId`` becomes the ``id`` and the story is saved; an
    ``OperationError`` on save is reported as a duplicate.  Progress is
    printed per feed and whenever the rounded percentage changes.
    """
    total = MStory.objects().count()
    print("Mongo DB stories: %s" % total)
    last_percent = 0.0
    seen = 0

    ordered_feeds = Feed.objects.all().order_by('-average_stories_per_month')
    feed_total = ordered_feeds.count()
    feed_number = 0
    for feed in ordered_feeds:
        feed_number += 1
        print("%s/%s: %s" % (feed_number, feed_total, feed,))
        sys.stdout.flush()

        for story in MStory.objects(story_feed_id=feed.pk):
            seen += 1.0
            percent = round(seen / total * 100)
            if percent != last_percent:
                last_percent = percent
                print('%s%%' % last_percent)

            # Only stories keyed by a non-empty unicode id need re-keying.
            if not (isinstance(story.id, unicode) and story.id):
                continue

            story.story_guid = story.id
            story.id = pymongo.objectid.ObjectId()
            try:
                story.save()
            except mongoengine.queryset.OperationError:
                print('Dupe!')
예제 #8
0
파일: tests.py 프로젝트: AnushPrem/NewsBlur
 def test_load_feeds__slashdot(self):
     # Loads the two slashdot fixtures in turn, refreshes feed 5 after each,
     # and checks the stored story count and the /reader/feed response.
     self.client.login(username='******', password='******')

     refresh_options = dict(force=1, feed=5, single_threaded=True, daemonize=False)

     management.call_command('loaddata', 'slashdot1.json', verbosity=0)
     slashdot = Feed.objects.get(feed_link__contains='slashdot')
     self.assertEquals(MStory.objects(story_feed_id=slashdot.pk).count(), 0)

     management.call_command('refresh_feed', **refresh_options)
     self.assertEquals(MStory.objects(story_feed_id=slashdot.pk).count(), 38)

     management.call_command('loaddata', 'slashdot2.json', verbosity=0)
     management.call_command('refresh_feed', **refresh_options)
     self.assertEquals(MStory.objects(story_feed_id=slashdot.pk).count(), 38)

     response = self.client.post('/reader/feed', { "feed_id": 5 })

     # pprint([c['story_title'] for c in json.decode(response.content)])
     payload = json.decode(response.content)

     # Test: 1 changed char in title
     self.assertEquals(len(payload['stories']), 30)
예제 #9
0
파일: tests.py 프로젝트: stfenjobs/PyTune3
    def test_load_feeds__gothamist(self):
        # Loads the two gothamist fixtures in turn, force-updates the feed
        # after each, and checks the story count and the single-feed response.
        self.client.login(username='******', password='******')

        def fetch_single_feed():
            url = reverse('load-single-feed', kwargs=dict(feed_id=4))
            response = self.client.get(url)
            return json.decode(response.content)

        management.call_command('loaddata', 'gothamist_aug_2009_1.json', verbosity=0)
        gothamist = Feed.objects.get(feed_link__contains='gothamist')
        self.assertEquals(MStory.objects(story_feed_id=gothamist.pk).count(), 0)

        gothamist.update(force=True)
        self.assertEquals(MStory.objects(story_feed_id=gothamist.pk).count(), 42)

        first_load = fetch_single_feed()
        self.assertEquals(len(first_load['stories']), 6)

        management.call_command('loaddata', 'gothamist_aug_2009_2.json', verbosity=0)
        gothamist.update(force=True)
        self.assertEquals(MStory.objects(story_feed_id=gothamist.pk).count(), 42)

        second_load = fetch_single_feed()
        # Test: 1 changed char in title
        self.assertEquals(len(second_load['stories']), 6)
예제 #10
0
파일: views.py 프로젝트: rjmolesa/NewsBlur
def mark_story_as_unread(request):
    """Mark one story as unread for the requesting user.

    Reads ``story_id`` and ``feed_id`` from ``request.POST``, flags the user's
    subscription for an unread recalculation and deletes the matching
    ``MUserStory`` read marker.  When the story is older than the
    subscription's ``mark_read_date``, that date is moved to one minute before
    the story and the stories between the two dates are re-marked as read.

    Returns ``dict(code=0, payload=dict(story_id=...))``, or the result of
    ``usersub.mark_story_ids_as_read`` when the date range had to be inverted.
    Raises ``KeyError``/``ValueError`` for missing or malformed POST values and
    ``UserSubscription.DoesNotExist`` when the user has no such subscription.
    """
    story_id = request.POST['story_id']
    feed_id = int(request.POST['feed_id'])

    usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id)

    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()

    data = dict(code=0, payload=dict(story_id=story_id))
    logging.user(request, "~FY~SBUnread~SN story in feed: %s" % (usersub.feed))

    try:
        story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
    except IndexError:
        # The story is no longer stored.  There is no date to compare against,
        # but the read marker below can still be removed.
        story = None

    if story is not None and story.story_date < usersub.mark_read_date:
        # Story is outside the mark as read range, so invert all stories before.
        newer_stories = MStory.objects(story_feed_id=story.story_feed_id,
                                       story_date__gte=story.story_date,
                                       story_date__lte=usersub.mark_read_date
                                       ).only('story_guid')
        newer_stories = [s.story_guid for s in newer_stories]
        usersub.mark_read_date = story.story_date - datetime.timedelta(minutes=1)
        usersub.needs_unread_recalc = True
        usersub.save()

        # Mark stories as read only after the mark_read_date has been moved, otherwise
        # these would be ignored.
        data = usersub.mark_story_ids_as_read(newer_stories, request=request)

    m = MUserStory.objects(story_id=story_id, user_id=request.user.pk, feed_id=feed_id)
    m.delete()

    return data
예제 #11
0
def bootstrap_stories():
    """Copy relational ``Story`` rows into MongoDB as ``MStory`` documents.

    For every feed (ordered by ``-average_stories_per_month``) each ``Story``
    row is read as a dict, its ``story_tags`` value is decoded with
    ``json.decode``, the ``id`` and ``story_author_id`` keys are dropped and
    the remainder is saved as an ``MStory``.  Rows whose tags cannot be decoded
    or whose save fails are skipped.  Story counts are printed before and after.
    """
    print("Mongo DB stories: %s" % MStory.objects().count())
    # db.stories.drop()
    # NOTE(review): the drop above is disabled, so the "Dropped!" line reports
    # an unchanged count -- confirm whether that is still intended.
    print("Dropped! Mongo DB stories: %s" % MStory.objects().count())

    print("Stories: %s" % Story.objects.all().count())
    pprint(db.stories.index_information())

    feeds = Feed.objects.all().order_by('-average_stories_per_month')
    feed_count = feeds.count()
    i = 0
    for feed in feeds:
        i += 1
        feed_stories = Story.objects.filter(story_feed=feed)
        print("%s/%s: %s (%s stories)" % (i, feed_count, feed, feed_stories.count()))
        sys.stdout.flush()

        for story in feed_stories.values():
            # story['story_tags'] = [tag.name for tag in Tag.objects.filter(story=story['id'])]
            try:
                story['story_tags'] = json.decode(story['story_tags'])
            except Exception:
                # Best-effort skip; unlike a bare ``except:`` this lets
                # KeyboardInterrupt and SystemExit stop the run.
                continue
            del story['id']
            del story['story_author_id']
            try:
                MStory(**story).save()
            except Exception:
                # Same best-effort policy for rows that fail to save.
                continue

    print("\nMongo DB stories: %s" % MStory.objects().count())
예제 #12
0
def reindex_stories():
    """Give every MStory that still has a unicode ``id`` a fresh ObjectId.

    Feeds are visited in ``-average_stories_per_month`` order.  For each story
    whose ``id`` is a ``unicode`` value, that value is copied into
    ``story_guid``, a new ``ObjectId`` becomes the ``id``, the story is saved
    and the document stored under the old string ``_id`` is removed.

    Progress is printed once per feed and each time the rounded percentage of
    processed stories changes.
    """
    db = pymongo.Connection().newsblur
    count = MStory.objects().count()
    print("Mongo DB stories: %s" % count)
    p = 0.0
    i = 0

    feeds = Feed.objects.all().order_by('-average_stories_per_month')
    feed_count = feeds.count()
    f = 0
    for feed in feeds:
        f += 1
        print("%s/%s: %s" % (f, feed_count, feed,))
        sys.stdout.flush()
        for story in MStory.objects(story_feed_id=feed.pk):
            i += 1.0
            # Guard the division: a story may show up after a zero count was taken.
            percent = round(i / count * 100) if count else 0.0
            if percent != p:
                p = percent
                print('%s%%' % p)
            if not isinstance(story.id, unicode):
                continue
            story.story_guid = story.id
            story.id = pymongo.objectid.ObjectId()
            try:
                story.save()
            except OperationError as e:
                # Reported only; the old string-keyed document is still removed below.
                print(" ***> OperationError: %s" % e)
            except Exception as e:
                # `except e:` named an unbound variable rather than an exception
                # class, so unknown errors never reached this handler.  Keep the
                # old document when the save failed for an unexplained reason.
                print(' ***> Unknown Error: %s' % e)
                continue
            db.stories.remove({"_id": story.story_guid})
def bootstrap_stories():
    """Copy relational ``Story`` rows into MongoDB as ``MStory`` documents.

    For every feed (ordered by ``-average_stories_per_month``) each ``Story``
    row is read as a dict, its ``story_tags`` value is decoded with
    ``json.decode``, the ``id`` and ``story_author_id`` keys are dropped and
    the remainder is saved as an ``MStory``.  Rows whose tags cannot be decoded
    or whose save fails are skipped.  Story counts are printed before and after.
    """
    print("Mongo DB stories: %s" % MStory.objects().count())
    # db.stories.drop()
    # NOTE(review): the drop above is disabled, so the "Dropped!" line reports
    # an unchanged count -- confirm whether that is still intended.
    print("Dropped! Mongo DB stories: %s" % MStory.objects().count())

    print("Stories: %s" % Story.objects.all().count())
    pprint(db.stories.index_information())

    feeds = Feed.objects.all().order_by('-average_stories_per_month')
    feed_count = feeds.count()
    i = 0
    for feed in feeds:
        i += 1
        feed_stories = Story.objects.filter(story_feed=feed)
        print("%s/%s: %s (%s stories)" % (i, feed_count, feed, feed_stories.count()))
        sys.stdout.flush()

        for story in feed_stories.values():
            # story['story_tags'] = [tag.name for tag in Tag.objects.filter(story=story['id'])]
            try:
                story['story_tags'] = json.decode(story['story_tags'])
            except Exception:
                # Best-effort skip; unlike a bare ``except:`` this lets
                # KeyboardInterrupt and SystemExit stop the run.
                continue
            del story['id']
            del story['story_author_id']
            try:
                MStory(**story).save()
            except Exception:
                # Same best-effort policy for rows that fail to save.
                continue

    print("\nMongo DB stories: %s" % MStory.objects().count())
예제 #14
0
    def test_load_feeds__slashdot(self):
        # Loads the two slashdot fixtures in turn, refreshes feed 5 after each,
        # and checks the stored story count and the /reader/feed response.
        self.client.login(username='******', password='******')

        refresh_options = dict(force=1, feed=5, single_threaded=True, daemonize=False)

        management.call_command('loaddata', 'slashdot1.json', verbosity=0)
        slashdot = Feed.objects.get(feed_link__contains='slashdot')
        self.assertEquals(MStory.objects(story_feed_id=slashdot.pk).count(), 0)

        management.call_command('refresh_feed', **refresh_options)
        self.assertEquals(MStory.objects(story_feed_id=slashdot.pk).count(), 38)

        management.call_command('loaddata', 'slashdot2.json', verbosity=0)
        management.call_command('refresh_feed', **refresh_options)
        self.assertEquals(MStory.objects(story_feed_id=slashdot.pk).count(), 38)

        response = self.client.post('/reader/feed', {"feed_id": 5})

        # pprint([c['story_title'] for c in json.decode(response.content)])
        payload = json.decode(response.content)

        # Test: 1 changed char in title
        self.assertEquals(len(payload['stories']), 30)
예제 #15
0
    def test_load_feeds__motherjones(self):
        # Loads the two motherjones fixtures in turn, refreshes the feed after
        # each, marks one story read in between, and checks story counts plus
        # the 'nt' value reported by the load-feeds / refresh-feeds endpoints.
        self.client.login(username='******', password='******')

        def load_fixture(fixture_name):
            management.call_command('loaddata', fixture_name,
                                    verbosity=0, skip_checks=False)

        def refresh(feed_pk):
            management.call_command('refresh_feed', force=1, feed=feed_pk,
                                    daemonize=False, skip_checks=False)

        def nt_for(url_name, feed_key):
            response = self.client.get(reverse(url_name))
            content = json.decode(response.content)
            return content['feeds'][str(feed_key)]['nt']

        load_fixture('motherjones1.json')

        motherjones = Feed.objects.get(feed_link__contains='motherjones')
        self.assertEqual(MStory.objects(story_feed_id=motherjones.pk).count(), 0)

        refresh(motherjones.pk)

        stories = MStory.objects(story_feed_id=motherjones.pk)
        self.assertEqual(stories.count(), 10)
        self.assertEqual(nt_for('load-feeds', motherjones.pk), 10)

        self.client.post(reverse('mark-story-as-read'), {
            'story_id': stories[0].story_guid,
            'feed_id': motherjones.pk
        })
        self.assertEqual(nt_for('refresh-feeds', motherjones.pk), 9)

        load_fixture('motherjones2.json')
        refresh(motherjones.pk)
        self.assertEqual(MStory.objects(story_feed_id=motherjones.pk).count(), 10)

        single_feed_url = reverse('load-single-feed',
                                  kwargs=dict(feed_id=motherjones.pk))
        response = self.client.get(single_feed_url)

        # pprint([c['story_title'] for c in json.decode(response.content)])
        single_feed = json.decode(response.content)

        # Test: 1 changed char in title
        self.assertEqual(len(single_feed['stories']), 6)

        self.assertEqual(nt_for('refresh-feeds', single_feed['feed_id']), 9)
예제 #16
0
    def test_load_feeds__google(self):
        # Loads the two google fixtures in turn, refreshes feed 766 after each,
        # marks one story hash read in between, and checks story counts plus
        # the 'nt' value reported by the load-feeds / refresh-feeds endpoints.
        # Freezegun the date to 2017-04-30

        self.client.login(username='******', password='******')
        old_story_guid = "blog.google:443/topics/inside-google/google-earths-incredible-3d-imagery-explained/"

        refresh_options = dict(force=False, feed=766, single_threaded=True, daemonize=False)

        def nt_from(url):
            response = self.client.get(url)
            content = json.decode(response.content)
            return content['feeds']['766']['nt']

        management.call_command('loaddata', 'google1.json', verbosity=1)
        print(Feed.objects.all())
        google = Feed.objects.get(pk=766)
        print(" Testing test_load_feeds__google: %s" % google)
        self.assertEquals(MStory.objects(story_feed_id=google.pk).count(), 0)

        management.call_command('refresh_feed', **refresh_options)
        self.assertEquals(MStory.objects(story_feed_id=google.pk).count(), 20)

        self.assertEquals(nt_from(reverse('load-feeds') + "?update_counts=true"), 20)

        old_story = MStory.objects.get(story_feed_id=google.pk,
                                       story_guid__contains=old_story_guid)
        self.client.post(reverse('mark-story-hashes-as-read'),
                         {'story_hash': old_story.story_hash})

        self.assertEquals(nt_from(reverse('refresh-feeds')), 19)

        management.call_command('loaddata', 'google2.json', verbosity=1)
        management.call_command('refresh_feed', **refresh_options)
        self.assertEquals(MStory.objects(story_feed_id=google.pk).count(), 20)

        single_feed_url = reverse('load-single-feed', kwargs=dict(feed_id=766))
        response = self.client.get(single_feed_url)

        # pprint([c['story_title'] for c in json.decode(response.content)])
        single_feed = json.decode(response.content)

        # Test: 1 changed char in title
        self.assertEquals(len(single_feed['stories']), 6)

        self.assertEquals(nt_from(reverse('refresh-feeds')), 19)
예제 #17
0
파일: tests.py 프로젝트: stfenjobs/PyTune3
    def test_load_feeds__slashdot(self):
        # Loads the two slashdot fixtures in turn, refreshes feed 5 after each,
        # marks one story read in between, and checks story counts plus the
        # 'nt' value reported by the load-feeds / refresh-feeds endpoints.
        self.client.login(username='******', password='******')

        old_story_guid = "tag:google.com,2005:reader/item/4528442633bc7b2b"
        refresh_options = dict(force=1, feed=5, single_threaded=True, daemonize=False)

        def nt_for(url_name):
            response = self.client.get(reverse(url_name))
            content = json.decode(response.content)
            return content['feeds']['5']['nt']

        management.call_command('loaddata', 'slashdot1.json', verbosity=0)

        slashdot = Feed.objects.get(feed_link__contains='slashdot')
        self.assertEquals(MStory.objects(story_feed_id=slashdot.pk).count(), 0)

        management.call_command('refresh_feed', **refresh_options)
        self.assertEquals(MStory.objects(story_feed_id=slashdot.pk).count(), 38)

        self.assertEquals(nt_for('load-feeds'), 38)

        self.client.post(reverse('mark-story-as-read'), {
            'story_id': old_story_guid,
            'feed_id': 5
        })
        self.assertEquals(nt_for('refresh-feeds'), 37)

        management.call_command('loaddata', 'slashdot2.json', verbosity=0)
        management.call_command('refresh_feed', **refresh_options)
        self.assertEquals(MStory.objects(story_feed_id=slashdot.pk).count(), 38)

        single_feed_url = reverse('load-single-feed', kwargs=dict(feed_id=5))
        response = self.client.get(single_feed_url)

        # pprint([c['story_title'] for c in json.decode(response.content)])
        single_feed = json.decode(response.content)

        # Test: 1 changed char in title
        self.assertEquals(len(single_feed['stories']), 6)

        self.assertEquals(nt_for('refresh-feeds'), 37)
예제 #18
0
    def count_unreads_for_subscribers(self, feed):
        """Flag ``feed``'s active subscriptions for recalculation and optionally score them.

        Subscriptions are those that are active and whose user was last seen
        on or after ``feed.unread_cutoff``.  Returns early when there are none.
        Each one not already flagged gets ``needs_unread_recalc = True``.

        When ``self.options["compute_scores"]`` is set, stories dated on or
        after the cutoff are formatted, topped up with any story hashes that
        the ``zF:<feed pk>`` sorted set lists but the query did not return
        (re-read with PRIMARY read preference), cached under ``S:<feed pk>``
        for 60 seconds and handed to ``calculate_feed_scores_with_stories``.
        Otherwise a debug line is logged if ``mongodb_replication_lag`` is set.
        """
        subscriptions = UserSubscription.objects.filter(
            feed=feed, active=True, user__profile__last_seen_on__gte=feed.unread_cutoff
        ).order_by("-last_read_date")

        if not subscriptions.count():
            return

        for subscription in subscriptions:
            if subscription.needs_unread_recalc:
                continue
            subscription.needs_unread_recalc = True
            subscription.save()

        if not self.options["compute_scores"]:
            if self.options.get("mongodb_replication_lag"):
                logging.debug(
                    u"   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag"
                    % (feed.title[:30], self.options.get("mongodb_replication_lag"))
                )
            return

        redis_conn = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        recent_stories = MStory.objects(story_feed_id=feed.pk, story_date__gte=feed.unread_cutoff)
        stories = Feed.format_stories(recent_stories, feed.pk)

        # Score window: from the unread cutoff up to one day from now.
        window_start = int(feed.unread_cutoff.strftime("%s"))
        window_end = int(time.time() + 60 * 60 * 24)
        indexed_hashes = redis_conn.zrangebyscore("zF:%s" % feed.pk, window_start, window_end)

        fetched_hashes = set(story["story_hash"] for story in stories)
        missing_story_hashes = set(indexed_hashes) - fetched_hashes
        if missing_story_hashes:
            missing_query = MStory.objects(
                story_feed_id=feed.pk, story_hash__in=missing_story_hashes
            ).read_preference(pymongo.ReadPreference.PRIMARY)
            missing_stories = Feed.format_stories(missing_query, feed.pk)
            stories = missing_stories + stories
            logging.debug(
                u"   ---> [%-30s] ~FYFound ~SB~FC%s(of %s)/%s~FY~SN un-secondaried stories while computing scores"
                % (feed.title[:30], len(missing_stories), len(missing_story_hashes), len(stories))
            )

        cache.set("S:%s" % feed.pk, stories, 60)
        logging.debug(
            u"   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)"
            % (
                feed.title[:30],
                len(stories),
                subscriptions.count(),
                feed.num_subscribers,
                feed.active_subscribers,
                feed.premium_subscribers,
            )
        )
        self.calculate_feed_scores_with_stories(subscriptions, stories)
예제 #19
0
파일: models.py 프로젝트: mt3/NewsBlur
 def mark_feed_read(self):
     """Mark every story in this subscription's feed as read.

     The read/mark-read/updated dates are set to the later of "now" and one
     minute past the feed's newest story date, the three unread counters
     are zeroed and the subscription is saved with its recalculation flag
     cleared.
     """
     now = datetime.datetime.now()
     # One query for the newest story instead of an existence check plus a
     # second, sorted lookup.
     latest_story = MStory.objects(story_feed_id=self.feed.pk).order_by('-story_date').first()
     if latest_story:
         latest_story_date = latest_story.story_date + datetime.timedelta(minutes=1)
     else:
         latest_story_date = now
     read_date = max(now, latest_story_date)
     self.last_read_date = read_date
     self.mark_read_date = read_date
     self.unread_count_negative = 0
     self.unread_count_positive = 0
     self.unread_count_neutral = 0
     self.unread_count_updated = read_date
     # Was misspelled ``needs_unread_relcalc``, which set a stray attribute and
     # left the real flag untouched.
     self.needs_unread_recalc = False
     self.save()
예제 #20
0
def imagesearch(request, page_num):
    """Render ``imagesearch.html`` with up to 50 images from stories matching ``q``.

    ``page_num`` is the page number as a string; an empty string means page 1.
    The query comes from the ``q`` GET parameter.  At most 500 search hits are
    examined, and only image ids longer than 20 characters are looked up.

    The template context is ``locals()``, so the local variable names in this
    function are part of its contract with the template -- do not rename them.
    """
    if page_num == '':
        page_num = '1'
    page_num = int(page_num)
    num_per_page = 15

    q = request.GET.get('q', None)
    if q:
        image_server = settings.FDFS_HTTP_SERVER
        index_stories = SearchStory.query(q)[:500]
        response_images = []
        for index_story in index_stories:
            story = MStory.objects(id=index_story['db_id']).first()
            if story and story.image_ids:
                for image_id in story.image_ids:
                    if len(image_id) > 20:
                        # print image_id
                        image = MImage.objects(id=image_id).first()
                        if image is None:
                            # The story references an image that is no longer
                            # stored; skip it instead of failing the request.
                            continue
                        imagedict = dict(
                            image_url=image_server + image.image_remote_id,
                            story_url=story.story_guid,
                            story_title=story.story_title,
                        )
                        response_images.append(imagedict)
                        if len(response_images) >= 50:
                            return render(request, 'imagesearch.html',
                                          locals())
    return render(request, 'imagesearch.html', locals())
예제 #21
0
    def count_unreads_for_subscribers(self, feed):
        """Flag ``feed``'s active subscriptions for recalculation and optionally score them.

        Subscriptions are those that are active and whose user was last seen
        within ``settings.DAYS_OF_UNREAD`` days.  Stories from the same window
        come from ``self.options['slave_db']`` when it is set (each document
        wrapped with ``bunch``), otherwise from ``MStory``.  Every subscription
        has its ``usersub:<user_id>`` cache entry deleted and is saved with
        ``needs_unread_recalc = True``.  When ``self.options['compute_scores']``
        is set, each subscription's scores are calculated against those stories.
        """
        cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        subscriptions = UserSubscription.objects.filter(
            feed=feed,
            active=True,
            user__profile__last_seen_on__gte=cutoff,
        ).order_by('-last_read_date')
        logging.debug(u'   ---> [%-30s] Computing scores: %s (%s/%s/%s) subscribers' % (
                      unicode(feed)[:30], subscriptions.count(),
                      feed.num_subscribers, feed.active_subscribers, feed.premium_subscribers))

        slave_db = self.options['slave_db']
        if slave_db:
            raw_stories = slave_db.stories.find({
                "story_feed_id": feed.pk,
                "story_date": {
                    "$gte": cutoff,
                },
            })
            stories_db = [bunch(raw_story) for raw_story in raw_stories]
        else:
            stories_db = MStory.objects(story_feed_id=feed.pk,
                                        story_date__gte=cutoff)

        for subscription in subscriptions:
            cache.delete('usersub:%s' % subscription.user_id)
            subscription.needs_unread_recalc = True
            subscription.save()

        if not self.options['compute_scores']:
            return

        for subscription in subscriptions:
            # Silent unless verbosity is 2 or higher.
            silent = not (self.options['verbose'] >= 2)
            subscription.calculate_feed_scores(silent=silent, stories_db=stories_db)
예제 #22
0
def more_like_this(request):
    """Return stories similar to the one identified by ``story_hash``.

    Parameters are read from the request's GET or POST data (whichever matches
    ``request.method``): ``order`` (default ``'newest'``), ``page`` (default 1),
    ``limit`` (default 10) and ``story_hash``.  The search is restricted to the
    feed encoded in the story hash.

    Returns ``{"stories": [...]}`` with the stories formatted by
    ``Feed.format_stories`` and sorted by date according to ``order``.
    """
    # The list of the user's subscribed feed ids used to be built here and was
    # then immediately overwritten by the feed id taken from the story hash, so
    # that query is dropped.  The call is kept (result unused) in case the
    # helper does more than a lookup -- TODO confirm and remove.
    get_user(request)
    get_post = getattr(request, request.method)
    order = get_post.get('order', 'newest')
    page = int(get_post.get('page', 1))
    limit = int(get_post.get('limit', 10))
    offset = limit * (page - 1)
    story_hash = get_post.get('story_hash')

    feed_id, _ = MStory.split_story_hash(story_hash)
    story_ids = SearchStory.more_like_this([feed_id],
                                           story_hash,
                                           order,
                                           offset=offset,
                                           limit=limit)
    stories_db = MStory.objects(story_hash__in=story_ids).order_by(
        '-story_date' if order == "newest" else 'story_date')
    stories = Feed.format_stories(stories_db)

    return {
        "stories": stories,
    }
예제 #23
0
파일: models.py 프로젝트: thongly/NewsBlur
    def mark_feed_read(self):
        """Mark every story in this subscription's feed as read.

        Does nothing when all three unread counters are already zero and no
        recalculation is pending.  Otherwise the read dates are set to one
        second past the feed's newest story date (or to the current UTC time
        when the feed has no stories), the counters are zeroed, the
        recalculation flag is cleared and the subscription is saved.
        """
        counters_are_zero = (self.unread_count_negative == 0
                             and self.unread_count_neutral == 0
                             and self.unread_count_positive == 0)
        if counters_are_zero and not self.needs_unread_recalc:
            return

        now = datetime.datetime.utcnow()

        # Use the latest story to get last read time.
        newest = MStory.objects(story_feed_id=self.feed.pk).order_by(
            '-story_date').only('story_date').limit(1)
        if newest and len(newest) >= 1:
            read_through = newest[0]['story_date'] + datetime.timedelta(seconds=1)
        else:
            read_through = now

        self.last_read_date = read_through
        self.mark_read_date = read_through

        self.unread_count_negative = 0
        self.unread_count_positive = 0
        self.unread_count_neutral = 0

        self.unread_count_updated = now
        self.oldest_unread_story_date = now
        self.needs_unread_recalc = False

        # No longer removing old user read stories, since they're needed for social,
        # and they get cleaned up automatically when new stories come in.
        # MUserStory.delete_old_stories(self.user_id, self.feed_id)

        self.save()
예제 #24
0
파일: views.py 프로젝트: tosh/NewsBlur
def mark_story_as_read(request):
    """Record one or more stories of a feed as read by the requesting user.

    Reads the ``story_id`` list and ``feed_id`` from ``request.REQUEST``,
    flags the user's subscription to that feed for an unread recount, and
    saves one ``MUserStory`` per story.

    A story id that no longer matches any ``MStory`` is skipped instead of
    raising ``IndexError``, so one vanished story does not abort the whole
    batch. An ``OperationError`` on save is logged as a duplicate and
    otherwise ignored.

    Returns:
        ``dict(code=0, payload=story_ids)``.
    """
    story_ids = request.REQUEST.getlist('story_id')
    feed_id = int(request.REQUEST['feed_id'])

    usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id)
    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()

    data = dict(code=0, payload=story_ids)

    if len(story_ids) > 1:
        logging.debug(" ---> [%s] Read %s stories in feed: %s" % (request.user, len(story_ids), usersub.feed))
    else:
        logging.debug(" ---> [%s] Read story in feed: %s" % (request.user, usersub.feed))

    for story_id in story_ids:
        try:
            story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
        except IndexError:
            # The story no longer exists, so there is nothing to mark as read.
            continue
        now = datetime.datetime.utcnow()
        m = MUserStory(story=story, user_id=request.user.pk, feed_id=feed_id, read_date=now)
        try:
            m.save()
        except OperationError:
            logging.info(' ---> [%s] *** Marked story as read: Duplicate Story -> %s' % (request.user, story_id))

    return data
예제 #25
0
    def count_unreads_for_subscribers(self, feed):
        """Flag every recently-seen active subscriber of ``feed`` for a recount.

        Subscribers are the active ``UserSubscription`` rows whose user
        profile was last seen within ``settings.DAYS_OF_UNREAD`` days. For
        each one the ``usersub:<user_id>`` cache entry is deleted and
        ``needs_unread_recalc`` is set and saved. When the ``compute_scores``
        option is on, each subscription's feed scores are recalculated against
        the feed's stories from the same time window.
        """
        cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        subscriptions = UserSubscription.objects.filter(
            feed=feed,
            active=True,
            user__profile__last_seen_on__gte=cutoff,
        ).order_by("-last_read_date")

        log_values = (
            unicode(feed)[:30],
            subscriptions.count(),
            feed.num_subscribers,
            feed.active_subscribers,
            feed.premium_subscribers,
        )
        logging.debug(u"   ---> [%-30s] Computing scores: %s (%s/%s/%s) subscribers" % log_values)

        recent_stories = MStory.objects(story_feed_id=feed.pk, story_date__gte=cutoff)

        for subscription in subscriptions:
            cache.delete("usersub:%s" % subscription.user_id)
            subscription.needs_unread_recalc = True
            subscription.save()

        if not self.options["compute_scores"]:
            return

        for subscription in subscriptions:
            # Only verbosity 2 and above gets per-subscription output.
            quiet = not (self.options["verbose"] >= 2)
            subscription.calculate_feed_scores(silent=quiet, stories_db=recent_stories)
예제 #26
0
    def count_unreads_for_subscribers(self, feed):
        """Flag recently-seen active subscribers of ``feed`` and rescore them.

        Every active subscription whose user profile was last seen within
        ``settings.DAYS_OF_UNREAD`` days gets ``needs_unread_recalc`` set
        (saved only when it was not already set). With the ``compute_scores``
        option on, scores are computed from the feed's stories in that same
        window; otherwise, if a ``mongodb_replication_lag`` option is present,
        the skip is logged.
        """
        cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        subscriptions = UserSubscription.objects.filter(
            feed=feed,
            active=True,
            user__profile__last_seen_on__gte=cutoff,
        ).order_by("-last_read_date")

        for subscription in subscriptions:
            if subscription.needs_unread_recalc:
                continue
            subscription.needs_unread_recalc = True
            subscription.save()

        if self.options["compute_scores"]:
            recent_stories = MStory.objects(story_feed_id=feed.pk, story_date__gte=cutoff)
            log_values = (
                feed.title[:30],
                recent_stories.count(),
                subscriptions.count(),
                feed.num_subscribers,
                feed.active_subscribers,
                feed.premium_subscribers,
            )
            logging.debug(
                u"   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)"
                % log_values
            )
            self.calculate_feed_scores_with_stories(subscriptions, recent_stories)
            return

        lag = self.options.get("mongodb_replication_lag")
        if lag:
            logging.debug(
                u"   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag"
                % (feed.title[:30], lag)
            )
예제 #27
0
def mark_story_as_unread(request):
    """Remove the requesting user's read marker for one story of a feed.

    Reads ``story_id`` and ``feed_id`` from ``request.POST``. The user's
    subscription is looked up by feed id; when it is missing, a
    ``DuplicateFeed`` row pointing at that feed id is tried instead.

    Returns:
        ``dict(code=-1)`` when no subscription can be found, otherwise
        ``dict(code=0, payload=dict(story_id=story_id))``.
    """
    story_id = request.POST['story_id']
    feed_id = int(request.POST['feed_id'])

    # QuerySet.get() raises the DoesNotExist of the model being queried
    # (UserSubscription); Feed.DoesNotExist is still caught so callers that
    # relied on the old behaviour are unaffected.
    try:
        usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id)
    except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
        duplicate_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
        if not duplicate_feed:
            # Without this return `usersub` would be unbound below.
            return dict(code=-1)
        try:
            usersub = UserSubscription.objects.get(user=request.user,
                                                   feed=duplicate_feed[0].feed)
        except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
            return dict(code=-1)

    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()

    data = dict(code=0, payload=dict(story_id=story_id))
    logging.user(request.user, "~FY~SBUnread~SN story in feed: %s" % (usersub.feed))

    story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
    m = MUserStory.objects(story=story, user_id=request.user.pk, feed_id=feed_id)
    m.delete()

    return data
예제 #28
0
def imagesearch(request,page_num):
	"""Render ``imagesearch.html`` with images from stories matching ``q``.

	``page_num`` arrives as a string (empty means page 1). When the ``q`` GET
	parameter is set, up to 500 search hits are scanned and at most 50 image
	entries are collected into ``response_images``.

	The template context is ``locals()``, so every local name in this function
	is visible to the template; do not rename or remove locals without checking
	the template.
	"""
	if page_num == '':
		page_num = '1'
	page_num = int(page_num)
	num_per_page = 15

	q = request.GET.get('q',None)
	if q:
		image_server = settings.FDFS_HTTP_SERVER
		index_stories = SearchStory.query(q)[:500]
		response_images = []
		for index_story in index_stories:
			story = MStory.objects(id=index_story['db_id']).first()
			if story and story.image_ids:
				for image_id in story.image_ids:
					# NOTE(review): ids of 20 characters or fewer are skipped;
					# the reason is not visible here, confirm what they represent.
					if len(image_id) > 20:
						image = MImage.objects(id=image_id).first()
						if image is None:
							# The story references an image that no longer
							# exists; skip it instead of failing the page.
							continue
						imagedict = dict(
							image_url=image_server+image.image_remote_id,
							story_url=story.story_guid,
							story_title = story.story_title,
							)
						response_images.append(imagedict)
						if len(response_images)>=50:
							return render(request,'imagesearch.html',locals())
	return render(request,'imagesearch.html',locals())
예제 #29
0
    def mark_feed_read(self):
        """Mark all stories of this subscription's feed as read.

        Returns ``None`` without touching anything when the three unread
        counters are zero and no recalculation is pending. Otherwise it
        advances the read dates to one second past the newest story (or to
        the current UTC time when the feed has no stories), zeroes the unread
        counters, saves the subscription and returns ``True``.
        """
        already_read = (self.unread_count_negative == 0
                        and self.unread_count_neutral == 0
                        and self.unread_count_positive == 0
                        and not self.needs_unread_recalc)
        if already_read:
            return

        utc_now = datetime.datetime.utcnow()

        # Look up only the date of the most recent story in the feed.
        story_query = MStory.objects(story_feed_id=self.feed.pk)
        story_query = story_query.order_by('-story_date').only('story_date').limit(1)
        if story_query and len(story_query) >= 1:
            read_until = story_query[0]['story_date'] + datetime.timedelta(seconds=1)
        else:
            read_until = utc_now

        self.last_read_date = read_until
        self.mark_read_date = read_until

        self.unread_count_negative = 0
        self.unread_count_positive = 0
        self.unread_count_neutral = 0
        self.unread_count_updated = utc_now
        self.oldest_unread_story_date = utc_now
        self.needs_unread_recalc = False

        self.save()
        return True
예제 #30
0
    def count_unreads_for_subscribers(self, feed):
        """Flag recently-seen active subscribers of ``feed`` and rescore them.

        Returns immediately when no active subscription has a user profile
        seen within ``settings.DAYS_OF_UNREAD`` days. Otherwise each such
        subscription gets ``needs_unread_recalc`` set (saved only if it was
        not already set). With ``compute_scores`` on, the feed's stories from
        that window are read with a PRIMARY read preference, formatted, and
        passed to ``calculate_feed_scores_with_stories``; failing that, a
        present ``mongodb_replication_lag`` option is logged as the reason
        for skipping.
        """
        cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        subscriptions = UserSubscription.objects.filter(
            feed=feed,
            active=True,
            user__profile__last_seen_on__gte=cutoff,
        ).order_by('-last_read_date')

        if not subscriptions.count():
            return

        for subscription in subscriptions:
            if subscription.needs_unread_recalc:
                continue
            subscription.needs_unread_recalc = True
            subscription.save()

        if self.options['compute_scores']:
            story_query = MStory.objects(story_feed_id=feed.pk, story_date__gte=cutoff)
            story_query = story_query.read_preference(pymongo.ReadPreference.PRIMARY)
            stories = Feed.format_stories(story_query, feed.pk)
            log_values = (feed.title[:30], len(stories), subscriptions.count(),
                          feed.num_subscribers, feed.active_subscribers,
                          feed.premium_subscribers)
            logging.debug(u'   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)' % log_values)
            self.calculate_feed_scores_with_stories(subscriptions, stories)
            return

        lag = self.options.get('mongodb_replication_lag')
        if lag:
            logging.debug(u'   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag' % (
                feed.title[:30], lag))
def bootstrap_userstories():
    print "Mongo DB userstories: %s" % MUserStory.objects().count()
    # db.userstories.drop()
    print "Dropped! Mongo DB userstories: %s" % MUserStory.objects().count()

    print "UserStories: %s" % UserStory.objects.all().count()
    pprint(db.userstories.index_information())

    userstories = UserStory.objects.all().values()
    for userstory in userstories:
        try:
            story = Story.objects.get(pk=userstory['story_id'])
        except Story.DoesNotExist:
            continue
        try:
            userstory['story'] = MStory.objects(
                story_feed_id=story.story_feed.pk,
                story_guid=story.story_guid)[0]
        except:
            print '!',
            continue
        print '.',
        del userstory['id']
        del userstory['opinion']
        del userstory['story_id']
        try:
            MUserStory(**userstory).save()
        except:
            print '\n\n!\n\n'
            continue

    print "\nMongo DB userstories: %s" % MUserStory.objects().count()
예제 #32
0
    def count_unreads_for_subscribers(self, feed):
        """Queue an unread recount for ``feed``'s subscribers and rescore them.

        Only active subscriptions whose user profile was seen within
        ``settings.DAYS_OF_UNREAD`` days are considered; with none, the
        method returns at once. Each one is flagged ``needs_unread_recalc``
        (and saved if the flag changed). When ``compute_scores`` is enabled
        the feed's stories from that window are fetched with a PRIMARY read
        preference, formatted and scored; otherwise a set
        ``mongodb_replication_lag`` option is logged as the skip reason.
        """
        window = datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        cutoff = datetime.datetime.utcnow() - window
        recent_subs = UserSubscription.objects.filter(
            feed=feed,
            active=True,
            user__profile__last_seen_on__gte=cutoff,
        ).order_by('-last_read_date')

        if not recent_subs.count():
            return

        for recent_sub in recent_subs:
            if recent_sub.needs_unread_recalc:
                continue
            recent_sub.needs_unread_recalc = True
            recent_sub.save()

        if self.options['compute_scores']:
            primary_query = MStory.objects(story_feed_id=feed.pk, story_date__gte=cutoff)
            primary_query = primary_query.read_preference(pymongo.ReadPreference.PRIMARY)
            stories = Feed.format_stories(primary_query, feed.pk)
            summary = (feed.title[:30], len(stories), recent_subs.count(),
                       feed.num_subscribers, feed.active_subscribers,
                       feed.premium_subscribers)
            logging.debug(
                u'   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)'
                % summary)
            self.calculate_feed_scores_with_stories(recent_subs, stories)
            return

        replication_lag = self.options.get('mongodb_replication_lag')
        if replication_lag:
            logging.debug(
                u'   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag'
                % (feed.title[:30], replication_lag))
예제 #33
0
파일: models.py 프로젝트: matsufan/NewsBlur
 def mark_feed_read(self):
     """Mark the whole feed as read for this subscription.

     The read dates move to one second past the newest story's date, or to
     the current UTC time when the feed has no stories. All unread counters
     are reset to zero and the subscription is saved.
     """
     utc_now = datetime.datetime.utcnow()

     # Only the date of the most recent story is needed.
     newest = MStory.objects(story_feed_id=self.feed.pk)
     newest = newest.order_by('-story_date').only('story_date').limit(1)
     if newest and len(newest) >= 1:
         read_until = newest[0]['story_date'] + datetime.timedelta(seconds=1)
     else:
         read_until = utc_now

     self.last_read_date = read_until
     self.mark_read_date = read_until

     self.unread_count_negative = 0
     self.unread_count_positive = 0
     self.unread_count_neutral = 0
     self.unread_count_updated = utc_now
     self.oldest_unread_story_date = utc_now
     self.needs_unread_recalc = False

     # Old user read stories are deliberately kept: they are needed for
     # social, and they get cleaned up automatically when new stories come in.

     self.save()
예제 #34
0
파일: views.py 프로젝트: vvarp/NewsBlur
def mark_story_as_read(request):
    """Record one or more stories of a feed as read by the requesting user.

    Reads the ``story_id`` list and ``feed_id`` from ``request.REQUEST``.
    The user's subscription is looked up by feed id; when it is missing, a
    ``DuplicateFeed`` row pointing at that feed id is tried instead. The
    subscription is flagged for an unread recount and one ``MUserStory`` is
    saved per story. Stories that no longer exist are skipped, and an
    ``OperationError`` on save is logged as a duplicate.

    Returns:
        ``dict(code=-1)`` when no subscription can be found, otherwise
        ``dict(code=0, payload=story_ids)``.
    """
    story_ids = request.REQUEST.getlist("story_id")
    feed_id = int(request.REQUEST["feed_id"])

    # QuerySet.get() raises the DoesNotExist of the model being queried
    # (UserSubscription); Feed.DoesNotExist is still caught for compatibility.
    try:
        usersub = UserSubscription.objects.select_related("feed").get(user=request.user, feed=feed_id)
    except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
        duplicate_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
        if not duplicate_feed:
            # Without this return `usersub` would be unbound below.
            return dict(code=-1)
        try:
            usersub = UserSubscription.objects.get(user=request.user, feed=duplicate_feed[0].feed)
        except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
            return dict(code=-1)

    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()

    data = dict(code=0, payload=story_ids)

    if len(story_ids) > 1:
        logging.info(" ---> [%s] ~FYRead %s stories in feed: %s" % (request.user, len(story_ids), usersub.feed))
    else:
        logging.info(" ---> [%s] ~FYRead story in feed: %s" % (request.user, usersub.feed))

    for story_id in story_ids:
        try:
            story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
        except IndexError:
            # The story no longer exists, so there is nothing to mark as read.
            continue
        now = datetime.datetime.utcnow()
        m = MUserStory(story=story, user_id=request.user.pk, feed_id=feed_id, read_date=now)
        try:
            m.save()
        except OperationError:
            logging.info(" ---> [%s] ~BRMarked story as read: Duplicate Story -> %s" % (request.user, story_id))

    return data
예제 #35
0
def bootstrap_userstories():
    print "Mongo DB userstories: %s" % MUserStory.objects().count()
    # db.userstories.drop()
    print "Dropped! Mongo DB userstories: %s" % MUserStory.objects().count()

    print "UserStories: %s" % UserStory.objects.all().count()
    pprint(db.userstories.index_information())

    userstories = UserStory.objects.all().values()
    for userstory in userstories:
        try:
            story = Story.objects.get(pk=userstory['story_id'])
        except Story.DoesNotExist:
            continue
        try:
            userstory['story'] = MStory.objects(story_feed_id=story.story_feed.pk, story_guid=story.story_guid)[0]
        except:
            print '!',
            continue
        print '.',
        del userstory['id']
        del userstory['opinion']
        del userstory['story_id']
        try:
            MUserStory(**userstory).save()
        except:
            print '\n\n!\n\n'
            continue

    print "\nMongo DB userstories: %s" % MUserStory.objects().count()
예제 #36
0
파일: views.py 프로젝트: mrcrabby/NewsBlur
def mark_story_as_unread(request):
    """Remove the requesting user's read marker for one story of a feed.

    Reads ``story_id`` and ``feed_id`` from ``request.POST``. The user's
    subscription is looked up by feed id; when it is missing, a
    ``DuplicateFeed`` row pointing at that feed id is tried instead.

    Returns:
        ``dict(code=-1)`` when no subscription can be found, otherwise
        ``dict(code=0, payload=dict(story_id=story_id))``.
    """
    story_id = request.POST['story_id']
    feed_id = int(request.POST['feed_id'])

    # QuerySet.get() raises the DoesNotExist of the model being queried
    # (UserSubscription); Feed.DoesNotExist is still caught for compatibility.
    try:
        usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id)
    except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
        duplicate_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
        if not duplicate_feed:
            # Without this return `usersub` would be unbound below.
            return dict(code=-1)
        try:
            usersub = UserSubscription.objects.get(user=request.user,
                                                   feed=duplicate_feed[0].feed)
        except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
            return dict(code=-1)

    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()

    data = dict(code=0, payload=dict(story_id=story_id))
    logging.user(request, "~FY~SBUnread~SN story in feed: %s" % (usersub.feed))

    story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
    m = MUserStory.objects(story=story, user_id=request.user.pk, feed_id=feed_id)
    m.delete()

    return data
예제 #37
0
파일: models.py 프로젝트: arg0/NewsBlur
 def mark_feed_read(self):
     """Mark all stories of this subscription's feed as read.

     Returns ``None`` untouched when the three unread counters are zero and
     no recalculation is pending. Otherwise the read dates advance to one
     second past the newest story (or to the current UTC time when the feed
     has no stories), the unread counters are zeroed, the subscription is
     saved and ``True`` is returned.
     """
     no_unreads = (self.unread_count_negative == 0
                   and self.unread_count_neutral == 0
                   and self.unread_count_positive == 0)
     if no_unreads and not self.needs_unread_recalc:
         return

     utc_now = datetime.datetime.utcnow()

     # Fetch just the date of the most recent story.
     newest = MStory.objects(story_feed_id=self.feed.pk)
     newest = newest.order_by('-story_date').only('story_date').limit(1)
     if newest and len(newest) >= 1:
         read_until = newest[0]['story_date'] + datetime.timedelta(seconds=1)
     else:
         read_until = utc_now

     self.last_read_date = read_until
     self.mark_read_date = read_until

     self.unread_count_negative = 0
     self.unread_count_positive = 0
     self.unread_count_neutral = 0
     self.unread_count_updated = utc_now
     self.oldest_unread_story_date = utc_now
     self.needs_unread_recalc = False

     self.save()
     return True
예제 #38
0
파일: models.py 프로젝트: TKupels/NewsBlur
 def mark_feed_read(self, cutoff_date=None):
     """Mark the feed as read up to ``cutoff_date`` (default: newest story).

     Returns ``None`` untouched when the three unread counters are zero and
     no recalculation is pending. Otherwise the read dates are set to one
     second past ``cutoff_date``, or past the newest story's date when no
     cutoff is given. If the feed has no stories at all, the current UTC time
     is used and the counters are zeroed directly; in every other case the
     subscription is flagged for an unread recount. Saves and returns
     ``True``.
     """
     no_unreads = (self.unread_count_negative == 0
                   and self.unread_count_neutral == 0
                   and self.unread_count_positive == 0)
     if no_unreads and not self.needs_unread_recalc:
         return

     one_second = datetime.timedelta(seconds=1)
     needs_recount = True
     if cutoff_date:
         cutoff_date = cutoff_date + one_second
     else:
         # No explicit cutoff: fall back to the newest story's date.
         newest = MStory.objects(story_feed_id=self.feed.pk)
         newest = newest.order_by('-story_date').only('story_date').limit(1)
         if newest and len(newest) >= 1:
             cutoff_date = newest[0]['story_date'] + one_second
         else:
             cutoff_date = datetime.datetime.utcnow()
             needs_recount = False

     self.last_read_date = cutoff_date
     self.mark_read_date = cutoff_date
     self.oldest_unread_story_date = cutoff_date

     if needs_recount:
         self.needs_unread_recalc = True
     else:
         self.unread_count_negative = 0
         self.unread_count_positive = 0
         self.unread_count_neutral = 0
         self.unread_count_updated = datetime.datetime.utcnow()
         self.needs_unread_recalc = False

     self.save()
     return True
예제 #39
0
파일: tests.py 프로젝트: MilenkoM/NewsBlur
    def test_load_feeds__google(self):
        # Freezegun the date to 2017-04-30
        
        self.client.login(username='******', password='******')
        old_story_guid = "blog.google:443/topics/inside-google/google-earths-incredible-3d-imagery-explained/"

        management.call_command('loaddata', 'google1.json', verbosity=1)
        print Feed.objects.all()
        feed = Feed.objects.get(pk=766)
        print " Testing test_load_feeds__google: %s" % feed
        stories = MStory.objects(story_feed_id=feed.pk)
        self.assertEquals(stories.count(), 0)

        management.call_command('refresh_feed', force=False, feed=766, single_threaded=True, daemonize=False)

        stories = MStory.objects(story_feed_id=feed.pk)
        self.assertEquals(stories.count(), 20)

        response = self.client.get(reverse('load-feeds')+"?update_counts=true")
        content = json.decode(response.content)
        self.assertEquals(content['feeds']['766']['nt'], 20)

        old_story = MStory.objects.get(story_feed_id=feed.pk, story_guid__contains=old_story_guid)
        self.client.post(reverse('mark-story-hashes-as-read'), {'story_hash': old_story.story_hash})

        response = self.client.get(reverse('refresh-feeds'))
        content = json.decode(response.content)
        self.assertEquals(content['feeds']['766']['nt'], 19)

        management.call_command('loaddata', 'google2.json', verbosity=1)
        management.call_command('refresh_feed', force=False, feed=766, single_threaded=True, daemonize=False)

        stories = MStory.objects(story_feed_id=feed.pk)
        self.assertEquals(stories.count(), 20)

        url = reverse('load-single-feed', kwargs=dict(feed_id=766))
        response = self.client.get(url)

        # pprint([c['story_title'] for c in json.decode(response.content)])
        feed = json.decode(response.content)

        # Test: 1 changed char in title
        self.assertEquals(len(feed['stories']), 6)

        response = self.client.get(reverse('refresh-feeds'))
        content = json.decode(response.content)
        self.assertEquals(content['feeds']['766']['nt'], 19)
예제 #40
0
def mark_story_as_read(request):
    """Record one or more stories of a feed as read by the requesting user.

    Reads the ``story_id`` list and ``feed_id`` from ``request.REQUEST``.
    The subscription is looked up by feed id, falling back to a
    ``DuplicateFeed`` row pointing at that id; ``dict(code=-1)`` is returned
    when neither yields a subscription. The subscription is flagged for an
    unread recount, and a ``MUserStory`` is saved per story that still
    exists. If the save raises ``OperationError`` the existing ``MUserStory``
    is fetched and its read date updated instead.

    Returns:
        ``dict(code=0, payload=story_ids)`` on success.
    """
    story_ids = request.REQUEST.getlist('story_id')
    feed_id = int(request.REQUEST['feed_id'])

    try:
        usersub = UserSubscription.objects.select_related('feed').get(
            user=request.user, feed=feed_id)
    except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
        duplicate_feed = DuplicateFeed.objects.filter(
            duplicate_feed_id=feed_id)
        if not duplicate_feed:
            return dict(code=-1)
        try:
            usersub = UserSubscription.objects.get(
                user=request.user, feed=duplicate_feed[0].feed)
        except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
            return dict(code=-1)

    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()

    data = dict(code=0, payload=story_ids)

    if len(story_ids) > 1:
        read_message = "~FYRead %s stories in feed: %s" % (len(story_ids), usersub.feed)
    else:
        read_message = "~FYRead story in feed: %s" % (usersub.feed)
    logging.user(request.user, read_message)

    for story_id in story_ids:
        matches = MStory.objects(story_feed_id=feed_id, story_guid=story_id)
        try:
            story = matches[0]
        except IndexError:
            # Story has been deleted, probably by feed_fetcher.
            continue

        now = datetime.datetime.utcnow()
        # A story dated in the future keeps its own date as the read date.
        if now > story.story_date:
            read_date = now
        else:
            read_date = story.story_date

        user_story = MUserStory(story=story,
                                user_id=request.user.pk,
                                feed_id=feed_id,
                                read_date=read_date)
        try:
            user_story.save()
        except OperationError:
            logging.user(
                request.user,
                "~BRMarked story as read: Duplicate Story -> %s" % (story_id))
            # Already marked as read: refresh the existing record's date.
            user_story = MUserStory.objects.get(story=story,
                                                user_id=request.user.pk,
                                                feed_id=feed_id)
            user_story.read_date = read_date
            user_story.save()

    return data
예제 #41
0
    def count_unreads_for_subscribers(self, feed):
        """Queue an unread recount for ``feed``'s subscribers and rescore them.

        Considers active subscriptions whose user profile was seen since
        ``feed.unread_cutoff``; returns at once when there are none. Each is
        flagged ``needs_unread_recalc`` (saved if the flag changed).

        With ``compute_scores`` on, stories since the cutoff are loaded and
        compared against the story hashes in the Redis sorted set
        ``zF:<feed pk>``. Hashes missing from the first query are re-fetched
        with a PRIMARY read preference and prepended. The combined list is
        cached under ``S:<feed pk>`` for 60 seconds and then scored.
        Otherwise a set ``mongodb_replication_lag`` option is logged as the
        skip reason.
        """
        recent_subs = UserSubscription.objects.filter(
            feed=feed,
            active=True,
            user__profile__last_seen_on__gte=feed.unread_cutoff,
        ).order_by('-last_read_date')

        if not recent_subs.count():
            return

        for recent_sub in recent_subs:
            if recent_sub.needs_unread_recalc:
                continue
            recent_sub.needs_unread_recalc = True
            recent_sub.save()

        if self.options['compute_scores']:
            r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
            story_query = MStory.objects(story_feed_id=feed.pk,
                                         story_date__gte=feed.unread_cutoff)
            stories = Feed.format_stories(story_query, feed.pk)

            # Score range: from the unread cutoff to one day from now.
            one_day = 60 * 60 * 24
            story_hashes = r.zrangebyscore(
                'zF:%s' % feed.pk,
                int(feed.unread_cutoff.strftime('%s')),
                int(time.time() + one_day))
            fetched_hashes = set([s['story_hash'] for s in stories])
            missing_story_hashes = set(story_hashes) - fetched_hashes

            if missing_story_hashes:
                missing_query = MStory.objects(story_feed_id=feed.pk,
                                               story_hash__in=missing_story_hashes)
                missing_query = missing_query.read_preference(pymongo.ReadPreference.PRIMARY)
                missing_stories = Feed.format_stories(missing_query, feed.pk)
                stories = missing_stories + stories
                logging.debug(
                    u'   ---> [%-30s] ~FYFound ~SB~FC%s(of %s)/%s~FY~SN un-secondaried stories while computing scores'
                    % (feed.title[:30], len(missing_stories),
                       len(missing_story_hashes), len(stories)))

            cache.set("S:%s" % feed.pk, stories, 60)
            summary = (feed.title[:30], len(stories), recent_subs.count(),
                       feed.num_subscribers, feed.active_subscribers,
                       feed.premium_subscribers)
            logging.debug(
                u'   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)'
                % summary)
            self.calculate_feed_scores_with_stories(recent_subs, stories)
            return

        replication_lag = self.options.get('mongodb_replication_lag')
        if replication_lag:
            logging.debug(
                u'   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag'
                % (feed.title[:30], replication_lag))
예제 #42
0
    def handle(self, *args, **options):
        """Find feed pairs that look like duplicates and merge them.

        Runs a raw SQL self-join over ``feeds`` selecting pairs (``f``,
        ``f2``) with equal tagline, link and title and ``f2.id > f.id``. For
        each pair a small sample of the duplicate feed's story guids is
        looked up in the original feed; when the two counts compare equal the
        pair is passed to ``merge_feeds``, otherwise it is skipped. Prints
        the number of skips and merges at the end.
        """
        cursor = connection.cursor()
        cursor.execute(
            """SELECT DISTINCT f.id AS original_id, f2.id AS duplicate_id, 
                              f.feed_address AS original_feed_address,
                              f2.feed_address AS duplicate_feed_address,
                              f.feed_title AS original_feed_title,
                              f2.feed_title AS duplicate_feed_title, 
                              f.feed_link AS original_feed_link,
                              f2.feed_link AS duplicate_feed_link, 
                              fd2.feed_tagline AS original_feed_tagline,
                              fd.feed_tagline AS duplicate_feed_tagline 
                          FROM feeds f, feeds f2
                          INNER JOIN rss_feeds_feeddata fd ON fd.feed_id = f.feed_id
                          INNER JOIN rss_feeds_feeddata fd2 ON fd2.feed_id = f2.feed_id
                          WHERE f2.id > f.id
                              AND fd.feed_tagline = fd2.feed_tagline 
                              AND f.feed_link = f2.feed_link 
                              AND f.feed_title = f2.feed_title
                          ORDER BY original_id ASC;""")

        # Only the first four selected columns get names: zip() stops at the
        # shorter sequence, so the remaining columns of each row are dropped.
        feed_fields = ('original_id', 'duplicate_id', 'original_feed_address',
                       'duplicate_feed_address')
        skips = 0
        merges = 0
        for feeds_values in cursor.fetchall():
            feeds = dict(zip(feed_fields, feeds_values))
            # Sample the duplicate feed's stories at offsets 5-7 (guid only).
            duplicate_stories = MStory.objects(
                story_feed_id=feeds['duplicate_id']).only('story_guid')[5:8]
            duplicate_story_ids = [
                story.story_guid for story in duplicate_stories
            ]
            # Stories in the original feed that share a guid with the sample.
            original_stories = MStory.objects(
                story_feed_id=feeds['original_id'],
                story_guid__in=duplicate_story_ids)
            # NOTE(review): depending on the MongoEngine version, count() on a
            # sliced queryset may ignore the [5:8] slice and return the feed's
            # full story count; confirm, or compare len(duplicate_story_ids).
            # NOTE(review): if both counts are 0 the pair is merged with no
            # story evidence; verify that is intended.
            if duplicate_stories.count() == original_stories.count():
                merges += 1
                merge_feeds(feeds['original_id'], feeds['duplicate_id'])
            else:
                # print duplicate_stories
                # print duplicate_story_ids
                # print original_stories
                # print "Skipping: %s" % feeds
                skips += 1

        print "Skips: %s, Merges: %s" % (skips, merges)
예제 #43
0
def calculate_metrics():
    """Return the total number of stories and of read-story records.

    The models are imported inside the function. The result is a dict with
    the keys ``stories`` and ``read_stories``.
    """
    from apps.rss_feeds.models import MStory
    from apps.reader.models import MUserStory

    story_total = MStory.objects().count()
    read_total = MUserStory.objects().count()
    return dict(stories=story_total, read_stories=read_total)
예제 #44
0
    def calculate_metrics(self):
        """Return the total number of stories and of read-story records.

        The models are imported inside the method. The result is a dict with
        the keys ``stories`` and ``read_stories``.
        """
        from apps.rss_feeds.models import MStory
        from apps.reader.models import MUserStory

        story_total = MStory.objects().count()
        read_total = MUserStory.objects().count()
        return dict(stories=story_total, read_stories=read_total)
예제 #45
0
 def test_load_feeds__slashdot(self):
     """Exercise load, refresh and mark-as-read counts for the slashdot feed.

     Loads the ``slashdot1`` fixture, refreshes feed 5, marks one story as
     read, then loads ``slashdot2`` and refreshes again, checking story
     counts and the ``nt`` counter after each step.
     """
     feed_pk = 5
     feed_key = str(feed_pk)
     refresh_options = dict(force=1, feed=feed_pk, single_threaded=True, daemonize=False)

     self.client.login(username='******', password='******')

     old_story_guid = "{'original-id': u'http://yro.slashdot.org/story/09/09/05/0112254/Court-Allows-Microsoft-To-Sell-Word-During-Appeal?from=rss', 'gr:original-id': u'http://yro.slashdot.org/story/09/09/05/0112254/Court-Allows-Microsoft-To-Sell-Word-During-Appeal?from=rss'}"
     new_story_guid = "{'original-id': u'http://yro.slashdot.org/story/09/09/05/0112254/Court-Allows-Microsoft-To-Sell-Word-During-Appeal?from=rss!!', 'gr:original-id': u'http://yro.slashdot.org/story/09/09/05/0112254/Court-Allows-Microsoft-To-Sell-Word-During-Appeal?from=rss!!'}"

     management.call_command('loaddata', 'slashdot1.json', verbosity=0)

     feed = Feed.objects.get(feed_link__contains='slashdot')
     self.assertEquals(MStory.objects(story_feed_id=feed.pk).count(), 0)

     management.call_command('refresh_feed', **refresh_options)
     self.assertEquals(MStory.objects(story_feed_id=feed.pk).count(), 38)

     loaded = json.decode(self.client.get(reverse('load-feeds')).content)
     self.assertEquals(loaded['feeds'][feed_key]['nt'], 38)

     # Mark a single story as read and expect the count to drop by one.
     self.client.post(reverse('mark-story-as-read'), {'story_id': old_story_guid, 'feed_id': feed_pk})

     refreshed = json.decode(self.client.get(reverse('refresh-feeds')).content)
     self.assertEquals(refreshed['feeds'][feed_key]['nt'], 37)

     management.call_command('loaddata', 'slashdot2.json', verbosity=0)
     management.call_command('refresh_feed', **refresh_options)
     self.assertEquals(MStory.objects(story_feed_id=feed.pk).count(), 38)

     single_feed_url = reverse('load-single-feed', kwargs=dict(feed_id=feed_pk))
     single_feed = json.decode(self.client.get(single_feed_url).content)

     # Test: 1 changed char in title
     self.assertEquals(len(single_feed['stories']), 12)

     refreshed = json.decode(self.client.get(reverse('refresh-feeds')).content)
     self.assertEquals(refreshed['feeds'][feed_key]['nt'], 37)
예제 #46
0
파일: tests.py 프로젝트: ichthyos/NewsBlur
    def test_load_feeds__slashdot(self):
        """Exercise load, refresh and mark-as-read counts for the slashdot feed.

        Loads the ``slashdot1`` fixture, refreshes feed 5, marks one story as
        read, then loads ``slashdot2`` and refreshes again, checking story
        counts and the ``nt`` counter after each step.
        """
        feed_pk = 5
        feed_key = str(feed_pk)
        refresh_options = dict(force=1, feed=feed_pk, single_threaded=True, daemonize=False)

        self.client.login(username='******', password='******')

        old_story_guid = "tag:google.com,2005:reader/item/4528442633bc7b2b"

        management.call_command('loaddata', 'slashdot1.json', verbosity=0)

        feed = Feed.objects.get(feed_link__contains='slashdot')
        self.assertEquals(MStory.objects(story_feed_id=feed.pk).count(), 0)

        management.call_command('refresh_feed', **refresh_options)
        self.assertEquals(MStory.objects(story_feed_id=feed.pk).count(), 38)

        loaded = json.decode(self.client.get(reverse('load-feeds')).content)
        self.assertEquals(loaded['feeds'][feed_key]['nt'], 38)

        # Mark a single story as read and expect the count to drop by one.
        self.client.post(reverse('mark-story-as-read'), {'story_id': old_story_guid, 'feed_id': feed_pk})

        refreshed = json.decode(self.client.get(reverse('refresh-feeds')).content)
        self.assertEquals(refreshed['feeds'][feed_key]['nt'], 37)

        management.call_command('loaddata', 'slashdot2.json', verbosity=0)
        management.call_command('refresh_feed', **refresh_options)
        self.assertEquals(MStory.objects(story_feed_id=feed.pk).count(), 38)

        single_feed_url = reverse('load-single-feed', kwargs=dict(feed_id=feed_pk))
        single_feed = json.decode(self.client.get(single_feed_url).content)

        # Test: 1 changed char in title
        self.assertEquals(len(single_feed['stories']), 6)

        refreshed = json.decode(self.client.get(reverse('refresh-feeds')).content)
        self.assertEquals(refreshed['feeds'][feed_key]['nt'], 37)
예제 #47
0
파일: tests.py 프로젝트: ichthyos/NewsBlur
    def test_load_feeds__motherjones(self):
        """Refresh the Mother Jones feed from two fixtures and check the counts.

        After each refresh the stored story count must be 10. Marking the
        first stored story as read must drop the feed's ``nt`` counter from 10
        to 9, and it must still be 9 after the second fixture is refreshed.
        """
        def get_json(url):
            # GET the URL with the logged-in test client and decode the body.
            return json.decode(self.client.get(url).content)

        self.client.login(username='******', password='******')

        management.call_command('loaddata', 'motherjones1.json', verbosity=0)
        motherjones = Feed.objects.get(feed_link__contains='motherjones')
        feed_pk = motherjones.pk
        counts_key = str(feed_pk)
        refresh_kwargs = dict(force=1, feed=feed_pk, single_threaded=True, daemonize=False)

        self.assertEqual(MStory.objects(story_feed_id=feed_pk).count(), 0)

        management.call_command('refresh_feed', **refresh_kwargs)
        stored = MStory.objects(story_feed_id=feed_pk)
        self.assertEqual(stored.count(), 10)

        loaded = get_json(reverse('load-feeds'))
        self.assertEqual(loaded['feeds'][counts_key]['nt'], 10)

        self.client.post(reverse('mark-story-as-read'),
                         {'story_id': stored[0].story_guid, 'feed_id': feed_pk})
        refreshed = get_json(reverse('refresh-feeds'))
        self.assertEqual(refreshed['feeds'][counts_key]['nt'], 9)

        management.call_command('loaddata', 'motherjones2.json', verbosity=0)
        management.call_command('refresh_feed', **refresh_kwargs)
        self.assertEqual(MStory.objects(story_feed_id=feed_pk).count(), 10)

        # Test: 1 changed char in title
        single_feed = get_json(reverse('load-single-feed', kwargs={'feed_id': feed_pk}))
        self.assertEqual(len(single_feed['stories']), 6)

        # The counter is looked up under the feed id reported by the response.
        refreshed = get_json(reverse('refresh-feeds'))
        self.assertEqual(refreshed['feeds'][str(single_feed['feed_id'])]['nt'], 9)
예제 #48
0
    def mark_feed_read(self):
        """Reset this record's read markers and unread counters, then save it.

        ``last_read_date``, ``mark_read_date`` and ``unread_count_updated`` are
        set to one second past the newest story date of ``self.feed``, or to
        the current UTC time when the feed has no stories. The three unread
        counters are zeroed, ``needs_unread_recalc`` is cleared, and
        ``MUserStory.delete_marked_as_read_stories`` is called for this user
        and feed before saving.
        """
        # A single query: first() returns None for an empty feed, so there is
        # no separate existence check and no gap between check and lookup in
        # which the stories could disappear.
        newest_story = (MStory.objects(story_feed_id=self.feed.pk)
                        .order_by('-story_date')
                        .only('story_date')
                        .first())
        if newest_story is not None:
            # One second past the newest story, so that story counts as read.
            latest_story_date = newest_story['story_date'] + datetime.timedelta(seconds=1)
        else:
            latest_story_date = datetime.datetime.utcnow()

        self.last_read_date = latest_story_date
        self.mark_read_date = latest_story_date
        self.unread_count_negative = 0
        self.unread_count_positive = 0
        self.unread_count_neutral = 0
        self.unread_count_updated = latest_story_date
        self.needs_unread_recalc = False
        MUserStory.delete_marked_as_read_stories(self.user.pk, self.feed.pk)

        self.save()
예제 #49
0
파일: models.py 프로젝트: moyoinc/NewsBlur
    def mark_feed_read(self):
        """Reset this record's read markers and unread counters, then save it.

        ``last_read_date``, ``mark_read_date`` and ``unread_count_updated`` are
        set to one second past the newest story date of ``self.feed``, or to
        the current UTC time when the feed has no stories. The three unread
        counters are zeroed, ``needs_unread_recalc`` is cleared, and
        ``MUserStory.delete_marked_as_read_stories`` is called for this user
        and feed before saving.
        """
        # A single query: first() returns None for an empty feed, so there is
        # no separate existence check and no gap between check and lookup in
        # which the stories could disappear.
        newest_story = (MStory.objects(story_feed_id=self.feed.pk)
                        .order_by('-story_date')
                        .only('story_date')
                        .first())
        if newest_story is not None:
            # One second past the newest story, so that story counts as read.
            latest_story_date = newest_story['story_date'] + datetime.timedelta(seconds=1)
        else:
            latest_story_date = datetime.datetime.utcnow()

        self.last_read_date = latest_story_date
        self.mark_read_date = latest_story_date
        self.unread_count_negative = 0
        self.unread_count_positive = 0
        self.unread_count_neutral = 0
        self.unread_count_updated = latest_story_date
        self.needs_unread_recalc = False
        MUserStory.delete_marked_as_read_stories(self.user.pk, self.feed.pk)

        self.save()
예제 #50
0
def compress_stories():
    """Re-save every stored story, feed by feed, printing progress.

    Feeds are visited in descending ``average_stories_per_month`` order. A
    "position/total: feed" line is printed per feed, and a percentage line is
    printed each time the rounded share of processed stories changes.
    """
    total_stories = MStory.objects().count()
    print("Mongo DB stories: %s" % total_stories)
    last_percent = 0.0
    processed = 0

    all_feeds = Feed.objects.all().order_by('-average_stories_per_month')
    total_feeds = all_feeds.count()
    for position, feed in enumerate(all_feeds, 1):
        print("%s/%s: %s" % (position, total_feeds, feed))
        sys.stdout.flush()

        for story in MStory.objects(story_feed_id=feed.pk):
            # Float increment keeps the division below a true division.
            processed += 1.0
            percent = round(processed / total_stories * 100)
            if percent != last_percent:
                last_percent = percent
                print('%s%%' % percent)
            story.save()
예제 #51
0
def compress_stories():
    """Re-save every stored story, feed by feed, printing progress.

    Feeds are visited in descending ``average_stories_per_month`` order. A
    "position/total: feed" line is printed per feed, and a percentage line is
    printed each time the rounded share of processed stories changes.
    """
    total_stories = MStory.objects().count()
    print("Mongo DB stories: %s" % total_stories)
    last_percent = 0.0
    processed = 0

    all_feeds = Feed.objects.all().order_by('-average_stories_per_month')
    total_feeds = all_feeds.count()
    for position, feed in enumerate(all_feeds, 1):
        print("%s/%s: %s" % (position, total_feeds, feed))
        sys.stdout.flush()

        for story in MStory.objects(story_feed_id=feed.pk):
            # Float increment keeps the division below a true division.
            processed += 1.0
            percent = round(processed / total_stories * 100)
            if percent != last_percent:
                last_percent = percent
                print('%s%%' % percent)
            story.save()
예제 #52
0
    def test_load_feeds__gothamist(self):
        """Refresh the Gothamist feed from two fixtures and check the counts.

        After each refresh of feed 4 the stored story count must be 42 and the
        single-feed endpoint must return 30 stories.
        """
        gothamist_id = 4
        single_feed_url = '/reader/load_single_feed'
        refresh_kwargs = dict(force=1, feed=gothamist_id, single_threaded=True, daemonize=False)

        self.client.login(username='******', password='******')

        management.call_command('loaddata', 'gothamist_aug_2009_1.json', verbosity=0)
        gothamist = Feed.objects.get(feed_link__contains='gothamist')
        self.assertEqual(MStory.objects(story_feed_id=gothamist.pk).count(), 0)

        management.call_command('refresh_feed', **refresh_kwargs)
        self.assertEqual(MStory.objects(story_feed_id=gothamist.pk).count(), 42)

        # The first load goes through POST, the second one below through GET.
        first_load = json.decode(
            self.client.post(single_feed_url, {"feed_id": gothamist_id}).content)
        self.assertEqual(len(first_load['stories']), 30)

        management.call_command('loaddata', 'gothamist_aug_2009_2.json', verbosity=0)
        management.call_command('refresh_feed', **refresh_kwargs)
        self.assertEqual(MStory.objects(story_feed_id=gothamist.pk).count(), 42)

        # Test: 1 changed char in title
        second_load = json.decode(
            self.client.get(single_feed_url, {"feed_id": gothamist_id}).content)
        self.assertEqual(len(second_load['stories']), 30)
예제 #53
0
    def process_feed_wrapper(self, feed_queue):
        """Fetch and process every feed in ``feed_queue``, then rescore subscribers.

        For each feed id the feed is fetched with ``FetchFeed``. When the fetch
        returned data with ``FEED_OK``, or the ``force`` option is set, the
        result is run through ``ProcessFeed``. If that produced new entries,
        ``force`` is set, or the feed had never been fetched before, old user
        stories are deleted and ``calculate_feed_scores`` is called for every
        subscription of the feed.

        Stops the loop on ``KeyboardInterrupt`` and returns early when
        ``Feed.DoesNotExist`` is raised inside the ``try`` block.
        """
        # Only stories at least this recent are handed to the score calculation.
        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        # NOTE(review): delta, identity and start_time are assigned but never
        # read in this method as shown -- confirm whether they are leftovers.
        delta = None
        current_process = multiprocessing.current_process()
        identity = "X"
        if current_process._identity:
            identity = current_process._identity[0]
        for feed_id in feed_queue:
            ret_entries = {ENTRY_NEW: 0, ENTRY_UPDATED: 0, ENTRY_SAME: 0, ENTRY_ERR: 0}
            start_time = datetime.datetime.utcnow()

            # Done outside the try block, so an exception raised here is not
            # handled by the except clauses below.
            feed = self.refresh_feed(feed_id)

            try:
                ffeed = FetchFeed(feed_id, self.options)
                ret_feed, fetched_feed = ffeed.fetch()

                if (fetched_feed and ret_feed == FEED_OK) or self.options["force"]:
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()

                    # Re-read the feed after processing.
                    feed = self.refresh_feed(feed_id)

                    if ret_entries.get(ENTRY_NEW) or self.options["force"] or not feed.fetched_once:
                        if not feed.fetched_once:
                            feed.fetched_once = True
                            feed.save()
                        MUserStory.delete_old_stories(feed_id=feed.pk)
                        user_subs = UserSubscription.objects.filter(feed=feed)
                        logging.debug(
                            u"   ---> [%-30s] Computing scores for all feed subscribers: %s subscribers"
                            % (unicode(feed)[:30], user_subs.count())
                        )
                        # One story queryset is shared by all subscriptions.
                        stories_db = MStory.objects(story_feed_id=feed.pk, story_date__gte=UNREAD_CUTOFF)
                        for sub in user_subs:
                            cache.delete("usersub:%s" % sub.user_id)
                            # Scoring is silent unless verbosity is 2 or higher.
                            silent = False if self.options["verbose"] >= 2 else True
                            sub.calculate_feed_scores(silent=silent, stories_db=stories_db)
                    cache.delete("feed_stories:%s-%s-%s" % (feed.id, 0, 25))
                    # if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED) or self.options['force']:
                    #     feed.get_stories(force=True)
            except KeyboardInterrupt:
                # Stop working through the queue.
                break
            except urllib2.HTTPError, e:
                # Record the HTTP status, message and response body on the feed.
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Feed.DoesNotExist, e:
                logging.debug("   ---> [%-30s] Feed is now gone..." % (unicode(feed)[:30]))
                # Returns from the method, so the rest of the queue is skipped.
                return
예제 #54
0
    def handle(self, *args, **options):
        """Find feed pairs that look like duplicates and merge them.

        The SQL selects pairs of feeds with equal title, link and tagline.
        For each pair, the stories at positions 5-7 of the higher-id feed are
        sampled; the pair is merged with ``merge_feeds`` when the lower-id
        feed has as many stories with those guids as were sampled, and skipped
        otherwise. The skip and merge totals are printed at the end.
        """
        cursor = connection.cursor()
        cursor.execute("""SELECT DISTINCT f.id AS original_id, f2.id AS duplicate_id, 
                              f.feed_address AS original_feed_address,
                              f2.feed_address AS duplicate_feed_address,
                              f.feed_title AS original_feed_title,
                              f2.feed_title AS duplicate_feed_title, 
                              f.feed_link AS original_feed_link,
                              f2.feed_link AS duplicate_feed_link, 
                              fd2.feed_tagline AS original_feed_tagline,
                              fd.feed_tagline AS duplicate_feed_tagline 
                          FROM feeds f, feeds f2
                          INNER JOIN rss_feeds_feeddata fd ON fd.feed_id = f.feed_id
                          INNER JOIN rss_feeds_feeddata fd2 ON fd2.feed_id = f2.feed_id
                          WHERE f2.id > f.id
                              AND fd.feed_tagline = fd2.feed_tagline 
                              AND f.feed_link = f2.feed_link 
                              AND f.feed_title = f2.feed_title
                          ORDER BY original_id ASC;""")

        # zip() stops at the shorter sequence, so only the first four selected
        # columns end up in each row dict.
        column_names = ('original_id', 'duplicate_id', 'original_feed_address', 'duplicate_feed_address')
        skipped = 0
        merged = 0
        for row in cursor.fetchall():
            pair = dict(zip(column_names, row))
            sampled = MStory.objects(story_feed_id=pair['duplicate_id']).only('story_guid')[5:8]
            sampled_guids = [sample.story_guid for sample in sampled]
            matching = MStory.objects(story_feed_id=pair['original_id'], story_guid__in=sampled_guids)
            if sampled.count() != matching.count():
                skipped += 1
                continue
            merged += 1
            merge_feeds(pair['original_id'], pair['duplicate_id'])

        print("Skips: %s, Merges: %s" % (skipped, merged))
예제 #55
0
def mark_story_as_starred(request):
    """Copy a story into the requesting user's starred stories.

    Reads ``feed_id`` (converted to int) and ``story_id`` from
    ``request.POST`` and looks up the story with that feed id and guid. When
    found, its non-None fields are copied into a new ``MStarredStory``
    together with the user's pk and the current local time.

    Returns:
        ``{'code': 1}`` when the story was starred, ``{'code': -1}`` when no
        matching story exists.
    """
    feed_id = int(request.POST['feed_id'])
    story_id = request.POST['story_id']

    # first() fetches the document once and returns None when nothing matches,
    # instead of evaluating the queryset again for each access.
    story = MStory.objects(story_feed_id=feed_id, story_guid=story_id).first()
    if story is None:
        return {'code': -1}

    # Drop None keys and None values so only populated fields are copied.
    story_db = dict((k, v) for k, v in story._data.items()
                    if k is not None and v is not None)
    now = datetime.datetime.now()
    story_values = dict(user_id=request.user.pk, starred_date=now, **story_db)
    MStarredStory.objects.create(**story_values)
    logging.user(request.user, "~FCStarring: ~SB%s" % (story.story_title[:50]))

    return {'code': 1}
예제 #56
0
    def run(self, feed_pk, **kwargs):
        """Fetch the reference images of every story stored for a feed.

        Args:
            feed_pk: Feed id; stories are selected by ``story_feed_id=feed_pk``.
            **kwargs: Accepted but not used.
        """
        # Imported inside the method, as in the original layout
        # (presumably to avoid an import cycle -- confirm).
        from apps.rss_feeds.models import MStory

        # Iterating an empty queryset does nothing, so no length check (and
        # the extra query it costs) is needed first.
        for story in MStory.objects(story_feed_id=feed_pk):
            story.fetch_reference_images()
        logging.info('---> ~FYProcess feed %d done!~FW' % feed_pk)
예제 #57
0
def send_story_email(request):
    """Email a story to a recipient on behalf of the requesting user.

    Reads ``story_id``, ``feed_id``, ``to``, ``from_name``, ``from_email`` and
    ``comments`` (cut to 2048 characters) from ``request.POST``. After the
    addresses and name pass validation and the story is found, a text+HTML
    message is sent to ``to`` with the sender in Cc and Reply-To.

    Returns:
        dict with ``code`` (1 on success, -1 on any validation failure or
        unknown story) and a human-readable ``message``.
    """
    code = 1
    message = 'OK'
    story_id = request.POST['story_id']
    feed_id = request.POST['feed_id']
    to_address = request.POST['to']
    from_name = request.POST['from_name']
    from_email = request.POST['from_email']
    comments = request.POST['comments']
    comments = comments[:2048]  # Separated due to PyLint
    from_address = '*****@*****.**'

    if not email_re.match(to_address):
        code = -1
        message = 'You need to send the email to a valid email address.'
    elif not email_re.match(from_email):
        code = -1
        message = 'You need to provide your email address.'
    elif not from_name:
        code = -1
        message = 'You need to provide your name.'
    else:
        # first() returns None for an unknown story; indexing with [0] would
        # raise IndexError and surface as a server error.
        story = MStory.objects(story_feed_id=feed_id, story_guid=story_id).first()
        if story is None:
            code = -1
            message = 'The story you are trying to share could not be found.'
        else:
            story = Feed.format_story(story, feed_id, text=True)
            feed = Feed.objects.get(pk=story['story_feed_id'])
            # The templates receive every local variable, so the local names
            # in this function are part of the template context -- do not
            # rename them without checking the templates.
            text = render_to_string('mail/email_story_text.xhtml', locals())
            html = render_to_string('mail/email_story_html.xhtml', locals())
            subject = "%s is sharing a story with you: \"%s\"" % (
                from_name, story['story_title'])
            # Keep the subject on a single line.
            subject = subject.replace('\n', ' ')
            msg = EmailMultiAlternatives(
                subject,
                text,
                from_email='NewsBlur <%s>' % from_address,
                to=[to_address],
                cc=['%s <%s>' % (from_name, from_email)],
                headers={'Reply-To': '%s <%s>' % (from_name, from_email)})
            msg.attach_alternative(html, "text/html")
            msg.send()
            logging.user(
                request, '~BMSharing story by email: ~FY~SB%s~SN~BM~FY/~SB%s' %
                (story['story_title'][:50], feed.feed_title[:50]))

    return {'code': code, 'message': message}
예제 #58
0
 def count_unreads_for_subscribers(self, feed):
     """Flag recently seen subscribers of ``feed`` for an unread recount.

     Selects active subscriptions whose user was last seen within
     ``settings.DAYS_OF_UNREAD`` days, clears each one's ``usersub`` cache
     entry and saves it with ``needs_unread_recalc`` set. When the
     ``compute_scores`` option is on, feed scores are then calculated for the
     same subscriptions against the stories dated within that window.
     """
     cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
     subscriptions = UserSubscription.objects.filter(
         feed=feed,
         active=True,
         user__profile__last_seen_on__gte=cutoff
     ).order_by('-last_read_date')
     logging.debug(u'   ---> [%-30s] Computing scores: %s (%s/%s/%s) subscribers' % (
         unicode(feed)[:30],
         subscriptions.count(),
         feed.num_subscribers,
         feed.active_subscribers,
         feed.premium_subscribers,
     ))

     recent_stories = MStory.objects(story_feed_id=feed.pk, story_date__gte=cutoff)
     for subscription in subscriptions:
         cache.delete('usersub:%s' % subscription.user_id)
         subscription.needs_unread_recalc = True
         subscription.save()

     if not self.options['compute_scores']:
         return

     for subscription in subscriptions:
         # Scoring is silent unless verbosity is 2 or higher.
         quiet = not (self.options['verbose'] >= 2)
         subscription.calculate_feed_scores(silent=quiet, stories_db=recent_stories)
예제 #59
0
파일: tests.py 프로젝트: starsep/NewsBlur
    def test_train(self):
        """Train a Bayes classifier on labelled titles and check its guesses.

        Phrases are extracted from the titles of stories in feed 1, a
        tokenizer is built from them, and the classifier is trained on ten
        hand-labelled titles before its guesses for new titles are asserted.
        """
        # user = User.objects.all()
        # feed = Feed.objects.all()

        # Load each fixture and refresh feed 1 after it.
        management.call_command('loaddata',
                                'brownstoner.json',
                                verbosity=0,
                                commit=False)
        management.call_command('refresh_feed',
                                force=1,
                                feed=1,
                                single_threaded=True,
                                daemonize=False)
        management.call_command('loaddata',
                                'brownstoner2.json',
                                verbosity=0,
                                commit=False)
        management.call_command('refresh_feed',
                                force=1,
                                feed=1,
                                single_threaded=True,
                                daemonize=False)

        # At most the first 53 stories of feed 1 feed the phrase filter.
        stories = MStory.objects(story_feed_id=1)[:53]

        phrasefilter = PhraseFilter()
        for story in stories:
            # print story.story_title, story.id
            phrasefilter.run(story.story_title, story.id)

        phrasefilter.pare_phrases()
        phrases = phrasefilter.get_phrases()
        print phrases

        tokenizer = Tokenizer(phrases)
        classifier = Bayes(
            tokenizer)  # FisherClassifier(user[0], feed[0], phrases)

        # Training set: six 'good' titles and four 'bad' ones. The
        # "Development Watch" title is trained once as good, three times as bad.
        classifier.train('good', 'House of the Day: 393 Pacific St.')
        classifier.train('good', 'House of the Day: 393 Pacific St.')
        classifier.train('good', 'Condo of the Day: 393 Pacific St.')
        classifier.train('good', 'Co-op of the Day: 393 Pacific St. #3')
        classifier.train('good', 'Co-op of the Day: 393 Pacific St. #3')
        classifier.train('good', 'Development Watch: 393 Pacific St. #3')
        classifier.train('bad', 'Development Watch: 393 Pacific St. #3')
        classifier.train('bad', 'Development Watch: 393 Pacific St. #3')
        classifier.train('bad', 'Development Watch: 393 Pacific St. #3')
        classifier.train('bad', 'Streetlevel: 393 Pacific St. #3')

        # Titles with an "of the Day" prefix must come out as good only.
        guess = dict(classifier.guess('Co-op of the Day: 413 Atlantic'))
        self.assertTrue(guess['good'] > .99)
        self.assertTrue('bad' not in guess)

        guess = dict(classifier.guess('House of the Day: 413 Atlantic'))
        self.assertTrue(guess['good'] > .99)
        self.assertTrue('bad' not in guess)

        # The mixed-label prefix must lean bad but still report both labels.
        guess = dict(classifier.guess('Development Watch: Yatta'))
        self.assertTrue(guess['bad'] > .7)
        self.assertTrue(guess['good'] < .3)

        guess = dict(classifier.guess('Development Watch: 393 Pacific St.'))
        self.assertTrue(guess['bad'] > .7)
        self.assertTrue(guess['good'] < .3)

        guess = dict(classifier.guess('Streetlevel: 123 Carlton St.'))
        self.assertTrue(guess['bad'] > .99)
        self.assertTrue('good' not in guess)

        # NOTE(review): unlike the guesses above, the next two are not wrapped
        # in dict(). If guess() returns (label, score) pairs, these membership
        # checks pass regardless of the result -- confirm the return type.
        guess = classifier.guess('Extra, Extra')
        self.assertTrue('bad' not in guess)
        self.assertTrue('good' not in guess)

        guess = classifier.guess('Nothing doing: 393 Pacific St.')
        self.assertTrue('bad' not in guess)
        self.assertTrue('good' not in guess)