Esempio n. 1
0
    def handle(self, *args, **options):
        """Run the bigram-histogram actions selected in ``options``.

        The flags are checked in a fixed order, so several of them can be
        combined in a single invocation:

        * ``reset``   -- call ``flush()`` on the histogram.
        * ``load``    -- call ``load()`` on the histogram.
        * ``info``    -- print the storage key, the total count, and the
          maximum value together with its bigram.
        * ``inspect`` -- print the normalized and the non-normalized score
          of the bigram given as the option's value.

        Positional ``args`` are accepted but not used.
        """
        histogram = BigramHistogram()

        if options['reset']:
            histogram.flush()

        if options['load']:
            histogram.load()

        # Every print below is a call with exactly one pre-formatted string:
        # that form produces the same output under the Python 2 print
        # statement and the Python 3 print function.
        if options['info']:
            print("Histogram stored under key %s. Total bigram count: %s, and __max: %s (%s)" % (
                histogram.key,
                histogram.count(),
                histogram.max(),
                histogram.max_bigram(),
            ))

        if options['inspect']:
            bigram = options['inspect']

            # The label already ends in ": " and a further space separates it
            # from the score, matching what ``print label, score`` emitted.
            normalized_label = "Normalized score for %s: " % bigram
            print("%s %s" % (normalized_label, histogram.get(bigram)))

            raw_label = "Non-normalized score for %s: " % bigram
            print("%s %s" % (raw_label, histogram.get(bigram, normalize=False)))
Esempio n. 2
0
class BigramHistogramConstructionTest(TestCase):
    """
    Test that the bigram histogram is constructed correctly from
    ``SimpleText`` rows.
    """
    def setUp(self):
        # Point the Redis settings at db 1 and select SimpleText as the text
        # model before the histogram is built (presumably to keep test data
        # apart from the default database -- confirm).
        settings.REDIS.update({'db': 1})
        settings.TEXT_MODEL = SimpleText
        self.histogram = BigramHistogram()
        # Start every test from a flushed histogram.
        self.histogram.flush()

    def test_histogram_values(self):
        """Three texts yield the expected key, count, max and scores."""
        # Reference table of the distinct bigrams in the three texts created
        # below, with their occurrence counts. Only its size is asserted; it
        # documents where the expected count of 33 comes from.
        test_histogram = {
            'A,can': 1,
            'can,of': 1,
            'of,cherry': 1,
            'cherry,coke': 1,
            'coke,makes': 1,
            'makes,the': 1,
            'the,thing': 1,
            'thing,awesome': 1,
            'watch,for': 1,
            'for,the': 1,
            'the,sunrise': 1,
            'sunrise,and': 1,
            'and,in': 1,
            'in,a': 2,
            'a,split': 1,
            'split,second': 1,
            'second,and': 1,
            'and,there': 1,
            'there,goes': 2,
            'goes,the': 1,
            'the,boat': 1,
            'boat,across': 1,
            'across,the': 1,
            'the,horizon': 1,
            'a,true': 1,
            'true,soul': 1,
            'soul,surfer': 1,
            'surfer,there': 1,
            'goes,an': 1,
            'an,absolute': 1,
            'absolute,legend': 1,
            'legend,in': 1,
            'a,second': 1,
        }

        SimpleText.objects.create(text="A can of cherry coke makes the thing awesome")
        SimpleText.objects.create(text="Watch for the sunrise, and in a split second and there goes the boat across the horizon")
        SimpleText.objects.create(text="a true soul surfer, there goes an absolute legend in a second")

        # Sanity check on the reference table itself.
        self.assertEqual(len(test_histogram), 33)

        self.assertEqual(self.histogram.key, "simpletext")
        # 33 bigrams plus 2 bookkeeping entries: one for __max, one for __max_bigram.
        self.assertEqual(self.histogram.count(), 33 + 2)
        self.assertEqual(self.histogram.max(), 2)

        self.assertEqual(self.histogram.max_bigram(), "there,goes")
        # The bigram holding the maximum scores 100.0; one seen half as often
        # scores 50.0; an unknown bigram yields False.
        self.assertEqual(self.histogram.get('there,goes'), 100.0)
        self.assertEqual(self.histogram.get('a,true'), 50.0)
        self.assertEqual(self.histogram.get('nilesh,ashra'), False)

    def testAutoPopulationOnCommentCreation(self):
        """Creating a text row fills the histogram without an explicit load()."""
        SimpleText.objects.create(text="i jumped into it, it made a big sloppy mess, why did i do that?")
        # 14 bigrams plus 2 bookkeeping entries: one for __max, one for __max_bigram.
        self.assertEqual(self.histogram.count(), 14 + 2)

    def tearDown(self):
        # Leave no histogram data behind for the next test.
        self.histogram.flush()