Exemple #1
0
 def test_reduce(self):
     from math import log
     d = Document('http://a.com', 'hello there hello')
     out = []
     class mock_reduce_out(object):
         def add(self, k, v):
             out.append((k,v))
     reduce_out = mock_reduce_out()        
     map_out = [('hello', 1), ('hello ', ('http://a.com', 2.0/3)),
                ('there', 1), ('there ', ('http://a.com', 1.0/3))]
     # unlike in test_TfIdf.map, using doc_count=2
     TfIdf.reduce(map_out, reduce_out, {'doc_count':2})
     expected = [('hello', ('http://a.com', 2.0/3 * log(2))),
                 ('there', ('http://a.com', 1.0/3 * log(2)))]
     self.assertEqual(sorted(expected), sorted(out))
Exemple #2
0
 def test_map(self):
     d = Document('http://a.com', 'hello there hello')
     out = list(TfIdf.map(d, None))
     expected = [('hello', 1), ('there', 1),
                 ('hello ', ('http://a.com', 2.0/3)),
                 ('there ', ('http://a.com', 1.0/3))]
     self.assertEqual(sorted(expected), sorted(out))
Exemple #3
0
 def test_partition(self):
     self.assertEquals(
         TfIdf.partition('hi', 1000, None),
         TfIdf.partition('hi ', 1000, None))