Ejemplo n.º 1
0
 def test_tokenize_classify(self):
     # Run every line of the module-level ``sample_input`` through
     # rolodexer.tokenize() and rolodexer.classify().  Lines that classify
     # without error must expose all five field names below; lines that
     # raise RolodexerError are recorded by their zero-based index.
     required_fields = (
         u'phonenumber', u'firstname', u'lastname', u'color', u'zipcode')
     entries = []
     errors = []

     for lineno, raw in enumerate(sample_input.splitlines()):
         tokens = rolodexer.tokenize(raw)
         try:
             classified = rolodexer.classify(tokens)
         except rolodexer.RolodexerError:
             # Unclassifiable line: remember where it was and move on.
             errors.append(lineno)
             continue

         keys = classified.keys()
         for field in required_fields:
             self.assertTrue(field in keys)
         entries.append(classified)

     output_dict = {u"entries": entries, u"errors": errors}
     sample_output_dict = json.loads(sample_output)

     # NOTE(review): assertItemsEqual compares the elements produced by
     # iterating its arguments, and iterating a dict yields its keys -- so
     # for output_dict only the key names take part in this check, not the
     # entries or error indices.  Confirm whether a deeper comparison
     # against the expected output was intended.
     self.assertItemsEqual(
         output_dict, sample_output_dict)
Ejemplo n.º 2
0
 def test_file_read(self):
     # Read the data/data.in fixture (located one directory above this
     # file's directory) line by line, tokenize and classify each stripped
     # line, and tally the classified colors in a Histogram.  Lines that
     # raise RolodexerError are recorded by their zero-based line index.
     from os.path import join, dirname
     from rolodexer.histogram import Histogram
     entries = []
     errors = []
     colors = Histogram()
     inpth = join(dirname(dirname(__file__)), 'data', 'data.in')

     # Iterating the handle yields the same lines as repeated readline()
     # calls, and enumerate() supplies the index for every line read,
     # whether or not it classifies.
     with open(inpth, 'rb') as fh:
         for idx, linen in enumerate(fh):
             line = linen.strip()
             tokens = rolodexer.tokenize(line)
             try:
                 terms = rolodexer.classify(tokens)
             except rolodexer.RolodexerError:
                 errors.append(idx)
             else:
                 entries.append(terms)
                 # 'CLEAR' is the stand-in bucket for entries lacking a
                 # color; the last assertion below expects it to stay empty.
                 colors.inc(terms.get('color', 'CLEAR'))

     # The file is closed at this point; everything below works on the
     # collected results only.
     output_dict = {u"entries": entries, u"errors": errors}
     output_json = json.dumps(output_dict, indent=2, sort_keys=True)
     print(output_json)
     print(colors)

     # assertEqual replaces the deprecated assertEquals alias.
     # The min/max values are specific to the fixture file's contents.
     self.assertEqual(colors.min(), 3)
     self.assertEqual(colors.max(), 10)
     # all classified lines have colors:
     self.assertEqual(colors.val('CLEAR'), 0)
Ejemplo n.º 3
0
 def test_classify(self):
     # Classify one hand-written record and check that each field of the
     # result maps back to the matching input term.  The input order is:
     # color, phone number, zip code, last name, first name.
     terms = [
         u'yellow',
         u'373 781 7380',
         u'87360',
         u'Washington',
         u'Booker T.',
     ]

     result = rolodexer.classify(terms)
     present = result.keys()

     # Every expected field name must be present in the classified output.
     for field in (u'phonenumber', u'firstname', u'lastname',
                   u'color', u'zipcode'):
         self.assertTrue(field in present)

     # The phone number is compared against PhoneNumberField.format() of
     # the raw term; every other field is compared to the term verbatim.
     formatter = rolodexer.PhoneNumberField()

     self.assertEqual(result[u'color'], terms[0])
     self.assertEqual(result[u'phonenumber'], formatter.format(terms[1]))
     self.assertEqual(result[u'zipcode'], terms[2])
     self.assertEqual(result[u'lastname'], terms[3])
     self.assertEqual(result[u'firstname'], terms[4])