コード例 #1
0
 def test_token_index_simple(self):
     """Check token_index lookups against a short hand-written sentence.

     The expected numbers match the zero-based, whitespace-separated word
     positions in the sample string ('wat' is word 1, 'xcat' is words 3
     and 5).
     """
     sample = 'adams wat badcat xcat $9000 xcat'
     index = token_index(sample)
     print(index)
     # (token, position) pairs, checked in order; 'xcat' occurs twice.
     expected_hits = (('wat', 1), ('xcat', 3), ('xcat', 5))
     for token, position in expected_hits:
         self.assertIn(position, index[token])
コード例 #2
0
 def test_freq_dist_dict_full(self):
     """Build a token index from a full raw text file and dump it to disk.

     Reads ``2011-1-19raw.txt`` under ``base_resources`` as UTF-8, passes the
     text through ``remove_punctuation`` and ``stop_words``, indexes the
     result with ``token_index`` and writes the pretty-printed index to
     ``2011-1-19token_index`` under ``target_out``.

     NOTE(review): there are no assertions, so this only fails when one of
     the steps raises. The method name mentions a frequency distribution
     while the body exercises ``token_index`` -- confirm the intent.
     """
     # Local import so this method stays self-contained.
     import io

     # io.open decodes while reading on both Python 2 and Python 3. The
     # previous f.read().decode('utf-8') raises AttributeError on Python 3,
     # where a text-mode read() already returns str.
     source_path = '{}{}'.format(base_resources, '2011-1-19raw.txt')
     with io.open(source_path, 'r', encoding='utf-8') as f:
         text = f.read()

     # The input file is closed before processing; nothing below needs it.
     text = remove_punctuation(text)
     stopped = stop_words(text)
     ti = token_index(stopped)

     target_path = '{}{}'.format(target_out, '2011-1-19token_index')
     with open(target_path, 'w') as out_file:
         out_file.write(pformat(ti))