def test_freq_dist_dict_full(self):
    """Check freq_dist_dict against the full 2011-1-19 raw text fixture.

    The count stored under u'year' is asserted twice: once for the text
    as read from disk, and once after the text has additionally been run
    through remove_punctuation.  The second distribution is then
    pretty-printed into a file under ``target_out``.
    """
    source_path = '{}{}'.format(base_resources, '2011-1-19raw.txt')
    with open(source_path, 'r') as raw_file:
        text = raw_file.read().decode('utf-8')

    # Pass 1: only stop_words is applied before counting.
    tokens = stop_words(text).split()
    freq_dist = freq_dist_dict(tokens)
    self.assertGreater(freq_dist[u'year'], 8)
    self.assertLess(freq_dist[u'year'], 12)

    # Pass 2: punctuation is removed first, then the same pipeline runs.
    unpunctuated = remove_punctuation(text)
    tokens = stop_words(unpunctuated).split()
    freq_dist = freq_dist_dict(tokens)
    self.assertGreater(freq_dist[u'year'], 16)

    # Keep a readable dump of the final distribution for inspection.
    dump_path = '{}{}'.format(target_out, '2011-1-19freq_dist_dict')
    with open(dump_path, 'w') as out_file:
        out_file.write(pformat(freq_dist))
def create_tokens(text):
    """Return freq_dist_dict's result for *text* after preprocessing.

    The input is passed through remove_punctuation, then through
    stop_word_placeheld, and the outcome is handed to freq_dist_dict.

    NOTE(review): the value given to freq_dist_dict is whatever
    stop_word_placeheld returns; presumably that is already a sequence of
    tokens rather than a single string -- confirm against that helper.
    """
    without_punctuation = remove_punctuation(text)
    placeheld = stop_word_placeheld(without_punctuation)
    return freq_dist_dict(placeheld)