def token_link_text(text, distance=10):
    """Combine token frequency and token links together into a single JSON format.

    Strips punctuation, replaces stop words with placeholders, then links the
    resulting tokens within the given window *distance*.
    """
    cleaned = remove_punctuation(text)
    tokens = stop_word_placeheld(cleaned)
    return link_op(tokens, distance=distance)
def run(self):
    """Read raw text from the input target, tokenize it, and pickle the tokens to the output target."""
    with self.input().open('r') as I:
        src = I.read()
    # Debug aid: shows whether the upstream target yielded str or bytes.
    print('Src type: {}'.format(type(src)))
    tokens = stop_word_placeheld(src)
    # NOTE(review): pickle.dump requires a binary file object; a target opened
    # with 'w' is text mode unless the target was constructed with a binary
    # format (e.g. luigi's format.Nop) — TODO confirm the output target's format.
    with self.output().open('w') as O:
        pickle.dump(tokens, O)
def create_tokens(text):
    """Return a token frequency distribution for *text*.

    Punctuation is removed and stop words are replaced with placeholders
    before the frequency distribution is computed.
    """
    return freq_dist_dict(stop_word_placeheld(remove_punctuation(text)))
def test_stop_word_placeheld(self):
    """Stop words are replaced by empty-string placeholders; content words pass through.

    Removed the leftover debug ``print(pformat(...))`` — test output should
    come from assertion failures, not unconditional prints.
    """
    stopped = stop_word_placeheld(self.small_text)
    # Positions 0 and 6 are expected to hold stop-word placeholders ('');
    # position 1 holds the content word 'quick', which must survive unchanged.
    self.assertEqual(stopped[0], '')
    self.assertEqual(stopped[1], 'quick')
    self.assertEqual(stopped[6], '')