def test_ngramify(self):
    """Check the n-grams produced from the tokens of "This works".

    Covers n = 1, 2 and 3. The expected bigrams and trigrams start with
    a "." element (presumably a boundary marker added by the tokenizer
    or by ngramify -- confirm in the experiment module).
    """

    def ngrams_of_sample(size):
        # Tokenize afresh on every call so each assertion is independent.
        tokens = experiment.tokenize("This works")
        return list(experiment.ngramify(tokens, n=size))

    expected_unigrams = [("this",), ("works",)]
    self.assertListEqual(expected_unigrams, ngrams_of_sample(1))

    expected_bigrams = [(".", "this"), ("this", "works")]
    self.assertListEqual(expected_bigrams, ngrams_of_sample(2))

    expected_trigrams = [(".", "this", "works")]
    self.assertListEqual(expected_trigrams, ngrams_of_sample(3))
def test_tokenizer(self):
    """Check tokenize on plain, capitalised and stemmed input.

    The assertions cover three cases: already-lowercase text is split
    into words, capitalised text yields lowercase tokens, and passing a
    ``stemmer`` callable yields stemmed tokens ("works" -> "work").
    """
    lowercase_words = "this works".split()

    # Lowercase input comes back as the plain word list.
    plain_tokens = experiment.tokenize("this works")
    self.assertListEqual(lowercase_words, plain_tokens)

    # Capitalised input is expected to produce the same lowercase tokens.
    capitalised_tokens = experiment.tokenize("This works")
    self.assertListEqual(lowercase_words, capitalised_tokens)

    # With a stemming callable supplied, each token should be stemmed.
    english_stemmer = snowballstemmer.stemmer("english")
    stemmed_words = "this work".split()
    stemmed_tokens = experiment.tokenize(
        "This works", stemmer=english_stemmer.stemWord
    )
    self.assertListEqual(stemmed_words, stemmed_tokens)