Exemple #1
0
 def test_ngramify(self):
     """Check the n-grams produced from the tokens of "This works".

     Covers n = 1, 2 and 3. For n >= 2 the expected n-grams begin with a
     "." element (presumably a boundary marker supplied by the
     tokenize/ngramify pipeline -- confirm against ``experiment``).
     """
     sentence = "This works"
     # (n, expected n-grams), checked in this order; the first mismatch
     # aborts the test just like sequential assertions would.
     cases = (
         (1, [("this",), ("works",)]),
         (2, [(".", "this"), ("this", "works")]),
         (3, [(".", "this", "works")]),
     )
     for size, expected in cases:
         tokens = experiment.tokenize(sentence)
         produced = list(experiment.ngramify(tokens, n=size))
         self.assertListEqual(expected, produced)
Exemple #2
0
    def test_tokenizer(self):
        """Check tokenize() with and without a stemming callback.

        Without a stemmer, both the lower-case and the capitalised input
        are expected to yield the tokens of "this works". With the English
        snowball stemmer's ``stemWord`` passed as ``stemmer``, "works" is
        expected to come back as "work".
        """
        unstemmed = "this works".split()
        for text in ("this works", "This works"):
            self.assertListEqual(unstemmed, experiment.tokenize(text))

        english = snowballstemmer.stemmer("english")
        stemmed_expected = "this work".split()
        stemmed_actual = experiment.tokenize("This works", stemmer=english.stemWord)
        self.assertListEqual(stemmed_expected, stemmed_actual)