def test_hamClassificationWithoutCache(self):
    """
    Text trained as ham through the fixture classifier is still scored as
    ham by a brand new L{Hammie} opened on the same database path, so no
    per-instance cache is needed to satisfy the word info lookups.
    """
    trainingText = StringIO("very nice words")
    self.classifier.train(trainingText, False)

    # Build a second classifier over the same path so that nothing held in
    # memory by the training instance can be consulted while scoring.
    freshBayes = spam._SQLite3Classifier(self.path)
    freshClassifier = Hammie(freshBayes, mode='r')

    score = freshClassifier.score(StringIO("words, very nice"))
    self.assertTrue(score < 0.01)
def test_spamClassificationWithoutCache(self):
    """
    Text trained as spam through the fixture classifier is still scored as
    spam by a brand new L{Hammie} opened on the same database path, so no
    per-instance cache is needed to satisfy the word info lookups.
    """
    trainingText = StringIO("spam words of spamfulness")
    self.classifier.train(trainingText, True)

    # Build a second classifier over the same path so that nothing held in
    # memory by the training instance can be consulted while scoring.
    freshBayes = spam._SQLite3Classifier(self.path)
    freshClassifier = Hammie(freshBayes, mode='r')

    score = freshClassifier.score(StringIO("spamfulness words of spam"))
    self.assertTrue(score > 0.99)
def test_largeDocumentClassification(self):
    """
    A document containing more than 999 tokens can be trained on and then
    classified without error.
    """
    # 1000 distinct space-separated tokens: word0 ... word999.
    # NOTE(review): 999 presumably corresponds to SQLite's default limit on
    # bound parameters per statement -- confirm against the classifier.
    document = " ".join("word%d" % (index,) for index in range(1000))

    self.classifier.train(StringIO(document), False)

    freshBayes = spam._SQLite3Classifier(self.path)
    freshClassifier = Hammie(freshBayes, mode='r')

    score = freshClassifier.score(StringIO(document))
    self.assertTrue(score < 0.01)
def setUp(self):
    """
    Prepare the fixture: a path obtained from C{self.mktemp()}, a
    L{spam._SQLite3Classifier} created on that path, and a L{Hammie}
    wrapping that classifier.
    """
    databasePath = self.mktemp()
    self.path = databasePath

    bayes = spam._SQLite3Classifier(databasePath)
    self.bayes = bayes

    self.classifier = Hammie(bayes, mode='r')