def getBayesAccuracy(splitRatio=0.9):
    """Train a Naive Bayes spam classifier and return its accuracy.

    Every text from getSpamContent() is labelled SPAM and every text from
    getHamContent() is labelled NOSPAM. The labelled feature sets are
    shuffled, divided by splitByRatio(), and a classifier is trained on the
    first part and scored on the second.

    Args:
        splitRatio: Forwarded unchanged to splitByRatio(); presumably the
            fraction of examples used for training -- confirm against
            splitByRatio.

    Returns:
        The result of nltk_classify.accuracy() for the trained classifier
        on the held-out part of the data.
    """
    # Spam examples first, then ham, exactly as before; the order is
    # irrelevant after shuffle() but keeps the helper call order unchanged.
    featureset = [(featuresForText(text), SPAM) for text in getSpamContent()]
    featureset.extend(
        (featuresForText(text), NOSPAM) for text in getHamContent()
    )
    shuffle(featureset)

    trainset, devset = splitByRatio(featureset, splitRatio)
    classifier = NaiveBayesClassifier.train(trainset)

    # Assuming this is NLTK's NaiveBayesClassifier: the method writes its
    # table to stdout itself and returns None, so it is called directly.
    # Wrapping it in ``print`` only appended a stray "None" line.
    classifier.show_most_informative_features(10)
    return nltk_classify.accuracy(classifier, devset)
def testGetSpamContent(self):
    """Check the size of the spam corpus and that one sample is non-empty."""
    spam_messages = getSpamContent()
    message_count = len(spam_messages)

    # The expected count is tied to the fixture data loaded by
    # getSpamContent().
    self.assertEqual(message_count, 1732)

    # Spot-check a single entry: it must be truthy.
    sample = spam_messages[234]
    self.assertTrue(sample)