def test_NaiveBayesClassifier_with_smoothing():
    """Check priors, word probabilities and a posterior for SPAM_DATA with k=1.

    NOTE(review): ``verify`` is defined elsewhere; it presumably compares the
    actual value against the expected one with some tolerance -- confirm.
    """
    nb_model = Model(simple_tokenizer(SPAM_DATA), k=1)

    # Class priors.
    spam_prior = nb_model.classes["spam"].prior
    verify(spam_prior, 0.4)
    ham_prior = nb_model.classes["ham"].prior
    verify(ham_prior, 0.6)

    # Per-class probability of the word "today".
    today_given_spam = nb_model.classes["spam"].P("today")
    verify(today_given_spam, 0.0476)
    today_given_ham = nb_model.classes["ham"].P("today")
    verify(today_given_ham, 0.1111)

    # Normalised probability of "spam" for a three-word message.
    nb_classifier = Classifier(nb_model)
    posteriors = nb_classifier.get_normalised_probabilities(
        ["today", "is", "secret"]
    )
    verify(posteriors["spam"], 0.4858)
def test_NaiveBayesClassifier_without_smoothing():
    """Check feature count, probabilities and posteriors for SPAM_DATA with k=0.

    NOTE(review): ``verify`` is defined elsewhere; it presumably compares the
    actual value against the expected one with some tolerance -- confirm.
    """
    nb_model = Model(simple_tokenizer(SPAM_DATA), k=0)
    assert nb_model.features_num == 12

    # Prior of the "spam" class.
    spam_prior = nb_model.classes["spam"].prior
    verify(spam_prior, 0.3750)

    # Per-class probability of the word "secret".
    secret_given_spam = nb_model.classes["spam"].P("secret")
    verify(secret_given_spam, 0.3333)
    secret_given_ham = nb_model.classes["ham"].P("secret")
    verify(secret_given_ham, 0.0667)

    nb_classifier = Classifier(nb_model)

    # Single-word message.
    sports_scores = nb_classifier.get_normalised_probabilities(["sports"])
    verify(sports_scores["spam"], 0.1667)

    # Message with a repeated word.
    repeated_scores = nb_classifier.get_normalised_probabilities(
        ["secret", "is", "secret"]
    )
    verify(repeated_scores["spam"], 0.9615)

    # With k=0 this message is expected to score exactly zero for "spam".
    today_scores = nb_classifier.get_normalised_probabilities(
        ["today", "is", "secret"]
    )
    verify(today_scores["spam"], 0)
def test_NaiveBayesClassifier_with_smoothing_2():
    """Check feature count, probabilities and a posterior for TITLE_DATA with k=1.

    NOTE(review): ``verify`` is defined elsewhere; it presumably compares the
    actual value against the expected one with some tolerance -- confirm.
    """
    nb_model = Model(simple_tokenizer(TITLE_DATA), k=1)
    assert nb_model.features_num == 11

    # Class priors.
    movie_prior = nb_model.classes["movie"].prior
    verify(movie_prior, 0.5000)
    song_prior = nb_model.classes["song"].prior
    verify(song_prior, 0.5000)

    # Per-class probability of the word "perfect".
    perfect_given_movie = nb_model.classes["movie"].P("perfect")
    verify(perfect_given_movie, 0.1579)
    perfect_given_song = nb_model.classes["song"].P("perfect")
    verify(perfect_given_song, 0.1053)

    # Per-class probability of the word "storm".
    storm_given_movie = nb_model.classes["movie"].P("storm")
    verify(storm_given_movie, 0.0526)
    storm_given_song = nb_model.classes["song"].P("storm")
    verify(storm_given_song, 0.1053)

    # Normalised probability of "movie" for the two-word title.
    nb_classifier = Classifier(nb_model)
    posteriors = nb_classifier.get_normalised_probabilities(["perfect", "storm"])
    verify(posteriors["movie"], 0.4286)
def test_NaiveBayesClassifier_without_smoothing_2():
    """Check that "perfect storm" scores zero for "movie" on TITLE_DATA with k=0.

    NOTE(review): ``verify`` is defined elsewhere; it presumably compares the
    actual value against the expected one with some tolerance -- confirm.
    """
    nb_model = Model(simple_tokenizer(TITLE_DATA), k=0)
    nb_classifier = Classifier(nb_model)

    posteriors = nb_classifier.get_normalised_probabilities(["perfect", "storm"])
    verify(posteriors["movie"], 0.0)