Ejemplo n.º 1
0
def test_NaiveBayesClassifier_with_smoothing():
    """Check a model built from SPAM_DATA with k=1.

    Verifies, in order: the "spam" and "ham" class priors, the per-class
    probability of the word "today", and the normalised "spam" probability
    the classifier reports for the tokens ["today", "is", "secret"].

    NOTE(review): k is presumably the additive-smoothing constant (the test
    name says "with smoothing") -- confirm against Model.
    """
    trained = Model(simple_tokenizer(SPAM_DATA), k=1)

    # Class priors, checked spam first, then ham.
    for label, expected_prior in (("spam", 0.4), ("ham", 0.6)):
        verify(trained.classes[label].prior, expected_prior)

    # P("today") under each class, same label order as above.
    for label, expected_p in (("spam", 0.0476), ("ham", 0.1111)):
        verify(trained.classes[label].P("today"), expected_p)

    nb = Classifier(trained)
    scores = nb.get_normalised_probabilities(["today", "is", "secret"])
    verify(scores["spam"], 0.4858)
Ejemplo n.º 2
0
def test_NaiveBayesClassifier_without_smoothing():
    """Check a model built from SPAM_DATA with k=0.

    Verifies the feature count, the "spam" prior, the per-class probability
    of the word "secret", and the normalised "spam" probability for three
    token lists.

    NOTE(review): the final case expects 0, presumably because an unsmoothed
    model assigns zero probability to a word unseen in the spam class --
    confirm against Model/Classifier.
    """
    trained = Model(simple_tokenizer(SPAM_DATA), k=0)
    assert trained.features_num == 12

    verify(trained.classes["spam"].prior, 0.3750)

    # P("secret") under each class, spam first.
    for label, expected_p in (("spam", 0.3333), ("ham", 0.0667)):
        verify(trained.classes[label].P("secret"), expected_p)

    nb = Classifier(trained)
    # (tokens, expected normalised probability of "spam"), in original order.
    spam_cases = (
        (["sports"], 0.1667),
        (["secret", "is", "secret"], 0.9615),
        (["today", "is", "secret"], 0),
    )
    for tokens, expected_spam in spam_cases:
        scores = nb.get_normalised_probabilities(tokens)
        verify(scores["spam"], expected_spam)
Ejemplo n.º 3
0
def test_NaiveBayesClassifier_with_smoothing_2():
    """Check a model built from TITLE_DATA with k=1.

    Verifies the feature count, the "movie" and "song" priors, the per-class
    probabilities of the words "perfect" and "storm", and the normalised
    "movie" probability for the tokens ["perfect", "storm"].
    """
    trained = Model(simple_tokenizer(TITLE_DATA), k=1)
    assert trained.features_num == 11

    # Both classes are expected to have the same prior.
    for label in ("movie", "song"):
        verify(trained.classes[label].prior, 0.5000)

    # (class label, word, expected P(word | class)), in original check order.
    word_cases = (
        ("movie", "perfect", 0.1579),
        ("song", "perfect", 0.1053),
        ("movie", "storm", 0.0526),
        ("song", "storm", 0.1053),
    )
    for label, word, expected_p in word_cases:
        verify(trained.classes[label].P(word), expected_p)

    nb = Classifier(trained)
    scores = nb.get_normalised_probabilities(["perfect", "storm"])
    verify(scores["movie"], 0.4286)
Ejemplo n.º 4
0
def test_NaiveBayesClassifier_without_smoothing_2():
    """Check the classifier on a model built from TITLE_DATA with k=0.

    Verifies that the normalised "movie" probability for the tokens
    ["perfect", "storm"] is 0.0.
    """
    nb = Classifier(Model(simple_tokenizer(TITLE_DATA), k=0))
    scores = nb.get_normalised_probabilities(["perfect", "storm"])
    verify(scores["movie"], 0.0)