def test_minMaxPr(self):
    """Check that minMaxPr reports the extreme pr values of a fixture.

    The fixture holds three items labeled "ham.css", each classified
    against a "ham.css" and a "spam.css" model.  The expected tuple is
    the smallest pr (-21.0) and the largest pr (57.2) that appear
    anywhere in the fixture, regardless of which model they belong to.
    """
    classifyItems = [
        # actual ham
        LabeledItem(None, "ham.css", mock.classification(
            [mock.model("ham.css", pr=30.2), mock.model("spam.css", pr=-18.0)]
        )),
        LabeledItem(None, "ham.css", mock.classification(
            [mock.model("ham.css", pr=16.2), mock.model("spam.css", pr=-21.0)]
        )),
        LabeledItem(None, "ham.css", mock.classification(
            [mock.model("ham.css", pr=57.2), mock.model("spam.css", pr=-16.0)]
        )),
    ]

    # assertEqual rather than the assertEquals alias: the alias was
    # deprecated and no longer exists on Python 3.12+.
    self.assertEqual(minMaxPr(classifyItems), (-21.0, 57.2))
def test_accuracy(self):
    """Check the per-model confusion counts produced by accuracy().

    The fixture has five items labeled "ham.css" and three labeled
    "spam.css".  accuracy() is exercised twice:

    * threshold=None -- the expected counts correspond to each item
      being assigned to the model with the higher pr.
    * threshold=-20.0 -- an item counts as ham whenever its "ham.css"
      pr is >= -20.0, otherwise as spam.

    For each run the tp/fp/tn/fn counters and the derived precision and
    recall are verified for both models.
    """
    crm = Crm114(["ham.css", "spam.css"])
    classifyItems = [
        # actual ham
        LabeledItem(None, "ham.css", mock.classification(
            [mock.model("ham.css", pr=30.2), mock.model("spam.css", pr=-16.0)]
        )),
        LabeledItem(None, "ham.css", mock.classification(
            [mock.model("ham.css", pr=16.2), mock.model("spam.css", pr=-16.0)]
        )),
        LabeledItem(None, "ham.css", mock.classification(
            [mock.model("ham.css", pr=16.2), mock.model("spam.css", pr=-16.0)]
        )),
        LabeledItem(None, "ham.css", mock.classification(
            [mock.model("ham.css", pr=-10.0), mock.model("spam.css", pr=99.0)]
        )),
        LabeledItem(None, "ham.css", mock.classification(
            [mock.model("ham.css", pr=-40.0), mock.model("spam.css", pr=80.0)]
        )),
        # actual spam
        LabeledItem(None, "spam.css", mock.classification(
            [mock.model("ham.css", pr=-5.0), mock.model("spam.css", pr=80.0)]
        )),
        LabeledItem(None, "spam.css", mock.classification(
            [mock.model("ham.css", pr=-45.0), mock.model("spam.css", pr=89.0)]
        )),
        LabeledItem(None, "spam.css", mock.classification(
            [mock.model("ham.css", pr=85.0), mock.model("spam.css", pr=-25.0)]
        )),
    ]

    # No threshold: three of the five ham items and two of the three
    # spam items have the higher pr on their own model.
    result = accuracy(crm, classifyItems, threshold=None)

    self.assertEqual(result["ham.css"].tp, 3)
    self.assertEqual(result["ham.css"].fp, 1)
    self.assertEqual(result["ham.css"].tn, 2)
    self.assertEqual(result["ham.css"].fn, 2)
    self.assertEqual(result["ham.css"].precision, 3.0 / 4.0)
    self.assertEqual(result["ham.css"].recall, 3.0 / 5.0)

    self.assertEqual(result["spam.css"].tp, 2)
    self.assertEqual(result["spam.css"].fp, 2)
    self.assertEqual(result["spam.css"].tn, 3)
    self.assertEqual(result["spam.css"].fn, 1)
    self.assertEqual(result["spam.css"].precision, 2.0 / 4.0)
    self.assertEqual(result["spam.css"].recall, 2.0 / 3.0)

    # if ham.pr >= -20.0, then ham is considered best match
    result = accuracy(crm, classifyItems, threshold=-20.0)

    self.assertEqual(result["ham.css"].tp, 4)
    self.assertEqual(result["ham.css"].fp, 2)
    self.assertEqual(result["ham.css"].tn, 1)
    self.assertEqual(result["ham.css"].fn, 1)
    self.assertEqual(result["ham.css"].precision, 4.0 / 6.0)
    self.assertEqual(result["ham.css"].recall, 4.0 / 5.0)

    self.assertEqual(result["spam.css"].tp, 1)
    self.assertEqual(result["spam.css"].fp, 1)
    self.assertEqual(result["spam.css"].tn, 4)
    self.assertEqual(result["spam.css"].fn, 2)
    self.assertEqual(result["spam.css"].precision, 1.0 / 2.0)
    self.assertEqual(result["spam.css"].recall, 1.0 / 3.0)
def test_varyThreshold(self):
    """Check the Accuracy table produced by varyThreshold() for 4 steps.

    Four items (two labeled ham, two labeled spam) are evaluated at the
    thresholds -60, -20, 20 and 60.  The comment above each item records
    which class it is assigned at each threshold and whether that is
    correct.  The expected result maps model name -> threshold ->
    Accuracy(tp, fp, tn, fn).
    """
    crm = Crm114(["ham.css", "spam.css"])
    items = [
        # actual ham
        # threshold -60 -> ham -> correct
        # threshold -20 -> ham -> correct
        # threshold  20 -> ham -> correct
        # threshold  60 -> ham -> correct
        LabeledItem(None, "ham.css", mock.classification(
            [mock.model("ham.css", pr=100.0), mock.model("spam.css", pr=-100.0)]
        )),
        # actual ham
        # threshold -60 -> ham -> correct
        # threshold -20 -> ham -> correct
        # threshold  20 -> ham -> correct
        # threshold  60 -> spam -> mistake
        LabeledItem(None, "ham.css", mock.classification(
            [mock.model("ham.css", pr=20.0), mock.model("spam.css", pr=-20.0)]
        )),
        # actual spam
        # threshold -60 -> ham -> mistake
        # threshold -20 -> spam -> correct
        # threshold  20 -> spam -> correct
        # threshold  60 -> spam -> correct
        LabeledItem(None, "spam.css", mock.classification(
            [mock.model("ham.css", pr=-30.0), mock.model("spam.css", pr=30.0)]
        )),
        # actual spam
        # threshold -60 -> spam -> correct
        # threshold -20 -> spam -> correct
        # threshold  20 -> spam -> correct
        # threshold  60 -> spam -> correct
        LabeledItem(None, "spam.css", mock.classification(
            [mock.model("ham.css", pr=-100.0), mock.model("spam.css", pr=100.0)]
        )),
    ]

    result = varyThreshold(crm, items, 4)

    # min = -100
    # max = 100
    # increment = 40
    # Accuracy(tp, fp, tn, fn)
    expected = {
        'ham.css': {
            -60.0: Accuracy(2, 1, 1, 0),
            -20.0: Accuracy(2, 0, 2, 0),
            20.0: Accuracy(2, 0, 2, 0),
            60.0: Accuracy(1, 0, 2, 1),
        },
        'spam.css': {
            -60.0: Accuracy(1, 0, 2, 1),
            -20.0: Accuracy(2, 0, 2, 0),
            20.0: Accuracy(2, 0, 2, 0),
            60.0: Accuracy(2, 1, 1, 0),
        },
    }

    # assertEqual rather than the assertEquals alias: the alias was
    # deprecated and no longer exists on Python 3.12+.
    self.assertEqual(expected, result)