예제 #1
0
    def test_minMaxPr(self):
        """Check that minMaxPr reports the extreme pr values of a fixture.

        Three items labeled "ham.css" are built, each carrying one ham and
        one spam model score. Across all six scores the smallest pr is
        -21.0 and the largest is 57.2; minMaxPr is expected to return them
        as a (min, max) tuple.
        """

        def labeled(actual, ham_pr, spam_pr):
            # Build one labeled item whose mock classification lists the
            # ham model first and the spam model second.
            return LabeledItem(None, actual, mock.classification(
                [mock.model("ham.css", pr=ham_pr),
                 mock.model("spam.css", pr=spam_pr)]
                ))

        classifyItems = [
            # actual ham
            labeled("ham.css", 30.2, -18.0),
            labeled("ham.css", 16.2, -21.0),
            labeled("ham.css", 57.2, -16.0),
            ]

        # assertEqual is used instead of the assertEquals alias, which was
        # deprecated in Python 3.2 and removed in Python 3.12.
        self.assertEqual(minMaxPr(classifyItems), (-21.0, 57.2))
예제 #2
0
    def test_accuracy(self):
        """Check the per-category counts returned by accuracy().

        The fixture holds five items labeled "ham.css" and three labeled
        "spam.css". accuracy() is called twice on the same items:

        * threshold=None: the expected counts are consistent with each
          item being assigned to whichever model has the higher pr
          (three of the five ham items and two of the three spam items
          score higher for their own label).
        * threshold=-20.0: an item counts as ham whenever its ham pr is
          at least -20.0, which moves one more ham item and one more spam
          item to the ham side.

        For each call the tp/fp/tn/fn counters and the derived precision
        and recall are asserted for both categories.
        """
        crm = Crm114(["ham.css", "spam.css"])

        def labeled(actual, ham_pr, spam_pr):
            # Build one labeled item whose mock classification lists the
            # ham model first and the spam model second.
            return LabeledItem(None, actual, mock.classification(
                [mock.model("ham.css", pr=ham_pr),
                 mock.model("spam.css", pr=spam_pr)]
                ))

        classifyItems = [
            # actual ham
            labeled("ham.css", 30.2, -16.0),
            labeled("ham.css", 16.2, -16.0),
            labeled("ham.css", 16.2, -16.0),
            labeled("ham.css", -10.0, 99.0),
            labeled("ham.css", -40.0, 80.0),
            # actual spam
            labeled("spam.css", -5.0, 80.0),
            labeled("spam.css", -45.0, 89.0),
            labeled("spam.css", 85.0, -25.0),
            ]

        # assertEqual is used throughout instead of the assertEquals alias,
        # which was deprecated in Python 3.2 and removed in Python 3.12.

        # no threshold
        result = accuracy(crm, classifyItems, threshold=None)
        self.assertEqual(result["ham.css"].tp, 3)
        self.assertEqual(result["ham.css"].fp, 1)
        self.assertEqual(result["ham.css"].tn, 2)
        self.assertEqual(result["ham.css"].fn, 2)
        self.assertEqual(result["ham.css"].precision, 3.0 / 4.0)
        self.assertEqual(result["ham.css"].recall, 3.0 / 5.0)
        self.assertEqual(result["spam.css"].tp, 2)
        self.assertEqual(result["spam.css"].fp, 2)
        self.assertEqual(result["spam.css"].tn, 3)
        self.assertEqual(result["spam.css"].fn, 1)
        self.assertEqual(result["spam.css"].precision, 2.0 / 4.0)
        self.assertEqual(result["spam.css"].recall, 2.0 / 3.0)

        # if ham.pr >= -20.0, then ham is considered best match
        result = accuracy(crm, classifyItems, threshold=-20.0)
        self.assertEqual(result["ham.css"].tp, 4)
        self.assertEqual(result["ham.css"].fp, 2)
        self.assertEqual(result["ham.css"].tn, 1)
        self.assertEqual(result["ham.css"].fn, 1)
        self.assertEqual(result["ham.css"].precision, 4.0 / 6.0)
        self.assertEqual(result["ham.css"].recall, 4.0 / 5.0)
        self.assertEqual(result["spam.css"].tp, 1)
        self.assertEqual(result["spam.css"].fp, 1)
        self.assertEqual(result["spam.css"].tn, 4)
        self.assertEqual(result["spam.css"].fn, 2)
        self.assertEqual(result["spam.css"].precision, 1.0 / 2.0)
        self.assertEqual(result["spam.css"].recall, 1.0 / 3.0)
예제 #3
0
    def test_varyThreshold(self):
        """Check the threshold sweep produced by varyThreshold().

        Four items (two labeled "ham.css", two labeled "spam.css") are
        passed to varyThreshold together with the value 4. The expected
        result maps each category to a dict keyed by the thresholds
        -60.0, -20.0, 20.0 and 60.0, each holding the Accuracy counters
        for that threshold. The per-item comments below spell out which
        label each item receives at each threshold.
        """

        crm = Crm114(["ham.css", "spam.css"])

        def labeled(actual, ham_pr, spam_pr):
            # Build one labeled item whose mock classification lists the
            # ham model first and the spam model second.
            return LabeledItem(None, actual, mock.classification(
                [mock.model("ham.css", pr=ham_pr),
                 mock.model("spam.css", pr=spam_pr)]
                ))

        items = [
            # actual ham
            # threshold -60 -> ham -> correct
            # threshold -20 -> ham -> correct
            # threshold  20 -> ham -> correct
            # threshold  60 -> ham -> correct
            labeled("ham.css", 100.0, -100.0),
            # actual ham
            # threshold -60 -> ham -> correct
            # threshold -20 -> ham -> correct
            # threshold  20 -> ham -> correct
            # threshold  60 -> spam -> mistake
            labeled("ham.css", 20.0, -20.0),
            # actual spam
            # threshold -60 -> ham -> mistake
            # threshold -20 -> spam -> correct
            # threshold  20 -> spam -> correct
            # threshold  60 -> spam -> correct
            labeled("spam.css", -30.0, 30.0),
            # actual spam
            # threshold -60 -> spam -> correct
            # threshold -20 -> spam -> correct
            # threshold  20 -> spam -> correct
            # threshold  60 -> spam -> correct
            labeled("spam.css", -100.0, 100.0),
            ]

        result = varyThreshold(crm, items, 4)

        # min = -100
        # max = 100
        # increment = 40
        # Accuracy(tp, fp, tn, fn)
        expected = {
            'ham.css': {
                -60.0: Accuracy(2, 1, 1, 0),
                -20.0: Accuracy(2, 0, 2, 0),
                20.0: Accuracy(2, 0, 2, 0),
                60.0: Accuracy(1, 0, 2, 1),
                },
            'spam.css': {
                -60.0: Accuracy(1, 0, 2, 1),
                -20.0: Accuracy(2, 0, 2, 0),
                20.0: Accuracy(2, 0, 2, 0),
                60.0: Accuracy(2, 1, 1, 0),
                },
            }

        # assertEqual is used instead of the assertEquals alias, which was
        # deprecated in Python 3.2 and removed in Python 3.12.
        self.assertEqual(expected, result)