Ejemplo n.º 1
0
    def test_minMaxPr(self):
        """Check that minMaxPr reports the lowest and highest pr in the items.

        The expected pair is taken over every model of every item: the
        minimum (-21.0) comes from a spam.css model and the maximum (57.2)
        from a ham.css model.
        """

        classifyItems = [
            # actual ham
            LabeledItem(None, "ham.css", mock.classification(
                [mock.model("ham.css", pr=30.2),
                 mock.model("spam.css", pr=-18.0)]
                )),
            LabeledItem(None, "ham.css", mock.classification(
                [mock.model("ham.css", pr=16.2),
                 mock.model("spam.css", pr=-21.0)]
                )),
            LabeledItem(None, "ham.css", mock.classification(
                [mock.model("ham.css", pr=57.2),
                 mock.model("spam.css", pr=-16.0)]
                ))
            ]

        # assertEqual, not the deprecated assertEquals alias: the alias was
        # removed from unittest.TestCase in Python 3.12.
        self.assertEqual(minMaxPr(classifyItems), (-21.0, 57.2))
Ejemplo n.º 2
0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

from crm114 import *
import json
import mock
import os
import unittest

# Canned classification string; MockCrmRunner.run returns it for every call.
# NOTE(review): the positional mock.model arguments are not named here --
# confirm their meaning against mock.model before changing the numbers.
crmResultSpamString = mock.classificationString(
            [mock.model("spam.css", 1461, 16572, 1.0, 129.64),
             mock.model("ham.css", 856, 301, 4.82e-131, -90.32)],
            totalFeatures = 2452)

class MockCrmRunner:
    """Mock runner whose run() always yields the same canned result."""

    def run(self, data, command):
        """Return crmResultSpamString; both arguments are ignored."""
        cannedResult = crmResultSpamString
        return cannedResult

# Directory name under which the file paths below are built.
TEST_DIR = "testdata"
# Sample texts; presumably the ham and spam inputs fed to the classifier by
# the tests -- confirm against the tests that use them.
HAM_TEXT = "ham1 ham2 ham3 ham4 ham5"
SPAM_TEXT = "fooA fooB fooC fooD fooE"
# Paths of the ham, spam and tuna .css files inside TEST_DIR.
HAM_FILENAME = os.path.join(TEST_DIR, "ham.css")
SPAM_FILENAME = os.path.join(TEST_DIR, "spam.css")
TUNA_FILENAME = os.path.join(TEST_DIR, "tuna.css")

def freshTestDir():
Ejemplo n.º 3
0
    def test_accuracy(self):
        """Check accuracy() counts and ratios with and without a threshold.

        The fixture holds five items labeled ham.css and three labeled
        spam.css.  accuracy() is called twice on the same items: once with
        threshold = None and once with threshold = -20.0, and the per-model
        tp / fp / tn / fn counts plus precision and recall are compared
        against hand-computed values.
        """
        crm = Crm114(["ham.css", "spam.css"])

        classifyItems = [
            # actual ham
            LabeledItem(None, "ham.css", mock.classification(
                [mock.model("ham.css", pr=30.2),
                 mock.model("spam.css", pr=-16.0)]
                )),
            LabeledItem(None, "ham.css", mock.classification(
                [mock.model("ham.css", pr=16.2),
                 mock.model("spam.css", pr=-16.0)]
                )),
            LabeledItem(None, "ham.css", mock.classification(
                [mock.model("ham.css", pr=16.2),
                 mock.model("spam.css", pr=-16.0)]
                )),
            LabeledItem(None, "ham.css", mock.classification(
                [mock.model("ham.css", pr=-10.0),
                 mock.model("spam.css", pr=99.0)]
                )),
            LabeledItem(None, "ham.css", mock.classification(
                [mock.model("ham.css", pr=-40.0),
                 mock.model("spam.css", pr=80.0)]
                )),
            # actual spam
            LabeledItem(None, "spam.css", mock.classification(
                [mock.model("ham.css", pr=-5.0),
                 mock.model("spam.css", pr=80.0)]
                )),
            LabeledItem(None, "spam.css", mock.classification(
                [mock.model("ham.css", pr=-45.0),
                 mock.model("spam.css", pr=89.0)]
                )),
            LabeledItem(None, "spam.css", mock.classification(
                [mock.model("ham.css", pr=85.0),
                 mock.model("spam.css", pr=-25.0)]
                ))
            ]

        # assertEqual is used throughout instead of the deprecated
        # assertEquals alias, which was removed in Python 3.12.

        # No threshold: the expected counts below correspond to treating the
        # model with the higher pr as the match for each item.
        result = accuracy(crm, classifyItems, threshold = None)
        self.assertEqual(result["ham.css"].tp, 3)
        self.assertEqual(result["ham.css"].fp, 1)
        self.assertEqual(result["ham.css"].tn, 2)
        self.assertEqual(result["ham.css"].fn, 2)
        self.assertEqual(result["ham.css"].precision, 3.0 / 4.0)
        self.assertEqual(result["ham.css"].recall, 3.0 / 5.0)
        self.assertEqual(result["spam.css"].tp, 2)
        self.assertEqual(result["spam.css"].fp, 2)
        self.assertEqual(result["spam.css"].tn, 3)
        self.assertEqual(result["spam.css"].fn, 1)
        self.assertEqual(result["spam.css"].precision, 2.0 / 4.0)
        self.assertEqual(result["spam.css"].recall, 2.0 / 3.0)

        # if ham.pr >= -20.0, then ham is considered best match
        result = accuracy(crm, classifyItems, threshold = -20.0)
        self.assertEqual(result["ham.css"].tp, 4)
        self.assertEqual(result["ham.css"].fp, 2)
        self.assertEqual(result["ham.css"].tn, 1)
        self.assertEqual(result["ham.css"].fn, 1)
        self.assertEqual(result["ham.css"].precision, 4.0 / 6.0)
        self.assertEqual(result["ham.css"].recall, 4.0 / 5.0)
        self.assertEqual(result["spam.css"].tp, 1)
        self.assertEqual(result["spam.css"].fp, 1)
        self.assertEqual(result["spam.css"].tn, 4)
        self.assertEqual(result["spam.css"].fn, 2)
        self.assertEqual(result["spam.css"].precision, 1.0 / 2.0)
        self.assertEqual(result["spam.css"].recall, 1.0 / 3.0)
Ejemplo n.º 4
0
    def test_varyThreshold(self):
        """Check the per-threshold Accuracy table produced by varyThreshold.

        Four items (two labeled ham.css, two labeled spam.css) are passed to
        varyThreshold together with the value 4.  The result is expected to
        map each model name to a dict keyed by threshold (-60, -20, 20, 60)
        whose values are Accuracy(tp, fp, tn, fn) objects.
        """

        crm = Crm114(["ham.css", "spam.css"])

        # The comment above each item gives the expected label at each
        # threshold and whether that agrees with the item's actual label.
        items = [
            # actual ham
            # threshold -60 -> ham -> correct
            # threshold -20 -> ham -> correct
            # threshold  20 -> ham -> correct
            # threshold  60 -> ham -> correct
            LabeledItem(None, "ham.css", mock.classification(
                [mock.model("ham.css", pr=100.0),
                 mock.model("spam.css", pr=-100.0)]
                )),
            # actual ham
            # threshold -60 -> ham -> correct
            # threshold -20 -> ham -> correct
            # threshold  20 -> ham -> correct
            # threshold  60 -> spam -> mistake
            LabeledItem(None, "ham.css", mock.classification(
                [mock.model("ham.css", pr=20.0),
                 mock.model("spam.css", pr=-20.0)]
                )),
            # actual spam
            # threshold -60 -> ham -> mistake
            # threshold -20 -> spam -> correct
            # threshold  20 -> spam -> correct
            # threshold  60 -> spam -> correct
            LabeledItem(None, "spam.css", mock.classification(
                [mock.model("ham.css", pr=-30.0),
                 mock.model("spam.css", pr=30.0)]
                )),
            # actual spam
            # threshold -60 -> spam -> correct
            # threshold -20 -> spam -> correct
            # threshold  20 -> spam -> correct
            # threshold  60 -> spam -> correct
            LabeledItem(None, "spam.css", mock.classification(
                [mock.model("ham.css", pr=-100.0),
                 mock.model("spam.css", pr=100.0)]
                ))
            ]

        result = varyThreshold(crm, items, 4)

        # min = -100
        # max = 100
        # increment = 40
        # Accuracy(tp, fp, tn, fn)
        expected = {'ham.css': {
                         -60.0 : Accuracy(2, 1, 1, 0),
                         -20.0 : Accuracy(2, 0, 2, 0),
                          20.0 : Accuracy(2, 0, 2, 0),
                          60.0 : Accuracy(1, 0, 2, 1)
                        },
                    'spam.css': {
                         -60.0 : Accuracy(1, 0, 2, 1),
                         -20.0 : Accuracy(2, 0, 2, 0),
                          20.0 : Accuracy(2, 0, 2, 0),
                          60.0 : Accuracy(2, 1, 1, 0)
                        }
                    }

        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(expected, result)