Example #1
def test_atom_filter(initialized_filter, train_dir, test_dir):
    train_corp = TrainingCorpus(train_dir)
    test_corp = Corpus(test_dir)

    filter = initialized_filter
    filter.train(train_corp)
    prediction = dict()

    for name, mail in test_corp.emails():
        result = filter.test(mail)
        if result == -1:
            continue
        elif result > POSITIVITY_THRESHOLD:
            prediction[name] = POSITIVE
        else:
            prediction[name] = NEGATIVE

    truth = read_classification_from_file(test_dir + '/' + TRUTHFILE)
    conf_matrix = BinaryConfusionMatrix(POSITIVE, NEGATIVE)
    conf_matrix.compute_from_dicts(truth, prediction)

    matrix_dict = conf_matrix.as_dict()
    # For testing purposes
    print(matrix_dict)

    score = quality_score(matrix_dict['tp'],
                          matrix_dict['tn'],
                          matrix_dict['fp'],
                          matrix_dict['fn'])
    return score
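
Every example in this listing leans on a read_classification_from_file() helper that the snippets themselves never define. A minimal sketch, assuming the classification file stores one whitespace-separated "filename label" pair per line (the file format is an assumption, not something the examples confirm):

def read_classification_from_file(path):
    """Read assumed 'filename label' pairs into a {filename: label} dict."""
    classification = {}
    with open(path, 'rt', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            name, label = line.split()
            classification[name] = label
    return classification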
Example #2
def compute_quality_for_corpus(corpus_dir):
    path = os.getcwd()
    os.chdir(corpus_dir)
    try:
        truth_dict = read_classification_from_file('!truth.txt')
        pred_dict = read_classification_from_file('!prediction.txt')
    finally:
        # Restore the working directory even if reading a file fails.
        os.chdir(path)
    cm = BinaryConfusionMatrix(pos_tag='SPAM', neg_tag='OK')
    cm.compute_from_dicts(truth_dict, pred_dict)
    return quality_score(**cm.as_dict())
Example #3
def compute_quality_for_corpus(corpus_dir):
    """ Compute quality_score() for predictions in corpus """
    matrix = BinaryConfusionMatrix(pos_tag="SPAM", neg_tag="OK")

    matrix.compute_from_dicts(
        dict(read_classification_from_file(os.path.join(corpus_dir, "!truth.txt"))),
        dict(read_classification_from_file(os.path.join(corpus_dir, "!prediction.txt")))
    )

    return quality_score(**matrix.as_dict())
Example #4
def compute_quality_for_corpus(corpus_dir):
    truth_dict = utils.read_classification_from_file(
        os.path.join(corpus_dir, '!truth.txt'))
    prediction_dict = utils.read_classification_from_file(
        os.path.join(corpus_dir, '!prediction.txt'))
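    # NOTE: pos_tag and neg_tag are assumed to be module-level constants
    # elsewhere in this example's repository (e.g. 'SPAM' and 'OK').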
    confusion_matrix = BinaryConfusionMatrix(pos_tag, neg_tag)
    confusion_matrix.compute_from_dicts(truth_dict, prediction_dict)

    conf_dict = confusion_matrix.as_dict()
    return quality_score(**conf_dict)
Example #5
def compute_quality_for_corpus(corpus_dir):
    truth_dic = methods.read_classification_from_file(methods.add_slash(corpus_dir) + "!truth.txt")
    pred_dic = methods.read_classification_from_file(methods.add_slash(corpus_dir) + "!prediction.txt")
    bc1 = BinaryConfusionMatrix('SPAM', 'OK')
    bc1.compute_from_dicts(truth_dic, pred_dic)
    dict_score = bc1.as_dict()
    fn = dict_score['fn']
    tn = dict_score['tn']
    fp = dict_score['fp']
    tp = dict_score['tp']
    return quality_score(tp, tn, fp, fn), tp, tn, fp, fn
Example #6
class BinaryConfusionMatrixTest(unittest.TestCase):
    def setUp(self):
        # Prepare fixture
        self.cm = BinaryConfusionMatrix(pos_tag=SPAM_TAG, neg_tag=HAM_TAG)

    def test_countersAreZero_afterCreation(self):
        # Exercise the SUT
        cmdict = self.cm.as_dict()
        # Assert
        self.assertDictEqual(cmdict, {"tp": 0, "tn": 0, "fp": 0, "fn": 0})

    def test_updatesTPcorrectly(self):
        # Exercise the SUT
        self.cm.update(SPAM_TAG, SPAM_TAG)
        # Assert
        self.assertDictEqual(self.cm.as_dict(), {"tp": 1, "tn": 0, "fp": 0, "fn": 0})

    def test_updatesTNcorrectly(self):
        # Exercise the SUT
        self.cm.update(HAM_TAG, HAM_TAG)
        # Assert
        self.assertDictEqual(self.cm.as_dict(), {"tp": 0, "tn": 1, "fp": 0, "fn": 0})

    def test_updatesFPcorrectly(self):
        # Exercise the SUT
        self.cm.update(HAM_TAG, SPAM_TAG)
        # Assert
        self.assertDictEqual(self.cm.as_dict(), {"tp": 0, "tn": 0, "fp": 1, "fn": 0})

    def test_updatesFNcorrectly(self):
        # Exercise the SUT
        self.cm.update(SPAM_TAG, HAM_TAG)
        # Assert
        self.assertDictEqual(self.cm.as_dict(), {"tp": 0, "tn": 0, "fp": 0, "fn": 1})

    def test_update_raisesValueError_forWrongTruthValue(self):
        # This test may be ignored (deleted).
        # It tests an additional feature of the BCF class.

        # Assert and exercise the SUT
        with self.assertRaises(ValueError):
            self.cm.update("a bad value", SPAM_TAG)

    def test_update_raisesValueError_forWrongPredictionValue(self):
        # This test may be ignored (deleted).
        # It tests an additional feature of the BCF class.

        # Assert and exercise the SUT
        with self.assertRaises(ValueError):
            self.cm.update(SPAM_TAG, "a bad value")

    def test_computeFromDicts_allCasesOnce(self):
        # Prepare fixture
        truth = {1: SPAM_TAG, 2: SPAM_TAG, 3: HAM_TAG, 4: HAM_TAG}
        prediction = {1: SPAM_TAG, 2: HAM_TAG, 3: SPAM_TAG, 4: HAM_TAG}
        # Exercise the SUT
        self.cm.compute_from_dicts(truth, prediction)
        # Assert
        self.assertDictEqual(self.cm.as_dict(), {"tp": 1, "tn": 1, "fp": 1, "fn": 1})
Example #7
def compute_quality_for_corpus(corpus_dir):
    """ Compute quality_score() for predictions in corpus """
    matrix = BinaryConfusionMatrix(pos_tag="SPAM", neg_tag="OK")

    matrix.compute_from_dicts(
        dict(
            read_classification_from_file(
                os.path.join(corpus_dir, "!truth.txt"))),
        dict(
            read_classification_from_file(
                os.path.join(corpus_dir, "!prediction.txt"))))

    return quality_score(**matrix.as_dict())
Example #8
def compute_quality_for_corpus(corpus_dir):
    truth_clasf = read_classification_from_file(corpus_dir + '/' + TRUTHFILE)
    pred_clasf = read_classification_from_file(corpus_dir + '/' + PREDFILE)

    conf_matrix = BinaryConfusionMatrix(POSITIVE, NEGATIVE)
    conf_matrix.compute_from_dicts(truth_clasf, pred_clasf)

    matrix_dict = conf_matrix.as_dict()
    # Testing purposes
    print(matrix_dict)

    score = quality_score(matrix_dict['tp'],
                          matrix_dict['tn'],
                          matrix_dict['fp'],
                          matrix_dict['fn'])

    return score
Example #9
def compute_quality_for_corpus(corpus_dir):
    truth_dict = None
    pred_dict = None

    # Look up the truth and prediction files in the corpus directory.
    # (The original version reset both dicts to None whenever it met any
    # other file containing '!', which could discard results already read.)
    for file in os.listdir(corpus_dir):
        if file == "!truth.txt":
            truth_dict = read_classification_from_file(corpus_dir + '/' + file)
        elif file == "!prediction.txt":
            pred_dict = read_classification_from_file(corpus_dir + '/' + file)

    cm1 = BinaryConfusionMatrix(pos_tag="SPAM", neg_tag="OK")
    cm1.compute_from_dicts(truth_dict, pred_dict)
    final_dict = cm1.as_dict()
    return quality_score(final_dict['tp'], final_dict['tn'], final_dict['fp'],
                         final_dict['fn'])
Example #10
def compute_quality_for_corpus(corpus_dir):
    '''Return the quality score for tested corpus (with truth and prediction files).'''
    from utils import read_classification_from_file as load_as_dict
    truth_file = '!truth.txt'
    pred_file = '!prediction.txt'
    truth_dict = load_as_dict(os.path.join(corpus_dir, truth_file))
    pred_dict = load_as_dict(os.path.join(corpus_dir, pred_file))
    
    from confmat import BinaryConfusionMatrix
    pos_tag = 'SPAM'
    neg_tag = 'OK'
    cm = BinaryConfusionMatrix(pos_tag, neg_tag)
    
    cm.compute_from_dicts(truth_dict, pred_dict)
    
    confusion_dict = cm.as_dict()
    tp = confusion_dict['tp']
    tn = confusion_dict['tn']
    fp = confusion_dict['fp']
    fn = confusion_dict['fn']
    
    return quality_score(tp, tn, fp, fn)
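
quality_score() is likewise never defined in this listing. Its real formula is unknown; as a clearly-labeled placeholder, the sketch below scores plain accuracy (a spam-filter grader would more plausibly penalize false positives harder than false negatives):

def quality_score(tp, tn, fp, fn):
    """Placeholder metric: plain accuracy. The real formula is not shown."""
    total = tp + tn + fp + fn
    return (tp + tn) / total if total else 0.0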
Example #11
class BinaryConfusionMatrixTest(unittest.TestCase):
    def setUp(self):
        # Prepare fixture
        self.cm = BinaryConfusionMatrix(pos_tag=INI_SPAM_TAG,
                                        neg_tag=INI_HAM_TAG)

    def test_countersAreZero_afterCreation(self):
        # Exercise the SUT
        cmdict = self.cm.as_dict()
        # Assert
        self.assertDictEqual(cmdict, {'tp': 0, 'tn': 0, 'fp': 0, 'fn': 0})

    def test_updatesTPcorrectly(self):
        # Exercise the SUT
        self.cm.update(SPAM_TAG, SPAM_TAG)
        # Assert
        self.assertDictEqual(self.cm.as_dict(), {
            'tp': 1,
            'tn': 0,
            'fp': 0,
            'fn': 0
        })

    def test_updatesTNcorrectly(self):
        # Exercise the SUT
        self.cm.update(HAM_TAG, HAM_TAG)
        # Assert
        self.assertDictEqual(self.cm.as_dict(), {
            'tp': 0,
            'tn': 1,
            'fp': 0,
            'fn': 0
        })

    def test_updatesFPcorrectly(self):
        # Exercise the SUT
        self.cm.update(HAM_TAG, SPAM_TAG)
        # Assert
        self.assertDictEqual(self.cm.as_dict(), {
            'tp': 0,
            'tn': 0,
            'fp': 1,
            'fn': 0
        })

    def test_updatesFNcorrectly(self):
        # Exercise the SUT
        self.cm.update(SPAM_TAG, HAM_TAG)
        # Assert
        self.assertDictEqual(self.cm.as_dict(), {
            'tp': 0,
            'tn': 0,
            'fp': 0,
            'fn': 1
        })

    def test_update_raisesValueError_forWrongTruthValue(self):
        # This test may be ignored (deleted).
        # It tests an additional feature of the BCF class.

        # Assert and exercise the SUT
        with self.assertRaises(ValueError):
            self.cm.update('a bad value', SPAM_TAG)

    def test_update_raisesValueError_forWrongPredictionValue(self):
        # This test may be ignored (deleted).
        # It tests an additional feature of the BCF class.

        # Assert and exercise the SUT
        with self.assertRaises(ValueError):
            self.cm.update(SPAM_TAG, 'a bad value')

    def test_computeFromDicts_allCasesOnce(self):
        # Prepare fixture
        truth = {1: SPAM_TAG, 2: SPAM_TAG, 3: HAM_TAG, 4: HAM_TAG}
        prediction = {1: SPAM_TAG, 2: HAM_TAG, 3: SPAM_TAG, 4: HAM_TAG}
        # Exercise the SUT
        self.cm.compute_from_dicts(truth, prediction)
        # Assert
        self.assertDictEqual(self.cm.as_dict(), {
            'tp': 1,
            'tn': 1,
            'fp': 1,
            'fn': 1
        })
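
For reference, a hedged end-to-end check of the self-contained compute_quality_for_corpus() variants above: write a tiny corpus with matching !truth.txt and !prediction.txt files and call the function on it. This assumes the two-column file format sketched earlier and the accuracy placeholder for quality_score():

import os
import tempfile

with tempfile.TemporaryDirectory() as corpus_dir:
    for name in ('!truth.txt', '!prediction.txt'):
        with open(os.path.join(corpus_dir, name), 'wt', encoding='utf-8') as f:
            f.write('mail1 SPAM\n')
            f.write('mail2 OK\n')
    # Truth and prediction agree on every mail, so with the accuracy
    # placeholder above this prints 1.0.
    print(compute_quality_for_corpus(corpus_dir))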