/
test_quality.py
148 lines (127 loc) · 5.03 KB
/
test_quality.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import unittest
import os
import shutil
from confmat import BinaryConfusionMatrix
from test_readClassificationFromFile import (
create_classification,
save_classification_to_file)
from quality import (
quality_score,
compute_quality_for_corpus)
class QualityScoreTest(unittest.TestCase):
    """Unit tests for quality_score().

    The tests assume a quality function of the form

        q = (tp + tn) / (tp + tn + 10*fp + fn)

    so any confusion matrix containing only correct predictions
    (tp and/or tn) scores 1.0, and any matrix containing only
    wrong predictions (fp and/or fn) scores 0.0.
    """

    def test_qualityScore_return1_ifConfmatContainsTpOnly(self):
        # Only true positives: a perfect classifier.
        cm_dict = {'tp': 100, 'tn': 0, 'fp': 0, 'fn': 0}
        q = quality_score(**cm_dict)
        self.assertEqual(q, 1.0)

    def test_qualityScore_return1_ifConfmatContainsTnOnly(self):
        # Only true negatives: a perfect classifier.
        cm_dict = {'tp': 0, 'tn': 100, 'fp': 0, 'fn': 0}
        q = quality_score(**cm_dict)
        self.assertEqual(q, 1.0)

    def test_qualityScore_return1_ifConfmatContainsTpOrTnOnly(self):
        # Fixed typo in the method name ("Cconfmat" -> "Confmat").
        # Mix of true positives and true negatives, no errors.
        cm_dict = {'tp': 100, 'tn': 100, 'fp': 0, 'fn': 0}
        q = quality_score(**cm_dict)
        self.assertEqual(q, 1.0)

    def test_qualityScore_return0_ifConfmatContainsFpOnly(self):
        # Only false positives: a completely wrong classifier.
        cm_dict = {'tp': 0, 'tn': 0, 'fp': 100, 'fn': 0}
        q = quality_score(**cm_dict)
        self.assertEqual(q, 0.0)

    def test_qualityScore_return0_ifConfmatContainsFnOnly(self):
        # Only false negatives: a completely wrong classifier.
        cm_dict = {'tp': 0, 'tn': 0, 'fp': 0, 'fn': 100}
        q = quality_score(**cm_dict)
        self.assertEqual(q, 0.0)

    def test_qualityScore_return0_ifConfmatContainsFpOrFnOnly(self):
        # Mix of false positives and false negatives, nothing correct.
        cm_dict = {'tp': 0, 'tn': 0, 'fp': 100, 'fn': 100}
        q = quality_score(**cm_dict)
        self.assertEqual(q, 0.0)

    def test_qualityScore_whenConfmatHasAllCountersEqual(self):
        """
        Here we assume the quality function in the form:
        q = (tp + tn) / (tp + tn + 10*fp + fn)
        """
        # With all counters equal to 1: q = (1+1)/(1+1+10+1) = 2/13.
        cm_dict = {'tp': 1, 'tn': 1, 'fp': 1, 'fn': 1}
        q = quality_score(**cm_dict)
        self.assertEqual(q, 2/13)
# Directory where the test fixtures build a throwaway corpus; it is
# created in setUp() and deleted in tearDown() of the corpus test case.
CORPUS_DIR = 'corpus_for_testing_delete_me'
# Filenames of the ground-truth and predicted classification files.
TRUTH_FILENAME = '!truth.txt'
# NOTE(review): "FILANAME" is a typo for "FILENAME"; it is used
# consistently throughout this module, so renaming it would require
# updating every reference at once.
PREDICTION_FILANAME = '!prediction.txt'
# Class labels stored in the classification dictionaries/files.
SPAM_TAG = 'SPAM'
HAM_TAG = 'OK'
class ComputeQualityForCorpusTest(unittest.TestCase):
    """Tests for compute_quality_for_corpus() against a generated corpus."""

    def setUp(self):
        """Ensure an (initially empty) corpus directory exists."""
        os.makedirs(CORPUS_DIR, exist_ok=True)

    def tearDown(self):
        """Remove the corpus directory and everything in it."""
        shutil.rmtree(CORPUS_DIR, ignore_errors=True)

    def test_allPredictionsCorrect(self):
        """A prediction file identical to the truth must score 1.0."""
        create_identical_truth_and_prediction_file()
        self.assertEqual(compute_quality_for_corpus(CORPUS_DIR), 1.0)

    def test_allPredictionsWrong(self):
        """A prediction file opposite to the truth must score 0.0."""
        create_inverse_truth_and_prediction_file()
        self.assertEqual(compute_quality_for_corpus(CORPUS_DIR), 0.0)
def create_identical_truth_and_prediction_file():
    """
    Create identical !truth.txt and !prediction.txt files in the corpus directory.
    Here we assume that the corpus directory already exists.
    """
    # One artificial classification, written out under both filenames.
    classification = create_classification()
    for filename in (TRUTH_FILENAME, PREDICTION_FILANAME):
        save_classification_to_file(
            classification, os.path.join(CORPUS_DIR, filename))
def create_inverse_truth_and_prediction_file():
    """
    Create inverse !truth.txt and !prediction.txt files in the corpus directory.
    Here we assume that the corpus directory already exists.
    """
    # The prediction is the truth with every class label flipped.
    truth = create_classification()
    prediction = invert_classes(truth)
    save_classification_to_file(
        truth, os.path.join(CORPUS_DIR, TRUTH_FILENAME))
    save_classification_to_file(
        prediction, os.path.join(CORPUS_DIR, PREDICTION_FILANAME))
def invert_classes(orig_dict):
    """Return a dict with switched HAM_TAG and SPAM_TAG."""
    # HAM becomes SPAM; any other label (i.e. SPAM) becomes HAM.
    return {
        email_filename: SPAM_TAG if label == HAM_TAG else HAM_TAG
        for email_filename, label in orig_dict.items()
    }
if __name__=='__main__':
    # Discover and run all test cases in this module when executed directly.
    unittest.main()