def test_atom_filter(initialized_filter, train_dir, test_dir):
    """Train the given filter on train_dir, classify test_dir, and return its quality score.

    Emails for which filter.test() returns -1 are skipped (the filter
    abstained); results above POSITIVITY_THRESHOLD are labelled POSITIVE,
    everything else NEGATIVE.
    """
    train_corp = TrainingCorpus(train_dir)
    test_corp = Corpus(test_dir)
    # Renamed from 'filter' to avoid shadowing the builtin of the same name.
    spam_filter = initialized_filter
    spam_filter.train(train_corp)

    prediction = {}
    for name, mail in test_corp.emails():
        result = spam_filter.test(mail)
        if result == -1:
            # Filter could not decide: leave this mail out of the prediction.
            continue
        prediction[name] = POSITIVE if result > POSITIVITY_THRESHOLD else NEGATIVE

    truth = read_classification_from_file(test_dir + '/' + TRUTHFILE)
    conf_matrix = BinaryConfusionMatrix(POSITIVE, NEGATIVE)
    conf_matrix.compute_from_dicts(truth, prediction)
    matrix_dict = conf_matrix.as_dict()
    print(matrix_dict)  # For testing purposes: show the raw counts.
    # Keyword unpacking replaces the backslash-continued positional call;
    # the dict keys (tp, tn, fp, fn) match quality_score()'s parameters.
    return quality_score(**matrix_dict)
def compute_quality_for_corpus(corpus_dir):
    """Return quality_score() for the predictions stored in corpus_dir.

    Reads '!truth.txt' and '!prediction.txt' from corpus_dir, builds a
    SPAM/OK confusion matrix, and forwards its counts to quality_score().
    """
    # Read both files through joined paths instead of os.chdir(): mutating
    # the process-wide cwd is not exception-safe (the original never restored
    # it on error) and can break unrelated code running concurrently.
    truth_dict = read_classification_from_file(
        os.path.join(corpus_dir, '!truth.txt'))
    pred_dict = read_classification_from_file(
        os.path.join(corpus_dir, '!prediction.txt'))
    cm = BinaryConfusionMatrix(pos_tag='SPAM', neg_tag='OK')
    cm.compute_from_dicts(truth_dict, pred_dict)
    return quality_score(**cm.as_dict())
def compute_quality_for_corpus(corpus_dir):
    """Compute quality_score() for predictions in corpus."""
    # Resolve both classification files relative to the corpus directory.
    truth_path = os.path.join(corpus_dir, "!truth.txt")
    prediction_path = os.path.join(corpus_dir, "!prediction.txt")

    matrix = BinaryConfusionMatrix(pos_tag="SPAM", neg_tag="OK")
    matrix.compute_from_dicts(
        dict(read_classification_from_file(truth_path)),
        dict(read_classification_from_file(prediction_path)),
    )
    return quality_score(**matrix.as_dict())
def compute_quality_for_corpus(corpus_dir):
    """Score the predictions found in corpus_dir against its ground truth."""
    truth_path = os.path.join(corpus_dir, '!truth.txt')
    prediction_path = os.path.join(corpus_dir, '!prediction.txt')

    truth = utils.read_classification_from_file(truth_path)
    prediction = utils.read_classification_from_file(prediction_path)

    # pos_tag / neg_tag are module-level constants.
    matrix = BinaryConfusionMatrix(pos_tag, neg_tag)
    matrix.compute_from_dicts(truth, prediction)
    return quality_score(**matrix.as_dict())
def compute_quality_for_corpus(corpus_dir):
    """Return (score, tp, tn, fp, fn) for the predictions in corpus_dir."""
    base = methods.add_slash(corpus_dir)
    truth = methods.read_classification_from_file(base + "!truth.txt")
    prediction = methods.read_classification_from_file(base + "!prediction.txt")

    matrix = BinaryConfusionMatrix('SPAM', 'OK')
    matrix.compute_from_dicts(truth, prediction)
    counts = matrix.as_dict()

    tp, tn, fp, fn = counts['tp'], counts['tn'], counts['fp'], counts['fn']
    # The individual counters are returned alongside the score so callers
    # can report the full confusion breakdown.
    return quality_score(tp, tn, fp, fn), tp, tn, fp, fn
class BinaryConfusionMatrixTest(unittest.TestCase):
    """Unit tests for the BinaryConfusionMatrix counter bookkeeping."""

    def setUp(self):
        # Fresh fixture for every test.
        self.cm = BinaryConfusionMatrix(pos_tag=SPAM_TAG, neg_tag=HAM_TAG)

    def _assert_counts(self, tp=0, tn=0, fp=0, fn=0):
        # Shared assertion helper: compare the matrix with expected counters.
        self.assertDictEqual(self.cm.as_dict(),
                             {"tp": tp, "tn": tn, "fp": fp, "fn": fn})

    def test_countersAreZero_afterCreation(self):
        self._assert_counts()

    def test_updatesTPcorrectly(self):
        self.cm.update(SPAM_TAG, SPAM_TAG)
        self._assert_counts(tp=1)

    def test_updatesTNcorrectly(self):
        self.cm.update(HAM_TAG, HAM_TAG)
        self._assert_counts(tn=1)

    def test_updatesFPcorrectly(self):
        self.cm.update(HAM_TAG, SPAM_TAG)
        self._assert_counts(fp=1)

    def test_updatesFNcorrectly(self):
        self.cm.update(SPAM_TAG, HAM_TAG)
        self._assert_counts(fn=1)

    def test_update_raisesValueError_forWrongTruthValue(self):
        # This test may be ignored (deleted).
        # It tests an additional feature of the BCF class.
        with self.assertRaises(ValueError):
            self.cm.update("a bad value", SPAM_TAG)

    def test_update_raisesValueError_forWrongPredictionValue(self):
        # This test may be ignored (deleted).
        # It tests an additional feature of the BCF class.
        with self.assertRaises(ValueError):
            self.cm.update(SPAM_TAG, "a bad value")

    def test_computeFromDicts_allCasesOnce(self):
        # One email per confusion-matrix cell.
        truth = {1: SPAM_TAG, 2: SPAM_TAG, 3: HAM_TAG, 4: HAM_TAG}
        prediction = {1: SPAM_TAG, 2: HAM_TAG, 3: SPAM_TAG, 4: HAM_TAG}
        self.cm.compute_from_dicts(truth, prediction)
        self._assert_counts(tp=1, tn=1, fp=1, fn=1)
def compute_quality_for_corpus(corpus_dir):
    """Compute quality_score() for predictions in corpus."""
    def _load(filename):
        # Read one classification file from the corpus directory as a dict.
        return dict(read_classification_from_file(
            os.path.join(corpus_dir, filename)))

    matrix = BinaryConfusionMatrix(pos_tag="SPAM", neg_tag="OK")
    matrix.compute_from_dicts(_load("!truth.txt"), _load("!prediction.txt"))
    return quality_score(**matrix.as_dict())
def compute_quality_for_corpus(corpus_dir):
    """Return quality_score() for the predictions stored in corpus_dir.

    Reads the truth and prediction classification files, builds a binary
    confusion matrix, and feeds its counts to quality_score().
    """
    truth_clasf = read_classification_from_file(corpus_dir + '/' + TRUTHFILE)
    pred_clasf = read_classification_from_file(corpus_dir + '/' + PREDFILE)
    conf_matrix = BinaryConfusionMatrix(POSITIVE, NEGATIVE)
    conf_matrix.compute_from_dicts(truth_clasf, pred_clasf)
    # Removed the leftover debug print of the matrix; keyword unpacking
    # replaces the backslash-continued positional call (the dict keys
    # tp/tn/fp/fn match quality_score()'s parameters, as the sibling
    # implementations in this file already rely on).
    return quality_score(**conf_matrix.as_dict())
def compute_quality_for_corpus(corpus_dir):
    """Return quality_score() for the predictions stored in corpus_dir.

    Bug fix: the original scanned every '!'-prefixed directory entry and
    reset BOTH dicts to None whenever it met any other '!' file, so a stray
    file such as '!notes.txt' listed after '!truth.txt' wiped the already
    loaded data, and a directory missing either file raised NameError.
    The two well-known files are now read directly.
    """
    truth_dict = read_classification_from_file(
        os.path.join(corpus_dir, "!truth.txt"))
    pred_dict = read_classification_from_file(
        os.path.join(corpus_dir, "!prediction.txt"))
    cm1 = BinaryConfusionMatrix(pos_tag="SPAM", neg_tag="OK")
    cm1.compute_from_dicts(truth_dict, pred_dict)
    final_dict = cm1.as_dict()
    return quality_score(final_dict['tp'], final_dict['tn'],
                         final_dict['fp'], final_dict['fn'])
def compute_quality_for_corpus(corpus_dir):
    '''Return the quality score for tested corpus (with truth and prediction files).'''
    # Function-local imports kept, matching the original's lazy-import style.
    from utils import read_classification_from_file as load_as_dict
    from confmat import BinaryConfusionMatrix

    truth_dict = load_as_dict(os.path.join(corpus_dir, '!truth.txt'))
    pred_dict = load_as_dict(os.path.join(corpus_dir, '!prediction.txt'))

    cm = BinaryConfusionMatrix('SPAM', 'OK')
    cm.compute_from_dicts(truth_dict, pred_dict)
    counts = cm.as_dict()
    return quality_score(counts['tp'], counts['tn'],
                         counts['fp'], counts['fn'])
class BinaryConfusionMatrixTest(unittest.TestCase):
    """Tests for the BinaryConfusionMatrix counters."""

    def setUp(self):
        # Fresh matrix before each test.
        self.cm = BinaryConfusionMatrix(pos_tag=INI_SPAM_TAG,
                                        neg_tag=INI_HAM_TAG)

    @staticmethod
    def _counts(tp=0, tn=0, fp=0, fn=0):
        # Build the expected counter dict with zero defaults.
        return {'tp': tp, 'tn': tn, 'fp': fp, 'fn': fn}

    def test_countersAreZero_afterCreation(self):
        self.assertDictEqual(self.cm.as_dict(), self._counts())

    def test_updatesTPcorrectly(self):
        self.cm.update(SPAM_TAG, SPAM_TAG)
        self.assertDictEqual(self.cm.as_dict(), self._counts(tp=1))

    def test_updatesTNcorrectly(self):
        self.cm.update(HAM_TAG, HAM_TAG)
        self.assertDictEqual(self.cm.as_dict(), self._counts(tn=1))

    def test_updatesFPcorrectly(self):
        self.cm.update(HAM_TAG, SPAM_TAG)
        self.assertDictEqual(self.cm.as_dict(), self._counts(fp=1))

    def test_updatesFNcorrectly(self):
        self.cm.update(SPAM_TAG, HAM_TAG)
        self.assertDictEqual(self.cm.as_dict(), self._counts(fn=1))

    def test_update_raisesValueError_forWrongTruthValue(self):
        # This test may be ignored (deleted).
        # It tests an additional feature of the BCF class.
        with self.assertRaises(ValueError):
            self.cm.update('a bad value', SPAM_TAG)

    def test_update_raisesValueError_forWrongPredictionValue(self):
        # This test may be ignored (deleted).
        # It tests an additional feature of the BCF class.
        with self.assertRaises(ValueError):
            self.cm.update(SPAM_TAG, 'a bad value')

    def test_computeFromDicts_allCasesOnce(self):
        # One sample lands in each of the four confusion-matrix cells.
        truth = {1: SPAM_TAG, 2: SPAM_TAG, 3: HAM_TAG, 4: HAM_TAG}
        prediction = {1: SPAM_TAG, 2: HAM_TAG, 3: SPAM_TAG, 4: HAM_TAG}
        self.cm.compute_from_dicts(truth, prediction)
        self.assertDictEqual(self.cm.as_dict(),
                             self._counts(tp=1, tn=1, fp=1, fn=1))