def test_allPredictionsWrong(self):
    """Check that the corpus quality is 0.0 for the inverse truth/prediction fixture."""
    # Arrange: build the fixture files; judging by the helper's name, the
    # prediction file is the inverse of the truth file.
    create_inverse_truth_and_prediction_file()

    # Act: evaluate the corpus directory used by the fixture.
    computed_quality = compute_quality_for_corpus(CORPUS_DIR)

    # Assert: the computed quality is exactly 0.0.
    self.assertEqual(computed_quality, 0.0)
def myfilter_test(corpus_dir):
    '''Run the MyFilter test and then print its prediction quality.

    MyFilter is trained and tested on the same ``corpus_dir``; the quality
    computed for that directory is printed under a "MyFilter:" header, and
    ``remove_prediction()`` is called afterwards.
    '''
    MyFilter.train(corpus_dir)
    MyFilter.test(corpus_dir)

    # The header is printed before the quality is computed, so it appears
    # even if the computation fails.
    print("MyFilter:")
    corpus_quality = quality.compute_quality_for_corpus(corpus_dir)
    print(corpus_quality)

    remove_prediction()
def test_allPredictionsCorrect(self):
    """Check that the corpus quality is 1.0 for the identical truth/prediction fixture."""
    # Arrange: build the fixture files; judging by the helper's name, the
    # prediction file is identical to the truth file.
    create_identical_truth_and_prediction_file()

    # Act: evaluate the corpus directory used by the fixture.
    computed_quality = compute_quality_for_corpus(CORPUS_DIR)

    # Assert: the computed quality is exactly 1.0.
    self.assertEqual(computed_quality, 1.0)
def test_allPredictionsWrong(self):
    """Check that the corpus quality is 0.0 when predictions are built with ``invert_classes``."""
    # Arrange: create an artificial email classification dictionary and
    # write the truth/prediction files from it using ``invert_classes``.
    reference_classification = create_classification()
    create_truth_and_prediction_file(reference_classification, invert_classes)

    # Act: evaluate the corpus directory used by the fixture.
    computed_quality = compute_quality_for_corpus(CORPUS_DIR)

    # Assert: the computed quality is exactly 0.0.
    self.assertEqual(computed_quality, 0.0)
def test_allPredictionsHam_for10SpamsAnd10Hams(self):
    """Check the quality when every email in a 10-spam/10-ham corpus is predicted ham."""
    # Arrange: create an artificial classification of 20 items, 10 of them
    # spam, and write truth/prediction files using ``hams_only``.
    reference_classification = create_classification(n_items=20, n_spams=10)
    create_truth_and_prediction_file(reference_classification, hams_only)

    # With 10 spams and 10 hams in the corpus and all predictions being ham,
    # the confusion matrix shall have TN = 10 and FN = 10, with zero
    # positives. The modified accuracy is then TN / (TN + FN).
    true_negatives = 10
    false_negatives = 10
    expected_quality = true_negatives / (true_negatives + false_negatives)

    # Act: evaluate the corpus directory used by the fixture.
    computed_quality = compute_quality_for_corpus(CORPUS_DIR)

    # Assert: the computed quality equals the expected modified accuracy.
    self.assertEqual(computed_quality, expected_quality)
tests_done += 1 return (score, tests_done) def test_word_filters(self, name, mail): score = 0 tests_done = 0 for filt in self.word_filters: result = filt.test(mail) if result != -1: score += result tests_done += 1 return (score, tests_done) if __name__ == '__main__': import quality train_dir = '1/' test_dir = '2/' # filt = atomfilters.XSpamStatusFilter() # score = quality.test_atom_filter(filt, train_dir, test_dir) # print("Score of {name} is {score:3.2f}".format(name=filt.__class__.__name__, score=score)) filt = MyFilter() filt.train(train_dir) filt.test(test_dir) # utils.show_mismatched(test_dir) score = quality.compute_quality_for_corpus(test_dir) print("Score for {constant} is {score:3.2f}".format( constant=POSITIVITY_THRESHOLD, score=score))
def quality_s(self):
    """Return the quality computed for the corpus at ``self.path_to_corpus``."""
    # Resolve the scoring function first, then the path, matching the
    # evaluation order of the original single-expression form.
    compute = quality.compute_quality_for_corpus
    return compute(self.path_to_corpus)
for b_line in pred: b_list = b_line.strip().split() b_dic[b_list[0]] = b_list[1] if __name__ == '__main__': train_corpus = '/Users/eygene/Desktop/spam-data-12-s75-h25/1' test_corpus = '/Users/eygene/Desktop/spam-data-12-s75-h25/2' filter = MyFilter() filter.train(train_corpus) filter.test(test_corpus) print('1>2: ' ,compute_quality_for_corpus(test_corpus)) filter = MyFilter() filter.train(test_corpus) filter.test(train_corpus) print('2->1: ' ,compute_quality_for_corpus(train_corpus)) filter = MyFilter() filter.train(test_corpus) filter.test(test_corpus) print('2->2: ', compute_quality_for_corpus(test_corpus)) get_error(test_corpus) filter = MyFilter() filter.train(train_corpus) filter.test(train_corpus) print('1->1: ' ,compute_quality_for_corpus(train_corpus))
word_probability = ( spam_ham_dict.get(word) + self.alpha) / ( spam_ham_num_of_words + self.alpha * self.num_of_all_words + self.alpha) else: word_probability = self.alpha / ( spam_ham_num_of_words + self.alpha * self.num_of_all_words + self.alpha) else: word_probability = 1 self.average_word_probability += word_probability email_probability *= word_probability email_probability, underflow_overflow = underflow_overflow_exception_handler( email_probability, underflow_overflow) self.average_word_probability /= len(words) return email_probability, underflow_overflow if __name__ == "__main__": my_filter1 = MyFilter() my_filter1.train("spam-data-12-s75-h25/1/") my_filter1.test("spam-data-12-s75-h25/2/") print("Quality of the spam filter for first folder with emails:") print(compute_quality_for_corpus("spam-data-12-s75-h25/2/")) my_filter2 = MyFilter() my_filter2.train("spam-data-12-s75-h25/2/") my_filter2.test("spam-data-12-s75-h25/1/") print("Quality of the spam filter for second folder with emails:") print(compute_quality_for_corpus("spam-data-12-s75-h25/1/"))
import simplefilters
import quality
import os

# Script: run RandomFilter against the second corpus folder, print the
# resulting quality, and remove the prediction file afterwards.

# All paths are derived from one root so they cannot drift apart.
CORPUS_ROOT = 'C:\\Users\\Prabhath\\Desktop\\spam_filter'
TRAIN_DIR = CORPUS_ROOT + '\\1'
TEST_DIR = CORPUS_ROOT + '\\2'

nf = simplefilters.RandomFilter()

# NOTE(review): train() is given the path of the truth file, not the
# training directory. Kept as-is; confirm against RandomFilter.train().
nf.train(TRAIN_DIR + '\\!truth.txt')
nf.test(TEST_DIR)

try:
    # Fix: evaluate the directory that test() was pointed at, which is also
    # where !prediction.txt is removed from below. Previously the parent
    # folder (CORPUS_ROOT) was evaluated instead.
    print(quality.compute_quality_for_corpus(TEST_DIR))
finally:
    # Always clean up the prediction file, even if the evaluation fails.
    os.remove(TEST_DIR + '\\!prediction.txt')