def test_allPredictionsWrong(self):
     """Expect a quality of 0.0 for the inverse truth/prediction fixture."""
     # Arrange: let the helper write the fixture files (its name suggests
     # the predictions are the inverse of the truth -- confirm in the helper).
     create_inverse_truth_and_prediction_file()
     # Act: score the corpus directory used by the test suite.
     observed = compute_quality_for_corpus(CORPUS_DIR)
     # Assert
     self.assertEqual(observed, 0.0)
Exemple #2
0
def myfilter_test(corpus_dir):
    """Run the MyFilter test and then print its prediction quality.

    MyFilter is trained and tested on the same ``corpus_dir``; the quality
    computed for that directory is printed under a "MyFilter:" heading, and
    ``remove_prediction()`` is called afterwards.
    """
    MyFilter.train(corpus_dir)
    MyFilter.test(corpus_dir)
    print("MyFilter:")
    corpus_quality = quality.compute_quality_for_corpus(corpus_dir)
    print(corpus_quality)
    remove_prediction()
 def test_allPredictionsCorrect(self):
     """Expect a quality of 1.0 for the identical truth/prediction fixture."""
     # Arrange: let the helper write the fixture files (its name suggests
     # the predictions equal the truth -- confirm in the helper).
     create_identical_truth_and_prediction_file()
     # Act: score the corpus directory used by the test suite.
     observed = compute_quality_for_corpus(CORPUS_DIR)
     # Assert
     self.assertEqual(observed, 1.0)
 def test_allPredictionsWrong(self):
     """Expect a quality of 0.0 when predictions are built with invert_classes."""
     # Arrange: build an artificial e-mail classification and write the
     # truth and prediction files, deriving predictions via invert_classes.
     classification = create_classification()
     create_truth_and_prediction_file(classification, invert_classes)
     # Act
     observed = compute_quality_for_corpus(CORPUS_DIR)
     # Assert
     self.assertEqual(observed, 0.0)
 def test_allPredictionsHam_for10SpamsAnd10Hams(self):
     """Check the quality when every e-mail of a 10 spam / 10 ham corpus is predicted ham."""
     # Arrange: artificial classification of 20 items, 10 of them spam,
     # with predictions derived via hams_only.
     classification = create_classification(n_items=20, n_spams=10)
     create_truth_and_prediction_file(classification, hams_only)
     # With 10 spams and 10 hams all predicted as ham, the confusion matrix
     # has TN = 10 and FN = 10 and no positives, so the modified accuracy
     # reduces to TN / (TN + FN).
     true_negatives = 10
     false_negatives = 10
     expected = true_negatives / (true_negatives + false_negatives)
     # Act
     observed = compute_quality_for_corpus(CORPUS_DIR)
     # Assert
     self.assertEqual(observed, expected)
Exemple #6
0
                tests_done += 1
        return (score, tests_done)

    def test_word_filters(self, name, mail):
        score = 0
        tests_done = 0
        for filt in self.word_filters:
            result = filt.test(mail)
            if result != -1:
                score += result
                tests_done += 1
        return (score, tests_done)


if __name__ == '__main__':
    import quality

    # Corpus directories: the filter is trained on the first and
    # evaluated on the second.
    train_dir, test_dir = '1/', '2/'

    # filt = atomfilters.XSpamStatusFilter()
    # score = quality.test_atom_filter(filt, train_dir, test_dir)
    # print("Score of {name} is {score:3.2f}".format(name=filt.__class__.__name__, score=score))

    classifier = MyFilter()
    classifier.train(train_dir)
    classifier.test(test_dir)
    # utils.show_mismatched(test_dir)

    # Report the quality computed for the test corpus next to the
    # threshold constant in use.
    corpus_score = quality.compute_quality_for_corpus(test_dir)
    report_template = "Score for {constant} is {score:3.2f}"
    print(report_template.format(constant=POSITIVITY_THRESHOLD,
                                 score=corpus_score))
 def quality_s(self):
     """Return the quality computed for the corpus at ``self.path_to_corpus``."""
     corpus_path = self.path_to_corpus
     return quality.compute_quality_for_corpus(corpus_path)
        for b_line in pred:
            b_list = b_line.strip().split()
            b_dic[b_list[0]] = b_list[1]



if __name__ == '__main__':
    # Cross-evaluate MyFilter on two corpora: every combination of
    # "train on X, test on Y" is run and its quality printed.
    train_corpus = '/Users/eygene/Desktop/spam-data-12-s75-h25/1'
    test_corpus = '/Users/eygene/Desktop/spam-data-12-s75-h25/2'

    def _report(label, train_on, evaluate_on):
        """Train a fresh MyFilter on one corpus, test it on another, print the quality."""
        # Named spam_filter rather than filter so the builtin is not shadowed.
        spam_filter = MyFilter()
        spam_filter.train(train_on)
        spam_filter.test(evaluate_on)
        print(label, compute_quality_for_corpus(evaluate_on))

    # Labels read "<trained on> -> <tested on>"; the first one used to be
    # printed as '1>2: ', inconsistent with the other three.
    _report('1->2: ', train_corpus, test_corpus)
    _report('2->1: ', test_corpus, train_corpus)
    _report('2->2: ', test_corpus, test_corpus)
    # Must stay right after the 2->2 run, as in the original sequence: the
    # next run calls test() on the other corpus.
    get_error(test_corpus)
    _report('1->1: ', train_corpus, train_corpus)


Exemple #9
0
                    word_probability = (
                        spam_ham_dict.get(word) + self.alpha) / (
                            spam_ham_num_of_words +
                            self.alpha * self.num_of_all_words + self.alpha)
                else:
                    word_probability = self.alpha / (
                        spam_ham_num_of_words +
                        self.alpha * self.num_of_all_words + self.alpha)
            else:
                word_probability = 1
            self.average_word_probability += word_probability
            email_probability *= word_probability
            email_probability, underflow_overflow = underflow_overflow_exception_handler(
                email_probability, underflow_overflow)
        self.average_word_probability /= len(words)
        return email_probability, underflow_overflow


if __name__ == "__main__":
    first_corpus = "spam-data-12-s75-h25/1/"
    second_corpus = "spam-data-12-s75-h25/2/"

    # Each row: (directory to train on, directory to test and score,
    # headline printed before the score). A fresh filter is built per row.
    evaluation_runs = (
        (first_corpus, second_corpus,
         "Quality of the spam filter for first folder with emails:"),
        (second_corpus, first_corpus,
         "Quality of the spam filter for second folder with emails:"),
    )

    for training_dir, scoring_dir, headline in evaluation_runs:
        spam_filter = MyFilter()
        spam_filter.train(training_dir)
        spam_filter.test(scoring_dir)
        print(headline)
        print(compute_quality_for_corpus(scoring_dir))
Exemple #10
0
import simplefilters
import quality
import os

# Evaluate simplefilters.RandomFilter: train, write predictions for the test
# corpus, print the quality and clean up the generated prediction file.
corpus_root = 'C:\\Users\\Prabhath\\Desktop\\spam_filter'
train_corpus = os.path.join(corpus_root, '1')
test_corpus = os.path.join(corpus_root, '2')

nf = simplefilters.RandomFilter()
# NOTE(review): train() receives the path of the truth file, not the corpus
# directory; kept as in the original -- confirm what RandomFilter.train expects.
nf.train(os.path.join(train_corpus, '!truth.txt'))
nf.test(test_corpus)
try:
    # Score the directory that test() was run on. The original passed the
    # parent folder here, although the prediction file removed below lives
    # in the test corpus.
    print(quality.compute_quality_for_corpus(test_corpus))
finally:
    # Remove the generated predictions even if scoring fails.
    os.remove(os.path.join(test_corpus, '!prediction.txt'))