Beispiel #1
0
import numpy as np
import csv
from bigram import Bigram

ham_validation_list = Bigram.processed_file("../ham/validation.txt")
spam_validation_list = Bigram.processed_file("../spam/validation.txt")

with open('eval_k_0.1_1.0.csv', mode='w') as report_file:
    report_writer = csv.writer(report_file,
                               delimiter=',',
                               quotechar='"',
                               quoting=csv.QUOTE_MINIMAL)
    report_writer.writerow([
        'k', 'ham accuracy', 'spam accuracy', 'precision', 'recall', 'F1_score'
    ])

    for k in np.linspace(0.1, 1.0, 10):
        ham_pro_dict = Bigram.bigram_probability_dict("../ham/train.txt", k)
        spam_pro_dict = Bigram.bigram_probability_dict("../spam/train.txt", k)

        ham_count_ham = ham_count_spam = 0
        for message in ham_validation_list:
            spam_pp = Bigram.perplexity_single_message(spam_pro_dict, message)
            ham_pp = Bigram.perplexity_single_message(ham_pro_dict, message)
            result = None
            if spam_pp > ham_pp:
                ham_count_ham += 1
            else:
                ham_count_spam += 1

        spam_count_ham = spam_count_spam = 0
Beispiel #2
0
import numpy as np
import csv
from bigram import Bigram

k = 0.01
ham_test_list = Bigram.processed_file("../ham/test.txt")
spam_test_list = Bigram.processed_file("../spam/test.txt")

ham_pro_dict = Bigram.bigram_probability_dict("../ham/train.txt", k)
spam_pro_dict = Bigram.bigram_probability_dict("../spam/train.txt", k)

ham_count_ham = ham_count_spam = 0
for message in ham_test_list:
    spam_pp = Bigram.perplexity_single_message(spam_pro_dict, message)
    ham_pp = Bigram.perplexity_single_message(ham_pro_dict, message)
    result = None
    if spam_pp > ham_pp:
        ham_count_ham += 1
    else:
        ham_count_spam += 1

spam_count_ham = spam_count_spam = 0
for message in spam_test_list:
    spam_pp = Bigram.perplexity_single_message(spam_pro_dict, message)
    ham_pp = Bigram.perplexity_single_message(ham_pro_dict, message)
    result = None
    if spam_pp > ham_pp:
        spam_count_ham += 1
    else:
        spam_count_spam += 1