Example #1
0
def main():
    """Build per-class dictionaries from ``train/`` and evaluate on ``test/``.

    Every file in ``train/pos`` and ``train/neg`` is read and handed to
    ``preprocess_string`` together with the dictionary of its class
    (presumably the helper accumulates word statistics into that dictionary;
    confirm against its definition). Every file in ``test/pos`` and
    ``test/neg`` is then preprocessed with a throw-away dictionary and
    classified with ``naivebayes``; the predicted label is compared with the
    directory the file came from.

    Prints progress messages, the four hit/miss counters (positive hits,
    positive misses, negative hits, negative misses) and the overall
    accuracy. All paths are relative to the current working directory.
    """
    positive_library = {}
    negative_library = {}

    print("Start positive dictio")
    _fill_library('train/pos', positive_library)
    print("Start negative dictio")
    _fill_library('train/neg', negative_library)
    print("Finished creating dictio, starting naive bayes")

    print("Start the positive")
    confirm_positive, error_positive = _score_directory(
        'test/pos', "positive", positive_library, negative_library)
    print("Finished compute positive")

    print("Start with negative")
    confirm_negative, error_negative = _score_directory(
        'test/neg', "negative", positive_library, negative_library)
    print("Finished compute negative")

    print("Final Result")
    print(confirm_positive)
    print(error_positive)
    print(confirm_negative)
    print(error_negative)

    total = (confirm_positive + confirm_negative
             + error_positive + error_negative)
    if total:
        print(f"Accuracy:{(confirm_positive+confirm_negative)/(total)}")
    else:
        # Both test directories were empty: the ratio is undefined, so report
        # that instead of crashing with ZeroDivisionError.
        print("Accuracy: n/a (no test files found)")


def _read_flat(path):
    """Return the UTF-8 text of *path* with every newline replaced by a space."""
    # The handle is closed as soon as the text is in memory, so it is not
    # held open during preprocessing/classification.
    with open(path, 'r', encoding='utf8') as handle:
        return handle.read().replace('\n', ' ')


def _fill_library(directory, library):
    """Pass each file of *directory* to ``preprocess_string`` with *library*."""
    for name in os.listdir(directory):
        preprocess_string(_read_flat(directory + "/" + name), library)


def _score_directory(directory, expected, positive_library, negative_library):
    """Classify each file of *directory*; return ``(hits, misses)``.

    A hit is a file for which ``naivebayes`` returns *expected*. The path of
    every file is printed before it is processed.
    """
    hits = 0
    misses = 0
    for name in os.listdir(directory):
        path = directory + "/" + name
        print(path)
        # A fresh dict keeps test documents out of the trained dictionaries.
        features = preprocess_string(_read_flat(path), {})
        label = naivebayes(features, positive_library, negative_library)
        if label == expected:
            hits += 1
        else:
            misses += 1
    return hits, misses
Example #2
0
# Load the remaining 'Other' training documents. Names and files are matched
# by position; note the numbering offset: trainN_o comes from o(N-1).txt.
(
    train7_o, train8_o, train9_o, train10_o, train11_o, train12_o, train13_o,
    train14_o, train15_o, train16_o, train17_o, train18_o, train19_o,
    train20_o,
) = map(
    preprocess_words_from_file,
    (
        'TrainingData/Other/o6.txt',
        'TrainingData/Other/o7.txt',
        'TrainingData/Other/o8.txt',
        'TrainingData/Other/o9.txt',
        'TrainingData/Other/o10.txt',
        'TrainingData/Other/o11.txt',
        'TrainingData/Other/o12.txt',
        'TrainingData/Other/o13.txt',
        'TrainingData/Other/o14.txt',
        'TrainingData/Other/o15.txt',
        'TrainingData/Other/o16.txt',
        'TrainingData/Other/o17.txt',
        'TrainingData/Other/o18.txt',
        'TrainingData/Other/o19.txt',
    ),
)

# Create the classifier around the getwords feature extractor and point it at
# its database file.
cl = naivebayes(getwords)
cl.setdb('Ad_vs_other.db')

# Train the 'Advertisement' category, in the same order as before.
# train1_ad .. train13_ad are bound earlier in the script (outside this
# excerpt).
for _ad_words in (
    train1_ad, train2_ad, train3_ad, train4_ad, train5_ad, train6_ad,
    train7_ad, train8_ad, train9_ad, train10_ad, train11_ad, train12_ad,
    train13_ad,
):
    cl.train(_ad_words, 'Advertisement')