# scikit-learn imports; Dataset, Vocabulary, categories and
# print_v2_test_docs_vocabulary_labeled are assumed to come from the
# project's own preprocessing modules.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics


def run_naive_bayes_bow_runtime_vocabulary(nbr, str_list):
    """Average Naive Bayes bag-of-words scores over 10 runs, using a
    vocabulary rebuilt from the current training split on every run."""
    avg_f1 = 0
    avg_accuracy = 0
    for _ in range(10):
        # Rebuild and re-split the dataset so each run sees a fresh train/test split.
        dataset = Dataset(categories)
        dataset.load_preprocessed(categories)
        dataset.split_train_bayers(nbr)
        print_v2_test_docs_vocabulary_labeled(categories)
        dataset.load_preprocessed_test_vocabulary_labeled_in_use(categories)
        # Vectorize with the vocabulary generated for the current split.
        vectorizer = CountVectorizer(vocabulary=Vocabulary.get_vocabulary(categories))
        vectors = vectorizer.fit_transform(dataset.train['data'])
        # MultinomialNB handles sparse matrices directly, so no densifying is needed.
        clf = MultinomialNB().fit(vectors, dataset.train['target'])
        test_vec = vectorizer.transform(dataset.test['data'])
        pred = clf.predict(test_vec)
        avg_f1 += metrics.f1_score(dataset.test['target'], pred, average='macro')
        avg_accuracy += clf.score(test_vec, dataset.test['target'])
    avg_f1 /= 10
    avg_accuracy /= 10
    str_list.extend([
        "NB BOW runtime voc Avg f1: " + str(avg_f1),
        "NB BOW runtime voc Avg acc: " + str(avg_accuracy)
    ])
    print("Avg f1: " + str(avg_f1))
    print("Avg acc: " + str(avg_accuracy))
def run_naive_bayes_bow(nbr, str_list):
    """Average Naive Bayes bag-of-words scores over 10 runs, letting
    CountVectorizer learn its vocabulary from the training data."""
    avg_f1 = 0
    avg_accuracy = 0
    for _ in range(10):
        # Fresh dataset and train/test split for each run.
        dataset = Dataset(categories)
        dataset.split_train_bayers(nbr)
        # Default CountVectorizer: the vocabulary is built from the training documents.
        vectorizer = CountVectorizer()
        vectors = vectorizer.fit_transform(dataset.train['data'])
        clf = MultinomialNB().fit(vectors, dataset.train['target'])
        test_vec = vectorizer.transform(dataset.test['data'])
        pred = clf.predict(test_vec)
        avg_f1 += metrics.f1_score(dataset.test['target'], pred, average='macro')
        avg_accuracy += clf.score(test_vec, dataset.test['target'])
    avg_f1 /= 10
    avg_accuracy /= 10
    str_list.extend([
        "NB BOW Avg f1: " + str(avg_f1),
        "NB BOW Avg acc: " + str(avg_accuracy)
    ])
    print("Avg f1: " + str(avg_f1))
    print("Avg acc: " + str(avg_accuracy))
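

# Minimal usage sketch (an assumption, not part of the original module): both
# variants append their summary strings to a shared list, so a driver can run
# them back to back and dump the collected lines. The training-set size of 20
# documents per category and the output file name are illustrative only.
if __name__ == "__main__":
    results = []
    run_naive_bayes_bow(20, results)
    run_naive_bayes_bow_runtime_vocabulary(20, results)
    with open("nb_bow_results.txt", "w") as fh:
        fh.write("\n".join(results))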