Esempio n. 1
0
def run_naive_bayes_bow_runtime_vocabulary(nbr, str_list):
    i = 0
    avg_f1 = 0
    avg_accuracy = 0
    while i < 10:
        dataset = Dataset(categories)
        dataset.load_preprocessed(categories)
        dataset.split_train_bayers(nbr)
        print_v2_test_docs_vocabulary_labeled(categories)
        dataset.load_preprocessed_test_vocabulary_labeled_in_use(categories)
        vectorizer = CountVectorizer(
            vocabulary=Vocabulary.get_vocabulary(categories))
        vectors = vectorizer.fit_transform(dataset.train['data'])

        clf = MultinomialNB().fit(vectors.todense(), dataset.train['target'])
        test_vec = vectorizer.transform(dataset.test['data'])
        pred = clf.predict(test_vec.todense())
        avg_f1 += metrics.f1_score(dataset.test['target'],
                                   pred,
                                   average='macro')
        avg_accuracy += clf.score(test_vec.todense(), dataset.test['target'])
        i += 1
    avg_accuracy = avg_accuracy / 10
    avg_f1 = avg_f1 / 10
    str_list.extend([
        "NB BOW runtime voc Avg f1: " + avg_f1.__str__(),
        "NB BOW runtime voc Avg acc: " + avg_accuracy.__str__()
    ])
    print("Avg f1: " + avg_f1.__str__())
    print("Avg acc: " + avg_accuracy.__str__())
Esempio n. 2
0
def run_naive_bayes_bow(nbr, str_list):
    i = 0
    avg_f1 = 0
    avg_accuracy = 0
    while i < 10:
        dataset = Dataset(categories)
        dataset.split_train_bayers(nbr)
        vectorizer = CountVectorizer()
        vectors = vectorizer.fit_transform(dataset.train['data'])

        clf = MultinomialNB().fit(vectors.todense(), dataset.train['target'])
        test_vec = vectorizer.transform(dataset.test['data'])
        pred = clf.predict(test_vec.todense())
        avg_f1 += metrics.f1_score(dataset.test['target'],
                                   pred,
                                   average='macro')
        avg_accuracy += clf.score(test_vec.todense(), dataset.test['target'])
        i += 1
    avg_accuracy = avg_accuracy / 10
    avg_f1 = avg_f1 / 10
    str_list.extend([
        "NB BOW Avg f1: " + avg_f1.__str__(),
        "NB BOW Avg acc: " + avg_accuracy.__str__()
    ])
    print("Avg f1: " + avg_f1.__str__())
    print("Avg acc: " + avg_accuracy.__str__())