# Code example #1
0
import NaiveBayes

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score


def read_df(csv, method):
    """Load a labelled CSV and turn it into a feature matrix plus labels.

    The file is read with its first row as the header and must provide a
    ``text`` column and a ``spam`` column.

    Args:
        csv: Path or file-like object accepted by ``pandas.read_csv``.
        method: Object exposing ``fit_transform``; it is fitted on the
            ``text`` column, and its output must provide ``toarray()``.

    Returns:
        A ``(features, labels)`` tuple: the ``toarray()`` result of the
        fitted transform, and the ``spam`` column as a NumPy array.
    """
    frame = pd.read_csv(csv, header=0)
    # Fit and transform in one pass, then call toarray() on the result.
    features = method.fit_transform(frame.text).toarray()
    labels = frame.spam.to_numpy()
    return features, labels


# Experiment table: human-readable label -> configured NaiveBayes.VectorizedNB.
#
# Each label names the vectorizer, the classifier and the preprocessing
# options, and the keyword arguments must match it. Several entries were
# copy-pasted without the arguments their label promises, making them exact
# duplicates of a neighbouring entry:
#   * the "TfidfVectorizer ... with Stemmer" entries lacked vectorizer='Tfidf'
#   * the "TfidfVectorizer ... n_grams=2" entries lacked n_grams=2
# Those arguments are now passed explicitly.
#
# NOTE(review): entries without `vectorizer=` / `classifier=` presumably fall
# back to a Count vectorizer / Multinomial classifier — confirm against
# NaiveBayes.VectorizedNB defaults.
# NOTE(review): the third key lacks a space ("CountVectorizerMultinomialNB");
# it is kept byte-identical because callers may look entries up by label.
methods = {
    # --- CountVectorizer + MultinomialNB ---
    'CountVectorizer MultinomialNB with Stemmer': NaiveBayes.VectorizedNB(
        stemmer='Snowball'),
    'CountVectorizer MultinomialNB with Snowball, Stop_Words': NaiveBayes.VectorizedNB(
        stemmer='Snowball', stop_words=True),
    'CountVectorizerMultinomialNB with Snowball and Stop_Words, n_grams=2': NaiveBayes.VectorizedNB(
        stemmer='Snowball', stop_words=True, n_grams=2),

    # --- TfidfVectorizer + MultinomialNB ---
    'TfidfVectorizer MultinomialNB with Stemmer': NaiveBayes.VectorizedNB(
        stemmer='Snowball', vectorizer='Tfidf'),
    'TfidfVectorizer MultinomialNB with Snowball, Stop_Words': NaiveBayes.VectorizedNB(
        stemmer='Snowball', stop_words=True, vectorizer='Tfidf'),
    'TfidfVectorizer MultinomialNB with Snowball, Stop_Words, n_grams=2': NaiveBayes.VectorizedNB(
        stemmer='Snowball', stop_words=True, n_grams=2, vectorizer='Tfidf'),

    # --- CountVectorizer + GaussianNB ---
    'CountVectorizer GaussianNB with Stemmer': NaiveBayes.VectorizedNB(
        stemmer='Snowball', classifier='Gaussian'),
    'CountVectorizer GaussianNB with Snowball, Stop_Words': NaiveBayes.VectorizedNB(
        stemmer='Snowball', stop_words=True, classifier='Gaussian'),
    'CountVectorizer GaussianNB with Snowball and Stop_Words, n_grams=2': NaiveBayes.VectorizedNB(
        stemmer='Snowball', stop_words=True, n_grams=2, classifier='Gaussian'),

    # --- TfidfVectorizer + GaussianNB ---
    'TfidfVectorizer GaussianNB with Stemmer': NaiveBayes.VectorizedNB(
        stemmer='Snowball', vectorizer='Tfidf', classifier='Gaussian'),
    'TfidfVectorizer GaussianNB with Snowball, Stop_Words': NaiveBayes.VectorizedNB(
        stemmer='Snowball', stop_words=True, vectorizer='Tfidf', classifier='Gaussian'),
    'TfidfVectorizer GaussianNB with Snowball, Stop_Words, n_grams=2': NaiveBayes.VectorizedNB(
        stemmer='Snowball', stop_words=True, n_grams=2, vectorizer='Tfidf', classifier='Gaussian'),
}