import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

import NaiveBayes


def read_df(csv, method):
    """Load a CSV file and vectorize it into features and labels.

    Args:
        csv: Anything ``pandas.read_csv`` accepts; the first row is read as
            the header and the file must provide ``text`` and ``spam`` columns.
        method: Object exposing ``fit_transform``. It is fitted on the ``text``
            column and its result must support ``.toarray()``.

    Returns:
        A ``(features, labels)`` tuple: the densified output of
        ``method.fit_transform`` and the ``spam`` column as a NumPy array.
    """
    data = pd.read_csv(csv, header=0)
    features = method.fit_transform(data.text).toarray()
    labels = data.spam.to_numpy()
    return features, labels


# Registry of experiment label -> configured NaiveBayes.VectorizedNB instance.
#
# Entries that do not pass ``vectorizer`` / ``classifier`` rely on the
# defaults of VectorizedNB (presumably Count / Multinomial, judging by the
# labels -- TODO confirm against NaiveBayes.VectorizedNB).
#
# Every "TfidfVectorizer ..." entry passes vectorizer='Tfidf' explicitly and
# every "... n_grams=2" entry passes n_grams=2, so that each label describes
# the configuration it maps to and no two labels share the same settings.
#
# NOTE(review): the third key is missing a space after "CountVectorizer"; it
# is left untouched because the keys may be looked up elsewhere.
methods = {
    # --- MultinomialNB, CountVectorizer ---
    'CountVectorizer MultinomialNB with Stemmer':
        NaiveBayes.VectorizedNB(stemmer='Snowball'),
    'CountVectorizer MultinomialNB with Snowball, Stop_Words':
        NaiveBayes.VectorizedNB(stemmer='Snowball', stop_words=True),
    'CountVectorizerMultinomialNB with Snowball and Stop_Words, n_grams=2':
        NaiveBayes.VectorizedNB(stemmer='Snowball', stop_words=True,
                                n_grams=2),

    # --- MultinomialNB, TfidfVectorizer ---
    'TfidfVectorizer MultinomialNB with Stemmer':
        NaiveBayes.VectorizedNB(stemmer='Snowball', vectorizer='Tfidf'),
    'TfidfVectorizer MultinomialNB with Snowball, Stop_Words':
        NaiveBayes.VectorizedNB(stemmer='Snowball', stop_words=True,
                                vectorizer='Tfidf'),
    'TfidfVectorizer MultinomialNB with Snowball, Stop_Words, n_grams=2':
        NaiveBayes.VectorizedNB(stemmer='Snowball', stop_words=True,
                                n_grams=2, vectorizer='Tfidf'),

    # --- GaussianNB, CountVectorizer ---
    'CountVectorizer GaussianNB with Stemmer':
        NaiveBayes.VectorizedNB(stemmer='Snowball', classifier='Gaussian'),
    'CountVectorizer GaussianNB with Snowball, Stop_Words':
        NaiveBayes.VectorizedNB(stemmer='Snowball', stop_words=True,
                                classifier='Gaussian'),
    'CountVectorizer GaussianNB with Snowball and Stop_Words, n_grams=2':
        NaiveBayes.VectorizedNB(stemmer='Snowball', stop_words=True,
                                n_grams=2, classifier='Gaussian'),

    # --- GaussianNB, TfidfVectorizer ---
    'TfidfVectorizer GaussianNB with Stemmer':
        NaiveBayes.VectorizedNB(stemmer='Snowball', vectorizer='Tfidf',
                                classifier='Gaussian'),
    'TfidfVectorizer GaussianNB with Snowball, Stop_Words':
        NaiveBayes.VectorizedNB(stemmer='Snowball', stop_words=True,
                                vectorizer='Tfidf', classifier='Gaussian'),
    'TfidfVectorizer GaussianNB with Snowball, Stop_Words, n_grams=2':
        NaiveBayes.VectorizedNB(stemmer='Snowball', stop_words=True,
                                n_grams=2, vectorizer='Tfidf',
                                classifier='Gaussian'),
}