def train_and_test_unbalanced(sql_dir):
    """Train and evaluate NB / LogisticRegression / RandomForest baselines on
    both count and TF-IDF word features, printing each model's accuracy.

    NOTE(review): the labels returned by count_vectorizer are overwritten by
    the ones from tfidf_vectorizer_word — assumes both helpers produce the
    same train/test split; confirm against their implementations.
    """
    X_train_count, X_test_count, y_train, y_test = count_vectorizer(sql_dir)
    X_train_tfidf, X_test_tfidf, y_train, y_test = tfidf_vectorizer_word(sql_dir)
    # Alternative feature extractors kept for experimentation:
    # X_train_tfidf, X_test_tfidf, y_train, y_test = tfidf_vectorizer_ngram(sql_dir)
    # X_train_tfidf, X_test_tfidf, y_train, y_test = tfidf_vectorizer_chars(sql_dir)

    # (label, estimator, train matrix, test matrix) — fit order matches the
    # original sequential code.
    experiments = [
        ("NB, Count Vectors: ", naive_bayes.ComplementNB(),
         X_train_count, X_test_count),
        ("NB, TF-IDF Vectors: ", naive_bayes.ComplementNB(),
         X_train_tfidf, X_test_tfidf),
        ("LR, Count Vectors: ",
         linear_model.LogisticRegression(class_weight='balanced'),
         X_train_count, X_test_count),
        ("LR, TF-IDF Vectors: ",
         linear_model.LogisticRegression(class_weight='balanced'),
         X_train_tfidf, X_test_tfidf),
        ("RF, Count Vectors: ",
         ensemble.RandomForestClassifier(class_weight='balanced'),
         X_train_count, X_test_count),
        ("RF, TF-IDF Vectors: ",
         ensemble.RandomForestClassifier(class_weight='balanced'),
         X_train_tfidf, X_test_tfidf),
    ]
    for label, estimator, X_tr, X_te in experiments:
        _, accuracy = train_model(estimator, X_tr, y_train, X_te, y_test)
        print(label, accuracy)
def model_build(alg_name, params):
    """Build an unfitted classifier from an algorithm name and a params dict.

    Parameters
    ----------
    alg_name : str
        One of 'SVM', 'KNN', 'Random Forest', 'LightGBM', 'XGBoost',
        'Naive Bayes'.
    params : dict
        Hyperparameters; the keys read depend on alg_name (and, for SVM,
        on params['kernel']; for Naive Bayes, on params['distribution']).

    Returns
    -------
    An unfitted estimator, or None when alg_name (or the nested
    kernel/distribution selector) is not recognised.
    """
    alg = None
    if alg_name == 'SVM':
        if params['kernel'] == 'linear':
            alg = SVC(C=params['C'], probability=True, kernel='linear')
        elif params['kernel'] == 'rbf':
            alg = SVC(C=params['C'], gamma=params['gammas'], probability=True,
                      kernel=params['kernel'])
        elif params['kernel'] == 'poly':
            alg = SVC(C=params['C'], degree=params['degree'], probability=True,
                      kernel=params['kernel'])
    elif alg_name == 'KNN':
        alg = KNeighborsClassifier(n_neighbors=params['K'],
                                   weights=params['weights'],
                                   leaf_size=params['leaf_size'])
    elif alg_name == 'Random Forest':
        alg = RandomForestClassifier(n_estimators=params['n_estimators'],
                                     criterion=params['criterion'],
                                     max_features=params['max_features'],
                                     random_state=1234)
    elif alg_name == 'LightGBM':
        alg = lgb.LGBMClassifier(learning_rate=params['learning_rate'],
                                 num_leaves=params['num_leaves'],
                                 n_estimators=params['n_estimators'],
                                 objective=params['objective'])
    elif alg_name == 'XGBoost':
        # BUGFIX: the keyword was `eval_metrics`, which is not an XGBoost
        # parameter; the valid name is `eval_metric`. The params-dict key is
        # left as 'eval_metrics' so existing callers keep working.
        alg = XGBClassifier(objective=params['objective'],
                            eval_metric=params['eval_metrics'],
                            learning_rate=params['learning_rate'],
                            max_depth=params['max_depth'])
    elif alg_name == 'Naive Bayes':
        if params['distribution'] == 'Multinomial Naive Bayes':
            alg = naive_bayes.MultinomialNB(alpha=params['alpha'],
                                            fit_prior=params['fit_prior'])
        elif params['distribution'] == 'Gaussian Naive Bayes':
            alg = naive_bayes.GaussianNB()
        elif params['distribution'] == 'Complement Naive Bayes':
            alg = naive_bayes.ComplementNB(alpha=params['alpha'],
                                           fit_prior=params['fit_prior'],
                                           norm=params['norm'])
        elif params['distribution'] == 'Bernoulli Naive Bayes':
            alg = naive_bayes.BernoulliNB(alpha=params['alpha'],
                                          fit_prior=params['fit_prior'],
                                          binarize=params['binarize'])
        elif params['distribution'] == 'Categorical Naive Bayes':
            alg = naive_bayes.CategoricalNB(alpha=params['alpha'],
                                            fit_prior=params['fit_prior'])
    return alg
def set_model(classifier):
    """Map a short classifier code to an (estimator, hyperparameter grid) pair.

    Unknown codes print an error message and terminate via SystemExit.
    """
    default_grid = {'alpha': [1e-10, 1e-1, 1, 10]}
    if classifier == 'MNB':
        # NOTE(review): [.25, .5] does not sum to 1 — possibly a typo for
        # [.25, .75]; preserved as-is.
        grid = {
            'alpha': [1e-10, 1e-1, 1, 10],
            'class_prior': [[.1, .9], [.25, .5], [.75, .25]]
        }
        return naive_bayes.MultinomialNB(alpha=1e-10, class_prior=[.1, .9]), grid
    if classifier == 'BNB':
        return naive_bayes.BernoulliNB(alpha=1e-10, class_prior=[.1, .9]), default_grid
    if classifier == 'CNB':
        return naive_bayes.ComplementNB(alpha=1e-10, norm=True), default_grid
    if classifier == 'KNN':
        return neighbors.KNeighborsClassifier(n_neighbors=1), default_grid
    if classifier == 'SVM':
        return svm.SVC(kernel='linear'), {'C': [1e-3, 1]}
    if classifier == 'DT':
        return tree.DecisionTreeClassifier(), default_grid
    if classifier == 'RF':
        return ensemble.RandomForestClassifier(), default_grid
    print("Please enter a valid model.")
    raise SystemExit
def use_cl(train_samples, train_labels):
    """Grid-search a ComplementNB text classifier over TF-IDF features, then
    score it on the validation set and emit test predictions.

    NOTE(review): test_samples, validation_samples, validation_labels and
    test_id are read from the enclosing scope, not from the parameters —
    confirm they are defined before this is called.
    """
    def an(sir):
        # Custom analyzer: split the raw text on runs of whitespace.
        inner_words = re.split(r'[ \s]\s*', sir)
        return inner_words

    print("USING CLASSIFIER ")
    model = naive_bayes.ComplementNB()
    # TF-IDF over 1..3-grams produced by the whitespace analyzer.
    vectorizer = TfidfVectorizer(ngram_range=(1, 3), analyzer=an)
    train_txt = vectorizer.fit_transform(train_samples['Text'])
    test_txt = vectorizer.transform(test_samples['Text'])
    val_samples = vectorizer.transform(validation_samples['Text'])
    # Use GridSearch to find the best model.
    # BUGFIX: the original had a trailing comma after the dict, which made
    # `parameters` a 1-tuple containing the dict; also the value 1 appeared
    # twice in the alpha list, causing a redundant grid-search fit.
    parameters = {
        'alpha': [
            1, 10, 0.05, 0.1, 0.01, 0.14, 0.13, 0.12, 0.15, 0.16, 0.17,
            0.18, 0.19, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.1, 1.2,
            1.3, 1.4, 1.5, 10.1, 10.2, 10.3, 10.4, 10.5, 9.9, 9.8, 9.7, 9.6
        ]
    }
    model2 = GridSearchCV(model, parameters)
    model2.fit(train_txt, train_labels['Prediction'])
    print("Best Params: " + str(model2.best_params_))
    model_best = model2.best_estimator_
    scores_cl(model_best, val_samples, validation_labels['Prediction'])
    predict_cl(model_best, test_txt, test_id, output=True)
def set_model(classifier):
    """Map a short classifier code to an (estimator, hyperparameter grid) pair.

    'do_all' returns the sentinel string 'all' with an empty grid; unknown
    codes print an error message and terminate via SystemExit.
    """
    if classifier == 'MNB':
        grid = {'class_prior': [[.5, .5], [.48, .52], [.45, .55], [.4, .6],
                                [.6, .4], [.75, .25], [.25, .75], [.9, .1],
                                [.1, .9]],
                'alpha': [1e-10, 1e-1, 1, 10, 100]}
        return naive_bayes.MultinomialNB(alpha=1e-10), grid
    if classifier == 'BNB':
        return naive_bayes.BernoulliNB(alpha=1e-1), {}
    if classifier == 'CNB':
        return naive_bayes.ComplementNB(norm=True), {}
    if classifier == 'KNN':
        # Wider alternative grid kept from earlier runs:
        # {'p': [1,2], 'n_neighbors':[1,2,3,4,5,6,7,8,9,10], 'leaf_size':[10,20,30,40,50], 'weights':['uniform','distance'], 'algorithm':['auto','ball_tree','kd_tree','brute']}
        return neighbors.KNeighborsClassifier(n_neighbors=1), {
            'n_neighbors': [1, 5, 10], 'leaf_size': [10, 30, 50]}
    if classifier == 'SVM':
        # Wider alternative grid kept from earlier runs:
        # {'tol': [1e-3, 1e-4, 1e-5, 1e-6], 'dual':[True, False], 'max_iter':[1000,10000], 'C': [.001, .01, .1, 10, 100, 1000], 'class_weight': [{'Y':1.5, 'N':1}, {'Y':3, 'N':1.5}, {'Y':1, 'N':.75}, {'Y':1, 'N':.4}, {'Y':.75, 'N':.25}, {'Y':1, 'N':3}]}
        return svm.LinearSVC(C=10), {'C': [.001, .01, .1, 1, 10, 100]}
    if classifier == 'DT':
        return tree.DecisionTreeClassifier(), {
            'max_depth': [None, 10, 100, 1000, 10000],
            'min_weight_fraction_leaf': [0, .01, .1, .2, .3, .4, .5]}
    if classifier == 'RF':
        return ensemble.RandomForestClassifier(), {}
    if classifier == 'NN':
        return neural_network.MLPClassifier(), {}
    if classifier == 'do_all':
        return 'all', {}
    print("Please enter a valid model.")
    raise SystemExit
def NB_param_selection(X, y, nfolds):
    """Grid-search the ComplementNB smoothing parameter over [0.001, 10].

    Returns the fitted GridSearchCV object (best_estimator_, best_params_,
    cv_results_ available on it).
    """
    search = GridSearchCV(
        naive_bayes.ComplementNB(),
        {'alpha': np.linspace(0.001, 10.0, num=100)},
        cv=nfolds,
        verbose=4,
    )
    search.fit(X, y)
    return search
def classification_report(sql_dir):
    """Train ComplementNB on count vectors and print macro-averaged
    precision, recall and F-score on the test split.

    NOTE(review): the name shadows sklearn.metrics.classification_report if
    that is imported into this module.
    """
    cv = CountVectorizer(encoding='latin-1')
    encoder, X_train, X_test, y_train, y_test = dataset_divider(
        sql_dir, cv, need_encoder=True)
    model, accuracy = train_model(
        naive_bayes.ComplementNB(), X_train, y_train, X_test, y_test)
    predictions = model.predict(X_test)
    # `score` here is the precision/recall/F-support helper; support unused.
    precision, recall, fscore, support = score(y_test, predictions, average='macro')
    for metric_name, value in (('Precision', precision),
                               ('Recall', recall),
                               ('F-score', fscore)):
        print('{} : {}'.format(metric_name, value))
def train_nb(_x_train, _y_train, _x_test, _y_test):
    """Randomized search over ComplementNB hyperparameters.

    Returns
    -------
    (fitted RandomizedSearchCV, test-set accuracy of its best estimator).

    NOTE(review): the grid has only 8 combinations while RandomizedSearchCV's
    default n_iter is 10; recent sklearn falls back to exhausting the grid.
    """
    nb_param_grid = dict(alpha=[0, 0.1, 0.5, 1], norm=[False, True])
    nb_grid = RandomizedSearchCV(estimator=naive_bayes.ComplementNB(),
                                 param_distributions=nb_param_grid,
                                 cv=5, verbose=10, n_jobs=-1,
                                 scoring='accuracy')
    nb_grid.fit(_x_train, np.ravel(_y_train))
    predictions_NB = nb_grid.predict(_x_test)
    # BUGFIX: accuracy_score expects (y_true, y_pred). The result happens to
    # be identical for accuracy (symmetric), but the old order silently breaks
    # if a non-symmetric metric is ever substituted.
    nb_accr = accuracy_score(np.ravel(_y_test), predictions_NB)
    return nb_grid, nb_accr
def init_naive_bayes(self) -> None:
    """Register the four sklearn Naive Bayes variants under the 'nb' key.

    MultinomialNB works with occurrence counts;
    BernoulliNB is designed for binary/boolean features.
    """
    self.models.extend([
        naive_bayes.BernoulliNB(),
        naive_bayes.GaussianNB(),
        naive_bayes.MultinomialNB(),
        naive_bayes.ComplementNB(),
    ])
    # Every variant maps to the same model-family key.
    self.model_keys.update(
        (name, "nb")
        for name in ("bernoulli", "gaussian", "multinomial", "complement")
    )
def __init__(self, trainDF):
    """Split trainDF into train/test/cross-validation sets and register the
    baseline models to compare.
    """
    super().__init__()
    preprocessor = PreProcessor()
    self.pf = PlotFunctions()
    self.trainDF = trainDF
    # 60/40 split first; the 40% half is then split evenly into a test set
    # and a cross-validation set (so 60/20/20 overall).
    self.X_train, self.X_test, self.y_train, self.y_test = preprocessor.split_train_test(
        trainDF['cleaned_sentence'], trainDF['classification'], 0.4)
    self.X_test, self.X_cross, self.y_test, self.y_cross = preprocessor.split_train_test(
        self.X_test, self.y_test, 0.5)
    self.all_scores = []
    self.models = {
        'MultinomialNB': naive_bayes.MultinomialNB(
            alpha=0.767, class_prior=None, fit_prior=True),
        'ComplementNB': naive_bayes.ComplementNB(
            alpha=0.767, class_prior=None, fit_prior=True),
        'LogisticRegression': linear_model.LogisticRegression(solver='lbfgs'),
    }
def document_everything(X_train, X_test, y_train, y_test):
    """Fit a battery of classifiers and, for each, print its accuracy,
    classification report and confusion matrix, then show a pie chart of
    the confusion-matrix cells.

    NOTE(review): the pie chart indexes a 2x2 matrix, so this assumes a
    binary classification problem — confirm with the caller.
    """
    Reports = {}
    Accuracies = {}
    conf_matrices = {}
    # (name, estimator) pairs; fitting order matches the original code.
    classifiers = [
        ('SVM', svm.LinearSVC(C=10)),
        ('RF', ensemble.RandomForestClassifier()),
        ('DT', tree.DecisionTreeClassifier()),
        ('KNN', neighbors.KNeighborsClassifier(n_neighbors=1, leaf_size=10)),
        ('MNB', naive_bayes.MultinomialNB()),
        ('BNB', naive_bayes.BernoulliNB(alpha=1e-10)),
        ('CNB', naive_bayes.ComplementNB()),
        ('MLP', neural_network.MLPClassifier()),
    ]
    for name, clf in classifiers:
        clf = clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        Accuracies[name] = clf.score(X_test, y_test)
        Reports[name] = metrics.classification_report(y_test, y_pred)
        conf_matrices[name] = metrics.confusion_matrix(y_test, y_pred)
        # BUGFIX: the original printed 'CNB Done' after fitting the MLP;
        # the loop now always reports the model actually fitted.
        print(name + ' Done')

    for clf in Reports:
        print(str(clf))
        print(str(Accuracies[clf]))
        print(str(Reports[clf]))
        print(str(conf_matrices[clf]))
        # Flatten the 2x2 confusion matrix as [TN, FP, FN, TP] for the pie.
        size = [conf_matrices[clf][0][0], conf_matrices[clf][0][1],
                conf_matrices[clf][1][0], conf_matrices[clf][1][1]]
        labels = 'True Negatives', 'False Positive', 'False Negative', 'True Positives'
        explode = (0, .1, 0, .1)
        fig1, ax1 = plt.subplots()
        ax1.pie(size, explode=explode, labels=labels)
        ax1.axis('equal')
        plt.show()
# NOTE(review): fragment of a loop body — `i` (the train_size fraction),
# `iris` and the *_accuracy lists are defined outside this excerpt;
# indentation could not be recovered from the collapsed source.
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, train_size=i, random_state=0)
classifier = naive_bayes.BernoulliNB()
classifier.fit(X=X_train, y=y_train)
predicted = classifier.predict(X_test)
BernoulliNB_accuracy.append(accuracy_score(y_test, predicted))
'''
classifier = naive_bayes.CategoricalNB()
classifier.fit(X=X_train, y= y_train)
predicted = classifier.predict(X_test)
CategoricalNB_accuracy.append(accuracy_score(y_test, predicted))
'''
classifier = naive_bayes.ComplementNB()
classifier.fit(X=X_train, y=y_train)
predicted = classifier.predict(X_test)
ComplementNB_accuracy.append(accuracy_score(y_test, predicted))
classifier = naive_bayes.GaussianNB()
classifier.fit(X=X_train, y=y_train)
predicted = classifier.predict(X_test)
GaussianNB_accuracy.append(accuracy_score(y_test, predicted))
classifier = naive_bayes.MultinomialNB()
classifier.fit(X=X_train, y=y_train)
predicted = classifier.predict(X_test)
MultinomialNB_accuracy.append(accuracy_score(y_test, predicted))
## DTs
'avg_RTs', 'sbjACC', 'pid', 'picid']) X = [[data['congruency'], data['response'],data['prev_congruency'], \ data['prev_response'], data['avg_RTs']] for i in range(len(data['congruency']))] y = [data ['sbjACC'] for i in range(len(data['congruency']))] ''' X = [data[i][0:4] for i in range(len(data))] y = [data[i][5] for i in range(len(data))] #X = X.norma X, y = np.array(X), np.array(y) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1) regressor = nb.ComplementNB() regressor.fit(X_train, y_train) #training the algorithm y_pred = regressor.predict(X_test) print("Naive Bayes") print("Accuracy Score: ", regressor.score(X, y)) print("Test Score: ", regressor.score(X_test, y_test)) print("Test Accuracy Score: ", metrics.accuracy_score(y_test, y_pred)) print("Fl Score: ", metrics.f1_score(y_test, y_pred)) print("Parameters: ", regressor.get_params) from sklearn.metrics import classification_report, confusion_matrix print("Confusion Matrix for Training Data:") print(confusion_matrix(y_train, regressor.predict(X_train)))
"""Compare the four sklearn Naive Bayes variants on the spambase dataset,
reporting misclassifications split into false positives and negatives."""
import sklearn.naive_bayes as nb
import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv('spambase.data')
spam_features = data.drop('spam_or_not', axis=1)
spam_labels = data.loc[:, 'spam_or_not']
bayes = {
    'Gaussian': nb.GaussianNB(),
    'Multinomial': nb.MultinomialNB(),
    'Complement': nb.ComplementNB(),
    'Bernoulli': nb.BernoulliNB()
}
# NOTE(review): models are scored on their own training data, so these
# numbers are optimistic compared to a held-out split.
for k in bayes.keys():
    predictions = bayes[k].fit(spam_features, spam_labels).predict(spam_features)
    misses = (spam_labels != predictions).sum()
    # BUGFIX: among the misclassified samples, the ones *predicted* 1 (spam,
    # per the usual spambase labelling — confirm) are the false positives.
    # The original assigned that count to false_negative and the remainder
    # to false_positive, i.e. the two figures were swapped.
    false_positive = (predictions[predictions != spam_labels] == 1).sum()
    false_negative = misses - false_positive
    print(
        '{} Method Mislabelled {} points {}% {} false positive {} false negative'
        .format(k, misses, misses / spam_labels.shape[0] * 100, false_positive,
                false_negative))
# -*- coding: utf-8 -*-
"""Smoke test: fit ComplementNB on random non-negative integer count data
and predict the class of one sample."""
import numpy as np
from sklearn import naive_bayes

# 1000 samples x 100 features of counts in [0, 50); 6 target classes.
X = np.random.randint(50, size=(1000, 100))
y = np.random.randint(6, size=(1000))

model = naive_bayes.ComplementNB()
model.fit(X, y)
# Slicing (rather than indexing) keeps the single sample 2-D for predict.
print(model.predict(X[2:3]))
# NOTE(review): script fragment — lin_svc, the train/test splits and the
# no_selection_performance list are defined above this excerpt.
lin_svc.fit(X_train, y_train)
y_test_pred = lin_svc.predict(X_test)
matrix = confusion_matrix(y_test, y_test_pred)
# NOTE(review): `score` shadows any function of the same name in scope.
score = lin_svc.score(X_test, y_test)
no_selection_performance.append(('LinearSVC', score, matrix))
print("Multinomial Naive Bayes")
multNB = naive_bayes.MultinomialNB()
multNB.fit(X_train, y_train)
y_test_pred = multNB.predict(X_test)
matrix = confusion_matrix(y_test, y_test_pred)
score = multNB.score(X_test, y_test)
no_selection_performance.append(('Multinomial Naive Bayes', score, matrix))
print('Complement Naive Bayes')
compNB = naive_bayes.ComplementNB()
compNB.fit(X_train, y_train)
y_test_pred = compNB.predict(X_test)
matrix = confusion_matrix(y_test, y_test_pred)
score = compNB.score(X_test, y_test)
no_selection_performance.append(('Complement Naive Bayes', score, matrix))
print('Gradient Boosting Classifier')
gradBoost = ensemble.GradientBoostingClassifier()
gradBoost.fit(X_train, y_train)
y_test_pred = gradBoost.predict(X_test)
matrix = confusion_matrix(y_test, y_test_pred)
score = gradBoost.score(X_test, y_test)
no_selection_performance.append( ('Gradient Boosting Classifier', score, matrix))
def my_naive_bayes(x, y):
    """Fit and return a Complement Naive Bayes classifier on (x, y).

    BernoulliNB and MultinomialNB were earlier candidates; ComplementNB
    is the variant kept.
    """
    classifier = nb.ComplementNB()
    # fit() returns the estimator itself, so this returns the fitted model.
    return classifier.fit(x, y)
    # NOTE(review): tail of a train_model(...) helper whose `def` line is
    # above this excerpt; indentation reconstructed from the collapsed source.
    predictions = classifier.predict(feature_vector_valid)
    if is_neural_net:
        # Neural nets emit per-class scores; take the argmax as the label.
        predictions = predictions.argmax(axis=-1)
    accuracy = metrics.accuracy_score(predictions, valid_y)
    return accuracy


# Naive Bayes
accuracy = train_model(naive_bayes.MultinomialNB(alpha=1e-5), xtrain_tfidf_ngram, train_y, xvalid_tfidf_ngram)
# NOTE(review): label says WordLevel but the features above are ngram-level.
print("NB, WordLevel TF-IDF: ")
print('Accuracy: ', accuracy)
accuracy = train_model(naive_bayes.ComplementNB(alpha=1e-5), xtrain_tfidf, train_y, xvalid_tfidf)
print("NB, WordLevel TF-IDF: ")
print('Accuracy: ', accuracy)
accuracy = train_model(naive_bayes.MultinomialNB(alpha=1e-5), xtrain_tfidf_ngram, train_y, xvalid_tfidf_ngram)
print("NB, BigramLevel TF-IDF: ")
print('Accuracy: ', accuracy)
# RF
accuracy = train_model(ensemble.RandomForestClassifier(n_estimators=100), xtrain_tfidf, train_y, xvalid_tfidf)
print("RF, WordLevel TF-IDF: ", accuracy)
accuracy = train_model(ensemble.RandomForestClassifier(n_estimators=100), xtrain_tfidf_ngram, train_y, xvalid_tfidf_ngram)
# NOTE(review): label says WordLevel but the features are ngram-level.
print("RF, WordLevel TF-IDF: ", accuracy)
# 1.2. Train the vectorizer vectorizer = feature_extraction.text.TfidfVectorizer(min_df=5, max_df=0.1, stop_words='english') #vectorizer = feature_extraction.text.TfidfVectorizer(stop_words='english') # Try Kim's vectorizer #vectorizer = feature_extraction.text.TfidfVectorizer(max_df=0.08, stop_words='english', strip_accents='ascii', token_pattern='[A-Za-z]{1,}', ngram_range=(1,2), sublinear_tf=True) # Try Seo's vectorizer vectorizer.fit(train_raw.data) print('* The size of vocabulary: ', len(vectorizer.vocabulary_)) # 1.3. Vectorize the training and test data train_vectors = vectorizer.transform(train_raw.data) tests_vectors = vectorizer.transform(tests_raw.data) # 2. Instantiate classifier models models = [ {'name': 'linear_model.SGD', 'inst': linear_model.SGDClassifier()}, {'name': 'naive_bayes.CompNB', 'inst': naive_bayes.ComplementNB(alpha=0.4)}, {'name': 'svm.LinearSVC', 'inst': svm.LinearSVC(class_weight='balanced')}, {'name': 'svm.SVC(linear)', 'inst': svm.SVC(kernel='linear', class_weight='balanced')}, {'name': 'svm.SVC(rbf)', 'inst': svm.SVC(class_weight='balanced')}, {'name': 'neural_network.MLP', 'inst': neural_network.MLPClassifier(learning_rate='adaptive', early_stopping=True, verbose=True)}, ] # 3. Evaluate the classifier models for m in models: # Train the model m['inst'].fit(train_vectors, train_raw.target) train_predict = m['inst'].predict(train_vectors) train_accuracy = metrics.balanced_accuracy_score(train_raw.target, train_predict) # Test the model tests_predict = m['inst'].predict(tests_vectors)
# NOTE(review): script fragment — `data` (a pandas DataFrame) and
# reorder_colnames are defined above this excerpt.
data = data.reindex(columns=reorder_colnames)
# One-hot encode race; the dummy columns keep the leading space present in
# the raw adult-census values (e.g. 'race_ White').
data = pd.get_dummies(data, columns=['race'])
features = ['age', 'fnlwgt', 'work_ Private','work_ Self-emp','work_ Government', 'edunum', 'marital', 'relation', 'sex', 'gain', 'loss', 'hpw', 'country', 'occu_ Adm-clerical', 'occu_ Armed-Forces', 'occu_ Craft-repair', 'occu_ Exec-managerial', 'occu_ Farming-fishing', 'occu_ Handlers-cleaners', 'occu_ Machine-op-inspct', 'occu_ Other-service', 'occu_ Priv-house-serv', 'occu_ Prof-specialty', 'occu_ Protective-serv', 'occu_ Sales', 'occu_ Tech-support', 'occu_ Transport-moving', 'race_ Amer-Indian-Eskimo', 'race_ Asian-Pac-Islander', 'race_ Black', 'race_ Other', 'race_ White']
y = data['income']
X = data[features]
print('Using features: ' + str(features))
# Define the Naive Bayes models
gaussianModel = nb.GaussianNB()
bernoulliModel = nb.BernoulliNB()
multinomialModel = nb.MultinomialNB()
complementModel = nb.ComplementNB()
# Split training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Test options and evaluation metric
scoring = 'accuracy'
# Fit the training sets
gaussianModel.fit(X_train, y_train)
bernoulliModel.fit(X_train, y_train)
multinomialModel.fit(X_train, y_train)
complementModel.fit(X_train, y_train)
# Predict for the test sets
predG = gaussianModel.predict(X_test)
class CustomComplementNB:
    """Re-implements ComplementNB probability prediction from a fitted
    model's ``feature_log_prob_`` table, for comparison against sklearn.
    """

    def __init__(self, model):
        # Transposed to (n_features, n_classes) so predict_proba can use a
        # plain dot product. The attribute keeps the original
        # `feature_log_pro_` spelling (sic) for backward compatibility.
        self.feature_log_pro_ = model.feature_log_prob_.T

    def predict_proba(self, X):
        """Return class probabilities: a softmax over the joint
        log-likelihood X @ feature_log_pro_.
        """
        jll = np.dot(X, self.feature_log_pro_)
        # BUGFIX: normalize with a max-shifted log-sum-exp instead of the
        # naive log(sum(exp(jll))), which overflows/underflows for large
        # magnitude log-likelihoods.
        shift = np.max(jll, axis=1, keepdims=True)
        log_prob_x = shift + np.log(np.sum(np.exp(jll - shift), axis=1,
                                           keepdims=True))
        return np.exp(jll - log_prob_x)


if __name__ == '__main__':
    # Compare sklearn's probabilities with the re-implementations above
    # (Iris, data_split and CustomGaussianBayes come from project modules).
    feature, label = Iris.features, Iris.label
    train_feature, test_feature, train_label, test_label = data_split.split(
        feature, label)
    nativeBayes = bayes.GaussianNB()
    nativeBayes.fit(train_feature, train_label)
    pred = nativeBayes.predict_proba(test_feature)[0]
    print("pred", pred)
    myBayes = CustomGaussianBayes(nativeBayes)
    my_pred = myBayes.predict_proba(test_feature)[0]
    print("my_pred", my_pred)
    complementBayes = bayes.ComplementNB()
    complementBayes.fit(train_feature, train_label)
    pred = complementBayes.predict_proba(test_feature)[0]
    print("pred", pred)
    myComplementBayes = CustomComplementNB(complementBayes)
    my_pred = myComplementBayes.predict_proba(test_feature)[0]
    print("my_pred", my_pred)
# RF on Count Vectors rf, accuracy = train_model(ensemble.RandomForestClassifier(), X_train, y_train, X_test, y_test) print("RF, Count Vectors: ", accuracy) # RF on TF-IDF Vectors rf_tfidf, accuracy = train_model(ensemble.RandomForestClassifier(), X_train_tfidf, y_train, X_test_tfidf, y_test) print("RF, TF-IDF Vectors: ", accuracy) # ## For unbalanced dataset # In[20]: # Naive Bayes on Count Vectors nb, accuracy = train_model(naive_bayes.ComplementNB(), X_train, y_train, X_test, y_test) print("NB, Count Vectors: ", accuracy) # Naive Bayes on TF-IDF Vectors nb_tfidf, accuracy = train_model(naive_bayes.ComplementNB(), X_train_tfidf, y_train, X_test_tfidf, y_test) print("NB, TF-IDF Vectors: ", accuracy) # Linear Classifier on Count Vectors lr, accuracy = train_model(linear_model.LogisticRegression(class_weight='balanced'), X_train, y_train, X_test, y_test) print ("LR, Count Vectors: ", accuracy) # Linear Classifier on TF-IDF Vectors lr_tfidf, accuracy = train_model(linear_model.LogisticRegression(class_weight='balanced'), X_train_tfidf, y_train, X_test_tfidf, y_test) print ("LR, TF-IDF Vectors: ", accuracy) # RF on Count Vectors