def predict_nb(X_train, X_test, y_train, y_test):
    """Fit a Naive Bayes model on the training split, report accuracy on the
    test split, and dump (ImageId, prediction, true label) rows to a CSV.
    """
    model = nb()
    print("nb started")
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    calc_accuracy("Naive Bayes", y_test, predictions)
    # One row per test sample: 1-based id, predicted label, true label.
    row_ids = range(1, len(y_test) + 1)
    np.savetxt(
        'submission_surf_nb.csv',
        np.c_[row_ids, predictions, y_test],
        delimiter=',',
        header='ImageId,Label,TrueLabel',
        comments='',
        fmt='%d',
    )
def run_nb():
    """Train Naive Bayes on the module-level (x, y) arrays, predict on x_,
    write a submission CSV, and report accuracy against label_test.

    Relies on module globals: x, y, x_, test, label_test.
    """
    classifier = nb()
    print("nb started")
    classifier.fit(x, y)
    predicted = classifier.predict(x_)
    # 1-based ids alongside predicted and true labels.
    ids = range(1, len(test) + 1)
    np.savetxt(
        'submission_nb.csv',
        np.c_[ids, predicted, label_test],
        delimiter=',',
        header='ImageId,Label,TrueLabel',
        comments='',
        fmt='%d',
    )
    calc_accuracy("Naive Bayes", label_test, predicted)
def predict_nb(X, y, X_train, X_test, y_train, y_test):
    """Fit Gaussian NB on the training split, persist the fitted model,
    evaluate on the held-out split, and save a submission CSV.

    X and y are accepted for signature compatibility with callers but are
    not used here (only the pre-split arrays are).
    """
    clf = nb()
    print("======== Naive Bayes ========")
    clf.fit(X_train, y_train)
    # BUG FIX: the original passed open(...) straight into pickle.dump and
    # never closed the handle; a context manager guarantees flush + close.
    with open('naivebayes_trained_new.sav', 'wb') as model_file:
        pickle.dump(clf, model_file)
    y_pred = clf.predict(X_test)
    calc_accuracy("Naive Bayes", y_test, y_pred)
    # One row per test sample: 1-based id, predicted label, true label.
    np.savetxt('submission_surf_nb.csv',
               np.c_[range(1, len(y_test) + 1), y_pred, y_test],
               delimiter=',', header='ImageId,Label,TrueLabel',
               comments='', fmt='%d')
def hyperopt_train_test(params):
    """Hyperopt objective: mean 5-fold cross-validation score of nb(**params)
    on the module-level (X, y) data.

    CLEANUP: a large commented-out block (a no-op triple-quoted string that
    optionally normalized/scaled X and stripped those keys from params) was
    dead code and has been removed; `params` is forwarded to nb() unchanged.
    """
    # Defensive shallow copy retained from the original (it guarded the
    # removed in-place preprocessing block; harmless to keep).
    X_ = X[:]
    clf = nb(**params)
    return cross_val_score(clf, X_, y, cv=5).mean()
def NB_from_cfg(params):
    """Configuration objective for an optimizer: return 1 minus the mean
    5-fold CV score of nb(**params), i.e. a loss to be minimized.
    """
    estimator = nb(**params)
    mean_score = cross_val_score(estimator, X, y, cv=5).mean()
    return 1 - mean_score
def predict_nb(X_train, X_test, y_train, y_test):
    """Train a Naive Bayes classifier on the training split and report its
    accuracy on the test split via calc_accuracy.
    """
    print("nb started")
    model = nb()
    model.fit(X_train, y_train)
    calc_accuracy("Naive Bayes", y_test, model.predict(X_test))
# NOTE(review): fragment — this chunk begins mid-way through a structured-dtype
# loader call (genfromtxt-style dtype/converters tail) whose opening lies
# outside the visible source; the trailing `)]` below belongs to that call.
('_id', 'S30')], converters={0: lambda x: x.decode('utf-8').lower()})
# Vectorize the test-set names with the already-fitted vectorizer (vec1),
# then fit several classifiers on the training data and compare reports.
Xt = name_map(test_data['name']).tolist()
Xt = vec1.transform(Xt).toarray()
yt = test_data['gender']
rf_model = RandomForestClassifier(random_state=123456)
knn_model = KNeighborsClassifier(n_jobs=4)
logit = LogisticRegression(class_weight='balanced', n_jobs=4, warm_start=True, random_state=123456)
naive = nb()
svm_model = SVC(kernel='linear', C=1)
rf_model.fit(X, y)
knn_model.fit(X, y)
logit.fit(X, y)
naive.fit(X, y)
# svm_model.fit(X,y)
# Per-model classification reports against the held-out test labels.
print('random forest')
print(classification_report(yt, rf_model.predict(Xt)))
print('KNN:')
print(classification_report(yt, knn_model.predict(Xt)))
print('logistic regression:')
print(classification_report(yt, logit.predict(Xt)))
# NOTE(review): chunk is cut here — the naive-bayes report presumably follows.
print('naive bayes:')
#Naive Bayes Classification import pandas as pd import numpy as np import matplotlib.pyplot as plt dataset=pd.read_csv('Social_Network_Ads.csv') x=dataset.iloc[:,[2,3]].values y=dataset.iloc[:,4].values from sklearn.model_selection import train_test_split as tts xTrain,xTest,yTrain,yTest=tts(x,y,test_size=0.25,random_state=0) from sklearn.preprocessing import StandardScaler as ss scale=ss() xTrain=scale.fit_transform(xTrain) xTest=scale.transform(xTest) from sklearn.naive_bayes import GaussianNB as nb classifier=nb() classifier.fit(xTrain,yTrain) yPred=classifier.predict(xTest) from sklearn.metrics import confusion_matrix as cm cm=cm(yTest,yPred) from matplotlib.colors import ListedColormap X_set, y_set = xTrain, yTrain X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), alpha = 0.75, cmap = ListedColormap(('red', 'green'))) plt.xlim(X1.min(), X1.max()) plt.ylim(X2.min(), X2.max()) for i, j in enumerate(np.unique(y_set)): plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c = ListedColormap(('red', 'green'))(i), label = j) plt.title('Naive Bayes Classification (Training set)')
Y = dataset[:, -1] #define training set X_train = X[:-100, :] Y_train = Y[:-100, ] #define test set X_test = X[-100:, ] Y_test = Y[-100:, ] print("Checkpoint I") #create a model instance of naive-bayes naive_instance = nb() print("Checkpoint II") naive_instance.fit(X_train, Y_train) print("Classification Score for Naive-Bayes is -:", naive_instance.score(X_test, Y_test)) print("Checkpoint III") from sklearn.ensemble import AdaBoostClassifier as ABC #create a model instance of AdaBoost adaboost = ABC() adaboost.fit(X_train, Y_train) print("Classification Score for AdaBoost -: ", adaboost.score(X_test, Y_test))
def naive_bayes(data, classifiers):
    """Fit a fresh nb() estimator on (data, classifiers) and return it.

    `classifiers` is the target/label vector (name kept for caller
    compatibility).
    """
    return nb().fit(data, classifiers)
def main(): ### MAIN FUNCTION ###
    # Runs 50 train/test rounds of a Naive Bayes classifier and writes a
    # Portuguese-language performance report file. Relies on module globals:
    # ALGORITMO, TIPO_NB, TAM_TESTES, IND, modelar_dados, nb, time.
    ini_tot = time.time()
    arq = open('RELAT_DESEMPENHO_{0}_{1}_{2}_.txt'.format(ALGORITMO, TIPO_NB, TAM_TESTES), 'w')
    # Report header describing the run parameters (train/test proportions etc.).
    parametros=['################# PARAMETROS #################\n',
                'Algoritmo de Cllassificacao: {0}_{1}\n'.format(ALGORITMO,TIPO_NB),
                'Numero de individuos.......: {0}\n'.format(str(IND)),
                'Base de Treino.............: {0:.2f}%\n'.format((1 - TAM_TESTES)*100),
                'Base de Testes.............: {0:.2f}%\n'.format(TAM_TESTES*100),
                '################# PARAMETROS #################\n']
    arq.writelines(parametros)
    for c in range(50):
        inicio = time.time()
        errou = 0    # miss counter for this round
        acertou = 0  # hit counter for this round
        dados = []
        # Fresh train/test split each round (modelar_dados defined elsewhere).
        ind_treino, ind_teste, cl_treino, cl_teste = modelar_dados(TAM_TESTES)
        naive = nb()
        naive.fit(ind_treino, cl_treino)
        x = naive.predict (ind_teste)
        # Count hits/misses against the ground-truth test labels.
        for i in range (len(x)):
            if cl_teste[i] == x[i]:
                acertou = acertou + 1
            else:
                errou = errou + 1
        fim = time.time()
        tempo = fim - inicio
        # Per-round report section (vectors, train accuracy, hit/miss rates, time).
        dados.append('\n############## RESULTADO TESTE {0} #############\n'.format(str(c+1)))
        dados.append('Vetor algoritmo: {0} // Tamanho do vetor: {1}\n'.format(str(x),str(len(x))))
        dados.append('Vetor gabarito.: {0} // Tamanho do vetor: {1}\n'.format(str(cl_teste),str(len(cl_teste))))
        dados.append('Precisao do treinamento: {0:.2f}%\n'.format(naive.score(ind_treino,cl_treino)*100))
        dados.append('Acertos................: {0:.2f}% ({1} acertos)\n'.format(acertou*100/len(x), acertou))
        dados.append('Erros..................: {0:.2f}% ({1} erros)\n'.format(errou*100/len(x), errou))
        dados.append('Tempo de execução......: {0:.2f}s\n'.format(tempo))
        dados.append('################# FIM TESTE {0} ################\n'.format(str(c+1)))
        arq.writelines(dados)
    print ('FIM EXECUCAO NAIVE BAYES MULTINOMIAL')
    fim_tot = time.time()
    tempo_tot = fim_tot - ini_tot
    arq.write('\nTEMPO TOTAL DE EXECUÇÃO: {0}'.format(str(tempo_tot)))
    arq.close()
imputer_object = imp(missing_values='NaN', strategy='mean', axis=0) # fitting the object on our data -- we do this so that we can save the # fit for our new data. imputer_object.fit(explanatory_df) explanatory_df = imputer_object.transform(explanatory_df) ########################## ### Naive Bayes Model ### ########################## ### creating naive bayes classifier ### naive_bayes_classifier = nb() accuracy_scores = cv(naive_bayes_classifier, explanatory_df, response_series, cv=10, scoring='accuracy') print accuracy_scores.mean() #looks like on average the model is 60% accurate, not very high ### calculating accuracy metrics for comparison ### ## ACCURACY METRIC 1: Cohen's Kappa ## mean_accuracy_score = accuracy_scores.mean() largest_class_percent_of_total = response_series.value_counts(normalize = True)[0] largest_class_percent_of_total #the largest class percent total is 90%, thus the model will correctly #predict 90% of the time that someone WILL NOT be in the hall of fame
### imputing missing cases ### imputer_object = imp(missing_values='NaN', strategy='mean', axis=0) # fitting the object on our data -- we do this so that we can save the # fit for our new data. imputer_object.fit(explanatory_df) explanatory_df = imputer_object.transform(explanatory_df) ########################## ### Naive Bayes Model ### ########################## ### creating naive bayes classifier ### naive_bayes_classifier = nb() accuracy_scores = cv(naive_bayes_classifier, explanatory_df, response_series, cv=10, scoring='accuracy') print accuracy_scores.mean() #looks like on average the model is 60% accurate, not very high ### calculating accuracy metrics for comparison ### ## ACCURACY METRIC 1: Cohen's Kappa ## mean_accuracy_score = accuracy_scores.mean() largest_class_percent_of_total = response_series.value_counts(
# NOTE(review): Python 2 fragment — X_train, X_test, Y_train, Y_test,
# spammean/spamstd, nspamset, nb, norm, np, pd come from above this chunk.
# Comments below translated from Spanish; runtime strings left untouched.
nspammean = X_train[nspamset].mean()
nspamstd = X_train[nspamset].std()
# We already trained the model; now it must be evaluated with the test set.
# The pdf function returns the height of a point of the standard
# distribution: norm.pdf(x, m, std).
# NOTE(review): the comment above mentions pdf, but the code calls norm.cdf —
# possible bug in the hand-rolled likelihood; confirm intent.
# Per-column log-likelihood sums under the spam / non-spam Gaussians.
a = pd.DataFrame([ np.log(norm.cdf(X_test[i], loc=spammean[i], scale=spamstd[i])) for i in X_test.columns ]).sum()
b = pd.DataFrame([ np.log(norm.cdf(X_test[i], loc=nspammean[i], scale=nspamstd[i])) for i in X_test.columns ]).sum()
spam = a > b
# Check the hand-rolled results against the sklearn implementation.
model = nb()
model.fit(X_train, Y_train)
res = model.predict(X_test)
# Indices where the two classifiers agree.
check = []
for i in range(len(spam)):
    if (spam[i] == True and res[i] == 1) or (spam[i] == False and res[i] == 0):
        check.append(i)
print 'Programa vs SKlearn: ' + str(len(check) / len(spam))
# Check whether the models actually got it right, using sklearn's metric.
from sklearn.metrics import accuracy_score
print 'Modelo programado: ' + str(accuracy_score(Y_test, spam))
print 'Modelo sklearn: ' + str(accuracy_score(Y_test, res))
#Convert list of lists to nd array (Required for NB Training) for key in X_label.keys(): train.append(X_data[key]) trainLabel.append(label2no[X_label[key]]) train = np.array(train) trainLabel = np.array(trainLabel) min1 = train.min() # print (min1) for i in range(len(train)): for j in range(len(train[i])): train[i][j] = train[i][j] + abs(min1) #%% #Naive Bayes Classifier Training nb_clf = nb().fit(train, trainLabel.transpose()) #%% test = [] testLabel = [] for key in Y_label.keys(): test.append(Y_data[key]) testLabel.append(label2no[Y_label[key]]) test = np.array(test) testLabel = np.array(testLabel) min1 = test.min() for i in range(len(test)): for j in range(len(test[i])): test[i][j] += min1
# NOTE(review): fragment — `cls` (a KNN instance at this point), scorer,
# fix_time/elapsed, kfolding, x_tun/y_tun, x_train/y_train, x_test/y_test,
# report_file, pred_file, predictionsTest all come from outside this chunk.
parameters = {"n_neighbors": [1, 3, 5, 7, 9, 11]}
fix_time()
# Tune on the tuning split, then refit the best estimator on the train split.
grid_obj = GridSearchCV(cls, parameters, scoring=scorer, cv=5)
grid_obj = grid_obj.fit(x_tun, y_tun)
cls = grid_obj.best_estimator_
cls.fit(x_train, y_train)
# NOTE(review): redundant — best_estimator_ is the same object just fitted;
# this re-assignment has no effect (pattern repeated for NB below).
cls = grid_obj.best_estimator_
report_file.write("KNN time: " + str(elapsed()) + str("\n"))
print("KNN:")
predictionsTest["KNN"] = kfolding(x_test, y_test, cls, "KNN")[1]
pred_file.write("KNN: " + str(predictionsTest["KNN"]) + "\n")
#Naive Bayes
cls = nb()
# Grid over GaussianNB's variance-smoothing hyperparameter.
parameters = {
    "var_smoothing": [1e-09, 1e-08, 1e-07, 1e-06, 1e-05, 1e-04, 1e-03, 1e-02, 1e-01, 1]
}
fix_time()
grid_obj = GridSearchCV(cls, parameters, scoring=scorer, cv=5)
grid_obj = grid_obj.fit(x_tun, y_tun)
cls = grid_obj.best_estimator_
cls.fit(x_train, y_train)
cls = grid_obj.best_estimator_
report_file.write("NB time: " + str(elapsed()) + str("\n"))
# NOTE(review): Python 2 chunk (print statements) — Xtrain/Ttrain/Xtest/Ttest,
# clf, nb, np, plt, time are defined above this chunk, outside the visible
# source.
def q5b(Xtrain1, Ttrain1, Xtest1, Ttest1, clf, name):
    # Report train (and, when provided, test) accuracy of a fitted classifier
    # plus the wall-clock time spent scoring.
    startFB = time.time()
    print name
    print "Xtrain accuracy: " + str(clf.score(Xtrain1, Ttrain1))
    if Xtest1 is not None and Ttest1 is not None:
        print "Xtest accuracy: " + str(clf.score(Xtest1, Ttest1))
    print "runtime: " + str(time.time() - startFB)
q5b(Xtrain, Ttrain, Xtest, Ttest, clf, "full Gaussian Bayes classifier")
# (c)
print "\nQuestion 5(c)"
clf = nb().fit(Xtrain, Ttrain)
q5b(Xtrain, Ttrain, Xtest, Ttest, clf, "Gaussian naive Bayes classifier")
# (d)
print "\nQuestion 5(d)"
# Add Gaussian noise (sigma = 0.1) to the training images, then display 25
# randomly chosen noisy 28x28 MNIST digits in a 5x5 grid.
sigma = 0.1
noise = sigma * np.random.normal(size=np.shape(Xtrain))
Xtrain = Xtrain + noise
random25 = Xtrain[np.random.choice(Xtrain.shape[0], 25, replace=False), :]
random25 = random25.reshape((25, 28, 28))
plt.suptitle("Question 5(d): 25 random MNIST images.")
for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.axis('off')
    plt.imshow(random25[i], cmap='Greys', interpolation='nearest')
plt.show()