Example #1
def predict_nb(X_train, X_test, y_train, y_test):
    clf = nb()
    print("nb started")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    calc_accuracy("Naive Bayes", y_test, y_pred)
    np.savetxt('submission_surf_nb.csv',
               np.c_[range(1, len(y_test) + 1), y_pred, y_test],
               delimiter=',',
               header='ImageId,Label,TrueLabel',
               comments='',
               fmt='%d')
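# Note: calc_accuracy is a project helper not shown in these snippets.
# A minimal, hypothetical stand-in that just prints the test accuracy:
from sklearn.metrics import accuracy_score

def calc_accuracy(name, y_true, y_pred):
    # print the model name and its accuracy on the held-out labels
    print(name + " accuracy: " + str(accuracy_score(y_true, y_pred)))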
Example #2
def run_nb():
    clf = nb()
    print("nb started")
    clf.fit(x, y)
    #print(clf.classes_)
    pred = clf.predict(x_)
    #print(pred)
    np.savetxt('submission_nb.csv',
               np.c_[range(1, len(test) + 1), pred, label_test],
               delimiter=',',
               header='ImageId,Label,TrueLabel',
               comments='',
               fmt='%d')
    calc_accuracy("Naive Bayes", label_test, pred)
Example #3
def predict_nb(X, y, X_train, X_test, y_train, y_test):
    clf = nb()
    print("======== Naive Bayes ========")
    clf.fit(X_train, y_train)
    pickle.dump(clf, open('naivebayes_trained_new.sav', 'wb'))
    y_pred = clf.predict(X_test)
    calc_accuracy("Naive Bayes", y_test, y_pred)
    np.savetxt('submission_surf_nb.csv',
               np.c_[range(1,
                           len(y_test) + 1), y_pred, y_test],
               delimiter=',',
               header='ImageId,Label,TrueLabel',
               comments='',
               fmt='%d')
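# The pickled model can be restored later and reused without retraining.
# A minimal sketch using the filename saved above:
import pickle

with open('naivebayes_trained_new.sav', 'rb') as f:
    clf_loaded = pickle.load(f)
y_pred = clf_loaded.predict(X_test)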
Example #4
def hyperopt_train_test(params):
    X_ = X[:]
    """
    if 'normalize' in params:
        if params['normalize'] == 1:
            X_ = normalize(X_)
            del params['normalize']
        else:
            del params['normalize']
    if 'scale' in params:
        if params['scale'] == 1:
            X_ = scale(X_)
            del params['scale']
        else:
            del params['scale']
    """
    clf = nb(**params)
    return cross_val_score(clf, X_, y, cv=5).mean()
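# hyperopt minimizes its objective, so callers typically negate the
# accuracy returned above. A minimal sketch of wiring this objective into
# hyperopt's fmin; the var_smoothing search space is an assumption and
# applies when nb is GaussianNB:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

def objective(params):
    acc = hyperopt_train_test(params)
    return {'loss': -acc, 'status': STATUS_OK}

space = {'var_smoothing': hp.loguniform('var_smoothing', -20, 0)}
trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=50, trials=trials)
print('best params:', best)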
Example #5
def NB_from_cfg(params):

    clf = nb(**params)
    
    return 1 - cross_val_score(clf, X, y, cv=5).mean()
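# Returning 1 - accuracy turns the CV score into an error rate, the form
# expected by minimizers such as SMAC. A minimal hand-rolled sweep,
# assuming nb is GaussianNB so var_smoothing is a valid parameter:
candidates = [{'var_smoothing': v} for v in (1e-9, 1e-7, 1e-5, 1e-3)]
best_cfg = min(candidates, key=NB_from_cfg)
print('lowest-error config:', best_cfg)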
Example #6
def predict_nb(X_train, X_test, y_train, y_test):
    clf = nb()
    print("nb started")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    calc_accuracy("Naive Bayes", y_test, y_pred)
Example #7
                                 ('_id', 'S30')],
                          converters={0: lambda x: x.decode('utf-8').lower()})

Xt = name_map(test_data['name']).tolist()

Xt = vec1.transform(Xt).toarray()

yt = test_data['gender']

rf_model = RandomForestClassifier(random_state=123456)
knn_model = KNeighborsClassifier(n_jobs=4)
logit = LogisticRegression(class_weight='balanced',
                           n_jobs=4,
                           warm_start=True,
                           random_state=123456)
naive = nb()
svm_model = SVC(kernel='linear', C=1)

rf_model.fit(X, y)
knn_model.fit(X, y)
logit.fit(X, y)
naive.fit(X, y)
# svm_model.fit(X,y)

print('random forest')
print(classification_report(yt, rf_model.predict(Xt)))
print('KNN:')
print(classification_report(yt, knn_model.predict(Xt)))
print('logistic regression:')
print(classification_report(yt, logit.predict(Xt)))
print('naive bayes:')
print(classification_report(yt, naive.predict(Xt)))
Example #8
#Naive Bayes Classification
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
dataset = pd.read_csv('Social_Network_Ads.csv')
x = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values
from sklearn.model_selection import train_test_split as tts
xTrain, xTest, yTrain, yTest = tts(x, y, test_size=0.25, random_state=0)
from sklearn.preprocessing import StandardScaler as ss
scale = ss()
xTrain = scale.fit_transform(xTrain)
xTest = scale.transform(xTest)
from sklearn.naive_bayes import GaussianNB as nb
classifier = nb()
classifier.fit(xTrain, yTrain)
yPred = classifier.predict(xTest)
from sklearn.metrics import confusion_matrix
conf_mat = confusion_matrix(yTest, yPred)
from matplotlib.colors import ListedColormap
X_set, y_set = xTrain, yTrain
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Naive Bayes Classification (Training set)')
plt.legend()
plt.show()
Example #9
Y = dataset[:, -1]

#define training set

X_train = X[:-100, :]
Y_train = Y[:-100, ]

#define test set

X_test = X[-100:, ]
Y_test = Y[-100:, ]

print("Checkpoint I")
#create a model instance of naive-bayes

naive_instance = nb()

print("Checkpoint II")

naive_instance.fit(X_train, Y_train)
print("Classification Score for Naive-Bayes is -:",
      naive_instance.score(X_test, Y_test))

print("Checkpoint III")

from sklearn.ensemble import AdaBoostClassifier as ABC
#create a model instance of AdaBoost
adaboost = ABC()
adaboost.fit(X_train, Y_train)
print("Classification Score for AdaBoost -: ", adaboost.score(X_test, Y_test))
Example #10
def naive_bayes(data, classifiers):
    bayes = nb()
    return bayes.fit(data, classifiers)
def main():
    ### MAIN FUNCTION ###

    ini_tot = time.time()

    arq = open('RELAT_DESEMPENHO_{0}_{1}_{2}_.txt'.format(ALGORITMO, TIPO_NB, TAM_TESTES), 'w')

    parametros = ['################# PARAMETERS #################\n',
                  'Classification algorithm...: {0}_{1}\n'.format(ALGORITMO, TIPO_NB),
                  'Number of individuals......: {0}\n'.format(str(IND)),
                  'Training set...............: {0:.2f}%\n'.format((1 - TAM_TESTES) * 100),
                  'Test set...................: {0:.2f}%\n'.format(TAM_TESTES * 100),
                  '################# PARAMETERS #################\n']

    arq.writelines(parametros)

    for c in range(50):
        inicio = time.time()

        errou = 0
        acertou = 0

        dados = []

        ind_treino, ind_teste, cl_treino, cl_teste = modelar_dados(TAM_TESTES)

        naive = nb()

        naive.fit(ind_treino, cl_treino)

        x = naive.predict(ind_teste)

        # count hits and misses against the ground-truth labels
        for i in range(len(x)):
            if cl_teste[i] == x[i]:
                acertou = acertou + 1
            else:
                errou = errou + 1

        fim = time.time()

        tempo = fim - inicio

        dados.append('\n############## TEST RESULT {0} #############\n'.format(str(c + 1)))
        dados.append('Algorithm vector...: {0} // Vector length: {1}\n'.format(str(x), str(len(x))))
        dados.append('Ground-truth vector: {0} // Vector length: {1}\n'.format(str(cl_teste), str(len(cl_teste))))
        dados.append('Training accuracy..: {0:.2f}%\n'.format(naive.score(ind_treino, cl_treino) * 100))
        dados.append('Hits...............: {0:.2f}% ({1} hits)\n'.format(acertou * 100 / len(x), acertou))
        dados.append('Misses.............: {0:.2f}% ({1} misses)\n'.format(errou * 100 / len(x), errou))
        dados.append('Execution time.....: {0:.2f}s\n'.format(tempo))
        dados.append('################# END TEST {0} ################\n'.format(str(c + 1)))

        arq.writelines(dados)

    print('MULTINOMIAL NAIVE BAYES RUN FINISHED')

    fim_tot = time.time()

    tempo_tot = fim_tot - ini_tot

    arq.write('\nTOTAL EXECUTION TIME: {0}'.format(str(tempo_tot)))

    arq.close()
Example #13
### imputing missing cases ###

imputer_object = imp(missing_values='NaN', strategy='mean', axis=0)
# fitting the object on our data -- we do this so that we can save the
# fit for our new data.
imputer_object.fit(explanatory_df)
explanatory_df = imputer_object.transform(explanatory_df)

##########################
### Naive Bayes Model  ###
##########################

### creating naive bayes classifier ###

naive_bayes_classifier = nb()

accuracy_scores = cv(naive_bayes_classifier,
                     explanatory_df,
                     response_series,
                     cv=10,
                     scoring='accuracy')
print(accuracy_scores.mean())
#looks like on average the model is 60% accurate, not very high

### calculating accuracy metrics for comparison ###

## ACCURACY METRIC 1: Cohen's Kappa ##

mean_accuracy_score = accuracy_scores.mean()
largest_class_percent_of_total = response_series.value_counts(
    normalize=True)[0]

largest_class_percent_of_total
#the largest class percent total is 90%, thus the model will correctly
#predict 90% of the time that someone WILL NOT be in the hall of fame
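# The Kappa computation itself is not shown above. Cohen's Kappa rescales
# the observed accuracy against the chance accuracy of always predicting
# the largest class (0 = no better than baseline, 1 = perfect); a sketch:
kappa = ((mean_accuracy_score - largest_class_percent_of_total) /
         (1 - largest_class_percent_of_total))
print(kappa)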
Example #14
nspammean = X_train[nspamset].mean()
nspamstd = X_train[nspamset].std()

#The model is trained; now evaluate it on the test set.
#norm.pdf(x, m, std) gives the height (density) of the normal distribution at x.
a = pd.DataFrame([
    np.log(norm.pdf(X_test[i], loc=spammean[i], scale=spamstd[i]))
    for i in X_test.columns
]).sum()
b = pd.DataFrame([
    np.log(norm.pdf(X_test[i], loc=nspammean[i], scale=nspamstd[i]))
    for i in X_test.columns
]).sum()
spam = a > b

#Compare this implementation's results against sklearn's
model = nb()
model.fit(X_train, Y_train)
res = model.predict(X_test)

check = []
for i in range(len(spam)):
    if (spam[i] == True and res[i] == 1) or (spam[i] == False and res[i] == 0):
        check.append(i)
print('Program vs sklearn: ' + str(len(check) / len(spam)))

#Check whether the models really got it right, using sklearn's metrics
from sklearn.metrics import accuracy_score

print('Hand-rolled model: ' + str(accuracy_score(Y_test, spam)))
print('sklearn model: ' + str(accuracy_score(Y_test, res)))
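# np.log of a tiny density can underflow to -inf; scipy's norm.logpdf
# computes the log-density directly. An equivalent, numerically safer
# version of the per-feature sums above:
a = pd.DataFrame([
    norm.logpdf(X_test[i], loc=spammean[i], scale=spamstd[i])
    for i in X_test.columns
]).sum()
b = pd.DataFrame([
    norm.logpdf(X_test[i], loc=nspammean[i], scale=nspamstd[i])
    for i in X_test.columns
]).sum()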
Example #15
#Convert list of lists to nd array (Required for NB Training)
for key in X_label.keys():
    train.append(X_data[key])
    trainLabel.append(label2no[X_label[key]])

train = np.array(train)
trainLabel = np.array(trainLabel)
min1 = train.min()
# print (min1)
for i in range(len(train)):
    for j in range(len(train[i])):
        train[i][j] = train[i][j] + abs(min1)

#%%
#Naive Bayes Classifier Training
nb_clf = nb().fit(train, trainLabel.transpose())

#%%
test = []
testLabel = []

for key in Y_label.keys():
    test.append(Y_data[key])
    testLabel.append(label2no[Y_label[key]])

test = np.array(test)
testLabel = np.array(testLabel)
min1 = test.min()
# shift the test features to be non-negative, as was done for training
for i in range(len(test)):
    for j in range(len(test[i])):
        test[i][j] += abs(min1)
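# The element-wise shifting loop above can be vectorized; computing the
# minimum once and broadcasting the shift is an equivalent one-liner:
test = test + abs(test.min())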
Example #16
parameters = {"n_neighbors": [1, 3, 5, 7, 9, 11]}
fix_time()
grid_obj = GridSearchCV(cls, parameters, scoring=scorer, cv=5)
grid_obj = grid_obj.fit(x_tun, y_tun)
cls = grid_obj.best_estimator_

cls.fit(x_train, y_train)

cls = grid_obj.best_estimator_
report_file.write("KNN time: " + str(elapsed()) + str("\n"))

print("KNN:")
predictionsTest["KNN"] = kfolding(x_test, y_test, cls, "KNN")[1]
pred_file.write("KNN: " + str(predictionsTest["KNN"]) + "\n")
#Naive Bayes
cls = nb()

parameters = {
    "var_smoothing":
    [1e-09, 1e-08, 1e-07, 1e-06, 1e-05, 1e-04, 1e-03, 1e-02, 1e-01, 1]
}

fix_time()
grid_obj = GridSearchCV(cls, parameters, scoring=scorer, cv=5)
grid_obj = grid_obj.fit(x_tun, y_tun)
cls = grid_obj.best_estimator_

cls.fit(x_train, y_train)

cls = grid_obj.best_estimator_
report_file.write("NB time: " + str(elapsed()) + str("\n"))
Example #17

def q5b(Xtrain1, Ttrain1, Xtest1, Ttest1, clf, name):
    startFB = time.time()
    print(name)
    print("Xtrain accuracy: " + str(clf.score(Xtrain1, Ttrain1)))
    if Xtest1 is not None and Ttest1 is not None:
        print("Xtest accuracy: " + str(clf.score(Xtest1, Ttest1)))
    print("runtime: " + str(time.time() - startFB))


q5b(Xtrain, Ttrain, Xtest, Ttest, clf, "full Gaussian Bayes classifier")

# (c)
print "\nQuestion 5(c)"
clf = nb().fit(Xtrain, Ttrain)
q5b(Xtrain, Ttrain, Xtest, Ttest, clf, "Gaussian naive Bayes classifier")

# (d)
print "\nQuestion 5(d)"
sigma = 0.1
noise = sigma * np.random.normal(size=np.shape(Xtrain))
Xtrain = Xtrain + noise
random25 = Xtrain[np.random.choice(Xtrain.shape[0], 25, replace=False), :]
random25 = random25.reshape((25, 28, 28))
plt.suptitle("Question 5(d): 25 random MNIST images.")
for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.axis('off')
    plt.imshow(random25[i], cmap='Greys', interpolation='nearest')
plt.show()