def predictResult(x_train, y_train, y_test, x_test):
    # Read the file with the sample to be predicted; take the feature columns
    # and put their values into an array.
    data2 = pd.read_csv("/tmp/predict_result.csv", header=0)
    cols2 = data2.columns[data2.columns != columnResultName]
    fts2 = data2[cols2]
    fts2 = Normalizer().fit_transform(fts2)

    # Train the random forest, persist it, and reload it before predicting.
    randomForest.fit(x_train, y_train)
    dump(randomForest, 'randomForest.model')
    randomForestLoaded = load('randomForest.model')

    prFit = randomForestLoaded.predict(x_test)
    print("Prediction:", prFit)
    print("Confusion matrix RF:")
    print(cfm(y_test, prFit))
    print("F1 score RF:")
    print(f1s(y_test, prFit))
    print("Precision score RF:")
    print(ps(y_test, prFit))
    print("Recall score RF:")
    print(rs(y_test, prFit))
    print("Classification report:")
    print(cr(y_test, prFit))

    # Predict the single sample read from the CSV.
    pr1 = randomForestLoaded.predict(fts2)
    print("Single prediction:", pr1)
    return pr1
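# The predictResult variants in this file rely on module-level names that are
# not defined in these snippets (pd, np, Normalizer, the estimators, the metric
# aliases, dump/load, columnResultName). Below is a minimal sketch of that
# assumed setup; the estimator parameters and the "result" column name are
# placeholders, not the project's actual configuration.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import Normalizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import (confusion_matrix as cfm, f1_score as f1s,
                             precision_score as ps, recall_score as rs,
                             classification_report as cr, accuracy_score as asc)
from sklearn import metrics, svm
from joblib import dump, load

columnResultName = "result"  # assumed name of the target column in the CSVs
randomForest = RandomForestClassifier(n_estimators=100, random_state=42)
knn = KNeighborsClassifier()
logisticR = LogisticRegression(max_iter=1000)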
def predictResult(betterN, x_train, y_train, y_test, x_test):
    # Read the file with the sample to be predicted; take the feature columns
    # and put their values into an array.
    data2 = pd.read_csv("/tmp/predict_result.csv", header=0)
    cols2 = data2.columns[data2.columns != columnResultName]
    fts2 = np.array(data2[cols2])

    # When no betterN value is passed, it means the model was loaded instead
    # of being trained here.
    if betterN > 0:
        knn.n_neighbors = betterN
        knn.fit(x_train, y_train)
        # dump(knn, 'models/knn_teste.joblib')

    prFit = knn.predict(x_test)
    print("Prediction:", prFit)
    print("Confusion matrix KNN:")
    print(cfm(y_test, prFit))
    print("F1 score KNN:")
    print(f1s(y_test, prFit))
    print("Precision score KNN:")
    print(ps(y_test, prFit))
    print("Recall score KNN:")
    print(rs(y_test, prFit))
    print("Classification report:")
    print(cr(y_test, prFit))

    # Predict the single sample read from the CSV.
    pr1 = knn.predict(fts2)
    print("Single prediction:", int(pr1[0]))
    print("Single prediction score:")
    print(pr1)
    return pr1
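# The KNN variant above expects `betterN` to come from a prior hyperparameter
# search. A minimal sketch of how such a value could be obtained with
# GridSearchCV; the parameter grid, cv, and scoring choices are assumptions,
# and find_better_n is a hypothetical helper, not part of the original code.
from sklearn.model_selection import GridSearchCV

def find_better_n(x_train, y_train):
    grid = GridSearchCV(KNeighborsClassifier(),
                        param_grid={"n_neighbors": range(1, 31)},
                        cv=5, scoring="f1")
    grid.fit(x_train, y_train)
    return grid.best_params_["n_neighbors"]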
def predictResult(x_train, y_train, y_test, x_test):
    # Read the file with the sample to be predicted; take the feature columns
    # and put their values into an array.
    data2 = pd.read_csv("/tmp/predict_result.csv", header=0)
    cols2 = data2.columns[data2.columns != columnResultName]
    fts2 = data2[cols2]
    fts2 = Normalizer().fit_transform(fts2)

    # Cross-validate before the final fit.
    scores = cross_val_score(logisticR, x_train, y_train, n_jobs=30)
    print("Cross-validation scores:")
    print(scores)

    # Train the logistic regression, persist it, and reload it before predicting.
    logisticR.fit(x_train, y_train)
    dump(logisticR, 'logistic.model')
    logisticLoaded = load('logistic.model')

    prFit = logisticLoaded.predict(x_test)
    print("Prediction:", prFit)
    print("Confusion matrix LR:")
    print(cfm(y_test, prFit))
    print("F1 score LR:")
    print(f1s(y_test, prFit))
    print("Precision score LR:")
    print(ps(y_test, prFit))
    print("Recall score LR:")
    print(rs(y_test, prFit))
    print("Classification report:")
    print(cr(y_test, prFit))
    print("Accuracy score:")
    print(asc(y_test, prFit))

    # Plot the confusion matrix as a heatmap.
    class_names = [0, 1]  # names of the classes
    fig, ax = plt.subplots()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names)
    plt.yticks(tick_marks, class_names)
    sns.heatmap(pd.DataFrame(cfm(y_test, prFit)), annot=True, cmap="YlGnBu", fmt='g')
    ax.xaxis.set_label_position("top")
    plt.tight_layout()
    plt.title('Confusion matrix', y=1.1)
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')
    plt.show()

    # Plot the ROC curve and report the AUC.
    y_pred_proba = logisticLoaded.predict_proba(x_test)[:, 1]
    fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
    auc = metrics.roc_auc_score(y_test, y_pred_proba)
    plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
    plt.legend(loc=4)
    plt.show()

    # Predict the single sample read from the CSV.
    pr1 = logisticLoaded.predict(fts2)
    print("Single prediction:", pr1)
    return pr1
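# Hypothetical driver showing how these predictResult variants are expected to
# be called. The training CSV path, the normalization step, and the split
# parameters are assumptions made for illustration; only the function
# signatures come from the code above.
from sklearn.model_selection import train_test_split

data = pd.read_csv("/tmp/train_data.csv", header=0)
cols = data.columns[data.columns != columnResultName]
features = Normalizer().fit_transform(data[cols])
labels = data[columnResultName]

x_train, x_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.3, random_state=42)

predictResult(x_train, y_train, y_test, x_test)  # logistic regression variant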
def f1_score(gt, pred, F1_Thresh=0.5, files=None, median=False):
    from sklearn.metrics import f1_score as f1s

    # Work with numpy arrays regardless of the input type.
    if type(gt) == list:
        gt = np.array(gt)
    if type(pred) == list:
        pred = np.array(pred)

    # Binarize the predictions at the given threshold and score them.
    output = (pred > F1_Thresh) * 1.0
    F1 = f1s(gt, output)
    F1_MAX = F1

    if median:
        # Also score median-filtered versions of the binarized predictions.
        output_median3, output_median5, output_median7 = get_median(output, files)
        F1_median3 = f1s(gt, output_median3)
        F1_median5 = f1s(gt, output_median5)
        F1_median7 = f1s(gt, output_median7)
        return [F1], F1_MAX, F1_Thresh, F1_median3, F1_median5, F1_median7
    else:
        return [F1], F1_MAX, F1_Thresh
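# Example call for the thresholded f1_score helper above. The inputs are
# illustrative; get_median (defined elsewhere) is only required when
# median=True is passed.
gt_example = [0, 1, 1, 0, 1]
pred_example = [0.2, 0.8, 0.6, 0.4, 0.9]
f1_list, f1_max, thresh = f1_score(gt_example, pred_example, F1_Thresh=0.5)
print(f1_list, f1_max, thresh)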
# Compare KNN accuracy across different neighbour counts.
for n in range(0, 5):
    print("Number of neighbours:", neighbors[n])
    knn3 = KNeighborsClassifier(n_neighbors=neighbors[n])
    knn3.fit(x_train, y_train)
    print("Accuracy Training KNN:", knn3.score(x_train, y_train))
    predictions = knn3.predict(x_test)
    accuracy = metrics.accuracy_score(y_test, predictions)
    print("Accuracy Test KNN:", accuracy)
    print("Confusion matrix KNN:")
    print(cfm(y_test, predictions))
    print("F1 score KNN:")
    print(f1s(y_test, predictions))
    print("Precision score KNN:")
    print(ps(y_test, predictions))
    print("Recall score KNN:")
    print(rs(y_test, predictions))

# SVM with a linear kernel. The classifier gets its own name so it does not
# shadow the sklearn.svm module it is created from.
svmLinear = svm.SVC(kernel='linear', C=1.0)
svmLinear.fit(x_train, y_train)
predictionsSvm = svmLinear.predict(x_test)
accuracySvm = metrics.accuracy_score(y_test, predictionsSvm)
print("SVM LINEAR Accuracy Test:", accuracySvm)