Esempio n. 1
0
def predictResult(x_train, y_train, y_test, x_test):
    """Fit ``randomForest``, print test-set metrics and predict the CSV sample.

    The sample to classify is read from ``/tmp/predict_result.csv``; every
    column except ``columnResultName`` is used as a feature and the rows are
    passed through ``Normalizer`` before prediction.

    Args:
        x_train: Training features handed to ``randomForest.fit``.
        y_train: Training labels handed to ``randomForest.fit``.
        y_test: Reference labels the test predictions are scored against.
        x_test: Test features used for the metric report.

    Returns:
        The prediction of the reloaded model for the rows of the CSV file.

    Note:
        ``cfm``, ``f1s``, ``ps``, ``rs`` and ``cr`` are presumably aliases of
        scikit-learn metric functions -- confirm against the file's imports.
    """
    # Read the file holding the sample to be tested and keep only the
    # feature columns (everything except the result column).
    sample_frame = pd.read_csv("/tmp/predict_result.csv", header=0)
    feature_columns = sample_frame.columns[(sample_frame.columns != columnResultName)]
    sample_features = Normalizer().fit_transform(sample_frame[feature_columns])

    randomForest.fit(x_train, y_train)

    # Persist the fitted model, then work with the copy read back from disk.
    dump(randomForest, 'randomForest.model')
    reloaded_model = load('randomForest.model')

    test_predictions = reloaded_model.predict(x_test)
    print("predicao:", test_predictions)

    # Each entry is (heading to print, metric called as metric(y_test, pred)).
    report_sections = (
        ("Matriz de Confusao LR:", cfm),
        ("F1 score LR:", f1s),
        ("Precision score LR:", ps),
        ("Recall score LR:", rs),
        ("Classification Report", cr),
    )
    for heading, metric in report_sections:
        print(heading)
        print(metric(y_test, test_predictions))

    sample_prediction = reloaded_model.predict(sample_features)
    print("predico unica", sample_prediction)
    return sample_prediction
Esempio n. 2
0
def predictResult(betterN, x_train, y_train, y_test, x_test):
    """Optionally refit ``knn`` and predict the sample stored in the CSV file.

    The sample to classify is read from ``/tmp/predict_result.csv``; every
    column except ``columnResultName`` is used as a feature.

    Args:
        betterN: Number of neighbours to train with. When it is greater than
            zero the model is refitted and a metric report is printed;
            otherwise ``knn`` is used as it is (the original author's note
            says this means the model was loaded beforehand).
        x_train: Training features handed to ``knn.fit``.
        y_train: Training labels handed to ``knn.fit``.
        y_test: Reference labels the test predictions are scored against.
        x_test: Test features used for the metric report.

    Returns:
        The prediction of ``knn`` for the rows of the CSV file.

    Note:
        ``cfm``, ``f1s``, ``ps``, ``rs`` and ``cr`` are presumably aliases of
        scikit-learn metric functions -- confirm against the file's imports.
    """
    # Read the file holding the sample to be tested, take its feature
    # columns and put the values into an array.
    sample_frame = pd.read_csv("/tmp/predict_result.csv", header=0)
    feature_columns = sample_frame.columns[(sample_frame.columns != columnResultName)]
    sample_features = np.array(sample_frame[feature_columns])

    # No positive betterN means the model was loaded, so training is skipped.
    if betterN > 0:
        knn.n_neighbors = betterN
        knn.fit(x_train, y_train)

        test_predictions = knn.predict(x_test)
        print("predicao: a", test_predictions)

        # Each entry is (heading to print, metric called as metric(y_test, pred)).
        report_sections = (
            ("Matriz de Confusao NB:", cfm),
            ("F1 score NB:", f1s),
            ("Precision score NB:", ps),
            ("Recall score NB:", rs),
            ("Classification Report", cr),
        )
        for heading, metric in report_sections:
            print(heading)
            print(metric(y_test, test_predictions))

    sample_prediction = knn.predict(sample_features)
    print("predico unica", int(sample_prediction[0]))
    print("predicao unica score")
    print(sample_prediction)
    return sample_prediction
Esempio n. 3
0
def predictResult(x_train, y_train, y_test, x_test):
    """Fit ``logisticR``, report and plot its test results, predict the sample.

    The sample to classify is read from ``/tmp/predict_result.csv``; every
    column except ``columnResultName`` is used as a feature and the rows are
    passed through ``Normalizer`` before prediction.

    Steps, in order: cross-validation scores on the training data, fit,
    dump/reload of the model, printed metric report, confusion-matrix
    heatmap, ROC curve, and finally the prediction for the CSV sample.

    Args:
        x_train: Training features.
        y_train: Training labels.
        y_test: Reference labels the test predictions are scored against.
        x_test: Test features used for the report and the plots.

    Returns:
        The prediction of the reloaded model for the rows of the CSV file.

    Note:
        ``cfm``, ``f1s``, ``ps``, ``rs``, ``cr`` and ``asc`` are presumably
        aliases of scikit-learn metric functions -- confirm against the
        file's imports.
    """
    # Read the file holding the sample to be tested and keep only the
    # feature columns (everything except the result column).
    sample_frame = pd.read_csv("/tmp/predict_result.csv", header=0)
    feature_columns = sample_frame.columns[(sample_frame.columns != columnResultName)]
    sample_features = Normalizer().fit_transform(sample_frame[feature_columns])

    cv_scores = cross_val_score(logisticR, x_train, y_train, n_jobs=30)
    print("scores cross val")
    print(cv_scores)

    # Fit, persist, then work with the copy read back from disk.
    logisticR.fit(x_train, y_train)
    dump(logisticR, 'logistic.model')
    reloaded_model = load('logistic.model')

    test_predictions = reloaded_model.predict(x_test)
    print("predicao:", test_predictions)

    # Each entry is (heading to print, metric called as metric(y_test, pred)).
    report_sections = (
        ("Matriz de Confusao LR:", cfm),
        ("F1 score LR:", f1s),
        ("Precision score LR:", ps),
        ("Recall score LR:", rs),
        ("Classification Report", cr),
        ("Accuracy score", asc),
    )
    for heading, metric in report_sections:
        print(heading)
        print(metric(y_test, test_predictions))

    # --- Confusion-matrix heatmap -------------------------------------
    labels = [0, 1]  # names of the classes
    _, axes = plt.subplots()
    tick_positions = np.arange(len(labels))
    plt.xticks(tick_positions, labels)
    plt.yticks(tick_positions, labels)
    confusion_frame = pd.DataFrame(cfm(y_test, test_predictions))
    sns.heatmap(confusion_frame, annot=True, cmap="YlGnBu", fmt='g')
    axes.xaxis.set_label_position("top")
    plt.tight_layout()
    plt.title('Confusion matrix', y=1.1)
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')
    plt.show()

    # --- ROC curve ----------------------------------------------------
    # Column 1 of predict_proba is used as the score for the ROC curve.
    positive_scores = reloaded_model.predict_proba(x_test)[:, 1]
    false_pos_rate, true_pos_rate, _ = metrics.roc_curve(y_test, positive_scores)
    roc_area = metrics.roc_auc_score(y_test, positive_scores)
    plt.plot(false_pos_rate, true_pos_rate, label="data 1, auc=" + str(roc_area))
    plt.legend(loc=4)
    plt.show()

    sample_prediction = reloaded_model.predict(sample_features)
    print("predico unica", sample_prediction)
    return sample_prediction
Esempio n. 4
0
def f1_score(gt, pred, F1_Thresh=0.5, files=None, median=False):
  """Binarise ``pred`` at ``F1_Thresh`` and score it against ``gt`` with F1.

  Args:
    gt: Ground-truth labels. Lists and tuples are converted to NumPy arrays;
      any other object is passed through unchanged.
    pred: Predicted scores. Lists and tuples are converted to NumPy arrays so
      that the element-wise ``>`` comparison works.
    F1_Thresh: Scores strictly greater than this value become ``1.0``, all
      others ``0.0``.
    files: Forwarded unchanged to ``get_median``; only used when ``median``
      is true.
    median: When true, additionally score the three outputs returned by
      ``get_median(output, files)``.

  Returns:
    ``([F1], F1_MAX, F1_Thresh)`` when ``median`` is false, otherwise
    ``([F1], F1_MAX, F1_Thresh, F1_median3, F1_median5, F1_median7)``.
    ``F1_MAX`` is always equal to ``F1`` because only one threshold is
    evaluated.
  """
  from sklearn.metrics import f1_score as f1s

  # isinstance also accepts list/tuple subclasses; plain sequences do not
  # support the element-wise threshold comparison below.
  if isinstance(gt, (list, tuple)):
    gt = np.array(gt)
  if isinstance(pred, (list, tuple)):
    pred = np.array(pred)

  # Multiplying the boolean mask by 1.0 yields a float 0.0/1.0 array.
  output = (pred > F1_Thresh) * 1.0
  F1 = f1s(gt, output)
  F1_MAX = F1

  if not median:
    return [F1], F1_MAX, F1_Thresh

  # get_median returns three variants of the binarised output (named after
  # window sizes 3, 5 and 7 -- presumably median-filtered; see get_median).
  output_median3, output_median5, output_median7 = get_median(output, files)
  F1_median3 = f1s(gt, output_median3)
  F1_median5 = f1s(gt, output_median5)
  F1_median7 = f1s(gt, output_median7)

  return [F1], F1_MAX, F1_Thresh, F1_median3, F1_median5, F1_median7
Esempio n. 5
0
# Train and evaluate one KNN classifier for each of the first five entries of
# `neighbors`, printing training/test accuracy and further metrics for each.
# NOTE: the print statements below use Python 2 syntax.
# NOTE(review): cfm/f1s/ps/rs are presumably aliases of scikit-learn metric
# functions -- confirm against the file's imports.
for n in range(0, 5):
    print "Quantidade Vizinhos:", neighbors[n]
    # A fresh classifier per neighbour count, fitted on the training split.
    knn3 = KNeighborsClassifier(n_neighbors=neighbors[n])
    knn3.fit(x_train, y_train)

    # Score on the same data the model was fitted on.
    print "Accuracy Training KNN:", knn3.score(x_train, y_train)

    # Score on the held-out test split.
    predictions = knn3.predict(x_test)
    accuracy = metrics.accuracy_score(y_test, predictions)

    print "Accuracy Test KNN:", accuracy
    print "Matriz de Confusao KNN:"
    print cfm(y_test, predictions)
    print "F1 score KNN:"
    print f1s(y_test, predictions)
    print "Precision score KNN:"
    print ps(y_test, predictions)
    print "Recall score KNN:"
    print rs(y_test, predictions)

# SVM with a linear kernel

# NOTE(review): this rebinds the name `svm` (presumably the sklearn.svm module
# up to this point) to the classifier instance, so any later `svm.SVC(...)`
# call would fail -- confirm nothing further down relies on the module.
svm = svm.SVC(kernel='linear', C=1.0)
svm.fit(x_train, y_train)

predictionsSvm = svm.predict(x_test)

# NOTE(review): the arguments are passed in the opposite order to the KNN
# accuracy_score call in the loop above (predictions first, y_test second).
accuracySvm = metrics.accuracy_score(predictionsSvm, y_test)

print "SVM LINEAR Accuracy Test:", accuracySvm