import numpy as np

from data.numpy_file import load_np_array
# compute_performance_metrics is defined elsewhere in this project; its
# module path is not shown in this fragment (see the sketch after this
# function).


def reportBestResult():
    C = [16, 32, 64, 128, 256, 512]
    bestAUC = 0
    bestParam = 0
    for c in C:
        auc_history = load_np_array("results/ada_auc_" + str(c) + ".bin")
        mean_auc = auc_history.mean()
        if mean_auc > bestAUC:
            bestAUC = mean_auc
            bestParam = c

    # Report the parameter with the best mean AUC.
    print(bestParam)

    # Load the per-fold confusion matrices for the best parameter found
    # above (not for the last value of the loop variable).
    confusion_matrix_history = load_np_array("results/rfc_folds_confusion_" +
                                             str(bestParam) + ".bin")
    print(confusion_matrix_history)

    mean_cm = np.mean(confusion_matrix_history, axis=2)
    std_cm = np.std(confusion_matrix_history, axis=2)
    #     for i in range(0,2):
    #         for j in range(0,2):
    #            mean_cm[i][j] = confusion_matrix_history[i][j].mean()

    print(mean_cm)
    print(std_cm)
    compute_performance_metrics(mean_cm)
    acc_mean = []
    recall_mean = []
    for c in C:
        accuracy_history = load_np_array("results/ada_accuracy_" + str(c) +
                                         ".bin")
        recall_history = load_np_array("results/ada_recall_" + str(c) + ".bin")
        acc_mean.append(accuracy_history.mean())
        recall_mean.append(recall_history.mean())
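
# compute_performance_metrics is project code whose implementation is not
# shown in these examples. Judging from how it is called (a 2x2 confusion
# matrix in; accuracy, precision and recall out, per Example #2), a minimal
# sketch could look like this; the *_sketch name and the cm[true, predicted]
# layout are assumptions, not the project's actual code.
def compute_performance_metrics_sketch(cm):
    cm = np.asarray(cm, dtype=float)
    tn, fp, fn, tp = cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1]
    accuracy = (tp + tn) / cm.sum()
    precision = tp / (tp + fp)  # of predicted positives, fraction correct
    recall = tp / (tp + fn)  # of actual positives, fraction recovered
    return accuracy, precision, recall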
Example #2
    #     cost_history = clf.fit(homesite_data, batch_size = 128, \
    #                            max_iterations = 100, save_interval = 10, \
    #                            path = "classifiers_data/ann_weights.bin",
    #                            return_cost = True)
    #
    #     # Save cost and accuracy history
    #     save_np_array(cost_path, cost_history)

    #     # Test neural network.
    #     oversampled_path = "resources/oversampled_normalized_data_ratio_2.bin"
    #     homesite_data = Data()
    #     homesite_data.load_sliptted_data(oversampled_path, one_hot = True)
    #     clf = NeuralNetwork(path = "classifiers_data/ann_weights.bin", \
    #                         lr = 0.005, lamb = 0.)
    #     prob_predicted_labels = clf.predict(homesite_data.validation_x)
    #     predicted_labels = np.argmax(prob_predicted_labels, axis = 1)
    #     validation_labels = np.argmax(homesite_data.validation_y, axis = 1)
    #
    #     # Show final results.
    #     results = confusion_matrix(validation_labels, predicted_labels)
    #     accuracy, precision, recall = compute_performance_metrics(results)
    #     auc = compute_auc(validation_labels, prob_predicted_labels[:, 1])
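    # compute_auc above is also project code that is not shown here. A
    # minimal sketch using scikit-learn (an assumed equivalent, not the
    # project's actual helper):
    #     from sklearn.metrics import roc_auc_score
    #     def compute_auc_sketch(labels, positive_class_scores):
    #         return roc_auc_score(labels, positive_class_scores)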

    # Save plot.
    non_normalized_cost = load_np_array(
        "results/ann_non_normalized_cost_history.bin")
    normalized_cost = load_np_array("results/ann_normalized_cost_history.bin")

    plot("results/normalization_vs_non_normalization.png", [normalized_cost, non_normalized_cost], \
         ["com normalizacao", "sem normalizacao"], "iteracoes", "custo", 'center right')
Example #3
    #         recall_history.append(recall)
    #         auc_history.append(auc)
    #         balancing_rate = np.count_nonzero(homesite.train_y) * 1.0 / len(homesite.train_y)
    #         balancing_rate_history.append(balancing_rate)
    #
    #         print('Saving result.', i * 0.1)
    #         save_np_array("../homesite_data/results/random_forest_balancing_accuracy.bin", np.array(accuracy_history))
    #         save_np_array("../homesite_data/results/random_forest_balancing_precision.bin", np.array(precision_history))
    #         save_np_array("../homesite_data/results/random_forest_balancing_recall.bin", np.array(recall_history))
    #         save_np_array("../homesite_data/results/random_forest_balancing_auc.bin", np.array(auc_history))
    #         save_np_array("../homesite_data/results/random_forest_balancing_rate.bin", np.array(balancing_rate_history))
    #
    #         del homesite
    #         del clf

    accuracy_history = load_np_array(
        "../homesite_data/results/random_forest_balancing_accuracy.bin")
    precision_history = load_np_array(
        "../homesite_data/results/random_forest_balancing_precision.bin")
    recall_history = load_np_array(
        "../homesite_data/results/random_forest_balancing_recall.bin")
    auc_history = load_np_array(
        "../homesite_data/results/random_forest_balancing_auc.bin")
    balancing_rate_history = load_np_array(
        "../homesite_data/results/random_forest_balancing_rate.bin")

    #     for accuracy, precision, recall, auc, balancing_rate in zip(accuracy_history, precision_history, recall_history, auc_history, balancing_rate_history):
    #         print(accuracy, precision, recall, auc, balancing_rate)


    plot("../homesite_data/results/random_forest_balacing.png", [recall_history, auc_history], \
         ["sensitividade ", "AUC"], "taxa de balanceamento", "metricas", 'center right', \
Example #4
#
# # Plot results.
# accuracy_history = load_np_array("../homesite_data/results/random_forest_grid_search_accuracy.bin")
# precision_history = load_np_array("../homesite_data/results/random_forest_grid_search_precision.bin")
# recall_history = load_np_array("../homesite_data/results/random_forest_grid_search_recall.bin")
# auc_history = load_np_array("../homesite_data/results/random_forest_grid_search_auc.bin")
#
# for accuracy, precision, recall, auc in zip(accuracy_history, precision_history, recall_history, auc_history):
#     print(accuracy, precision, recall, auc)
#
# plot("../homesite_data/results/random_forest_grid_search.png", [recall_history, auc_history], \
#      ["sensitividade ", "AUC"], "numero de arvores", "metricas", 'center right', \
#         x = np.linspace(1, len(recall_history) * 10, num = len(recall_history), endpoint = True))
#
import numpy as np

from data.numpy_file import load_np_array

c = 300  # selected parameter value for the random forest results
accuracy_history = load_np_array("results/random_forests/rf_accuracy_" + str(c) + ".bin")
precision_history = load_np_array("results/random_forests/rf_precision_" + str(c) + ".bin")
recall_history = load_np_array("results/random_forests/rf_recall_" + str(c) + ".bin")
auc_history = load_np_array("results/random_forests/rf_auc_" + str(c) + ".bin")
confusion_matrix_history = load_np_array("results/random_forests/rf_confusion_matrix_" + str(c) + ".bin")

# Show confusion matrix for best c.
confusion_matrix_mean = np.zeros(4)
confusion_matrix_std = np.zeros(4)
confusion_matrix_mean[0] = np.mean(confusion_matrix_history[0, 0, :] * 100.0 / (confusion_matrix_history[0, 0, :] + confusion_matrix_history[0, 1, :]))
confusion_matrix_mean[1] = np.mean(confusion_matrix_history[0, 1, :] * 100.0 / (confusion_matrix_history[0, 0, :] + confusion_matrix_history[0, 1, :]))
confusion_matrix_mean[2] = np.mean(confusion_matrix_history[1, 0, :] * 100.0 / (confusion_matrix_history[1, 0, :] + confusion_matrix_history[1, 1, :]))
confusion_matrix_mean[3] = np.mean(confusion_matrix_history[1, 1, :] * 100.0 / (confusion_matrix_history[1, 0, :] + confusion_matrix_history[1, 1, :]))
confusion_matrix_std[0] = np.std(confusion_matrix_history[0, 0, :] * 100.0 / (confusion_matrix_history[0, 0, :] + confusion_matrix_history[0, 1, :]))
confusion_matrix_std[1] = np.std(confusion_matrix_history[0, 1, :] * 100.0 / (confusion_matrix_history[0, 0, :] + confusion_matrix_history[0, 1, :]))
confusion_matrix_std[2] = np.std(confusion_matrix_history[1, 0, :] * 100.0 / (confusion_matrix_history[1, 0, :] + confusion_matrix_history[1, 1, :]))
confusion_matrix_std[3] = np.std(confusion_matrix_history[1, 1, :] * 100.0 / (confusion_matrix_history[1, 0, :] + confusion_matrix_history[1, 1, :]))
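
# The eight statistics above can be computed in one vectorized step: turn
# each row of every per-fold confusion matrix into percentages, then take
# mean and std across folds (axis 2). This equivalent rewrite is offered as
# a sketch, not as project code.
row_percent = (confusion_matrix_history * 100.0 /
               confusion_matrix_history.sum(axis=1, keepdims=True))
assert np.allclose(confusion_matrix_mean, row_percent.mean(axis=2).flatten())
assert np.allclose(confusion_matrix_std, row_percent.std(axis=2).flatten())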

c = 50
accuracy_history = load_np_array("results/ann/ann_accuracy_" + str(c) + ".bin")
precision_history = load_np_array("results/ann/ann_precision_" + str(c) +
                                  ".bin")
recall_history = load_np_array("results/ann/ann_recall_" + str(c) + ".bin")
auc_history = load_np_array("results/ann/ann_auc_" + str(c) + ".bin")
confusion_matrix_history = load_np_array("results/ann/ann_confusion_matrix_" +
                                         str(c) + ".bin")

# Show confusion matrix for best c.
confusion_matrix_mean = np.zeros(4)
confusion_matrix_std = np.zeros(4)
# Convert each row of the per-fold confusion matrices to percentages and
# aggregate across folds (the same computation as the explicit statements
# in the random forest section above).
for i in range(2):
    row_sum = (confusion_matrix_history[i, 0, :] +
               confusion_matrix_history[i, 1, :])
    for j in range(2):
        cell_percent = confusion_matrix_history[i, j, :] * 100.0 / row_sum
        confusion_matrix_mean[2 * i + j] = np.mean(cell_percent)
        confusion_matrix_std[2 * i + j] = np.std(cell_percent)

Example #5
'''
Created on 30/11/2015

@author: Alexandre Yukio Yamashita
'''
import numpy as np

from data.numpy_file import load_np_array
from data.plot import plot

# Plot grid search.
C = [16, 32, 64, 128, 256, 512]
auc = []
recall = []

for c in C:
    # Keep only the last 5 entries of each history.
    recall_history = load_np_array("results/adaboost/ada_recall_" + str(c) + ".bin")[-5:]
    accuracy_history = load_np_array("results/adaboost/ada_accuracy_" + str(c) + ".bin")[-5:]
    precision_history = load_np_array("results/adaboost/ada_precision_" + str(c) + ".bin")[-5:]
    auc_history = load_np_array("results/adaboost/ada_auc_" + str(c) + ".bin")[-5:]
    auc.append(np.mean(auc_history))
    recall.append(np.mean(recall_history))

recall = np.array(recall)
auc = np.array(auc)
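
# With the per-C means in hand, the best estimator count can be read off
# directly; a small usage sketch mirroring the explicit search loop in
# Example #1:
best_c = C[int(np.argmax(auc))]
print("best number of estimators by mean AUC:", best_c)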

plot("results/adaboost/adaboost_grid_search.png", [recall, auc], \
    ["sensitividade ", "AUC"], "numero de estimadores", "metricas", 'center right', \