Ejemplo n.º 1
0
def collect_summary(path_list, time_span):
    """Combine the ROC aggregates of several result paths into one table.

    The aggregate for the first path supplies the reference ``FPR``
    column: each later aggregate has its ``FPR`` column overwritten with
    the leading entries of the running table's ``FPR`` column before it is
    appended row-wise.

    Args:
        path_list: Indexable, sliceable collection of paths; each one is
            passed to ``roc_aggregate``. Element 0 is read unconditionally,
            so the collection must not be empty.
        time_span: Forwarded unchanged to ``roc_aggregate``.

    Returns:
        The row-wise concatenation of all aggregates, rounded to three
        decimals with ``np.round``.
    """
    # NOTE(review): the (1, 1) constructor arguments look like placeholders
    # needed only to reach roc_aggregate -- confirm.
    roc_reader = Evaluation.Evaluation(1, 1)
    combined = roc_reader.roc_aggregate(path_list[0], time_span, eps=True)

    for extra_path in path_list[1:]:
        extra = roc_reader.roc_aggregate(extra_path, time_span, eps=True)
        n_rows = extra.shape[0]
        # Reuse the FPR values already present in the running table.
        extra["FPR"] = combined["FPR"][:n_rows]
        combined = pd.concat([combined, extra], axis=0)

    return np.round(combined, 3)
Ejemplo n.º 2
0
# ========= 2.a.i. Model =========
# Initialize the model at the first iteration

# Fit a logistic regression on the training split. class_weight gives
# class 1 a weight of 3000 against a weight of 1 for class 0.
# NOTE(review): the 3000:1 ratio is hard-coded -- presumably chosen to
# offset class imbalance; confirm.
model = sk.linear_model.LogisticRegression(solver="liblinear",
                                           max_iter=1000,
                                           class_weight={
                                               0: 1,
                                               1: 3000
                                           }).fit(XTrain, yTrain)
# Predict: keep only column 1 of predict_proba (probability of class 1)
pred = model.predict_proba(XValid)[:, 1]

# Evaluate the validation predictions against the validation labels
evaluator = Evaluation.Evaluation(yValid, pred)

# Save ROC plot (plot=False, with a save_path built from DYNAMIC_PATH and
# time_pred)
_ = evaluator.roc_plot(plot=False,
                       title=MODEL_NAME,
                       save_path=DYNAMIC_PATH + f"roc_{time_pred}")
# Save the evaluator's summary as a CSV next to the ROC plot
summary_data = evaluator.summary()
summary_data.to_csv(DYNAMIC_PATH + f"summary_{time_pred}.csv", index=False)

# Store predictions under a second name; `pred` itself is left unchanged
pred_valid1 = pred

# Permutation test
imp_means, imp_vars = feature_importance_permutation(
    predict_method=model.predict_proba_single,
Ejemplo n.º 3
0
                          hidden_size = HIDDEN_SIZE).to(device)

# Class-weighted cross-entropy loss: weight 1 for class index 0 and
# CLASS_WEIGHT for class index 1.
criterion = nn.CrossEntropyLoss(weight = torch.FloatTensor([1, CLASS_WEIGHT])).to(device)
# NOTE(review): the optimizer is built from `model.parameters()` while the
# network fitted below is `model_sub` -- confirm both names refer to the
# same network, otherwise the optimizer updates the wrong parameters.
optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)

# Fit on the "good" training subset; fit returns the model and a loss value.
model_sub, loss = model_sub.fit(XTrain_good, yTrain_good, NUM_EPOCHS, BATCH_SIZE, optimizer, criterion)
# Score the test set, binarize the scores via threshold_predict with
# fpr=FPR_THRESHOLD, then compute the true positive rate.
pred_prob_sub = model_sub.predict_proba_single(XTest)
y_pred_sub = threshold_predict(pred_prob_sub, yTest, fpr = FPR_THRESHOLD)
tpr_sub = true_positive_rate(yTest, y_pred_sub)

# Summary
# NOTE(review): the message says "valid." but tpr_sub is computed on
# XTest/yTest; tpr_full is defined outside this excerpt.
print("TPR on valid. full: {}. sub: {}".format(tpr_full, tpr_sub))



evaluator = Evaluation.Evaluation(yTest, pred_prob_full)
# ROC plot for the full model (plot=True; no save_path is passed)
_ = evaluator.roc_plot(plot = True, title = MODEL_NAME)

evaluator = Evaluation.Evaluation(yTest, pred_prob_sub)
# ROC plot for the sub model (plot=True; no save_path is passed)
# NOTE(review): both plots use the same title, MODEL_NAME.
_ = evaluator.roc_plot(plot = True, title = MODEL_NAME)








Ejemplo n.º 4
0
    # Score the training set with the model's predict_proba_single, using
    # the same batch size and transformation arguments.
    pred_train = model.predict_proba_single(x_data=XTrain,
                                            batch_size=BATCH_SIZE,
                                            transformation=transformation)

    # ========= 2.a.ii. Feature importance by permutation test =========
    # Permutation test on the test set: 5 rounds, seeded with RANDOM_SEED,
    # scored with true_positive_rate.
    # NOTE(review): fpr_threshold is presumably consumed by the metric --
    # confirm against feature_importance_permutation's definition.
    imp_means, imp_vars = feature_importance_permutation(
        predict_method=model.predict_proba_single,
        X=np.array(XTest),
        y=np.array(yTest),
        metric=true_positive_rate,
        fpr_threshold=FPR_THRESHOLD,
        num_rounds=5,
        seed=RANDOM_SEED)
    # Save feature importance plot; XTest.columns supplies the feature names
    fi_evaluator = Evaluation.FeatureImportance(imp_means, imp_vars,
                                                XTest.columns, MODEL_NAME)
    fi_evaluator.FI_plot(save_path=DYNAMIC_PATH, y_fontsize=4, eps=True)

    # ========= 2.b. Evaluation =========
    # NOTE(review): `pred` is defined outside this excerpt.
    evaluator = Evaluation.Evaluation(yTest, pred)

    # Save ROC plot (plot=False, with a save_path built from DYNAMIC_PATH
    # and time_pred)
    _ = evaluator.roc_plot(plot=False,
                           title=MODEL_NAME,
                           save_path=DYNAMIC_PATH + f"roc_{time_pred}")

    # Save the evaluator's summary as a CSV next to the ROC plot
    summary_data = evaluator.summary()
    summary_data.to_csv(DYNAMIC_PATH + f"summary_{time_pred}.csv", index=False)

    # ========= 2.c. Save predicted results =========
Ejemplo n.º 5
0
# Wrap the predictions in a one-column frame ("pred_prob") and write them
# to REPORTS_DIR.
pred = pd.DataFrame(pred, columns=["pred_prob"])
pred.to_csv(REPORTS_DIR + f"predicted_result_{time_pred}.csv",
            index=False,
            header=True)

# Save probs for train set (for stacked model); written under DYNAMIC_PATH,
# not REPORTS_DIR
pred_train = prediction_model.predict_proba_single(
    eval_features=train_features,
    batch_size=EVAL_BATCH_SIZE,
    transformation=transformation)
pred_train = pd.DataFrame(pred_train, columns=["pred_prob"])
pred_train.to_csv(DYNAMIC_PATH + f"predicted_result_train_{time_pred}.csv",
                  index=False)

# ========= 2.b. Evaluation =========
# NOTE(review): `pred` was rebound to a DataFrame at the top of this
# section, so Evaluation receives a frame rather than the raw predictions
# -- confirm that is accepted.
evaluator = Evaluation.Evaluation(yTest, pred)

# Save ROC plot (plot=False, with a save_path built from REPORTS_DIR and
# time_pred)
_ = evaluator.roc_plot(plot=False,
                       title=MODEL_NAME,
                       save_path=REPORTS_DIR + f"roc_{time_pred}")

# Save the evaluator's summary as a CSV in REPORTS_DIR
summary_data = evaluator.summary()
summary_data.to_csv(REPORTS_DIR + f"summary_{time_pred}.csv", index=False)

# ========= 2.c. Save predicted results =========
# NOTE(review): this re-wraps the existing frame and writes to the same
# path as the first to_csv call in this section -- looks redundant; confirm.
pred = pd.DataFrame(pred, columns=["pred_prob"])
pred.to_csv(REPORTS_DIR + f"predicted_result_{time_pred}.csv", index=False)

# ========= End of iteration =========
Ejemplo n.º 6
0
def collect_summary(path_list, time_span):
    """Combine the ROC aggregates of several result paths into one table.

    The aggregate for the first path supplies the reference ``FPR``
    column: each later aggregate has its ``FPR`` column overwritten with
    the leading entries of the running table's ``FPR`` column before it is
    appended row-wise.

    Args:
        path_list: Indexable, sliceable collection of paths; each one is
            passed to ``roc_aggregate``. Element 0 is read unconditionally,
            so the collection must not be empty.
        time_span: Forwarded unchanged to ``roc_aggregate``.

    Returns:
        The row-wise concatenation of all aggregates, rounded to three
        decimals with ``np.round``.
    """
    # NOTE(review): the (1, 1) constructor arguments look like placeholders
    # needed only to reach roc_aggregate -- confirm.
    roc_reader = Evaluation.Evaluation(1, 1)
    combined = roc_reader.roc_aggregate(path_list[0], time_span, eps=True)

    for extra_path in path_list[1:]:
        extra = roc_reader.roc_aggregate(extra_path, time_span, eps=True)
        n_rows = extra.shape[0]
        # Reuse the FPR values already present in the running table.
        extra["FPR"] = combined["FPR"][:n_rows]
        combined = pd.concat([combined, extra], axis=0)

    return np.round(combined, 3)


# Build one aggregate ROC summary table per model family.
# NOTE(review): collect_summary creates its own Evaluation instance, so
# evaluator_nn / evaluator_lr / evaluator_rf are not used by the
# collect_summary calls below -- check whether later code still needs them.

# NN aggregate
evaluator_nn = Evaluation.Evaluation(1, 1)
aggregate_summary_nn = collect_summary(FIG_ROOT_PATH_NN_LST, time_span)

# LR aggregate ROC
evaluator_lr = Evaluation.Evaluation(1, 1)
aggregate_summary_lr = collect_summary(FIG_ROOT_PATH_LR_LST, time_span)

# RF aggregate
evaluator_rf = Evaluation.Evaluation(1, 1)
aggregate_summary_rf = collect_summary(FIG_ROOT_PATH_RF_LST, time_span)

# IF aggregate: roc_aggregate is called directly on FIG_ROOT_PATH_IF rather
# than through collect_summary, so no concatenation or rounding is applied.
evaluator_if = Evaluation.Evaluation(1, 1)
aggregate_summary_if = evaluator_if.roc_aggregate(FIG_ROOT_PATH_IF,
                                                  time_span,
                                                  eps=True)
Ejemplo n.º 7
0
 # Keep only column 1 of predict_proba (probability of class 1).
 pred_new = model_new.predict_proba(XTest)[:, 1]
 # ========= 2.a.ii. Plot beta values =========
 # Plot the features whose coefficients are the top 50 largest in magnitude
 # among the non-zero ones.
 # `indices` are positions inside the zero-filtered array, so they cannot
 # index XTest.columns directly: as soon as one coefficient is exactly zero
 # the bar labels would shift. `non_zero_positions` maps them back to
 # positions in the full coefficient vector.
 # NOTE(review): assumes coef_ holds exactly one value per XTest column
 # (binary model) -- confirm.
 non_zero_coeffs = model_new.coef_[model_new.coef_ != 0]
 non_zero_positions = np.flatnonzero(model_new.coef_)
 indices = np.argsort(abs(non_zero_coeffs))[::-1][:50]
 _ = plt.figure()
 _ = plt.title("Logistic Regression Coefficients Values")
 # Boolean-mask indexing already yields a 1-D array, so no squeeze is
 # needed (squeezing would also break when a single coefficient remains).
 _ = sns.barplot(y=XTest.columns[non_zero_positions[indices]],
                 x=non_zero_coeffs[indices])
 _ = plt.yticks(fontsize=4)
 plt.savefig(dynamic_path + f"coeffs_{time_pred}.eps",
             format='eps',
             dpi=800)
 plt.close()
 # ========= 2.b. Evaluation =========
 evaluator = Evaluation.Evaluation(yTest, pred_new)
 # Save ROC plot (plot=False, with a save_path built from dynamic_path and
 # time_pred)
 _ = evaluator.roc_plot(plot=False,
                        title="LR",
                        save_path=dynamic_path + f"roc_{time_pred}")
 # Save the evaluator's summary as a CSV next to the ROC plot
 summary_data = evaluator.summary()
 summary_data.to_csv(dynamic_path + f"summary_{time_pred}.csv", index=False)
 # ========= 2.c. Feature importance =========
 # Permutation test
 imp_means, imp_vars = mlxtend.evaluate.feature_importance_permutation(
     predict_method=model_new.predict,
     X=np.array(XTest),
     y=np.array(yTest),
     metric=sk.metrics.f1_score,
     num_rounds=15,
Ejemplo n.º 8
0
# Plot the training and validation loss vectors against their positions and
# save the figure under DYNAMIC_PATH.
# x and y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so the former positional call raises TypeError there.
# NOTE(review): the validation losses are drawn multiplied by 10 while the
# legend label stays "valid" -- confirm the rescaling is intentional.
_ = sns.scatterplot(x = range(len(loss_train_vec)), y = loss_train_vec, label = "train")
_ = sns.scatterplot(x = range(len(loss_valid_vec)), y = 10*loss_valid_vec, label = "valid")
plt.savefig(DYNAMIC_PATH + f"losses_{time_pred}.png")
plt.close()



    # Prediction on train
    # test_loader = torch.utils.data.DataLoader(dataset = np.array(XTrain),
    #                                             batch_size = len(yTrain),
    #                                             shuffle = False)


# Evaluate the predictions against the validation labels.
# NOTE(review): `pred` is defined outside this excerpt.
evaluator = Evaluation.Evaluation(yValid, pred)

# Save ROC plot (plot = False, with a save_path built from DYNAMIC_PATH and
# time_pred)
_ = evaluator.roc_plot(plot = False, title = MODEL_NAME, save_path = DYNAMIC_PATH + f"roc_{time_pred}")

# Store predictions under a second name; `pred` itself is left unchanged
pred_valid1 = pred


# Permutation test
imp_means, imp_vars = feature_importance_permutation(
                        predict_method = model.predict_proba_single,
                        X = np.array(XValid),
                        y = np.array(yValid),
                        metric = true_positive_rate,
                        fpr_threshold = FPR_THRESHOLD,