Exemple #1
0
def experiment_summary(pkl_file):
    """Build an ``Experiment`` summary from a stored experiment pickle.

    The file is loaded with ``read_pickle`` and its ``"config"`` entry is
    enriched with ``"parameters"`` (``"?"`` when absent),
    ``"train_start_date"`` (``"01Jan1970"`` when the data has none),
    ``"test_end_date"`` and ``"feature_summary"``. The config object is
    modified in place, so the additions are also visible through the raw
    data that is handed to ``Experiment``.

    NOTE(review): ``read_pickle`` presumably unpickles the file; only call
    this on trusted experiment files -- TODO confirm.

    Args:
        pkl_file: Path of the experiment pickle. It is also passed to
            ``timestamp_from_path`` to derive the experiment timestamp.

    Returns:
        ``Experiment`` constructed from, in order: the parsed timestamp, the
        enriched config, the AUC, the raw data, the ``fpr``/``tpr``/``fnr``/
        ``tnr`` lists (one value per threshold), the recall string, the
        aggregation entry and the EIS baseline entry.

    Raises:
        Lookups of ``"config"``, ``"test_end_date"``, ``"test_labels"``,
        ``"test_predictions"`` and ``config["features"]`` are not guarded;
        a missing entry propagates the container's lookup error
        (``KeyError`` for a plain dict).
    """
    data = read_pickle(pkl_file)
    model_config = data["config"]
    model_config.setdefault("parameters", "?") if hasattr(
        model_config, "setdefault"
    ) else None
    if "parameters" not in model_config:
        model_config["parameters"] = "?"

    if "train_start_date" in data:
        model_config["train_start_date"] = data["train_start_date"].strftime("%d%b%Y")
    else:
        model_config["train_start_date"] = "01Jan1970"

    model_config["test_end_date"] = data["test_end_date"].strftime("%d%b%Y")
    model_config["feature_summary"] = feature_summary(model_config["features"])

    labels = data["test_labels"]
    predictions = data["test_predictions"]

    auc_model = compute_AUC(labels, predictions)

    # One matrix per threshold; the [row, col] positions below are the same
    # ones the summary has always reported under these names.
    cm_thresholds = (0.10, 0.15, 0.20, 0.25, 0.30, 0.40, 0.50, 0.60, 0.70)
    matrices = [fpr_tpr(labels, predictions, level) for level in cm_thresholds]
    fpr = [cm[0, 1] for cm in matrices]
    tpr = [cm[1, 1] for cm in matrices]
    fnr = [cm[1, 0] for cm in matrices]
    tnr = [cm[0, 0] for cm in matrices]

    recall_fractions = (0.10, 0.15, 0.20)
    recalls = [
        recall_at_x_percent(labels, predictions, x_percent=fraction)
        for fraction in recall_fractions
    ]

    # Optional entries: fall back to a placeholder only when the key is
    # absent, instead of swallowing every possible exception.
    try:
        aggregation = data["aggregation"]
    except KeyError:
        aggregation = "No aggregated data stored"

    try:
        eis_baseline = data["eis_baseline"]
    except KeyError:
        eis_baseline = "No baseline stored"

    recall = "[{}, {}, {}]".format(*(value.round(2) for value in recalls))
    return Experiment(
        dateutil.parser.parse(timestamp_from_path(pkl_file)),
        model_config,
        auc_model,
        data,
        fpr,
        tpr,
        fnr,
        tnr,
        recall,
        aggregation,
        eis_baseline,
    )
Exemple #2
0
def experiment_summary(pkl_file):
    """Build an ``Experiment`` summary from a stored experiment pickle.

    The file is loaded with ``read_pickle`` and its ``"config"`` entry is
    enriched with ``"parameters"`` (``"?"`` when absent),
    ``"train_start_date"`` (``"01Jan1970"`` when the data has none),
    ``"test_end_date"`` and ``"feature_summary"``. The config object is
    modified in place, so the additions are also visible through the raw
    data that is handed to ``Experiment``.

    Args:
        pkl_file: Path of the experiment pickle. It is also passed to
            ``timestamp_from_path`` to derive the experiment timestamp.

    Returns:
        ``Experiment`` constructed from, in order: the parsed timestamp, the
        enriched config, the AUC, the raw data, the ``fpr``/``tpr``/``fnr``
        lists (one value per threshold) and the recall string.

    Raises:
        Lookups of ``"config"``, ``"test_end_date"``, ``"test_labels"``,
        ``"test_predictions"`` and ``config["features"]`` are not guarded;
        a missing entry propagates the container's lookup error
        (``KeyError`` for a plain dict).
    """
    data = read_pickle(pkl_file)
    model_config = data["config"]
    if "parameters" not in model_config:
        model_config["parameters"] = "?"

    if "train_start_date" in data:
        model_config["train_start_date"] = data["train_start_date"].strftime("%d%b%Y")
    else:
        model_config["train_start_date"] = "01Jan1970"

    model_config["test_end_date"] = data["test_end_date"].strftime("%d%b%Y")
    model_config["feature_summary"] = feature_summary(model_config["features"])

    labels = data["test_labels"]
    predictions = data["test_predictions"]

    auc_model = compute_AUC(labels, predictions)

    # The same three cut-offs drive both the matrices and the recall values.
    thresholds = (0.10, 0.15, 0.20)

    # One matrix per threshold; the [row, col] positions below are the same
    # ones the summary has always reported under these names.
    matrices = [fpr_tpr(labels, predictions, level) for level in thresholds]
    fpr = [cm[0, 1] for cm in matrices]
    tpr = [cm[1, 1] for cm in matrices]
    fnr = [cm[1, 0] for cm in matrices]

    recalls = [
        recall_at_x_percent(labels, predictions, x_percent=level)
        for level in thresholds
    ]
    recall = "[{}, {}, {}]".format(*(value.round(2) for value in recalls))

    return Experiment(
        dateutil.parser.parse(timestamp_from_path(pkl_file)),
        model_config,
        auc_model,
        data,
        fpr,
        tpr,
        fnr,
        recall,
    )
Exemple #3
0
def experiment_summary(pkl_file):
    """Build an ``Experiment`` summary from a stored experiment pickle.

    The file is loaded with ``read_pickle`` and its ``"config"`` entry is
    enriched with ``"parameters"`` (``"?"`` when absent),
    ``"train_start_date"`` (``"01Jan1970"`` when the data has none),
    ``"test_end_date"`` and ``"feature_summary"``. The config object is
    modified in place, so the additions are also visible through the raw
    data that is handed to ``Experiment``.

    Rates are read from ``data["eis_baseline"]``, which is iterated by
    sorted threshold key; each entry is indexed with ``"dsapp"`` (a matrix
    addressed as ``[row, col]``) and ``"eis"``.

    Args:
        pkl_file: Path of the experiment pickle. It is also passed to
            ``timestamp_from_path`` to derive the experiment timestamp.

    Returns:
        ``Experiment`` constructed from, in order: the parsed timestamp, the
        enriched config, the AUC, the raw data, the ``fpr``/``tpr``/``fnr``/
        ``tnr`` dicts keyed by threshold, the recall string, the aggregation
        entry, the EIS baseline and the sorted list of thresholds.

    Raises:
        Lookups of ``"config"``, ``"test_end_date"``, ``"test_labels"``,
        ``"test_predictions"``, ``"eis_baseline"`` and
        ``config["features"]`` are not guarded; a missing entry propagates
        the container's lookup error (``KeyError`` for a plain dict).
    """
    data = read_pickle(pkl_file)
    model_config = data["config"]
    if "parameters" not in model_config:
        model_config["parameters"] = "?"

    if "train_start_date" in data:
        model_config["train_start_date"] = data["train_start_date"].strftime(
            "%d%b%Y")
    else:
        model_config["train_start_date"] = "01Jan1970"

    model_config["test_end_date"] = data["test_end_date"].strftime("%d%b%Y")
    model_config["feature_summary"] = feature_summary(model_config["features"])

    labels = data["test_labels"]
    predictions = data["test_predictions"]

    auc_model = compute_AUC(labels, predictions)

    baseline_by_threshold = data["eis_baseline"]
    threshold_levels = sorted(baseline_by_threshold)

    fpr, tpr, fnr, tnr = {}, {}, {}, {}
    # Bound up front so an empty baseline mapping no longer ends in an
    # UnboundLocalError at the return statement.
    eis_baseline = "No baseline stored"
    for level in threshold_levels:
        entry = baseline_by_threshold[level]
        matrix = entry["dsapp"]
        fpr[level] = matrix[0, 1]
        tpr[level] = matrix[1, 1]
        fnr[level] = matrix[1, 0]
        tnr[level] = matrix[0, 0]
        # Overwritten on every pass: the value of the last threshold in
        # sorted order is the one passed on to Experiment.
        eis_baseline = entry["eis"]

    rec_list = [
        recall_at_x_percent(labels, predictions,
                            x_percent=rec_threshold / 100.)
        for rec_threshold in (10., 15., 20.)
    ]

    # Optional entry: fall back to a placeholder only when the key is
    # absent, instead of swallowing every possible exception.
    try:
        aggregation = data["aggregation"]
    except KeyError:
        aggregation = "No aggregated data stored"

    recall = "[{}, {}, {}]".format(*(value.round(2) for value in rec_list))
    return Experiment(dateutil.parser.parse(timestamp_from_path(pkl_file)),
                      model_config, auc_model, data, fpr, tpr, fnr, tnr,
                      recall, aggregation, eis_baseline, threshold_levels)