Beispiel #1
0
def experiment_department():
    """Build a model from department 1 and score departments 1-4 with it.

    Reads ``../Data/bpic2018.csv`` into a ``LogFile``, drops a fixed list
    of attributes and converts the log to integers. The department-1
    subset is passed as both arguments to ``cd.create_model``. The model
    is pickled to ``model_department`` and read back from that file, and
    the reloaded model is then used to compute and plot detailed event
    scores for each of the departments 1 through 4.
    """
    model_path = "model_department"
    dropped_attributes = [
        "eventid", "identity_id", "event_identity_id", "year", "penalty_",
        "amount_applied", "payment_actual", "penalty_amount", "risk_factor",
        "cross_compliance", "selected_random", "selected_risk",
        "selected_manually", "rejected"
    ]

    log = LogFile("../Data/bpic2018.csv", ",", 0, None, "startTime", "case",
                  convert=False)
    log.remove_attributes(dropped_attributes)
    log.convert2int()

    # The same subset is handed over twice, exactly as create_model is
    # called with (data, data).
    training_data = log.filter_copy("self.data.department == 1")
    model = cd.create_model(training_data, training_data)

    print("Starting writing model to file")
    with open(model_path, "wb") as model_file:
        pickle.dump(model, model_file)
    print("Done")

    # Continue with the model as it was serialized, not the in-memory one.
    with open(model_path, "rb") as model_file:
        model = pickle.load(model_file)

    for department in range(1, 5):
        subset = log.filter_copy(
            "self.data.department == {}".format(department))
        event_scores = cd.get_event_detailed_scores(subset, model)
        cd.plot_attribute_graph(event_scores, model.current_variables)
Beispiel #2
0
def _binarize_p_value(p_value):
    """Return ``p_value`` unchanged when it equals 0, otherwise return 1."""
    return p_value if p_value == 0 else 1


def _show_binarized_p_values(attributes, p_values):
    """Plot the binarized p-values against the attributes and show the figure."""
    plt.plot(attributes, [_binarize_p_value(p) for p in p_values], "o")
    plt.xticks(rotation='vertical')
    plt.show()


def experiment_attributes_standard():
    """Compare per-attribute event scores between years 1, 2 and 3.

    Steps, in order:

    1. Load a pickled model from ``model_30000b``.
    2. Read the first 30000 rows of ``../Data/bpic2018.csv`` as a training
       log; its ``values`` are handed to the second, larger log that is
       actually scored.
    3. For each year, filter the log, compute detailed event scores with
       the model and plot the attribute graph.
    4. For the year pairs (1, 2), (2, 3) and (1, 3), run a two-sample
       Kolmogorov-Smirnov test per attribute and plot the p-values
       collapsed to "exactly 0" versus "anything else".
    5. Plot the median score per attribute for every year (legend labels
       2015, 2016, 2017).
    6. Repeat the KS test of a year's scores against a one-element sample
       holding a median score, and plot those p-values the same way.

    Raises:
        KeyError: if an attribute scored for year 1 is missing from the
            scores of year 2 or year 3.
    """
    years = (1, 2, 3)

    # NOTE: pickle.load can execute arbitrary code while deserializing;
    # only point this at model files from a trusted source.
    with open("model_30000b", "rb") as fin:
        model = pickle.load(fin)

    train = LogFile("../Data/bpic2018.csv",
                    ",",
                    0,
                    30000,
                    "startTime",
                    "case",
                    activity_attr=None,
                    integer_input=False,
                    convert=False)
    train.remove_attributes([
        "eventid", "identity_id", "event_identity_id", "year", "penalty_",
        "amount_applied", "payment_actual", "penalty_amount", "risk_factor",
        "cross_compliance", "selected_random", "selected_risk",
        "selected_manually", "rejected"
    ])
    train.convert2int()

    # "year" is kept in this log (unlike the training log) because the
    # filters below select on it.
    log = LogFile("../Data/bpic2018.csv",
                  ",",
                  0,
                  10000000,
                  "startTime",
                  "case",
                  convert=False,
                  values=train.values)
    log.remove_attributes([
        "eventid", "identity_id", "event_identity_id", "penalty_",
        "amount_applied", "payment_actual", "penalty_amount", "risk_factor",
        "cross_compliance", "selected_random", "selected_risk",
        "selected_manually", "rejected"
    ])
    log.convert2int()

    scores = {}
    for year in years:
        data = log.filter_copy("self.data.year == " + str(year))
        scores[year] = cd.get_event_detailed_scores(data, model)
        cd.plot_attribute_graph(scores[year], model.current_variables)

    # Year 1 defines which attributes are compared and in which order.
    attributes = sorted(scores[1].keys())

    # Year-against-year comparison; all tests run before the first plot.
    year_pairs = ((1, 2), (2, 3), (1, 3))
    between_years = [
        [stats.ks_2samp(scores[first][key], scores[second][key]).pvalue
         for key in attributes]
        for first, second in year_pairs
    ]
    for p_values in between_years:
        _show_binarized_p_values(attributes, p_values)

    medians = {
        year: [np.median(scores[year][key]) for key in attributes]
        for year in years
    }
    for year in years:
        plt.plot(attributes, medians[year], "o")
    plt.xticks(rotation='vertical')
    plt.xlabel("Attributes")
    plt.ylabel("Median Score")
    plt.legend(["2015", "2016", "2017"])
    plt.show()

    # Each entry is (year whose scores are tested, year whose median forms
    # the one-element comparison sample).
    # NOTE(review): the last pair tests year 1 against the year-3 median,
    # while the first two test a year against its own median. This may be
    # a copy-paste slip for (3, 3); kept as-is pending confirmation.
    median_pairs = ((1, 1), (2, 2), (1, 3))
    against_median = [
        [stats.ks_2samp(scores[sample_year][key],
                        [np.median(scores[median_year][key])]).pvalue
         for key in attributes]
        for sample_year, median_year in median_pairs
    ]
    for p_values in against_median:
        _show_binarized_p_values(attributes, p_values)