Example 1
def test_file_bohmer(file):
    # Split the raw data and label files into separate training and test files
    split_dataset(file + "_data.csv", file + "_labels.csv",
                  file + "_train.csv", file + "_test.csv", 10000)

    # Load the training log ("case_id" is the trace column, "name" the activity column)
    train_data = LogFile(file + "_train.csv",
                         ",",
                         0,
                         1000000,
                         None,
                         "case_id",
                         "name",
                         convert=False)
    # Drop the ground-truth label before training, then fit the Bohmer model
    train_data.remove_attributes(["label"])
    model = bohmer.train(train_data, 3, 4, 1)

    # Load the test log, re-using the value mappings learned from the training data
    test_data = LogFile(file + "_test.csv",
                        ",",
                        0,
                        1000000,
                        None,
                        "case_id",
                        "name",
                        convert=False,
                        values=train_data.values)
    # Score the test log and write the results to a CSV file
    bohmer.test(test_data, file + "_output_bohmer.csv", model, "label", 0)

    # Plot ROC and precision-recall curves for the scored test log
    plot.plot_single_roc_curve(file + "_output_bohmer.csv",
                               file,
                               save_file="../Data/Nolle_Graphs/" +
                               file.split("/")[-1] + "_roc_bohmer.png")
    plot.plot_single_prec_recall_curve(file + "_output_bohmer.csv",
                                       file,
                                       save_file="../Data/Nolle_Graphs/" +
                                       file.split("/")[-1] +
                                       "_precrec_bohmer.png")
Example 2
def score_continuous_net(model,
                         test,
                         label_attr,
                         output_file=None,
                         title=None):
    import Utils.PlotResults as plot

    # Score the test data in parallel and sort the results by total score
    ranking = model.test_parallel(test)
    ranking.sort(key=lambda l: l[0].get_total_score())
    scores = []
    y = []
    # Collect (row index, total score, is-anomaly) tuples for every result
    for r in ranking:
        scores.append((getattr(r[1], "Index"), r[0].get_total_score(),
                       getattr(r[1], label_attr) != 0))
        y.append(r[0].get_total_score())
    print(len(scores))

    if output_file is None:
        output_file = "../output.csv"

    # Write one line per scored row: index, score, anomaly flag
    with open(output_file, "w") as fout:
        for s in scores:
            fout.write(",".join([str(i) for i in s]))
            fout.write("\n")

    plot.plot_single_roc_curve(output_file, title)
    plot.plot_single_prec_recall_curve(output_file, title)
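
A possible call site, assuming a trained continuous network and a prepared test log are already in scope (the variable names and the "Anomaly" column below are illustrative, not part of the original code):

# Hypothetical usage of score_continuous_net
score_continuous_net(model,
                     test_log,
                     label_attr="Anomaly",
                     output_file="../Data/continuous_output.csv",
                     title="continuous net")
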
Example 3
def test_file_full(file):
    split_dataset(file + "_data.csv", file + "_labels.csv",
                  file + "_train.csv", file + "_test.csv", None)
    train_data = LogFile(file + "_train.csv", ",", 0, 1000000, None, "case_id",
                         "name")
    # Drop the ground-truth label before training the eDBN model
    train_data.remove_attributes(["label"])
    model = edbn.train(train_data)

    test_data = LogFile(file + "_test.csv",
                        ",",
                        0,
                        1000000,
                        None,
                        "case_id",
                        "name",
                        values=train_data.values)
    edbn.test(test_data, file + "_output_full.csv", model, "label", "0",
              train_data)

    plot.plot_single_roc_curve(file + "_output_full.csv",
                               file,
                               save_file="../Data/Nolle_Graphs/" +
                               file.split("/")[-1] + "_roc.png")
    plot.plot_single_prec_recall_curve(file + "_output_full.csv",
                                       file,
                                       save_file="../Data/Nolle_Graphs/" +
                                       file.split("/")[-1] + "_precrec.png")
Example 4
def stephenRun():
    # Use the BPIC15_x_sorted.csv to generate new training and test data files with anomalies introduced
    # After running this once, you can comment this line out
    # preProcessData("../Data/")

    # Indicate which are the training and test files
    train_file = "../Data/BPIC15_train_1.csv"
    test_file = "../Data/BPIC15_test_1.csv"

    # Load logfile to use as training data
    train_data = LogFile(train_file, ",", 0, 500000, None, "Case")
    train_data.remove_attributes(["Anomaly"])

    # Train the model
    model = edbn.train(train_data)

    # Test the model and save the scores in ../Data/output.csv
    test_data = LogFile(test_file,
                        ",",
                        header=0,
                        rows=500000,
                        time_attr=None,
                        trace_attr="Case",
                        values=train_data.values)
    edbn.test(test_data,
              "../Data/output.csv",
              model,
              label="Anomaly",
              normal_val="0")

    # Plot the ROC curve based on the results
    plot.plot_single_roc_curve("../Data/output.csv")
Example 5
def breast_discrete_exec():
    data = "../Data/breast_data.csv"
    labels = "../Data/breast_labels.csv"

    # Load the breast cancer data and attach the labels as an extra column
    log = pd.read_csv(data, header=None)
    labels = pd.read_csv(labels, header=None)
    log["Label"] = labels[0]

    # Prefix every column name with "V" and add a sequential ID column
    log.columns = ["V" + str(c) for c in log.columns]
    log["ID"] = log.reset_index().index
    print(log)

    # First 100 rows for training, the rest for testing; keep only normal
    # rows (VLabel == 0) in the training set and drop the label column
    train = log[:100]
    test = log[100:]
    train = train[train.VLabel == 0].drop(columns=["VLabel"])

    train.to_csv("../Data/breast_train.csv", index=False)
    test.to_csv("../Data/breast_test.csv", index=False)

    train_data = LogFile("../Data/breast_train.csv",
                         ",",
                         0,
                         500000,
                         None,
                         "ID",
                         activity_attr="Activity")
    train_data.k = 0
    model = edbn.train(train_data)

    test_data = LogFile("../Data/breast_test.csv",
                        ",",
                        0,
                        500000,
                        None,
                        "ID",
                        activity_attr="Activity")
    test_data.k = 0
    print(test_data.data)
    # Score the test data and write the results to a CSV file
    edbn.test(test_data, "../Data/breast_discrete_output.csv", model, "VLabel",
              "0")

    plot.plot_single_roc_curve("../Data/breast_discrete_output.csv",
                               "breast_discrete")
    plot.plot_single_prec_recall_curve("../Data/breast_discrete_output.csv",
                                       "breast_discrete")
Example 6
    # Collect every parent -> child relation from the learned network structure
    for edge in net.edges():
        relations.append((edge[0], edge[1]))

    # Wire each relation into the eDBN: the source variable becomes a parent of the target
    for relation in relations:
        # if relation not in mappings:
        edbn.get_variable(relation[1]).add_parent(edbn.get_variable(relation[0]))
        print(relation[0], "->", relation[1])

    edbn.train(train, single=True)

    # Score the test set and sort the results by total score
    ranking = edbn.test(test)
    ranking.sort(key=lambda l: l[0].get_total_score())
    scores = []
    y = []
    for r in ranking:
        scores.append((getattr(r[1], "Index"), r[0].get_total_score(),
                       getattr(r[1], "Class") != 1))
        y.append(r[0].get_total_score())
    print(len(scores))

    with open("../output.csv", "w") as fout:
        for s in scores:
            fout.write(",".join([str(i) for i in s]))
            fout.write("\n")

    plot.plot_single_roc_curve("../output.csv")
    plot.plot_single_prec_recall_curve("../output.csv")

    # Plot the sorted scores
    plt.plot(list(range(len(y))), y)
    plt.show()
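
This fragment comes from a larger routine: net, relations, train, test, edbn, plot and plt are all defined elsewhere in the source file. The wiring step, turning every edge of a learned graph into a parent link of the model, can be shown in isolation with a small stand-in; ToyVariable and ToyModel below are illustrative only and not part of the original code.

import networkx as nx

class ToyVariable:
    """Minimal stand-in for a model variable (illustrative only)."""
    def __init__(self, name):
        self.name = name
        self.parents = []

    def add_parent(self, parent):
        self.parents.append(parent)

class ToyModel:
    """Minimal stand-in for the eDBN model (illustrative only)."""
    def __init__(self, names):
        self.variables = {n: ToyVariable(n) for n in names}

    def get_variable(self, name):
        return self.variables[name]

# A toy learned structure: every edge becomes a parent link in the model
net = nx.DiGraph()
net.add_edges_from([("Activity", "Resource"), ("Activity", "Weekday")])

model = ToyModel(net.nodes())
for parent, child in net.edges():
    model.get_variable(child).add_parent(model.get_variable(parent))
    print(parent, "->", child)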

Example 7
def run_full():
    # Use the BPIC15_x_sorted.csv to generate new training and test data files with anomalies introduced
    # After running this once, you can comment this line out
    #preProcessData("../Data/")

    # NOTE: range(1, 2) only processes dataset 1; widen it to cover all five BPIC15 logs
    for i in range(1, 2):
        # Indicate which are the training and test files
        train_file = "../Data/bpic15_%i_train.csv" % (i)
        test_file = "../Data/bpic15_%i_test.csv" % (i)

        # Load logfile to use as training data
        train_data = LogFile(train_file,
                             ",",
                             0,
                             500000,
                             time_attr="Complete_Timestamp",
                             trace_attr="Case_ID",
                             activity_attr="Activity")
        train_data.remove_attributes(["Anomaly"])

        # train_data.keep_attributes(["Case_ID", "Complete_Timestamp", "Activity", "Resource", "case_termName"])
        train_data.remove_attributes(["planned"])
        train_data.remove_attributes(["dueDate"])
        train_data.remove_attributes(["dateFinished"])

        # train_data.keep_attributes(["Case_ID", "Complete_Timestamp", "Activity", "Resource", "Weekday"])

        # train_data.create_k_context()
        # train_data.add_duration_to_k_context()

        # Train the model
        model = edbn.train(train_data)

        # Test the model and save the scores in ../Data/output.csv
        test_data = LogFile(test_file,
                            ",",
                            header=0,
                            rows=500000,
                            time_attr="Complete_Timestamp",
                            trace_attr="Case_ID",
                            values=train_data.values)
        # test_data.create_k_context()
        # test_data.add_duration_to_k_context()

        edbn.test(test_data,
                  "../Data/output2_%i.csv" % (i),
                  model,
                  label="Anomaly",
                  normal_val="0",
                  train_data=train_data)

        # Plot the ROC curve based on the results
        plot.plot_single_roc_curve("../Data/output2_%i.csv" % (i),
                                   title="BPIC15_%i" % (i))
        plot.plot_single_prec_recall_curve("../Data/output2_%i.csv" % (i),
                                           title="BPIC15_%i" % (i))

    # Compare all five BPIC15 datasets in a single plot; this assumes output
    # files exist for i = 1..5, i.e. the loop above has been run for every dataset
    out_files = []
    labels = []
    for i in range(1, 6):
        out_files.append("../Data/output2_%i.csv" % (i))
        labels.append("MUNIS_%i" % (i))
    plot.plot_compare_roc_curve(out_files, labels, "BPIC15 Comparison")
    plot.plot_compare_prec_recall_curve(out_files, labels, "BPIC15 Comparison")