Esempio n. 1
0
def compare_bpic_total(path):
    """Compare the Likelihood Graph (Bohmer) and EDBN models on the total BPIC15 log.

    The training log has the "Anomaly", "Type" and "Time" attributes removed
    before either model is trained. Both models are then tested against the
    test log, and the two result files are handed to the precision/recall
    and ROC comparison plots.

    Args:
        path: Directory prefix holding the BPIC15 CSV files. It is
            concatenated directly with the file names, so it must end with a
            path separator. Results and plots go to ``path + "Output/..."``.
    """
    train = path + "BPIC15_train_total.csv"
    test = path + "BPIC15_test_total.csv"
    output = path + "Output/BPIC_15_output_total.csv"
    output_edbn = path + "Output/BPIC15_edbn_output_total.csv"
    prec_recall = path + "Output/prec_recall_total.png"
    roc = path + "Output/roc_total.png"

    # exist_ok=True replaces the exists()/mkdir() pair: no check-then-create
    # race, and missing parent directories are created as well.
    os.makedirs(path + "Output", exist_ok=True)

    train_data = LogFile(train, ",", 0, 500000, "Time", "Case", activity_attr="Activity", convert=False)
    train_data.remove_attributes(["Anomaly", "Type", "Time"])
    # The test log reuses the value mapping of the (already reduced) training log.
    test_data = LogFile(test, ",", 0, 500000, "Time", "Case", activity_attr="Activity", values=train_data.values, convert=False)

    # The Bohmer model is trained and tested before the logs are converted.
    bohmer_model = bmr.train(train_data)
    # NOTE(review): normal_val is the integer 0 here but the string "0" in the
    # EDBN call below -- confirm which type the "Anomaly" column holds for an
    # unconverted log.
    bmr.test(test_data, output, bohmer_model, label="Anomaly", normal_val=0)

    # EDBN is trained and tested on the logs after convert2int().
    train_data.convert2int()
    test_data.convert2int()

    edbn_model = edbn_train(train_data)
    edbn_test(test_data, output_edbn, edbn_model, label="Anomaly", normal_val="0")

    plt.plot_compare_prec_recall_curve([output, output_edbn], ["Likelihood Graph", "EDBN"], save_file=prec_recall)
    plt.plot_compare_roc_curve([output, output_edbn], ["Likelihood Graph", "EDBN"], roc)
Esempio n. 2
0
def compare_bpics(path):
    """Run the Likelihood Graph vs. EDBN comparison for BPIC15 logs 1 to 5.

    For each log number the training log is loaded and stripped of the
    "Anomaly", "Type" and "Time" attributes. A Bohmer model is trained and
    tested first; both logs are then converted with ``convert2int()`` and an
    EDBN model is trained and tested. The two result files are finally passed
    to the precision/recall and ROC comparison plots.

    Args:
        path: Directory prefix of the BPIC15 CSV files. It is concatenated
            directly with the file names, so it must end with a path
            separator. This function does not create ``path + "Output"``.
    """
    method_names = ("Likelihood Graph", "EDBN")

    for log_nr in range(1, 6):
        # Per-log input, result and figure locations.
        train_csv = path + "BPIC15_train_%i.csv" % log_nr
        test_csv = path + "BPIC15_test_%i.csv" % log_nr
        bohmer_csv = path + "Output/BPIC15_output_%i.csv" % log_nr
        edbn_csv = path + "Output/BPIC15_edbn_output_%i.csv" % log_nr
        prec_recall_png = path + "Output/prec_recall_%i.png" % log_nr
        roc_png = path + "Output/roc_%i.png" % log_nr

        training_log = LogFile(
            train_csv, ",", 0, 500000, "Time", "Case",
            activity_attr="Activity", convert=False,
        )
        training_log.remove_attributes(["Anomaly", "Type", "Time"])
        # The test log shares the value mapping of the reduced training log.
        testing_log = LogFile(
            test_csv, ",", 0, 500000, "Time", "Case",
            activity_attr="Activity", values=training_log.values, convert=False,
        )

        # Likelihood Graph (Bohmer) runs on the unconverted logs.
        likelihood_model = bmr.train(training_log)
        bmr.test(testing_log, bohmer_csv, likelihood_model,
                 label="Anomaly", normal_val="0")

        # EDBN runs after both logs have been converted.
        training_log.convert2int()
        testing_log.convert2int()

        dbn_model = edbn.train(training_log)
        edbn.test(testing_log, edbn_csv, dbn_model,
                  label="Anomaly", normal_val="0")

        # Fresh lists per call, as in the original.
        plt.plot_compare_prec_recall_curve(
            [bohmer_csv, edbn_csv], list(method_names), save_file=prec_recall_png
        )
        plt.plot_compare_roc_curve(
            [bohmer_csv, edbn_csv], list(method_names), roc_png
        )
Esempio n. 3
0
def compare(files, nolle_result, nolle_labels):
    """Plot precision/recall and ROC comparisons for each file prefix.

    For every entry in ``files`` the three result files
    ``<prefix>_output_sample.csv``, ``<prefix>_output_full.csv`` and
    ``<prefix>_output_bohmer.csv`` are compared. The figures are saved under
    ``../Data/Nolle_Graphs/`` using the last "/"-separated component of the
    prefix as the file name stem.

    Args:
        files: Iterable of result-file prefixes (strings).
        nolle_result: Passed through unchanged as the third positional
            argument of ``plot.plot_compare_prec_recall_curve``.
        nolle_labels: List of extra labels appended to
            ``["Sample", "Full", "Bohmer"]`` for the precision/recall plot.
    """
    # The original kept a counter `i` that was never read; it is dropped.
    for file in files:
        results = [
            file + "_output_sample.csv",
            file + "_output_full.csv",
            file + "_output_bohmer.csv",
        ]
        # Both figures share the same directory and stem; build it once.
        graph_stem = "../Data/Nolle_Graphs/" + file.split("/")[-1]

        plot.plot_compare_prec_recall_curve(
            results,
            ["Sample", "Full", "Bohmer"] + nolle_labels,
            nolle_result,
            "Comparison",
            save_file=graph_stem + "_compare_precrec.png")
        plot.plot_compare_roc_curve(
            results,
            ["Sample", "Full", "Bohmer"],
            "Comparison",
            save_file=graph_stem + "_compare_roc.png")
Esempio n. 4
0
def compare_bpic_total(path):
    """Train and test an eDBN model on the total BPIC15 log and plot the comparison.

    The training log is loaded with "Case" as trace attribute and has its
    "Anomaly" attribute removed. The test log is loaded with the training
    log's ``string_2_int`` / ``int_2_string`` mappings. The eDBN result file
    and the Likelihood Graph result file are then passed to the
    precision/recall and ROC comparison plots.

    Args:
        path: Directory prefix of the BPIC15 CSV files. It is concatenated
            directly with the file names, so it must end with a path
            separator.
    """
    train_csv = path + "BPIC15_train_total.csv"
    test_csv = path + "BPIC15_test_total.csv"
    likelihood_csv = path + "Output/BPIC_15_output_total.csv"
    edbn_csv = path + "Output/BPIC15_edbn_output_total.csv"
    prec_recall_png = path + "Output/prec_recall_total.png"
    roc_png = path + "Output/roc_total.png"

    # NOTE(review): the Bohmer (Likelihood Graph) run is disabled, so this
    # function does not write likelihood_csv although it is still plotted
    # below -- presumably the file is expected to exist from an earlier run.
    #bohmer_model = bmr.train(train, header = 0, length = 5000000)
    #bmr.test(train, test, output, bohmer_model, ",", 5000000, skip=0)

    training_log = LogFile(train_csv, ",", 0, 500000, None, "Case")
    training_log.remove_attributes(["Anomaly"])
    testing_log = LogFile(
        test_csv, ",", 0, 500000, None, "Case",
        training_log.string_2_int, training_log.int_2_string,
    )

    dbn_model = edbn.train(training_log)
    edbn.test(testing_log, edbn_csv, dbn_model, "Anomaly", "0")

    plt.plot_compare_prec_recall_curve(
        [likelihood_csv, edbn_csv], ["Likelihood Graph", "eDBN"], save_file=prec_recall_png
    )
    plt.plot_compare_roc_curve(
        [likelihood_csv, edbn_csv], ["Likelihood Graph", "eDBN"], roc_png
    )
Esempio n. 5
0
def compare_bpics(path):
    """Train and test an eDBN model on each of the BPIC15 logs 1 to 5.

    For every log number the training log is loaded with "Case" as trace
    attribute and has its "Anomaly" attribute removed. The test log is loaded
    with the training log's ``string_2_int`` / ``int_2_string`` mappings. The
    eDBN result file and the Likelihood Graph result file are then passed to
    the precision/recall and ROC comparison plots.

    Args:
        path: Directory prefix of the BPIC15 CSV files. It is concatenated
            directly with the file names, so it must end with a path
            separator.
    """
    for log_nr in range(1, 6):
        # Per-log input, result and figure locations.
        train_csv = path + "BPIC15_train_%i.csv" % log_nr
        test_csv = path + "BPIC15_test_%i.csv" % log_nr
        likelihood_csv = path + "Output/BPIC15_output_%i.csv" % log_nr
        edbn_csv = path + "Output/BPIC15_edbn_output_%i.csv" % log_nr
        prec_recall_png = path + "Output/prec_recall_%i.png" % log_nr
        roc_png = path + "Output/roc_%i.png" % log_nr

        # NOTE(review): the Bohmer (Likelihood Graph) run is disabled, so
        # likelihood_csv is not written here although it is plotted below --
        # presumably it is expected to exist from an earlier run.
        #bohmer_model = bmr.train(train + "_ints", header = 0, length = 500000)
        #bmr.test(train + "_ints", test + "_ints", output, bohmer_model, ",", 500000, skip=0)

        training_log = LogFile(train_csv, ",", 0, 500000, None, "Case")
        training_log.remove_attributes(["Anomaly"])
        testing_log = LogFile(
            test_csv, ",", 0, 500000, None, "Case",
            training_log.string_2_int, training_log.int_2_string,
        )

        dbn_model = edbn.train(training_log)
        edbn.test(testing_log, edbn_csv, dbn_model, "Anomaly", "0")

        plt.plot_compare_prec_recall_curve(
            [likelihood_csv, edbn_csv], ["Likelihood Graph", "eDBN"], save_file=prec_recall_png
        )
        plt.plot_compare_roc_curve(
            [likelihood_csv, edbn_csv], ["Likelihood Graph", "eDBN"], roc_png
        )
Esempio n. 6
0
def run_full():
    """Train, test and plot an EDBN model on the BPIC15 data under ``../Data/``.

    For each processed log the training file is loaded, the "Anomaly",
    "planned", "dueDate" and "dateFinished" attributes are removed, a model is
    trained, the test file is evaluated into ``../Data/output2_<i>.csv`` and
    single ROC and precision/recall curves are plotted. Afterwards the result
    files for logs 1 to 5 are handed to the comparison plots.
    """
    # Use the BPIC15_x_sorted.csv files to generate new training and test
    # data with anomalies introduced. Only needed once, hence disabled.
    #preProcessData("../Data/")

    for log_nr in range(1, 2):
        # Files involved in this run.
        train_file = "../Data/bpic15_%i_train.csv" % log_nr
        test_file = "../Data/bpic15_%i_test.csv" % log_nr
        score_file = "../Data/output2_%i.csv" % log_nr
        plot_title = "BPIC15_%i" % log_nr

        # Training log; attributes are removed one call at a time, in the
        # same order as before.
        training_log = LogFile(
            train_file, ",", 0, 500000,
            time_attr="Complete_Timestamp",
            trace_attr="Case_ID",
            activity_attr="Activity",
        )
        for attribute in ("Anomaly", "planned", "dueDate", "dateFinished"):
            training_log.remove_attributes([attribute])

        # Disabled alternatives kept for reference:
        # train_data.keep_attributes(["Case_ID", "Complete_Timestamp", "Activity", "Resource", "case_termName"])
        # train_data.keep_attributes(["Case_ID", "Complete_Timestamp", "Activity", "Resource", "Weekday"])
        # train_data.create_k_context()
        # train_data.add_duration_to_k_context()

        model = edbn.train(training_log)

        # Test log reuses the value mapping of the training log.
        testing_log = LogFile(
            test_file, ",",
            header=0,
            rows=500000,
            time_attr="Complete_Timestamp",
            trace_attr="Case_ID",
            values=training_log.values,
        )
        # test_data.create_k_context()
        # test_data.add_duration_to_k_context()

        edbn.test(
            testing_log, score_file, model,
            label="Anomaly",
            normal_val="0",
            train_data=training_log,
        )

        # Per-log curves based on the scores just produced.
        plot.plot_single_roc_curve(score_file, title=plot_title)
        plot.plot_single_prec_recall_curve(score_file, title=plot_title)

    # NOTE(review): the loop above only processes log 1, while the comparison
    # below lists result files for logs 1 to 5; files 2 to 5 are not produced
    # by this function.
    compare_range = range(1, 6)
    out_files = ["../Data/output2_%i.csv" % nr for nr in compare_range]
    labels = ["MUNIS_%i" % nr for nr in compare_range]
    plot.plot_compare_roc_curve(out_files, labels, "BPIC15 Comparison")
    plot.plot_compare_prec_recall_curve(out_files, labels, "BPIC15 Comparison")