def compare_bpic_total(path):
    """Score the combined BPIC15 log with the bmr and EDBN models and plot both.

    The training and test logs are read from ``path``; score files and the
    precision-recall / ROC figures are written to locations under
    ``path + "Output/"``, which is created when missing.

    Args:
        path: Directory prefix. It is concatenated directly with the file
            names (no separator is inserted).
    """
    train = path + "BPIC15_train_total.csv"
    test = path + "BPIC15_test_total.csv"
    output = path + "Output/BPIC_15_output_total.csv"
    output_edbn = path + "Output/BPIC15_edbn_output_total.csv"
    prec_recall = path + "Output/prec_recall_total.png"
    roc = path + "Output/roc_total.png"

    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: there is no
    # window between check and creation, and missing parents are created too.
    os.makedirs(path + "Output", exist_ok=True)

    train_data = LogFile(train, ",", 0, 500000, "Time", "Case",
                         activity_attr="Activity", convert=False)
    train_data.remove_attributes(["Anomaly", "Type", "Time"])
    # The test log reuses the value mapping collected from the training log.
    test_data = LogFile(test, ",", 0, 500000, "Time", "Case",
                        activity_attr="Activity", values=train_data.values,
                        convert=False)

    # The bmr model is trained and tested before the logs are converted to ints.
    bohmer_model = bmr.train(train_data)
    # NOTE(review): normal_val is the int 0 here but the string "0" in the
    # EDBN call below -- confirm which one bmr.test expects.
    bmr.test(test_data, output, bohmer_model, label="Anomaly", normal_val=0)

    train_data.convert2int()
    test_data.convert2int()

    edbn_model = edbn_train(train_data)
    edbn_test(test_data, output_edbn, edbn_model, label="Anomaly", normal_val="0")

    plt.plot_compare_prec_recall_curve([output, output_edbn],
                                       ["Likelihood Graph", "EDBN"],
                                       save_file=prec_recall)
    plt.plot_compare_roc_curve([output, output_edbn],
                               ["Likelihood Graph", "EDBN"], roc)
def compare_bpics(path):
    """Score BPIC15 logs 1 to 5 with the bmr and EDBN models and plot each pair.

    For every ``i`` in 1..5 the files ``BPIC15_train_<i>.csv`` and
    ``BPIC15_test_<i>.csv`` are read from ``path``; score files and the
    precision-recall / ROC figures are written to locations under
    ``path + "Output/"``, which is created when missing.

    Args:
        path: Directory prefix. It is concatenated directly with the file
            names (no separator is inserted).
    """
    # Function-scope import so this block does not depend on the module header.
    import os

    # Every output location below lives in this directory; create it up front.
    os.makedirs(path + "Output", exist_ok=True)

    for i in range(1, 6):
        # Input files
        train = path + "BPIC15_train_%i.csv" % (i)
        test = path + "BPIC15_test_%i.csv" % (i)
        # Output files
        output = path + "Output/BPIC15_output_%i.csv" % (i)
        output_edbn = path + "Output/BPIC15_edbn_output_%i.csv" % (i)
        prec_recall = path + "Output/prec_recall_%i.png" % (i)
        roc = path + "Output/roc_%i.png" % (i)

        train_data = LogFile(train, ",", 0, 500000, "Time", "Case",
                             activity_attr="Activity", convert=False)
        train_data.remove_attributes(["Anomaly", "Type", "Time"])
        # The test log reuses the value mapping collected from the training log.
        test_data = LogFile(test, ",", 0, 500000, "Time", "Case",
                            activity_attr="Activity", values=train_data.values,
                            convert=False)

        # The bmr model is trained and tested before the logs are converted to ints.
        bohmer_model = bmr.train(train_data)
        bmr.test(test_data, output, bohmer_model, label="Anomaly", normal_val="0")

        train_data.convert2int()
        test_data.convert2int()

        edbn_model = edbn.train(train_data)
        edbn.test(test_data, output_edbn, edbn_model, label="Anomaly", normal_val="0")

        plt.plot_compare_prec_recall_curve([output, output_edbn],
                                           ["Likelihood Graph", "EDBN"],
                                           save_file=prec_recall)
        plt.plot_compare_roc_curve([output, output_edbn],
                                   ["Likelihood Graph", "EDBN"], roc)
def compare(files, nolle_result, nolle_labels):
    """Plot precision-recall and ROC comparisons for every prefix in ``files``.

    For each prefix the three score files ``<prefix>_output_sample.csv``,
    ``<prefix>_output_full.csv`` and ``<prefix>_output_bohmer.csv`` are passed
    to the plotting helpers. Figures are saved under ``../Data/Nolle_Graphs/``,
    named after the last ``/``-separated component of the prefix.

    Args:
        files: Iterable of path prefixes (strings). Nothing is plotted when
            it is empty.
        nolle_result: Passed through unchanged to
            ``plot.plot_compare_prec_recall_curve``.
        nolle_labels: List of labels appended to the three built-in labels
            for the precision-recall plot only; the ROC plot receives just
            the three built-in labels.
    """
    base_labels = ("Sample", "Full", "Bohmer")

    for file in files:
        results = [
            file + "_output_sample.csv",
            file + "_output_full.csv",
            file + "_output_bohmer.csv",
        ]
        # Both figures share the same stem, so derive it once per prefix.
        stem = "../Data/Nolle_Graphs/" + file.split("/")[-1]

        plot.plot_compare_prec_recall_curve(
            results, list(base_labels) + nolle_labels, nolle_result,
            "Comparison", save_file=stem + "_compare_precrec.png")
        plot.plot_compare_roc_curve(
            results, list(base_labels), "Comparison",
            save_file=stem + "_compare_roc.png")
def compare_bpic_total(path):
    """Score the combined BPIC15 log with the eDBN model and plot a comparison.

    Only the eDBN scores are produced here. The Likelihood Graph scores file
    is merely handed to the plotting helpers, because the bmr train/test step
    is disabled in this version.

    Args:
        path: Directory prefix, concatenated directly with the file names.
    """
    # NOTE(review): an earlier definition of compare_bpic_total appears in
    # this source; if both live in one module, this later one takes effect.
    train_csv = path + "BPIC15_train_total.csv"
    test_csv = path + "BPIC15_test_total.csv"
    bohmer_scores = path + "Output/BPIC_15_output_total.csv"
    edbn_scores = path + "Output/BPIC15_edbn_output_total.csv"
    prec_recall_png = path + "Output/prec_recall_total.png"
    roc_png = path + "Output/roc_total.png"

    training_log = LogFile(train_csv, ",", 0, 500000, None, "Case")
    training_log.remove_attributes(["Anomaly"])
    # The test log is built with the mappings held by the training log.
    testing_log = LogFile(
        test_csv, ",", 0, 500000, None, "Case",
        training_log.string_2_int, training_log.int_2_string,
    )

    model = edbn.train(training_log)
    edbn.test(testing_log, edbn_scores, model, "Anomaly", "0")

    plt.plot_compare_prec_recall_curve(
        [bohmer_scores, edbn_scores],
        ["Likelihood Graph", "eDBN"],
        save_file=prec_recall_png,
    )
    plt.plot_compare_roc_curve(
        [bohmer_scores, edbn_scores],
        ["Likelihood Graph", "eDBN"],
        roc_png,
    )
def compare_bpics(path):
    """Score BPIC15 logs 1 to 5 with the eDBN model and plot a comparison each.

    Only the eDBN scores are produced here. The Likelihood Graph scores file
    of each log is merely handed to the plotting helpers, because the bmr
    train/test step is disabled in this version.

    Args:
        path: Directory prefix, concatenated directly with the file names.
    """
    # NOTE(review): an earlier definition of compare_bpics appears in this
    # source; if both live in one module, this later one takes effect.
    for idx in range(1, 6):
        # Input files
        train_csv = path + "BPIC15_train_%i.csv" % idx
        test_csv = path + "BPIC15_test_%i.csv" % idx
        # Score files and figures
        bohmer_scores = path + "Output/BPIC15_output_%i.csv" % idx
        edbn_scores = path + "Output/BPIC15_edbn_output_%i.csv" % idx
        prec_recall_png = path + "Output/prec_recall_%i.png" % idx
        roc_png = path + "Output/roc_%i.png" % idx

        training_log = LogFile(train_csv, ",", 0, 500000, None, "Case")
        training_log.remove_attributes(["Anomaly"])
        # The test log is built with the mappings held by the training log.
        testing_log = LogFile(
            test_csv, ",", 0, 500000, None, "Case",
            training_log.string_2_int, training_log.int_2_string,
        )

        model = edbn.train(training_log)
        edbn.test(testing_log, edbn_scores, model, "Anomaly", "0")

        plt.plot_compare_prec_recall_curve(
            [bohmer_scores, edbn_scores],
            ["Likelihood Graph", "eDBN"],
            save_file=prec_recall_png,
        )
        plt.plot_compare_roc_curve(
            [bohmer_scores, edbn_scores],
            ["Likelihood Graph", "eDBN"],
            roc_png,
        )
def run_full():
    """Train and test an eDBN model on BPIC15 data, then plot the results.

    For each processed log the scores are written to
    ``../Data/output2_<i>.csv`` and single ROC / precision-recall curves are
    plotted. Afterwards the score files 1 to 5 are plotted together.
    """
    # Use the BPIC15_x_sorted.csv files to generate new training and test
    # data files with anomalies introduced. This only has to run once.
    # preProcessData("../Data/")

    # NOTE(review): this loop only processes log 1, while the comparison at
    # the end collects score files 1..5 -- confirm that files 2..5 are
    # expected to exist from earlier runs.
    for idx in range(1, 2):
        train_file = "../Data/bpic15_%i_train.csv" % idx
        test_file = "../Data/bpic15_%i_test.csv" % idx
        score_file = "../Data/output2_%i.csv" % idx
        title = "BPIC15_%i" % idx

        # Load the training log and drop the attributes excluded from training.
        train_data = LogFile(
            train_file, ",", 0, 500000,
            time_attr="Complete_Timestamp",
            trace_attr="Case_ID",
            activity_attr="Activity",
        )
        for attribute in ("Anomaly", "planned", "dueDate", "dateFinished"):
            train_data.remove_attributes([attribute])

        model = edbn.train(train_data)

        # The test log reuses the value mapping collected from the training log.
        test_data = LogFile(
            test_file, ",", header=0, rows=500000,
            time_attr="Complete_Timestamp",
            trace_attr="Case_ID",
            values=train_data.values,
        )
        edbn.test(test_data, score_file, model, label="Anomaly",
                  normal_val="0", train_data=train_data)

        # Per-log curves based on the scores just written.
        plot.plot_single_roc_curve(score_file, title=title)
        plot.plot_single_prec_recall_curve(score_file, title=title)

    out_files = ["../Data/output2_%i.csv" % n for n in range(1, 6)]
    labels = ["MUNIS_%i" % n for n in range(1, 6)]
    plot.plot_compare_roc_curve(out_files, labels, "BPIC15 Comparison")
    plot.plot_compare_prec_recall_curve(out_files, labels, "BPIC15 Comparison")