def test_file_full(file):
    """Run one complete train/test anomaly-detection cycle with eDBN.

    Splits the raw dataset into train/test files, trains an eDBN model on the
    training log (with the "label" column stripped), scores the test log, and
    writes ROC and precision/recall plots for the result.

    :param file: path prefix of the dataset; "<file>_data.csv" and
        "<file>_labels.csv" must exist, and "<file>_train.csv",
        "<file>_test.csv" and "<file>_output_full.csv" are (over)written.
    """
    # Hoist the derived file names used more than once below.
    train_file = file + "_train.csv"
    test_file = file + "_test.csv"
    output_file = file + "_output_full.csv"
    graph_base = "../Data/Nolle_Graphs/" + file.split("/")[-1]

    split_dataset(file + "_data.csv", file + "_labels.csv", train_file, test_file, None)

    train_data = LogFile(train_file, ",", 0, 1000000, None, "case_id", "name")
    # The ground-truth label must not be visible during training.
    train_data.remove_attributes(["label"])
    model = edbn.train(train_data)

    # Reuse the training value mappings so categorical encodings line up.
    test_data = LogFile(test_file, ",", 0, 1000000, None, "case_id", "name",
                        values=train_data.values)
    edbn.test(test_data, output_file, model, "label", "0", train_data)

    plot.plot_single_roc_curve(output_file, file, save_file=graph_base + "_roc.png")
    plot.plot_single_prec_recall_curve(output_file, file, save_file=graph_base + "_precrec.png")
def duration_test():
    """Evaluate eDBN on synthetic logs with duration-based anomalies.

    For every (train anomaly rate, test anomaly rate) combination, generates
    RUNS synthetic train/test logs, trains an eDBN on the k-context extended
    with discretized event durations, scores the test log, and appends the
    ROC-AUC statistics for the combination to "results.txt".

    Side effects: writes generated logs, per-run output CSVs, and a summary
    results file under ``../Data/Experiments_Duration/``.
    """
    path = "../Data/Experiments_Duration/"
    train_rates = [0, 5, 10, 25]
    test_rates = [1, 5, 10, 25, 50, 100, 250, 500]
    # Every train-rate / test-rate combination is evaluated.
    anoms_rates = [(train_rate, test_rate)
                   for train_rate in train_rates
                   for test_rate in test_rates]

    for i, (train_rate, test_rate) in enumerate(anoms_rates):
        print((train_rate, test_rate))
        scores = []
        for run in range(RUNS):
            print("Run %i" % run)
            train_file = path + "%i_train_%i.csv" % (i, train_rate)
            test_file = path + "%i_test_%i.csv" % (i, test_rate)
            duration_generator.generate(10000, 10000, train_rate, test_rate,
                                        train_file, test_file)

            train_data = LogFile(train_file, ",", 0, 1000000, "date", "trace")
            # The ground-truth flag must not be visible during training.
            train_data.remove_attributes(["Anomaly"])
            # Reuse training value mappings so categorical encodings line up.
            test_data = LogFile(test_file, ",", 0, 1000000, "date", "trace",
                                values=train_data.values)
            train_data.keep_attributes(
                ["event", "date", "trace", "process", "resource", "random"])

            # Derive durations from the k-context; the train-set bins are
            # reused to discretize the test durations consistently.
            train_data.create_k_context()
            train_data.add_duration_to_k_context()
            bins = train_data.discretize("duration_0")

            test_data.create_k_context()
            test_data.add_duration_to_k_context()
            test_data.discretize("duration_0", bins)

            model = edbn.train(train_data)
            output_file = path + "Output_%i_%i.csv" % (train_rate, test_rate)
            edbn.test(test_data, output_file, model, "anomaly", "0")

            # Fix: compute the AUC once instead of parsing the output twice.
            score = plt.get_roc_auc(output_file)
            scores.append(score)
            print("Score = %f" % score)

        with open(path + "results.txt", "a") as fout:
            fout.write("Testing:\ntrain rate: %i\ntest rate: %i\n"
                       % (train_rate, test_rate))
            fout.write("Result: " + str(scores) + "\n")
            fout.write("Mean: %f Median: %f\n"
                       % (np.mean(scores), np.median(scores)))
            fout.write("Variance: %f\n\n" % np.var(scores))
def compare_bpics(path):
    """Benchmark Bohmer's likelihood-graph model against eDBN on BPIC15 logs.

    Iterates over the five BPIC15 municipality logs, trains and scores both
    models on each, and writes combined precision/recall and ROC plots.

    :param path: directory containing "BPIC15_train_<i>.csv" /
        "BPIC15_test_<i>.csv" and an "Output/" subdirectory for results.
    """
    for log_nr in range(1, 6):
        # Per-log input and output file locations.
        train = path + "BPIC15_train_%i.csv" % log_nr
        test = path + "BPIC15_test_%i.csv" % log_nr
        output = path + "Output/BPIC15_output_%i.csv" % log_nr
        output_edbn = path + "Output/BPIC15_edbn_output_%i.csv" % log_nr
        prec_recall = path + "Output/prec_recall_%i.png" % log_nr
        roc = path + "Output/roc_%i.png" % log_nr

        train_data = LogFile(train, ",", 0, 500000, "Time", "Case",
                             activity_attr="Activity", convert=False)
        # Strip ground truth and fields the models must not see.
        train_data.remove_attributes(["Anomaly", "Type", "Time"])
        test_data = LogFile(test, ",", 0, 500000, "Time", "Case",
                            activity_attr="Activity", values=train_data.values,
                            convert=False)

        # Baseline first: the likelihood graph runs on the unconverted log.
        bohmer_model = bmr.train(train_data)
        bmr.test(test_data, output, bohmer_model, label="Anomaly", normal_val="0")

        # eDBN requires integer-encoded values, so convert both logs now.
        train_data.convert2int()
        test_data.convert2int()
        edbn_model = edbn.train(train_data)
        edbn.test(test_data, output_edbn, edbn_model, label="Anomaly", normal_val="0")

        model_outputs = [output, output_edbn]
        model_names = ["Likelihood Graph", "EDBN"]
        plt.plot_compare_prec_recall_curve(model_outputs, model_names,
                                           save_file=prec_recall)
        plt.plot_compare_roc_curve(model_outputs, model_names, roc)
def categorical_test():
    """Evaluate eDBN on synthetic shipment logs with categorical anomalies.

    For every (train anomaly rate, test anomaly rate) combination, generates
    RUNS synthetic train/test shipment logs, trains an eDBN on the training
    log, scores the test log, and appends the ROC-AUC statistics for the
    combination to "results.txt".

    Side effects: writes generated logs, per-run output CSVs, and a summary
    results file under ``../Data/Experiments/``.
    """
    path = "../Data/Experiments/"
    train_rates = [0, 5, 10, 25]
    test_rates = [1, 5, 10, 25, 50, 100, 250, 500]
    # Every train-rate / test-rate combination is evaluated.
    anoms_rates = [(train_rate, test_rate)
                   for train_rate in train_rates
                   for test_rate in test_rates]

    for i, (train_rate, test_rate) in enumerate(anoms_rates):
        print((train_rate, test_rate))
        scores = []
        for run in range(RUNS):
            print("Run %i" % run)
            train_file = path + "%i_train_%i.csv" % (i, train_rate)
            test_file = path + "%i_test_%i.csv" % (i, test_rate)
            generator.create_shipment_data(10000, 10000, train_rate, test_rate,
                                           train_file, test_file)

            train_data = LogFile(train_file, ",", 0, 1000000, None, "Case")
            # The ground-truth flag must not be visible during training.
            train_data.remove_attributes(["Anomaly"])
            # Reuse training value mappings so categorical encodings line up.
            test_data = LogFile(test_file, ",", 0, 1000000, None, "Case",
                                values=train_data.values)

            model = edbn.train(train_data)
            output_file = path + "Output_%i_%i.csv" % (train_rate, test_rate)
            edbn.test(test_data, output_file, model, "Anomaly", "0")

            # Fix: compute the AUC once instead of parsing the output twice.
            score = plt.get_roc_auc(output_file)
            scores.append(score)
            print("Score = %f" % score)

        with open(path + "results.txt", "a") as fout:
            fout.write("Testing:\ntrain rate: %i\ntest rate: %i\n"
                       % (train_rate, test_rate))
            fout.write("Result: " + str(scores) + "\n")
            fout.write("Mean: %f Median: %f\n"
                       % (np.mean(scores), np.median(scores)))
            fout.write("Variance: %f\n\n" % np.var(scores))