def breast_discrete_exec():
    """Train and test an edbn model on the breast dataset and plot the results.

    Steps, in order:
      1. Read the headerless data and label CSVs and attach the first label
         column to the data as "Label".
      2. Prefix every column name with "V" (so "Label" becomes "VLabel") and
         add an "ID" column holding the 0-based row position.
      3. Use the first 100 rows for training and the rest for testing.
         Training keeps only rows whose VLabel is 0 and drops the label
         column; the test set keeps its label column.
      4. Write both splits to CSV, reload them as LogFile objects, train with
         edbn.train and score with edbn.test.
      5. Plot a ROC curve and a precision-recall curve from the score file.
    """
    data_path = "../Data/breast_data.csv"
    labels_path = "../Data/breast_labels.csv"
    train_path = "../Data/breast_train.csv"
    test_path = "../Data/breast_test.csv"
    output_path = "../Data/breast_discrete_output.csv"

    frame = pd.read_csv(data_path, header=None)
    label_frame = pd.read_csv(labels_path, header=None)
    frame["Label"] = label_frame[0]

    # Prefix all column names (including the freshly added "Label") with "V".
    frame.columns = ["V" + str(name) for name in frame.columns]
    frame['ID'] = frame.reset_index().index
    print(frame)

    head_rows = frame[:100]
    test_rows = frame[100:]
    # Train only on rows labelled 0, without exposing the label column.
    keep_mask = head_rows["VLabel"] == 0
    train_rows = head_rows[keep_mask].drop(columns=["VLabel"])

    train_rows.to_csv(train_path, index=False)
    test_rows.to_csv(test_path, index=False)

    # NOTE(review): the positional LogFile arguments (",", 0, 500000, None, "ID")
    # are passed through unchanged; their meaning is defined by LogFile,
    # which is not visible here.
    train_data = LogFile(train_path, ",", 0, 500000, None, "ID",
                         activity_attr="Activity")
    train_data.k = 0
    model = edbn.train(train_data)

    test_data = LogFile(test_path, ",", 0, 500000, None, "ID",
                        activity_attr="Activity")
    test_data.k = 0
    print(test_data.data)

    # "VLabel" / "0" are presumably the label column and its "normal" value;
    # confirm against edbn.test.
    edbn.test(test_data, output_path, model, "VLabel", "0")

    plot.plot_single_roc_curve(output_path, "breast_discrete")
    plot.plot_single_prec_recall_curve(output_path, "breast_discrete")
# Load a log, derive the context size from the longest trace (capped at 40),
# preprocess it, then train and evaluate a model on a train/test split.
# NOTE(review): `data`, `train` and `test` are not defined in the visible part
# of this file; they must be provided by the surrounding context.
case_attr = "case"
act_attr = "event"

logfile = LogFile(
    data, ",", 0, None, None, case_attr,
    activity_attr=act_attr, convert=False, k=5,
)

# Replace the initial k=5 with the size of the largest trace group, never
# exceeding 40.
prefix_size = min(max(logfile.data.groupby(logfile.trace).size()), 40)
logfile.k = prefix_size

logfile.add_end_events()
logfile.keep_attributes(["case", "event", "role"])
logfile.convert2int()
logfile.create_k_context()

train_log, test_log = logfile.splitTrainTest(66, case=False, method="train-test")

# NOTE(review): 100 and 10 are passed positionally to train(); their meaning
# depends on the train() implementation, which is not visible here.
model = train(train_log, 100, 10)
acc = test(test_log, model)
print(acc)