def run_sdl():
    """Train the SDL model on the BPIC15_1_f2 outcome log and print its test result.

    The labelled log is loaded with ``label`` as the activity attribute and
    ``k=10``, reduced to a fixed set of columns, expanded with its k-context
    and split via ``splitTrainTest(80, True, "train-test")``.  ``label`` is
    added to ``ignoreHistoryAttributes`` of both parts before training.

    Returns:
        None.  The value returned by ``test(test_log, model)`` is printed.
    """
    from Methods.SDL.sdl import train, test

    labeled_logfile = "../Data/Outcome_Prediction/BPIC15_1_f2.csv"
    log = LogFile(
        labeled_logfile,
        ";",
        0,
        None,
        "time_timestamp",
        "Case_ID",
        activity_attr="label",
        convert=True,
        k=10,
    )

    columns = [
        "label",
        "Case_ID",
        "Activity",
        "monitoringResource",
        "question",
        "org_resource",
        "Responsible_actor",
        "SUMleges",
    ]
    log.keep_attributes(columns)
    log.create_k_context()

    train_log, test_log = log.splitTrainTest(80, True, "train-test")

    # The outcome label is excluded from the history attributes of both splits.
    train_log.ignoreHistoryAttributes.add("label")
    test_log.ignoreHistoryAttributes.add("label")

    # NOTE(review): 200 and 42 are presumably the epoch count and random seed;
    # confirm against the signature of Methods.SDL.sdl.train.
    model = train(train_log, 200, 42)

    print(test(test_log, model))
def _load_edbn_train_log(data_folder, k):
    """Create the ``LogFile`` for ``train_log.csv`` in *data_folder* with history size *k*."""
    # os.path.join keeps this working whether or not data_folder ends with a
    # path separator (plain string concatenation did not).
    train_path = os.path.join(data_folder, "train_log.csv")
    return LogFile(train_path, ",", 0, None, None, "case",
                   activity_attr="event", convert=False, k=k)


def _prepare_edbn_log(log):
    """Run add_end_events -> convert2int -> create_k_context on *log*."""
    log.add_end_events()
    log.convert2int()
    log.create_k_context()


def train_edbn(data_folder, model_folder, k=None, next_event=True):
    """Train an eDBN model on ``train_log.csv`` and store it in *model_folder*.

    Args:
        data_folder: Directory containing ``train_log.csv``.
        model_folder: Existing directory that receives two files: ``model``
            (the pickled model) and ``k`` (the history size as text).
        k: History size handed to ``LogFile``.  When ``None``, the values
            1 to 5 are each validated on a ``splitTrainTest(80)`` split of the
            training log and the one with the highest validation score is
            used (the smallest k wins ties).
        next_event: Only relevant when ``k`` is ``None``.  If true, candidates
            are scored with ``predict_next_event``; otherwise with
            ``predict_suffix``.

    Returns:
        None.  The results are written to *model_folder*.
    """
    from EDBN.Execute import train
    from Predictions.eDBN_Prediction import learn_duplicated_events, predict_next_event, predict_suffix

    if k is None:
        best_k = None
        best_acc = None
        for candidate_k in range(1, 6):
            candidate_log = _load_edbn_train_log(data_folder, candidate_k)
            train_train_log, train_test_log = candidate_log.splitTrainTest(80)

            _prepare_edbn_log(train_train_log)

            # The validation part reuses the value mapping of the training
            # part; this assignment has to stay between the two preparations.
            train_test_log.values = train_train_log.values
            _prepare_edbn_log(train_test_log)

            model = train(train_train_log)

            # Train average number of duplicated events
            model.duplicate_events = learn_duplicated_events(train_train_log)

            if next_event:
                acc = predict_next_event(model, train_test_log)
            else:
                acc = predict_suffix(model, train_test_log)
            print("Testing k=", candidate_k, " | Validation acc:", acc)

            # Strict comparison: an equal score does not replace an earlier k.
            if best_acc is None or best_acc < acc:
                best_acc = acc
                best_k = candidate_k

        print("Best k value:", best_k, " | Validation acc of", best_acc)
        k = best_k

    # Final model: retrain on the complete training log with the chosen k.
    train_log = _load_edbn_train_log(data_folder, k)
    _prepare_edbn_log(train_log)

    model = train(train_log)

    # Train average number of duplicated events
    model.duplicate_events = learn_duplicated_events(train_log)

    with open(os.path.join(model_folder, "model"), "wb") as pickle_file:
        pickle.dump(model, pickle_file)

    with open(os.path.join(model_folder, "k"), "w") as outfile:
        outfile.write(str(k))
def run_edbn():
    """Evaluate eDBN outcome prediction on the BPIC15_1_f2 log and print per-label accuracy.

    The labelled log is loaded with ``label`` as the activity attribute and
    ``k=1``, reduced to a fixed set of columns, expanded with its k-context
    and split via ``splitTrainTest(80, True, "train-test")``.  A model is
    trained on the training part.  For every test case, the probabilities
    that ``get_probabilities`` returns for labels 1 and 2 are accumulated
    over all rows of the case, and the label with the larger total is
    compared with the label of the case's last row.

    Returns:
        None.  Two lines are printed, one for ground-truth label 1 and one
        for label 2, each as ``<number of cases> <fraction predicted
        correctly>``.  The fraction is ``nan`` when no test case carries that
        label.
    """
    from eDBN_Prediction import get_probabilities
    from Methods.EDBN.Train import train

    labeled_logfile = "../Data/Outcome_Prediction/BPIC15_1_f2.csv"
    log = LogFile(
        labeled_logfile,
        ";",
        0,
        None,
        "time_timestamp",
        "Case_ID",
        activity_attr="label",
        convert=True,
        k=1,
    )

    columns = [
        "label",
        "Case_ID",
        "time_timestamp",
        "Activity",
        "monitoringResource",
        "question",
        "org_resource",
        "Responsible_actor",
        "SUMleges",
    ]
    log.keep_attributes(columns)
    log.create_k_context()

    train_log, test_log = log.splitTrainTest(80, True, "train-test")

    train_log.ignoreHistoryAttributes.add("label")

    model = train(train_log)

    # The label variable and its parents do not depend on the row being
    # scored, so they are looked up once instead of once per row.
    label_var = model.variables["label"]
    parents = label_var.conditional_table.parents

    results1 = []
    results2 = []

    for case in test_log.get_cases():
        case_df = case[1]
        # NOTE(review): both scores start at 1 and are updated additively, so
        # the offset cancels out in the comparison below.  If a product of
        # probabilities was intended this should be `*=` -- confirm.
        case_probs = {1: 1, 2: 1}
        ground = 0
        for _, row in case_df.iterrows():
            ground = getattr(row, "label")

            tuple_val = tuple(getattr(row, parent.attr_name) for parent in parents)

            # The second return value is not needed here.
            probs, _ = get_probabilities(label_var, tuple_val, parents)
            case_probs[1] += probs.get(1, 0)
            case_probs[2] += probs.get(2, 0)

        # A tie counts as a wrong prediction for either label.
        if ground == 1:
            results1.append(1 if case_probs[1] > case_probs[2] else 0)

        if ground == 2:
            results2.append(1 if case_probs[2] > case_probs[1] else 0)

    for outcomes in (results1, results2):
        # Guard against ZeroDivisionError when a label never occurs in the
        # test split.
        accuracy = sum(outcomes) / len(outcomes) if outcomes else float("nan")
        print(len(outcomes), accuracy)