def payload_extractor_trial(log_name, settings_file):
    """Build train/test payload dataframes from an XES log, without saving them.

    The log is read with ``read_XES_log``, split by ``split_log_train_test``
    using a ratio argument of 0.8, and each part is passed through
    ``PayloadExtractor.get_overview_of_data`` before ``build_dataframes``
    combines the results.

    Args:
        log_name: Log location handed to ``read_XES_log``.
        settings_file: Configuration location handed to ``settings_from_cfg``.

    Returns:
        A ``(train_df, test_df)`` tuple as produced by ``build_dataframes``.
    """
    full_log = read_XES_log(log_name)
    train_part, test_part = split_log_train_test(full_log, 0.8)

    size_report = "Lengths of logs train: {}, test: {}"
    print(size_report.format(len(train_part), len(test_part)))

    extractor = PayloadExtractor()
    cfg = settings_from_cfg(settings_file)

    # First-pass overview of the payload data, one per split.
    train_overview = extractor.get_overview_of_data(train_part, cfg)
    test_overview = extractor.get_overview_of_data(test_part, cfg)

    train_frame, test_frame = build_dataframes(train_overview, test_overview, cfg)
    return train_frame, test_frame
def data_declare_main(inp_folder, log_name, ignored):
    """Create data-aware feature tables for an XES log and write them as CSV.

    The log is read, converted with ``xes_to_data_positional`` and split by
    ``split_log_train_test`` using a ratio argument of 0.8. Features come from
    ``DRC.create_data_aware_features``; every feature sequence becomes one
    dataframe column keyed by the name at the same index. A ``Case_ID`` column
    holding each trace's ``"name"`` entry is appended before writing
    ``dwd_train.csv`` and ``dwd_test.csv`` into ``inp_folder``.

    Args:
        inp_folder: Folder path prefix for the output CSV files.
        log_name: Log location handed to ``read_XES_log``.
        ignored: Forwarded unchanged to ``create_data_aware_features``.
    """

    def _columns_by_name(names, features):
        # Index into ``names`` (rather than zip) so a names list shorter than
        # the features list still raises IndexError.
        return {names[pos]: column for pos, column in enumerate(features)}

    feature_creator = DRC()
    raw_log = read_XES_log(log_name)

    # Transform log into suitable data structures.
    positional_log = xes_to_data_positional(raw_log)
    train_log, test_log = split_log_train_test(positional_log, 0.8)

    train_ids = [trace["name"] for trace in train_log]
    test_ids = [trace["name"] for trace in test_log]

    (
        train_names,
        train_features,
        test_names,
        test_features,
    ) = feature_creator.create_data_aware_features(train_log, test_log, ignored)

    train_columns = _columns_by_name(train_names, train_features)
    test_columns = _columns_by_name(test_names, test_features)

    train_df = pd.DataFrame.from_dict(train_columns)
    test_df = pd.DataFrame.from_dict(test_columns)

    # Attach the case identifiers as the final column of each table.
    train_df["Case_ID"] = train_ids
    test_df["Case_ID"] = test_ids

    outputs = ((train_df, "/dwd_train.csv"), (test_df, "/dwd_test.csv"))
    for frame, suffix in outputs:
        frame.to_csv(inp_folder + suffix, index=False)
def payload_extractor(inp_folder, log_name, settings_file):
    """Build train/test payload dataframes from an XES log and save them as CSV.

    The log is read with ``read_XES_log``, split by ``split_log_train_test``
    using a ratio argument of 0.8, and each part is passed through
    ``PayloadExtractor.get_overview_of_data`` before ``build_dataframes``
    combines the results. The frames are written to ``payload_train.csv`` and
    ``payload_test.csv`` inside ``inp_folder``.

    Args:
        inp_folder: Folder path prefix for the output CSV files.
        log_name: Log location handed to ``read_XES_log``.
        settings_file: Configuration location handed to ``settings_from_cfg``.

    Returns:
        A ``(train_df, test_df)`` tuple as produced by ``build_dataframes``.
    """
    full_log = read_XES_log(log_name)
    train_part, test_part = split_log_train_test(full_log, 0.8)

    size_report = "Lengths of logs train: {}, test: {}"
    print(size_report.format(len(train_part), len(test_part)))

    extractor = PayloadExtractor()
    cfg = settings_from_cfg(settings_file)

    # First-pass overview of the payload data, one per split.
    train_overview = extractor.get_overview_of_data(train_part, cfg)
    test_overview = extractor.get_overview_of_data(test_part, cfg)

    train_frame, test_frame = build_dataframes(train_overview, test_overview, cfg)

    # NOTE: no dtype coercion (e.g. forcing columns to float) is applied here;
    # the frames are written exactly as build_dataframes returned them.
    outputs = ((train_frame, "/payload_train.csv"), (test_frame, "/payload_test.csv"))
    for frame, suffix in outputs:
        frame.to_csv(inp_folder + suffix, index=False)

    return train_frame, test_frame
def baseline(inp_folder, logPath):
    """Create the baseline train/test tables for an XES log and write them as CSV.

    The log is read, converted with ``xes_to_positional`` and split by
    ``split_log_train_test`` using a ratio argument of 0.8. The unique events
    returned by ``extract_unique_events_transformed`` for the train split are
    passed to ``transform_log`` for both splits. Results are written to
    ``baseline_train.csv`` and ``baseline_test.csv`` inside ``inp_folder``.

    Args:
        inp_folder: Folder path prefix for the output CSV files.
        logPath: Log location handed to ``read_XES_log``.
    """
    raw_log = read_XES_log(logPath)
    positional_log = xes_to_positional(raw_log)
    train_part, test_part = split_log_train_test(positional_log, 0.8)

    # The event collection is taken from the train split only and reused for
    # the test split.
    activities = list(extract_unique_events_transformed(train_part))

    # Transform each split to a matrix, announcing progress on stdout.
    print("Train data")
    train_frame = transform_log(train_part, activities)

    print("Test data")
    test_frame = transform_log(test_part, activities)

    outputs = ((train_frame, "/baseline_train.csv"), (test_frame, "/baseline_test.csv"))
    for frame, suffix in outputs:
        frame.to_csv(inp_folder + suffix, index=False)