def payload_extractor_trial(log_name, settings_file):
    """Build train/test payload dataframes from an XES log and return them.

    The log is loaded with ``read_XES_log`` and handed to
    ``split_log_train_test`` together with the value 0.8.  Each resulting
    part is passed through ``PayloadExtractor.get_overview_of_data`` and the
    two results are combined by ``build_dataframes``.

    Args:
        log_name: Forwarded unchanged to ``read_XES_log``.
        settings_file: Forwarded unchanged to ``settings_from_cfg``.

    Returns:
        The ``(train_df, test_df)`` pair unpacked from ``build_dataframes``.
    """
    # NOTE(review): 0.8 is presumably the train fraction -- confirm against
    # split_log_train_test.
    train_part, test_part = split_log_train_test(read_XES_log(log_name), 0.8)

    size_report = "Lengths of logs train: {}, test: {}"
    print(size_report.format(len(train_part), len(test_part)))

    extractor = PayloadExtractor()
    cfg = settings_from_cfg(settings_file)

    ## Get first forms of data (train part first, then test part)
    overviews = [
        extractor.get_overview_of_data(part, cfg)
        for part in (train_part, test_part)
    ]

    train_df, test_df = build_dataframes(overviews[0], overviews[1], cfg)
    return train_df, test_df
# Example #2
# 0
def data_declare_main(inp_folder, log_name, ignored):
    """Write the features created by ``DRC`` for an XES log to two CSV files.

    The log is read with ``read_XES_log``, converted by
    ``xes_to_data_positional`` and handed to ``split_log_train_test``
    together with the value 0.8.  ``DRC.create_data_aware_features`` then
    yields names and feature sequences for both parts; these are turned into
    dataframes, extended with a ``Case_ID`` column and saved as
    ``dwd_train.csv`` and ``dwd_test.csv`` under ``inp_folder``.

    Args:
        inp_folder: Prefix that the output file names are appended to.
        log_name: Forwarded unchanged to ``read_XES_log``.
        ignored: Forwarded unchanged to ``create_data_aware_features``.

    Returns:
        None. The results are only written to disk.
    """
    feature_builder = DRC()

    # Transform log into suitable data structures
    positional_log = xes_to_data_positional(read_XES_log(log_name))

    # NOTE(review): 0.8 is presumably the train fraction -- confirm against
    # split_log_train_test.
    train_log, test_log = split_log_train_test(positional_log, 0.8)

    # Collect every entry's "name" up front; it becomes the Case_ID column.
    train_case_ids = [entry["name"] for entry in train_log]
    test_case_ids = [entry["name"] for entry in test_log]

    (train_names, train_features,
     test_names, test_features) = feature_builder.create_data_aware_features(
         train_log, test_log, ignored)

    # Key the i-th feature sequence by the i-th name.
    train_columns = {
        train_names[pos]: values for pos, values in enumerate(train_features)
    }
    test_columns = {
        test_names[pos]: values for pos, values in enumerate(test_features)
    }

    train_df = pd.DataFrame.from_dict(train_columns)
    test_df = pd.DataFrame.from_dict(test_columns)

    # add Case_ID
    train_df["Case_ID"] = train_case_ids
    test_df["Case_ID"] = test_case_ids

    train_df.to_csv(inp_folder + "/dwd_train.csv", index=False)
    test_df.to_csv(inp_folder + "/dwd_test.csv", index=False)
def payload_extractor(inp_folder, log_name, settings_file):
    """Build train/test payload dataframes from an XES log and save them.

    The log is loaded with ``read_XES_log`` and handed to
    ``split_log_train_test`` together with the value 0.8.  Each part is
    passed through ``PayloadExtractor.get_overview_of_data`` and the results
    are combined by ``build_dataframes``.  Both frames are written as
    ``payload_train.csv`` / ``payload_test.csv`` under ``inp_folder``.

    Args:
        inp_folder: Prefix that the output file names are appended to.
        log_name: Forwarded unchanged to ``read_XES_log``.
        settings_file: Forwarded unchanged to ``settings_from_cfg``.

    Returns:
        The ``(train_df, test_df)`` pair unpacked from ``build_dataframes``.
    """
    event_log = read_XES_log(log_name)

    # NOTE(review): 0.8 is presumably the train fraction -- confirm against
    # split_log_train_test.
    train_part, test_part = split_log_train_test(event_log, 0.8)

    size_report = "Lengths of logs train: {}, test: {}"
    print(size_report.format(len(train_part), len(test_part)))

    extractor = PayloadExtractor()
    cfg = settings_from_cfg(settings_file)

    ## Get first forms of data
    train_overview = extractor.get_overview_of_data(train_part, cfg)
    test_overview = extractor.get_overview_of_data(test_part, cfg)

    train_df, test_df = build_dataframes(train_overview, test_overview, cfg)

    # Force all to float (except label?)
    # (open question carried over from the original; no conversion is done)

    # Train file is written first, then the test file.
    for frame, file_name in ((train_df, "/payload_train.csv"),
                             (test_df, "/payload_test.csv")):
        frame.to_csv(inp_folder + file_name, index=False)

    return train_df, test_df
def baseline(inp_folder, logPath):
    """Write baseline train/test tables for an XES log to CSV files.

    The log is read with ``read_XES_log``, converted by ``xes_to_positional``
    and handed to ``split_log_train_test`` together with the value 0.8.  The
    unique events of the train part are collected once and used when
    transforming both parts with ``transform_log``.  The results are saved as
    ``baseline_train.csv`` and ``baseline_test.csv`` under ``inp_folder``.

    Args:
        inp_folder: Prefix that the output file names are appended to.
        logPath: Forwarded unchanged to ``read_XES_log``.

    Returns:
        None. The results are only written to disk.
    """
    positional_log = xes_to_positional(read_XES_log(logPath))

    # NOTE(review): 0.8 is presumably the train fraction -- confirm against
    # split_log_train_test.
    train_log, test_log = split_log_train_test(positional_log, 0.8)

    # Collect all different IA's -- taken from the train part only and then
    # reused for the test part.
    activities = list(extract_unique_events_transformed(train_log))

    # Transform to matrix
    print("Train data")
    train_df = transform_log(train_log, activities)

    print("Test data")
    test_df = transform_log(test_log, activities)

    # Train file is written first, then the test file.
    for frame, file_name in ((train_df, "/baseline_train.csv"),
                             (test_df, "/baseline_test.csv")):
        frame.to_csv(inp_folder + file_name, index=False)