Example #1
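    # Assumed context (not shown in this snippet): LogFile, train and test come
    # from the surrounding project; this fragment runs inside its main block.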
    data = "../../Data/BPIC12W.csv"
    # data = "../../Data/Helpdesk.csv"
    # data = "../../Data/Taymouri_bpi_12_w.csv"
    case_attr = "case"
    act_attr = "event"
    k = 15  # note: unused below except by the commented-out line; the LogFile call hard-codes k=10

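    # LogFile positional arguments, as used here: filename, delimiter, header row
    # index, row limit (None = all rows), time attribute (None here), case attribute.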
    logfile = LogFile(data,
                      ",",
                      0,
                      None,
                      None,
                      case_attr,
                      activity_attr=act_attr,
                      convert=False,
                      k=10)
    logfile.convert2int()          # encode categorical attribute values as integers
    logfile.filter_case_length(5)  # drop cases shorter than the threshold
    # logfile.k = min(k, max(logfile.data.groupby(logfile.trace).size()))

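    # Build the k-context: for every event, add its k preceding events as features.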
    logfile.create_k_context()
    train_log, test_log = logfile.splitTrainTest(70,  # 70% train / 30% test
                                                 case=False,
                                                 method="test-train")

    model = train(train_log, epochs=100, early_stop=10)
    # model.save("tmp.h5")
    # from keras.models import load_model
    # test(test_log, load_model("tmp.h5"))
    test(test_log, model)
Example #2
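# Assumed context (not shown in this snippet): os, pickle, LOGFILE_PATH, the
# dataset constants (BPIC12, HELPDESK, ...) and the LogFile and preprocess
# helpers are defined in the surrounding module.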
def get_data(dataset, dataset_size, k, add_end, reduce_tasks, resource_pools, remove_resource):
    # Cache key: dataset name, size, k, and one 0/1 flag per preprocessing option.
    filename_parts = [dataset, str(dataset_size), str(k)]
    for flag in (add_end, reduce_tasks, resource_pools, remove_resource):
        filename_parts.append("1" if flag else "0")
    print(filename_parts)
    cache_file = LOGFILE_PATH + "/" + "_".join(filename_parts)

    if os.path.exists(cache_file):
        print("Loading file from cache")
        with open(cache_file, "rb") as pickle_file:
            preprocessed_log = pickle.load(pickle_file)
    else:
        # One entry per dataset: (csv path, time attribute, case attribute,
        # activity attribute, resource attribute or None, columns to keep,
        # minimum case length or None). BPIC15 maps to the BPIC15_1 file.
        bpic15_config = ("Complete Timestamp", "Case ID", "Activity", "Resource",
                         ["Case ID", "Activity", "Resource"], 5)
        configs = {
            BPIC15:   ("../Data/BPIC15_1_sorted_new.csv",) + bpic15_config,
            BPIC15_1: ("../Data/BPIC15_1_sorted_new.csv",) + bpic15_config,
            BPIC15_2: ("../Data/BPIC15_2_sorted_new.csv",) + bpic15_config,
            BPIC15_3: ("../Data/BPIC15_3_sorted_new.csv",) + bpic15_config,
            BPIC15_4: ("../Data/BPIC15_4_sorted_new.csv",) + bpic15_config,
            BPIC15_5: ("../Data/BPIC15_5_sorted_new.csv",) + bpic15_config,
            BPIC12:   ("../Data/BPIC12.csv", "completeTime", "case", "event",
                       "org:resource", ["case", "event", "org:resource"], 5),
            BPIC12W:  ("../Data/BPIC12W.csv", "completeTime", "case", "event",
                       "org:resource", ["case", "event", "org:resource"], 5),
            HELPDESK: ("../Data/Helpdesk.csv", "completeTime", "case", "event",
                       "Resource", ["case", "event", "Resource"], 3),
            BPIC18:   ("../Data/bpic2018.csv", "startTime", "case", "event",
                       None, ["case", "event", "subprocess"], None),
        }

        if dataset not in configs:
            print("Unknown Dataset")
            return None

        path, time_attr, case_attr, act_attr, resource_attr, colTitles, min_length = configs[dataset]
        logfile = LogFile(path, ",", 0, dataset_size, time_attr, case_attr,
                          activity_attr=act_attr, convert=False, k=k)
        logfile.keep_attributes(colTitles)
        if min_length is not None:
            logfile.filter_case_length(min_length)

        preprocessed_log = preprocess(logfile, add_end, reduce_tasks, resource_pools,
                                      resource_attr, remove_resource)

        preprocessed_log.create_k_context()
        with open(cache_file, "wb") as pickle_file:
            pickle.dump(preprocessed_log, pickle_file)
    return preprocessed_log, "_".join(filename_parts)
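
A minimal usage sketch, assuming the HELPDESK constant is defined in the enclosing module, that None is an acceptable dataset_size (as in Example #1), and that preprocess returns a LogFile exposing splitTrainTest as used in Example #1:

result = get_data(HELPDESK, None, 15, True, False, False, False)
if result is not None:
    preprocessed_log, cache_name = result  # (preprocessed log, cache file name)
    # Split as in Example #1: 70% of the rows for training, the rest for testing.
    train_log, test_log = preprocessed_log.splitTrainTest(70, case=False,
                                                          method="test-train")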