Example #1
def _test1():
    data = "../Data/BPIC15_1_sorted_new.csv"
    case_attr = "case"
    act_attr = "event"

    logfile = LogFile(data,
                      ",",
                      0,
                      None,
                      None,
                      case_attr,
                      activity_attr=act_attr,
                      convert=False,
                      k=5)
    logfile.keep_attributes(["case", "event", "role"])
    logfile.convert2int()
    # logfile.filter_case_length(5)

    logfile.create_k_context()
    train_log, test_log = logfile.splitTrainTest(70,
                                                 case=True,
                                                 method="train-test")

    model = edbn_train(train_log)
    acc = predict_next_event(model, test_log)
    acc_update = predict_next_event_update(model, test_log)
    print("ACC:", acc, acc_update)
Example #2
def train_lin(data_folder, model_folder):
    from RelatedMethods.Lin.model import train

    logfile = LogFile(data_folder + "full_log.csv",
                      ",",
                      0,
                      None,
                      None,
                      "case",
                      activity_attr="event",
                      convert=False,
                      k=0)
    logfile.add_end_events()
    logfile.convert2int()
    train_log = LogFile(data_folder + "train_log.csv",
                        ",",
                        0,
                        None,
                        None,
                        "case",
                        activity_attr="event",
                        convert=False,
                        k=0,
                        values=logfile.values)
    train_log.add_end_events()
    train_log.convert2int()

    train(logfile, train_log, model_folder)
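A possible invocation of train_lin, with placeholder folder names (not from the original code); the data folder is expected to contain both full_log.csv and train_log.csv, and the model folder is where the trained Lin model will be written:

# Hypothetical paths, for illustration only.
train_lin("../Data/BPIC15_1/", "../Models/Lin/")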
Example #3
def compare_bpic_total(path):
    train = path + "BPIC15_train_total.csv"
    test = path + "BPIC15_test_total.csv"
    output = path + "Output/BPIC_15_output_total.csv"
    output_edbn = path + "Output/BPIC15_edbn_output_total.csv"
    prec_recall = path + "Output/prec_recall_total.png"
    roc = path + "Output/roc_total.png"

    if not os.path.exists(path + "Output"):
        os.mkdir(path + "Output")

    train_data = LogFile(train, ",", 0, 500000, "Time", "Case", activity_attr="Activity", convert=False)
    train_data.remove_attributes(["Anomaly", "Type", "Time"])
    test_data = LogFile(test, ",", 0, 500000, "Time", "Case", activity_attr="Activity", values=train_data.values, convert=False)

    bohmer_model = bmr.train(train_data)
    bmr.test(test_data, output, bohmer_model, label="Anomaly", normal_val=0)

    train_data.convert2int()
    test_data.convert2int()

    edbn_model = edbn_train(train_data)
    edbn_test(test_data, output_edbn, edbn_model, label="Anomaly", normal_val="0")

    plt.plot_compare_prec_recall_curve([output, output_edbn], ["Likelihood Graph", "EDBN"], save_file=prec_recall)
    plt.plot_compare_roc_curve([output, output_edbn], ["Likelihood Graph", "EDBN"], roc)
Example #4
def duration_test_discretize():
    path = "../Data/Experiments_Discretize/"
    train_rates = [0, 5, 10, 25]
    test_rates = [1, 5, 10, 25, 50, 100, 250, 500]
    anoms_rates = []
    for train_rate in train_rates:
        for test_rate in test_rates:
            anoms_rates.append((train_rate, test_rate))

    for i in range(len(anoms_rates)):
        print(anoms_rates[i])
        scores = []
        for run in range(RUNS):
            print("Run %i" % run)
            train_file = path + "%i_train_%i.csv" % (i, anoms_rates[i][0])
            test_file = path + "%i_test_%i.csv" % (i, anoms_rates[i][1])
            duration_generator.generate(10000, 10000, anoms_rates[i][0], anoms_rates[i][1], train_file, test_file)

            train_data = LogFile(train_file, ",", 0, 1000000, "date", "trace", convert=False)
            train_data.remove_attributes(["Anomaly"])

            train_data.keep_attributes(["event", "date", "trace", "process", "resource", "random"])
            train_data.convert2int()

            train_data.create_k_context()
            train_data.add_duration_to_k_context()
            bins = train_data.discretize("duration_0", bins=10)

            test_data = LogFile(test_file, ",", 0, 1000000, "date", "trace", values=train_data.values, convert=False)
            test_data.keep_attributes(["event", "date", "trace", "process", "resource", "random", "anomaly"])
            test_data.convert2int()

            test_data.create_k_context()
            test_data.add_duration_to_k_context()
            test_data.discretize("duration_0", bins)

            model = edbn.train(train_data)

            output_file = path + "Output_%i_%i.csv" % anoms_rates[i]
            output_roc = path + "roc_%i_%i.png" % anoms_rates[i]
            output_prec = path + "prec_recall_%i_%i.png" % anoms_rates[i]

            edbn.test(test_data, output_file, model, "anomaly", "0")

            score = plt.get_roc_auc(output_file)
            scores.append(score)
            print("Score = %f" % score)

        with open(path + "results.txt", "a") as fout:
            fout.write("Testing:\ntrain rate: %i\ntest rate: %i\n" % (anoms_rates[i][0], anoms_rates[i][1]))
            fout.write("Result: " + str(scores) + "\n")
            fout.write("Mean: %f Median: %f\n" % (np.mean(scores), np.median(scores)))
            fout.write("Variance: %f\n\n" % np.var(scores))
Example #5
def compare_bpics(path):
    for i in range(1, 6):
        # Input Files
        train = path + "BPIC15_train_%i.csv" % (i)
        test = path + "BPIC15_test_%i.csv" % (i)
        output = path + "Output/BPIC15_output_%i.csv" % (i)
        output_edbn = path + "Output/BPIC15_edbn_output_%i.csv" % (i)
        prec_recall = path + "Output/prec_recall_%i.png" % (i)
        roc = path + "Output/roc_%i.png" % (i)

        train_data = LogFile(train,
                             ",",
                             0,
                             500000,
                             "Time",
                             "Case",
                             activity_attr="Activity",
                             convert=False)
        train_data.remove_attributes(["Anomaly", "Type", "Time"])
        test_data = LogFile(test,
                            ",",
                            0,
                            500000,
                            "Time",
                            "Case",
                            activity_attr="Activity",
                            values=train_data.values,
                            convert=False)

        bohmer_model = bmr.train(train_data)
        bmr.test(test_data,
                 output,
                 bohmer_model,
                 label="Anomaly",
                 normal_val="0")

        train_data.convert2int()
        test_data.convert2int()

        edbn_model = edbn.train(train_data)
        edbn.test(test_data,
                  output_edbn,
                  edbn_model,
                  label="Anomaly",
                  normal_val="0")

        plt.plot_compare_prec_recall_curve([output, output_edbn],
                                           ["Likelihood Graph", "EDBN"],
                                           save_file=prec_recall)
        plt.plot_compare_roc_curve([output, output_edbn],
                                   ["Likelihood Graph", "EDBN"], roc)
Example #6
def run_experiment(data, prefix_size, add_end_event, split_method, split_cases, train_percentage):
    logfile = LogFile(data, ",", 0, None, None, "case",
                      activity_attr="event", convert=False, k=prefix_size)
    if add_end_event:
        logfile.add_end_events()
    logfile.keep_attributes(["case", "event", "role"])
    logfile.convert2int()
    logfile.create_k_context()
    train_log, test_log = logfile.splitTrainTest(train_percentage, case=split_cases, method=split_method)

    with open("Baseline/results.txt", "a") as fout:
        fout.write("Data: " + data)
        fout.write("\nPrefix Size: " + str(prefix_size))
        fout.write("\nEnd event: " + str(add_end_event))
        fout.write("\nSplit method: " + split_method)
        fout.write("\nSplit cases: " + str(split_cases))
        fout.write("\nTrain percentage: " + str(train_percentage))
        fout.write("\nDate: " + time.strftime("%d.%m.%y-%H.%M", time.localtime()))
        fout.write("\n------------------------------------")

        baseline_acc = test(test_log, train(train_log, epochs=100, early_stop=10))
        fout.write("\nBaseline: " + str(baseline_acc))
        fout.write("\n")
        fout.write("====================================\n\n")
Example #7
from keras.models import load_model

# Modulator is the custom layer needed to deserialize the saved Lin model below;
# importing it from RelatedMethods.Lin.model is an assumption about where it is defined.
from RelatedMethods.Lin.model import Modulator, create_model, predict_next
from Utils.LogFile import LogFile


def train(log, epochs=200, early_stop=42):
    return create_model(log, "tmp", epochs, early_stop)


def test(log, model):
    return predict_next(log, model)


if __name__ == "__main__":
    data = "../../Data/BPIC15_5_sorted_new.csv"
    case_attr = "case"
    act_attr = "event"

    logfile = LogFile(data, ",", 0, None, None, case_attr,
                      activity_attr=act_attr, convert=False, k=1)
    logfile.convert2int()

    logfile.create_k_context()
    train_log, test_log = logfile.splitTrainTest(70, case=True, method="train-test")

    # model = train(train_log, epochs=100, early_stop=5)
    model = load_model("../../Predictions/tmp/model_001-4.51.h5", custom_objects={'Modulator': Modulator})

    acc = test(test_log, model)
    print(acc)

Example #8
def train_edbn(data_folder, model_folder, k=None, next_event=True):
    from EDBN.Execute import train
    from Predictions.eDBN_Prediction import learn_duplicated_events, predict_next_event, predict_suffix

    if k is None:
        best_model = {}
        for k in range(1, 6):
            train_log = LogFile(data_folder + "train_log.csv",
                                ",",
                                0,
                                None,
                                None,
                                "case",
                                activity_attr="event",
                                convert=False,
                                k=k)

            train_train_log, train_test_log = train_log.splitTrainTest(80)

            train_train_log.add_end_events()
            train_train_log.convert2int()
            train_train_log.create_k_context()

            train_test_log.values = train_train_log.values
            train_test_log.add_end_events()
            train_test_log.convert2int()
            train_test_log.create_k_context()

            model = train(train_train_log)

            # Learn the average number of duplicated events
            model.duplicate_events = learn_duplicated_events(train_train_log)

            if next_event:
                acc = predict_next_event(model, train_test_log)
            else:
                acc = predict_suffix(model, train_test_log)
            print("Testing k=", k, " | Validation acc:", acc)
            if "Acc" not in best_model or best_model["Acc"] < acc:
                best_model["Acc"] = acc
                best_model["Model"] = model
                best_model["k"] = k
        print("Best k value:", best_model["k"], " | Validation acc of",
              best_model["Acc"])
        k = best_model["k"]

    train_log = LogFile(data_folder + "train_log.csv",
                        ",",
                        0,
                        None,
                        None,
                        "case",
                        activity_attr="event",
                        convert=False,
                        k=k)

    train_log.add_end_events()
    train_log.convert2int()
    train_log.create_k_context()

    model = train(train_log)

    # Learn the average number of duplicated events
    model.duplicate_events = learn_duplicated_events(train_log)

    with open(os.path.join(model_folder, "model"), "wb") as pickle_file:
        pickle.dump(model, pickle_file)

    with open(os.path.join(model_folder, "k"), "w") as outfile:
        outfile.write(str(k))
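Since train_edbn persists the model with pickle and the selected k as plain text, loading them back could look like the sketch below (a minimal example assuming the same model_folder layout; the function name load_edbn is hypothetical and not part of the original code):

def load_edbn(model_folder):
    # Read back the pickled eDBN model and the k value written by train_edbn.
    with open(os.path.join(model_folder, "model"), "rb") as pickle_file:
        model = pickle.load(pickle_file)
    with open(os.path.join(model_folder, "k")) as infile:
        k = int(infile.read())
    return model, k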
Example #9
def run_experiment(data,
                   prefix_size,
                   add_end_event,
                   split_method,
                   split_cases,
                   train_percentage,
                   filename="results.txt"):
    data = DATA_FOLDER + data
    logfile = LogFile(data,
                      ",",
                      0,
                      None,
                      "completeTime",
                      "case",
                      activity_attr="event",
                      convert=False,
                      k=prefix_size)

    if prefix_size is None:
        prefix_size = max(logfile.data.groupby(logfile.trace).size())
        if prefix_size > 40:
            prefix_size = 40
    logfile.k = prefix_size

    if add_end_event:
        logfile.add_end_events()
    # logfile.keep_attributes(["case", "event", "role", "completeTime"])
    logfile.keep_attributes(["case", "event", "role"])
    logfile.convert2int()
    logfile.create_k_context()
    train_log, test_log = logfile.splitTrainTest(train_percentage,
                                                 case=split_cases,
                                                 method=split_method)

    with open(filename, "a") as fout:
        fout.write("Data: " + data)
        fout.write("\nPrefix Size: " + str(prefix_size))
        fout.write("\nEnd event: " + str(add_end_event))
        fout.write("\nSplit method: " + split_method)
        fout.write("\nSplit cases: " + str(split_cases))
        fout.write("\nTrain percentage: " + str(train_percentage))
        fout.write("\nDate: " +
                   time.strftime("%d.%m.%y-%H.%M", time.localtime()))
        fout.write("\n------------------------------------\n")

    processes = []
    processes.append(
        Process(target=execute_tax,
                args=(train_log, test_log, filename),
                name="Tax"))
    processes.append(
        Process(target=execute_taymouri,
                args=(train_log, test_log, filename),
                name="Taymouri"))
    processes.append(
        Process(target=execute_camargo,
                args=(train_log, test_log, filename),
                name="Camargo"))
    processes.append(
        Process(target=execute_lin,
                args=(train_log, test_log, filename),
                name="Lin"))
    processes.append(
        Process(target=execute_dimauro,
                args=(train_log, test_log, filename),
                name="Di Mauro"))
    processes.append(
        Process(target=execute_pasquadibisceglie,
                args=(train_log, test_log, filename),
                name="Pasquadibisceglie"))
    processes.append(
        Process(target=execute_edbn,
                args=(train_log, test_log, filename),
                name="EDBN"))
    processes.append(
        Process(target=execute_baseline,
                args=(train_log, test_log, filename),
                name="Baseline"))
    # processes.append(Process(target=execute_new_method, args=(train_log, test_log, filename), name="New Method"))

    print("Starting Processes")
    for p in processes:
        p.start()
        print(p.name, "started")

    print("All processes running")

    for p in processes:
        p.join()
        print(p.name, "stopped")

    with open(filename, "a") as fout:
        fout.write("====================================\n\n")

    print("All processes stopped")