예제 #1
0
def test_file_full(file):
    split_dataset(file + "_data.csv", file + "_labels.csv",
                  file + "_train.csv", file + "_test.csv", None)
    train_data = LogFile(file + "_train.csv", ",", 0, 1000000, None, "case_id",
                         "name")
    train_data.remove_attributes(["label"])
    model = edbn.train(train_data)

    test_data = LogFile(file + "_test.csv",
                        ",",
                        0,
                        1000000,
                        None,
                        "case_id",
                        "name",
                        values=train_data.values)
    edbn.test(test_data, file + "_output_full.csv", model, "label", "0",
              train_data)

    plot.plot_single_roc_curve(file + "_output_full.csv",
                               file,
                               save_file="../Data/Nolle_Graphs/" +
                               file.split("/")[-1] + "_roc.png")
    plot.plot_single_prec_recall_curve(file + "_output_full.csv",
                                       file,
                                       save_file="../Data/Nolle_Graphs/" +
                                       file.split("/")[-1] + "_precrec.png")
예제 #2
0
def duration_test():
    path = "../Data/Experiments_Duration/"
    train_rates = [0, 5, 10, 25]
    test_rates = [1, 5, 10, 25, 50, 100, 250, 500]
    anoms_rates = []
    for train_rate in train_rates:
        for test_rate in test_rates:
            anoms_rates.append((train_rate, test_rate))

    for i in range(len(anoms_rates)):
        print(anoms_rates[i])
        scores = []
        for run in range(RUNS):
            print("Run %i" % run)
            train_file = path + "%i_train_%i.csv" % (i, anoms_rates[i][0])
            test_file = path + "%i_test_%i.csv" % (i, anoms_rates[i][1])
            duration_generator.generate(10000, 10000, anoms_rates[i][0],
                                        anoms_rates[i][1], train_file,
                                        test_file)

            train_data = LogFile(train_file, ",", 0, 1000000, "date", "trace")
            train_data.remove_attributes(["Anomaly"])
            test_data = LogFile(test_file,
                                ",",
                                0,
                                1000000,
                                "date",
                                "trace",
                                values=train_data.values)

            train_data.keep_attributes(
                ["event", "date", "trace", "process", "resource", "random"])

            train_data.create_k_context()
            train_data.add_duration_to_k_context()
            bins = train_data.discretize("duration_0")
            test_data.create_k_context()
            test_data.add_duration_to_k_context()
            test_data.discretize("duration_0", bins)

            model = edbn.train(train_data)
            edbn.test(test_data, path + "Output_%i_%i.csv" % anoms_rates[i],
                      model, "anomaly", "0")

            output_file = path + "Output_%i_%i.csv" % anoms_rates[i]
            output_roc = path + "roc_%i_%i.png" % anoms_rates[i]
            output_prec = path + "prec_recall_%i_%i.png" % anoms_rates[i]

            score = plt.get_roc_auc(output_file)
            scores.append(plt.get_roc_auc(output_file))
            print("Score = %f" % score)

        with open(path + "results.txt", "a") as fout:
            fout.write("Testing:\ntrain rate: %i\ntest rate: %i\n" %
                       (anoms_rates[i][0], anoms_rates[i][1]))
            fout.write("Result: " + str(scores) + "\n")
            fout.write("Mean: %f Median: %f\n" %
                       (np.mean(scores), np.median(scores)))
            fout.write("Variance: %f\n\n" % np.var(scores))
예제 #3
0
def compare_bpics(path):
    for i in range(1, 6):
        # Input Files
        train = path + "BPIC15_train_%i.csv" % (i)
        test = path + "BPIC15_test_%i.csv" % (i)
        output = path + "Output/BPIC15_output_%i.csv" % (i)
        output_edbn = path + "Output/BPIC15_edbn_output_%i.csv" % (i)
        prec_recall = path + "Output/prec_recall_%i.png" % (i)
        roc = path + "Output/roc_%i.png" % (i)

        train_data = LogFile(train,
                             ",",
                             0,
                             500000,
                             "Time",
                             "Case",
                             activity_attr="Activity",
                             convert=False)
        train_data.remove_attributes(["Anomaly", "Type", "Time"])
        test_data = LogFile(test,
                            ",",
                            0,
                            500000,
                            "Time",
                            "Case",
                            activity_attr="Activity",
                            values=train_data.values,
                            convert=False)

        bohmer_model = bmr.train(train_data)
        bmr.test(test_data,
                 output,
                 bohmer_model,
                 label="Anomaly",
                 normal_val="0")

        train_data.convert2int()
        test_data.convert2int()

        edbn_model = edbn.train(train_data)
        edbn.test(test_data,
                  output_edbn,
                  edbn_model,
                  label="Anomaly",
                  normal_val="0")

        plt.plot_compare_prec_recall_curve([output, output_edbn],
                                           ["Likelihood Graph", "EDBN"],
                                           save_file=prec_recall)
        plt.plot_compare_roc_curve([output, output_edbn],
                                   ["Likelihood Graph", "EDBN"], roc)
예제 #4
0
def categorical_test():
    path = "../Data/Experiments/"
    train_rates = [0, 5, 10, 25]
    test_rates = [1, 5, 10, 25, 50, 100, 250, 500]
    anoms_rates = []
    for train_rate in train_rates:
        for test_rate in test_rates:
            anoms_rates.append((train_rate, test_rate))

    for i in range(len(anoms_rates)):
        print(anoms_rates[i])
        scores = []
        for run in range(RUNS):
            print("Run %i" % run)
            train_file = path + "%i_train_%i.csv" % (i, anoms_rates[i][0])
            test_file = path + "%i_test_%i.csv" % (i, anoms_rates[i][1])
            generator.create_shipment_data(10000, 10000, anoms_rates[i][0],
                                           anoms_rates[i][1], train_file,
                                           test_file)

            train_data = LogFile(train_file, ",", 0, 1000000, None, "Case")
            train_data.remove_attributes(["Anomaly"])
            test_data = LogFile(test_file,
                                ",",
                                0,
                                1000000,
                                None,
                                "Case",
                                values=train_data.values)

            model = edbn.train(train_data)
            edbn.test(test_data, path + "Output_%i_%i.csv" % anoms_rates[i],
                      model, "Anomaly", "0")

            output_file = path + "Output_%i_%i.csv" % anoms_rates[i]
            output_roc = path + "roc_%i_%i.png" % anoms_rates[i]
            output_prec = path + "prec_recall_%i_%i.png" % anoms_rates[i]

            score = plt.get_roc_auc(output_file)
            scores.append(plt.get_roc_auc(output_file))
            print("Score = %f" % score)

        with open(path + "results.txt", "a") as fout:
            fout.write("Testing:\ntrain rate: %i\ntest rate: %i\n" %
                       (anoms_rates[i][0], anoms_rates[i][1]))
            fout.write("Result: " + str(scores) + "\n")
            fout.write("Mean: %f Median: %f\n" %
                       (np.mean(scores), np.median(scores)))
            fout.write("Variance: %f\n\n" % np.var(scores))