Beispiel #1
0
def breast_discrete_exec():
    """Run the discrete breast-data experiment from raw CSVs to plots.

    Steps, in order:

    1. Read the feature and label CSVs (both without header rows) and
       attach the first label column to the features as ``Label``.
    2. Prefix every column name with ``"V"`` (so the label becomes
       ``VLabel``) and add a running ``ID`` column.
    3. Use the first 100 rows for training and the remainder for testing;
       from the training rows keep only those with ``VLabel == 0`` and
       drop the label column.
    4. Write both splits to CSV, reload each as a ``LogFile`` with ``k``
       set to 0, train with ``edbn.train`` and score with ``edbn.test``.
    5. Plot the ROC and precision-recall curves from the scored output.

    Takes no arguments and returns nothing; all results go to files under
    ``../Data/`` and to the plotting helpers.
    """
    features_path = "../Data/breast_data.csv"
    labels_path = "../Data/breast_labels.csv"
    train_path = "../Data/breast_train.csv"
    test_path = "../Data/breast_test.csv"
    output_path = "../Data/breast_discrete_output.csv"
    plot_name = "breast_discrete"

    def _open_log(csv_path):
        # Both splits are loaded with identical settings, then k is forced to 0.
        loaded = LogFile(csv_path, ",", 0, 500000, None, "ID",
                         activity_attr="Activity")
        loaded.k = 0
        return loaded

    frame = pd.read_csv(features_path, header=None)
    label_frame = pd.read_csv(labels_path, header=None)
    frame["Label"] = label_frame[0]

    # The ID column is added after the rename so it keeps its plain name.
    frame.columns = ["V" + str(name) for name in frame.columns]
    frame['ID'] = frame.reset_index().index
    print(frame)

    head_rows = frame.iloc[:100]
    test_rows = frame.iloc[100:]
    # Training uses only the rows labelled 0, with the label itself removed.
    is_zero_label = head_rows["VLabel"] == 0
    train_rows = head_rows[is_zero_label].drop(columns=["VLabel"])

    train_rows.to_csv(train_path, index=False)
    test_rows.to_csv(test_path, index=False)

    train_data = _open_log(train_path)
    model = edbn.train(train_data)

    test_data = _open_log(test_path)
    print(test_data.data)
    # NOTE(review): "VLabel" / "0" presumably name the label column and its
    # normal value for scoring -- confirm against edbn.test.
    edbn.test(test_data, output_path, model, "VLabel", "0")

    plot.plot_single_roc_curve(output_path, plot_name)
    plot.plot_single_prec_recall_curve(output_path, plot_name)
Beispiel #2
0
    case_attr = "case"
    act_attr = "event"

    logfile = LogFile(data,
                      ",",
                      0,
                      None,
                      None,
                      case_attr,
                      activity_attr=act_attr,
                      convert=False,
                      k=5)

    prefix_size = max(logfile.data.groupby(logfile.trace).size())
    if prefix_size > 40:
        prefix_size = 40
    logfile.k = prefix_size

    logfile.add_end_events()

    logfile.keep_attributes(["case", "event", "role"])
    logfile.convert2int()

    logfile.create_k_context()
    train_log, test_log = logfile.splitTrainTest(66,
                                                 case=False,
                                                 method="train-test")

    model = train(train_log, 100, 10)
    acc = test(test_log, model)
    print(acc)