Beispiel #1
0
def experiment_outliers():
    """Print every score key whose mean event score is below 1e-12.

    Steps:

    1. Unpickle the model stored in ``model_30000``.
    2. Build a training ``LogFile`` from ``../Data/bpic2018.csv``, strip the
       listed attributes and call ``convert2int()``; only its ``values`` are
       used afterwards, as the ``values=`` argument of the second log.
    3. Build a second ``LogFile`` over the same CSV, keep the rows selected
       by ``self.data.year == 1`` and strip the listed attributes.
    4. Score that log with ``cd.get_event_scores`` and print ``key, score``
       for every key whose ``log10(mean(scores[key]))`` is below -12.

    Keys whose scores sum to exactly 0 are skipped, because the logarithm of
    a zero mean is undefined.

    NOTE(review): the 30000 passed to ``LogFile`` is presumably a row limit
    matching the model file name -- confirm against ``LogFile``.
    """
    # NOTE: unpickling runs arbitrary code from the file; only load a model
    # file that comes from a trusted source.
    with open("model_30000", "rb") as fin:
        model = pickle.load(fin)

    train = LogFile("../Data/bpic2018.csv",
                    ",",
                    0,
                    30000,
                    "startTime",
                    "case",
                    activity_attr=None,
                    integer_input=False,
                    convert=False)
    train.remove_attributes([
        "eventid", "identity_id", "event_identity_id", "year", "penalty_",
        "amount_applied", "payment_actual", "penalty_amount", "risk_factor",
        "cross_compliance", "selected_random", "selected_risk",
        "selected_manually", "rejected"
    ])
    train.convert2int()

    data = LogFile("../Data/bpic2018.csv",
                   ",",
                   0,
                   None,
                   "startTime",
                   "case",
                   convert=False,
                   values=train.values)
    # The filter refers to `year`, so it has to run before that attribute is
    # handed to remove_attributes() below.
    data.filter("self.data.year == 1")
    data.remove_attributes([
        "event_identity_id", "year", "penalty_", "amount_applied",
        "payment_actual", "penalty_amount", "risk_factor", "cross_compliance",
        "selected_random", "selected_risk", "selected_manually", "rejected"
    ])

    scores = cd.get_event_scores(data, model)
    for key in scores:
        event_scores = scores[key]
        total = sum(event_scores)
        if total == 0:
            continue

        score = math.log10(total / len(event_scores))
        if score < -12:
            # NOTE(review): the previous version tried to translate `key`
            # through `attr_dicts[0]`, but `attr_dicts` was always an empty
            # list, so the first outlier raised IndexError. The key is
            # printed as-is until a real key -> case-name mapping exists.
            print(key, score)
Beispiel #2
0
def experiment_clusters():
    """Plot per-attribute scores for entries in two combined-score bands.

    Steps:

    1. Unpickle the model stored in ``model_30000``.
    2. Build a training ``LogFile`` from ``../Data/bpic2018.csv``, strip the
       listed attributes and call ``convert2int()``; only its ``values`` are
       used afterwards, as the ``values=`` argument of the second log.
    3. Build a second ``LogFile`` over the same CSV, keep the rows selected
       by ``self.data.year == 1``, strip the listed attributes and call
       ``convert2int()``.
    4. Fetch ``cd.get_event_detailed_scores`` (a mapping from attribute to a
       sequence of scores) and, for every index, multiply the scores of all
       attributes into one combined score. A per-attribute score of ``-5``
       is treated as a sentinel that forces the combined score to 0.
    5. Collect the per-attribute scores of entries whose combined score lies
       in ``(-10, -8)`` into ``upper`` and in ``(-12, -10)`` into ``lower``,
       print the attribute list and ``upper``, and pass both collections to
       ``cd.plot_attribute_graph``.

    NOTE(review): the band limits look like log10-scale values (the sibling
    outlier experiment applies ``math.log10`` before comparing with -12),
    while the combined score here is a raw product. Confirm whether a
    ``log10`` is missing before relying on the plots.
    """
    # NOTE: unpickling runs arbitrary code from the file; only load a model
    # file that comes from a trusted source.
    with open("model_30000", "rb") as fin:
        model = pickle.load(fin)

    train = LogFile("../Data/bpic2018.csv", ",", 0, 30000, "startTime",
                    "case", activity_attr=None, integer_input=False,
                    convert=False)
    train.remove_attributes([
        "eventid", "identity_id", "event_identity_id", "year", "penalty_",
        "amount_applied", "payment_actual", "penalty_amount", "risk_factor",
        "cross_compliance", "selected_random", "selected_risk",
        "selected_manually", "rejected"
    ])
    train.convert2int()

    data = LogFile("../Data/bpic2018.csv", ",", 0, None, "startTime", "case",
                   convert=False, values=train.values)
    # Filter first: the condition refers to `year`, which is handed to
    # remove_attributes() right below. This matches the order used by
    # experiment_outliers().
    # NOTE(review): assumes filter() evaluates the condition against the
    # current columns -- confirm against LogFile.
    data.filter("self.data.year == 1")
    data.remove_attributes([
        "event_identity_id", "year", "penalty_", "amount_applied",
        "payment_actual", "penalty_amount", "risk_factor", "cross_compliance",
        "selected_random", "selected_risk", "selected_manually", "rejected"
    ])
    data.convert2int()

    scores = cd.get_event_detailed_scores(data, model)

    # First calculate the combined score per entry.
    attributes = list(scores.keys())
    # An empty result yields empty bands instead of an IndexError.
    num_traces = len(scores[attributes[0]]) if attributes else 0
    upper = {a: [] for a in attributes}
    lower = {a: [] for a in attributes}

    for trace_ix in range(num_traces):
        score = 1
        for a in attributes:
            a_score = scores[a][trace_ix]
            if a_score == -5:
                # Sentinel value: discard the product for this entry.
                score = 0
                break
            score *= a_score

        # The bounds were previously written as `-8 < score < -10` and
        # `-10 < score < -12`, which no number can satisfy, so both bands
        # always stayed empty.
        if -10 < score < -8:
            band = upper
        elif -12 < score < -10:
            band = lower
        else:
            continue

        for a in attributes:
            band[a].append(scores[a][trace_ix])

    print(attributes)
    print(upper)
    cd.plot_attribute_graph(upper, attributes)
    cd.plot_attribute_graph(lower, attributes)