def experiment_outliers():
    """Print the low-scoring entries of the filtered BPIC 2018 log.

    Loads the pickled model from ``model_30000``, builds a training
    ``LogFile`` from the first 30000 rows of ``../Data/bpic2018.csv`` (only
    to obtain its ``values`` for the second log), then scores the rows of the
    full log that satisfy ``self.data.year == 1`` with
    ``cd.get_event_scores``.  Every entry whose log10 of the mean score is
    below -12 is printed.

    Returns:
        None.  Results are written to standard output.
    """
    # NOTE: pickle.load can execute arbitrary code; only load a trusted file.
    with open("model_30000", "rb") as fin:
        model = pickle.load(fin)

    train = LogFile("../Data/bpic2018.csv", ",", 0, 30000, "startTime", "case",
                    activity_attr=None, integer_input=False, convert=False)
    train.remove_attributes([
        "eventid", "identity_id", "event_identity_id", "year", "penalty_",
        "amount_applied", "payment_actual", "penalty_amount", "risk_factor",
        "cross_compliance", "selected_random", "selected_risk",
        "selected_manually", "rejected"
    ])
    train.convert2int()

    # NOTE(review): nothing in this function fills attr_dicts, so the
    # score-key -> case lookup below has no data to work with.  Populate it
    # from wherever the case mapping lives to get the case names back.
    attr_dicts = []

    data = LogFile("../Data/bpic2018.csv", ",", 0, None, "startTime", "case",
                   convert=False, values=train.values)
    # Filter before removing attributes: "year" is one of the removed ones.
    data.filter("self.data.year == 1")
    data.remove_attributes([
        "event_identity_id", "year", "penalty_", "amount_applied",
        "payment_actual", "penalty_amount", "risk_factor", "cross_compliance",
        "selected_random", "selected_risk", "selected_manually", "rejected"
    ])

    scores = cd.get_event_scores(data, model)

    # Indexing an empty attr_dicts used to raise IndexError on the first
    # outlier found; fall back to printing the raw score key instead.
    case_lookup = attr_dicts[0] if attr_dicts else None

    for s in scores:
        values = scores[s]
        total = sum(values)
        if total != 0:  # log10(0) is undefined; also skips empty score lists
            score = math.log10(total / len(values))
            if score < -12:
                if case_lookup is None:
                    print(s, score)
                else:
                    for case in case_lookup:
                        if case_lookup[case] == s:
                            print(s, case, score)
def experiment_clusters():
    """Plot per-attribute scores for two score bands of the BPIC 2018 log.

    Loads the pickled model from ``model_30000``, builds a training
    ``LogFile`` from the first 30000 rows of ``../Data/bpic2018.csv`` (only
    to obtain its ``values`` for the second log), and scores the rows of the
    full log that satisfy ``self.data.year == 1`` with
    ``cd.get_event_detailed_scores``.  Each trace gets a combined score (the
    product of its attribute scores); traces with a combined score in
    (-10, -8) go into ``upper`` and those in (-12, -10) into ``lower``.  Both
    groups are passed to ``cd.plot_attribute_graph``.

    Returns:
        None.  Output is printed and plotted.
    """
    # NOTE: pickle.load can execute arbitrary code; only load a trusted file.
    with open("model_30000", "rb") as fin:
        model = pickle.load(fin)

    train = LogFile("../Data/bpic2018.csv", ",", 0, 30000, "startTime", "case",
                    activity_attr=None, integer_input=False, convert=False)
    train.remove_attributes(["eventid", "identity_id", "event_identity_id",
                             "year", "penalty_", "amount_applied",
                             "payment_actual", "penalty_amount", "risk_factor",
                             "cross_compliance", "selected_random",
                             "selected_risk", "selected_manually", "rejected"])
    train.convert2int()

    data = LogFile("../Data/bpic2018.csv", ",", 0, None, "startTime", "case",
                   convert=False, values=train.values)
    # The filter refers to "year", which remove_attributes is asked to drop,
    # so it has to run first (same order as experiment_outliers).
    # NOTE(review): assumes remove_attributes really removes the column.
    data.filter("self.data.year == 1")
    data.remove_attributes(["event_identity_id", "year", "penalty_",
                            "amount_applied", "payment_actual",
                            "penalty_amount", "risk_factor",
                            "cross_compliance", "selected_random",
                            "selected_risk", "selected_manually", "rejected"])
    data.convert2int()

    scores = cd.get_event_detailed_scores(data, model)

    # First calculate score per trace
    attributes = list(scores.keys())
    num_traces = len(scores[attributes[0]])

    upper = {a: [] for a in attributes}
    lower = {a: [] for a in attributes}

    for trace_ix in range(num_traces):
        score = _combined_trace_score(scores, trace_ix)
        # The bounds were previously written as "-8 < score < -10" and
        # "-10 < score < -12", which no number satisfies, so both groups
        # always stayed empty.
        # NOTE(review): the raw product is compared against negative
        # thresholds; confirm whether a log10 of the product was intended.
        if -10 < score < -8:
            for a in scores:
                upper[a].append(scores[a][trace_ix])
        elif -12 < score < -10:
            for a in scores:
                lower[a].append(scores[a][trace_ix])

    print(attributes)
    print(upper)
    cd.plot_attribute_graph(upper, attributes)
    cd.plot_attribute_graph(lower, attributes)


def _combined_trace_score(scores, trace_ix):
    """Return the product of every attribute's score for one trace.

    A score equal to -5 on any attribute makes the combined score 0.
    """
    combined = 1
    for a in scores:
        a_score = scores[a][trace_ix]
        if a_score == -5:
            return 0
        combined *= a_score
    return combined