def test_mutated_tree(self):
    """Check that pt_compare reports the subtrees touched by a mutation.

    Repeats 100 times: build a tree with ``pt_create.apply`` from a random
    integer in [10, 35], mutate a deep copy with each of the three
    ``pt_mutate`` operations, and assert that the string form of the
    subtrees returned by ``pt_compare.apply`` matches the string form of
    the subtrees the mutation reported.
    """
    # NOTE(review): nesting reconstructed from a collapsed one-line source;
    # all three scenarios are assumed to live inside the loop -- confirm.

    def expect(first, second, diff):
        # Subtrees are compared through their string representation.
        self.assertEqual(str(first), str(diff.subtree1))
        self.assertEqual(str(second), str(diff.subtree2))

    for _ in range(100):
        base = pt_create.apply(random.randint(10, 35))

        # Scenario 1: the operator of a node is changed.
        mutated = copy.deepcopy(base)
        old_sub, new_sub = pt_mutate.change_node_operator(mutated, None)
        diff = pt_compare.apply(base, mutated)
        expect(old_sub, new_sub, diff)

        # Scenario 2: a node is added.
        mutated = copy.deepcopy(base)
        old_sub, new_sub = pt_mutate.add_new_node(mutated, None)
        diff = pt_compare.apply(base, mutated)
        loop_with_inner_operator = (
            old_sub.operator == Operator.LOOP
            and old_sub.children[1].operator is not None
        )
        if loop_with_inner_operator:
            # For a loop whose second child is itself an operator node,
            # the expected difference is that second child.
            expect(old_sub.children[1], new_sub.children[1], diff)
        else:
            expect(old_sub, new_sub, diff)

        # Scenario 3: a node is removed.
        mutated = copy.deepcopy(base)
        old_sub, new_sub = pt_mutate.remove_node(mutated, None)
        diff = pt_compare.apply(base, mutated)
        child_count = len(old_sub.children)
        if child_count > 2:
            reported_at_node = True
        elif child_count == 2:
            parent = old_sub.parent
            reported_at_node = (
                parent is None
                or (new_sub.operator is not None
                    and new_sub.operator != parent.operator)
                or new_sub.operator == Operator.LOOP
            )
        else:
            reported_at_node = False

        if reported_at_node:
            expect(old_sub, new_sub, diff)
        else:
            # Otherwise the difference is expected one level up.
            expect(old_sub.parent, new_sub.parent, diff)
def operator_analyse():
    """Plot how often each operator is the parent of the changed subtree.

    For every sheet ("11-15", "16-18", "19-21", "22-24") the tree pairs are
    read from ``MProcessTree.xlsx`` and the ``grade`` column from
    ``iar.xlsx`` (both under ``PATH``). For each row whose grade is not 1,
    the pair is compared with ``pt_compare.apply``; if the reported
    ``subtree1`` has a parent, that parent's operator is counted. The
    counts are drawn as a pie chart, written to
    ``PATH + "../figure/ParentOperatorAnalyse.png"`` and then displayed.
    """
    import matplotlib.pyplot as plt
    from repair_alignment.process_tree.operation import pt_compare
    from pm4py.objects.process_tree.pt_operator import Operator

    labels = ["Xor", "Sequence", "Parallel", "Loop"]
    # Position of each counted operator in ``opt`` / ``labels``.
    slot_of = {
        Operator.XOR: 0,
        Operator.SEQUENCE: 1,
        Operator.PARALLEL: 2,
        Operator.LOOP: 3,
    }
    opt = [0] * len(labels)

    for first, last in [(11, 15), (16, 18), (19, 21), (22, 24)]:
        sn = str(first) + "-" + str(last)
        data = pd.read_excel(PATH + "MProcessTree.xlsx", sheet_name=sn, header=0)
        grade = pd.read_excel(PATH + "iar.xlsx", sheet_name=sn, header=0)['grade'].tolist()
        trees = data['tree']
        m_trees = data['m_tree']
        for index, row_grade in enumerate(grade):
            if row_grade == 1:
                # Rows with grade 1 are not counted, so skip parsing them.
                continue
            com_res = pt_compare.apply(pt_utils.parse(trees[index]),
                                       pt_utils.parse(m_trees[index]))
            parent = com_res.subtree1.parent
            if parent is None:
                continue
            slot = slot_of.get(parent.operator)
            if slot is not None:
                opt[slot] += 1

    df = pd.DataFrame({"Ratio of Operators": opt}, index=labels)
    df.plot.pie(subplots=True, colors=palette, figsize=(5, 5), autopct='%.0f%%')
    plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
    # Save before show(): once the window shown by show() is closed the
    # figure is released, and a later savefig() would write an empty image.
    plt.savefig(PATH + "../figure/ParentOperatorAnalyse.png", dpi=300)
    plt.show()
def apply(tree1, tree2, log, alignments, parameters=None):
    """
    Alignment repair on tree2 based on the alignment of log on tree1

    Parameters
    -----------
    tree1
        Process Tree
    tree2
        Process Tree
    log
        EventLog
    alignments
        related alignment of log on tree1
    parameters
        Optional dict. ``'COMPARE_OPTION'`` (default 1) is forwarded to
        ``pt_compare.apply``; ``PARAM_ALIGNMENT_RESULT_IS_SYNC_PROD_AWARE``
        (default False) is forwarded to the scope detection and
        reassembly helpers. The dict passed by the caller is not modified.

    Returns
    ------------
    alignments
        repaired alignments (a deep copy; the input list is left untouched)
    """
    # Work on a shallow copy so the default written below does not leak
    # into the caller's dict.
    parameters = {} if parameters is None else dict(parameters)
    if parameters.get('COMPARE_OPTION') is None:
        parameters['COMPARE_OPTION'] = 1
    ret_tuple_as_trans_desc = parameters.get(PARAM_ALIGNMENT_RESULT_IS_SYNC_PROD_AWARE)
    if ret_tuple_as_trans_desc is None:
        ret_tuple_as_trans_desc = False

    # TODO: if the given alignment is not True, try-catch
    alignments = copy.deepcopy(alignments)
    com_res = pt_compare.apply(tree1, tree2, parameters['COMPARE_OPTION'])
    if com_res.value:
        # Truthy comparison value: the copied alignments are returned as is.
        return alignments

    tree1_total_number = pt_mani_utils.nodes_number(tree1)
    pt_number.apply(com_res.subtree2, 'D', tree1_total_number + 1)
    # Cost of aligning a single empty trace on tree2.
    best_worst_cost = apply_pt_alignments(EventLog([Trace()]), tree2, parameters)[0]['cost']

    # NOTE(review): nesting below was reconstructed from a collapsed
    # one-line source. The cost/fitness recomputation is placed inside the
    # non-empty-scope branch (it needs the sub-alignments computed there)
    # and the "repair" marker is set for every visited alignment -- confirm.
    for i, align in enumerate(alignments):
        if align.get("repair") is not None:
            # Already handled: the same dict object appeared earlier in
            # the list, so it must not be repaired a second time.
            continue
        scope = detect_change_scope(align['alignment'], com_res.subtree1,
                                    log[i], ret_tuple_as_trans_desc)
        if len(scope.traces) != 0:
            sub_aligns_before = apply_pt_alignments(
                EventLog(scope.traces), com_res.subtree1, parameters)
            sub_aligns_after = apply_pt_alignments(
                EventLog(scope.traces), com_res.subtree2, parameters)
            alignment_reassemble(align['alignment'], sub_aligns_after,
                                 scope.anchor_index, com_res.subtree1,
                                 ret_tuple_as_trans_desc)
            recompute_cost(align, sub_aligns_before, sub_aligns_after)
            recompute_fitness(align, log[i], best_worst_cost)
        align["repair"] = True

    # Strip the temporary bookkeeping marker before returning.
    for a in alignments:
        a.pop("repair", None)
    return alignments
def apply_with_lock(alignments, tree, m_tree, option=1):
    """Expand the changed scope in a deep copy of ``alignments``.

    ``tree`` and ``m_tree`` are compared with ``pt_compare.apply`` using
    ``option``. When the comparison value is falsy, ``scope_expand_trace``
    is called on each alignment's ``"alignment"`` entry with the reported
    ``subtree1`` and ``True`` as the last argument. The input list is not
    modified; the processed copy is returned.
    """
    # NOTE(review): nesting reconstructed from a collapsed one-line source;
    # the marker clean-up is assumed to run regardless of the comparison
    # result -- confirm.
    expanded = copy.deepcopy(alignments)
    diff = pt_compare.apply(tree, m_tree, option)

    if not diff.value:
        for entry in expanded:
            if entry.get("expand") is not None:
                # Same dict object seen earlier in the list; skip it.
                continue
            scope_expand_trace(entry["alignment"], diff.subtree1, True)
            entry["expand"] = True

    # Remove the temporary "expand" marker again.
    for entry in expanded:
        if entry.get("expand") is not None:
            entry.pop("expand")
    return expanded
def apply(alignments, tree, m_tree, parameters=None):
    """Expand the changed scope in a deep copy of ``alignments``.

    Parameters
    -----------
    alignments
        list of alignment dicts; each must provide an ``"alignment"`` entry
    tree
        first tree handed to ``pt_compare.apply``
    m_tree
        second tree handed to ``pt_compare.apply``
    parameters
        Optional dict. ``'COMPARE_OPTION'`` (default 1) is forwarded to
        ``pt_compare.apply``; ``PARAM_ALIGNMENT_RESULT_IS_SYNC_PROD_AWARE``
        (default False) is forwarded to ``scope_expand_trace``. The dict
        passed by the caller is not modified.

    Returns
    ------------
    alignments
        processed deep copy of the input alignments
    """
    # Work on a shallow copy so the default written below does not leak
    # into the caller's dict.
    parameters = {} if parameters is None else dict(parameters)
    if parameters.get('COMPARE_OPTION') is None:
        parameters['COMPARE_OPTION'] = 1
    ret_tuple_as_trans_desc = parameters.get(PARAM_ALIGNMENT_RESULT_IS_SYNC_PROD_AWARE)
    if ret_tuple_as_trans_desc is None:
        ret_tuple_as_trans_desc = False

    alignments = copy.deepcopy(alignments)
    com_res = pt_compare.apply(tree, m_tree, parameters['COMPARE_OPTION'])

    # NOTE(review): nesting reconstructed from a collapsed one-line source;
    # the marker clean-up is assumed to run regardless of the comparison
    # result -- confirm.
    if not com_res.value:
        for align in alignments:
            if align.get("expand") is not None:
                # Same dict object seen earlier in the list; skip it.
                continue
            scope_expand_trace(align["alignment"], com_res.subtree1,
                               ret_tuple_as_trans_desc)
            align["expand"] = True

    # Remove the temporary "expand" marker again (an explicit None value
    # is left in place, as before).
    for a in alignments:
        if a.get("expand") is not None:
            a.pop("expand")
    return alignments
def split_real_life_data(file):
    """Split a trace CSV into tenth-sized parts and mine a tree from each.

    Reads the ``trace`` column of ``PATH + file``. The first tenth of the
    traces is written to ``<name>_tree.csv`` and mined with
    ``inductive_miner.apply_tree``; the next four tenths are written to
    ``<name>_part2.csv`` .. ``<name>_part5.csv`` and mined likewise. For
    each part, prints whether its tree has the same string form as the
    first tree, followed by the ``pt_compare.apply`` result.

    Parameters
    -----------
    file
        CSV file name relative to ``PATH``; the text before the first
        ``"."`` is used as the prefix of the output files
    """
    name = file.split(".")[0]
    logs = pd.read_csv(PATH + file)["trace"].tolist()
    # One chunk size for every slice, so consecutive parts are contiguous
    # and equally long even when len(logs) is not a multiple of 10.
    chunk = len(logs) // 10

    log_tree = logs[:chunk]
    pd.DataFrame(log_tree, columns=["trace"]).to_csv(PATH + name + "_tree.csv", index=False)
    tree = inductive_miner.apply_tree(list_to_xes(log_tree))
    print(str(tree))

    # NOTE(review): parts are written as _part2.._part5, while `result`
    # in this source reads _part1.._part4 -- confirm which numbering is
    # intended.
    for i in range(1, 5):
        log_part = logs[i * chunk:(i + 1) * chunk]
        pd.DataFrame(log_part, columns=["trace"]).to_csv(
            PATH + name + "_part" + str(i + 1) + ".csv", index=False)
        m_tree = inductive_miner.apply_tree(list_to_xes(log_part))
        print(str(tree) == str(m_tree))
        print(pt_compare.apply(tree, m_tree))
def remove_repeat_child(node):
    """
    Remove the repeatable child, if the operator is XOR

    Children are scanned from last to first. A child is removed from
    ``node.children`` in place when ``pt_compare.apply`` reports a truthy
    ``value`` against any child already kept, so of several matching
    children the last one survives. Nodes with another operator are left
    untouched.

    Parameters
    -----------
    node
        process tree
    """
    if node.operator != Operator.XOR:
        return

    kept = []
    # Walk backwards so that pop(i) never shifts an index still to visit.
    for i in range(len(node.children) - 1, -1, -1):
        candidate = node.children[i]
        is_repeat = any(pt_compare.apply(candidate, other).value
                        for other in kept)
        if is_repeat:
            node.children.pop(i)
        else:
            kept.append(candidate)
def result(file):
    """Print size statistics for the trees mined from the split CSV files.

    Mines a tree from ``<name>_tree.csv`` and from ``<name>_part1.csv`` ..
    ``<name>_part4.csv`` (all under ``PATH``). Prints the depth of the
    first tree, then for every part: the depth of its tree, its number of
    nodes in BFS order followed by ``"+"``, and the BFS lengths of the two
    subtrees reported by ``pt_compare.apply``.
    """
    # NOTE(review): nesting reconstructed from a collapsed one-line source;
    # everything after the initial read is assumed to be inside the
    # single-iteration loop -- confirm.
    name = file.split(".")[0]
    logs = pd.read_csv(PATH + file)["trace"].tolist()
    prefix = PATH + name

    for _ in range(1):
        # The sampled log is not used afterwards; the call is kept so the
        # random state advances exactly as before.
        sampled_log = list_to_xes(random.sample(logs, len(logs)//10))
        # log = pd.read_csv("../../../data/reallife_test.csv")["trace"].tolist()

        tree_traces = pd.read_csv(prefix + "_tree.csv")["trace"].tolist()
        tree = inductive_miner.apply_tree(list_to_xes(tree_traces))
        print(ra_pt_utils.pt_depth(str(tree)))

        m_trees = []
        for part_no in range(1, 5):
            part_traces = pd.read_csv(prefix + "_part" + str(part_no) + ".csv")["trace"].tolist()
            m_tree = inductive_miner.apply_tree(list_to_xes(part_traces))
            print(ra_pt_utils.pt_depth(str(m_tree)))
            node_count = len(ra_pt_utils.parse_tree_to_a_bfs_sequence(m_tree))
            print(node_count, "+")
            m_trees.append(m_tree)

            res_com = pt_compare.apply(tree, m_tree)
            size_before = len(ra_pt_utils.parse_tree_to_a_bfs_sequence(res_com.subtree1))
            size_after = len(ra_pt_utils.parse_tree_to_a_bfs_sequence(res_com.subtree2))
            print(size_before, size_after)
def apply_with_alignments(tree, m_tree, log, alignments, parameters=None, option=1):
    """Return the given alignments together with their repaired version.

    Both trees are first numbered with ``pt_number.apply(..., 'D')`` and
    then compared with ``pt_compare.apply`` using ``option``. When the
    comparison value is truthy the second element is a deep copy of
    ``alignments``; otherwise it is the result of
    ``compute_repairing_alignments``.

    Returns
    ------------
    (alignments, repairing_alignment)
        the input alignments object and the repaired alignments
    """
    for numbered_tree in (tree, m_tree):
        pt_number.apply(numbered_tree, 'D')

    diff = pt_compare.apply(tree, m_tree, option)
    if diff.value:
        return alignments, copy.deepcopy(alignments)

    mapping_t = {}
    tree_info = {}
    tree_utils.recursively_init_tree_tables(tree, tree_info, mapping_t, [1])

    # Cost of aligning a single empty trace on the second tree.
    empty_trace_alignments = alignments_on_pt(EventLog([Trace()]), m_tree, parameters)
    best_worst_cost = empty_trace_alignments[0]['cost']

    repaired = compute_repairing_alignments(
        diff, log, alignments, tree_info, mapping_t, parameters, best_worst_cost)
    return alignments, repaired
def run_feature(row):
    """Build the feature vector for one (tree, m_tree, log) row.

    The two tree strings are parsed and compared with
    ``pt_compare.apply(..., 2)``. The returned ``pd.Series`` holds, in
    order: the minimum per-trace share of events outside the changed
    subtree, four operator shares (loop, xor, and, seq) outside the
    changed subtree, the loop counts of both reported subtrees, three
    operators (root, subtree1, subtree2) and two relative subtree depths.
    """
    def occurrences(pattern, text):
        return len(re.findall(pattern, text))

    tree_str = row['tree']
    pt = pt_utils.parse(tree_str)
    m_pt = pt_utils.parse(row['m_tree'])
    com_res = pt_compare.apply(pt, m_pt, 2)
    sub1_str = str(com_res.subtree1)
    sub2_str = str(com_res.subtree2)

    # depth: subtree depth relative to the depth of its whole tree
    sub1_depth = pt_mani_utils.pt_depth(sub1_str)
    sub2_depth = pt_mani_utils.pt_depth(sub2_str)
    depths = [
        sub1_depth / pt_mani_utils.pt_depth(tree_str),
        sub2_depth / pt_mani_utils.pt_depth(row['m_tree']),
    ]

    # trace fit: lowercase letters of the tree that are not in subtree1
    nei_l = set(re.findall("[a-z]", tree_str)) - set(re.findall("[a-z]", sub1_str))
    min_fit = 1
    for trace in row['log'].strip().split(", "):
        outside = sum(1 for event in trace if event in nei_l)
        min_fit = min(min_fit, outside / len(trace))

    # type of operators
    ops = [pt.operator, com_res.subtree1.operator, com_res.subtree2.operator]

    # number of operators in the tree outside subtree1: loop, xor, and, seq
    outside_counts = [
        occurrences(pattern, tree_str) - occurrences(pattern, sub1_str)
        for pattern in (r'\*', r'X', r'\+', r'->')
    ]
    total = sum(outside_counts)
    if total == 0:
        shares = [0, 0, 0, 0]
    else:
        shares = [count / total for count in outside_counts]
    rate = shares + [occurrences(r'\*', sub1_str), occurrences(r'\*', sub2_str)]

    # subtree parent loop
    # NOTE(review): these ancestor counts are computed but are not part of
    # the returned Series.
    loop_ancestors, xor_ancestors = 0, 0
    node = com_res.subtree1
    while node.parent is not None:
        parent_operator = node.parent.operator
        if parent_operator == Operator.LOOP:
            loop_ancestors += 1
        if parent_operator == Operator.XOR:
            xor_ancestors += 1
        node = node.parent

    return pd.Series([min_fit] + rate + ops + depths)
def compare_diff_tree(s_tree1, s_tree2, option=1):
    """Parse two process-tree strings and return their comparison result.

    Both strings go through ``pt_utils.parse``; the parsed trees and
    ``option`` are handed to ``pt_compare.apply``, whose result is
    returned unchanged.
    """
    return pt_compare.apply(
        pt_utils.parse(s_tree1),
        pt_utils.parse(s_tree2),
        option,
    )