def test_pt_depth(self):
    """Check ``pt_mani_utils.pt_depth`` on parsed trees of growing nesting."""
    # (expected depth, textual process tree) pairs, checked in this order.
    cases = (
        (1, "a"),
        (2, "X(a, b)"),
        (3, "X(a, ->(b, c), d)"),
        (4, "X(a, ->(b, c, +(e, f)), *(d, X(g, h), τ))"),
    )
    for expected_depth, tree_text in cases:
        parsed = pt_utils.parse(tree_text)
        self.assertEqual(expected_depth, pt_mani_utils.pt_depth(parsed))
def mutate_tree_write_to_table(tab, m_tree, tree_info):
    """Write one row describing the mutated tree ``m_tree`` into ``tab``.

    The row is ``tree_info`` followed by four values derived from ``m_tree``:
    ``pt_mani_utils.nodes_number``, the tree's string form prefixed with a
    single space, ``pt_mani_utils.leaves_number`` and
    ``pt_mani_utils.pt_depth``. It is written to ``tab.table`` at ``tab.row``.
    """
    mutated_columns = [
        pt_mani_utils.nodes_number(m_tree),
        " " + str(m_tree),
        pt_mani_utils.leaves_number(m_tree),
        pt_mani_utils.pt_depth(m_tree),
    ]
    full_row = tree_info + mutated_columns
    excel_utils.write_row_to_table(tab.table, tab.row, full_row)
def result(file):
    """Print depth and size statistics for trees mined from ``file``'s companion logs.

    Reads the ``"trace"`` column of ``PATH + file``, then mines a tree from
    ``<name>_tree.csv`` and from each of ``<name>_part1.csv`` ..
    ``<name>_part4.csv`` (``<name>`` is the text of ``file`` before its first
    dot). For every mined tree its depth is printed; for every part tree the
    length of its BFS node sequence is printed as well, followed by the BFS
    sequence lengths of the two subtrees reported by ``pt_compare.apply``.

    Nothing is returned; all output goes to stdout.
    """
    # Text before the first "." is the prefix shared by the companion CSV files.
    name = file.split(".")[0]
    logs = pd.read_csv(PATH + file)["trace"].tolist()
    # NOTE(review): the loop runs exactly once; the statements below are assumed
    # to belong to its body — confirm the intended nesting.
    for rd in range(1):
        # Random sample of a tenth (floor) of the traces, converted by list_to_xes.
        # NOTE(review): `log` is not read anywhere below.
        log = list_to_xes(random.sample(logs, len(logs)//10))
        # log = pd.read_csv("../../../data/reallife_test.csv")["trace"].tolist()
        tree_log = pd.read_csv(PATH + name + "_tree.csv")["trace"].tolist()
        tree = inductive_miner.apply_tree(list_to_xes(tree_log))
        print(ra_pt_utils.pt_depth(str(tree)))
        # NOTE(review): `m_trees` is filled but never read in this function.
        m_trees = []
        for i in range(4):
            # Part files are numbered from 1: "<name>_part1.csv" .. "<name>_part4.csv".
            m_tree_log = pd.read_csv(PATH + name + "_part" + str(i + 1) + ".csv")["trace"].tolist()
            m_tree = inductive_miner.apply_tree(list_to_xes(m_tree_log))
            print(ra_pt_utils.pt_depth(str(m_tree)))
            print(len(ra_pt_utils.parse_tree_to_a_bfs_sequence(m_tree)), "+")
            m_trees.append(m_tree)
            # NOTE(review): assumed to run once per part tree inside this loop —
            # confirm it was not meant to run only once after the loop.
            res_com = pt_compare.apply(tree, m_tree)
            print(len(ra_pt_utils.parse_tree_to_a_bfs_sequence(res_com.subtree1)), len(ra_pt_utils.parse_tree_to_a_bfs_sequence(res_com.subtree2)))
def randomly_choose_node(tree, level):
    """
    Randomly choose an operator (inner) node of a process tree

    Parameters
    -----------
    tree
        Original Process Tree
    level
        The maximal depth, as reported by ``pt_mani_utils.pt_depth``, of the
        chosen node. ``None`` means no extra limit; values larger than the
        depth of ``tree`` are clamped to that depth.

    Returns
    ------------
    node
        A node picked uniformly at random among the nodes of the BFS sequence
        whose ``operator`` is not ``None`` and whose depth does not exceed
        the limit

    Raises
    ------------
    ValueError
        If no node satisfies both conditions (e.g. every node of the tree has
        ``operator`` set to ``None``)
    """
    tree_depth = pt_mani_utils.pt_depth(tree)
    level = tree_depth if level is None else min(tree_depth, level)

    # NOTE(review): the root is not filtered out explicitly, so it is eligible
    # whenever its depth fits the limit — confirm whether it should be excluded.
    candidates = [
        node
        for node in pt_mani_utils.parse_tree_to_a_bfs_sequence(tree)
        if node.operator is not None
        and pt_mani_utils.pt_depth(node) <= level
    ]
    if not candidates:
        # Re-drawing until a match appears would never terminate here, so
        # report the impossible request instead.
        raise ValueError(
            "no operator node with depth <= %s in tree %s" % (level, tree))
    return random.choice(candidates)
def create_pts():
    """Generate one table of random process trees per entry of ``PT_RANGE``.

    For every ``node_num`` in ``PT_RANGE``, ``pt_num`` trees are created with
    ``pt_create.apply``; the requested size is drawn uniformly from
    ``[node_num[0], node_num[1]]``. Each tree becomes one row with the columns
    ``'tree'`` (string form), ``'#node'`` (``pt_number.apply(tree, 'D')``),
    ``'depth'`` (``pt_mani_utils.pt_depth``) and ``'root-op'`` (string form of
    the root operator).

    Returns
    -------
    list
        One ``pandas.DataFrame`` per entry of ``PT_RANGE``, in the same order.
    """
    tables = []
    for bounds in PT_RANGE:
        table = pd.DataFrame(columns=['tree', '#node', 'depth', 'root-op'])
        for row_idx in range(pt_num):
            requested_size = random.randint(bounds[0], bounds[1])
            tree = pt_create.apply(requested_size)
            node_count = pt_number.apply(tree, 'D')
            # Assigning to a new label appends the row to the table.
            table.loc[row_idx] = [
                str(tree),
                node_count,
                pt_mani_utils.pt_depth(tree),
                str(tree.operator),
            ]
        tables.append(table)
    return tables
def pt_write_to_table(tab, mutate_tabs, num, l, u, mutate_num, mutate_level):
    """Create ``num`` random trees and record them together with their mutations.

    For each tree a size is drawn uniformly from ``[l, u]`` and passed to
    ``pt_create.apply``. The tree's info row (size, string form prefixed with
    a space, ``leaves_number``, ``pt_depth``) is written to ``tab``. Before
    that, every tree yielded by
    ``uniq_mutate_tree(tree, mutate_num, mutate_level[i])`` is written to
    ``mutate_tabs[i]``, prefixed with the row of ``tab`` that the original
    tree occupies.
    """
    for _ in range(num):
        # Remember the target row up front; it also keys the mutated rows.
        origin_row = tab.row
        # TODO: handle the case where no result is found within a certain time
        size = random.randint(l, u)
        tree = pt_create.apply(size)
        tree_text = " " + str(tree)
        leaves = pt_mani_utils.leaves_number(tree)
        depth = pt_mani_utils.pt_depth(tree)
        info = [size, tree_text, leaves, depth]

        for idx, mutate_tab in enumerate(mutate_tabs):
            mutated_trees = uniq_mutate_tree(tree, mutate_num, mutate_level[idx])
            for mutated in mutated_trees:
                mutate_tree_write_to_table(mutate_tab, mutated, [origin_row] + info)

        excel_utils.write_row_to_table(tab.table, origin_row, info)
def run_feature(row):
    """Compute the feature vector for one (tree, mutated tree, log) record.

    Parameters
    ----------
    row
        Record indexable by ``'tree'``, ``'m_tree'`` and ``'log'``
        (presumably a DataFrame row — confirm against the caller). ``'tree'``
        and ``'m_tree'`` are textual process trees; ``'log'`` is a string of
        traces separated by ``", "`` where every character of a trace is one
        event.

    Returns
    -------
    pandas.Series
        Twelve values, in this order:

        * minimal trace fit: over all non-empty traces, the smallest share of
          events that are labels (``[a-z]``) of the tree but not of the first
          compared subtree; never above 1
        * shares of the ``*``, ``X``, ``+`` and ``->`` symbols among the
          operator symbols that occur in the tree but not in the first
          subtree (all ``0`` when there are none)
        * number of ``*`` symbols in the first and in the second subtree
        * operator of the tree root, of the first and of the second subtree
        * depth of the first subtree relative to the tree, and of the second
          subtree relative to the mutated tree
    """
    tree_text = row['tree']
    pt = pt_utils.parse(tree_text)
    m_pt = pt_utils.parse(row['m_tree'])
    com_res = pt_compare.apply(pt, m_pt, 2)
    sub1_text = str(com_res.subtree1)
    sub2_text = str(com_res.subtree2)

    # depth of each subtree relative to the tree it was taken from
    depths = [
        pt_mani_utils.pt_depth(sub1_text) / pt_mani_utils.pt_depth(tree_text),
        pt_mani_utils.pt_depth(sub2_text) / pt_mani_utils.pt_depth(row['m_tree']),
    ]

    # trace fit: labels of the tree that lie outside the first subtree
    outside_labels = (set(re.findall("[a-z]", tree_text))
                      - set(re.findall("[a-z]", sub1_text)))
    min_fit = 1
    for trace in row['log'].strip().split(", "):
        if not trace:
            # An empty trace has no events, so its ratio is undefined; skip it
            # rather than divide by zero.
            continue
        hits = sum(1 for event in trace if event in outside_labels)
        min_fit = min(min_fit, hits / len(trace))

    # type of operators
    ops = [pt.operator, com_res.subtree1.operator, com_res.subtree2.operator]

    # number of operator symbols outside the first subtree, in the order
    # loop, xor, and, sequence
    outside_counts = [
        tree_text.count(symbol) - sub1_text.count(symbol)
        for symbol in ('*', 'X', '+', '->')
    ]
    total = sum(outside_counts)
    if total == 0:
        shares = [0, 0, 0, 0]
    else:
        shares = [count / total for count in outside_counts]
    rate = shares + [sub1_text.count('*'), sub2_text.count('*')]

    return pd.Series([min_fit] + rate + ops + depths)