def __call__(self, x):
    """Run ``self.flow`` on ``x`` and collect the result in a tree.

    ``x`` is wrapped in ``Results`` when it is not one already.

    * When ``x.idx`` is None the input is treated as a batch: one
      sub-tree is built per element (their number is the length of
      ``any_value(x.module_output.logits)``) and each is pasted under a
      common ``'batch'`` node.
    * Otherwise a single tree rooted at a node tagged ``self.task_name``
      is built for that one index.

    When ``self.prefix`` is empty the bare ``Tree`` is returned; otherwise
    a ``TreeExplanation`` wrapping it is returned.
    """
    results = x if isinstance(x, Results) else Results(x)

    if results.idx is None:
        batch_size = len(any_value(results.module_output.logits))
        batch_tree = Tree()
        batch_root = batch_tree.create_node(tag='batch', identifier='batch')
        for index in range(batch_size):
            single_tree = Tree()
            single_root = single_tree.create_node(
                tag=f'inp {index}',
                identifier=f'inp_{index}.{self.prefix}',
            )
            single_results = Results(results.module_output, index)
            explanation = TreeExplanation(
                single_tree, single_root, single_results, f'inp_{index}.')
            explanation = self.flow(self, single_results, explanation)
            batch_tree.paste(batch_root.identifier, explanation.tree)
        if len(self.prefix) == 0:
            return batch_tree
        return TreeExplanation(
            batch_tree, start_node=batch_root, results=results,
            prefix=self.prefix)

    task_tree = Tree()
    task_root = task_tree.create_node(
        tag=self.task_name,
        identifier=f'inp_{results.idx}.{self.prefix}.{self.task_name}',
    )
    explanation = TreeExplanation(
        task_tree, start_node=task_root, results=results,
        prefix=f'inp_{results.idx}.{self.prefix}')
    explanation = self.flow(self, results, explanation)

    # If the flow added nothing below the start node, drop the whole flow
    # node by replacing the tree with an empty one.
    only_start_node_left = (
        len(explanation.tree.nodes) == 1
        and explanation.start_node.identifier in explanation.tree
    )
    if only_start_node_left:
        explanation.tree = Tree()

    if len(self.prefix) == 0:
        return explanation.tree
    return explanation
def DecitionTreeLearning(examples, attributes, pexamples):
    """Recursively build a decision tree (a treelib ``Tree``) from examples.

    * No examples: return ``getPlurality(pexamples)``.
    * All examples share one classification: return a single-node tree
      tagged "Edible" when the last field of the first example is 'e',
      otherwise "Poison".
    * No attributes left: return ``getPlurality(examples)``.
    * Otherwise split on ``getImportant(examples, attributes)`` and paste
      one recursively built subtree per attribute value.

    Note: ``attributes`` is modified in place (the chosen attribute is
    removed), and the same list object is passed to every recursive call.
    """
    if len(examples) == 0:
        return getPlurality(pexamples)

    if isSameClassification(examples):
        leaf_tree = Tree()
        first_example = examples[0]
        if first_example[len(first_example) - 1] == 'e':
            leaf_tree.create_node("Edible")
        else:
            leaf_tree.create_node("Poison")
        return leaf_tree

    if len(attributes) == 0:
        return getPlurality(examples)

    best = getImportant(examples, attributes)
    tree = Tree()
    # The identifier of the attribute node is its string form.
    tree.create_node(str(getPropertyName(best)), str(best))
    partitions = getSubExamples(examples, best)
    attributes.remove(best)
    for value, subset in partitions.items():
        branch_id = getPropertyName(best) + "_" + value
        tree.create_node(value, branch_id, str(best))
        child_tree = DecitionTreeLearning(subset, attributes, examples)
        tree.paste(branch_id, child_tree)
    return tree
def merge_trees(t1, t2, tick):
    """Join two trees under a fresh cluster root.

    The new root is tagged ``"new_cluster_<tick>"`` and identified by
    ``-tick``; ``t1`` and then ``t2`` are pasted beneath it.

    Returns:
        A ``(tree, name)`` tuple: the merged tree and the new root's tag.
    """
    merged = Tree()
    # Negative numbers identify merged clusters; positive numbers are the
    # ids of the leaf nodes.
    cluster_id = -tick
    cluster_name = "new_cluster_%s" % tick
    merged.create_node(cluster_name, cluster_id)
    for part in (t1, t2):
        merged.paste(cluster_id, part)
    return merged, cluster_name
def create_tree(indexed_titles, root, children=None):
    """Build a tree whose root is ``root`` with the given subtrees below it.

    Args:
        indexed_titles: mapping from a title to the identifier to use for it.
        root: tag of the root node; its identifier is ``indexed_titles[root]``.
        children: optional iterable of trees pasted under the root, in order.

    Returns:
        The newly created ``Tree``.
    """
    result = Tree()
    root_id = indexed_titles[root]
    result.create_node(root, root_id)
    if not children:
        return result
    for child_tree in children:
        result.paste(root_id, child_tree)
    return result
def test_paste_duplicate_nodes(self):
    """Pasting a tree that shares a node id with the target raises ValueError."""
    target = Tree()
    target.create_node(identifier='A')

    incoming = Tree()
    incoming.create_node(identifier='A')
    incoming.create_node(identifier='B', parent='A')

    with self.assertRaises(ValueError) as caught:
        target.paste('A', incoming)

    expected_args = ("Duplicated nodes ['A'] exists.", )
    self.assertEqual(caught.exception.args, expected_args)
def absorb(t1: tl.Tree, t2: tl.Tree) -> tl.Tree:
    """Return a new tree in which all of ``t2`` hangs under the root of ``t1``.

    Both inputs are deep-copied and passed through ``reset_ids`` first, so
    the arguments themselves are not pasted into.
    """
    # Work on deep copies.
    host, guest = [tl.Tree(tree=source, deep=True) for source in (t1, t2)]

    # Reset all the identifiers.
    host = reset_ids(host)
    guest = reset_ids(guest)

    host.paste(host.root, guest)
    return host
def generateTree(rootFile):
    """Recursively build a tree from ``rootFile`` and everything it loads.

    The root node uses ``rootFile.tag()`` as tag and ``rootFile.id`` as
    identifier; each item returned by ``rootFile.load()`` contributes a
    subtree built the same way.
    """
    loaded = rootFile.load()
    root_id = rootFile.id
    root_tag = rootFile.tag()

    result = Tree()
    result.create_node(root_tag, root_id)
    for child in loaded:
        child_tree = generateTree(child)
        result.paste(root_id, child_tree)
    return result
def branch(partition, vertex, partidx):
    """Append a branch attached to a partition to an existing partition tree.

    ``partition.split(vertex, partidx)`` yields the refined partition, which
    becomes the root of the returned subtree (tag: its ``PaintedVertices``,
    identifier: the partition object itself). Unless that partition is
    atomic, one child branch is built per vertex of the part selected by
    ``nextsplitting()``.
    """
    sub_tree = Tree()
    refined = partition.split(vertex, partidx)
    sub_tree.create_node(refined.PaintedVertices, refined)

    if not refined.isatomic():
        # Recurse onto children nodes to build the partition tree depth first.
        for part_vertex in refined.Parts[refined.nextsplitting()]:
            split_index = refined.nextsplitting()
            child_branch = branch(refined, part_vertex, split_index)
            sub_tree.paste(refined, child_branch)
    return sub_tree
def run(self, args):
    """Show the part of the login-info tree that sits below ``args[0]``.

    Does nothing when ``args[0]`` is not a node of the tree. Otherwise a
    temporary tree is built whose root (identifier ``''``) is tagged with
    ``args[0]`` and which receives the subtree of every child of that node.
    When the node has no children, the temporary root's tag is replaced by
    the original node's tag. The temporary tree is then printed via ``show()``.
    """
    source = self._login_info_manager.tree()
    target = args[0]
    if target not in source:
        return

    view = Tree()
    view.create_node(target, '')

    pasted = 0
    for child in source.children(target):
        view.paste('', source.subtree(child.identifier))
        pasted += 1

    if pasted == 0:
        view.get_node('').tag = source.get_node(target).tag
    view.show()
def help_func_return(grammar, tokens, function=None):
    """Build the subtree for a ``return`` statement.

    The root node is tagged with ``tokens[0][1]``. Unless the token right
    after it is ``';'``, the remaining tokens are handed to
    ``help_func_expression`` and the resulting tree is pasted under the root.

    Returns:
        ``[tree, n]`` where ``n`` is the expression's reported token count
        plus 2 (or just 2 when there is no expression).
    """
    tree = Tree()
    root = Node(tag=tokens[0][1])
    tree.add_node(root, parent=None)

    if tokens[1][0] == ';':
        # Bare "return;" -- nothing to paste.
        return [tree, 2]

    expression = help_func_expression(grammar, tokens[1:], function=function)
    tree.paste(root.identifier, expression[0])
    consumed = expression[1]
    return [tree, consumed + 2]
def collapse(t1: tl.Tree, t2: tl.Tree) -> tl.Tree:
    """Return a new tree: ``t1`` plus the children of ``t2``'s root.

    Both inputs are deep-copied and passed through ``reset_ids``. The
    subtree of every direct child of the copied ``t2`` root is then pasted
    under the copied ``t1`` root; the root of ``t2`` itself is dropped.
    """
    # Work on deep copies.
    host, donor = [tl.Tree(tree=source, deep=True) for source in (t1, t2)]

    # Reset all the identifiers.
    host = reset_ids(host)
    donor = reset_ids(donor)

    # Paste every child of the donor root into the host root.
    donor_children = donor.children(donor.root)
    for child in donor_children:
        child_subtree = donor.subtree(child.identifier)
        host.paste(host.root, child_subtree)
    return host
def get_bonus_tree(self):
    """Build a tree that pairs the document's bonus text with its conditions.

    The document tree comes from ``self.get_tree()``. A "conditions" node and
    a "bonus" node are looked up among ``self.level_one``; when both are
    found, the returned tree has a ``'root'`` node holding the joined bonus
    text, a ``'partition'`` child, and below that the condition text and the
    copied condition sub-trees. When either is missing, an empty tree is
    returned.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; confirm it against the original file.
    """
    bonus_tree=Tree()
    doc_tree=self.get_tree()
    c_nid=''
    b_nid=''
    b_data=''
    # Find the conditions section: first level-one node whose first data
    # item contains '条件'.
    for nid in self.level_one:
        if '条件'in doc_tree.get_node(nid).data[0]:
            c_nid=nid
            break
    # Find the bonus section.
    # NOTE(review): the break only leaves the keyword loop, so b_nid ends up
    # as the LAST matching level-one node -- confirm whether the first match
    # was intended.
    for nid in self.level_one:
        for key in self.keywords:
            if key in doc_tree.get_node(nid).data[0]:
                b_nid=nid
                break
    if b_nid!='' and c_nid!='':
        # Concatenate the data of every node in the bonus sub-tree.
        for node in doc_tree.expand_tree(nid=b_nid, mode=Tree.DEPTH):
            if node==b_nid:
                p_nodedata=doc_tree[node].data.copy()
                del p_nodedata[0] # filter out the section heading, e.g. '四.奖励标准'
                b_data += ','.join(p_nodedata)
                continue
            b_data+=','.join(doc_tree[node].data)
        # Create the bonus tree.
        bonus_tree.create_node(identifier='root',tag=b_data,data=b_data)
        bonus_tree.create_node('partition', 'partition', parent='root')
        # The parent (conditions) node may itself carry information, because
        # it can be a plain paragraph without a sequence number.
        p_data=doc_tree.get_node(c_nid).data.copy()
        # Drop heading-like items such as '二.申请条件' and '满足以下条件'.
        if len(p_data)>=2:
            if len(p_data[0])<8 :
                del p_data[0]
            if ':' in p_data[0]:
                del p_data[0]
        p_data=','.join(p_data)
        if p_data:
            bonus_tree.create_node(identifier=c_nid, tag=p_data, data=p_data,parent='partition')
        # Could be improved: copying the tree here wastes resources.
        new_tree=self.copy_tree(doc_tree.subtree(c_nid),'')
        # presumably copy_tree prefixes every identifier with '<prefix>_',
        # hence the lookup of '_' + c_nid -- verify against copy_tree.
        for children in new_tree.children(''+'_'+c_nid):
            bonus_tree.paste('partition', new_tree.subtree(children.identifier))
    return bonus_tree
def _join_trees(self, subtree_1: Tree, subtree_2: Tree):
    """Put two span trees under a new parent covering both spans.

    Root identifiers have the form ``"<start>-<end>"``. The new parent runs
    from the start of ``subtree_1`` to the end of ``subtree_2`` and carries a
    ``Span`` with height 100, the matching slice of ``self._tokens`` and no
    constant. ``subtree_1`` is pasted first, then ``subtree_2``.
    """
    joined = Tree()
    left_bounds = subtree_1.root.split('-')
    right_bounds = subtree_2.root.split('-')
    start, end = int(left_bounds[0]), int(right_bounds[1])

    parent_id = '{}-{}'.format(start, end)
    parent_span = Span(
        height=100,
        span=(start, end),
        content=self._tokens[start:end],
        constant=None,
    )
    joined.create_node(identifier=parent_id, data=parent_span)
    for child in (subtree_1, subtree_2):
        joined.paste(nid=parent_id, new_tree=child)
    return joined
def run_parser(tokens, grammar, look_for_brace=False, root_name="program", clear_symbol_table=False):
    """Parse ``tokens`` into an abstract syntax tree.

    Tokens are accumulated one at a time and checked with ``check_rules``
    against the "program" rule set. As soon as exactly one rule matches,
    ``help_func_manager`` builds the subtree, which is pasted under the
    root, and the tokens it reports as consumed are skipped.

    Args:
        tokens: sequence of tokens; index 0 of a token is its text and
            index 2 is used as its line number in error messages.
        grammar: grammar passed through to ``check_rules`` and the helpers.
        look_for_brace: stop at the first ``"}"`` token when true.
        root_name: tag of the root node.
        clear_symbol_table: reset the module-level symbol tables first.

    Returns:
        ``[tree, num_tokens_to_skip, __symbol_tables]``.

    Raises:
        Exception: when the accumulated tokens match no rule.
    """
    # Dictionary of symbol tables shared at module level.
    global __symbol_tables
    if clear_symbol_table:
        __symbol_tables = {}

    # Base abstract syntax tree with its root node.
    tree = Tree()
    root = Node(tag=root_name)
    tree.add_node(root, parent=None)

    num_tokens_to_skip = 0
    pending = []
    for i, token in enumerate(tokens):
        if num_tokens_to_skip > 0:
            num_tokens_to_skip -= 1
            continue
        if look_for_brace and token[0] == "}":
            break

        pending.append(token)  # token together with its metadata
        result = check_rules("program", pending, grammar)
        match_count = result[0]

        if match_count > 1:
            # Still ambiguous: more than one rule matches, keep reading.
            continue

        if match_count == 1:
            # Exactly one rule matches: delegate to its helper function.
            first_pending = i - len(pending) + 1
            helper_out = help_func_manager(result, grammar, tokens[first_pending:])
            sub_tree = helper_out[0]
            num_tokens_to_skip = helper_out[1] - len(pending)
            tree.paste(root.identifier, sub_tree)
            pending = []
        elif match_count == 0:
            # No rule matches: dump state and abort.
            tree.show(key=lambda x: x.identifier, line_type='ascii')
            print("ERRONEOUS RESULT:", result)
            print("ERRONEOUS TOKEN LIST:", pending)
            raise Exception(errors.ERR_NO_RULE + " '" + token[0] +
                            "' on line " + str(token[2]))

    return [tree, num_tokens_to_skip, __symbol_tables]
def _random(max_depth=5, min_width=1, max_width=2, offset=(0, )):
    """Build a random tree whose node identifiers are index tuples.

    The root is identified by ``offset``; the i-th child of a node with
    identifier ``p`` is identified by ``p + (i, )``. Every node above the
    last level gets between ``min_width`` and ``max_width`` children
    (inclusive, drawn with ``random.randint``).

    Args:
        max_depth: number of levels below the root. Values <= 0 yield a
            tree consisting of the root only.
        min_width: minimum number of children per inner node.
        max_width: maximum number of children per inner node.
        offset: identifier (tuple) of the root node.

    Returns:
        The generated ``Tree``.
    """
    tree = Tree()
    tree.create_node(identifier=offset)
    # "<= 0" rather than "== 0": a negative depth previously never reached
    # the base case and recursed until RecursionError.
    if max_depth <= 0:
        return tree

    nb = random.randint(min_width, max_width)
    for i in range(nb):
        child_offset = offset + (i, )
        if max_depth == 1:
            tree.create_node(identifier=child_offset, parent=offset)
        else:
            # min_width is forwarded as well; it used to be dropped here, so
            # every level below the first silently fell back to the default.
            subtree = _random(max_depth=max_depth - 1,
                              min_width=min_width,
                              max_width=max_width,
                              offset=child_offset)
            tree.paste(offset, subtree)
    return tree
def test_shallow_paste(self):
    """Pasting single-node trees under a root appends them as children."""
    host = Tree()
    host_root = host.create_node(identifier='A')
    first = Tree()
    first_root = first.create_node(identifier='B')
    second = Tree()
    second_root = second.create_node(identifier='C')

    host.paste(host_root.identifier, first)
    self.assertEqual(host.to_dict(), {'A': {'children': ['B']}})

    host.paste(host_root.identifier, second)
    self.assertEqual(host.to_dict(), {'A': {'children': ['B', 'C']}})

    # The root stays at level 0, both pasted roots end up at level 1.
    expected_levels = ((host_root, 0), (first_root, 1), (second_root, 1))
    for node, level in expected_levels:
        self.assertEqual(host.level(node.identifier), level)
def create_tree(adict):
    """Convert a nested dict/list/scalar structure into a tree.

    * A non-dict value becomes a single-node tree identified by the value.
    * For a dict, the FIRST key becomes the root. Every value of the dict is
      then attached below that root: dict values recursively as subtrees,
      list values as one subtree per item, anything else as a plain child.

    Every node is tagged with ``timestamp_node(<value>)`` and stores
    ``time()`` as its data.
    """
    tree = Tree()

    if type(adict) is not dict:
        tree.create_node(timestamp_node(adict), adict, data=time())
        return tree

    root = list(adict.keys())[0]
    tree.create_node(timestamp_node(root), root, data=time())

    for value in list(adict.values()):
        if type(value) is dict:
            nested = create_tree(value)
            tree.paste(root, nested)
        elif type(value) is list:
            for item in value:
                nested = create_tree(item)
                tree.paste(root, nested)
        else:
            tree.create_node(timestamp_node(value), value, parent=root, data=time())
    return tree
def crossOver(individualA, individualB):
    """Create a child individual by subtree crossover of two parents.

    A random node of a copy of ``individualA.tree`` is replaced by a random
    subtree taken from a copy of ``individualB.tree``. When the node chosen
    in A is its root, the child is just the subtree from B. The attempt is
    repeated until the child's depth does not exceed ``TREE_MAX_DEPTH``.

    Neither parent is modified: all work is done on deep copies whose
    identifiers are regenerated with ``regenerate_ids``.

    Returns:
        A new ``Individual`` wrapping the resulting tree.
    """
    tree = None
    # tree.depth() with no argument is the maximum level in the tree. The
    # previous check passed the root node, which returns the level of that
    # node (always 0), so the depth limit was never actually enforced.
    while tree is None or tree.depth() > TREE_MAX_DEPTH:
        treeA = Tree(tree=individualA.tree, deep=True)
        treeB = Tree(tree=individualB.tree, deep=True)
        regenerate_ids(treeA)
        regenerate_ids(treeB)

        removedNode = random.choice(treeA.all_nodes())
        addedNode = random.choice(treeB.all_nodes())
        addedSubtree = Tree(tree=treeB.subtree(addedNode.identifier), deep=True)

        if treeA.root == removedNode.identifier:
            # Replacing the root of A leaves only the subtree from B.
            tree = addedSubtree
        else:
            parent = treeA.parent(removedNode.identifier)
            treeA.remove_subtree(removedNode.identifier)
            treeA.paste(parent.identifier, addedSubtree)
            tree = treeA
    return Individual(tree)
def swap(tree):
    """Propose exchanging the split rule of a random internal node with its parent's.

    A random internal node (``var`` set, not node 0) is chosen; the
    ``(var, split)`` pairs of that node and of its parent are exchanged and
    the parent's subtree is regenerated with ``genTree``. The proposal is
    accepted with probability ``r = rLike * rStruct``.

    Returns the modified deep copy when the proposal is accepted, otherwise
    the unchanged input ``tree``.

    NOTE(review): ``mi`` and ``t`` used in the log prefix are not defined in
    this function -- presumably module-level globals; confirm.
    """
    # Internal nodes are those with a split variable.
    internalNodes = [n for n in tree.all_nodes_itr() if n.var != None]
    if len(internalNodes) == 1:
        # Only one internal node: nothing to swap with.
        return tree
    # tree[0] is excluded as a candidate child; presumably it is the root
    # (it has no parent to swap with) -- verify.
    internalNodes.remove(tree[0])
    cNode = random.choice(internalNodes)
    tagc = (cNode.identifier, cNode.var, cNode.split)
    pid = cNode.bpointer  # identifier of the chosen node's parent
    # Work on a deep copy so the input tree stays intact if the move is rejected.
    tree1 = Tree(tree, deep=True)
    sub = tree1.remove_subtree(pid)
    tags = recurTag(sub, pid)
    tagp = tags[0]
    # Exchange (var, split) between parent and child, keeping identifiers.
    tags[tags.index(tagc)] = (tagc[0], tagp[1], tagp[2])
    tags[0] = (tagp[0], tagc[1], tagc[2])
    string = f'{mi} swap {t}: {tags[0]}; '
    try:
        sub1 = genTree(tree[pid], tags)
    except IndexError:
        # genTree could not rebuild the subtree with the swapped rules.
        print(string + 'unswappable')
        return tree
    #rTransit = 1
    rLike = get_like_ratio(tree.R, sub.leaves(), sub1.leaves())
    rStruct = get_struct(sub.all_nodes_itr(), sub1.all_nodes_itr())
    r = rLike * rStruct
    print(string + f'{r.round(4)}')
    if random.uniform(0, 1) < r:
        if pid > 0:
            # Re-attach the regenerated subtree under the grandparent and
            # keep the grandparent's child list sorted.
            gpid = tree[pid].bpointer
            tree1.paste(gpid, sub1)
            tree1[gpid].fpointer = sorted(tree1[gpid].fpointer)
        else:
            # The parent was node 0, so the new subtree is the whole tree.
            tree1 = sub1
        # Carry over the attributes stored on the tree object.
        tree1.w2 = tree.w2
        tree1.R = tree.R
        tree1.leaf = [n.identifier for n in tree1.leaves() if len(n.xvar) > 0]
        tree1.show()
        return tree1
    return tree
def _combine_trees(self, subtree_1: Tree, subtree_2: Tree):
    """Paste ``subtree_1`` under the root of ``subtree_2``.

    ``subtree_2`` is modified in place and is also the return value.
    """
    host_root = subtree_2.root
    subtree_2.paste(nid=host_root, new_tree=subtree_1)
    return subtree_2
# print tree[node].tag print(sep + "Let me introduce Diane family only:") sub_t = tree.subtree("diane") sub_t.show() print(sep + "Children of Diane") for child in tree.is_branch("diane"): print(tree[child].tag) print(sep + "OOhh~ new members join Jill's family:") new_tree = Tree() new_tree.create_node("n1", 1) # root node new_tree.create_node("n2", 2, parent=1) new_tree.create_node("n3", 3, parent=1) tree.paste("jill", new_tree) tree.show() print(sep + "They leave after a while:") tree.remove_node(1) tree.show() print(sep + "Now Jill moves to live with Grand-x-father Harry:") tree.move_node("jill", "harry") tree.show() print(sep + "A big family for George to send message to the oldest Harry:") for node in tree.rsearch("george"): print(tree[node].tag) ########NEW FILE######## __FILENAME__ = folder_tree
def _follow_bps(self,
                derivation: Derivation,
                sentence: List[str],
                collected_spans: List[CollectedSpan] = None):
    """Transforms back-pointers collected in CKY into a span tree.

    Node identifiers have the form ``"<start>-<end>"`` and each node stores a
    ``Span`` with the matching slice of ``sentence``.

    Args:
        derivation: derivation to unfold; ``split`` tells whether it has
            children (``left_bp``, ``right_bp`` and optionally ``middle_bp``).
        sentence: the tokens the spans index into.
        collected_spans: optional list that is appended to in place with one
            ``CollectedSpan`` per visited derivation (indices shifted by +1).

    Returns:
        The ``Tree`` rooted at the span covered by ``derivation``.

    NOTE(review): nesting (in particular which ``if`` the final ``else``
    belongs to) was reconstructed from a whitespace-collapsed source; confirm.
    """
    if not derivation.split:
        # Stopping criteria for unary derivations.
        tree = Tree()
        start = derivation.span[0]
        end = derivation.span[1]
        identifier = '{}-{}'.format(start, end)
        span = Span(span=derivation.span,
                    content=sentence[start:end],
                    constant=derivation.category)
        tree.create_node(identifier=identifier, data=span)
        if collected_spans is not None:
            # add 1 to match label spans
            collected_spans.append(
                CollectedSpan(category=derivation.category,
                              span=(start + 1, end + 1)))
        return tree
    # follow left back pointer
    left_tree = self._follow_bps(derivation=derivation.left_bp,
                                 sentence=sentence,
                                 collected_spans=collected_spans)
    # follow right back pointer
    right_tree = self._follow_bps(derivation=derivation.right_bp,
                                  sentence=sentence,
                                  collected_spans=collected_spans)
    # merge children to tree
    top_tree = Tree()
    left_tree_span = left_tree.root.split('-')
    right_tree_span = right_tree.root.split('-')
    start = int(left_tree_span[0])
    end = int(right_tree_span[1])
    identifier = '{}-{}'.format(start, end)
    span = Span(span=(start, end),
                content=sentence[start:end],
                constant=derivation.category)
    top_tree.create_node(identifier=identifier, data=span)
    top_tree.paste(nid=identifier, new_tree=left_tree)
    if derivation.middle_bp is not None:
        # The middle child is unfolded only now (after left and right), but
        # it is pasted between them.
        middle_tree = self._follow_bps(derivation=derivation.middle_bp,
                                       sentence=sentence,
                                       collected_spans=collected_spans)
        top_tree.paste(nid=identifier, new_tree=middle_tree)
    top_tree.paste(nid=identifier, new_tree=right_tree)
    if collected_spans is not None:
        if derivation.category == SPAN_LABEL:
            # Make sure both spans are not some combination of not_span
            if not derivation.middle_bp or derivation.middle_bp.category != NOT_SPAN_LABEL:
                # add 1 to match label spans
                collected_spans.append(
                    CollectedSpan(category=SPAN_LABEL,
                                  span=(derivation.left_bp.span[0] + 1,
                                        derivation.right_bp.span[1] + 1)))
        else:
            # add 1 to match label spans
            collected_spans.append(
                CollectedSpan(category=derivation.category,
                              span=(derivation.left_bp.span[0] + 1,
                                    derivation.right_bp.span[1] + 1)))
    return top_tree
leaf = partition.split(vertex, partidx) subTree.create_node(leaf.PaintedVertices, leaf) if leaf.isatomic(): return subTree # recurse onto children nodes to build partition tree depth first for v in leaf.Parts[leaf.nextsplitting()]: subTree.paste(leaf, branch(leaf, v, leaf.nextsplitting())) return subTree from treelib import Node, Tree tree = Tree() tree.create_node(P0.PaintedVertices, P0) # root node if not P0.isatomic(): for v in P0.Parts[P0.nextsplitting()]: tree.paste(P0, branch(P0, v, P0.nextsplitting())) tree.show() for node in tree.leaves(): # print(node.identifier.permutation()) P = node.identifier sG = P.applyautomorphism() print(lexifyedges(sG)) # P1 = tree.leaves()[0].identifier # p = P1.permutation() # G1 = P1.applyautomorphism()
print("#"*4 + "Let me introduce Diane family only") sub_t = tree.subtree('diane') sub_t.show() print('\n') print("#"*4 + "Children of Diane") print tree.is_branch('diane') print('\n') print("#"*4 + "OOhh~ new members enter Jill's family") new_tree = Tree() new_tree.create_node("n1", 1) # root node new_tree.create_node("n2", 2, parent=1) new_tree.create_node("n3", 3, parent=1) tree.paste('jill', new_tree) tree.show() print('\n') print("#"*4 + "We are sorry they are gone accidently :(") tree.remove_node(1) tree.show() print('\n') print("#"*4 + "Now Jill moves to live with Grand-x-father Harry") tree.move_node('jill', 'harry') tree.show() print('\n') print("#"*4 + "A big family for George to talk to Grand-x-father Harry") for node in tree.rsearch('george', filter=lambda x: x != 'harry'):
def create_ast(self, filename):
    """Create an AST for a given OPcache file.

    The tree has a ``script`` root with three children -- ``main_op_array``,
    ``function_table`` and ``class_table``. One ``OPcode`` tree is pasted per
    opcode under the main op array, under each function
    (``<name>_function``), and under each method (``<name>_class_function``)
    of each real class (``<name>_class``).

    Arguments:
        filename: The name of the file to parse.

    Returns:
        The populated ``Tree``.
    """
    # Create the parser matching the architecture of the cache file.
    if self.is_64_bit:
        opcache = opcache_parser_64.OPcacheParser(filename)
    else:
        opcache = opcache_parser.OPcacheParser(filename)

    # Create the syntax tree skeleton.
    ast = Tree()
    ast.create_node("script", "script")
    ast.create_node("main_op_array", "main_op_array", parent="script")
    ast.create_node("function_table", "function_table", parent="script")
    ast.create_node("class_table", "class_table", parent="script")

    def paste_opcodes(parent_id, op_array):
        # Paste one OPcode tree per opcode of op_array under parent_id.
        # Every call site goes through here so that is_64_bit is always
        # forwarded; the class-method path used to omit it.
        for idx, opcode in enumerate(op_array['opcodes']):
            ast.paste(parent_id,
                      OPcode(str(idx), opcode, op_array, opcache,
                             self.is_64_bit))

    # Get main structures.
    main_op_array = opcache['script']['main_op_array']
    functions = opcache['script']['function_table']['buckets']
    classes = opcache['script']['class_table']['buckets']

    # Main OP array.
    paste_opcodes("main_op_array", main_op_array)

    # Function table.
    for function in functions:
        function_name = function['key']['val']
        function_id = function_name + "_function"
        ast.create_node(function_name, function_id, parent="function_table")
        paste_opcodes(function_id, function['val']['op_array'])

    # Class table.
    for class_ in classes:
        # Only buckets whose type is IS_PTR are real classes.
        if class_['val']['u1']['type'] != IS_PTR:
            continue

        class_name = class_['key']['val']
        class_id = class_name + "_class"
        ast.create_node(class_name, class_id, parent="class_table")

        # Methods of the class.
        for function in class_['val']['class']['function_table']['buckets']:
            function_name = function['key']['val']
            class_function_id = function_name + "_class_function"
            ast.create_node(function_name, class_function_id, parent=class_id)
            paste_opcodes(class_function_id, function['val']['op_array'])

    return ast
def create_ast(self, filename):
    """Create an AST for a given OPcache file.

    The tree has a ``script`` root with three children -- ``main_op_array``,
    ``function_table`` and ``class_table``. One ``OPcode`` tree is pasted per
    opcode under the main op array, under each function
    (``<name>_function``), and under each method (``<name>_class_function``)
    of each real class (``<name>_class``).

    Arguments:
        filename: The name of the file to parse.

    Returns:
        The populated ``Tree``.
    """
    opcache = OPcacheParser(filename)

    # Skeleton of the syntax tree.
    ast = Tree()
    ast.create_node("script", "script")
    for section in ("main_op_array", "function_table", "class_table"):
        ast.create_node(section, section, parent="script")

    def paste_opcodes(parent_id, op_array):
        # Paste one OPcode tree per opcode of op_array under parent_id.
        for position, raw_opcode in enumerate(op_array['opcodes']):
            ast.paste(parent_id,
                      OPcode(str(position), raw_opcode, op_array, opcache))

    script = opcache['script']
    main_op_array = script['main_op_array']
    function_buckets = script['function_table']['buckets']
    class_buckets = script['class_table']['buckets']

    # Main OP array.
    paste_opcodes("main_op_array", main_op_array)

    # Function table.
    for bucket in function_buckets:
        name = bucket['key']['val']
        node_id = name + "_function"
        ast.create_node(name, node_id, parent="function_table")
        paste_opcodes(node_id, bucket['val']['op_array'])

    # Class table; only buckets whose type is IS_PTR are real classes.
    for class_bucket in class_buckets:
        if class_bucket['val']['u1']['type'] != IS_PTR:
            continue
        class_name = class_bucket['key']['val']
        class_id = class_name + "_class"
        ast.create_node(class_name, class_id, parent="class_table")

        methods = class_bucket['val']['class']['function_table']['buckets']
        for method in methods:
            method_name = method['key']['val']
            method_id = method_name + "_class_function"
            ast.create_node(method_name, method_id, parent=class_id)
            paste_opcodes(method_id, method['val']['op_array'])

    return ast
from treelib import Tree, Node

# Small walkthrough of basic treelib operations: create, paste, remove,
# move and show.
if __name__ == '__main__':
    # Create a tree. Every node has a unique identifier used as its key;
    # it can be given explicitly.
    tree = Tree()
    # Add nodes. tag is what is displayed when the tree is printed,
    # identifier is the unique key; the root node needs no parent.
    tree.create_node(tag='root', identifier='root', data=0)
    tree.create_node(tag='1_child', identifier='1_child', data=1, parent='root')
    tree.create_node(tag='2_child', identifier='2_child', data=2, parent='root')
    tree.create_node(tag='3_child', identifier='3_child', data=3, parent='1_child')
    # Paste a tree. Note that nid is an identifier inside `tree`, not
    # inside `tree2`.
    tree2 = Tree()
    tree2.create_node(tag='tutu', identifier='tutu', data=0)
    tree.paste(nid='root', new_tree=tree2)
    # Remove a node.
    tree.remove_node('tutu')
    # Move a node.
    tree.move_node('3_child', 'root')
    # Print the structure of the tree.
    tree.show()
def analyze(token):
    """Parse a token sequence with a table-driven shift/reduce loop.

    Each item of ``token`` is split on a single space and only its first
    field is used; ``'$'`` is appended as the end marker. The action table is
    read from ``table.csv`` in the current directory. On acceptance the parse
    tree is printed with ``show()`` and the function returns None; on any
    failure a ``CompiledError`` is raised.

    NOTE(review): this is Python 2 code (``print`` statements,
    ``StandardError``).
    """
    def getNext():
        # Generator yielding 0, 1, 2, ...
        i = 0
        while True:
            yield i
            i += 1
    import pandas as pd
    from collections import deque
    from treelib import Tree, Node
    import re
    tokens = []
    for l in token:
        s = re.split(r' ', l)
        tokens.append(s[0])
    tokens.append('$')
    test = deque(tokens)  # remaining input, consumed from the left
    # Reduction rules, keyed by the action string found in the table.
    rr = {
        'r1': 'E=E+T',
        'r2': 'E=T',
        'r3': 'T=T*F',
        'r4': 'T=F',
        'r5': 'F=(E)',
        'r6': 'F=i'
    }
    class CompiledError(StandardError):
        def __init__(self, arg):
            self.arg = arg
        def __str__(self):
            return self.arg
    state = [0]       # state stack
    symbolic = ['$']  # symbol stack
    ast = []          # stack of partial trees, parallel to the symbols
    table = pd.read_csv('table.csv', index_col=0, na_filter=False)
    ltest = len(test)
    while (len(test) != 0):
        if test[0] not in table.columns:
            raise CompiledError('%s, unrecognized token at %d' % (test[0], ltest - len(test)))
        ins = table.loc[state[-1], test[0]]
        if len(ins) == 0:
            # Empty table cell: no action for this state/token pair.
            raise CompiledError('%s complie failed at %d, unexpected token' % (test[0], ltest - len(test)))
        if ins[0] == 's':
            # Shift: push the target state and a single-node tree.
            state.append(int(ins[1:]))
            tree = Tree()
            # NOTE(review): getNext() returns a new generator object on every
            # call, and that object itself (not a yielded integer) is used as
            # the node identifier -- confirm this is intended.
            tree.create_node(test[0], getNext())
            ast.append(tree)
            symbolic.append(test.popleft())
        elif ins[0] == 'r':
            # Reduce: pop one symbol/state/tree per right-hand-side character.
            rule = rr[ins]
            print rule
            li = list(rule)
            li.reverse()
            temptree = []
            for i in li:
                if i != '=':
                    symbolic.pop()
                    state.pop()
                    temptree.append(ast.pop())
                else:
                    break
            symb = rule[:rule.find('=')]
            symbolic.append(symb)
            # Goto: look up the next state for the new non-terminal.
            state.append(int(table.loc[state[-1], symbolic[-1]]))
            tree = Tree()
            ii = getNext()
            tree.create_node(symb, ii)
            # temptree holds the children in popped (right-to-left) order.
            for tri in temptree:
                tree.paste(ii, tri)
            ast.append(tree)
        elif ins == 'AC':
            print 'succeed'
            tree = ast.pop()
            tree.show()
            return
    # NOTE(review): this point is reached only once `test` is empty, in which
    # case test[0] in the message below would itself fail -- confirm.
    raise CompiledError('%s compiled failed at %d, unexpected token' % (test[0], ltest - len(test)))
def help_func_funDeclaration(grammar, tokens):
    """Build the subtree for a function declaration.

    Expected token layout: return type, function name, ``(``, then either
    nothing, ``void``, or ``type name`` pairs separated by ``,``, followed by
    ``)`` and the opening ``{`` of the body.

    The resulting tree is rooted at ``func:<name>`` with the children
    ``return_type`` (holding the return type), ``params`` (one type node per
    parameter, each with the parameter name as child) and the body subtree
    produced by ``help_func_block``.

    Side effects:
        * A function named ``main`` is renamed to ``_start`` IN PLACE in
          ``tokens``.
        * ``__symbol_tables[<name>]`` is (re)initialised to an empty dict.

    Args:
        grammar: grammar passed through to ``help_func_block``.
        tokens: tokens starting at the return type; index 0 of a token is
            its text and index 2 is used as its line number in error messages.

    Returns:
        ``[tree, skip_tokens]`` -- the subtree and the number of tokens the
        declaration (header plus body) accounts for.

    Raises:
        Exception: if the function is named ``_start`` or a parameter is
            malformed.
    """
    # tokens[0] is the return type, tokens[1] the name, tokens[2] the "(";
    # somewhere after that there has to be a ")".
    assert (len(tokens) >= 4)

    tree = Tree()

    # Organization nodes.
    return_node = Node(tag="return_type")
    params_node = Node(tag="params")

    # Root node; "main" is renamed to "_start", which is itself reserved.
    if tokens[1][0] == "_start":
        raise Exception("Function name _start is reserved for assembly")
    elif tokens[1][0] == "main":
        tokens[1][0] = "_start"
    func_name = tokens[1][0]
    func_root = Node(tag="func:" + func_name)
    return_type = Node(tag=tokens[0][0])

    # Create symbol subtable.
    __symbol_tables[func_name] = {}

    # Assemble basic subtree.
    tree.add_node(func_root, parent=None)
    tree.add_node(return_node, parent=func_root)
    tree.add_node(params_node, parent=func_root)
    tree.add_node(return_type, parent=return_node)

    # Collect parameters: they come in groups of three tokens
    # (type, name, separator) starting at index 3.
    params = []
    var_case = 0  # 0 = empty, 1 = void, 2 = variables
    for i in range(3, len(tokens), 3):
        if i == 3 and tokens[i][0] == 'void':
            var_case = 1
            break
        elif tokens[i][0] == ")":
            break
        else:
            try:
                # i+0 = type, i+1 = name, i+2 = comma if it exists
                params.append((tokens[i][0], tokens[i + 1][0]))
                var_case = 2
            except (IndexError, TypeError) as err:
                # Only a missing/malformed token is a "bad parameter"; a bare
                # except here would also have swallowed KeyboardInterrupt etc.
                raise Exception(errors.ERR_BAD_FUNC_PAR + " '" +
                                tokens[i][0] + "' on line " +
                                str(tokens[i][2])) from err
            if tokens[i + 2][0] != ',':
                break

    for param in params:
        type_node = Node(tag=param[0])
        name_node = Node(tag=param[1])
        tree.add_node(type_node, parent=params_node)
        tree.add_node(name_node, parent=type_node)

    # Number of header tokens up to and including the token before the body.
    if var_case == 1:
        # Void parameter.
        skip_tokens = 6
    elif var_case == 2:
        # Has parameters.
        skip_tokens = 4 + (3 * (len(params)))
    else:
        # Empty parameters.
        skip_tokens = 5
    body_tokens = tokens[skip_tokens:]

    # Parse the body.
    block_out = help_func_block(grammar,
                                body_tokens,
                                root_name="func_body",
                                function=func_name)
    body_tree = block_out[0]
    skip_tokens += block_out[1]
    tree.paste(func_root.identifier, body_tree)
    return [tree, skip_tokens]
def help_func_block(grammar, tokens, root_name="block", function=None):
    """Build the subtree for a brace-delimited block of statements.

    ``tokens`` starts right after the opening ``{``. The function walks the
    tokens until the matching ``}`` and pastes one subtree per nested block,
    ``if``/``while``, ``return`` statement or ``;``-terminated expression
    under a root node tagged ``root_name``.

    Args:
        grammar: grammar passed through to the expression/return helpers.
        tokens: tokens of the block body; index 0 of a token is its text,
            index 1 its category and index 2 is used as its line number in
            the error message.
        root_name: tag of the block's root node.
        function: key into ``__symbol_tables`` under which declared
            variables are recorded.

    Returns:
        ``[tree, n]`` where ``n`` is the skip count accumulated so far plus 1
        for the closing ``}``.

    Raises:
        Exception: for ``if``/``while`` without a body, or when the tokens
            run out before a closing ``}``.

    NOTE(review): the nesting below -- especially how much of the ``;``
    branch sits inside ``if (len(expr_tokens) > 0)`` -- was reconstructed from
    a whitespace-collapsed source; confirm against the original file.
    """
    #go line by line
    #if }
    #return tree
    #if {
    #recursive help_func_block
    #grab up to till first ;
    #call expression handeler on that sub list
    #returns a tree which is appended
    tree = Tree()
    root_node = Node(tag=root_name)
    tree.add_node(root_node, parent=None)
    func_flag_no_init = 0  # 1 while handling a declaration without initializer
    func_flag_init = 0
    func_flag = 0          # 1 while handling any variable declaration
    front_index = 0        # index of the first token of the pending statement
    num_tokens_to_skip = 0
    i = 0
    while (i < len(tokens)):
        if (tokens[i][0] == "}"):
            # End of this block.
            return [tree, num_tokens_to_skip + 1]
        elif (tokens[i][0] == "{"):
            # Nested block: recurse and jump past it.
            result = help_func_block(grammar,
                                     tokens[i + 1:],
                                     function=function)
            front_index += 1 + result[1]
            i += 1 + result[1]
            num_tokens_to_skip += 1 + result[1]
            tree.paste(root_node.identifier, result[0])
        elif (tokens[i][0] in ["if", "while"]):
            if_node = Node(tag=tokens[i][0])
            tree.add_node(if_node, parent=root_node)
            if_cond = Node(tag="condition")
            tree.add_node(if_cond, parent=if_node)
            # Locate the "{" that opens the body.
            # NOTE(review): tokens.index(token) returns the first EQUAL token
            # in the whole list, which may precede position i -- confirm.
            first_bracket = -1
            for token in tokens[i:]:
                if (token[0] == '{'):
                    first_bracket = tokens.index(token)
                    break
                elif (token[0] == '}'):
                    # Break to throw error if unmatched
                    break
            if (first_bracket < 0):
                raise Exception(tokens[i][0] + " without body '{' on line " +
                                str(tokens[i][2]))
            # The condition lies between the parentheses after the keyword.
            cond_result = help_func_expression(grammar,
                                               tokens[i + 2:first_bracket - 1],
                                               function=function)
            body_result = help_func_block(grammar,
                                          tokens[first_bracket + 1:],
                                          root_name="condition_body",
                                          function=function)
            # Increment i, num_tokens_to_skip, and front_index
            if_skip = 1  # if/while
            if_skip += 1  # opening bracket
            if_skip += 2  # parens
            if_skip += cond_result[1]
            if_skip += body_result[1]
            num_tokens_to_skip += if_skip
            front_index += if_skip
            i += if_skip
            tree.paste(if_cond.identifier, cond_result[0])
            tree.paste(if_node.identifier, body_result[0])
        elif (tokens[i][0] == "return"):
            result = help_func_return(grammar, tokens[i:], function=function)
            front_index += result[1]
            i += result[1]
            num_tokens_to_skip += result[1]
            tree.paste(root_node.identifier, result[0])
        elif (tokens[i][0] == ";"):
            # End of a statement: everything since front_index is one expression.
            back_index = i
            expr_tokens = tokens[front_index:back_index]
            # Remove leading and trailing ( and )
            while (len(expr_tokens) > 0 and
                   (expr_tokens[0][0] == '(' or expr_tokens[0][0] == ')')):
                expr_tokens.pop(0)
                num_tokens_to_skip += 1
            while (len(expr_tokens) > 0 and
                   (expr_tokens[-1][0] == '(' or expr_tokens[-1][0] == ')')):
                expr_tokens.pop(-1)
                num_tokens_to_skip += 1
            if (len(expr_tokens) > 0):
                if (len(expr_tokens) == 2 and
                        expr_tokens[0][1] == 'typeSpecifier' and
                        expr_tokens[1][1] == 'ID'):
                    func_flag = 1
                    func_flag_no_init = 1
                    # print("This is a variable declaration with no intilization")
                    var_type = expr_tokens[0][0]
                    var_name = expr_tokens[1][0]
                    __symbol_tables[function][var_name] = var_type
                    expr_tokens.pop(0)
                elif (len(expr_tokens) > 2 and
                      expr_tokens[0][1] == 'typeSpecifier' and
                      expr_tokens[1][1] == 'ID' and expr_tokens[2][1] == '='):
                    func_flag = 1
                    # print("This is a variable declaration with intilization")
                    var_type = expr_tokens[0][0]
                    var_name = expr_tokens[1][0]
                    __symbol_tables[function][var_name] = var_type
                    expr_tokens.pop(0)
                if (func_flag == 1):
                    # Declaration node: <type> -> <name>.
                    tmp_tree = Tree()
                    tmp_tree_root = Node(tag=var_type)
                    tmp_tree.add_node(tmp_tree_root, parent=None)
                    tmp_tree.add_node(Node(tag=var_name), parent=tmp_tree_root)
                result = help_func_expression(grammar,
                                              expr_tokens,
                                              function=function)
                if (func_flag == 1):
                    # Account for the type token popped above.
                    result[1] += 1
                front_index = back_index + 1
                i += 1
                num_tokens_to_skip += 1 + result[1]
                if (func_flag_no_init != 1):
                    tree.paste(root_node.identifier, result[0])
                if (func_flag == 1):
                    # pass
                    tree.paste(root_node.identifier, tmp_tree)
                func_flag_no_init = 0
                func_flag_init = 0
                func_flag = 0
                tmp_tree = None
        else:
            i += 1
    # Iterated through tokens without closing '}'
    raise Exception(errors.ERR_NO_BLOCK_END + " on line " +
                    str(tokens[i - 1][2]))
def help_func_expression(grammar, tokens, function=None):
    """Parse a flat expression token list into an operator tree.

    The operator with the lowest precedence at the shallowest parenthesis
    depth becomes the root and the tokens on either side are parsed
    recursively.  When no operator is present the expression must be a
    function call (``ID (`` ...), a no-argument function name, a constant
    or an identifier.

    Note: ``tokens`` is modified in place (surrounding parentheses are
    popped, and a call to ``main`` is renamed to ``_start``).

    Args:
        grammar: Passed through unchanged to recursive calls.
        tokens: List of tokens; ``token[0]`` is the lexeme and ``token[1]``
            the token type used for the precedence lookup.
        function: Passed through unchanged to recursive calls.

    Returns:
        ``[tree, tokens_skip]`` - the expression tree and the number of
        tokens accounted for.

    Raises:
        Exception: for the reserved function name ``_start`` and for token
            sequences that are not a recognised expression (including an
            empty one).
    """
    tokens_skip = 0

    # Remove leading and trailing ( and )
    while tokens and tokens[0][0] in ('(', ')'):
        tokens.pop(0)
        tokens_skip += 1
    while tokens and tokens[-1][0] in ('(', ')'):
        tokens.pop()
        tokens_skip += 1

    # Parenthesis depth of every token up to the end of the expression.
    op_depth = []
    depth = 0
    for token in tokens:
        if token[0] == ';':
            break  # End of expression
        elif token[0] == '(':
            depth += 1
        elif token[0] == ')':
            depth -= 1
        op_depth.append(depth)

    op_precedence = {
        "&&": 50,
        "||": 40,
        "shiftop": 35,
        "mulop": 30,
        "sumop": 20,
        "relop": 10,
        "=": 0,
        "+=": 0,
        "-=": 0,
        "*=": 0,
        "\\=": 0,  # original key, kept for compatibility (was written "\=")
        "/=": 0,  # what the key above was presumably meant to be
        "%=": 0
    }
    assign_prec = 0  # assignment operators are right-associative

    # Find the lowest precedence operator.  Positions are tracked by index:
    # looking tokens up with tokens.index() returns the first *equal* token,
    # which gives the wrong depth and the wrong split point whenever the
    # same operator occurs more than once.
    lowest_idx = -1
    for idx, token in enumerate(tokens):
        if token[0] == ';':
            break
        if token[1] not in op_precedence:
            continue
        if lowest_idx < 0:
            lowest_idx = idx
            continue

        cur_depth = op_depth[idx]
        lowest_depth = op_depth[lowest_idx]
        cur_prec = op_precedence[token[1]]
        lowest_prec = op_precedence[tokens[lowest_idx][1]]

        if cur_depth < lowest_depth:
            # Shallower depth guarantees replacement
            lowest_idx = idx
        elif cur_depth == lowest_depth:
            if cur_prec < lowest_prec:
                lowest_idx = idx
            elif cur_prec == lowest_prec and cur_prec != assign_prec:
                # Left-associative: the rightmost operator becomes the
                # root, so "a - b - c" parses as "(a - b) - c".  Chained
                # assignments keep the leftmost one: "a = (b = c)".
                lowest_idx = idx

    if lowest_idx < 0:
        if not tokens:
            raise Exception("Unknown token sequence: " + str(tokens))

        if (len(tokens) > 1 and tokens[0][1] == "ID"
                and tokens[1][1] == "("):
            # Function call: create node with function name
            tree = Tree()
            if tokens[0][0] == "_start":
                raise Exception(
                    "Function name _start is reserved for assembly")
            elif tokens[0][0] == "main":
                tokens[0][0] = "_start"
            call_node = Node(tag="func:" + tokens[0][0])
            tree.add_node(call_node, parent=None)
            tokens_skip += 2

            # Children of node are function parameters.
            # Parameters split on ',' with depth=0.
            depth = 0
            token_depth = []
            end_point = -1
            for i in range(2, len(tokens)):
                tokens_skip += 1
                if tokens[i][0] == "(":
                    depth += 1
                elif tokens[i][0] == ")":
                    depth -= 1
                token_depth.append(depth)
                if depth < 0:
                    # End ) found
                    end_point = i
                    break
            if end_point < 0:
                # The closing ')' was stripped above; run to the end.
                end_point = len(tokens)

            # Find split points for the expressions
            split_points = [2]
            for i in range(len(token_depth)):
                if token_depth[i] == 0 and tokens[i + 2][0] == ',':
                    split_points.append(i + 3)
            split_points.append(end_point)

            func_params = [
                tokens[start:stop]
                for start, stop in zip(split_points, split_points[1:])
            ]

            # Add parameters to tree
            for p in func_params:
                # Needs to call expression handler to evaluate parameters
                # - Currently, operators in function calls are lower prec
                #   than the function for some reason
                # - Exceptions caused by nesting function calls
                if not p:
                    # "f()" yields one empty slice: there is no parameter.
                    continue
                param_node = Node(tag=p[0][0])
                tree.add_node(param_node, parent=call_node)
            return [tree, tokens_skip]

        elif tokens[0][1] in ("NUMCONST", "FLOATCONST", "CHARCONST",
                              "STRINGCONST", "true", "false", "ID"):
            tokens_skip += 1
            tree = Tree()
            if tokens[0][0] in __symbol_tables.keys():
                # Found a function call without parameters
                if tokens[0][0] == "_start":
                    raise Exception(
                        "Function name _start is reserved for assembly")
                elif tokens[0][0] == "main":
                    tokens[0][0] = "_start"
                value_node = Node(tag="func:" + tokens[0][0])
            else:
                # Expression is a constant or named variable
                value_node = Node(tag=tokens[0][0])
            tree.add_node(value_node, parent=None)
            return [tree, tokens_skip]

        else:
            raise Exception("Unknown token sequence: " + str(tokens))

    # Lowest precedence operator found; it is the root.
    lowest_prec_op = tokens[lowest_idx]
    tree = Tree()
    op_node = Node(tag=lowest_prec_op[0])
    tree.add_node(op_node, parent=None)
    tokens_skip += 1

    # Recursive calls to make left and right subtrees.  A side made up
    # only of parentheses is counted but not parsed.
    for side_tokens in (tokens[:lowest_idx], tokens[lowest_idx + 1:]):
        has_operand = any(t[0] != '(' and t[0] != ')' for t in side_tokens)
        if has_operand:
            sub_expr = help_func_expression(grammar, side_tokens,
                                            function=function)
            tree.paste(op_node.identifier, sub_expr[0])
            tokens_skip += sub_expr[1]
        else:
            tokens_skip += len(side_tokens)

    return [tree, tokens_skip]
# Show the subtree rooted at 'diane'.
sub_t = tree.subtree('diane')
sub_t.show()
print('\n')

print("#"*4 + "Children of Diane")
# Call form of print: the bare print statement used here before is a
# SyntaxError on Python 3, while the surrounding lines already use print().
print(tree.is_branch('diane'))
print('\n')

print("#"*4 + "OOhh~ new members enter Jill's family")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
new_tree.create_node("n2", 2, parent=1)
new_tree.create_node("n3", 3, parent=1)
# Attach the whole new tree underneath the existing node 'jill'.
tree.paste('jill', new_tree)
tree.show()
print('\n')

print("#"*4 + "We are sorry they are gone accidently :(")
# Remove node 1 (the root of the tree pasted above) again.
tree.remove_node(1)
tree.show()
print('\n')

print("#"*4 + "Now Jill moves to live with Grand-x-father Harry")
tree.move_node('jill', 'harry')
tree.show()
print('\n')
class Group(ElementWithAttributes):
    """Element that holds child elements in a tree keyed by "/"-paths.

    Children are stored as nodes of ``self.tree``; a node's identifier is
    the full key (e.g. ``"a/b"``), its tag is the last path component and
    its ``data`` is the stored element.  When the group is linked to a
    directory (``self.path`` is set), assigning an element also writes it
    to ``self.path / key``.
    """

    def __init__(self):
        super(Group, self).__init__()
        self.type = DATA_DIR_TYPES.GROUP
        self.path = None  # directory this group is linked to, if any
        self.tree = Tree()

    def __getitem__(self, item):
        """Return the element stored under ``item``, or an attribute value.

        If ``item`` is not a node, its last path component is looked up in
        the ``attrs`` of the parent node's element.  For a ``DataSet``
        whose frame is empty, the data is loaded from
        ``self.path / item / DATA_FILE``.

        Raises:
            KeyError: if ``item`` is neither a node nor an attribute.
            GroupError: if a data set has to be loaded but this group is
                not linked to a path.
        """
        if item not in self.tree:
            rsplit = item.rsplit("/", maxsplit=1)
            if len(rsplit) == 1:
                item_0 = self.tree.root
                key = rsplit[0]
            else:
                item_0, key = rsplit
            if item_0 in self.tree:
                node = self.tree[item_0]
                if (isinstance(node.data, ElementWithAttributes)
                        and key in node.data.attrs):
                    return node.data.attrs[key]  # attribute value
            raise KeyError(f"{item} is not a valid key")

        node = self.tree[item]
        if isinstance(node.data, Group):
            # Rebuild the child group's tree with identifiers relative to
            # ``item``.  A fresh tree is used on every access: adding the
            # nodes to the previous tree again would fail on the duplicate
            # identifiers (or on the second root).
            stree = self.tree.subtree(item)
            rebuilt = Tree()
            for n in stree.all_nodes_itr():
                predecessor = n.predecessor(stree.identifier)
                if predecessor is None:
                    parent = None
                else:
                    parent = predecessor.split(item, maxsplit=1)[1]
                rebuilt.create_node(n.tag,
                                    n.identifier.split(item, maxsplit=1)[1],
                                    parent,
                                    data=n.data)
            node.data.tree = rebuilt
        elif isinstance(node.data, DataSet):
            if node.data.df.empty:
                if self.path is None:
                    raise GroupError(
                        f"{item} is not loaded yet and this element is not linked to a File or Group"
                    )
                node.data.df = pd.read_parquet(self.path / item / DATA_FILE)
        return node.data

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` and persist it if linked.

        A ``Group`` value has its own tree re-keyed with the ``key + "/"``
        prefix and pasted below the new node.  ``Raw`` and ``Attribute``
        values are accepted but not added to the tree.

        Raises:
            KeyError: if ``key`` already exists or its parent does not.
            ValueError: if ``value`` is not a supported element type.
        """
        if key in self.tree:
            raise KeyError(f"{key} already exists")

        rsplit = key.rsplit("/", maxsplit=1)
        if len(rsplit) == 1:
            item_0 = self.tree.root
            key_1 = rsplit[0]
        else:
            item_0, key_1 = rsplit
        if item_0 is not None and item_0 not in self.tree:
            raise KeyError(f"Parent key {item_0} does not exist")

        dd_type = None
        if isinstance(value, Group):
            dd_type = value.type
            new_tree = Tree()
            source_tree = value.tree
            for node in source_tree.all_nodes_itr():
                # Same parent lookup as in __getitem__; the nodes returned
                # here have no ``parent`` attribute.
                predecessor = node.predecessor(source_tree.identifier)
                if predecessor is None:
                    parent = None
                else:
                    parent = key + "/" + predecessor
                new_tree.create_node(node.tag,
                                     key + "/" + node.identifier,
                                     parent=parent,
                                     data=node.data)
            value.tree = new_tree
            self.tree.create_node(tag=key_1,
                                  identifier=key,
                                  parent=item_0,
                                  data=value)
            self.tree.paste(key, new_tree)
        elif isinstance(value, DataSet):
            dd_type = DATA_DIR_TYPES.DATASET
            self.tree.create_node(tag=key_1,
                                  identifier=key,
                                  parent=item_0,
                                  data=value)
        elif isinstance(value, Raw):
            pass
        elif isinstance(value, Attribute):
            pass
        else:
            raise ValueError(f"{value} is not a valid type for DataDir")

        # Everything below only applies when self is linked to a directory.
        if self.path is None:
            return
        has_attrs = isinstance(value, ElementWithAttributes)
        is_dataset = isinstance(value, DataSet)
        if not (has_attrs or is_dataset):
            return

        # Create the element directory first: the parquet file is written
        # inside it, and writing into a missing directory fails.
        target = self.path / key
        target.mkdir()
        if is_dataset:
            value.df.to_parquet(target / DATA_FILE)
        if has_attrs:
            # write ddir and attributes file
            _write_ddir_json(target, dd_type=dd_type)
            with (target / ATTRIBUTES_FILE).open("w") as attrs_file:
                json.dump(value.attrs, attrs_file, indent=4)

    def link(self, path):
        """Link this group to the directory ``path``."""
        self.path = path
class TreeT(object):
    """Wrapper around a ``Tree`` with conversions between bracketed
    (PTB-style) token lists, the tree itself and per-leaf tag sequences.

    Node payloads are ``TreeData`` objects; the methods below read and
    write their ``height``, ``leaves``, ``word``, ``comb_side`` and
    ``miss_side`` attributes.
    """

    def __init__(self, max_id=0):
        # NOTE(review): ``max_id`` is accepted but not used here.
        self.tree = Tree()

    def from_ptb_to_tree(self, line, max_id=0, leaf_id=1, parent_id=None):
        """Recursively add the bracketed constituent at the start of
        ``line`` to ``self.tree``.

        ``line`` is a token sequence starting with ``'('`` followed by a
        tag.  The outermost node gets id 0, other internal nodes take ids
        from ``max_id`` upwards and leaves take ids from ``leaf_id``
        upwards.

        Returns:
            ``(tokens consumed, next max_id, next leaf_id)``.
        """
        # starts by ['(', 'pos']
        pos_tag = line[1]
        if parent_id is None:
            pos_id = 0
        else:
            pos_id = max_id
            max_id += 1

        self.tree.create_node(pos_tag, pos_id, parent_id, TreeData())

        parent_id = pos_id
        total_offset = 2

        if line[2] != '(':
            # sub-tree is leaf
            # line[0:4] = ['(', 'pos', 'word', ')']
            word_tag = line[2]
            self.tree.create_node(word_tag, leaf_id, parent_id, TreeData())
            return 4, max_id, leaf_id + 1

        line = line[2:]

        # Consume child constituents until this one's closing ')'.
        while line[0] != ')':
            offset, max_id, leaf_id = self.from_ptb_to_tree(
                line, max_id, leaf_id, parent_id)
            total_offset += offset
            line = line[offset:]

        # +1 accounts for the closing ')'.
        return total_offset + 1, max_id, leaf_id

    def add_height(self, tree_dep):
        """Fill ``data.height`` on leaves and ``data.leaves`` on nodes.

        ``tree_dep`` is indexed by leaf id and yields the id that leaf
        depends on (presumably its dependency head - confirm with the
        caller).  Always returns ``True``.
        """
        for n in self.tree.all_nodes():
            n.data.leaves = []

        for leaf in self.tree.leaves():
            lid = leaf.identifier
            hid = tree_dep[lid]
            if hid == self.tree.root:
                # Leaf depending on the root: its height is its depth and
                # it is recorded on every node of its root-to-leaf path.
                self.tree[lid].data.height = self.tree.depth(self.tree[lid])
                for cid in [
                        p for p in self.tree.paths_to_leaves() if lid in p
                ][0]:
                    self.tree[cid].data.leaves += [lid]
            else:
                # Climb from the leaf while the dependencies of the other
                # leaves under the current ancestor all stay inside that
                # ancestor's leaf set.
                height = -1
                cid = lid
                cond = True
                while cond:
                    self.tree[cid].data.leaves += [lid]
                    height += 1
                    cid = self.tree.parent(cid).identifier
                    cid_leaves = [l.identifier for l in self.tree.leaves(cid)]
                    cid_l_dep = [tree_dep[l] for l in cid_leaves if l != lid]
                    cond = set(cid_l_dep).issubset(set(cid_leaves))
                self.tree[lid].data.height = height

        # Nodes that no leaf claimed above.
        x_nodes = [
            n.identifier for n in self.tree.all_nodes() if n.data.leaves == []
        ]
        for x_node in x_nodes[::-1]:
            # Give the node to the first leaf of its lowest child and
            # raise that leaf's height by one.
            min_id = min(self.tree.children(x_node),
                         key=lambda c: c.data.height)
            _lid = min_id.data.leaves[0]
            self.tree[_lid].data.height += 1
            self.tree[x_node].data.leaves += [_lid]

        return True

    def _from_tree_to_ptb(self, nid):
        """Return the bracketed string for the subtree rooted at ``nid``
        (each constituent is prefixed with a space)."""
        nid = self.tree.subtree(nid).root
        if self.tree[nid].is_leaf():
            return ' (' + self.tree[nid].tag + ' ' + self.tree[
                nid].data.word + ')'

        res = ' (' + self.tree[nid].tag

        # Children are emitted in ascending identifier order.
        for c_nid in sorted(self.tree.children(nid),
                            key=lambda x: x.identifier):
            res += self._from_tree_to_ptb(c_nid.identifier)

        return res + ')'

    def from_tree_to_ptb(self):
        """Return the bracketed string for the whole tree."""
        return self._from_tree_to_ptb(self.tree.root)

    def from_tag_to_tree(self, tag, word, pos_id=0):
        """Build a chain of nodes from ``tag`` and attach ``word``.

        Each element of ``tag`` creates one node below the previous one;
        a leading ``CL``/``CR`` marker is stored as ``comb_side``.  The
        remaining entries become children whose first character is stored
        as ``miss_side``.  ``word`` is set on the first leaf whose
        ``miss_side`` is empty.

        Returns:
            The next unused node id.
        """
        parent_id = None
        for tag_nodes in tag:
            if tag_nodes[0] in [CL, CR]:
                c_side = tag_nodes[0]
                _tag_nodes = tag_nodes[1:] if len(tag_nodes) > 1 else ['']
            else:
                c_side = ''
                _tag_nodes = tag_nodes
            self.tree.create_node(_tag_nodes[0],
                                  pos_id,
                                  parent=parent_id,
                                  data=TreeData(comb_side=c_side))

            parent_id = pos_id
            pos_id += 1
            for tag_node in _tag_nodes[1:]:
                self.tree.create_node(tag_node[1:],
                                      pos_id,
                                      parent=parent_id,
                                      data=TreeData(miss_side=tag_node[0]))
                pos_id += 1
        for l in self.tree.leaves():
            if l.data.miss_side == '':
                l.data.word = word
                break
        return pos_id

    @memoize
    def is_combine_to(self, side):
        """Return whether the root's ``comb_side`` equals ``side``."""
        return self.tree[self.tree.root].data.comb_side == side

    @memoize
    def is_combine_right(self):
        """Return whether the root's ``comb_side`` is ``CR``."""
        return self.is_combine_to(CR)

    @memoize
    def is_combine_left(self):
        """Return whether the root's ``comb_side`` is ``CL``."""
        return self.is_combine_to(CL)

    @memoize
    def is_complete_tree(self):
        """Return whether every node has an empty ``miss_side``."""
        return all([n.data.miss_side == '' for n in self.tree.all_nodes()])

    @memoize
    def get_missing_leaves_to(self, miss_val, side):
        """Return ids of leaves tagged ``miss_val`` whose ``miss_side``
        equals ``side``."""
        return [
            l.identifier for l in self.tree.leaves(self.tree.root)
            if l.data.miss_side == side and l.tag == miss_val
        ]

    @memoize
    def get_missing_leaves_left(self, miss_val):
        """Return ids of leaves tagged ``miss_val`` with ``miss_side``
        ``L``."""
        return self.get_missing_leaves_to(miss_val, L)

    @memoize
    def get_missing_leaves_right(self, miss_val):
        """Return ids of leaves tagged ``miss_val`` with ``miss_side``
        ``R``."""
        return self.get_missing_leaves_to(miss_val, R)

    @memoize
    def root_tag(self):
        """Return the tag of the root node."""
        return self.tree[self.tree.root].tag

    @memoize
    def is_no_missing_leaves(self):
        """Return whether every leaf has an empty ``miss_side``."""
        return all(
            [l.data.miss_side == '' for l in self.tree.leaves(self.tree.root)])

    @memoize
    def combine_tree(self, _tree, comb_leaf):
        """Paste ``_tree.tree`` under ``comb_leaf``, then splice
        ``comb_leaf`` out via ``link_past_node``.  Returns ``self``.

        NOTE(review): this mutates ``self.tree`` while being wrapped in
        ``@memoize`` - confirm the decorator is safe for that.
        """
        self.tree.paste(comb_leaf, _tree.tree)
        self.tree.link_past_node(comb_leaf)
        return self

    def tree_to_path(self, nid, path):
        """Fill ``path`` (leaf id -> list) for the subtree at ``nid``.

        Returns ``(leaf_id, remaining height)`` of the leaf whose path
        continues through ``nid``.
        NOTE(review): relies on at least one child reporting height > 0;
        otherwise ``ret_leaf_id`` is never assigned.
        """
        # Stop condition
        if self.tree[nid].is_leaf():
            path[nid] = []
            return nid, self.tree[nid].data.height

        # Recursion
        flag = CR
        for child in self.tree.children(nid):
            cid = child.identifier
            leaf_id, height = self.tree_to_path(cid, path)

            if (height == 0):
                # Reached end of path can add flag
                path[leaf_id].insert(0, flag)
                # path[leaf_id].append(flag)

            if height > 0:
                path[leaf_id].insert(0, nid)
                # only single child will have height>0
                # and its value will be the one that is returned
                # to the parent
                ret_leaf_id, ret_height = leaf_id, height - 1

                # once we reached a height>0, it means that
                # this path includes the parent, and thus flag
                # direction should flip
                flag = CL

        return ret_leaf_id, ret_height

    def path_to_tags(self, path):
        """Turn each path (as built by ``tree_to_path``) into a tag list.

        A leading ``CL``/``CR`` flag is kept as is.  For every node id but
        the last, the node's tag is emitted followed by its off-path
        children, prefixed with ``R`` when the child id is greater than
        the next id on the path and ``L`` otherwise.  The last id
        contributes just its tag.
        """
        tags = []
        for p in path:
            _res = []
            # Work on a copy: elements are popped below.
            _p = copy.copy(p)
            if _p[0] in [CL, CR]:
                _res.append(_p[0])
                _p = _p[1:]
            while _p[:-1]:
                el_p = _p.pop(0)
                _res.append(self.tree[el_p].tag)
                for c in self.tree.children(el_p):
                    if c.identifier != _p[0]:
                        _res.append(R + c.tag if c.identifier > _p[0] else L +
                                    c.tag)
            _res.append(self.tree[_p[0]].tag)
            tags.append(_res)
        return tags

    def path_to_words(self, path):
        """Return the tag of each node id in ``path``."""
        return [self.tree[k].tag for k in path]

    def from_tree_to_tag(self):
        """Return ``{'tags': ..., 'words': ...}`` for the current tree."""
        path = {}
        self.tree_to_path(self.tree.root, path)
        return {
            'tags': self.path_to_tags(path.values()),
            'words': self.path_to_words(path.keys())
        }

    def from_ptb_to_tag(self, line, max_id, depend):
        """Build the tree from ``line``, compute heights from ``depend``
        and return the per-leaf tag lists."""
        self.from_ptb_to_tree(line, max_id)
        self.add_height(depend)
        path = {}
        self.tree_to_path(self.tree.root, path)
        return self.path_to_tags(path.values())
def change(tree):
    """Propose replacing the split rule of one randomly chosen node.

    A node with more than one available split option is picked at random,
    a new (variable, value) pair is drawn for it and its subtree is
    regenerated on a deep copy of ``tree``.  The copy is returned when a
    uniform draw falls below the ratio ``rLike * rTransit * rStruct``;
    otherwise, or when the subtree cannot be regenerated, the original
    ``tree`` is returned.
    """

    def _option_total(options):
        # Total number of split values over all variables.
        return sum([len(values) for values in options])

    # Candidate nodes: those offering more than one option in total.
    internal_ids = nidValid(tree)
    options_by_node = [getChoice(tree, node_id) for node_id in internal_ids]
    choiceDic = {}
    for node_id, options in zip(internal_ids, options_by_node):
        if _option_total(options) > 1:
            choiceDic[node_id] = options
    candidates = list(choiceDic.keys())

    # Choose the node whose rule is replaced.
    nid = random.choice(candidates)
    target = tree[nid]
    p = target.data.shape[1]
    x0 = target.var
    s0 = target.split
    node_options = choiceDic[nid]

    # The current split value must not be proposed again.
    if s0 in node_options[x0 - 1]:
        node_options[x0 - 1].remove(s0)

    # Choose the variable, then the value to split on.
    usable_vars = [i for i in range(p - 1) if len(node_options[i]) > 0]
    x = random.choice(usable_vars)
    value_options = node_options[x]
    x += 1
    s = random.choice(value_options)

    # Regenerate the subtree on a deep copy.
    tree1 = Tree(tree, deep=True)
    pid = tree1[nid].bpointer
    sub = tree1.remove_subtree(nid)
    tags = recurTag(sub, nid)
    tags[0] = (nid, x, s)
    try:
        sub1 = genTree(sub[nid], tags)
    except IndexError:
        print(f'{mi} change {t}: {tags[0]}; unchangable')
        return tree

    if pid is None:
        tree1 = sub1
    else:
        tree1.paste(pid, sub1)
        tree1[pid].fpointer = sorted(tree1[pid].fpointer)

    # Candidate nodes of the proposed tree (for the reverse move).
    valid_after = set(nidValid(tree1))
    candidates_before = set(candidates)
    still_candidates = valid_after.intersection(candidates_before)
    extra = valid_after - candidates_before
    extra_options = [getChoice(tree1, node_id) for node_id in extra]
    candidates_after = list(still_candidates) + [
        node_id for node_id, options in zip(extra, extra_options)
        if _option_total(options) > 1
    ]

    reverse_values = getChoice(tree1, nid, x0)[x0 - 1]
    n31 = len(reverse_values)
    if (sub1[nid].var == sub[nid].var) and (s0 in reverse_values):
        n31 -= 1

    rTransit = (len(candidates_before) * len(value_options) /
                (len(candidates_after) * n31))
    rLike = get_like_ratio(tree.R, sub.leaves(), sub1.leaves())
    rStruct = get_struct(sub.all_nodes_itr(), sub1.all_nodes_itr())
    r = rLike * rTransit * rStruct
    print(f'{mi} change {t}: {tags[0]}; r={r.round(4)}')

    if not (random.uniform(0, 1) < r):
        return tree

    # Accepted: carry the state over to the new tree.
    tree1.w2 = tree.w2
    tree1.R = tree.R
    tree1.leaf = [n.identifier for n in tree1.leaves() if len(n.xvar) > 0]
    tree1.show()
    return tree1