def build_tree(sentences):
    """Build one combined tree from the bracketed parses of every sentence.

    ``sentences`` is split with ``nltk.tokenize.sent_tokenize``; each sentence
    is parsed with ``nlp.parse`` and the whitespace-separated tokens of that
    parse are folded into ``Node`` objects.  The root found for every sentence
    is attached to a synthetic node labelled ``'R'``, which is returned.
    """
    sentence_roots = []
    for sentence in nltk.tokenize.sent_tokenize(sentences):
        bracketed = nlp.parse(sentence)
        open_nodes = []  # constituents that have been opened but not closed
        sentence_root = None
        for token in bracketed.split():
            if token[0] == '(':
                # "(LABEL" opens a new constituent below the current one.
                opened = Node(token[1:])
                if open_nodes:
                    open_nodes[-1].addkid(opened)
                open_nodes.append(opened)
                if sentence_root is None:
                    sentence_root = opened
                continue

            # Otherwise the token is a leaf followed by closing brackets.
            close_at = token.find(')')
            leaf = Node(token[:close_at])
            if open_nodes:
                open_nodes[-1].addkid(leaf)
            # One pop attempt per character from the first ')' to the end.
            for _ in range(len(token) - close_at):
                if open_nodes:
                    open_nodes.pop()
        sentence_roots.append(sentence_root)

    combined = Node('R')
    for sentence_root in sentence_roots:
        combined.addkid(sentence_root)
    return combined
def tree_generate(node_name, index, pos, dependency_list, visited_list, reverse=False):
    """Recursively build the subtree rooted at the token with the given index.

    :type node_name: str
    :type index: int
    :type pos: str
    :type dependency_list: list
    :type visited_list: list

    Every entry ``d`` of ``dependency_list`` is read as ``d[0][0]`` (index of
    the head) and ``d[1][0]``, ``d[1][1]``, ``d[1][2]`` (index, name and pos of
    the dependent).  ``visited_list`` is extended in place with ``index`` and
    with each dependent tuple that gets expanded.  The direct children of the
    returned node are sorted by label, descending when ``reverse`` is true.
    """
    node = Node(node_name, pos, index=index)
    visited_list += [index]

    dependents = [
        (dep[1][0], dep[1][1], dep[1][2])
        for dep in dependency_list
        if dep[0][0] == index
    ]
    for dependent in dependents:
        kid_index, kid_name, kid_pos = dependent
        if dependent in visited_list:
            # Already expanded elsewhere: attach a plain leaf, do not recurse.
            node.value = 1
            node.addkid(Node(label=kid_name, index=kid_index, pos=kid_pos))
        else:
            # NOTE: ``reverse`` is not forwarded, so nested levels always use
            # the default sort order.
            node.addkid(tree_generate(kid_name, kid_index, kid_pos,
                                      dependency_list, visited_list))
            visited_list += [dependent]

    # Once all dependents are attached, fold their values into this node.
    kids = Node.get_children(node)
    if kids:
        node.value += sum([kid.value for kid in kids])
    node.children.sort(key=lambda kid: kid.label, reverse=reverse)
    return node
def tree_generate(node_name, index, pos, dependency_list, visited_list, reverse=False):
    """Generate the tree that hangs below the node with the given name.

    :type node_name: str
    :type index: int
    :type pos: str
    :type dependency_list: list
    :type visited_list: list

    Dependents of ``index`` are the entries ``d`` of ``dependency_list`` with
    ``d[0][0] == index``; each is represented as the tuple
    ``(d[1][0], d[1][1], d[1][2])``, i.e. (index, name, pos).
    ``visited_list`` is mutated in place.  ``reverse`` only affects how the
    direct children of the returned node are ordered by label.
    """
    current = Node(node_name, pos, index=index)
    visited_list += [index]

    # Collect every (index, name, pos) triple whose head is this node.
    triples = []
    for relation in dependency_list:
        if relation[0][0] == index:
            triples.append((relation[1][0], relation[1][1], relation[1][2]))

    for triple in triples:
        if triple not in visited_list:
            subtree = tree_generate(triple[1], triple[0], triple[2],
                                    dependency_list, visited_list)
            current.addkid(subtree)
            visited_list += [triple]
            continue
        # Seen before: reset the value and attach a leaf without recursing.
        current.value = 1
        current.addkid(Node(label=triple[1], index=triple[0], pos=triple[2]))

    attached = Node.get_children(current)
    if attached:
        current.value += sum([child.value for child in attached])

    current.children.sort(key=lambda child: child.label, reverse=reverse)
    return current
def helper(node: Node, index: int) -> None:
    """Attach the dependents of token ``index`` beneath ``node``, recursively.

    Reads the mapping ``dep`` from the enclosing scope:
    ``dep[str(i)]['deps']`` is a mapping whose values are lists of dependent
    indices, and ``dep[str(i)]['word']`` supplies the label of the node
    created for token ``i``.
    """
    # Flatten all dependent lists in a single pass; ``sum(lists, [])`` would
    # copy the growing accumulator once per group.
    dependents = [
        child_index
        for group in dep[str(index)]['deps'].values()
        for child_index in group
    ]
    for child_index in dependents:
        child_node = Node(dep[str(child_index)]['word'])
        helper(child_node, child_index)
        node.addkid(child_node)
def helper(obj):
    """Turn a nested list into a ``Node`` tree.

    A list is read as ``[label, kid, kid, ...]`` and converted recursively;
    any other value becomes a leaf labelled with the value itself.
    """
    if not isinstance(obj, list):
        return Node(obj)

    parent = Node(obj[0])
    for sub in obj[1:]:
        parent.addkid(helper(sub))
    return parent
def dgl_tree_to_zzs_tree(tree, vocab_key_list, u):
    """Convert the part of graph ``tree`` that feeds into node ``u``.

    A node with in-degree 0 becomes a leaf labelled
    ``vocab_key_list[tree.ndata['x'][u]]``.  Any other node becomes a
    ``"PAD"`` node whose kids are the converted source nodes of the edges
    pointing at ``u``.
    """
    if tree.in_degrees(u) == 0:
        leaf_label = vocab_key_list[tree.ndata['x'][u]]
        return Node(leaf_label)

    pad = Node("PAD")
    sources = tree.in_edges(u)[0]
    for source in sources:
        pad.addkid(dgl_tree_to_zzs_tree(tree, vocab_key_list, int(source)))
    return pad
def construct_node(tree, level, threshold = 1000):
    """Convert a ``{'tagName': ..., 'children': [...]}`` dict into a ``Node`` tree.

    Recursion stops at dicts without a ``'children'`` key and at nodes whose
    ``level`` equals ``threshold``; such nodes are returned without kids.
    """
    root = Node(tree['tagName'])

    expandable = 'children' in tree and level != threshold
    if not expandable:
        root.label = tree['tagName']
        return root

    for sub in tree['children']:
        root.addkid(construct_node(sub, level + 1, threshold))
    return root
def totree(self, e):
    """Convert expression ``e`` into a ``Node`` tree.

    ``Var`` and ``Const`` become leaves labelled ``('V', str(e))`` and
    ``('C', str(e))``.  An ``Op`` becomes a node labelled ``('O', e.name)``
    with one kid per argument.  Any other value yields ``None``.
    """
    for leaf_type, tag in ((Var, 'V'), (Const, 'C')):
        if isinstance(e, leaf_type):
            return Node((tag, str(e)))

    if not isinstance(e, Op):
        return None

    op_node = Node(('O', e.name))
    for operand in e.args:
        op_node.addkid(self.totree(operand))
    return op_node
def get_ztree(cn, ztp=None):
    """Mirror the ``Tree`` structure of ``cn`` as ``Node`` objects.

    ``cn`` may be a ``Tree`` or a string accepted by ``Tree.fromstring``.
    ``ztp`` is the node that receives the converted subtrees; when omitted a
    new node labelled ``cn.label()`` is created.  Children of ``cn`` that are
    not ``Tree`` instances are skipped.  Returns ``ztp``.
    """
    if isinstance(cn, str):
        cn = Tree.fromstring(cn)
    if ztp is None:
        ztp = Node(cn.label())

    for sub in cn:
        if not isinstance(sub, Tree):
            continue
        mirrored = Node(sub.label())
        ztp.addkid(mirrored)
        get_ztree(sub, mirrored)
    return ztp
def parsed_tree_to_zzs_tree(u):
    """Convert parsed-tree node ``u`` into a ``Node`` tree.

    A node without children becomes a leaf labelled ``u.value``; a node with
    exactly one child is skipped in favour of that child; a node with two
    children becomes a ``"PAD"`` node holding both converted children.
    """
    child_count = len(u.child)
    if child_count == 0:
        return Node(u.value)
    if child_count == 1:
        # Unary chains are collapsed.
        return parsed_tree_to_zzs_tree(u.child[0])

    assert len(u.child) == 2
    pad = Node("PAD")
    left, right = u.child[0], u.child[1]
    pad.addkid(parsed_tree_to_zzs_tree(left))
    pad.addkid(parsed_tree_to_zzs_tree(right))
    return pad
def make_zss_tree(ast_node):
    """Convert ``ast_node`` and its descendants into a ``Node`` tree.

    The label is ``ast_node.data`` when that attribute exists, otherwise
    ``ast_node.type``.  Objects without a ``children`` attribute become
    leaves.
    """
    label = ast_node.data if hasattr(ast_node, "data") else ast_node.type
    converted = Node(label)
    if not hasattr(ast_node, "children"):
        return converted

    for sub in ast_node.children:
        converted.addkid(make_zss_tree(sub))
    return converted
def make_html_zssgraph(parent, graph=None, ignore_comments=True):
    '''
    Given a parsed HTML element, return a zss style tree of the DOM below it.

    parent          -- element whose ``tag`` labels the node and whose
                       iteration yields its child elements
    graph           -- node that receives the converted children; a new node
                       labelled ``parent.tag`` is created when omitted
    ignore_comments -- when true, skip children whose ``tag`` is not a string
                       together with everything below them

    Returns ``graph``.
    '''
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3

    # Explicit None test: a node that has no kids yet must not be replaced.
    if graph is None:
        graph = Node(parent.tag)

    for element in parent:
        # if the element is a comment, ignore it
        if ignore_comments and not isinstance(element.tag, string_types):
            continue
        kid = Node(element.tag)
        graph.addkid(kid)
        # Recurse into the new kid, not into ``graph``; otherwise every
        # descendant ends up as a direct child of the root.
        make_html_zssgraph(element, kid, ignore_comments)
    return graph
def convert(args, tree, label, height):
    """Build the ``Node`` tree for ``label`` and report the deepest level reached.

    ``tree`` maps a label to an iterable of child labels.  Only children that
    are themselves keys of ``tree`` are converted.  Expansion stops once
    ``height`` equals ``args.tree_height2``.  Returns ``(node, max_height)``.
    """
    node = Node(label)
    if height == args.tree_height2:
        return (node, height)

    deepest = height
    for child in tree[label]:
        if child not in tree:
            continue
        kid, kid_height = convert(args, tree, child, height + 1)
        if kid_height > deepest:
            deepest = kid_height
        node.addkid(kid)
    return (node, deepest)
def totree(self, e):
    """Convert expression ``e`` into a ``Node`` tree.

    ``Var`` and ``Const`` become leaves labelled ``('V', str(e))`` and
    ``('C', str(e))``.  An ``Op`` becomes a node labelled ``('O', name)`` with
    one kid per argument, where the operator name ``'AssAdd'`` is replaced by
    ``'Ass'``.  Any other value yields ``None``.
    """
    if isinstance(e, Var):
        return Node(('V', str(e)))
    if isinstance(e, Const):
        return Node(('C', str(e)))
    if not isinstance(e, Op):
        return None

    op_name = 'Ass' if e.name == 'AssAdd' else e.name
    op_node = Node(('O', op_name))
    for operand in e.args:
        op_node.addkid(self.totree(operand))
    return op_node
def tree_edit_distance(s1,s2):
    """Return ``simple_distance`` between two comma-separated label strings.

    Each string is split on ``','`` and turned into a tree of depth one: a
    root labelled ``""`` with one kid per item, in order.
    """
    def flat_tree(joined):
        root = Node("")
        for label in joined.split(','):
            root.addkid(Node(label))
        return root

    return simple_distance(flat_tree(s1), flat_tree(s2))
def tree_edit_distance(s1, s2):
    """Compare two comma-separated label strings with ``simple_distance``.

    Both strings are split on ``','``; the items of each become, in order,
    the kids of a root node labelled ``""``.
    """
    roots = []
    for labels in (s1.split(','), s2.split(',')):
        root = Node("")
        for label in labels:
            root.addkid(Node(label))
        roots.append(root)

    first, second = roots
    return simple_distance(first, second)
def json_to_tree(self, toplevel):
    """Convert a list of nested-list items into one ``Node`` tree.

    Each entry of ``toplevel`` is converted and attached to a root labelled
    ``"toplevel"``.  Inside an entry, a list is read as
    ``[label, kid, kid, ...]`` and any other value becomes a leaf.
    """
    def to_node(item):
        if not isinstance(item, list):
            return Node(item)
        parent = Node(item[0])
        for sub in item[1:]:
            parent.addkid(to_node(sub))
        return parent

    prog = Node("toplevel")
    for fun in toplevel:
        prog.addkid(to_node(fun))
    return prog
def dfs_search(n_node):
    """Convert a nested dict AST into a ``Node`` tree.

    A dict that has a ``"_nodetype"`` key becomes a node labelled with that
    value.  Its dict values, and the members of its list values, are
    converted recursively and attached as kids whenever the conversion
    produces a node.  Returns ``None`` for anything that is not a dict with a
    ``"_nodetype"`` key.
    """
    if not isinstance(n_node, dict) or "_nodetype" not in n_node:
        return None

    res = Node(n_node['_nodetype'])
    for item in n_node.values():
        if isinstance(item, dict):
            candidates = [item]
        elif isinstance(item, list):
            candidates = item
        else:
            continue
        for candidate in candidates:
            kid = dfs_search(candidate)
            # Skip entries that are not AST nodes, for dict values as well as
            # list members, so that ``None`` is never attached as a kid.
            if kid is not None:
                res.addkid(kid)
    return res
def convert_body(body, parent_node=None, root_node=None):
    """Convert a dict-based AST (nodes tagged with ``'_PyType'``) into ``Node`` objects.

    ``body`` is first passed through ``seperate_dict`` (defined elsewhere).
    On the outermost call ``parent_node`` is ``None`` and a root node labelled
    ``body['_PyType']`` is created; recursive calls receive the node to attach
    to as ``parent_node`` and the overall root as ``root_node``.

    Returns ``root_node``; if the processed ``body`` is not a dict, that is
    whatever value was passed in.
    """
    body = seperate_dict(body)
    if isinstance(body, dict):
        if parent_node == None:
            # Outermost call: this dict supplies the root of the whole tree.
            parent_node = Node(body['_PyType'])
            new_parent = parent_node
            root_node = parent_node
        for j in body:
            if j != '_PyType':
                # still have a kid, then recursion needed
                if isinstance(body[j], dict):
                    if '_PyType' in body[j].keys():
                        if 'attr' in body[j].keys():
                            # Label: "<key> <_PyType> <attr>"
                            node_content = j + ' ' + body[j][
                                '_PyType'] + ' ' + body[j]['attr']
                            new_parent = Node(node_content)
                            parent_node.addkid(new_parent)
                            new_parent = convert_body(body[j], parent_node=new_parent, root_node=root_node)
                        else:
                            call_call_func_name = ''
                            #if j == ''
                            if j == 'func':
                                # For the 'func' key the label also carries body[j]['id'].
                                call_call_func_name = ' ' + body[j]['id']
                            node_content = j + ' ' + body[j][
                                '_PyType'] + call_call_func_name
                            new_parent = Node(node_content)
                            parent_node.addkid(new_parent)
                            new_parent = convert_body(body[j], parent_node=new_parent, root_node=root_node)
                    else:
                        # case when it's a dict but not with PyType
                        if 'udv' in json.dumps(body[j]):
                            node_content = j + ' ' + 'udv'
                        else:
                            node_content = j + ' ' + json.dumps(body[j])
                        # NOTE(review): relies on addkid returning the node it
                        # was called on - confirm for the Node class in use.
                        parent_node = parent_node.addkid(Node(node_content))
                elif isinstance(body[j], list) or isinstance(body[j], str):
                    if body[j]:
                        # NOTE(review): if body[j] is a non-empty list this
                        # str + list concatenation raises TypeError - confirm
                        # whether such input can occur.
                        node_content = j + ' ' + body[j]
                    else:
                        node_content = j + ' ' + ''
                    parent_node = parent_node.addkid(Node(node_content))
    return root_node
def _as_node_list(converted):
    """Return ``converted`` as a list: lists pass through, a single node is wrapped."""
    return converted if isinstance(converted, list) else [converted]


def convert_to_tree(source):
    '''
    Convert a given AST into a zss tree, recursively processing the AST
    and handling special cases as specified.

    For example, with if statements: if someone is missing an IF altogether,
    then their distance will be at least 3 from something with the if -
    1 error for the if condition, 1 error for the code if true, 1 error for
    the code if false.

    Also note that with the zss.simple_distance metric, "swapping" two lines
    is TWO operations: re-labeling the first node and the second node.

    Returns a single node, or a list of nodes when ``source`` is a
    'statementList' or 'maze_turn' node (those are replaced by their
    children).  Raises AssertionError when a 'maze_forever' or 'maze_ifElse'
    node does not have the expected shape, and Exception when the single
    child of a 'maze_forever' node converts to a list.
    '''
    node_type = source['type']

    if node_type == 'statementList' or node_type == 'maze_turn':
        # Skip these nodes because they are redundant with their children
        node_list = []
        for child in source['children']:
            node_list.extend(_as_node_list(convert_to_tree(child)))
        return node_list

    if node_type == 'maze_forever':
        # This node type (loop until finish) is never used in HOC4
        assert len(source['children']) == 1, "While has more than 1 child"
        # There's only 1 child, the DO statement
        child_node = convert_to_tree(source['children'][0])
        if isinstance(child_node, list):
            # This should never happen
            raise Exception("Should never be here")
        child_node.label = "while_" + child_node.label
        return child_node

    if node_type == 'maze_ifElse':
        # This node type (if/else) is never used in HOC4. It should consist
        # of 3 children:
        # - the if condition
        # - the statements to be executed if the condition is met
        # - the statements to be executed if the condition is NOT met
        assert len(
            source['children']) == 3, "If/else has wrong number of children"
        condition = source['children'][0]['type']
        assert condition in ['isPathLeft', 'isPathRight',
                             'isPathForward'], "Bad condition"
        condition_node = Node(condition)  # The condition of the IF statement
        # Statements of the IF branch first, then those of the ELSE branch.
        for branch in source['children'][1:3]:
            for statement in _as_node_list(convert_to_tree(branch)):
                condition_node.addkid(statement)
        return condition_node

    node = Node(source['type'])
    if 'children' in source:
        for child in source['children']:
            for child_node in _as_node_list(convert_to_tree(child)):
                node.addkid(child_node)
    return node
def constructZssTree(self, treeNode):
    """Recursively copy ``treeNode`` and its ``childNodes`` into a ``Node`` tree.

    Each node is labelled with the result of ``getLabel()``.
    """
    zss_node = Node(treeNode.getLabel())
    for sub in treeNode.childNodes:
        converted = self.constructZssTree(sub)
        zss_node.addkid(converted)
    return zss_node
from zss import simple_distance, Node

# Only forever, repeat, if-then, if-then-else and repeat-until are nestable
# blocks, hence only they have children; the rest do not.
# Whenever we encounter one of these blocks, start adding child nodes.
fileName = ''  # placeholder: must be set to an existing file before running
control_blocks = ['forever', 'if-then', 'if-then-else', 'repeat', 'repeat-until']

init_tree = Node('root')
# ``with`` closes the file even if building the tree fails.
with open(fileName, 'r') as fName:
    for line in fName:
        # Lines keep their trailing newline; strip it, otherwise no line can
        # ever equal an entry of control_blocks.
        opcode = line.strip()
        if opcode in control_blocks:
            currentNode = opcode
            init_tree.addkid(Node(currentNode))
            prevNode = currentNode

# Two small example trees that differ in two labels (h/d and l/b).
A = (
    Node("f")
    .addkid(Node("a")
            .addkid(Node("h"))
            .addkid(Node("c")
                    .addkid(Node("l"))))
    .addkid(Node("e"))
)
B = (
    Node("f")
    .addkid(Node("a")
            .addkid(Node("d"))
            .addkid(Node("c")
                    .addkid(Node("b"))))
    .addkid(Node("e"))
)
# Single-argument call form prints the same under Python 2 and Python 3.
print(simple_distance(A, B))