def tree_generate(node_name, index, pos, dependency_list, visited_list, reverse=False):
    """Build the tree rooted at the given node from a dependency list.

    Every dependency ``d`` whose head index ``d[0][0]`` equals ``index``
    contributes one child, described by the ``(index, name, pos)`` triple
    read from ``d[1]``.  A child triple already present in
    ``visited_list`` is attached as a plain node without recursion and
    resets this node's ``value`` to 1; any other child is expanded
    recursively and its triple is then recorded in ``visited_list``.

    ``visited_list`` is modified in place: ``index`` is appended on entry
    and every newly expanded child triple is appended after expansion.

    :type index: int
    :type pos: str
    :type visited_list: list
    :type node_name: str
    :type dependency_list: list
    :param reverse: sort this node's children by label in descending order
    :return: the node for ``node_name`` with its children sorted by label
    """
    node = Node(node_name, pos, index=index)
    visited_list.append(index)

    kids_index_name_pos = [
        (dep[1][0], dep[1][1], dep[1][2])
        for dep in dependency_list
        if dep[0][0] == index
    ]
    for kid in kids_index_name_pos:
        kid_index, kid_name, kid_pos = kid
        if kid in visited_list:
            # Already expanded elsewhere: attach a stub instead of recursing.
            node.value = 1
            node.addkid(Node(label=kid_name, index=kid_index, pos=kid_pos))
            continue
        # NOTE(review): ``reverse`` is not forwarded, so only the node built
        # by the outermost call honours it - confirm this is intended.
        node.addkid(tree_generate(kid_name, kid_index, kid_pos,
                                  dependency_list, visited_list))
        visited_list.append(kid)

    # The loop never breaks, so the value roll-up always runs.
    attached = Node.get_children(node)
    if attached:
        node.value += sum([g.value for g in attached])

    node.children.sort(key=lambda child: child.label, reverse=reverse)
    return node
def convert_parse_tree_to_zss_tree(self, tree_as_string, ignore_leaves=False):
    '''
    Build a zss tree from a bracketed parse-tree string.

    The string is split on parentheses, the first two tokens and the last
    token are discarded, and the remaining tokens are walked with a stack
    whose bottom element is a fresh 'ROOT' node.

    When ignore_leaves is true the words of the sentence are not added,
    so the result represents sentence structure only.

    Returns the 'ROOT' node.
    '''
    tokens = []
    for piece in re.split(r'([\(\)])', tree_as_string):
        piece = piece.strip()
        if piece:
            tokens.append(piece)
    tokens = tokens[2:-1]

    root_node = Node('ROOT')
    stack = [root_node]
    for position, token in enumerate(tokens):
        if token == ')':
            # Closing bracket: the node on top of the stack is finished.
            stack.pop()
            continue
        if token != '(':
            continue

        following = tokens[position + 1]
        match = re.search(r'[A-Z]+[ ][A-Za-z]+', following)
        if match is None:
            # No "TAG word" pair: the token is the label of an inner node.
            node = Node(following)
        else:
            # "TAG word" pair: a pre-terminal, optionally with its word.
            label = match.group().split(' ')
            if ignore_leaves:
                node = Node(label[0])
            else:
                node = Node(label[0]).addkid(Node(label[1]))

        # Attach to the currently open node, then open the new node.
        stack[-1].addkid(node)
        stack.append(node)
    return root_node
def convertTreeToEditDistanceFormat(self):
    """Convert ``self.edgeList`` into linked ``Node`` objects.

    Each edge is read as ``(weight, parent, child)``.  Weights are dropped,
    duplicate edges are removed, and the remaining edges are processed in
    a stable order sorted by parent so that children are attached in a
    reproducible order (child order matters for the edit distance).

    The previous version sorted the list and then iterated over
    ``set(...)`` of it, which discarded the ordering it had just built.

    :return: dict mapping every parent/child key to its ``Node``
    """
    # De-duplicate while keeping first-seen order; the weight slot is
    # zeroed so that edges differing only by weight collapse together.
    seen = set()
    unweightedEdges = []
    for edge in self.edgeList:
        newEdge = (0, edge[1], edge[2])
        if newEdge not in seen:
            seen.add(newEdge)
            unweightedEdges.append(newEdge)
    # list.sort is stable, so edges sharing a parent keep input order.
    unweightedEdges.sort(key=lambda tup: tup[1])

    nodeObjects = dict()
    for _, parent, child in unweightedEdges:
        if parent not in nodeObjects:
            nodeObjects[parent] = Node(str(parent), [])
        if child not in nodeObjects:
            nodeObjects[child] = Node(str(child), [])
        # Only the parent node is ever appended to.
        nodeObjects[parent].addkid(nodeObjects[child])
    return nodeObjects
def list_to_zsstree(input_list, label='0'):
    """Mirror the shape of a nested two-element list as a zss tree.

    A non-list value becomes a leaf; a list becomes an internal node whose
    two children are built from ``input_list[0]`` and ``input_list[1]``.
    Recursive calls use the default label, so only the root can carry a
    caller-supplied ``label``.
    """
    node = Node(label)
    if isinstance(input_list, list):
        # Internal node: attach the left and right subtrees in order.
        node = node.addkid(list_to_zsstree(input_list[0]))
        node = node.addkid(list_to_zsstree(input_list[1]))
    # Otherwise this is a leaf node and is returned as created.
    return node
def zss_similarity(node1, node2):
    """Return ``simple_distance`` between two trees given as mappings.

    Each argument is indexed with ``'name'`` (used as the label) and
    ``'children'`` (passed as the second ``Node`` argument).
    """
    first = Node(node1['name'], node1['children'])
    second = Node(node2['name'], node2['children'])
    return simple_distance(first, second)
def loop_tree(dictionary, node):
    """Attach the contents of a nested dict beneath ``node``.

    For every ``key, value`` pair: a dict value becomes a child labelled
    ``key`` whose own children are built recursively; any other value
    becomes a leaf child labelled with the value itself.

    ``node`` is printed, followed by a separator line, on every call.

    zss ``Node.addkid`` returns the node it was called on, so the previous
    ``loop_tree(..., node.addkid(Node(k)))`` recursed into the parent and
    flattened nested dicts onto it.  The child is now created first and
    passed to the recursive call.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(node)
    print("----")
    for k, v in dictionary.items():
        if isinstance(v, dict):
            child = Node(k)
            node.addkid(child)
            loop_tree(v, child)
        else:
            node.addkid(Node(v))
def order_children(rootnode):
    """Recursively order the children of every node by ``label``.

    ``rootnode.children`` is replaced with a new sorted list; nodes
    without children are left untouched.  Returns ``None``.
    """
    if len(rootnode.children) > 0:
        rootnode.children = sorted(Node.get_children(rootnode),
                                   key=lambda kid: kid.label)
        for kid in Node.get_children(rootnode):
            order_children(kid)
def test_de():
    """Check the edit script from D to E: remove "b", remove "c", match "a"."""
    _, ops = simple_distance(D, E, return_operations=True)
    removals = [
        Operation(Operation.remove, Node(label), None)
        for label in ("b", "c")
    ]
    matches = [Operation(Operation.match, Node("a"), Node("a"))]
    assert ops == removals + matches
def helper(obj):
    """Convert a nested ``[label, child, ...]`` list into a ``Node`` tree.

    A list yields a node labelled with its first element whose children
    are the converted remaining elements; any other value yields a leaf
    labelled with that value.
    """
    if not isinstance(obj, list):
        return Node(obj)
    node = Node(obj[0])
    for kid in obj[1:]:
        node.addkid(helper(kid))
    return node
def helper(node: Node, index: int):
    """Attach the dependents of token ``index`` beneath ``node``, recursively.

    Dependents are read from ``dep[str(index)]['deps']``, whose values are
    concatenated into one list.  Each dependent becomes a child labelled
    with its ``'word'`` entry; its own subtree is built before it is
    attached.  Returns ``None``.
    """
    dependents = sum(dep[str(index)]['deps'].values(), [])
    for dependent in dependents:
        child = Node(dep[str(dependent)]['word'])
        helper(child, dependent)
        node.addkid(child)
    return None
def treetostr(self, node):
    """Render a tree as text.

    A non-tuple label is returned as is.  A tuple label is unpacked as
    ``(kind, name)``: kind ``'O'`` renders as ``name(child, child, ...)``
    with the children rendered recursively; any other kind renders as
    ``name`` alone.
    """
    label = Node.get_label(node)
    if not isinstance(label, tuple):
        return label
    kind, name = label
    if kind != 'O':
        return name
    rendered = ', '.join(map(self.treetostr, Node.get_children(node)))
    return '%s(%s)' % (name, rendered)
def dgl_tree_to_zzs_tree(tree, vocab_key_list, u):
    """Convert the subtree ending at node ``u`` of ``tree`` into a zss tree.

    A node with in-degree 0 becomes a leaf labelled
    ``vocab_key_list[tree.ndata['x'][u]]``.  Any other node becomes a
    "PAD" node whose children are built from the first element returned
    by ``tree.in_edges(u)``.
    """
    # NOTE(review): presumably ``tree`` is a DGL graph whose edges point
    # from child to parent - confirm against the caller.
    if tree.in_degrees(u) == 0:
        return Node(vocab_key_list[tree.ndata['x'][u]])

    node = Node("PAD")
    for source in tree.in_edges(u)[0]:
        node.addkid(dgl_tree_to_zzs_tree(tree, vocab_key_list, int(source)))
    return node
def construct_node(tree, level, threshold = 1000):
    """Build a ``Node`` tree from a nested dict of tags.

    Each dict supplies its label under ``'tagName'`` and, optionally, a
    list of child dicts under ``'children'``.  Descent stops at a dict
    without ``'children'`` or when ``level`` equals ``threshold``.
    """
    tag = tree['tagName']
    root = Node(tag)
    has_children = 'children' in tree
    if not has_children or level == threshold:
        root.label = tag
        return root
    for child in tree['children']:
        root.addkid(construct_node(child, level + 1, threshold))
    return root
def build_tree_no_date(self, event):
    """Build a one-level tree describing ``event`` without its date.

    The root is labelled with the lower-cased event type; the lower-cased
    country, state, city and name are attached to it, in that order.
    """
    A = Node(event.event_type.lower())
    for attribute in ('country', 'state', 'city', 'name'):
        # Rebind to the result of addkid, as the original call chain did.
        A = A.addkid(Node(getattr(event, attribute).lower()))
    return A
def transform(parent):
    """Build the ``Node`` tree for ``parent`` from the ``clause`` mapping.

    ``parent[0]`` is the label.  If ``parent`` is a key of ``clause``, its
    mapped children are transformed recursively and passed as the node's
    children; otherwise the node is a leaf.
    """
    if parent in clause:
        kids = [transform(child) for child in clause[parent]]
        return Node(parent[0], children=kids)
    return Node(parent[0])
def parsed_tree_to_zzs_tree(u):
    """Convert a parsed tree node into a zss tree.

    A node without children becomes a leaf labelled ``u.value``; a node
    with exactly one child is skipped in favour of that child; a node
    with two children becomes a "PAD" node with both converted subtrees.
    More than two children fails the assertion.
    """
    arity = len(u.child)
    if arity == 0:
        return Node(u.value)
    if arity == 1:
        # Unary chains are collapsed.
        return parsed_tree_to_zzs_tree(u.child[0])

    assert len(u.child) == 2
    node = Node("PAD")
    for position in (0, 1):
        node.addkid(parsed_tree_to_zzs_tree(u.child[position]))
    return node
def get_ztree(cn, ztp=None):
    """Copy the labelled structure of a ``Tree`` into a ``Node`` tree.

    :param cn: a ``Tree``, or a string parsed with ``Tree.fromstring``
    :param ztp: node to attach the subtrees to; when ``None`` a new node
        labelled ``cn.label()`` is created
    :return: ``ztp`` (or the newly created root)

    Only children that are ``Tree`` instances are copied; other children
    are skipped.
    """
    if isinstance(cn, str):
        cn = Tree.fromstring(cn)
    if ztp is None:
        ztp = Node(cn.label())

    subtrees = (child for child in cn if isinstance(child, Tree))
    for subtree in subtrees:
        child_node = Node(subtree.label())
        ztp.addkid(child_node)
        get_ztree(subtree, child_node)
    return ztp
def main(node,child):
    """Attach a JSON-like value beneath ``node`` and return ``node``.

    * an empty list adds a single child labelled ``''``;
    * a scalar (neither list nor dict) adds a leaf labelled with the value;
    * a dict adds one child per key, labelled with the key, with the
      mapped value attached beneath it recursively;
    * a non-empty list adds one child per element, labelled with the
      element's position, with the element attached beneath it.

    The dict and list branches previously fell off the end and returned
    ``None``, so a nested dict or list was attached to its parent as a
    ``None`` child.  ``node`` is now returned on every path.
    """
    if isinstance(child, list) and len(child) == 0:
        node.addkid(Node(''))
        return node
    if not isinstance(child, (list, dict)):
        node.addkid(Node(child))
        return node

    if isinstance(child, dict):
        for k, v in child.items():
            node.addkid(main(Node(k), v))
    else:
        for n, i in enumerate(child):
            node.addkid(main(Node(n), i))
    return node
def make_zss_tree(ast_node):
    """Convert a parse-tree node into a zss tree.

    The label is ``ast_node.data`` when that attribute exists and
    ``ast_node.type`` otherwise.  Nodes that have a ``children`` attribute
    get one converted child per entry.
    """
    label = ast_node.data if hasattr(ast_node, "data") else ast_node.type
    node = Node(label)
    if not hasattr(ast_node, "children"):
        return node
    for child in ast_node.children:
        node.addkid(make_zss_tree(child))
    return node
def zss_code_distance(code_a, code_b):
    """Return the tree edit distance between the ASTs of two code strings.

    Each string is parsed with ``ast.parse`` and copied beneath a "root"
    node by ``zss_ast_visit``; the two trees are then compared with
    ``simple_distance`` using ``label_weight`` as the label distance.
    """
    def build(source):
        parsed = ast.parse(source)
        zss_root = Node("root")
        zss_ast_visit(parsed, zss_root)
        return zss_root

    tree_a = build(code_a)
    tree_b = build(code_b)
    return simple_distance(tree_a, tree_b, label_dist=label_weight)
def make_html_zssgraph(parent, graph=None, ignore_comments=True):
    '''
    Given a parsed HTML element, return a zss style tree of the DOM
    below it.

    parent          -- element to convert; iterating it yields its
                       child elements and each element exposes ``tag``
    graph           -- zss node that receives the children of
                       ``parent``; a node labelled ``parent.tag`` is
                       created when omitted
    ignore_comments -- skip children whose ``tag`` is not a string

    Returns the node for ``parent``.

    The recursion used to pass ``graph`` itself instead of the newly
    created child, so every descendant ended up as a direct child of
    the root, and ``ignore_comments`` was not forwarded.  Both are fixed.
    '''
    # ``basestring`` only exists on Python 2; fall back to ``str``.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if graph is None:
        graph = Node(parent.tag)
    # Iterating the element replaces the deprecated ``getchildren()``.
    for node in parent:
        # if the element is a comment, ignore it
        if ignore_comments and not isinstance(node.tag, string_types):
            continue
        child = Node(node.tag)
        graph.addkid(child)
        make_html_zssgraph(node, child, ignore_comments)
    return graph
def convert(args, tree, label, height):
    """Build the ``Node`` tree for ``label`` and report the height reached.

    ``tree`` maps a label to its child labels.  Children that are not
    themselves keys of ``tree`` are skipped.  Descent stops when
    ``height`` equals ``args.tree_height2``.

    :return: ``(node, height)`` where ``height`` is the largest height
        reached in the converted subtree
    """
    node = Node(label)
    if height == args.tree_height2:
        return (node, height)

    deepest = height
    for child in tree[label]:
        if child not in tree:
            continue
        kid, kid_height = convert(args, tree, child, height + 1)
        deepest = max(deepest, kid_height)
        node.addkid(kid)
    return (node, deepest)
def totree(self, e):
    """Convert an expression into a ``Node`` tree with tagged labels.

    * ``Var``   -> leaf labelled ``('V', str(e))``
    * ``Const`` -> leaf labelled ``('C', str(e))``
    * ``Op``    -> node labelled ``('O', name)`` with one converted child
      per argument; the name ``'AssAdd'`` is recorded as ``'Ass'``

    Any other value yields ``None``.
    """
    if isinstance(e, Var):
        return Node(('V', str(e)))
    if isinstance(e, Const):
        return Node(('C', str(e)))
    if not isinstance(e, Op):
        return None

    name = 'Ass' if e.name == 'AssAdd' else e.name
    n = Node(('O', name))
    for arg in e.args:
        n.addkid(self.totree(arg))
    return n
def zss_code_ast_edit(code_a, code_b):
    """Return ``(cost, ops)`` for editing the AST of one code string into another.

    Each string is parsed with ``ast.parse`` and copied beneath a "root"
    node by ``zss_ast_visit``; ``simple_distance`` is then called with
    ``label_weight`` as the label distance and ``return_operations=True``.
    """
    def build(source):
        parsed = ast.parse(source)
        zss_root = Node("root")
        zss_ast_visit(parsed, zss_root)
        return zss_root

    tree_a = build(code_a)
    tree_b = build(code_b)
    cost, ops = simple_distance(tree_a, tree_b, label_dist=label_weight,
                                return_operations=True)
    return cost, ops
def tree_generate(node_name, index, pos, dependency_list, visited_list, reverse=False):
    """Build the tree rooted at the given node from a dependency list.

    Every dependency ``d`` whose head index ``d[0][0]`` equals ``index``
    contributes one child, described by the ``(index, name, pos)`` triple
    read from ``d[1]``.  A child triple already present in
    ``visited_list`` is attached as a plain node without recursion and
    resets this node's ``value`` to 1; any other child is expanded
    recursively and its triple is then recorded in ``visited_list``.

    ``visited_list`` is modified in place: ``index`` is appended on entry
    and every newly expanded child triple is appended after expansion.

    :type index: int
    :type pos: str
    :type visited_list: list
    :type node_name: str
    :type dependency_list: list
    :param reverse: sort this node's children by label in descending order
    :return: the node for ``node_name`` with its children sorted by label
    """
    node = Node(node_name, pos, index=index)
    visited_list.append(index)

    kids_index_name_pos = [
        (dep[1][0], dep[1][1], dep[1][2])
        for dep in dependency_list
        if dep[0][0] == index
    ]
    for kid in kids_index_name_pos:
        kid_index, kid_name, kid_pos = kid
        if kid in visited_list:
            # Already expanded elsewhere: attach a stub instead of recursing.
            node.value = 1
            node.addkid(Node(label=kid_name, index=kid_index, pos=kid_pos))
            continue
        # NOTE(review): ``reverse`` is not forwarded, so only the node built
        # by the outermost call honours it - confirm this is intended.
        node.addkid(tree_generate(kid_name, kid_index, kid_pos,
                                  dependency_list, visited_list))
        visited_list.append(kid)

    # The loop never breaks, so the value roll-up always runs.
    attached = Node.get_children(node)
    if attached:
        node.value += sum([g.value for g in attached])

    node.children.sort(key=lambda child: child.label, reverse=reverse)
    return node
def tree_edit_distance(s1, s2):
    """Return ``simple_distance`` between two comma-separated label strings.

    Each string is split on ``','`` and turned into a one-level tree: a
    root labelled ``""`` with one child per item, in order.
    """
    labels_1 = s1.split(',')
    labels_2 = s2.split(',')

    def flat_tree(labels):
        root = Node("")
        for label in labels:
            root.addkid(Node(label))
        return root

    n1 = flat_tree(labels_1)
    n2 = flat_tree(labels_2)
    return simple_distance(n1, n2)
def mktree(node, child, count=0):
    """Attach parts of a nested list/dict structure beneath ``node``.

    ``count`` is printed on every call.  Always returns ``None``.

    * list: recurses into the first element only, with ``count + 1``.
    * dict: walks the items in order; a non-dict value is attached as a
      leaf labelled with the value; the first dict value adds a child
      labelled with its key, recurses into that value with the same
      ``node``, and ends the walk.
    * anything else: nothing is attached.
    """
    print(count)
    if isinstance(child, list):
        # NOTE(review): only the first element is ever visited because the
        # call returns from inside the loop - confirm this is intended.
        if child:
            return mktree(node, child[0], count + 1)
        return None

    if isinstance(child, dict):
        for key, value in child.items():
            if isinstance(value, dict):
                # NOTE(review): the nested dict is attached to ``node``, not
                # to the new child, and remaining items are skipped - confirm.
                node.addkid(Node(key))
                return mktree(node, value, count)
            node.addkid(Node(value))
    return None
def to_zzzNode(E, root=0):
    """Convert the tree described by ``E`` into a zss ``Node`` tree.

    The tree is expanded breadth-first from ``root``; the children of a
    vertex are obtained from ``getChildren(E, vertex)`` and every node is
    labelled with ``str(vertex)``.

    The work queue used to be seeded with the literal ``0``, so a
    non-default ``root`` produced a node labelled ``str(root)`` carrying
    the children of vertex 0.  It is now seeded with ``root``.

    :param E: edge structure understood by ``getChildren``
    :param root: vertex to start from
    :return: the ``Node`` for ``root``
    """
    from collections import deque
    from zss import Node

    A = Node(str(root))
    # Pairs of (vertex, node already created for that vertex).
    pending = deque([(root, A)])
    while pending:
        parent, node = pending.popleft()
        # NOTE(review): there is no visited set, so a cyclic ``E`` would
        # never terminate - presumably ``E`` is always a tree.
        for child in getChildren(E, parent):
            child_node = Node(str(child))
            node.addkid(child_node)
            pending.append((child, child_node))
    return A
def syntax_similarity_conversation(self, documents1):
    """Syntax similarity of each document with the document that follows it.

    Every document is split into sentences with ``self.sent_detector`` and
    parsed with ``self.parser``.  For each pair of consecutive documents,
    every sentence tree of the first is compared with every sentence tree
    of the second using ``simple_distance``; each distance is divided by
    the sum of the two ``numnodes`` readings, and the pair's score is
    ``1 - mean`` of those normalised distances.

    A document containing a sentence of more than 70 whitespace-separated
    tokens is not parsed and is recorded as "NA"; any pair involving such
    a document scores NaN.

    Returns a numpy array with ``len(documents1) - 1`` entries.
    """
    # NOTE(review): ``numnodes`` is only reset and read here; presumably
    # ``self.convert_mytree`` increments it per converted node - confirm.
    global numnodes
    documents1parsed = []
    # Detect sentences and parse them
    for d1 in tqdm(range(len(documents1))):
        tempsents = (self.sent_detector.tokenize(documents1[d1].strip()))
        for s in tempsents:
            if len(s.split()) > 70:
                documents1parsed.append("NA")
                break
        else:
            # Reached only when no sentence was too long (no break above).
            temp = list(self.parser.raw_parse_sents((tempsents)))
            for i in range(len(temp)):
                # Keep the first parse of each sentence.
                temp[i] = list(temp[i])[0]
                temp[i] = ParentedTree.convert(temp[i])
            documents1parsed.append(list(temp))
    results = []
    for d1 in range(len(documents1parsed) - 1):
        d2 = d1 + 1
        if documents1parsed[d1] == "NA" or documents1parsed[d2] == "NA":
            results.append(float('NaN'))
            continue
        costMatrix = []
        for i in range(len(documents1parsed[d1])):
            # Reset the counter before converting the first sentence ...
            numnodes = 0
            tempnode = Node(documents1parsed[d1][i].root().label())
            sentencedoc1 = self.convert_mytree(documents1parsed[d1][i], tempnode)
            temp_costMatrix = []
            # ... and capture its value before the inner loop resets it.
            sen1nodes = numnodes
            for j in range(len(documents1parsed[d2])):
                # Float zero, so the division below is a float division.
                numnodes = .0
                tempnode = Node(documents1parsed[d2][j].root().label())
                sentencedoc2 = self.convert_mytree(documents1parsed[d2][j], tempnode)
                ED = simple_distance(sentencedoc1, sentencedoc2)
                # Normalise by the combined counter readings of both trees.
                ED /= (numnodes + sen1nodes)
                temp_costMatrix.append(ED)
            costMatrix.append(temp_costMatrix)
        costMatrix = np.array(costMatrix)
        results.append(1 - np.mean(costMatrix))
    return np.array(results)
def json_to_tree(self, toplevel):
    """Convert a list of nested ``[label, child, ...]`` lists into one tree.

    Each entry of ``toplevel`` is converted and attached, in order, to a
    root node labelled "toplevel", which is returned.
    """
    def helper(obj):
        # A list is [label, child, ...]; anything else is a leaf label.
        if not isinstance(obj, list):
            return Node(obj)
        node = Node(obj[0])
        for kid in obj[1:]:
            node.addkid(helper(kid))
        return node

    prog = Node("toplevel")
    for fun in toplevel:
        prog.addkid(helper(fun))
    return prog
def randtree(depth=2, alpha='abcdefghijklmnopqrstuvwxyz', repeat=2, width=2):
    """Generate a random tree rooted at a node labelled "root".

    Labels are the shuffled ``repeat``-length products of ``alpha`` and
    each is used once.  The tree is grown level by level for
    ``depth - 1`` levels; every node of the current level receives
    between 1 and ``1 + width`` children (inclusive).

    Raises ``StopIteration`` if the tree needs more labels than
    ``len(alpha) ** repeat``.

    ``xrange`` and ``labels.next()`` were Python-2-only; ``range`` and
    ``next(labels)`` behave the same there and also run on Python 3.
    """
    labels = [''.join(x) for x in itertools.product(alpha, repeat=repeat)]
    shuffle(labels)
    labels = iter(labels)

    root = Node("root")
    p = [root]  # nodes of the level currently being expanded
    for _ in range(depth - 1):
        c = []  # nodes created for the next level
        for y in p:
            for _ in range(randint(1, 1 + width)):
                n = Node(next(labels))
                y.addkid(n)
                c.append(n)
        p = c
    return root
def parseTreeFromStrings(tree, debug=False):
    """
    Create the tree from a list of strings
    @param tree: {List} string format of the tree, one element per string
    @param debug: {Boolean} not used by this function
    @return: {Node} the root of the parsed tree; None when tree is None
             or contains no lines

    The part of each line before ": " is the header.  A header without
    "|-" is a level-0 node whose tag is the header itself.  Otherwise the
    header is split on "- " into an indent prefix and the tag, and the
    level is 1 + half the prefix length.  All digit runs of the line are
    read as integers xs and the node is labelled
    "tag[x=xs[2]-xs[0],y=xs[3]-xs[1]]", zero-padded to four digits.
    """
    if tree is None:
        return None

    root = None
    # rightMosts[k] is the most recently created node at level k.
    rightMosts = []
    for line in tree:
        header = line.split(": ")[0]
        if "|-" in header:
            indent, tag = header.split("- ")
            tag = tag.strip()
            level = 1 + len(indent) // 2
        else:
            tag = header.strip()
            level = 0

        xs = [int(x) for x in re.split("\\D+", line) if x != ""]
        cur = Node("%s[x=%04d,y=%04d]" % (tag, xs[2]-xs[0], xs[3]-xs[1]))

        if level == len(rightMosts):
            rightMosts.append(cur)
        else:
            assert level < len(rightMosts)
            rightMosts[level] = cur

        if level == 0:
            root = cur
        else:
            # Attach to the latest node one level up.
            rightMosts[level-1].addkid(cur)
    return root
def build_tree(self, event):
    """Build a one-level tree describing ``event`` including its date.

    The root is labelled with the lower-cased event type; the lower-cased
    country, state, city, name, day, month and year are attached to it,
    in that order.
    """
    A = Node(event.event_type.lower())
    for attribute in ('country', 'state', 'city', 'name',
                      'day', 'month', 'year'):
        # Rebind to the result of addkid, as the original call chain did.
        A = A.addkid(Node(getattr(event, attribute).lower()))
    return A
def treesize(self, t):
    '''
    Calculates the total size of the trees stored in t.

    t maps a location to a mapping of variable -> tree.  A tree whose
    root label is exactly ('V', var) for its own variable is skipped;
    every other tree contributes its number of nodes.
    '''
    def ts(node):
        # One for this node plus the sizes of all its subtrees.
        return 1 + sum(ts(kid) for kid in Node.get_children(node))

    size = 0
    for loc in t:
        for var, tree in t[loc].items():
            if Node.get_label(tree) != ('V', var):
                size += ts(tree)
    return size
def tree_edit_distance(s1,s2):
    """Return ``simple_distance`` between two comma-separated label strings.

    Each string is split on ``','`` and turned into a one-level tree: a
    root labelled ``""`` with one child per item, in order.
    """
    labels_1 = s1.split(',')
    labels_2 = s2.split(',')

    def flat_tree(labels):
        root = Node("")
        for label in labels:
            root.addkid(Node(label))
        return root

    n1 = flat_tree(labels_1)
    n2 = flat_tree(labels_2)
    return simple_distance(n1, n2)
def ts(node):
    """Return the number of nodes in the tree rooted at ``node``."""
    total = 1
    for kid in Node.get_children(node):
        total += ts(kid)
    return total
def get_children(node):
    """Return whatever ``Node.get_children`` reports for ``node``."""
    children = Node.get_children(node)
    return children