Exemple #1
0
def build_tree(sentences):
    """Build a single ``Node`` tree covering every sentence in ``sentences``.

    The text is split with ``nltk.tokenize.sent_tokenize``; each sentence is
    passed to ``nlp.parse`` and the whitespace-separated tokens of the result
    are read as a bracketed tree: ``"(LABEL"`` opens a node and ``"word))"``
    is a leaf followed by closing brackets.  The per-sentence roots are
    attached, in order, under a synthetic node labelled ``'R'``.

    :param sentences: text containing one or more sentences
    :return: the synthetic ``'R'`` node
    """
    sentence_roots = []
    for sentence in nltk.tokenize.sent_tokenize(sentences):
        bracketed = nlp.parse(sentence)
        open_nodes = []  # nodes that have been opened but not yet closed
        top = None
        for token in bracketed.split():
            if token.startswith('('):
                # "(LABEL" opens a new node under the innermost open one.
                opened = Node(token[1:])
                if open_nodes:
                    open_nodes[-1].addkid(opened)
                open_nodes.append(opened)
                if top is None:
                    top = opened
                continue

            # Leaf token: the text before the first ')' is the label.
            close_at = token.find(')')
            leaf = Node(token[:close_at])
            if open_nodes:
                open_nodes[-1].addkid(leaf)
            # Close one open node per remaining character.  The count is
            # always >= 1, so the negative slice never degenerates to [-0:],
            # and slicing clamps when fewer nodes are open.
            closing = len(token) - close_at
            del open_nodes[-closing:]
        sentence_roots.append(top)

    combined = Node('R')
    for sentence_root in sentence_roots:
        combined.addkid(sentence_root)
    return combined
Exemple #2
0
def tree_generate(node_name,
                  index,
                  pos,
                  dependency_list,
                  visited_list,
                  reverse=False):
    """Generate the tree rooted at the given dependency node.

    Every entry ``d`` of ``dependency_list`` whose ``d[0][0]`` equals
    ``index`` contributes a child described by ``d[1]`` as
    ``(index, name, pos)``.  A child whose tuple is already in
    ``visited_list`` is attached as a leaf (and this node's ``value`` is set
    to 1); any other child is expanded recursively.

    ``visited_list`` is extended in place with this node's ``index`` and with
    the ``(index, name, pos)`` tuple of every child handled.

    :type index: int
    :type pos: str
    :type visited_list: list
    :type node_name: str
    :type dependency_list: list
    :param reverse: sort children by label in descending order; applied at
        every level of the generated tree
    :return: the generated ``Node``
    """
    node = Node(node_name, pos, index=index)
    visited_list += [index]
    kids_index_name_pos = [(d[1][0], d[1][1], d[1][2]) for d in dependency_list
                           if d[0][0] == index]
    for kid in kids_index_name_pos:
        kid_index, kid_name, kid_pos = kid
        if kid in visited_list:
            node.value = 1
            node.addkid(Node(label=kid_name, index=kid_index, pos=kid_pos))
        else:
            # Forward ``reverse`` so subtrees are ordered like the top level.
            node.addkid(
                tree_generate(kid_name, kid_index, kid_pos, dependency_list,
                              visited_list, reverse=reverse))
        visited_list += [kid]

    # Add the children's values on top of this node's own value.
    children = Node.get_children(node)
    if children:
        node.value += sum(child.value for child in children)
    node.children.sort(key=lambda child: child.label, reverse=reverse)
    return node
Exemple #3
0
def tree_generate(node_name, index, pos, dependency_list, visited_list, reverse=False):
    """Generate the tree rooted at the given dependency node.

    Entries ``d`` of ``dependency_list`` with ``d[0][0] == index`` describe
    the children of this node; ``d[1]`` holds the child as
    ``(index, name, pos)``.  Children whose tuple is already present in
    ``visited_list`` become leaves and set this node's ``value`` to 1; the
    remaining children are generated recursively.

    ``visited_list`` is mutated in place: this node's ``index`` and each
    processed child tuple are appended to it.

    :type index: int
    :type pos: str
    :type visited_list: list
    :type node_name: str
    :type dependency_list: list
    :param reverse: when true, children are sorted by label in descending
        order; the flag is honoured at every depth
    :return: the generated ``Node``
    """
    node = Node(node_name, pos, index=index)
    visited_list += [index]
    kids_index_name_pos = [(d[1][0], d[1][1], d[1][2]) for d in dependency_list if d[0][0] == index]
    for kid in kids_index_name_pos:
        kid_index, kid_name, kid_pos = kid
        if kid in visited_list:
            node.value = 1
            node.addkid(Node(label=kid_name, index=kid_index, pos=kid_pos))
        else:
            # Pass ``reverse`` down; otherwise only the root level is reversed.
            node.addkid(tree_generate(kid_name, kid_index, kid_pos, dependency_list, visited_list, reverse=reverse))
        visited_list += [kid]

    # Accumulate the values of the direct children into this node.
    children = Node.get_children(node)
    if children:
        node.value += sum(child.value for child in children)
    node.children.sort(key=lambda child: child.label, reverse=reverse)
    return node
Exemple #4
0
 def helper(node: Node, index: int):
     """Attach the dependents of entry ``index`` under ``node``, recursively.

     ``dep[str(index)]['deps']`` maps to lists of dependent indices; the lists
     are concatenated and each dependent becomes a child ``Node`` labelled
     with ``dep[str(dependent)]['word']``.
     """
     dependents = sum(dep[str(index)]['deps'].values(), [])
     for dependent in dependents:
         child = Node(dep[str(dependent)]['word'])
         # Fill in the child's own subtree before hooking it onto ``node``.
         helper(child, dependent)
         node.addkid(child)
Exemple #5
0
 def helper(obj):
     """Convert a nested list into a ``Node`` tree.

     A list is read as ``[label, child, child, ...]``; any other value
     becomes a leaf labelled with the value itself.
     """
     if not isinstance(obj, list):
         return Node(obj)

     subtree = Node(obj[0])
     for item in obj[1:]:
         subtree.addkid(helper(item))
     return subtree
def dgl_tree_to_zzs_tree(tree, vocab_key_list, u):
    """Convert the part of ``tree`` reachable through node ``u`` to ``Node`` form.

    A node for which ``tree.in_degrees(u)`` is 0 becomes a leaf labelled
    ``vocab_key_list[tree.ndata['x'][u]]``.  Any other node becomes a
    ``"PAD"`` node with one child per entry of ``tree.in_edges(u)[0]``
    (presumably the source node ids of the incoming edges -- confirm against
    the graph library's API).
    """
    if tree.in_degrees(u) == 0:
        token_id = tree.ndata['x'][u]
        return Node(vocab_key_list[token_id])

    parent = Node("PAD")
    sources = tree.in_edges(u)[0]
    for source in sources:
        parent.addkid(dgl_tree_to_zzs_tree(tree, vocab_key_list, int(source)))
    return parent
Exemple #7
0
def construct_node(tree, level, threshold=1000):
  """Build a ``Node`` tree from a nested dict of ``tagName``/``children``.

  Recursion stops at a dict without a ``'children'`` key, or once ``level``
  equals ``threshold``; deeper children are then left out.
  """
  tag = tree['tagName']
  root = Node(tag)
  if level == threshold or 'children' not in tree:
    # Leaf (or depth limit reached): the label is set again explicitly.
    root.label = tag
    return root
  for child in tree['children']:
    root.addkid(construct_node(child, level + 1, threshold))
  return root
Exemple #8
0
 def totree(self, e):
     """Convert expression ``e`` into a ``Node`` tree.

     ``Var`` and ``Const`` become leaves labelled ``('V', str(e))`` and
     ``('C', str(e))``.  An ``Op`` becomes an ``('O', e.name)`` node with one
     converted child per entry of ``e.args``.  Any other input yields
     ``None``.
     """
     # Leaf kinds are checked in the same order as before: Var, then Const.
     for leaf_type, tag in ((Var, 'V'), (Const, 'C')):
         if isinstance(e, leaf_type):
             return Node((tag, str(e)))

     if not isinstance(e, Op):
         return None

     op_node = Node(('O', e.name))
     for operand in e.args:
         op_node.addkid(self.totree(operand))
     return op_node
Exemple #9
0
def get_ztree(cn, ztp=None):
    """Mirror the labelled structure of a ``Tree`` as ``Node`` objects.

    :param cn: a ``Tree``, or a string that is first parsed with
        ``Tree.fromstring``
    :param ztp: node to attach the converted subtrees to; when ``None`` a new
        node labelled ``cn.label()`` is created
    :return: ``ztp``, or the newly created node

    Only children that are themselves ``Tree`` instances are converted;
    other children are skipped.
    """
    tree = Tree.fromstring(cn) if isinstance(cn, str) else cn
    target = Node(tree.label()) if ztp is None else ztp
    for branch in tree:
        if not isinstance(branch, Tree):
            continue
        mirrored = Node(branch.label())
        target.addkid(mirrored)
        get_ztree(branch, mirrored)
    return target
def parsed_tree_to_zzs_tree(u):
    """Convert a parsed tree node ``u`` into a ``Node`` tree.

    A node without children becomes a leaf labelled ``u.value``.  A node with
    exactly one child is collapsed into that child's conversion.  A node with
    two children becomes a ``"PAD"`` node holding both conversions; more than
    two children trips an assertion.
    """
    kids = u.child
    kid_count = len(kids)
    if kid_count == 0:
        return Node(u.value)
    if kid_count == 1:
        # Unary chains are skipped.
        return parsed_tree_to_zzs_tree(kids[0])

    assert kid_count == 2
    pad = Node("PAD")
    for kid in (kids[0], kids[1]):
        pad.addkid(parsed_tree_to_zzs_tree(kid))
    return pad
Exemple #11
0
def make_zss_tree(ast_node):
    """Recursively convert ``ast_node`` into a ``Node`` tree.

    The label is ``ast_node.data`` when that attribute exists, otherwise
    ``ast_node.type``.  Objects without a ``children`` attribute become
    leaves.
    """
    label = ast_node.data if hasattr(ast_node, "data") else ast_node.type
    zss_node = Node(label)
    for child in getattr(ast_node, "children", ()):
        zss_node.addkid(make_zss_tree(child))
    return zss_node
Exemple #12
0
def make_html_zssgraph(parent, graph=None, ignore_comments=True):
  ''' Given a parsed HTML element, return a zss style tree of the DOM below it.

  parent: element exposing ``.tag`` and iteration over its child elements.
  graph: node to attach the children of ``parent`` to; when omitted a new
    node labelled ``parent.tag`` is created and returned.
  ignore_comments: skip children whose ``tag`` is not a string (comments);
    the flag applies at every depth.

  Returns ``graph`` (or the newly created root node).
  '''
  # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
  try:
    string_types = basestring
  except NameError:
    string_types = str

  if graph is None:
    graph = Node(parent.tag)
  # Iterating the element replaces the deprecated ``getchildren()``.
  for element in parent:
    # if the element is a comment, ignore it
    if ignore_comments and not isinstance(element.tag, string_types):
      continue
    child = Node(element.tag)
    graph.addkid(child)
    # Recurse into the new child node (not ``graph``) so that nesting is
    # preserved instead of flattening every descendant under the root.
    make_html_zssgraph(element, child, ignore_comments)
  return graph
Exemple #13
0
def convert(args, tree, label, height):
    """Convert the adjacency mapping ``tree`` into a ``Node`` tree from ``label``.

    Expansion stops once ``height`` equals ``args.tree_height2``.  Only
    children that are themselves keys of ``tree`` are attached.

    :return: ``(node, max_height)`` where ``max_height`` is the largest
        height reported by this node or any attached subtree
    """
    node = Node(label)
    if height == args.tree_height2:
        return (node, height)

    deepest = height
    for child in tree[label]:
        if child not in tree:
            continue
        subtree, subtree_height = convert(args, tree, child, height + 1)
        deepest = max(deepest, subtree_height)
        node.addkid(subtree)
    return (node, deepest)
Exemple #14
0
def make_html_zssgraph(parent, graph=None, ignore_comments=True):
    ''' Given a parsed HTML element, return a zss style tree of the DOM below it.

    parent: element exposing ``.tag`` and iteration over its child elements.
    graph: node to attach the children of ``parent`` to; when omitted a new
        node labelled ``parent.tag`` is created and returned.
    ignore_comments: skip children whose ``tag`` is not a string (comments);
        the flag applies at every depth.

    Returns ``graph`` (or the newly created root node).
    '''
    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if graph is None:
        graph = Node(parent.tag)
    # Iterating the element replaces the deprecated ``getchildren()``.
    for element in parent:
        # if the element is a comment, ignore it
        if ignore_comments and not isinstance(element.tag, string_types):
            continue
        child = Node(element.tag)
        graph.addkid(child)
        # Recurse into the new child node (not ``graph``) so that nesting is
        # preserved instead of flattening every descendant under the root.
        make_html_zssgraph(element, child, ignore_comments)
    return graph
Exemple #15
0
 def totree(self, e):
     """Convert expression ``e`` into a ``Node`` tree.

     ``Var`` and ``Const`` become leaves labelled ``('V', str(e))`` and
     ``('C', str(e))``.  An ``Op`` becomes an ``('O', name)`` node with one
     converted child per entry of ``e.args``; the operator name ``'AssAdd'``
     is labelled as ``'Ass'``.  Any other input yields ``None``.
     """
     if isinstance(e, Var):
         return Node(('V', str(e)))
     if isinstance(e, Const):
         return Node(('C', str(e)))
     if not isinstance(e, Op):
         return None

     op_name = 'Ass' if e.name == 'AssAdd' else e.name
     op_node = Node(('O', op_name))
     for operand in e.args:
         op_node.addkid(self.totree(operand))
     return op_node
Exemple #16
0
def tree_edit_distance(s1, s2):
    """Return ``simple_distance`` between two comma-separated label strings.

    Each string is split on ``','`` and turned into a two-level tree: a root
    labelled ``""`` with one leaf per item, in order.
    """
    def flat_tree(labels):
        # Root with an empty label; every item hangs directly below it.
        root = Node("")
        for label in labels:
            root.addkid(Node(label))
        return root

    left_labels, right_labels = s1.split(','), s2.split(',')
    return simple_distance(flat_tree(left_labels), flat_tree(right_labels))
Exemple #17
0
def tree_edit_distance(s1, s2):
    """Compare two comma-separated strings with ``simple_distance``.

    Both strings are split on ``','``; each list of items becomes the
    children, in order, of a root node labelled ``""``, and the two roots are
    passed to ``simple_distance``.
    """
    item_lists = (s1.split(','), s2.split(','))

    roots = []
    for items in item_lists:
        root = Node("")
        for item in items:
            root.addkid(Node(item))
        roots.append(root)

    first_root, second_root = roots
    return simple_distance(first_root, second_root)
Exemple #18
0
    def json_to_tree(self, toplevel):
        """Convert a list of nested-list functions into one ``Node`` tree.

        Every element of ``toplevel`` is converted and attached, in order,
        under a root labelled ``"toplevel"``.  Inside an element, a list is
        read as ``[label, child, child, ...]`` and any other value becomes a
        leaf labelled with that value.
        """
        def to_node(item):
            if not isinstance(item, list):
                return Node(item)
            branch = Node(item[0])
            for sub_item in item[1:]:
                branch.addkid(to_node(sub_item))
            return branch

        program = Node("toplevel")
        for function in toplevel:
            program.addkid(to_node(function))
        return program
Exemple #19
0
def dfs_search(n_node):
    """Convert a dict-based AST into a ``Node`` tree, depth first.

    A dict carrying a ``"_nodetype"`` key becomes a node labelled with that
    value.  Its dict values, and the elements of its list values, are
    converted recursively and attached as children when they convert to a
    node.  Values of any other type are ignored.

    :param n_node: candidate AST node
    :return: the converted ``Node``, or ``None`` when ``n_node`` is not a
        dict or has no ``"_nodetype"`` key
    """
    if not isinstance(n_node, dict) or "_nodetype" not in n_node:
        return None

    res = Node(n_node['_nodetype'])
    for item in n_node.values():
        if isinstance(item, dict):
            candidates = [item]
        elif isinstance(item, list):
            candidates = item
        else:
            continue
        for candidate in candidates:
            kid = dfs_search(candidate)
            # Skip anything that did not convert, for nested dicts as well
            # as list elements, so ``None`` is never attached as a child.
            if kid is not None:
                res.addkid(kid)
    return res
Exemple #20
0
def convert_body(body, parent_node=None, root_node=None):
    """Convert a ``_PyType``-tagged dict into a ``Node`` tree.

    ``body`` is first passed through ``seperate_dict``.  If the result is a
    dict, every key other than ``'_PyType'`` adds one child below
    ``parent_node``:

    * a dict value with ``'_PyType'`` is labelled ``"<key> <_PyType>"``,
      followed by its ``'attr'`` when present, or by its ``'id'`` when the
      key is ``'func'``; it is then converted recursively;
    * any other dict value is labelled ``"<key> udv"`` when its JSON dump
      contains ``'udv'``, otherwise ``"<key> <json dump>"``;
    * a list or str value is labelled ``"<key> <value>"``, or ``"<key> "``
      when the value is empty.

    :param body: structure to convert
    :param parent_node: node to attach children to; when ``None`` a new node
        labelled ``body['_PyType']`` is created and also used as the root
    :param root_node: node returned to the caller
    :return: ``root_node`` (unchanged when ``body`` is not a dict)
    """
    body = seperate_dict(body)
    if not isinstance(body, dict):
        return root_node

    if parent_node is None:
        parent_node = Node(body['_PyType'])
        root_node = parent_node

    for key in body:
        if key == '_PyType':
            continue
        value = body[key]

        if isinstance(value, dict):
            if '_PyType' in value:
                # Typed child: it still has kids, so recursion is needed.
                if 'attr' in value:
                    suffix = ' ' + value['attr']
                elif key == 'func':
                    suffix = ' ' + value['id']
                else:
                    suffix = ''
                child = Node(key + ' ' + value['_PyType'] + suffix)
                parent_node.addkid(child)
                convert_body(value, parent_node=child, root_node=root_node)
            else:
                # A dict without _PyType is stored as a single leaf.
                dumped = json.dumps(value)
                text = 'udv' if 'udv' in dumped else dumped
                parent_node = parent_node.addkid(Node(key + ' ' + text))
        elif isinstance(value, (list, str)):
            # NOTE(review): a non-empty list makes this concatenation raise
            # TypeError; presumably seperate_dict rules that out -- confirm.
            if value:
                label = key + ' ' + value
            else:
                label = key + ' '
            parent_node = parent_node.addkid(Node(label))
    return root_node
Exemple #21
0
def _attach_converted(parent, converted):
    """Attach ``converted`` (one node, or a list of nodes) to ``parent``."""
    if isinstance(converted, list):
        for item in converted:
            parent.addkid(item)
    else:
        parent.addkid(converted)


def convert_to_tree(source):
    '''
    Convert a given AST into a zss tree, recursively processing the AST and
    handling special cases as specified.

    For example, with if statements: if someone is missing an IF altogether,
    then their distance will be at least 3 from something with the if - 1 error
    for the if condition, 1 error for the code if true, 1 error for the code if
    false.

    Also note that the zss.simple_distance metric, "swapping" two lines is TWO
    operations: re-labeling the first node and the second node.

    source: dict with a 'type' key and, optionally, a 'children' list of
    further dicts of the same form.

    Returns a list of nodes for 'statementList' and 'maze_turn' (these are
    skipped and replaced by their converted children), otherwise a single
    node.
    '''

    node_type = source['type']
    if node_type in ('statementList', 'maze_turn'):
        # Skip these nodes because they are redundant with their children
        node_list = []
        for child in source['children']:
            child_node = convert_to_tree(child)
            if isinstance(child_node, list):
                node_list.extend(child_node)
            else:
                node_list.append(child_node)
        return node_list

    if node_type == 'maze_forever':
        # This node type (loop until finish) is never used in HOC4
        assert len(source['children']) == 1, "While has more than 1 child"
        # There's only 1 child, the DO statement
        child_node = convert_to_tree(source['children'][0])
        if isinstance(child_node, list):  # This should never happen
            raise Exception("Should never be here")
        child_node.label = "while_" + child_node.label
        return child_node

    if node_type == 'maze_ifElse':
        # This node type (if/else) is never used in HOC4. It should consist
        # of 3 children:
        # - the if condition
        # - the statements to be executed if the condition is met
        # - the statements to be executed if the condition is NOT met
        assert len(
            source['children']) == 3, "If/else has wrong number of children"

        condition = source['children'][0]['type']
        assert condition in ['isPathLeft', 'isPathRight',
                             'isPathForward'], "Bad condition"

        condition_node = Node(condition)  # The condition of the IF statement

        # Statements for the satisfied branch first, then the NOT-satisfied
        # branch; both end up as children of the condition node.
        if_stats = source['children'][1]
        else_stats = source['children'][2]
        for branch in (if_stats, else_stats):
            _attach_converted(condition_node, convert_to_tree(branch))
        return condition_node

    node = Node(source['type'])
    if 'children' in source:
        for child in source['children']:
            _attach_converted(node, convert_to_tree(child))
    return node
 def constructZssTree(self, treeNode):
     """Recursively copy ``treeNode`` into ``Node`` objects.

     The node is labelled with ``treeNode.getLabel()`` and receives one
     converted child per entry of ``treeNode.childNodes``, in order.
     """
     zssNode = Node(treeNode.getLabel())
     for child in treeNode.childNodes:
         converted = self.constructZssTree(child)
         zssNode.addkid(converted)
     return zssNode
Exemple #23
0
from zss import simple_distance, Node
# Only forever, repeat, if-then, if-then-else and repeat-until are nestable blocks, so only they have children; the rest do not.
# Whenever we encounter one of these blocks, start adding child nodes.

# Path of the opcode listing to read, one opcode per line (placeholder).
fileName = ''

control_blocks = ['forever', 'if-then', 'if-then-else', 'repeat', 'repeat-until']
init_tree = Node('root')
# ``with`` closes the file even if building the tree fails part-way.
with open(fileName, 'r') as fName:
    for opcode in fName:
        # Lines read from a file keep their trailing newline, which would
        # never equal an entry of control_blocks; strip it before comparing.
        opcode = opcode.strip()
        if opcode in control_blocks:
            currentNode = opcode
            init_tree.addkid(Node(currentNode))
            prevNode = currentNode
# Two example trees with the same shape that differ in two leaf labels
# ("h" vs "d" and "l" vs "b").  The construction chains addkid() calls, so it
# relies on addkid() returning the node it was called on.
A = (
    Node("f")
        .addkid(Node("a")
            .addkid(Node("h"))
            .addkid(Node("c")
                .addkid(Node("l"))))
        .addkid(Node("e"))
    )
B = (
    Node("f")
        .addkid(Node("a")
            .addkid(Node("d"))
            .addkid(Node("c")
                .addkid(Node("b"))))
        .addkid(Node("e"))
    )
# Call form of print: valid on Python 3 and, with a single argument, prints
# the same text on Python 2.
print(simple_distance(A, B))