Beispiel #1
0
def build_tree(sentences):
    """Build one zss tree covering every sentence in ``sentences``.

    The text is split into sentences, each sentence is parsed with
    ``nlp.parse`` (presumably yielding a bracketed parse string -- confirm
    against the parser in use), and the bracket tokens are folded into
    ``Node`` objects.  All sentence roots are attached to a synthetic
    root labelled ``'R'``, which is returned.
    """
    sentence_roots = []
    for sentence in nltk.tokenize.sent_tokenize(sentences):
        open_nodes = []
        top = None
        for tok in nlp.parse(sentence).split():
            if tok[0] == '(':
                # "(LABEL" opens a new constituent under the innermost open one.
                opened = Node(tok[1:])
                if open_nodes:
                    open_nodes[-1].addkid(opened)
                open_nodes.append(opened)
                if top is None:
                    top = opened
                continue

            # "word)))": a leaf followed by closing brackets.
            close_at = tok.find(')')
            leaf = Node(tok[:close_at])
            if open_nodes:
                open_nodes[-1].addkid(leaf)
            # Pop once per character from the first ")" onwards, stopping
            # early if nothing is left open.
            for _ in range(len(tok) - close_at):
                if not open_nodes:
                    break
                open_nodes.pop()
        sentence_roots.append(top)

    forest = Node('R')
    for sentence_root in sentence_roots:
        forest.addkid(sentence_root)
    return forest
Beispiel #2
0
def zss_similarity(node1, node2):
    """Return the zss ``simple_distance`` between two tree descriptions.

    Each argument is indexed with the keys ``'name'`` and ``'children'``,
    which are passed straight to ``Node`` as label and children.
    """
    trees = [Node(spec['name'], spec['children']) for spec in (node1, node2)]
    return simple_distance(*trees)
Beispiel #3
0
    def convertTreeToEditDistanceFormat(self):
        """Convert ``self.edgeList`` into linked ``Node`` objects.

        Each edge is read as ``(weight, parent, child)``; the weight is
        ignored.  Duplicate (parent, child) pairs are linked only once.

        Children are attached in a deterministic order: edges are sorted by
        parent, and edges sharing a parent keep their ``edgeList`` order.
        The edit distance depends on child order, so iterating a ``set`` of
        edges (whose order is arbitrary) would defeat the sort.

        :return: dict mapping every parent/child value to its ``Node``.
        """
        # sorted() is stable, so ties on the parent keep their input order.
        ordered_edges = sorted(
            ((edge[1], edge[2]) for edge in self.edgeList),
            key=lambda pair: pair[0],
        )

        node_objects = {}
        seen_edges = set()
        for parent, child in ordered_edges:
            # De-duplicate without losing the sorted order.
            if (parent, child) in seen_edges:
                continue
            seen_edges.add((parent, child))

            # Create each endpoint's node the first time it is encountered.
            for endpoint in (parent, child):
                if endpoint not in node_objects:
                    node_objects[endpoint] = Node(str(endpoint), [])

            node_objects[parent].addkid(node_objects[child])

        return node_objects
Beispiel #4
0
def list_to_zsstree(input_list, label='0'):
    """Convert a nested two-element list structure into a zss tree.

    A non-list value becomes a leaf.  A list becomes an internal node whose
    two children are built from elements 0 and 1.  Children are always
    built with the default label; only the top-level call uses ``label``.
    """
    node = Node(label)
    if not isinstance(input_list, list):
        # Leaf node
        return node
    # Internal node: attach the subtrees for the first two elements.
    for position in (0, 1):
        node = node.addkid(list_to_zsstree(input_list[position]))
    return node
    def convert_parse_tree_to_zss_tree(self, tree_as_string, ignore_leaves=False):
        '''
        Turn a bracketed parse string into a zss tree rooted at a 'ROOT' node.

        With ignore_leaves set, the words of the sentence are left out, so the
        resulting tree only represents sentence structure.
        '''
        pieces = re.split(r'([\(\)])', tree_as_string)
        tokens = [piece.strip() for piece in pieces if piece.strip()]
        # Drop the first two tokens and the last one (the outermost wrapper).
        tokens = tokens[2:-1]

        root_node = Node('ROOT')
        open_nodes = [root_node]
        for position, token in enumerate(tokens):
            if token == ')':
                # The innermost open node is finished.
                open_nodes.pop()
                continue
            if token != '(':
                continue

            following = tokens[position + 1]
            tagged_word = re.search(r'[A-Z]+[ ][A-Za-z]+', following)
            if tagged_word is None:
                # A bare label: its children arrive as later tokens.
                current = Node(following)
            else:
                # "TAG word": a pre-terminal carrying a single word.
                parts = tagged_word.group().split(' ')
                if ignore_leaves:
                    current = Node(parts[0])
                else:
                    current = Node(parts[0]).addkid(Node(parts[1]))

            # Attach to the innermost open node, then make it the new innermost.
            open_nodes[-1].addkid(current)
            open_nodes.append(current)
        return root_node
Beispiel #6
0
def tree_generate(node_name,
                  index,
                  pos,
                  dependency_list,
                  visited_list,
                  reverse=False):
    """Build a tree rooted at the given node from a dependency list.

    ``visited_list`` is extended in place with the index of this node and
    with an ``(index, name, pos)`` tuple for every child handled.  Recursive
    calls use the default ``reverse``.

    :type index: int
    :type pos: str
    :type visited_list: list
    :type node_name: str
    :type dependency_list: list
    """
    node = Node(node_name, pos, index=index)
    visited_list.append(index)

    # (index, name, pos) of every dependent whose head is this node.
    kids = []
    for dep in dependency_list:
        if dep[0][0] == index:
            kids.append((dep[1][0], dep[1][1], dep[1][2]))

    for kid in kids:
        kid_index, kid_name, kid_pos = kid
        if kid in visited_list:
            # Already handled elsewhere: attach a plain node, do not recurse.
            node.value = 1
            node.addkid(Node(label=kid_name, index=kid_index, pos=kid_pos))
        else:
            node.addkid(
                tree_generate(kid_name, kid_index, kid_pos, dependency_list,
                              visited_list))
        visited_list.append(kid)

    attached = Node.get_children(node)
    if attached:
        node.value += sum(child.value for child in attached)
    node.children.sort(key=lambda child: child.label, reverse=reverse)
    return node
def test_de():
    """Check the operations reported by ``simple_distance(D, E)``.

    The expected sequence removes "b", removes "c" and matches "a".
    """
    _, actual_ops = simple_distance(D, E, return_operations=True)
    assert actual_ops == [
        Operation(Operation.remove, Node("b"), None),
        Operation(Operation.remove, Node("c"), None),
        Operation(Operation.match, Node("a"), Node("a")),
    ]
Beispiel #8
0
def loop_tree(dictionary, node):
    """Recursively attach the contents of a nested dict beneath ``node``.

    Each key whose value is a dict becomes a child node labelled with the
    key, and that dict's entries are attached beneath it.  Every other
    value becomes a leaf labelled with the value.  ``node`` and a separator
    line are printed on every call.

    :param dictionary: possibly nested dict to convert.
    :param node: tree node that receives the children.
    """
    print(node)
    print("----")
    for key, value in dictionary.items():
        if isinstance(value, dict):
            # zss's addkid() returns the parent, not the node just added, so
            # the child is built first and passed to the recursive call.
            child = Node(key)
            node.addkid(child)
            loop_tree(value, child)
        else:
            node.addkid(Node(value))
Beispiel #9
0
 def helper(obj):
     """Convert a nested ``[label, child, child, ...]`` list into a Node tree.

     Anything that is not a list becomes a leaf labelled with the value.
     """
     if not isinstance(obj, list):
         return Node(obj)
     subtree = Node(obj[0])
     for entry in obj[1:]:
         subtree.addkid(helper(entry))
     return subtree
def dgl_tree_to_zzs_tree(tree, vocab_key_list, u):
    """Convert the subtree ending at node ``u`` of a graph into a zss tree.

    A node with no incoming edges becomes a leaf labelled
    ``vocab_key_list[tree.ndata['x'][u]]``.  Any other node becomes a
    ``"PAD"`` node whose children are built from the source nodes of its
    incoming edges.
    """
    is_leaf = tree.in_degrees(u) == 0
    if is_leaf:
        return Node(vocab_key_list[tree.ndata['x'][u]])

    parent = Node("PAD")
    sources = tree.in_edges(u)[0]
    for source in sources:
        parent.addkid(dgl_tree_to_zzs_tree(tree, vocab_key_list, int(source)))
    return parent
Beispiel #11
0
 def build_tree_no_date(self, event):
     """Build a one-level tree describing ``event`` without its date.

     The root is the lower-cased event type; its children are the
     lower-cased country, state, city and name, in that order.
     """
     tree = Node(event.event_type.lower())
     for attribute in ('country', 'state', 'city', 'name'):
         tree = tree.addkid(Node(getattr(event, attribute).lower()))
     return tree
Beispiel #12
0
 def totree(self, e):
     """Convert an expression into a Node tree with ``(kind, text)`` labels.

     ``Var`` gives ``('V', str(e))``, ``Const`` gives ``('C', str(e))`` and
     ``Op`` gives ``('O', e.name)`` with one child per argument.  Any other
     value yields ``None``.
     """
     for leaf_type, tag in ((Var, 'V'), (Const, 'C')):
         if isinstance(e, leaf_type):
             return Node((tag, str(e)))
     if not isinstance(e, Op):
         return None
     op_node = Node(('O', e.name))
     for argument in e.args:
         op_node.addkid(self.totree(argument))
     return op_node
Beispiel #13
0
def get_ztree(cn, ztp=None):
    """Mirror the labelled structure of a ``Tree`` as zss nodes.

    :param cn: a ``Tree`` or a string accepted by ``Tree.fromstring``.
    :param ztp: node to attach children to; a node labelled with
        ``cn.label()`` is created when omitted.
    :return: ``ztp`` (or the newly created node).  Only children that are
        ``Tree`` instances are copied, so plain leaves are skipped.
    """
    tree = Tree.fromstring(cn) if isinstance(cn, str) else cn
    parent = Node(tree.label()) if ztp is None else ztp
    for child in tree:
        if not isinstance(child, Tree):
            continue
        child_node = Node(child.label())
        parent.addkid(child_node)
        get_ztree(child, child_node)
    return parent
def parsed_tree_to_zzs_tree(u):
    """Convert a parsed tree node into a zss tree.

    Single-child nodes are skipped, two-child nodes become ``"PAD"`` nodes,
    and childless nodes become leaves labelled with ``u.value``.  A node
    with more than two children fails the assertion.
    """
    kids = u.child
    if len(kids) == 0:
        return Node(u.value)
    if len(kids) == 1:
        # Unary chain: collapse straight through to the only child.
        return parsed_tree_to_zzs_tree(kids[0])

    assert len(kids) == 2
    pad = Node("PAD")
    for kid in (kids[0], kids[1]):
        pad.addkid(parsed_tree_to_zzs_tree(kid))
    return pad
Beispiel #15
0
def main(node, child):
    """Attach a JSON-like value beneath ``node`` and return ``node``.

    * dict: one child per key, each holding that key's converted value.
    * non-empty list: one child per position, labelled with the index.
    * empty list: a single child labelled ``''``.
    * anything else: a single leaf labelled with the value.

    Every branch returns the node.  Falling off the end used to return
    ``None`` for dicts and non-empty lists, and the recursive callers then
    added that ``None`` as a child.

    :param node: tree node that receives the children.
    :param child: value to convert.
    :return: whatever ``addkid`` returns for scalars and empty lists (the
        parent node in zss), otherwise ``node`` itself.
    """
    if isinstance(child, dict):
        for key, value in child.items():
            node.addkid(main(Node(key), value))
        return node

    if isinstance(child, list):
        if not child:
            return node.addkid(Node(''))
        for position, item in enumerate(child):
            node.addkid(main(Node(position), item))
        return node

    return node.addkid(Node(child))
Beispiel #16
0
        def transform(parent):
            """Build the Node tree for ``parent`` from the ``clause`` mapping.

            The label is ``parent[0]``.  Keys missing from ``clause`` become
            leaves; otherwise each entry of ``clause[parent]`` is converted
            recursively and passed as the children.
            """
            if parent not in clause:
                return Node(parent[0])
            return Node(parent[0],
                        children=[transform(kid) for kid in clause[parent]])
Beispiel #17
0
def zss_code_distance(code_a, code_b):
    """Return the zss distance between the ASTs of two source strings.

    Each string is parsed with ``ast.parse`` and handed to ``zss_ast_visit``
    together with a fresh ``"root"`` node (presumably filled in by that
    helper).  The two roots are compared using ``label_weight`` as the
    label distance.
    """
    zss_roots = []
    for source in (code_a, code_b):
        syntax_tree = ast.parse(source)
        zss_root = Node("root")
        zss_ast_visit(syntax_tree, zss_root)
        zss_roots.append(zss_root)

    first, second = zss_roots
    return simple_distance(first, second, label_dist=label_weight)
Beispiel #18
0
def make_html_zssgraph(parent, graph=None, ignore_comments=True):
    ''' Given a parsed HTML element, return a zss style tree of the DOM.

    parent: element exposing ``.tag`` and iterating over its child
        elements (as lxml / ElementTree elements do).
    graph: node to attach the children of ``parent`` to; a node labelled
        ``parent.tag`` is created when omitted.
    ignore_comments: when true, children whose tag is not a string
        (comments, processing instructions) are skipped together with
        everything below them.

    Each child is attached to its own parent's node, so the result mirrors
    the nesting of the DOM instead of hanging every descendant off the root.
    '''
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3

    if graph is None:
        graph = Node(parent.tag)
    # Iterating the element works on every lxml / ElementTree version, while
    # getchildren() was removed from ElementTree in Python 3.9.
    for element in parent:
        # if the element is a comment, ignore it
        if ignore_comments and not isinstance(element.tag, string_types):
            continue
        child = Node(element.tag)
        graph.addkid(child)
        make_html_zssgraph(element, child, ignore_comments)
    return graph
Beispiel #19
0
def mktree(node, child, count=0):
    """Attach a nested list/dict structure beneath ``node`` and return it.

    * list: every element is processed in turn (``count`` is incremented
      once per element before recursing).
    * dict: a key whose value is a dict becomes a child node under which
      that dict is converted; any other value becomes a leaf labelled with
      the value.
    * anything else is ignored.

    All elements are visited; a ``return`` inside the loops used to stop
    after the first one.  ``count`` is printed on every call.

    :param node: tree node that receives the children.
    :param child: list, dict or other value to convert.
    :param count: running counter printed for tracing.
    :return: ``node``.
    """
    print(count)
    if isinstance(child, list):
        for item in child:
            count += 1
            mktree(node, item, count)
    elif isinstance(child, dict):
        for key, value in child.items():
            if isinstance(value, dict):
                # Recurse into the new child so nesting is preserved.
                branch = Node(key)
                node.addkid(branch)
                mktree(branch, value, count)
            else:
                node.addkid(Node(value))
    return node
Beispiel #20
0
def zss_code_ast_edit(code_a, code_b):
    """Return ``(cost, operations)`` for editing one code AST into another.

    Each string is parsed with ``ast.parse`` and handed to ``zss_ast_visit``
    together with a fresh ``"root"`` node (presumably filled in by that
    helper).  ``simple_distance`` is then run with ``label_weight`` as the
    label distance and ``return_operations=True``.
    """
    zss_roots = []
    for source in (code_a, code_b):
        syntax_tree = ast.parse(source)
        zss_root = Node("root")
        zss_ast_visit(syntax_tree, zss_root)
        zss_roots.append(zss_root)

    first, second = zss_roots
    result = simple_distance(first,
                             second,
                             label_dist=label_weight,
                             return_operations=True)
    cost, ops = result
    return cost, ops
Beispiel #21
0
def tree_edit_distance(s1, s2):
    """Return the zss distance between two comma-separated strings.

    Each string becomes a tree with an empty-labelled root and one leaf
    per comma-separated item.
    """
    item_lists = [s1.split(','), s2.split(',')]

    roots = []
    for items in item_lists:
        root = Node("")
        for item in items:
            root.addkid(Node(item))
        roots.append(root)

    return simple_distance(roots[0], roots[1])
Beispiel #22
0
def to_zzzNode(E, root=0):
    """Build a zss tree from the structure ``E``, breadth first.

    :param E: structure handed to ``getChildren(E, parent)`` to look up the
        children of a node.
    :param root: identifier of the root node.  The traversal starts here
        (it used to start at a hard-coded ``0``, so a non-default root got
        the right label but node 0's children).
    :return: the root ``Node``; every node is labelled ``str(identifier)``.
    """
    from collections import deque

    from zss import Node

    tree_root = Node(str(root))
    # Queue of (identifier, Node) pairs whose children are still to be added.
    pending = deque([(root, tree_root)])
    while pending:
        parent, node = pending.popleft()
        for child in getChildren(E, parent):
            kid = Node(str(child))
            node.addkid(kid)
            pending.append((child, kid))
    return tree_root
    def syntax_similarity_conversation(self, documents1):
        """Score the syntactic similarity of each document with the next one.

        Every document is split into sentences and parsed.  For each pair of
        consecutive documents, the tree edit distance between every sentence
        of the first and every sentence of the second is divided by a node
        count total, the mean over all sentence pairs is taken, and the
        result is ``1 - mean``.

        :param documents1: sequence of document strings, in order.
        :return: numpy array with one entry per consecutive document pair;
            NaN where either document was skipped because it contains a
            sentence with more than 70 whitespace-separated tokens.
        """
        # NOTE(review): numnodes is a module-level counter, presumably
        # incremented by self.convert_mytree (not visible here) -- confirm.
        global numnodes
        documents1parsed = []

        # Detect sentences and parse them
        for d1 in tqdm(range(len(documents1))):
            tempsents = (self.sent_detector.tokenize(documents1[d1].strip()))
            for s in tempsents:
                # A single over-long sentence marks the whole document as "NA".
                if len(s.split()) > 70:
                    documents1parsed.append("NA")
                    break
            else:
                # Reached only when no sentence triggered the break above.
                temp = list(self.parser.raw_parse_sents((tempsents)))
                for i in range(len(temp)):
                    # Keep the first parse of each sentence.
                    temp[i] = list(temp[i])[0]
                    temp[i] = ParentedTree.convert(temp[i])
                documents1parsed.append(list(temp))

        results = []
        # Compare each document with its immediate successor.
        for d1 in range(len(documents1parsed) - 1):
            d2 = d1 + 1
            if documents1parsed[d1] == "NA" or documents1parsed[d2] == "NA":
                results.append(float('NaN'))
                continue

            # costMatrix[i][j]: normalised distance between sentence i of the
            # first document and sentence j of the second.
            costMatrix = []
            for i in range(len(documents1parsed[d1])):
                # Reset the global counter before converting sentence i.
                numnodes = 0
                tempnode = Node(documents1parsed[d1][i].root().label())
                sentencedoc1 = self.convert_mytree(documents1parsed[d1][i],
                                                   tempnode)
                temp_costMatrix = []
                # Remember the counter value left by the first conversion.
                sen1nodes = numnodes
                for j in range(len(documents1parsed[d2])):
                    # Float reset -- presumably so the division below is a
                    # true division under Python 2 as well; confirm.
                    numnodes = .0
                    tempnode = Node(documents1parsed[d2][j].root().label())
                    sentencedoc2 = self.convert_mytree(documents1parsed[d2][j],
                                                       tempnode)
                    ED = simple_distance(sentencedoc1, sentencedoc2)
                    # Normalise by the sum of the two counter values.
                    ED /= (numnodes + sen1nodes)
                    temp_costMatrix.append(ED)
                costMatrix.append(temp_costMatrix)
            costMatrix = np.array(costMatrix)

            # Turn the mean normalised distance into a similarity.
            results.append(1 - np.mean(costMatrix))

        return np.array(results)
Beispiel #24
0
def randtree(depth=2, alpha='abcdefghijklmnopqrstuvwxyz', repeat=2, width=2):
    """Generate a random tree with unique labels.

    Labels are the shuffled ``repeat``-length products of ``alpha``.  The
    tree is grown level by level: every node of the current level receives
    between 1 and ``1 + width`` children.

    Uses ``range`` and ``next()`` so it runs on both Python 2 and Python 3
    (``xrange`` and ``.next()`` exist only on Python 2).

    :param depth: number of levels, counting the root.
    :param alpha: characters the labels are drawn from.
    :param repeat: length of every label.
    :param width: maximum number of children beyond the first.
    :return: the root ``Node``, labelled ``"root"``.
    :raises StopIteration: if the tree needs more labels than
        ``len(alpha) ** repeat``.
    """
    labels = [''.join(combo)
              for combo in itertools.product(alpha, repeat=repeat)]
    shuffle(labels)
    label_iter = iter(labels)

    root = Node("root")
    parents = [root]
    for _ in range(depth - 1):
        children = []
        for parent in parents:
            for _ in range(randint(1, 1 + width)):
                kid = Node(next(label_iter))
                parent.addkid(kid)
                children.append(kid)
        parents = children
    return root
Beispiel #25
0
def parseTreeFromStrings(tree, debug=False):
    """
    Create the tree from a list of strings
    @param tree:       {List} string format of the tree, one element per string
    @param debug:       {Boolean} accepted but not used by this function
    @return:            the root node of the tree; None when ``tree`` is None
                        or contains no lines
    """
    if tree is None:
        return None

    root = None
    # right_most[k] is the most recently created node at depth k.
    right_most = []
    for line in tree:
        header = line.split(": ")[0]
        if "|-" in header:
            # The text before "- " is the indentation marker; its length
            # gives the depth.
            indent, tag = header.split("- ")
            level = 1 + len(indent) // 2
        else:
            level, tag = 0, header
        tag = tag.strip()

        # Every run of digits on the line, in order.
        numbers = [int(piece) for piece in re.split("\\D+", line)
                   if piece != ""]
        label = "%s[x=%04d,y=%04d]" % (tag, numbers[2] - numbers[0],
                                       numbers[3] - numbers[1])
        current = Node(label)

        if level == len(right_most):
            right_most.append(current)
        else:
            assert level < len(right_most)
            right_most[level] = current

        if level == 0:
            root = current
        else:
            right_most[level - 1].addkid(current)

    return root
Beispiel #26
0
 def build_tree(self, event):
     """Build a one-level tree describing ``event``, including its date.

     The root is the lower-cased event type; its children are the
     lower-cased country, state, city, name, day, month and year, in that
     order.
     """
     tree = Node(event.event_type.lower())
     for attribute in ('country', 'state', 'city', 'name',
                       'day', 'month', 'year'):
         tree = tree.addkid(Node(getattr(event, attribute).lower()))
     return tree
Beispiel #27
0
def convert_body(body, parent_node=None, root_node=None):
    """Convert a dict-encoded syntax tree into a Node tree.

    ``body`` is first passed through ``seperate_dict``.  If the result is
    not a dict, nothing is added.  Otherwise each key other than
    ``'_PyType'`` adds one child to ``parent_node``:

    * a dict value with ``'_PyType'`` gets the label
      ``"<key> <_PyType>"``, followed by `` <attr>`` when it has an
      ``'attr'`` key, or by `` <id>`` when the key is ``'func'``; the value
      is then converted recursively beneath that child;
    * any other dict value gets ``"<key> udv"`` if its JSON dump contains
      ``'udv'``, otherwise ``"<key> <json dump>"``;
    * a list or str value gets ``"<key> <value>"`` (``"<key> "`` when the
      value is empty).

    :param parent_node: node to attach to; created from ``body['_PyType']``
        on the outermost call.
    :param root_node: root of the whole tree, threaded through recursion.
    :return: ``root_node``.
    """
    body = seperate_dict(body)
    if not isinstance(body, dict):
        return root_node

    if parent_node is None:
        # Outermost call: this dict is the root of the tree.
        parent_node = Node(body['_PyType'])
        root_node = parent_node

    for key in body:
        if key == '_PyType':
            continue
        value = body[key]

        if isinstance(value, dict):
            if '_PyType' in value.keys():
                # A typed sub-node: label it, attach it and recurse into it.
                if 'attr' in value.keys():
                    suffix = ' ' + value['attr']
                elif key == 'func':
                    suffix = ' ' + value['id']
                else:
                    suffix = ''
                child = Node(key + ' ' + value['_PyType'] + suffix)
                parent_node.addkid(child)
                convert_body(value, parent_node=child, root_node=root_node)
            else:
                # case when it's a dict but not with PyType
                dumped = json.dumps(value)
                text = key + ' ' + ('udv' if 'udv' in dumped else dumped)
                parent_node = parent_node.addkid(Node(text))

        elif isinstance(value, (list, str)):
            text = key + ' ' + (value if value else '')
            parent_node = parent_node.addkid(Node(text))

    return root_node
Beispiel #28
0
 def helper(node: Node, index: int):
     """Attach the dependents of token ``index`` beneath ``node``, recursively.

     The dependents are the concatenation of all lists in
     ``dep[str(index)]['deps']``; each child is labelled with its ``'word'``.
     """
     child_indices = sum(dep[str(index)]['deps'].values(), [])
     for child_index in child_indices:
         child_node = Node(dep[str(child_index)]['word'])
         # Fill in the child's own subtree before attaching it.
         helper(child_node, child_index)
         node.addkid(child_node)
def buildTree(state, node, chart):
    """Expand the back-pointers of a chart state into children of ``node``.

    ``state[4]`` holds pointers; each pointer's first two items index into
    ``chart`` to reach the child state, whose item 0 labels the child node.
    The child state is then expanded recursively.
    """
    pointers = state[4]
    if pointers == ():
        return
    for ptr in pointers:
        child_state = chart[ptr[0]][ptr[1]]
        child = Node(child_state[0])
        node.addkid(child)
        buildTree(child_state, child, chart)
Beispiel #30
0
def getModel(edges):
    """Build a Node tree from ``(parent, child)`` edges.

    Endpoints are numbered in order of first appearance and each node is
    labelled with that number.  The node stored under key ``0`` is returned;
    with no edges a lone ``Node(0)`` is returned instead.
    """
    # Distinct endpoints in first-seen order; the set gives fast membership.
    ordered_ids = []
    seen = set()
    for edge in edges:
        for endpoint in (edge[0], edge[1]):
            if endpoint not in seen:
                seen.add(endpoint)
                ordered_ids.append(endpoint)

    data = {}
    for position, endpoint in enumerate(ordered_ids):
        data[endpoint] = Node(position)
    for edge in edges:
        data[edge[0]].addkid(data[edge[1]])

    if not ordered_ids:
        return Node(0)
    return data[0]