コード例 #1
0
ファイル: lfg.py プロジェクト: wrand/tweater
    def to_depgraph(self, rel=None):
        """Build a ``DependencyGraph`` describing this structure.

        A fresh graph's node list is populated by
        ``self._to_depgraph(nodelist, 0, 'ROOT')``; afterwards each node's
        ``deps`` list receives the addresses of all nodes (the first list
        entry excluded) whose ``head`` equals that node's list index.

        :param rel: accepted but not referenced in this method's body.
        :return: the graph, with ``root`` set to the list entry at index 1.
        """
        graph = DependencyGraph()
        all_nodes = graph.nodelist

        self._to_depgraph(all_nodes, 0, 'ROOT')

        # Attach every dependent to the node sitting at its head index.
        for head_index, head_node in enumerate(all_nodes):
            for candidate in all_nodes[1:]:
                if candidate['head'] != head_index:
                    continue
                head_node['deps'].append(candidate['address'])

        graph.root = all_nodes[1]
        return graph
コード例 #2
0
ファイル: lfg.py プロジェクト: jparise/haitwu-appengine
    def to_depgraph(self, rel=None):
        """Return a ``DependencyGraph`` built from this structure.

        The graph's node list is filled by
        ``self._to_depgraph(nodelist, 0, 'ROOT')``.  Each node then gets, in
        its ``deps`` list, the ``address`` of every entry after the first
        whose ``head`` equals the node's position in the list.

        :param rel: accepted but not referenced in this method's body.
        :return: the graph; its ``root`` is the list entry at index 1.
        """
        result = DependencyGraph()
        entries = result.nodelist

        self._to_depgraph(entries, 0, 'ROOT')

        # The first entry is never considered as a dependent; the list itself
        # is not modified below, so the slice can be taken once.
        possible_dependents = entries[1:]
        for position, entry in enumerate(entries):
            for other in possible_dependents:
                if other['head'] == position:
                    entry['deps'].append(other['address'])

        result.root = entries[1]
        return result
コード例 #3
0
    def to_depgraph(self, rel=None):
        """Build an NLTK ``DependencyGraph`` from this structure.

        The graph's ``nodes`` mapping is populated by
        ``self._to_depgraph(nodes, 0, 'ROOT')``.  Afterwards, for every
        address in the mapping, the addresses of all nodes whose ``rel`` is
        not 'TOP' and whose ``head`` equals that address are appended to the
        node's ``deps`` list.

        :param rel: accepted but not referenced in this method's body.
        :return: the graph, with ``root`` set to ``nodes[1]``.
        """
        from nltk.parse.dependencygraph import DependencyGraph

        graph = DependencyGraph()
        node_map = graph.nodes

        self._to_depgraph(node_map, 0, 'ROOT')

        # Link each head address to its dependents, ignoring 'TOP' nodes.
        for head_address, head_node in node_map.items():
            for candidate in node_map.values():
                if candidate['rel'] == 'TOP':
                    continue
                if candidate['head'] == head_address:
                    head_node['deps'].append(candidate['address'])

        graph.root = node_map[1]
        return graph
コード例 #4
0
ファイル: lfg.py プロジェクト: xim/nltk
    def to_depgraph(self, rel=None):
        """Convert this structure into an NLTK ``DependencyGraph``.

        ``self._to_depgraph(nodelist, 0, "ROOT")`` fills the new graph's node
        list; each node's ``deps`` list is then extended with the ``address``
        of every entry (the first one excluded) whose ``head`` equals the
        node's index in the list.

        :param rel: accepted but not referenced in this method's body.
        :return: the graph; ``root`` is the list entry at index 1.
        """
        from nltk.parse.dependencygraph import DependencyGraph

        dep_graph = DependencyGraph()
        node_seq = dep_graph.nodelist

        self._to_depgraph(node_seq, 0, "ROOT")

        # The node list is not modified in the loop, so one slice suffices.
        dependents = node_seq[1:]
        for parent_index, parent in enumerate(node_seq):
            for child in dependents:
                if child["head"] != parent_index:
                    continue
                parent["deps"].append(child["address"])

        dep_graph.root = node_seq[1]
        return dep_graph
コード例 #5
0
    def to_depgraph(self, rel=None):
        """Build an NLTK ``DependencyGraph`` from this structure.

        The graph's ``nodes`` mapping is populated by
        ``self._to_depgraph(nodes, 0, 'ROOT')``.  Each node's ``deps`` is then
        filled as a mapping from relation name to a list of dependent
        addresses: a node contributes to the entry of the address stored in
        its ``head``, under its own ``rel``, unless that ``rel`` is 'TOP'.

        :param rel: accepted but not referenced in this method's body.
        :return: the graph, with ``root`` set to ``nodes[1]``.
        """
        from nltk.parse.dependencygraph import DependencyGraph

        graph = DependencyGraph()
        node_map = graph.nodes

        self._to_depgraph(node_map, 0, 'ROOT')

        # Group the dependents of every head address by relation.
        for head_address, head_node in node_map.items():
            for candidate in node_map.values():
                if candidate['rel'] == 'TOP':
                    continue
                if candidate['head'] != head_address:
                    continue
                by_relation = head_node['deps']
                by_relation.setdefault(candidate['rel'], []).append(candidate['address'])

        graph.root = node_map[1]
        return graph
コード例 #6
0
    def to_depgraph(self, rel=None):
        """Convert this structure into an NLTK ``DependencyGraph``.

        After ``self._to_depgraph(nodes, 0, "ROOT")`` has filled the graph's
        ``nodes`` mapping, every node whose ``rel`` is not "TOP" is registered
        with the node at the address stored in its ``head``: its own address
        is appended to ``deps[rel]`` of that head node.

        :param rel: accepted but not referenced in this method's body.
        :return: the graph; ``root`` is ``nodes[1]``.
        """
        from nltk.parse.dependencygraph import DependencyGraph

        dep_graph = DependencyGraph()
        all_nodes = dep_graph.nodes

        self._to_depgraph(all_nodes, 0, "ROOT")

        # Record, per head address, its dependents grouped by relation.
        for head_address, head_node in all_nodes.items():
            for dependent in all_nodes.values():
                is_top = dependent["rel"] == "TOP"
                if is_top or dependent["head"] != head_address:
                    continue
                bucket = head_node["deps"].setdefault(dependent["rel"], [])
                bucket.append(dependent["address"])

        dep_graph.root = all_nodes[1]
        return dep_graph
コード例 #7
0
def tree_to_graph(tree):
    '''Turn a tree of node dictionaries into a DependencyGraph for accuracy().

    Args: tree: the tree to convert; it is copied with
        tree_map(copy.copy, ...) so the caller's nodes are not modified
    Returns: a DependencyGraph whose nodelist has been extended with the
        tree's node dictionaries, sorted by 'address'. Only the 'head'
        attribute is set here, so the graph is really only meant for
        accuracy().
    Raises: None
    '''
    # Work on copies: the node dictionaries are mutable and 'head' is
    # overwritten below.
    copied_tree = tree_map(copy.copy, tree)

    def set_heads(subtree, parent=0):
        # The head of a node is the address of the node one level up
        # (0 for the topmost node).
        node = label(subtree)
        node['head'] = parent
        if isinstance(subtree, Tree):
            for child in subtree:
                set_heads(child, node['address'])

    set_heads(copied_tree)

    def all_elems(subtree):
        # Flatten the tree into a list of its labels, parent before children.
        collected = [label(subtree)]
        if isinstance(subtree, Tree):
            for child in subtree:
                collected.extend(all_elems(child))
        return collected

    dg = DependencyGraph()
    dg.root = dg.nodelist[0]
    # The node list is expected to be ordered by address.
    ordered_nodes = sorted(all_elems(copied_tree),
                           key=lambda t: label(t)['address'])
    dg.nodelist += ordered_nodes

    return dg
コード例 #8
0
ファイル: MyCabocha.py プロジェクト: fqq11679/Fuzzy
def cabocha2depgraph(t):
    """Convert CaboCha-style parser output text into a ``DependencyGraph``.

    The text is processed line by line:

    * a line that starts with ``*`` but does not end with ``*`` is treated as
      a chunk header; its third space-separated field must look like
      ``<parent index><A|D|I|P>`` (e.g. ``2D`` or ``-1D``).  The chunk becomes
      node ``i`` (chunks are numbered in order of appearance, from 0), its
      ``rel`` is the type letter, and it is registered in the ``deps`` list
      of the parent node.  A parent index of ``-1`` makes the chunk the
      graph's root.
    * any other line that does not start with ``EOS`` is treated as a
      tab-separated morpheme line; its surface form and its comma-separated
      feature tuple are appended to ``word`` / ``tag`` of the most recently
      started chunk.

    :param t: the parser output as a single string.
    :return: the populated ``DependencyGraph``.
    """
    dg = DependencyGraph()
    i = 0
    for line in t.splitlines():
        if line.startswith("*") and not line.endswith('*'):
            # start of bunsetsu  and not the real *
            cells = line.strip().split(" ", 3)
            m = re.match(r"([\-0-9]*)([ADIP])", cells[2])
            dep_parent = int(m.group(1))

            # Make sure both this chunk's slot and its parent's slot exist
            # *before* indexing into the list.  Previously the list was only
            # grown after ``dg.nodelist[i]`` had been read, which raised
            # IndexError whenever no earlier chunk had pointed at index i.
            needed = max(i, dep_parent) + 1
            while len(dg.nodelist) < needed:
                dg.nodelist.append({'word': [], 'deps': [], 'tag': []})

            node = dg.nodelist[i]
            node.update({
                'address': i,
                'rel': m.group(2),  # dep_type
                'word': [],
                'tag': [],
                'str': ""
            })

            if dep_parent == -1:
                dg.root = node
            else:
                dg.nodelist[dep_parent]['deps'].append(i)

            i += 1
        elif not line.startswith("EOS"):
            # normal morph: "<surface>\t<feature>,<feature>,..."
            cells = line.strip().split("\t")
            morph = (cells[0], tuple(cells[1].split(',')))
            # i was already advanced past the current chunk, hence i - 1.
            dg.nodelist[i - 1]['word'].append(morph[0])
            dg.nodelist[i - 1]['tag'].append(morph[1])

    return dg
コード例 #9
0
ファイル: util.py プロジェクト: chegejames/NLP
def make_dep_tree(sent, deps):
    """Build a ``DependencyGraph`` from a sentence and its dependency triples.

    :param sent: mapping with at least the keys ``"x"`` (the words) and
        ``"pos"`` (the tags), both indexable by token position.  Only
        positions ``1 .. len(sent["x"]) - 1`` become word nodes; position 0
        is presumably a root placeholder (confirm against the caller).
    :param deps: iterable of ``(head, modifier, relation)`` triples.
    :return: the graph; its root is the first dependent recorded for head 0.
    :raises KeyError: if a position in ``1 .. n-1`` has no head/relation in
        ``deps``.  A missing entry for head 0 also fails at the final root
        lookup (the exact exception depends on what ``merge_with`` returns).
    """
    # head -> list of modifiers, as produced by the merge_with/cons helpers.
    adj = merge_with(cons, [], *[{x: [m]} for x, m, _ in deps])
    heads = {m: h for h, m, _ in deps}
    relations = {m: r for _, m, r in deps}
    n = len(sent["x"])
    pos = sent["pos"]
    x = sent["x"]
    nodelist = defaultdict(lambda: {"address": -1, "head": -1, "deps": [], "rel": "", "tag": "", "word": None})

    for i in range(1, n):
        node = nodelist[i]
        node["address"] = i
        node["head"] = heads[i]
        # ``dict.has_key`` no longer exists in Python 3; ``in`` works in both.
        node["deps"] = adj[i] if i in adj else []
        node["tag"] = pos[i]
        node["word"] = x[i]
        node["rel"] = relations[i]

    g = DependencyGraph()
    g.get_by_address(0)["deps"] = adj[0] if 0 in adj else []
    for node in nodelist.values():
        g.add_node(node)
    g.root = nodelist[adj[0][0]]

    return g
コード例 #10
0
ファイル: master.py プロジェクト: lurke/DependencyParsing
def tree_to_graph(tree):
    '''Build the DependencyGraph that accuracy() needs from a tree of nodes.

    Args: tree: the tree to convert; a copy made with
        tree_map(copy.copy, ...) is modified, not the argument itself
    Returns: a DependencyGraph whose nodelist has been extended with the
        copied node dictionaries in 'address' order. Apart from 'head',
        no attribute is set here, so the graph is really only useable in
        accuracy().
    Raises: None
    '''
    # Node dictionaries are mutable; copy them so that assigning 'head'
    # leaves the original nodes untouched.
    working_tree = tree_map(copy.copy, tree)

    def set_heads(current, parent=0):
        # Each node's head is the address of its parent in the tree;
        # the topmost node gets the default 0.
        current_node = label(current)
        current_node['head'] = parent
        if isinstance(current, Tree):
            for sub in current:
                set_heads(sub, current_node['address'])

    set_heads(working_tree)

    def all_elems(current):
        # Gather the labels of the whole (sub)tree into one flat list.
        flat = [label(current)]
        if isinstance(current, Tree):
            for sub in current:
                flat.extend(all_elems(sub))
        return flat

    dg = DependencyGraph()
    dg.root = dg.nodelist[0]
    # nodelist should be ordered by address
    by_address = sorted(all_elems(working_tree),
                        key=lambda t: label(t)['address'])
    dg.nodelist += by_address

    return dg
コード例 #11
0
    def parse(self, tokens):
        """
        Parses the input tokens with respect to the parser's grammar.  Parsing
        is accomplished by representing the search-space of possible parses as
        a fully-connected directed graph.  Arcs that would lead to ungrammatical
        parses are removed and a lattice is constructed of length n, where n is
        the number of input tokens, to represent all possible grammatical
        traversals.  All possible paths through the lattice are then enumerated
        to produce the set of non-projective parses.

        Nothing is yielded for an empty token list, nor when two or more
        tokens have no possible head (such input cannot have a single root).

        param tokens: A list of tokens to parse.
        type tokens: list(str)
        return: An iterator of non-projective parses.
        rtype: iter(DependencyGraph)
        """
        # Create graph representation of tokens
        self._graph = DependencyGraph()

        for index, token in enumerate(tokens):
            self._graph.nodes[index] = {
                'word': token,
                'deps': [],
                'rel': 'NTOP',
                'address': index,
            }

        # For every node, record the addresses of all nodes the grammar
        # allows it to govern (a word never governs an identical word).
        for head_node in self._graph.nodes.values():
            deps = []
            for dep_node in self._graph.nodes.values():
                if (self._grammar.contains(head_node['word'], dep_node['word'])
                        and head_node['word'] != dep_node['word']):
                    deps.append(dep_node['address'])
            head_node['deps'] = deps

        # Without tokens there is no lattice to traverse; the traversal below
        # would otherwise fail indexing possible_heads[0].
        if len(tokens) == 0:
            return

        # Create lattice of possible heads
        roots = []
        possible_heads = []
        for i, word in enumerate(tokens):
            heads = []
            for j, head in enumerate(tokens):
                if (i != j) and self._grammar.contains(head, word):
                    heads.append(j)
            if len(heads) == 0:
                roots.append(i)
            possible_heads.append(heads)

        # Bound up front so the filter loop below also works when the
        # traversal is skipped (two or more tokens without any head);
        # previously that case raised UnboundLocalError.
        analyses = []

        # Set roots to attempt
        if len(roots) < 2:
            if len(roots) == 0:
                for i in range(len(tokens)):
                    roots.append(i)

            # Traverse lattice
            # NOTE: this loop only (re)initialises ``stack`` and ``analysis``;
            # ``root`` itself is not used and the traversal below runs once.
            for root in roots:
                stack = []
                analysis = [[] for i in range(len(possible_heads))]
            i = 0
            forward = True
            while i >= 0:
                if forward:
                    # Moving right: choose a head for token i.  When several
                    # candidates exist, take one and remember the choice on
                    # the stack so the others can be tried later.
                    if len(possible_heads[i]) == 1:
                        analysis[i] = possible_heads[i][0]
                    elif len(possible_heads[i]) == 0:
                        analysis[i] = -1
                    else:
                        head = possible_heads[i].pop()
                        analysis[i] = head
                        stack.append([i, head])
                if not forward:
                    # Moving left: look for a position with recorded choices.
                    index_on_stack = False
                    for stack_item in stack:
                        if stack_item[0] == i:
                            index_on_stack = True
                    orig_length = len(possible_heads[i])

                    if index_on_stack and orig_length == 0:
                        # All candidates for i are used up: hand the popped
                        # heads back and drop i's entries from the stack.
                        for j in range(len(stack) - 1, -1, -1):
                            stack_item = stack[j]
                            if stack_item[0] == i:
                                possible_heads[i].append(stack.pop(j)[1])

                    elif index_on_stack and orig_length > 0:
                        # Another candidate is left: take it and go forward.
                        head = possible_heads[i].pop()
                        analysis[i] = head
                        stack.append([i, head])
                        forward = True

                if i + 1 == len(possible_heads):
                    # Reached the last token: store a copy and turn around.
                    analyses.append(analysis[:])
                    forward = False
                if forward:
                    i += 1
                else:
                    i -= 1

        # Filter parses
        # ensure 1 root, every thing has 1 head
        for analysis in analyses:
            if analysis.count(-1) > 1:
                # there are several root elements!
                continue

            graph = DependencyGraph()
            # Analyses use 0-based token indices; graph addresses are 1-based.
            graph.root = graph.nodes[analysis.index(-1) + 1]

            for address, (token,
                          head_index) in enumerate(zip(tokens, analysis),
                                                   start=1):
                head_address = head_index + 1

                node = graph.nodes[address]
                node.update({
                    'word': token,
                    'address': address,
                })

                # A head index of -1 maps to address 0.
                if head_address == 0:
                    rel = 'ROOT'
                else:
                    rel = ''
                graph.nodes[head_index + 1]['deps'][rel].append(address)

            # TODO: check for cycles
            yield graph
コード例 #12
0
    def parse(self, tokens):
        """
        Parses the input tokens with respect to the parser's grammar.  Parsing
        is accomplished by representing the search-space of possible parses as
        a fully-connected directed graph.  Arcs that would lead to ungrammatical
        parses are removed and a lattice is constructed of length n, where n is
        the number of input tokens, to represent all possible grammatical
        traversals.  All possible paths through the lattice are then enumerated
        to produce the set of non-projective parses.

        Nothing is yielded for an empty token list, nor when two or more
        tokens have no possible head (such input cannot have a single root).

        param tokens: A list of tokens to parse.
        type tokens: list(str)
        return: An iterator of non-projective parses.
        rtype: iter(DependencyGraph)
        """
        # Create graph representation of tokens
        self._graph = DependencyGraph()

        for index, token in enumerate(tokens):
            self._graph.nodes[index] = {
                'word': token,
                'deps': [],
                'rel': 'NTOP',
                'address': index,
            }

        # For every node, record the addresses of all nodes the grammar
        # allows it to govern (a word never governs an identical word).
        for head_node in self._graph.nodes.values():
            deps = []
            for dep_node in self._graph.nodes.values():
                if (
                    self._grammar.contains(head_node['word'], dep_node['word'])
                    and head_node['word'] != dep_node['word']
                ):
                    deps.append(dep_node['address'])
            head_node['deps'] = deps

        # Without tokens there is no lattice to traverse; the traversal below
        # would otherwise fail indexing possible_heads[0].
        if len(tokens) == 0:
            return

        # Create lattice of possible heads
        roots = []
        possible_heads = []
        for i, word in enumerate(tokens):
            heads = []
            for j, head in enumerate(tokens):
                if (i != j) and self._grammar.contains(head, word):
                    heads.append(j)
            if len(heads) == 0:
                roots.append(i)
            possible_heads.append(heads)

        # Bound up front so the filter loop below also works when the
        # traversal is skipped (two or more tokens without any head);
        # previously that case raised UnboundLocalError.
        analyses = []

        # Set roots to attempt
        if len(roots) < 2:
            if len(roots) == 0:
                for i in range(len(tokens)):
                    roots.append(i)

            # Traverse lattice
            # NOTE: this loop only (re)initialises ``stack`` and ``analysis``;
            # ``root`` itself is not used and the traversal below runs once.
            for root in roots:
                stack = []
                analysis = [[] for i in range(len(possible_heads))]
            i = 0
            forward = True
            while i >= 0:
                if forward:
                    # Moving right: choose a head for token i.  When several
                    # candidates exist, take one and remember the choice on
                    # the stack so the others can be tried later.
                    if len(possible_heads[i]) == 1:
                        analysis[i] = possible_heads[i][0]
                    elif len(possible_heads[i]) == 0:
                        analysis[i] = -1
                    else:
                        head = possible_heads[i].pop()
                        analysis[i] = head
                        stack.append([i, head])
                if not forward:
                    # Moving left: look for a position with recorded choices.
                    index_on_stack = False
                    for stack_item in stack:
                        if stack_item[0] == i:
                            index_on_stack = True
                    orig_length = len(possible_heads[i])

                    if index_on_stack and orig_length == 0:
                        # All candidates for i are used up: hand the popped
                        # heads back and drop i's entries from the stack.
                        for j in range(len(stack) - 1, -1, -1):
                            stack_item = stack[j]
                            if stack_item[0] == i:
                                possible_heads[i].append(stack.pop(j)[1])

                    elif index_on_stack and orig_length > 0:
                        # Another candidate is left: take it and go forward.
                        head = possible_heads[i].pop()
                        analysis[i] = head
                        stack.append([i, head])
                        forward = True

                if i + 1 == len(possible_heads):
                    # Reached the last token: store a copy and turn around.
                    analyses.append(analysis[:])
                    forward = False
                if forward:
                    i += 1
                else:
                    i -= 1

        # Filter parses
        # ensure 1 root, every thing has 1 head
        for analysis in analyses:
            if analysis.count(-1) > 1:
                # there are several root elements!
                continue

            graph = DependencyGraph()
            # Analyses use 0-based token indices; graph addresses are 1-based.
            graph.root = graph.nodes[analysis.index(-1) + 1]

            for address, (token, head_index) in enumerate(zip(tokens, analysis), start=1):
                head_address = head_index + 1

                node = graph.nodes[address]
                node.update(
                    {
                        'word': token,
                        'address': address,
                    }
                )

                # A head index of -1 maps to address 0.
                if head_address == 0:
                    rel = 'ROOT'
                else:
                    rel = ''
                graph.nodes[head_index + 1]['deps'][rel].append(address)

            # TODO: check for cycles
            yield graph
コード例 #13
0
ファイル: utils.py プロジェクト: xmichelf/estnltk
    def as_dependencygraph( self, keep_dummy_root=False, add_morph=True ):
        ''' Returns this tree as NLTK's DependencyGraph object.
            
            Note that this method constructs 'zero_based' graph,
            where counting of the words starts from 0 and the 
            root index is -1 (not 0, as in Malt-TAB format);
            
            Parameters
            -----------
            keep_dummy_root : bool
                Specifies whether the graph should include a dummy
                TOP / ROOT node, which does not refer to any word,
                and yet is the topmost node of the tree.
                If the dummy root node is not used, then the root 
                node is the word node headed by -1;
                Default: False
            add_morph : bool
                Specifies whether the morphological information 
                (information about word lemmas, part-of-speech, and 
                features) should be added to graph nodes.
                Morphological information is only added to a node 
                if that tree node's  morph  attribute is non-empty;
                it is stored under the node keys 'tag', 'ctag', 
                'feats' and 'lemma', with alternative analyses joined
                by '|' and spaces replaced by '_';
                Default: True
            
            Returns
            --------
            DependencyGraph
                the graph, with nodes addressed by word_id and each 
                node's 'head' set to its parent's word_id (or -1);
            
            For more information about NLTK's DependencyGraph, see:
             http://www.nltk.org/_modules/nltk/parse/dependencygraph.html
        '''
        from nltk.parse.dependencygraph import DependencyGraph
        graph = DependencyGraph( zero_based = True )
        all_tree_nodes = [self] + self.get_children()
        #
        # 0) Fix the root
        #
        if keep_dummy_root:
            #  Note: we have to re-construct  the root node manually, 
            #  as DependencyGraph's current interface seems to provide
            #  no easy/convenient means for fixing the root node;
            graph.nodes[-1] = graph.nodes[0]
            graph.nodes[-1].update( { 'address': -1 } )
            graph.root = graph.nodes[-1]
        #  Address 0 belongs to the first word in a zero-based graph, so 
        #  the pre-made node stored there is always removed;
        del graph.nodes[0]
        #
        # 1) Update / Add nodes of the graph 
        #
        for child in all_tree_nodes:
            rel  = 'xxx' if not child.labels else '|'.join(child.labels)
            address = child.word_id
            word    = child.text
            graph.nodes[address].update(
            {
                'address': address,
                'word':  word,
                'rel':   rel,
            } )
            if not keep_dummy_root and child == self:
                # If we do not keep the dummy root node, set this tree
                # as the root node
                graph.root = graph.nodes[address]
            if add_morph and child.morph:
                # Add morphological information, if possible
                lemmas  = set([analysis[LEMMA] for analysis in child.morph])
                postags = set([analysis[POSTAG] for analysis in child.morph])
                feats   = set([analysis[FORM] for analysis in child.morph])
                lemma  = ('|'.join( list(lemmas)  )).replace(' ','_')
                postag = ('|'.join( list(postags) )).replace(' ','_')
                feats  = ('|'.join( list(feats) )).replace(' ','_')
                #  The key must be exactly 'tag': it used to be written
                #  with trailing spaces ('tag  '), which left the node's 
                #  real 'tag' entry unset;
                graph.nodes[address].update(
                {
                    'tag':   postag,
                    'ctag':  postag,
                    'feats': feats,
                    'lemma': lemma
                } )

        #
        # 2) Update / Add arcs of the graph 
        #
        for child in all_tree_nodes:
            #  Connect children of given word
            deps = [] if not child.children else [c.word_id for c in child.children]
            head_address = child.word_id
            for dep in deps:
                graph.add_arc( head_address, dep )
            if child.parent == None and keep_dummy_root:
                graph.add_arc( -1, head_address )
            #  Connect the parent of given node
            head = -1 if not child.parent else child.parent.word_id
            graph.nodes[head_address].update(
            {
                'head':  head,
            } )
        return graph