Exemple #1
0
    def _parse(self, t):
        """
        Build a dependency tree from the text of one parsed sentence.

        Every non-empty line of ``t`` is dispatched on its first character:

        - ``*`` or ``+`` starts a new node (bunsetsu or tag).  The second
          space-separated field must consist of the parent index followed by
          a relation letter (``A``, ``D``, ``I`` or ``P``).  A parent index
          of ``-1`` makes the node the root of the graph.
        - ``#`` marks a line that is ignored.
        - anything else is a morph line; it is stored as a
          ``(first_field, rest_of_line)`` tuple on the most recently started
          node.

        If ``self.morphs2str`` is set, each node's list of morph tuples is
        replaced by the result of calling it on that list.

        :param t: the text of a single sentence
        :return: whatever ``DependencyGraph.tree()`` yields for the graph
            assembled from ``t``
        """
        dg = DependencyGraph()
        i = 0
        for line in t.splitlines():
            if not line:
                # An empty line has no first character to dispatch on;
                # skip it instead of failing with IndexError on line[0].
                continue

            if line[0] in "*+":
                # start of bunsetsu or tag

                cells = line.strip().split(" ", 3)
                m = re.match(r"([\-0-9]*)([ADIP])", cells[1])

                assert m is not None

                node = dg.nodes[i]
                node.update({"address": i, "rel": m.group(2), "word": []})

                dep_parent = int(m.group(1))

                if dep_parent == -1:
                    dg.root = node
                else:
                    # Register this node as a dependent of its parent.
                    dg.nodes[dep_parent]["deps"].append(i)

                i += 1
            elif line[0] != "#":
                # normal morph
                cells = line.strip().split(" ")
                # convert cells to morph tuples
                morph = cells[0], " ".join(cells[1:])
                # i was already advanced past the current node, hence i - 1.
                dg.nodes[i - 1]["word"].append(morph)

        if self.morphs2str:
            for node in dg.nodes.values():
                node["word"] = self.morphs2str(node["word"])

        return dg.tree()
Exemple #2
0
    def _parse(self, t):
        """
        Turn the textual parse of a single sentence into a dependency tree.

        Lines of ``t`` are classified by their first character.  A line
        beginning with ``*`` or ``+`` opens a new node (bunsetsu or tag); its
        second space-separated field carries the parent index immediately
        followed by one relation letter out of ``A``, ``D``, ``I``, ``P``,
        and a parent index of ``-1`` designates the root.  A line beginning
        with ``#`` is ignored.  Any other non-empty line is a morph and is
        appended, as a ``(first_field, remaining_fields)`` tuple, to the
        node opened most recently.

        When ``self.morphs2str`` is set, it is applied to every node's morph
        list and the result replaces that list.

        :param t: the text of a single sentence
        :return: the value of ``DependencyGraph.tree()`` for the graph built
            from ``t``
        """
        dg = DependencyGraph()
        i = 0
        for line in t.splitlines():
            if not line:
                # Indexing line[0] on an empty line would raise IndexError,
                # so blank lines are skipped.
                continue

            if line[0] in '*+':
                # start of bunsetsu or tag

                cells = line.strip().split(" ", 3)
                m = re.match(r"([\-0-9]*)([ADIP])", cells[1])

                assert m is not None

                node = dg.nodes[i]
                node.update({'address': i, 'rel': m.group(2), 'word': []})

                dep_parent = int(m.group(1))

                if dep_parent == -1:
                    dg.root = node
                else:
                    # Record this node among its parent's dependents.
                    dg.nodes[dep_parent]['deps'].append(i)

                i += 1
            elif line[0] != '#':
                # normal morph
                cells = line.strip().split(" ")
                # convert cells to morph tuples
                morph = cells[0], ' '.join(cells[1:])
                # The counter already points past the open node: use i - 1.
                dg.nodes[i - 1]['word'].append(morph)

        if self.morphs2str:
            for node in dg.nodes.values():
                node['word'] = self.morphs2str(node['word'])

        return dg.tree()
Exemple #3
0
def flattened_node_list(graph):
    """
    Takes an instance of DependencyGraph corresponding to a parsed sentence.
    Flattens into a list of DependencyGraph instances, each with a different
    word from the sentence as its root node (and no children).

    The first entry of ``graph.nodelist`` is skipped (presumably the
    artificial top node -- confirm against DependencyGraph).  Every remaining
    node is copied before being rewritten, so the nodes of ``graph`` itself
    are left untouched.

    :param graph: object exposing a ``nodelist`` sequence of node mappings
    :return: list of new single-word DependencyGraph instances, in the same
        order as the nodes appear in ``graph.nodelist[1:]``
    """
    flattened = []
    for original in graph.nodelist[1:]:
        # Copying only the list (as a slice or copy.copy of it does) would
        # still share the node objects with ``graph``; copy the node itself
        # so the overwrites below do not corrupt the caller's graph.
        node = copy.copy(original)
        node["deps"] = []
        node["head"] = 0
        node["address"] = 1
        new_graph = DependencyGraph()
        new_graph.nodelist.append(node)
        new_graph.root = node
        flattened.append(new_graph)
    return flattened
Exemple #4
0
def flattened_node_list(graph):
    """
    Takes an instance of DependencyGraph corresponding to a parsed sentence.
    Flattens into a list of DependencyGraph instances, each with a different
    word from the sentence as its root node (and no children).

    All entries of ``graph.nodelist`` except the first are processed (the
    first is presumably the artificial top node -- verify against
    DependencyGraph).  Each node is duplicated before its ``deps``, ``head``
    and ``address`` fields are reset, which keeps ``graph`` unmodified.

    :param graph: object exposing a ``nodelist`` sequence of node mappings
    :return: list of new DependencyGraph instances, one per processed node
    """
    flattened = []
    for source_node in graph.nodelist[1:]:
        # A shallow copy of the list alone leaves the node objects shared
        # with the input graph; duplicate each node so resetting its fields
        # has no effect on the caller's data.
        node = copy.copy(source_node)
        node["deps"] = []
        node["head"] = 0
        node["address"] = 1
        new_graph = DependencyGraph()
        new_graph.nodelist.append(node)
        new_graph.root = node
        flattened.append(new_graph)
    return flattened
Exemple #5
0
    def _parse(self, t):
        """
        Build a dependency tree from the text of one parsed sentence.

        Lines of ``t`` are handled as follows:

        - a line starting with ``*`` or ``+`` opens a new node (bunsetsu or
          tag).  Its second space-separated field must be the parent index
          followed by a relation letter (``A``, ``D``, ``I`` or ``P``); a
          parent index of ``-1`` makes the node the root.
        - a line starting with ``#`` is ignored.
        - every other line is a morph, stored as a
          ``(first_field, rest_of_line)`` tuple on the node opened most
          recently.

        If ``self.morphs2str`` is set, each node's morph list is replaced by
        the result of calling it on that list.

        :param t: the text of a single sentence
        :return: whatever ``DependencyGraph.tree()`` yields for the graph
            assembled from ``t``
        """
        dg = DependencyGraph()
        i = 0
        for line in t.splitlines():
            if line.startswith(("*", "+")):
                # start of bunsetsu or tag

                cells = line.strip().split(" ", 3)
                m = re.match(r"([\-0-9]*)([ADIP])", cells[1])

                assert m is not None

                dep_parent = int(m.group(1))

                # Grow the node list BEFORE indexing into it, so that both
                # the current node and its (possibly later) parent exist.
                while len(dg.nodelist) < max(i, dep_parent) + 1:
                    dg.nodelist.append({'word': [], 'deps': []})

                node = dg.nodelist[i]
                node['address'] = i
                node['rel'] = m.group(2)  # dep_type
                node['word'] = []

                if dep_parent == -1:
                    dg.root = node
                else:
                    dg.nodelist[dep_parent]['deps'].append(i)

                i += 1
            elif not line.startswith("#"):
                # normal morph
                cells = line.strip().split(" ")
                # convert cells to morph tuples
                morph = (cells[0], ' '.join(cells[1:]))
                # i was already advanced past the current node, hence i - 1.
                dg.nodelist[i - 1]['word'].append(morph)

        if self.morphs2str:
            for node in dg.nodelist:
                node['word'] = self.morphs2str(node['word'])

        return dg.tree()
Exemple #6
0
    def _parse(self, t):
        """
        Turn the textual parse of a single sentence into a dependency tree.

        A line of ``t`` that starts with ``*`` or ``+`` opens a new node
        (bunsetsu or tag); its second space-separated field holds the parent
        index directly followed by one relation letter out of ``A``, ``D``,
        ``I``, ``P``, and a parent index of ``-1`` designates the root.  A
        line that starts with ``#`` is ignored.  Any other line is a morph
        and is appended, as a ``(first_field, remaining_fields)`` tuple, to
        the node opened most recently.

        When ``self.morphs2str`` is set, it is applied to every node's morph
        list and the result replaces that list.

        :param t: the text of a single sentence
        :return: the value of ``DependencyGraph.tree()`` for the graph built
            from ``t``
        """
        dg = DependencyGraph()
        i = 0
        for line in t.splitlines():
            if line.startswith(("*", "+")):
                # start of bunsetsu or tag

                cells = line.strip().split(" ", 3)
                m = re.match(r"([\-0-9]*)([ADIP])", cells[1])

                assert m is not None

                dep_parent = int(m.group(1))

                # The list must be long enough for both this node and its
                # parent before either slot is accessed; extending it only
                # after dg.nodelist[i] had been read came too late.
                needed = max(i, dep_parent) + 1
                while len(dg.nodelist) < needed:
                    dg.nodelist.append({'word': [], 'deps': []})

                node = dg.nodelist[i]
                node['address'] = i
                node['rel'] = m.group(2)  # dep_type
                node['word'] = []

                if dep_parent == -1:
                    dg.root = node
                else:
                    dg.nodelist[dep_parent]['deps'].append(i)

                i += 1
            elif not line.startswith("#"):
                # normal morph
                cells = line.strip().split(" ")
                # convert cells to morph tuples
                morph = (cells[0], ' '.join(cells[1:]))
                # The counter already points past the open node: use i - 1.
                dg.nodelist[i - 1]['word'].append(morph)

        if self.morphs2str:
            for node in dg.nodelist:
                node['word'] = self.morphs2str(node['word'])

        return dg.tree()
Exemple #7
0
def cabocha2depgraph(t):
    """
    Convert parser output text into a DependencyGraph of bunsetsu nodes.

    The input is presumably CaboCha output (inferred from the function
    name -- confirm with the caller).  Lines are handled as follows:

    - a line starting with ``*`` opens a new bunsetsu node.  Its third
      space-separated field must be the parent index followed by a relation
      letter (``A``, ``D``, ``I`` or ``P``); a parent index of ``-1`` makes
      the node the root.
    - a line starting with ``EOS`` and any whitespace-only line are skipped.
    - every other line is a morph: the text before the first tab is appended
      to the current node's ``word`` list and the comma-separated second
      tab field is appended, as a tuple, to its ``tag`` list.

    :param t: parser output for one sentence, as a single string
    :return: the populated DependencyGraph (also for empty input)
    :raises ValueError: if a ``*`` line's third field does not have the
        ``<parent><relation>`` form
    """
    dg = DependencyGraph()
    i = 0
    for line in t.splitlines():
        if line.startswith("*"):
            # start of bunsetsu

            cells = line.strip().split(" ", 3)
            m = re.match(r"([\-0-9]*)([ADIP])", cells[2])
            if m is None:
                raise ValueError("malformed bunsetsu header: %r" % (line,))

            dep_parent = int(m.group(1))

            # Grow the node list before indexing so that both this node and
            # its (possibly later) parent are guaranteed to exist.
            while len(dg.nodelist) < max(i, dep_parent) + 1:
                dg.nodelist.append({'word': [], 'deps': [], 'tag': []})

            node = dg.nodelist[i]
            node.update(
                {'address': i,
                 'rel': m.group(2),  # dep_type
                 'word': [],
                 'tag': []
                 })

            if dep_parent == -1:
                dg.root = node
            else:
                dg.nodelist[dep_parent]['deps'].append(i)

            i += 1
        elif line.strip() and not line.startswith("EOS"):
            # normal morph
            cells = line.strip().split("\t")

            surface = cells[0]
            features = tuple(cells[1].split(','))
            # i was already advanced past the current node, hence i - 1.
            dg.nodelist[i - 1]['word'].append(surface)
            dg.nodelist[i - 1]['tag'].append(features)

    # Return only once every line has been consumed; returning from inside
    # the loop would hand back a graph built from the first line alone.
    return dg