コード例 #1
0
ファイル: test_node.py プロジェクト: udapi/udapi-python
    def test_print_subtree(self):
        """Test print_subtree() method, which uses udapi.block.write.textmodetrees."""
        doc = Document()
        data_filename = os.path.join(os.path.dirname(__file__), 'data', 'enh_deps.conllu')
        doc.load_conllu(data_filename)
        root = doc.bundles[0].get_tree()

        expected1 = ("# sent_id = a-mf920901-001-p1s1A\n"
                     "# text = Slovenská ústava: pro i proti\n"
                     "─┮\n"
                     " │ ╭─╼ Slovenská ADJ amod\n"
                     " ╰─┾ ústava NOUN root\n"
                     "   ┡─╼ : PUNCT punct\n"
                     "   ╰─┮ pro ADP appos\n"
                     "     ┡─╼ i CONJ cc\n"
                     "     ╰─╼ proti ADP conj\n"
                     "\n")
        expected2 = ("─┮\n"
                     " │ ╭─╼ Slovenská Case=Nom|Degree=Pos|Gender=Fem|Negative=Pos|Number=Sing _\n"
                     " ╰─┾ ústava Case=Nom|Gender=Fem|Negative=Pos|Number=Sing SpaceAfter=No\n"
                     "   ┡─╼ : _ _\n"
                     "   ╰─┮ pro AdpType=Prep|Case=Acc LId=pro-1\n"
                     "     ┡─╼ i _ LId=i-1\n"
                     "     ╰─╼ proti AdpType=Prep|Case=Dat LId=proti-1\n"
                     "\n")

        # test non-projective tree
        root3 = Root()
        for i in range(1, 5):
            root3.create_child(form=str(i))
        nodes = root3.descendants(add_self=1)
        nodes[1].parent = nodes[3]
        nodes[4].parent = nodes[2]
        expected3 = ("─┮\n"
                     " │ ╭─╼ 1\n"
                     " ┡─╪───┮ 2\n"
                     " ╰─┶ 3 │\n"
                     "       ╰─╼ 4\n"
                     "\n")

        try:
            sys.stdout = capture = io.StringIO()
            root.print_subtree(color=False)
            self.assertEqual(capture.getvalue(), expected1)
            capture.seek(0)
            capture.truncate()
            root.print_subtree(color=False, attributes='form,feats,misc',
                               print_sent_id=False, print_text=False)
            self.assertEqual(capture.getvalue(), expected2)
            capture.seek(0)
            capture.truncate()
            root3.print_subtree(color=False, attributes='form', print_sent_id=0, print_text=0)
            self.assertEqual(capture.getvalue(), expected3)
        finally:
            sys.stdout = sys.__stdout__  # pylint: disable=redefined-variable-type
コード例 #2
0
    def test_deps_setter(self):
        """Test the deserialization of enhanced dependencies."""
        # Create a sample dependency tree.
        root = Root()
        for _ in range(3):
            root.create_child()

        nodes = root.descendants()
        nodes[0].deps.append({'parent': nodes[1], 'deprel': 'test'})

        self.assertEqual(nodes[0].raw_deps, '2:test')
コード例 #3
0
ファイル: test_node.py プロジェクト: udapi/udapi-python
    def test_deps_setter(self):
        """Test the deserialization of enhanced dependencies."""
        # Create a sample dependency tree.
        root = Root()
        for _ in range(3):
            root.create_child()

        nodes = root.descendants()
        nodes[0].deps.append({'parent': nodes[1], 'deprel': 'test'})

        self.assertEqual(nodes[0].raw_deps, '2:test')
コード例 #4
0
    def read_tree(self, document=None):
        if self.filehandle is None:
            return None

        root = Root()
        parents = [0]
        words = []
        form = None
        for line in self.filehandle:
            line = line.rstrip()
            if line == '':
                break
            if line[0] == '#':
                # Are comments allowed in VISL-cg?
                continue

            if line[0].isspace():
                line.lstrip(line)
                node, parent_ord = self._node(line, root)
                words.append(node)
                parents.append(parent_ord)
            else:
                if words:
                    words[0].form = form
                    if len(words) > 1:
                        split_forms = form.split()
                        if len(words) == len(split_forms):
                            for word, split_form in zip(words, split_forms):
                                word.form = split_form
                        else:
                            for word in words[1:]:
                                word.form = '_'
                        root.create_multiword_token(words, form=form)
                    words = []
                form = line[2:-2]

        if words:
            words[0].form = form
            for word in words[1:]:
                word.form = '_'

        nodes = root.descendants(add_self=True)
        if len(nodes) == 1:
            return None
        for node_ord, node in enumerate(nodes[1:], 1):
            try:
                node.parent = nodes[parents[node_ord]]
            except IndexError:
                raise ValueError("Node %s HEAD is out of range (%d)" %
                                 (node, parents[node_ord]))

        return root
コード例 #5
0
ファイル: vislcg.py プロジェクト: udapi/udapi-python
    def read_tree(self):
        if self.filehandle is None:
            return None

        root = Root()
        parents = [0]
        words = []
        form = None
        for line in self.filehandle:
            line = line.rstrip()
            if line == '':
                break
            if line[0] == '#':
                root.comment += line[1:] + "\n"
                continue

            if line[0].isspace():
                node, parent_ord = self._node(line.lstrip(), root)
                words.append(node)
                parents.append(parent_ord)
                continue

            if words:
                words[0].form = form
                if len(words) > 1:
                    split_forms = form.split()
                    if len(words) == len(split_forms):
                        for word, split_form in zip(words, split_forms):
                            word.form = split_form
                    else:
                        for word in words[1:]:
                            word.form = '_'
                    root.create_multiword_token(words, form=form)
                words = []
            form = line[2:-2]

        if words:
            words[0].form = form
            for word in words[1:]:
                word.form = '_'

        nodes = root.descendants(add_self=True)
        if len(nodes) == 1:
            return None
        for node_ord, node in enumerate(nodes[1:], 1):
            try:
                node.parent = nodes[parents[node_ord]]
            except IndexError:
                raise ValueError("Node %s HEAD is out of range (%d)" % (node, parents[node_ord]))

        return root
コード例 #6
0
    def test_print_subtree(self):
        """Test print_subtree() method, which uses udapi.block.write.textmodetrees."""
        doc = Document()
        data_filename = os.path.join(os.path.dirname(__file__), 'data',
                                     'enh_deps.conllu')
        doc.load_conllu(data_filename)
        root = doc.bundles[0].get_tree()

        expected1 = ("# sent_id = a-mf920901-001-p1s1A\n"
                     "# text = Slovenská ústava: pro i proti\n"
                     "─┮\n"
                     " │ ╭─╼ Slovenská ADJ amod\n"
                     " ╰─┾ ústava NOUN root\n"
                     "   ┡─╼ : PUNCT punct\n"
                     "   ╰─┮ pro ADP appos\n"
                     "     ┡─╼ i CONJ cc\n"
                     "     ╰─╼ proti ADP conj\n"
                     "\n")
        expected2 = (
            "─┮\n"
            " │ ╭─╼ Slovenská Case=Nom|Degree=Pos|Gender=Fem|Negative=Pos|Number=Sing _\n"
            " ╰─┾ ústava Case=Nom|Gender=Fem|Negative=Pos|Number=Sing SpaceAfter=No\n"
            "   ┡─╼ : _ _\n"
            "   ╰─┮ pro AdpType=Prep|Case=Acc LId=pro-1\n"
            "     ┡─╼ i _ LId=i-1\n"
            "     ╰─╼ proti AdpType=Prep|Case=Dat LId=proti-1\n"
            "\n")

        # test non-projective tree
        root3 = Root()
        for i in range(1, 5):
            root3.create_child(form=str(i))
        nodes = root3.descendants(add_self=1)
        nodes[1].parent = nodes[3]
        nodes[4].parent = nodes[2]
        expected3 = ("─┮\n"
                     " │ ╭─╼ 1\n"
                     " ┡─╪───┮ 2\n"
                     " ╰─┶ 3 │\n"
                     "       ╰─╼ 4\n"
                     "\n")

        try:
            sys.stdout = capture = io.StringIO()
            root.print_subtree(color=False)
            self.assertEqual(capture.getvalue(), expected1)
            capture.seek(0)
            capture.truncate()
            root.print_subtree(color=False,
                               attributes='form,feats,misc',
                               print_sent_id=False,
                               print_text=False)
            self.assertEqual(capture.getvalue(), expected2)
            capture.seek(0)
            capture.truncate()
            root3.print_subtree(color=False,
                                attributes='form',
                                print_sent_id=0,
                                print_text=0)
            self.assertEqual(capture.getvalue(), expected3)
        finally:
            sys.stdout = sys.__stdout__  # pylint: disable=redefined-variable-type