def test_print_subtree(self):
    """Test print_subtree() method, which uses udapi.block.write.textmodetrees.

    Three renderings are compared with their expected text: the default
    output of a tree loaded from ``data/enh_deps.conllu``, the same tree with
    ``form,feats,misc`` attributes and no sentence header, and a small
    hand-built non-projective tree.
    """
    doc = Document()
    data_filename = os.path.join(os.path.dirname(__file__), 'data', 'enh_deps.conllu')
    doc.load_conllu(data_filename)
    root = doc.bundles[0].get_tree()

    expected1 = ("# sent_id = a-mf920901-001-p1s1A\n"
                 "# text = Slovenská ústava: pro i proti\n"
                 "─┮\n"
                 " │ ╭─╼ Slovenská ADJ amod\n"
                 " ╰─┾ ústava NOUN root\n"
                 " ┡─╼ : PUNCT punct\n"
                 " ╰─┮ pro ADP appos\n"
                 " ┡─╼ i CONJ cc\n"
                 " ╰─╼ proti ADP conj\n"
                 "\n")

    expected2 = ("─┮\n"
                 " │ ╭─╼ Slovenská Case=Nom|Degree=Pos|Gender=Fem|Negative=Pos|Number=Sing _\n"
                 " ╰─┾ ústava Case=Nom|Gender=Fem|Negative=Pos|Number=Sing SpaceAfter=No\n"
                 " ┡─╼ : _ _\n"
                 " ╰─┮ pro AdpType=Prep|Case=Acc LId=pro-1\n"
                 " ┡─╼ i _ LId=i-1\n"
                 " ╰─╼ proti AdpType=Prep|Case=Dat LId=proti-1\n"
                 "\n")

    # test non-projective tree
    root3 = Root()
    for i in range(1, 5):
        root3.create_child(form=str(i))
    nodes = root3.descendants(add_self=1)
    nodes[1].parent = nodes[3]
    nodes[4].parent = nodes[2]
    expected3 = ("─┮\n"
                 " │ ╭─╼ 1\n"
                 " ┡─╪───┮ 2\n"
                 " ╰─┶ 3 │\n"
                 " ╰─╼ 4\n"
                 "\n")

    # Remember the stream that is installed right now (a test runner may have
    # replaced sys.stdout with its own capture object) so that exactly this
    # stream is put back afterwards, instead of unconditionally falling back
    # to the interpreter's original sys.__stdout__.
    previous_stdout = sys.stdout
    try:
        sys.stdout = capture = io.StringIO()
        root.print_subtree(color=False)
        self.assertEqual(capture.getvalue(), expected1)

        # Empty the buffer so the next comparison sees only the new output.
        capture.seek(0)
        capture.truncate()
        root.print_subtree(color=False, attributes='form,feats,misc',
                           print_sent_id=False, print_text=False)
        self.assertEqual(capture.getvalue(), expected2)

        capture.seek(0)
        capture.truncate()
        root3.print_subtree(color=False, attributes='form', print_sent_id=0, print_text=0)
        self.assertEqual(capture.getvalue(), expected3)
    finally:
        sys.stdout = previous_stdout  # pylint: disable=redefined-variable-type
def test_deps_setter(self):
    """Check that an entry appended to ``deps`` shows up in ``raw_deps``.

    A root with three children is built, an enhanced dependency pointing
    from the first child to the second one is added, and the first child's
    ``raw_deps`` is expected to read ``'2:test'``.
    """
    # Build a minimal tree: one root with three children.
    tree_root = Root()
    for _unused in range(3):
        tree_root.create_child()

    children = tree_root.descendants()
    dependent = children[0]
    governor = children[1]

    # Register the enhanced dependency on the first child.
    enhanced_dep = {'parent': governor, 'deprel': 'test'}
    dependent.deps.append(enhanced_dep)

    self.assertEqual(dependent.raw_deps, '2:test')
def read_tree(self, document=None):
    """Read one sentence from ``self.filehandle`` and return it as a tree.

    Lines are consumed until an empty line (after stripping trailing
    whitespace) or the end of the input. Lines starting with ``#`` are
    skipped. A line starting with whitespace is passed to ``self._node``,
    which returns a node and the ordinal of its parent. Any other line
    starts a new surface token; its form is the line without its first two
    and last two characters.

    Args:
        document: Not used by this method; kept so that existing callers
            passing it keep working.

    Returns:
        The new ``Root``, or ``None`` when ``self.filehandle`` is ``None``
        or when no node was read.

    Raises:
        ValueError: If a node's parent ordinal does not index an existing node.
    """
    if self.filehandle is None:
        return None

    root = Root()
    parents = [0]  # parents[i] is the parent ordinal of the i-th node; slot 0 is the root
    words = []     # nodes read since the last surface-token line
    form = None    # form of the surface token the nodes in `words` belong to
    for line in self.filehandle:
        line = line.rstrip()
        if line == '':
            break
        if line[0] == '#':
            # Are comments allowed in VISL-cg?
            continue
        if line[0].isspace():
            # str.lstrip() returns a new string, so its result has to be
            # passed on; calling it for effect leaves the indentation in place.
            node, parent_ord = self._node(line.lstrip(), root)
            words.append(node)
            parents.append(parent_ord)
        else:
            # A new surface token starts: finish the previous one first.
            if words:
                words[0].form = form
                if len(words) > 1:
                    # Several nodes under one surface token form a multiword token.
                    split_forms = form.split()
                    if len(words) == len(split_forms):
                        for word, split_form in zip(words, split_forms):
                            word.form = split_form
                    else:
                        # The form cannot be split one-to-one among the nodes.
                        for word in words[1:]:
                            word.form = '_'
                    root.create_multiword_token(words, form=form)
                words = []
            form = line[2:-2]

    # Finish the last surface token of the sentence.
    # NOTE(review): unlike inside the loop, no multiword token is created
    # here when several nodes are pending - confirm this is intended.
    if words:
        words[0].form = form
        for word in words[1:]:
            word.form = '_'

    nodes = root.descendants(add_self=True)
    if len(nodes) == 1:
        # Only the root itself: nothing was read.
        return None
    for node_ord, node in enumerate(nodes[1:], 1):
        try:
            node.parent = nodes[parents[node_ord]]
        except IndexError as err:
            raise ValueError("Node %s HEAD is out of range (%d)"
                             % (node, parents[node_ord])) from err

    return root
def read_tree(self):
    """Build the next tree from ``self.filehandle`` and return its root.

    Input lines are processed until one is empty after right-stripping, or
    until the input ends:

    * a ``#`` line is appended (without the ``#``) to ``root.comment``;
    * a line starting with whitespace is left-stripped and handed to
      ``self._node``, which yields a node and its parent's ordinal;
    * any other line opens a new surface token whose form is the line
      without its first two and last two characters.

    Returns ``None`` when there is no filehandle or when the tree ends up
    with no node besides the root. Raises ``ValueError`` when a parent
    ordinal does not index an existing node.
    """
    if self.filehandle is None:
        return None

    root = Root()
    head_ords = [0]   # head_ords[i]: parent ordinal of the i-th node (0 = root slot)
    pending = []      # nodes collected for the current surface token
    surface = None    # form of the current surface token

    for raw_line in self.filehandle:
        text = raw_line.rstrip()
        if text == '':
            break

        lead = text[0]
        if lead == '#':
            root.comment += text[1:] + "\n"
        elif lead.isspace():
            node, head_ord = self._node(text.lstrip(), root)
            pending.append(node)
            head_ords.append(head_ord)
        else:
            # New surface token: close the one collected so far.
            if pending:
                pending[0].form = surface
                if len(pending) > 1:
                    pieces = surface.split()
                    if len(pending) == len(pieces):
                        for member, piece in zip(pending, pieces):
                            member.form = piece
                    else:
                        for member in pending[1:]:
                            member.form = '_'
                    root.create_multiword_token(pending, form=surface)
                pending = []
            surface = text[2:-2]

    # Close the token that was still open when the input stopped.
    if pending:
        pending[0].form = surface
        for member in pending[1:]:
            member.form = '_'

    nodes = root.descendants(add_self=True)
    if len(nodes) == 1:
        return None

    # Attach every non-root node to the parent recorded for its position.
    for position, child in enumerate(nodes[1:], start=1):
        try:
            child.parent = nodes[head_ords[position]]
        except IndexError:
            raise ValueError("Node %s HEAD is out of range (%d)" % (child, head_ords[position]))

    return root
def test_print_subtree(self):
    """Test print_subtree() method, which uses udapi.block.write.textmodetrees.

    The output printed for a tree loaded from ``data/enh_deps.conllu`` (with
    default and with custom attributes) and for a hand-built non-projective
    tree is compared with the expected text.
    """

    def printed(tree, **options):
        """Return what ``tree.print_subtree(**options)`` writes to sys.stdout."""
        # Put back the stream that was active before the call (a test runner
        # may have installed its own), not the interpreter's sys.__stdout__.
        saved_stdout = sys.stdout
        sys.stdout = buffer = io.StringIO()
        try:
            tree.print_subtree(**options)
        finally:
            sys.stdout = saved_stdout  # pylint: disable=redefined-variable-type
        return buffer.getvalue()

    doc = Document()
    data_filename = os.path.join(os.path.dirname(__file__), 'data', 'enh_deps.conllu')
    doc.load_conllu(data_filename)
    root = doc.bundles[0].get_tree()

    expected1 = ("# sent_id = a-mf920901-001-p1s1A\n"
                 "# text = Slovenská ústava: pro i proti\n"
                 "─┮\n"
                 " │ ╭─╼ Slovenská ADJ amod\n"
                 " ╰─┾ ústava NOUN root\n"
                 " ┡─╼ : PUNCT punct\n"
                 " ╰─┮ pro ADP appos\n"
                 " ┡─╼ i CONJ cc\n"
                 " ╰─╼ proti ADP conj\n"
                 "\n")

    expected2 = (
        "─┮\n"
        " │ ╭─╼ Slovenská Case=Nom|Degree=Pos|Gender=Fem|Negative=Pos|Number=Sing _\n"
        " ╰─┾ ústava Case=Nom|Gender=Fem|Negative=Pos|Number=Sing SpaceAfter=No\n"
        " ┡─╼ : _ _\n"
        " ╰─┮ pro AdpType=Prep|Case=Acc LId=pro-1\n"
        " ┡─╼ i _ LId=i-1\n"
        " ╰─╼ proti AdpType=Prep|Case=Dat LId=proti-1\n"
        "\n")

    # test non-projective tree
    root3 = Root()
    for i in range(1, 5):
        root3.create_child(form=str(i))
    nodes = root3.descendants(add_self=1)
    nodes[1].parent = nodes[3]
    nodes[4].parent = nodes[2]
    expected3 = ("─┮\n"
                 " │ ╭─╼ 1\n"
                 " ┡─╪───┮ 2\n"
                 " ╰─┶ 3 │\n"
                 " ╰─╼ 4\n"
                 "\n")

    # Each rendering is captured separately, so stdout is already restored
    # when an assertion fails and reports its message.
    self.assertEqual(printed(root, color=False), expected1)
    self.assertEqual(
        printed(root, color=False, attributes='form,feats,misc',
                print_sent_id=False, print_text=False),
        expected2)
    self.assertEqual(
        printed(root3, color=False, attributes='form', print_sent_id=0, print_text=0),
        expected3)