def test_tree_basics(self):
    """Exercise the basic read-only ``Tree`` API against ``sample_tree``.

    Covers string rendering, token/tag extraction, spans, labels, child
    indexing and iteration, the sequence produced by ``all_subtrees()``,
    ``IndexError`` on out-of-range indexing, and that ``repr()`` of
    several nodes does not raise.
    """
    tree = Tree(sample_tree)

    # Expected bracketed renderings that are checked more than once below.
    np_this_str = '(NP (DT This))'
    np_a_str = ('(NP (DT a) (ADJP (RB fairly) (JJ simple)) '
                '(NN parse) (NN tree))')
    vp_str = ('(VP (VBZ is) (NP (DT a) (ADJP (RB fairly) (JJ simple)) '
              '(NN parse) (NN tree)))')
    s_str = ('(S (NP (DT This)) (VP (VBZ is) (NP (DT a) '
             '(ADJP (RB fairly) (JJ simple)) (NN parse) (NN tree))) '
             '(. .))')
    expected_tokens = ('This', 'is', 'a', 'fairly', 'simple', 'parse',
                       'tree', '.')
    expected_tags = ('DT', 'VBZ', 'DT', 'RB', 'JJ', 'NN', 'NN', '.')

    # Whole-tree accessors.
    assert str(tree) == sample_tree
    assert tree.pretty_string() == sample_tree_pretty
    assert tree.tokens() == expected_tokens
    assert tree.tags() == expected_tags
    assert tree.tokens_and_tags() == list(zip(expected_tokens,
                                              expected_tags))
    assert tree.span() == (0, 8)
    assert tree.label == 'S1'

    # Walk down the leftmost path via subtrees() and indexing.
    top_level = tree.subtrees()
    assert len(top_level) == 1
    s_node = top_level[0]
    assert str(s_node) == s_str
    assert s_node.label == 'S'

    np_node = s_node[0]
    assert str(np_node) == np_this_str
    assert np_node.label == 'NP'
    assert np_node.span() == (0, 1)
    assert np_node.tags() == ('DT',)
    assert np_node.tokens() == ('This',)

    dt_node = np_node[0]
    assert str(dt_node) == '(DT This)'
    assert dt_node.token == 'This'
    assert dt_node.label == 'DT'

    assert tree[0][0][0].is_preterminal()

    # Iterating a node yields its children in order.
    assert len(tree[0]) == 3
    children = iter(tree[0])
    assert str(next(children)) == np_this_str
    assert str(next(children)) == vp_str
    assert str(next(children)) == '(. .)'

    # all_subtrees() is compared as (is_preterminal, rendering) pairs.
    expected_walk = [
        (False, sample_tree),
        (False, s_str),
        (False, np_this_str),
        (True, '(DT This)'),
        (False, vp_str),
        (True, '(VBZ is)'),
        (False, np_a_str),
        (True, '(DT a)'),
        (False, '(ADJP (RB fairly) (JJ simple))'),
        (True, '(RB fairly)'),
        (True, '(JJ simple)'),
        (True, '(NN parse)'),
        (True, '(NN tree)'),
        (True, '(. .)'),
    ]
    actual_walk = []
    for node in tree.all_subtrees():
        actual_walk.append((node.is_preterminal(), str(node)))
    assert expected_walk == actual_walk

    bad_lookups = (
        # index into a preterminal
        lambda: tree[0][0][0][0],
        # index a child that doesn't exist
        lambda: tree[500],
        lambda: tree[0][0][7777],
        lambda: tree[-30],
    )
    for bad_lookup in bad_lookups:
        self.assertRaises(IndexError, bad_lookup)

    # repr shouldn't crash, but we don't check (or rely on) its form
    repr(tree)
    repr(tree[0])
    repr(tree[0][1])
    repr(tree[0][1][0])
def ptb(line, words):
    """Return the space-joined forms gathered from the tree in ``line``.

    ``line`` is parsed with ``Tree`` and the first entry of its
    ``subtrees()`` is handed to ``ptb_recurse`` together with ``words``
    and an initially empty list. The contents of that list afterwards are
    joined with single spaces and wrapped in one leading and one trailing
    space.

    NOTE(review): ``ptb_recurse`` is defined outside this block; it
    presumably appends strings to the list it is given -- confirm.
    """
    collected = []
    root = Tree(line).subtrees()[0]
    ptb_recurse(root, words, collected)
    return ' %s ' % ' '.join(collected)
def test_tree_basics(self):
    """Exercise the basic read-only ``Tree`` API against ``sample_tree``.

    Covers string rendering, token/tag extraction, spans, labels, child
    indexing and iteration, the sequence produced by ``all_subtrees()``,
    ``IndexError`` on out-of-range indexing, and that ``repr()`` of
    several nodes does not raise.
    """
    tree = Tree(sample_tree)

    # Expected bracketed renderings that are checked more than once below.
    np_this_str = '(NP (DT This))'
    np_a_str = ('(NP (DT a) (ADJP (RB fairly) (JJ simple)) '
                '(NN parse) (NN tree))')
    vp_str = ('(VP (VBZ is) (NP (DT a) (ADJP (RB fairly) (JJ simple)) '
              '(NN parse) (NN tree)))')
    s_str = ('(S (NP (DT This)) (VP (VBZ is) (NP (DT a) '
             '(ADJP (RB fairly) (JJ simple)) (NN parse) (NN tree))) '
             '(. .))')
    expected_tokens = ('This', 'is', 'a', 'fairly', 'simple', 'parse',
                       'tree', '.')
    expected_tags = ('DT', 'VBZ', 'DT', 'RB', 'JJ', 'NN', 'NN', '.')

    # Whole-tree accessors.
    assert str(tree) == sample_tree
    assert tree.pretty_string() == sample_tree_pretty
    assert tree.tokens() == expected_tokens
    assert tree.tags() == expected_tags
    assert tree.tokens_and_tags() == list(zip(expected_tokens,
                                              expected_tags))
    assert tree.span() == (0, 8)
    assert tree.label == 'S1'

    # Walk down the leftmost path via subtrees() and indexing.
    top_level = tree.subtrees()
    assert len(top_level) == 1
    s_node = top_level[0]
    assert str(s_node) == s_str
    assert s_node.label == 'S'

    np_node = s_node[0]
    assert str(np_node) == np_this_str
    assert np_node.label == 'NP'
    assert np_node.span() == (0, 1)
    assert np_node.tags() == ('DT',)
    assert np_node.tokens() == ('This',)

    dt_node = np_node[0]
    assert str(dt_node) == '(DT This)'
    assert dt_node.token == 'This'
    assert dt_node.label == 'DT'

    assert tree[0][0][0].is_preterminal()

    # Iterating a node yields its children in order.
    assert len(tree[0]) == 3
    children = iter(tree[0])
    assert str(next(children)) == np_this_str
    assert str(next(children)) == vp_str
    assert str(next(children)) == '(. .)'

    # all_subtrees() is compared as (is_preterminal, rendering) pairs.
    expected_walk = [
        (False, sample_tree),
        (False, s_str),
        (False, np_this_str),
        (True, '(DT This)'),
        (False, vp_str),
        (True, '(VBZ is)'),
        (False, np_a_str),
        (True, '(DT a)'),
        (False, '(ADJP (RB fairly) (JJ simple))'),
        (True, '(RB fairly)'),
        (True, '(JJ simple)'),
        (True, '(NN parse)'),
        (True, '(NN tree)'),
        (True, '(. .)'),
    ]
    actual_walk = []
    for node in tree.all_subtrees():
        actual_walk.append((node.is_preterminal(), str(node)))
    assert expected_walk == actual_walk

    bad_lookups = (
        # index into a preterminal
        lambda: tree[0][0][0][0],
        # index a child that doesn't exist
        lambda: tree[500],
        lambda: tree[0][0][7777],
        lambda: tree[-30],
    )
    for bad_lookup in bad_lookups:
        self.assertRaises(IndexError, bad_lookup)

    # repr shouldn't crash, but we don't check (or rely on) its form
    repr(tree)
    repr(tree[0])
    repr(tree[0][1])
    repr(tree[0][1][0])