Example #1
0
    def test_tree_basics(self):
        # Walks the basic Tree API over the sample_tree fixture: rendering,
        # token/tag views, spans and labels, child indexing and iteration,
        # all_subtrees() ordering, out-of-range indexes, and repr().
        # (Plain comments rather than a docstring, so test runners keep
        # reporting the method name.)
        tree = Tree(sample_tree)

        # Bracketed renderings that are checked more than once below.
        np_text = ('(NP (DT a) (ADJP (RB fairly) (JJ simple)) '
                   '(NN parse) (NN tree))')
        vp_text = ('(VP (VBZ is) (NP (DT a) (ADJP (RB fairly) (JJ simple)) '
                   '(NN parse) (NN tree)))')
        s_text = ('(S (NP (DT This)) (VP (VBZ is) (NP (DT a) '
                  '(ADJP (RB fairly) (JJ simple)) (NN parse) (NN tree))) '
                  '(. .))')

        # Rendering and flat views of the whole tree.
        assert str(tree) == sample_tree
        assert tree.pretty_string() == sample_tree_pretty
        expected_tokens = ('This', 'is', 'a', 'fairly', 'simple', 'parse',
                           'tree', '.')
        expected_tags = ('DT', 'VBZ', 'DT', 'RB', 'JJ', 'NN', 'NN', '.')
        assert tree.tokens() == expected_tokens
        assert tree.tags() == expected_tags
        assert tree.tokens_and_tags() == list(zip(expected_tokens,
                                                  expected_tags))
        assert tree.span() == (0, 8)
        assert tree.label == 'S1'

        # The S1 root has a single child, the S node.
        top_level = tree.subtrees()
        assert len(top_level) == 1
        sentence = top_level[0]
        assert str(sentence) == s_text
        assert sentence.label == 'S'

        # First child of S: the subject NP spanning one token.
        subject = sentence[0]
        assert str(subject) == '(NP (DT This))'
        assert subject.label == 'NP'
        assert subject.span() == (0, 1)
        assert subject.tags() == ('DT',)
        assert subject.tokens() == ('This',)

        # Its only child is a preterminal carrying the token itself.
        determiner = subject[0]
        assert str(determiner) == '(DT This)'
        assert determiner.token == 'This'
        assert determiner.label == 'DT'
        assert tree[0][0][0].is_preterminal()
        assert len(tree[0]) == 3

        # Iterating a node yields its children in order.
        children = iter(tree[0])
        assert str(next(children)) == '(NP (DT This))'
        assert str(next(children)) == vp_text
        assert str(next(children)) == '(. .)'

        # all_subtrees() is expected to yield the tree itself followed by
        # every descendant in this order; the flag is is_preterminal().
        pairs = [
            (False, sample_tree),
            (False, s_text),
            (False, '(NP (DT This))'),
            (True, '(DT This)'),
            (False, vp_text),
            (True, '(VBZ is)'),
            (False, np_text),
            (True, '(DT a)'),
            (False, '(ADJP (RB fairly) (JJ simple))'),
            (True, '(RB fairly)'),
            (True, '(JJ simple)'),
            (True, '(NN parse)'),
            (True, '(NN tree)'),
            (True, '(. .)'),
        ]
        actual_pairs = [(node.is_preterminal(), str(node))
                        for node in tree.all_subtrees()]
        assert pairs == actual_pairs

        bad_lookups = (
            # index into a preterminal
            lambda: tree[0][0][0][0],
            # index a child that doesn't exist
            lambda: tree[500],
            lambda: tree[0][0][7777],
            lambda: tree[-30],
        )
        for bad_lookup in bad_lookups:
            self.assertRaises(IndexError, bad_lookup)

        # repr shouldn't crash, but we don't check (or rely on) its form
        for node in (tree, tree[0], tree[0][1], tree[0][1][0]):
            repr(node)
Example #2
0
def ptb(line, words):
  """Return the forms collected for the tree parsed from `line`, space-padded.

  `line` is handed to `Tree`, and the first entry of the resulting tree's
  `subtrees()` is passed to `ptb_recurse` together with `words` and an
  initially empty list.  Whatever that list holds afterwards is joined with
  single spaces and wrapped in one leading and one trailing space.

  NOTE(review): `ptb_recurse` is defined elsewhere; presumably it appends
  strings to the list in place -- confirm against its definition.
  """
  collected = []
  root = Tree(line).subtrees()[0]
  ptb_recurse(root, words, collected)
  # Pad both ends with a space; an empty list therefore yields two spaces.
  return ' %s ' % ' '.join(collected)
Example #3
0
    def test_tree_basics(self):
        # Basic Tree behaviour on the sample_tree fixture: string forms,
        # tokens/tags, spans, labels, indexing, iteration, the order of
        # all_subtrees(), invalid indexes, and repr().
        # (Written as comments rather than a docstring so test runners keep
        # showing the method name.)
        tree = Tree(sample_tree)

        # Expected bracketings for the larger constituents, spelled out once.
        sentence_text = ('(S (NP (DT This)) (VP (VBZ is) (NP (DT a) (ADJP '
                         '(RB fairly) (JJ simple)) (NN parse) (NN tree))) '
                         '(. .))')
        verb_phrase_text = ('(VP (VBZ is) (NP (DT a) (ADJP (RB fairly) '
                            '(JJ simple)) (NN parse) (NN tree)))')
        object_text = ('(NP (DT a) (ADJP (RB fairly) (JJ simple)) '
                       '(NN parse) (NN tree))')

        # Whole-tree views.
        assert str(tree) == sample_tree
        assert tree.pretty_string() == sample_tree_pretty
        words = ('This', 'is', 'a', 'fairly', 'simple', 'parse', 'tree', '.')
        tags = ('DT', 'VBZ', 'DT', 'RB', 'JJ', 'NN', 'NN', '.')
        assert tree.tokens() == words
        assert tree.tags() == tags
        assert tree.tokens_and_tags() == list(zip(words, tags))
        assert tree.span() == (0, 8)
        assert tree.label == 'S1'

        # Exactly one subtree hangs off the root: the S node.
        root_children = tree.subtrees()
        assert len(root_children) == 1
        s_node = root_children[0]
        assert str(s_node) == sentence_text
        assert s_node.label == 'S'

        # The leading NP covers just the first token.
        np_node = s_node[0]
        assert str(np_node) == '(NP (DT This))'
        assert np_node.label == 'NP'
        assert np_node.span() == (0, 1)
        assert np_node.tags() == ('DT',)
        assert np_node.tokens() == ('This',)

        # Its child is the preterminal holding the token.
        dt_node = np_node[0]
        assert str(dt_node) == '(DT This)'
        assert dt_node.token == 'This'
        assert dt_node.label == 'DT'
        assert tree[0][0][0].is_preterminal()
        assert len(tree[0]) == 3

        # Iteration hands back the children of S from left to right.
        child_iter = iter(tree[0])
        for expected_child in ('(NP (DT This))', verb_phrase_text, '(. .)'):
            assert str(next(child_iter)) == expected_child

        # Expected all_subtrees() sequence as (is_preterminal, text).
        pairs = [
            (False, sample_tree),
            (False, sentence_text),
            (False, '(NP (DT This))'),
            (True, '(DT This)'),
            (False, verb_phrase_text),
            (True, '(VBZ is)'),
            (False, object_text),
            (True, '(DT a)'),
            (False, '(ADJP (RB fairly) (JJ simple))'),
            (True, '(RB fairly)'),
            (True, '(JJ simple)'),
            (True, '(NN parse)'),
            (True, '(NN tree)'),
            (True, '(. .)'),
        ]
        actual_pairs = []
        for subtree in tree.all_subtrees():
            actual_pairs.append((subtree.is_preterminal(), str(subtree)))
        assert pairs == actual_pairs

        invalid_accesses = [
            # index into a preterminal
            lambda: tree[0][0][0][0],
            # index a child that doesn't exist
            lambda: tree[500],
            lambda: tree[0][0][7777],
            lambda: tree[-30],
        ]
        for access in invalid_accesses:
            self.assertRaises(IndexError, access)

        # repr shouldn't crash, but we don't check (or rely on) its form
        for node in (tree, tree[0], tree[0][1], tree[0][1][0]):
            repr(node)