Esempio n. 1
0
    def test_tree_basics(self):
        """Check the basic accessors of ``Tree`` against ``sample_tree``.

        Verifies string round-tripping, tokens/tags/span/label, child
        indexing and iteration, the sequence produced by
        ``all_subtrees()``, that bad indices raise ``IndexError``, and
        that ``repr`` can be called without raising.
        """
        # unittest assertion methods are used instead of bare ``assert``
        # statements: they are not stripped under ``python -O`` and they
        # report both operands when a comparison fails.
        tree = Tree(sample_tree)
        self.assertEqual(str(tree), sample_tree)
        self.assertEqual(tree.pretty_string(), sample_tree_pretty)
        self.assertEqual(tree.tokens(),
                         ('This', 'is', 'a', 'fairly', 'simple',
                          'parse', 'tree', '.'))
        self.assertEqual(tree.tags(),
                         ('DT', 'VBZ', 'DT', 'RB', 'JJ', 'NN', 'NN', '.'))
        self.assertEqual(
            tree.tokens_and_tags(),
            [('This', 'DT'), ('is', 'VBZ'), ('a', 'DT'), ('fairly', 'RB'),
             ('simple', 'JJ'), ('parse', 'NN'), ('tree', 'NN'), ('.', '.')])
        self.assertEqual(tree.span(), (0, 8))
        self.assertEqual(tree.label, 'S1')

        # the root (S1) has a single child, the S node
        subtrees = tree.subtrees()
        self.assertEqual(len(subtrees), 1)
        self.assertEqual(str(subtrees[0]),
                         '(S (NP (DT This)) (VP (VBZ is) (NP '
                         '(DT a) (ADJP (RB fairly) (JJ simple)) '
                         '(NN parse) (NN tree))) (. .))')
        self.assertEqual(subtrees[0].label, 'S')
        self.assertEqual(str(subtrees[0][0]), '(NP (DT This))')
        self.assertEqual(subtrees[0][0].label, 'NP')
        self.assertEqual(subtrees[0][0].span(), (0, 1))
        self.assertEqual(subtrees[0][0].tags(), ('DT', ))
        self.assertEqual(subtrees[0][0].tokens(), ('This', ))
        self.assertEqual(str(subtrees[0][0][0]), '(DT This)')
        self.assertEqual(subtrees[0][0][0].token, 'This')
        self.assertEqual(subtrees[0][0][0].label, 'DT')
        self.assertTrue(tree[0][0][0].is_preterminal())
        self.assertEqual(len(tree[0]), 3)

        # iterating over a tree yields its children in order
        subtrees = iter(tree[0])
        self.assertEqual(str(next(subtrees)), '(NP (DT This))')
        self.assertEqual(str(next(subtrees)),
                         '(VP (VBZ is) (NP (DT a) (ADJP '
                         '(RB fairly) (JJ simple)) (NN parse) '
                         '(NN tree)))')
        self.assertEqual(str(next(subtrees)), '(. .)')

        # expected (is_preterminal, string form) for every subtree, in
        # the order all_subtrees() is expected to produce them
        pairs = [(False, sample_tree),
                 (False, '(S (NP (DT This)) (VP (VBZ is) (NP (DT a) (ADJP '
                         '(RB fairly) (JJ simple)) (NN parse) (NN tree))) '
                         '(. .))'),
                 (False, '(NP (DT This))'),
                 (True, '(DT This)'),
                 (False, '(VP (VBZ is) (NP (DT a) (ADJP (RB fairly) (JJ '
                         'simple)) (NN parse) (NN tree)))'),
                 (True, '(VBZ is)'),
                 (False, '(NP (DT a) (ADJP (RB fairly) (JJ simple)) (NN '
                         'parse) (NN tree))'),
                 (True, '(DT a)'),
                 (False, '(ADJP (RB fairly) (JJ simple))'),
                 (True, '(RB fairly)'),
                 (True, '(JJ simple)'),
                 (True, '(NN parse)'),
                 (True, '(NN tree)'),
                 (True, '(. .)')]
        actual_pairs = [(subtree.is_preterminal(), str(subtree))
                        for subtree in tree.all_subtrees()]
        self.assertEqual(pairs, actual_pairs)

        # index into a preterminal
        self.assertRaises(IndexError, lambda: tree[0][0][0][0])
        # index a child that doesn't exist
        self.assertRaises(IndexError, lambda: tree[500])
        self.assertRaises(IndexError, lambda: tree[0][0][7777])
        self.assertRaises(IndexError, lambda: tree[-30])

        # repr shouldn't crash, but we don't check (or rely on) its form
        repr(tree)
        repr(tree[0])
        repr(tree[0][1])
        repr(tree[0][1][0])
Esempio n. 2
0
    # Demo: find the head of a parse tree, list its syntactic
    # dependencies, and lexicalize every subtree label.
    #
    # needs to be run from the root of the repository for the parser
    # model path below to work
    #
    # All output uses single-argument print(...) calls so the same code
    # produces identical output on Python 2 and Python 3 without
    # requiring a __future__ import at the top of the file.

    from bllipparser import RerankingParser, Tree

    # NOTE(review): heads_only=True presumably loads just what head
    # finding needs rather than the full parsing model -- confirm
    # against the bllipparser documentation.
    rrp = RerankingParser()
    rrp.load_parser_model('first-stage/DATA/EN', heads_only=True)

    tree1 = Tree('''(S1 (SQ (VBZ Swears) (NP (PRP she)) (VP (VBD
    recognized) (NP (PRP$ his) (NN voice)) (, ,) (SBAR (IN that) (S
    (NP (NNP Tim)) (VP (VBD fired)))) (, ,) ('' ') (S (S (NP (PRP It))
    (VP (VBZ 's) (NP (PRP$ my) (NN money)))) (CC and) (S (NP (PRP I))
    (VP (VBP want) (S (NP (PRP it)) (VP (POS '))))))) (. !)))''')

    head = tree1.head()
    # one-element tuples keep the %-formatting safe for any operand type
    print('head word of sentence: %s' % (head.token,))
    print('head tree of sentence: %s' % (head,))
    print('')

    # print all syntactic dependencies
    for governor, dependent in tree1.dependencies():
        print('dependency: %s -> %s' % (governor.token, dependent.token))
    print('')

    # demo of how to lexicalize a tree by adding the headword to the
    # label of the tree
    for subtree in tree1.all_subtrees():
        subtree.label += '-' + subtree.head().token
    print('lexicalized tree:')
    print(tree1.pretty_string())
Esempio n. 3
0
    def test_tree_basics(self):
        """Exercise the read-only ``Tree`` API on the ``sample_tree`` parse.

        Checks, in turn: string and pretty-string output, token/tag
        accessors, span and label, indexing and iterating over children,
        the (is_preterminal, str) sequence from ``all_subtrees()``,
        ``IndexError`` on invalid indices, and that ``repr`` does not
        raise.
        """
        # Comparisons go through unittest's assertion methods rather than
        # bare ``assert`` so they still run under ``python -O`` and show
        # both values on failure.
        tree = Tree(sample_tree)
        self.assertEqual(str(tree), sample_tree)
        self.assertEqual(tree.pretty_string(), sample_tree_pretty)
        self.assertEqual(tree.tokens(),
                         ('This', 'is', 'a', 'fairly', 'simple',
                          'parse', 'tree', '.'))
        self.assertEqual(tree.tags(),
                         ('DT', 'VBZ', 'DT', 'RB', 'JJ', 'NN',
                          'NN', '.'))
        self.assertEqual(
            tree.tokens_and_tags(),
            [('This', 'DT'), ('is', 'VBZ'), ('a', 'DT'), ('fairly', 'RB'),
             ('simple', 'JJ'), ('parse', 'NN'), ('tree', 'NN'), ('.', '.')])
        self.assertEqual(tree.span(), (0, 8))
        self.assertEqual(tree.label, 'S1')

        # the S1 root is expected to have exactly one child (the S node)
        subtrees = tree.subtrees()
        self.assertEqual(len(subtrees), 1)
        self.assertEqual(str(subtrees[0]),
                         '(S (NP (DT This)) (VP (VBZ is) (NP '
                         '(DT a) (ADJP (RB fairly) (JJ simple)) '
                         '(NN parse) (NN tree))) (. .))')
        self.assertEqual(subtrees[0].label, 'S')
        self.assertEqual(str(subtrees[0][0]), '(NP (DT This))')
        self.assertEqual(subtrees[0][0].label, 'NP')
        self.assertEqual(subtrees[0][0].span(), (0, 1))
        self.assertEqual(subtrees[0][0].tags(), ('DT',))
        self.assertEqual(subtrees[0][0].tokens(), ('This',))
        self.assertEqual(str(subtrees[0][0][0]), '(DT This)')
        self.assertEqual(subtrees[0][0][0].token, 'This')
        self.assertEqual(subtrees[0][0][0].label, 'DT')
        self.assertTrue(tree[0][0][0].is_preterminal())
        self.assertEqual(len(tree[0]), 3)

        # children come back in left-to-right order when iterating
        subtrees = iter(tree[0])
        self.assertEqual(str(next(subtrees)), '(NP (DT This))')
        self.assertEqual(str(next(subtrees)),
                         '(VP (VBZ is) (NP (DT a) (ADJP '
                         '(RB fairly) (JJ simple)) (NN parse) '
                         '(NN tree)))')
        self.assertEqual(str(next(subtrees)), '(. .)')

        # expected (is_preterminal, string form) pairs, listed in the
        # order all_subtrees() should produce them
        pairs = [(False, sample_tree),
                 (False, '(S (NP (DT This)) (VP (VBZ is) (NP (DT a) (ADJP '
                         '(RB fairly) (JJ simple)) (NN parse) (NN tree))) '
                         '(. .))'),
                 (False, '(NP (DT This))'),
                 (True, '(DT This)'),
                 (False, '(VP (VBZ is) (NP (DT a) (ADJP (RB fairly) (JJ '
                         'simple)) (NN parse) (NN tree)))'),
                 (True, '(VBZ is)'),
                 (False, '(NP (DT a) (ADJP (RB fairly) (JJ simple)) (NN '
                         'parse) (NN tree))'),
                 (True, '(DT a)'),
                 (False, '(ADJP (RB fairly) (JJ simple))'),
                 (True, '(RB fairly)'),
                 (True, '(JJ simple)'),
                 (True, '(NN parse)'),
                 (True, '(NN tree)'),
                 (True, '(. .)')]
        actual_pairs = [(subtree.is_preterminal(), str(subtree))
                        for subtree in tree.all_subtrees()]
        self.assertEqual(pairs, actual_pairs)

        # index into a preterminal
        self.assertRaises(IndexError, lambda: tree[0][0][0][0])
        # index a child that doesn't exist
        self.assertRaises(IndexError, lambda: tree[500])
        self.assertRaises(IndexError, lambda: tree[0][0][7777])
        self.assertRaises(IndexError, lambda: tree[-30])

        # repr shouldn't crash, but we don't check (or rely on) its form
        repr(tree)
        repr(tree[0])
        repr(tree[0][1])
        repr(tree[0][1][0])