def test_tree_basics(self):
    """Check the basic read-only ``Tree`` API against ``sample_tree``.

    Walks through string rendering, token and tag accessors, spans,
    labels, child indexing and iteration, the ``all_subtrees()``
    traversal, and the ``IndexError`` raised by out-of-range indexing.
    """
    # Expected bracketed renderings of the larger constituents; they are
    # reused by the indexing, iteration and traversal checks below.
    np_this = '(NP (DT This))'
    np_object = ('(NP (DT a) (ADJP (RB fairly) (JJ simple)) '
                 '(NN parse) (NN tree))')
    vp = ('(VP (VBZ is) (NP (DT a) (ADJP (RB fairly) (JJ simple)) '
          '(NN parse) (NN tree)))')
    sentence = ('(S (NP (DT This)) (VP (VBZ is) (NP (DT a) '
                '(ADJP (RB fairly) (JJ simple)) (NN parse) (NN tree))) '
                '(. .))')

    tree = Tree(sample_tree)

    # Whole-tree rendering and leaf accessors.
    assert str(tree) == sample_tree
    assert tree.pretty_string() == sample_tree_pretty
    words = ('This', 'is', 'a', 'fairly', 'simple', 'parse', 'tree', '.')
    pos_tags = ('DT', 'VBZ', 'DT', 'RB', 'JJ', 'NN', 'NN', '.')
    assert tree.tokens() == words
    assert tree.tags() == pos_tags
    assert tree.tokens_and_tags() == list(zip(words, pos_tags))
    assert tree.span() == (0, 8)
    assert tree.label == 'S1'

    # The root is expected to have exactly one subtree, labelled S.
    top_level = tree.subtrees()
    assert len(top_level) == 1
    s_node = top_level[0]
    assert str(s_node) == sentence
    assert s_node.label == 'S'

    # First child of S: the NP covering the first token.
    np_node = s_node[0]
    assert str(np_node) == np_this
    assert np_node.label == 'NP'
    assert np_node.span() == (0, 1)
    assert np_node.tags() == ('DT',)
    assert np_node.tokens() == ('This',)

    # Its only child is the preterminal for 'This'.
    dt_node = np_node[0]
    assert str(dt_node) == '(DT This)'
    assert dt_node.token == 'This'
    assert dt_node.label == 'DT'
    assert tree[0][0][0].is_preterminal()

    # S has three children; iterating yields them in order.
    assert len(tree[0]) == 3
    child_iter = iter(tree[0])
    for expected in (np_this, vp, '(. .)'):
        assert str(next(child_iter)) == expected

    # all_subtrees() output as (is_preterminal, rendering) pairs, in the
    # order the traversal is expected to produce them.
    expected_pairs = [
        (False, sample_tree),
        (False, sentence),
        (False, np_this),
        (True, '(DT This)'),
        (False, vp),
        (True, '(VBZ is)'),
        (False, np_object),
        (True, '(DT a)'),
        (False, '(ADJP (RB fairly) (JJ simple))'),
        (True, '(RB fairly)'),
        (True, '(JJ simple)'),
        (True, '(NN parse)'),
        (True, '(NN tree)'),
        (True, '(. .)'),
    ]
    observed_pairs = [(node.is_preterminal(), str(node))
                      for node in tree.all_subtrees()]
    assert expected_pairs == observed_pairs

    # Indexing into a preterminal, or asking for a child that doesn't
    # exist, must raise IndexError.
    bad_lookups = (
        lambda: tree[0][0][0][0],  # index into a preterminal
        lambda: tree[500],
        lambda: tree[0][0][7777],
        lambda: tree[-30],
    )
    for lookup in bad_lookups:
        self.assertRaises(IndexError, lookup)

    # repr shouldn't crash, but we don't check (or rely on) its form
    for node in (tree, tree[0], tree[0][1], tree[0][1][0]):
        repr(node)
# Demo: head finding, syntactic dependencies and lexicalization with
# bllipparser Tree objects.
#
# needs to be run from the root of the repository for the parser
# model path below to work
#
# Every print call below takes a single pre-formatted string, so the
# script prints the same text under both Python 2 and Python 3 without
# needing a __future__ import at the top of the file.
from bllipparser import RerankingParser, Tree

rrp = RerankingParser()
rrp.load_parser_model('first-stage/DATA/EN', heads_only=True)

tree1 = Tree(
    "(S1 (SQ (VBZ Swears) (NP (PRP she)) "
    "(VP (VBD recognized) (NP (PRP$ his) (NN voice)) (, ,) "
    "(SBAR (IN that) (S (NP (NNP Tim)) (VP (VBD fired)))) (, ,) ('' ') "
    "(S (S (NP (PRP It)) (VP (VBZ 's) (NP (PRP$ my) (NN money)))) "
    "(CC and) (S (NP (PRP I)) (VP (VBP want) "
    "(S (NP (PRP it)) (VP (POS '))))))) (. !)))"
)

head = tree1.head()
print('head word of sentence: %s' % (head.token,))
print('head tree of sentence: %s' % (head,))
print('')

# print all syntactic dependencies
for governor, dependent in tree1.dependencies():
    print('dependency: %s -> %s' % (governor.token, dependent.token))
print('')

# demo of how to lexicalize a tree by adding the headword to the
# label of the tree
for subtree in tree1.all_subtrees():
    subtree.label += '-' + subtree.head().token
print('lexicalized tree:')
print(tree1.pretty_string())
def test_tree_basics(self):
    """Check the basic read-only ``Tree`` API against ``sample_tree``.

    Walks through string rendering, token and tag accessors, spans,
    labels, child indexing and iteration, the ``all_subtrees()``
    traversal, and the ``IndexError`` raised by out-of-range indexing.
    """
    # Expected bracketed renderings of the larger constituents; they are
    # reused by the indexing, iteration and traversal checks below.
    np_this = '(NP (DT This))'
    np_object = ('(NP (DT a) (ADJP (RB fairly) (JJ simple)) '
                 '(NN parse) (NN tree))')
    vp = ('(VP (VBZ is) (NP (DT a) (ADJP (RB fairly) (JJ simple)) '
          '(NN parse) (NN tree)))')
    sentence = ('(S (NP (DT This)) (VP (VBZ is) (NP (DT a) '
                '(ADJP (RB fairly) (JJ simple)) (NN parse) (NN tree))) '
                '(. .))')

    tree = Tree(sample_tree)

    # Whole-tree rendering and leaf accessors.
    assert str(tree) == sample_tree
    assert tree.pretty_string() == sample_tree_pretty
    words = ('This', 'is', 'a', 'fairly', 'simple', 'parse', 'tree', '.')
    pos_tags = ('DT', 'VBZ', 'DT', 'RB', 'JJ', 'NN', 'NN', '.')
    assert tree.tokens() == words
    assert tree.tags() == pos_tags
    assert tree.tokens_and_tags() == list(zip(words, pos_tags))
    assert tree.span() == (0, 8)
    assert tree.label == 'S1'

    # The root is expected to have exactly one subtree, labelled S.
    top_level = tree.subtrees()
    assert len(top_level) == 1
    s_node = top_level[0]
    assert str(s_node) == sentence
    assert s_node.label == 'S'

    # First child of S: the NP covering the first token.
    np_node = s_node[0]
    assert str(np_node) == np_this
    assert np_node.label == 'NP'
    assert np_node.span() == (0, 1)
    assert np_node.tags() == ('DT',)
    assert np_node.tokens() == ('This',)

    # Its only child is the preterminal for 'This'.
    dt_node = np_node[0]
    assert str(dt_node) == '(DT This)'
    assert dt_node.token == 'This'
    assert dt_node.label == 'DT'
    assert tree[0][0][0].is_preterminal()

    # S has three children; iterating yields them in order.
    assert len(tree[0]) == 3
    child_iter = iter(tree[0])
    for expected in (np_this, vp, '(. .)'):
        assert str(next(child_iter)) == expected

    # all_subtrees() output as (is_preterminal, rendering) pairs, in the
    # order the traversal is expected to produce them.
    expected_pairs = [
        (False, sample_tree),
        (False, sentence),
        (False, np_this),
        (True, '(DT This)'),
        (False, vp),
        (True, '(VBZ is)'),
        (False, np_object),
        (True, '(DT a)'),
        (False, '(ADJP (RB fairly) (JJ simple))'),
        (True, '(RB fairly)'),
        (True, '(JJ simple)'),
        (True, '(NN parse)'),
        (True, '(NN tree)'),
        (True, '(. .)'),
    ]
    observed_pairs = [(node.is_preterminal(), str(node))
                      for node in tree.all_subtrees()]
    assert expected_pairs == observed_pairs

    # Indexing into a preterminal, or asking for a child that doesn't
    # exist, must raise IndexError.
    bad_lookups = (
        lambda: tree[0][0][0][0],  # index into a preterminal
        lambda: tree[500],
        lambda: tree[0][0][7777],
        lambda: tree[-30],
    )
    for lookup in bad_lookups:
        self.assertRaises(IndexError, lookup)

    # repr shouldn't crash, but we don't check (or rely on) its form
    for node in (tree, tree[0], tree[0][1], tree[0][1][0]):
        repr(node)