def test_strip_functional_tags(self):
     """Check that ``_strip_functional_tags`` reduces decorated labels to bare ones.

     The input tree carries the labels ``NP=PRP``, ``VP-0`` and ``NP|FUN-TAGS``;
     after the call it must compare equal to a tree labelled ``NP``, ``VP`` and
     ``NP``. The return value of the call is not used, so the test relies on
     the tree being updated in place.
     """
     tagged_parse = "(S (NP=PRP (D the) (N dog)) (VP-0 (V chased) (NP|FUN-TAGS (D the) (N cat))))"
     bare_parse = "(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))"

     reader = PennTreeBankConstituencySpanDatasetReader()
     parsed = Tree.fromstring(tagged_parse)
     reader._strip_functional_tags(parsed)  # pylint: disable=protected-access

     expected = Tree.fromstring(bare_parse)
     assert parsed == expected
Exemple #2
0
 def test_get_gold_spans_correctly_extracts_spans_with_nested_labels(self):
     """Check the spans ``_get_gold_spans`` collects for a parse with nested labels.

     The sentence contains chains of single-child constituents, most notably
     ``(WHNP (WHNP (WP what)))``. The expected mapping records such chains
     under one hyphen-joined label (``'WHNP-WHNP'``, ``'S-VP'``). Keys are
     ``(start, end)`` token positions with both ends inclusive: ``(0, 24)``
     covers all 25 leaves of the sentence.
     """
     bracketed_parse = """
         (S
     (`` ``)
     (S-TPC
     (NP-SBJ (PRP We))
     (VP
         (VBP have)
         (S
         (VP
             (TO to)
             (VP
             (VP
                 (VB clear)
                 (PRT (RP up))
                 (NP (DT these) (NNS issues)))
             (CC and)
             (VP
                 (VB find)
                 (PRT (RP out))
                 (SBAR-NOM
                 (WHNP (WHNP (WP what)))
                 (S
                     (VP
                     (VBZ is)
                     (ADJP-PRD (JJ present))
                     (SBAR
                         (WHNP (WDT that))
                         (S
                         (VP
                             (VBZ is)
                             (VP
                             (VBG creating)
                             (NP (JJ artificial) (NN volatility)))))))))))))))
     (, ,)
     ('' '')
     (NP-SBJ (NNP Mr.) (NNP Fisher))
     (VP (VBD said))
     (. .))
     """
     reader = PennTreeBankConstituencySpanDatasetReader()
     parsed = Tree.fromstring(bracketed_parse)

     # Functional suffixes (-TPC, -SBJ, -NOM, -PRD) are stripped first, so the
     # expected labels below are all bare categories.
     reader._strip_functional_tags(parsed) # pylint: disable=protected-access

     # The spans are written into the dict passed as the third argument; the
     # second argument (0) is presumably the index of the first token - confirm
     # against the reader implementation.
     collected_spans = {}
     reader._get_gold_spans(parsed, 0, collected_spans) # pylint: disable=protected-access

     expected_spans = {
         (1, 1): 'NP',
         (5, 5): 'PRT',
         (6, 7): 'NP',
         (4, 7): 'VP',
         (10, 10): 'PRT',
         (11, 11): 'WHNP-WHNP',
         (13, 13): 'ADJP',
         (14, 14): 'WHNP',
         (17, 18): 'NP',
         (16, 18): 'VP',
         (15, 18): 'S-VP',
         (14, 18): 'SBAR',
         (12, 18): 'S-VP',
         (11, 18): 'SBAR',
         (9, 18): 'VP',
         (4, 18): 'VP',
         (3, 18): 'S-VP',
         (2, 18): 'VP',
         (1, 18): 'S',
         (21, 22): 'NP',
         (23, 23): 'VP',
         (0, 24): 'S',
     }
     assert collected_spans == expected_spans
 def test_strip_functional_tags(self):
     """Verify that ``_strip_functional_tags`` leaves only plain constituent labels.

     ``NP=PRP``, ``VP-0`` and ``NP|FUN-TAGS`` in the input are expected to
     become ``NP``, ``VP`` and ``NP``. The tree object passed in is the one
     inspected afterwards, so the method is expected to modify it in place.
     """
     reader = PennTreeBankConstituencySpanDatasetReader()
     actual = Tree.fromstring(
         "(S (NP=PRP (D the) (N dog)) (VP-0 (V chased) (NP|FUN-TAGS (D the) (N cat))))"
     )
     reader._strip_functional_tags(actual)  # pylint: disable=protected-access

     expected = Tree.fromstring(
         "(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))"
     )
     assert actual == expected