def test1_EasySRL_BoyGirl2(self):
     # EasySRL derivation for "The boy will want to believe the girl".
     # Checks three things derived from it: the linear DRS rendering, the list
     # of constituent strings, and the constituent string built from
     # get_verbnet_sentence().
     derivation = r'''(<T S[dcl] 1 2> (<T NP 0 2> (<L NP/N DT DT The NP/N>) (<L N NN NN boy N>) ) (<T S[dcl]\NP 0 2>
     (<L (S[dcl]\NP)/(S[b]\NP) MD MD will (S[dcl]\NP)/(S[b]\NP)>) (<T S[b]\NP 0 2>
     (<L (S[b]\NP)/(S[to]\NP) VB VB want (S[b]\NP)/(S[to]\NP)>) (<T S[to]\NP 0 2>
     (<L (S[to]\NP)/(S[b]\NP) TO TO to (S[to]\NP)/(S[b]\NP)>) (<T S[b]\NP 0 2>
     (<L (S[b]\NP)/NP VB VB believe (S[b]\NP)/NP>) (<T NP 0 2> (<L NP/N DT DT the NP/N>)
     (<L N NN NN girl N>) ) ) ) ) ) )'''
     ptree = parse_ccg_derivation(derivation)
     self.assertIsNotNone(ptree)
     dprint(sentence_from_pt(ptree))

     # Build the DRS from the parse tree.
     builder = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     builder.build_execution_sequence(ptree)
     builder.create_drs()
     builder.resolve_proper_names()
     builder.final_rename()

     # 1. Linear rendering of the DRS.
     drs_text = builder.get_drs().show(SHOW_LINEAR)
     dprint(drs_text)
     expected_drs = '[X1,E2,E3,X4| boy(X1),will(E2),_MODAL(E2),want(E2),_EVENT(E2),_ARG0(E2,X1),_ARG1(E2,E3),believe(E3),_EVENT(E3),_ARG0(E3,X1),_ARG1(E3,X4),girl(X4)]'
     self.assertEqual(expected_drs, drs_text)

     # 2. Constituent strings; '#' precedes the head word of each constituent.
     constituents = get_constituents_string_list(builder)
     dprint('\n'.join(constituents))
     expected_constituents = [
         'S(The boy #will want to believe the girl)',
         'NP(#The boy)',
         'S_INF(#want to believe the girl)',
         'S_INF(#to believe the girl)',
         'S_INF(#believe the girl)',
         'NP(#the girl)'
     ]
     self.assertListEqual(expected_constituents, constituents)

     # 3. Single-line constituent string for the sentence returned by
     #    get_verbnet_sentence().
     flattened = get_constituent_string(builder.get_verbnet_sentence())
     self.assertEqual('NP(#The boy) VP(#will want) S_INF(#to believe) NP(#the girl)', flattened)
# Example #2
# 0
 def test8_Wsj0004_3(self):
     # Derivation for "Compound yields assume reinvestment of dividends and
     # that the current yield continues for a year ."
     # Checks the rule chosen for conj + S[em] -> NP[conj] and the exact
     # sequence of operations queued by build_execution_sequence().
     txt = r'''
     (<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T NP 0 1> (<T N 1 2> (<L N/N NN NN Compound N_309/N_309>) 
     (<L N NNS NNS yields N>) ) ) (<T S[dcl]\NP 0 2> (<L (S[dcl]\NP)/NP VBP VBP assume (S[dcl]\NP_236)/NP_237>) 
     (<T NP 0 2> (<T NP 0 2> (<T NP 0 1> (<L N NN NN reinvestment N>) ) (<T NP\NP 0 2> 
     (<L (NP\NP)/NP IN IN of (NP_248\NP_248)/NP_249>) (<T NP 0 1> (<L N NNS NNS dividends N>) ) ) ) (<T NP[conj] 1 2> 
     (<L conj CC CC and conj>) (<T S[em] 0 2> (<L S[em]/S[dcl] IN IN that S[em]/S[dcl]_257>) (<T S[dcl] 1 2> 
     (<T NP 1 2> (<L NP[nb]/N DT DT the NP[nb]_297/N_297>) (<T N 1 2> (<L N/N JJ JJ current N_292/N_292>) 
     (<L N NN NN yield N>) ) ) (<T S[dcl]\NP 0 2> (<L S[dcl]\NP VBZ VBZ continues S[dcl]\NP_262>) 
     (<T (S\NP)\(S\NP) 0 2> (<L ((S\NP)\(S\NP))/NP IN IN for ((S_275\NP_270)_275\(S_275\NP_270)_275)/NP_276>) 
     (<T NP 1 2> (<L NP[nb]/N DT DT a NP[nb]_283/N_283>) (<L N NN NN year N>) ) ) ) ) ) ) ) ) ) (<L . . . . .>) ) '''
     ptree = parse_ccg_derivation(txt)
     converter = Ccg2Drs()

     # The three categories are looked up in the same order the original
     # call evaluated them.
     cat_conj = Category.from_cache('conj')
     cat_s_em = Category.from_cache('S[em]')
     cat_np_conj = Category.from_cache('NP[conj]')
     selected_rule = get_rule(cat_conj, cat_s_em, cat_np_conj)
     self.assertEqual(selected_rule, RL_TC_ATOM)

     converter.build_execution_sequence(ptree)

     # Check execution queue: compare the repr of every queued operation.
     queued_ops = list(map(repr, converter.exeque))
     wanted_ops = [
         '<PushOp>:(compound, N/N, NN)',
         '<PushOp>:(yields, N, NNS)',
         '<ExecOp>:(2, FA N)',
         '<ExecOp>:(1, LP NP)',
         '<PushOp>:(assume, (S[dcl]\\NP)/NP, VBP)',
         '<PushOp>:(reinvestment, N, NN)',
         '<ExecOp>:(1, LP NP)',
         '<PushOp>:(of, (NP\\NP)/NP, IN)',
         '<PushOp>:(dividends, N, NNS)',
         '<ExecOp>:(1, LP NP)',
         '<ExecOp>:(2, FA NP\\NP)',
         '<ExecOp>:(2, BA NP)',
         '<PushOp>:(and, conj, CC)',
         '<PushOp>:(that, S[em]/S[dcl], IN)',
         '<PushOp>:(the, NP[nb]/N, DT)',
         '<PushOp>:(current, N/N, JJ)',
         '<PushOp>:(yield, N, NN)',
         '<ExecOp>:(2, FA N)',
         '<ExecOp>:(2, FA NP)',
         '<PushOp>:(continue, S[dcl]\\NP, VBZ)',
         '<PushOp>:(for, ((S\\NP)\\(S\\NP))/NP, IN)',
         '<PushOp>:(a, NP[nb]/N, DT)',
         '<PushOp>:(year, N, NN)',
         '<ExecOp>:(2, FA NP)',
         '<ExecOp>:(2, FA (S\\NP)\\(S\\NP))',
         '<ExecOp>:(2, BA S[dcl]\\NP)',
         '<ExecOp>:(2, BA S[dcl])',
         '<ExecOp>:(2, FA S[em])',
         '<ExecOp>:(2, ATOM_TC NP[conj])',
         '<ExecOp>:(2, RCONJ NP)',
         '<ExecOp>:(2, FA S[dcl]\\NP)',
         '<ExecOp>:(2, BA S[dcl])',
         '<PushOp>:(., ., .)',
         '<ExecOp>:(2, LP S[dcl])',
     ]
     self.assertListEqual(wanted_ops, queued_ops)
    def test2_GOLD_Wsj0051_13(self):
        txt = r'''
(<T S[dcl] 0 2> 
  (<T S[dcl] 1 2> 
    (<T NP 1 2> 
      (<L NP[nb]/N DT DT The NP[nb]_273/N_273>) 
      (<L N NNS NNS bids N>) 
    ) 
    (<T S[dcl]\NP 1 2> 
      (<T (S\NP)/(S\NP) 1 2> 
        (<L , , , , ,>) 
        (<T (S\NP)/(S\NP) 0 2> 
          (<T S[dcl]/S[dcl] 1 2> 
            (<T S/(S\NP) 0 1> 
              (<L NP PRP PRP he NP>) 
            ) 
            (<L (S[dcl]\NP)/S[dcl] VBD VBD added (S[dcl]\NP_242)/S[dcl]_243>) 
          ) 
          (<L , , , , ,>) 
        ) 
      ) 
      (<T S[dcl]\NP 0 2> 
        (<L (S[dcl]\NP)/(S[adj]\NP) VBD VBD were (S[dcl]\NP_211)/(S[adj]_212\NP_211:B)_212>) 
        (<T S[adj]\NP 0 2> 
          (<L (S[adj]\NP)/PP JJ JJ contrary (S[adj]\NP_219)/PP_220>) 
          (<T PP 0 2> 
            (<L PP/NP TO TO to PP/NP_225>) 
            (<T NP 0 1> 
              (<T N 1 2> 
                (<L N/N JJ JJ common N_234/N_234>) 
                (<L N NN NN sense N>) 
              ) 
            ) 
          ) 
        ) 
      ) 
    ) 
  ) 
  (<L . . . . .>) 
) 
'''
        pt = parse_ccg_derivation(txt)
        s = sentence_from_pt(pt)
        dprint(s)
        self.assertIsNotNone(pt)
        ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
        ccg.build_execution_sequence(pt)
        ccg.create_drs()
        ccg.final_rename()
        d = ccg.get_drs()
        s = d.show(SHOW_LINEAR)
        dprint(s)
        sent = ccg.get_verbnet_sentence()
        a = get_constituents_string_list(sent)
        x = [
            'NP(The #bids)',
            'ADVP(he #added)',
            'VP(#were)',
            'ADJP(#contrary to common sense)',
            'PP(#to)',
            'NP(common #sense)'
        ]
        dprint('\n'.join(a))
        self.assertListEqual(x, a)
 def test2_GOLD_Wsj0003_1(self):
     # A form of asbestos once used to make Kent cigarette filters has caused a high percentage of cancer deaths
     # among a group of workers exposed to it more than 30 years ago, researchers reported.
     # ID=wsj_0003.1 PARSER=GOLD NUMPARSE=1
     #
     # Builds the DRS for the derivation below, then checks the constituent
     # strings and the constituent tree of the sentence returned by
     # get_verbnet_sentence().
     #
     # (<T S[dcl] 0 2>
     #   (<T S[dcl] 1 2>
     #       (<T S[dcl] 1 2>
     #           (<T NP 0 2>
     #               (<T NP 0 2>
     #                   (<T NP 1 2>
     #                       (<L NP[nb]/N DT DT A NP[nb]_166/N_166>)
     #                       (<L N NN NN form N>)
     #                   )
     #                   (<T NP\NP 0 2>
     #                       (<L (NP\NP)/NP IN IN of (NP_174\NP_174)/NP_175>)
     #                       (<T NP 0 1>
     #                           (<L N NN NN asbestos N>)
     #                       )
     #                   )
     #               )
     #               (<T NP\NP 0 1>
     #                   (<T S[pss]\NP 1 2>
     #                       (<L (S\NP)/(S\NP) RB RB once (S_235\NP_230)_235/(S_235\NP_230)_235>)
     #                       (<T S[pss]\NP 0 2>
     #                           (<L (S[pss]\NP)/(S[to]\NP) VBN VBN used (S[pss]\NP_187)/(S[to]_188\NP_187:B)_188>)
     #                           (<T S[to]\NP 0 2>
     #                               (<L (S[to]\NP)/(S[b]\NP) TO TO to (S[to]\NP_197)/(S[b]_198\NP_197:B)_198>)
     #                               (<T S[b]\NP 0 2>
     #                                   (<L (S[b]\NP)/NP VB VB make (S[b]\NP_205)/NP_206>)
     #                                   (<T NP 0 1>
     #                                       (<T N 1 2>
     #                                           (<L N/N NNP NNP Kent N_222/N_222>)
     #                                           (<T N 1 2>
     #                                               (<L N/N NN NN cigarette N_215/N_215>)
     #                                               (<L N NNS NNS filters N>)
     #                                           )
     #                                       )
     #                                   )
     #                               )
     #                           )
     #                       )
     #                   )
     #               )
     #           )
     #           (<T S[dcl]\NP 0 2>
     #               (<L (S[dcl]\NP)/(S[pt]\NP) VBZ VBZ has (S[dcl]\NP_23)/(S[pt]_24\NP_23:B)_24>)
     #               (<T S[pt]\NP 0 2>
     #                   (<L (S[pt]\NP)/NP VBN VBN caused (S[pt]\NP_31)/NP_32>)
     #                       (<T NP 0 2>
     #                           (<T NP 0 2>
     #                               (<T NP 1 2>
     #                                   (<L NP[nb]/N DT DT a NP[nb]_46/N_46>)
     #                                   (<T N 1 2>
     #                                       (<L N/N JJ JJ high N_41/N_41>)
     #                                       (<L N NN NN percentage N>)
     #                                   )
     #                               )
     #                               (<T NP\NP 0 2>
     #                                   (<L (NP\NP)/NP IN IN of (NP_54\NP_54)/NP_55>)
     #                                   (<T NP 0 1>
     #                                       (<T N 1 2>
     #                                           (<L N/N NN NN cancer N_64/N_64>)
     #                                           (<L N NNS NNS deaths N>)
     #                                       )
     #                                   )
     #                               )
     #                           )
     #                           (<T NP\NP 0 2>
     #                               (<L (NP\NP)/NP IN IN among (NP_73\NP_73)/NP_74>)
     #                               (<T NP 0 2>
     #                                   (<T NP 1 2>
     #                                       (<L NP[nb]/N DT DT a NP[nb]_81/N_81>)
     #                                       (<L N NN NN group N>)
     #                                   )
     #                                   (<T NP\NP 0 2>
     #                                       (<L (NP\NP)/NP IN IN of (NP_89\NP_89)/NP_90>)
     #                                       (<T NP 0 2>
     #                                           (<T NP 0 1>
     #                                               (<L N NNS NNS workers N>)
     #                                           )
     #                                           (<T NP\NP 0 1>
     #                                               (<T S[pss]\NP 0 2>
     #                                                   (<T S[pss]\NP 0 2>
     #                                                       (<L (S[pss]\NP)/PP VBN VBN exposed (S[pss]\NP_100)/PP_101>)
     #                                                       (<T PP 0 2>
     #                                                           (<L PP/NP TO TO to PP/NP_106>)
     #                                                           (<L NP PRP PRP it NP>)
     #                                                       )
     #                                                   )
     #                                                   (<T (S\NP)\(S\NP) 1 2>
     #                                                       (<T NP 0 1>
     #                                                           (<T N 1 2>
     #                                                               (<T N/N 1 2>
     #                                                                   (<T (N/N)/(N/N) 1 2>
     #                                                                       (<L S[adj]\NP RBR RBR more S[adj]\NP_153>)
     #                                                                       (<L ((N/N)/(N/N))\(S[adj]\NP) IN IN than ((N_147/N_139)_147/(N_147/N_139)_147)\(S[adj]_148\NP_142)_148>)
     #                                                                   )
     #                                                                   (<L N/N CD CD 30 N_131/N_131>)
     #                                                               )
     #                                                               (<L N NNS NNS years N>)
     #                                                           )
     #                                                       )
     #                                                       (<L ((S\NP)\(S\NP))\NP IN IN ago ((S_121\NP_116)_121\(S_121\NP_116)_121)\NP_122>)
     #                                                   )
     #                                               )
     #                                           )
     #                                       )
     #                                   )
     #                               )
     #                           )
     #                       )
     #                   )
     #               )
     #           )
     #           (<T S[dcl]\S[dcl] 1 2>
     #               (<L , , , , ,>)
     #               (<T S[dcl]\S[dcl] 1 2>
     #                   (<T NP 0 1>
     #                       (<L N NNS NNS researchers N>)
     #                   )
     #                   (<L (S[dcl]\S[dcl])\NP VBD VBD reported (S[dcl]\S[dcl]_8)\NP_9>)
     #               )
     #           )
     #       )
     #       (<L . . . . .>)
     #   )
     txt = r'''(<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T S[dcl] 1 2> (<T NP 0 2> (<T NP 0 2> (<T NP 1 2>
     (<L NP[nb]/N DT DT A NP[nb]_166/N_166>) (<L N NN NN form N>) ) (<T NP\NP 0 2>
     (<L (NP\NP)/NP IN IN of (NP_174\NP_174)/NP_175>) (<T NP 0 1> (<L N NN NN asbestos N>) ) ) ) (<T NP\NP 0 1>
     (<T S[pss]\NP 1 2> (<L (S\NP)/(S\NP) RB RB once (S_235\NP_230)_235/(S_235\NP_230)_235>) (<T S[pss]\NP 0 2>
     (<L (S[pss]\NP)/(S[to]\NP) VBN VBN used (S[pss]\NP_187)/(S[to]_188\NP_187:B)_188>) (<T S[to]\NP 0 2>
     (<L (S[to]\NP)/(S[b]\NP) TO TO to (S[to]\NP_197)/(S[b]_198\NP_197:B)_198>) (<T S[b]\NP 0 2>
     (<L (S[b]\NP)/NP VB VB make (S[b]\NP_205)/NP_206>) (<T NP 0 1> (<T N 1 2> (<L N/N NNP NNP Kent N_222/N_222>)
     (<T N 1 2> (<L N/N NN NN cigarette N_215/N_215>) (<L N NNS NNS filters N>) ) ) ) ) ) ) ) ) ) (<T S[dcl]\NP 0 2>
     (<L (S[dcl]\NP)/(S[pt]\NP) VBZ VBZ has (S[dcl]\NP_23)/(S[pt]_24\NP_23:B)_24>) (<T S[pt]\NP 0 2>
     (<L (S[pt]\NP)/NP VBN VBN caused (S[pt]\NP_31)/NP_32>) (<T NP 0 2> (<T NP 0 2> (<T NP 1 2>
     (<L NP[nb]/N DT DT a NP[nb]_46/N_46>) (<T N 1 2> (<L N/N JJ JJ high N_41/N_41>) (<L N NN NN percentage N>) ) )
     (<T NP\NP 0 2> (<L (NP\NP)/NP IN IN of (NP_54\NP_54)/NP_55>) (<T NP 0 1> (<T N 1 2>
     (<L N/N NN NN cancer N_64/N_64>) (<L N NNS NNS deaths N>) ) ) ) ) (<T NP\NP 0 2>
     (<L (NP\NP)/NP IN IN among (NP_73\NP_73)/NP_74>) (<T NP 0 2> (<T NP 1 2> (<L NP[nb]/N DT DT a NP[nb]_81/N_81>)
     (<L N NN NN group N>) ) (<T NP\NP 0 2> (<L (NP\NP)/NP IN IN of (NP_89\NP_89)/NP_90>) (<T NP 0 2> (<T NP 0 1>
     (<L N NNS NNS workers N>) ) (<T NP\NP 0 1> (<T S[pss]\NP 0 2> (<T S[pss]\NP 0 2>
     (<L (S[pss]\NP)/PP VBN VBN exposed (S[pss]\NP_100)/PP_101>) (<T PP 0 2> (<L PP/NP TO TO to PP/NP_106>)
     (<L NP PRP PRP it NP>) ) ) (<T (S\NP)\(S\NP) 1 2> (<T NP 0 1> (<T N 1 2> (<T N/N 1 2> (<T (N/N)/(N/N) 1 2>
     (<L S[adj]\NP RBR RBR more S[adj]\NP_153>)
     (<L ((N/N)/(N/N))\(S[adj]\NP) IN IN than ((N_147/N_139)_147/(N_147/N_139)_147)\(S[adj]_148\NP_142)_148>) )
     (<L N/N CD CD 30 N_131/N_131>) ) (<L N NNS NNS years N>) ) )
     (<L ((S\NP)\(S\NP))\NP IN IN ago ((S_121\NP_116)_121\(S_121\NP_116)_121)\NP_122>) ) ) ) ) ) ) ) ) ) ) )
     (<T S[dcl]\S[dcl] 1 2> (<L , , , , ,>) (<T S[dcl]\S[dcl] 1 2> (<T NP 0 1> (<L N NNS NNS researchers N>) )
     (<L (S[dcl]\S[dcl])\NP VBD VBD reported (S[dcl]\S[dcl]_8)\NP_9>) ) ) ) (<L . . . . .>) )'''
     pt = parse_ccg_derivation(txt)
     # Check the parse result before its first use, so a failed parse is
     # reported by this assertion rather than by sentence_from_pt().
     self.assertIsNotNone(pt)
     s = sentence_from_pt(pt)
     dprint(s)
     ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     ccg.build_execution_sequence(pt)
     ccg.create_drs()
     ccg.final_rename()
     d = ccg.get_drs()
     s = d.show(SHOW_LINEAR)
     # The linear DRS is only printed here; it is not asserted.
     dprint(s)
     sent = ccg.get_verbnet_sentence()
     a = get_constituents_string_list(sent)
     # Expected constituents; '#' precedes each head word. The trailing index
     # comments are the positions referenced by the constituent tree below.
     x = [
         'NP(A #form)',              # 0
         'PP(#of)',                  # 1
         'NP(#asbestos)',            # 2
         'ADVP(once #used to make Kent cigarette filters)',   # 3
         'S_INF(#to make)',          # 4
         'NP(Kent cigarette #filters)',  # 5
         'VP(#has caused)',          # 6
         'NP(a high #percentage)',   # 7
         'PP(#of)',                  # 8
         'NP(cancer #deaths)',       # 9
         'PP(#among)',               #10
         'NP(a #group)',             #11
         'PP(#of)',                  #12
         'NP(#workers)',             #13
         'ADVP(#exposed to it more than 30 years ago)',  #14
         'NP(more than 30 #years)',  #15
         'NP(#researchers)',         #16
         'VP(#reported)',            #17
     ]
     dprint('\n'.join(a))
     self.assertListEqual(x, a)
     # Expected tree, written as (index, [children]):
     # 17 VP(reported.)
     #    06 VP(has caused)
     #       00 NP(A form)
     #          01 PP(of)
     #             02 NP(asbestos)
     #          03 ADVP(once used to make Kent cigarette filters)
     #             04 S_INF(to make)
     #                05 NP(Kent cigarette filters)
     #       07 NP(a high percentage)
     #          08 PP(of)
     #             09 NP(cancer deaths)
     #          10 PP(among)
     #             11 NP(a group)
     #                12 PP(of)
     #                   13 NP(workers)
     #                      14 ADVP(exposed to it more than 30 years ago)
     #                         15 NP(more than 30 years)
     #    16 NP(researchers)
     x = (17, [(6, [(0, [(1, [(2, [])]), (3, [(4, [(5, [])])])]), (7, [(8, [(9, [])]), (10, [(11, [(12, [(13, [(14, [(15, [])])])])])])])]), (16, [])])
     a = sent.get_constituent_tree()
     dprint_constituent_tree(sent, a)
     # Compared via repr, as in the other constituent-tree tests in this file.
     self.assertEqual(repr(x), repr(a))
    def test2_GOLD_Wsj0001_2(self):
        # Mr. Vinken is chairman of Elsevier N.V. , the Dutch publishing group .
        #
        # PARG
        # 1      0      N/N             1      Vinken Mr.
        # 1      2      (S[dcl]\NP)/NP  1      Vinken is
        # 3      2      (S[dcl]\NP)/NP  2      chairman is
        # 3      4      (NP\NP)/NP      1      chairman of
        # 6      4      (NP\NP)/NP      2      N.V. of
        # 6      5      N/N             1      N.V. Elsevier
        # 11     4      (NP\NP)/NP      2      group of
        # 11     8      NP[nb]/N        1      group the
        # 11     9      N/N             1      group Dutch
        # 11     10     N/N             1      group publishing
        txt = r'''
(<T S[dcl] 0 2>
    (<T S[dcl] 1 2>
        (<T NP 0 1>
            (<T N 1 2>
                (<L N/N NNP NNP Mr. N_142/N_142>)
                (<L N NNP NNP Vinken N>)
            )
        )
        (<T S[dcl]\NP 0 2>
            (<L (S[dcl]\NP)/NP VBZ VBZ is (S[dcl]\NP_87)/NP_88>)
            (<T NP 0 2>
                (<T NP 0 1>
                    (<L N NN NN chairman N>)
                )
                (<T NP\NP 0 2>
                    (<L (NP\NP)/NP IN IN of (NP_99\NP_99)/NP_100>)
                    (<T NP 0 2>
                        (<T NP 0 1>
                            (<T N 1 2>
                                (<L N/N NNP NNP Elsevier N_109/N_109>)
                                (<L N NNP NNP N.V. N>)
                            )
                        )
                        (<T NP[conj] 1 2>
                            (<L , , , , ,>)
                            (<T NP 1 2>
                                (<L NP[nb]/N DT DT the NP[nb]_131/N_131>)
                                (<T N 1 2>
                                    (<L N/N NNP NNP Dutch N_126/N_126>)
                                    (<T N 1 2>
                                        (<L N/N VBG VBG publishing N_119/N_119>)
                                        (<L N NN NN group N>)
                                    )
                                )
                            )
                        )
                    )
                )
            )
        )
    )
    (<L . . . . .>)
)'''
        pt = parse_ccg_derivation(txt)
        s = sentence_from_pt(pt)
        dprint(s)
        self.assertIsNotNone(pt)
        ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
        ccg.build_execution_sequence(pt)
        ccg.create_drs()
        ccg.resolve_proper_names()
        ccg.final_rename()
        d = ccg.get_drs()
        s = d.show(SHOW_LINEAR)
        dprint(s)
        sent = ccg.get_verbnet_sentence()
        a = get_constituents_string_list(sent)
        x = [
            'NP(#Mr.-Vinken)',
            'VP(#is)',
            'NP(#chairman)',
            'PP(#of)',
            'NP(#Elsevier-N.V.)',
            'NP(the Dutch publishing #group)',
        ]
        dprint('\n'.join(a))
        self.assertListEqual(x, a)
        # 01 VP(is)
        #    00 NP(Mr.-Vinken)
        #    02 NP(chairman)
        #       03 PP(of Elsevier N.V. the Dutch publishing group)
        #          04 NP(Elsevier N.V.)
        #             05 NP(the Dutch publishing group)
        x = (1, [(0, []), (2, [(3, [(4, [(5, [])])])])])
        a = sent.get_constituent_tree()
        dprint_constituent_tree(sent, a)
        self.assertEqual(repr(x), repr(a))
 def test2_GOLD_Wsj0001_1(self):
     # ID=wsj_0001.1 PARSER=GOLD NUMPARSE=1
     # Pierre Vinken, 61 years old, will join the board as a nonexecutive director Nov 29.
     #
     # Builds the DRS for the derivation below (with proper-name resolution),
     # then checks the constituent strings and the constituent tree of the
     # sentence returned by get_verbnet_sentence().
     #
     # (<T S[dcl] 0 2>
     #   (<T S[dcl] 1 2>
     #       (<T NP 0 2>
     #           (<T NP 0 2>
     #               (<T NP 0 2>
     #                   (<T NP 0 1>
     #                       (<T N 1 2>
     #                           (<L N/N NNP NNP Pierre N_73/N_73>)
     #                           (<L N NNP NNP Vinken N>)
     #                       )
     #                   )
     #                   (<L , , , , ,>)
     #               )
     #               (<T NP\NP 0 1>
     #                   (<T S[adj]\NP 1 2>
     #                       (<T NP 0 1>
     #                           (<T N 1 2>
     #                               (<L N/N CD CD 61 N_93/N_93>)
     #                               (<L N NNS NNS years N>)
     #                           )
     #                       )
     #                       (<L (S[adj]\NP)\NP JJ JJ old (S[adj]\NP_83)\NP_84>)
     #                   )
     #               )
     #           )
     #           (<L , , , , ,>)
     #       )
     #       (<T S[dcl]\NP 0 2>
     #           (<L (S[dcl]\NP)/(S[b]\NP) MD MD will (S[dcl]\NP_10)/(S[b]_11\NP_10:B)_11>)
     #           (<T S[b]\NP 0 2>
     #               (<T S[b]\NP 0 2>
     #                   (<T (S[b]\NP)/PP 0 2>
     #                       (<L ((S[b]\NP)/PP)/NP VB VB join ((S[b]\NP_20)/PP_21)/NP_22>)
     #                       (<T NP 1 2>
     #                           (<L NP[nb]/N DT DT the NP[nb]_29/N_29>)
     #                           (<L N NN NN board N>)
     #                       )
     #                   )
     #                   (<T PP 0 2>
     #                       (<L PP/NP IN IN as PP/NP_34>)
     #                       (<T NP 1 2>
     #                           (<L NP[nb]/N DT DT a NP[nb]_48/N_48>)
     #                           (<T N 1 2>
     #                               (<L N/N JJ JJ nonexecutive N_43/N_43>)
     #                               (<L N NN NN director N>)
     #                           )
     #                       )
     #                   )
     #               )
     #               (<T (S\NP)\(S\NP) 0 2>
     #                   (<L ((S\NP)\(S\NP))/N[num] NNP NNP Nov. ((S_61\NP_56)_61\(S_61\NP_56)_61)/N[num]_62>)
     #                   (<L N[num] CD CD 29 N[num]>)
     #               )
     #           )
     #       )
     #   )
     #   (<L . . . . .>)
     # )
     derivation = r'''(<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T NP 0 2> (<T NP 0 2> (<T NP 0 2> (<T NP 0 1> (<T N 1 2>
         (<L N/N NNP NNP Pierre N_73/N_73>) (<L N NNP NNP Vinken N>) ) ) (<L , , , , ,>) ) (<T NP\NP 0 1>
         (<T S[adj]\NP 1 2> (<T NP 0 1> (<T N 1 2> (<L N/N CD CD 61 N_93/N_93>) (<L N NNS NNS years N>) ) )
         (<L (S[adj]\NP)\NP JJ JJ old (S[adj]\NP_83)\NP_84>) ) ) ) (<L , , , , ,>) ) (<T S[dcl]\NP 0 2>
         (<L (S[dcl]\NP)/(S[b]\NP) MD MD will (S[dcl]\NP_10)/(S[b]_11\NP_10:B)_11>) (<T S[b]\NP 0 2>
         (<T S[b]\NP 0 2> (<T (S[b]\NP)/PP 0 2> (<L ((S[b]\NP)/PP)/NP VB VB join ((S[b]\NP_20)/PP_21)/NP_22>)
         (<T NP 1 2> (<L NP[nb]/N DT DT the NP[nb]_29/N_29>) (<L N NN NN board N>) ) ) (<T PP 0 2>
         (<L PP/NP IN IN as PP/NP_34>) (<T NP 1 2> (<L NP[nb]/N DT DT a NP[nb]_48/N_48>) (<T N 1 2>
         (<L N/N JJ JJ nonexecutive N_43/N_43>) (<L N NN NN director N>) ) ) ) ) (<T (S\NP)\(S\NP) 0 2>
         (<L ((S\NP)\(S\NP))/N[num] NNP NNP Nov. ((S_61\NP_56)_61\(S_61\NP_56)_61)/N[num]_62>)
         (<L N[num] CD CD 29 N[num]>) ) ) ) ) (<L . . . . .>) )'''
     ptree = parse_ccg_derivation(derivation)
     self.assertIsNotNone(ptree)
     dprint(sentence_from_pt(ptree))

     builder = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     builder.build_execution_sequence(ptree)
     builder.create_drs()
     builder.resolve_proper_names()
     builder.final_rename()
     # The linear DRS is only printed here; it is not asserted.
     dprint(builder.get_drs().show(SHOW_LINEAR))

     sent = builder.get_verbnet_sentence()
     constituents = get_constituents_string_list(sent)
     # FIXME: VP(will #join) should be S_INF(will #join).
     # The issue occurs because modal-verb combinator categories are converted to modifiers. Must be fixed on
     # functor creation - Lexeme.get_production()
     # will: (S[dcl]\NP)/(S[b]/NP) -> (S\NP)/(S/NP)
     expected_constituents = [
         'NP(#Pierre-Vinken)',
         'ADJP(61 years #old)',
         'NP(61 #years)',
         'VP(#will join)',
         'NP(the #board)',
         'PP(#as)',
         'NP(a nonexecutive #director)',
         'NP(#Nov. 29)'
     ]
     dprint('\n'.join(constituents))
     self.assertListEqual(expected_constituents, constituents)

     # Expected tree, written as (index, [children]); indices are positions
     # in the constituent list above.
     # 03 VP(will join)
     #    00 NP(Pierre-Vinken)
     #       01 ADJP(61 years old)
     #          02 NP(61 years)
     #    04 NP(the board)
     #    05 PP(as)
     #       06 NP(a nonexecutive director)
     #    07 NP(Nov. 29)
     expected_tree = (3, [(0, [(1, [(2, [])])]), (4, []), (5, [(6, [])]), (7, [])])
     actual_tree = sent.get_constituent_tree()
     dprint_constituent_tree(sent, actual_tree)
     self.assertEqual(repr(expected_tree), repr(actual_tree))
 def test2_GOLD_Wsj0002_1(self):
     # ID=wsj_0002.1 PARSER=GOLD NUMPARSE=1
     # Rudolph Agnew, 55 years old and former chairman of Consolidated Gold Fields PLC, was named a nonexecutive
     # director of this British industrial conglomerate.
     #
     # Builds the DRS for the derivation below (with proper-name resolution),
     # then checks the constituent strings and the constituent tree of the
     # sentence returned by get_verbnet_sentence().
     #
     # (<T S[dcl] 0 2>
     #   (<T S[dcl] 1 2>
     #       (<T NP 0 2>
     #           (<T NP 0 2>
     #               (<T NP 0 2>
     #                   (<T NP 0 1>
     #                       (<T N 1 2>
     #                           (<L N/N NNP NNP Rudolph N_72/N_72>)
     #                           (<L N NNP NNP Agnew N>)
     #                       )
     #                   )
     #                   (<L , , , , ,>)
     #               )
     #               (<T NP\NP 0 1>
     #                   (<T S[adj]\NP 0 2>
     #                       (<T S[adj]\NP 1 2>
     #                           (<T NP 0 1>
     #                               (<T N 1 2>
     #                                   (<L N/N CD CD 55 N_92/N_92>)
     #                                   (<L N NNS NNS years N>)
     #                               )
     #                           )
     #                           (<L (S[adj]\NP)\NP JJ JJ old (S[adj]\NP_82)\NP_83>)
     #                       )
     #                       (<T S[adj]\NP[conj] 1 2>
     #                           (<L conj CC CC and conj>)
     #                           (<T NP 0 2>
     #                               (<T NP 0 1>
     #                                   (<T N 1 2>
     #                                       (<L N/N JJ JJ former N_102/N_102>)
     #                                       (<L N NN NN chairman N>)
     #                                   )
     #                               )
     #                               (<T NP\NP 0 2>
     #                                   (<L (NP\NP)/NP IN IN of (NP_111\NP_111)/NP_112>)
     #                                   (<T NP 0 1>
     #                                       (<T N 1 2>
     #                                           (<L N/N NNP NNP Consolidated N_135/N_135>)
     #                                           (<T N 1 2>
     #                                               (<L N/N NNP NNP Gold N_128/N_128>)
     #                                               (<T N 1 2>
     #                                                   (<L N/N NNP NNP Fields N_121/N_121>)
     #                                                   (<L N NNP NNP PLC N>)
     #                                               )
     #                                           )
     #                                       )
     #                                   )
     #                               )
     #                           )
     #                       )
     #                   )
     #               )
     #           )
     #           (<L , , , , ,>)
     #       )
     #       (<T S[dcl]\NP 0 2>
     #           (<L (S[dcl]\NP)/(S[pss]\NP) VBD VBD was (S[dcl]\NP_10)/(S[pss]_11\NP_10:B)_11>)
     #           (<T S[pss]\NP 0 2>
     #               (<L (S[pss]\NP)/NP VBN VBN named (S[pss]\NP_18)/NP_19>)
     #                   (<T NP 0 2> (<T NP 1 2>
     #                       (<L NP[nb]/N DT DT a NP[nb]_33/N_33>)
     #                       (<T N 1 2>
     #                           (<L N/N JJ JJ nonexecutive N_28/N_28>)
     #                           (<L N NN NN director N>)
     #                       )
     #                   )
     #                   (<T NP\NP 0 2>
     #                       (<L (NP\NP)/NP IN IN of (NP_41\NP_41)/NP_42>)
     #                       (<T NP 1 2>
     #                           (<L NP[nb]/N DT DT this NP[nb]_63/N_63>)
     #                           (<T N 1 2>
     #                               (<L N/N JJ JJ British N_58/N_58>)
     #                               (<T N 1 2>
     #                                   (<L N/N JJ JJ industrial N_51/N_51>)
     #                                   (<L N NN NN conglomerate N>)
     #                               )
     #                           )
     #                       )
     #                   )
     #               )
     #           )
     #       )
     #   )
     #   (<L . . . . .>)
     # )
     derivation = r'''(<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T NP 0 2> (<T NP 0 2> (<T NP 0 2> (<T NP 0 1> (<T N 1 2>
         (<L N/N NNP NNP Rudolph N_72/N_72>) (<L N NNP NNP Agnew N>) ) ) (<L , , , , ,>) ) (<T NP\NP 0 1>
         (<T S[adj]\NP 0 2> (<T S[adj]\NP 1 2> (<T NP 0 1> (<T N 1 2> (<L N/N CD CD 55 N_92/N_92>)
         (<L N NNS NNS years N>) ) ) (<L (S[adj]\NP)\NP JJ JJ old (S[adj]\NP_82)\NP_83>) ) (<T S[adj]\NP[conj] 1 2>
         (<L conj CC CC and conj>) (<T NP 0 2> (<T NP 0 1> (<T N 1 2> (<L N/N JJ JJ former N_102/N_102>)
         (<L N NN NN chairman N>) ) ) (<T NP\NP 0 2> (<L (NP\NP)/NP IN IN of (NP_111\NP_111)/NP_112>) (<T NP 0 1>
         (<T N 1 2> (<L N/N NNP NNP Consolidated N_135/N_135>) (<T N 1 2> (<L N/N NNP NNP Gold N_128/N_128>)
         (<T N 1 2> (<L N/N NNP NNP Fields N_121/N_121>) (<L N NNP NNP PLC N>) ) ) ) ) ) ) ) ) ) ) (<L , , , , ,>) )
         (<T S[dcl]\NP 0 2> (<L (S[dcl]\NP)/(S[pss]\NP) VBD VBD was (S[dcl]\NP_10)/(S[pss]_11\NP_10:B)_11>)
         (<T S[pss]\NP 0 2> (<L (S[pss]\NP)/NP VBN VBN named (S[pss]\NP_18)/NP_19>) (<T NP 0 2> (<T NP 1 2>
         (<L NP[nb]/N DT DT a NP[nb]_33/N_33>) (<T N 1 2> (<L N/N JJ JJ nonexecutive N_28/N_28>)
         (<L N NN NN director N>) ) ) (<T NP\NP 0 2> (<L (NP\NP)/NP IN IN of (NP_41\NP_41)/NP_42>) (<T NP 1 2>
         (<L NP[nb]/N DT DT this NP[nb]_63/N_63>) (<T N 1 2> (<L N/N JJ JJ British N_58/N_58>) (<T N 1 2>
         (<L N/N JJ JJ industrial N_51/N_51>) (<L N NN NN conglomerate N>) ) ) ) ) ) ) ) ) (<L . . . . .>) )'''
     ptree = parse_ccg_derivation(derivation)
     self.assertIsNotNone(ptree)
     dprint(sentence_from_pt(ptree))

     builder = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     builder.build_execution_sequence(ptree)
     builder.create_drs()
     builder.resolve_proper_names()
     builder.final_rename()
     # The linear DRS is only printed here; it is not asserted.
     dprint(builder.get_drs().show(SHOW_LINEAR))

     sent = builder.get_verbnet_sentence()
     constituents = get_constituents_string_list(sent)
     dprint('\n'.join(constituents))
     # Hash indicates head word in constituent
     expected_constituents = [
         'NP(#Rudolph-Agnew)',
         'ADJP(55 years #old and former chairman of Consolidated-Gold-Fields-PLC)',
         'NP(55 #years)',
         'NP(former #chairman)',
         'PP(#of)',
         'NP(#Consolidated-Gold-Fields-PLC)',
         'VP(#was named)',
         'NP(a nonexecutive #director)',
         'PP(#of)',
         'NP(this British industrial #conglomerate)'
     ]
     self.assertListEqual(expected_constituents, constituents)

     # Expected tree, written as (index, [children]); indices are positions
     # in the constituent list above.
     # 6 VP(was named)
     #   0 NP(Rudolph-Agnew)
     #     1 ADJP(55 years old former chairman of Consolidated-Gold-Fields-PLC)
     #       2 NP(55 years)
     #       3 NP(former chairman)
     #         4 PP(of)
     #           5 NP(Consolidated-Gold-Fields-PLC)
     #   7 NP(a nonexecutive director)
     #     8 PP(of)
     #       9 NP(this British industrial conglomerate)
     expected_tree = (6, [(0, [(1, [(2, []), (3, [(4, [(5, [])])])])]), (7, [(8, [(9, [])])])])
     actual_tree = sent.get_constituent_tree()
     dprint_constituent_tree(sent, actual_tree)
     self.assertEqual(repr(expected_tree), repr(actual_tree))
Exemple #8
0
    def test8_RuleUniquenessEasySRL(self):
        # Samples every 50th line of each EasySRL ccgbank derivation file and
        # checks that the (left, right, result) categories of every ExecOp
        # resolve to exactly one combinatory rule.
        #
        # The project root is seven directory levels above this file.
        projdir = __file__
        for _ in range(7):
            projdir = os.path.dirname(projdir)
        ldcpath = os.path.join(projdir, 'data', 'ldc', 'easysrl', 'ccgbank')

        # Collect the regular files whose name mentions 'ccg_derivation'.
        allfiles = []
        for fname in os.listdir(ldcpath):
            if 'ccg_derivation' not in fname:
                continue
            ldcpath1 = os.path.join(ldcpath, fname)
            if os.path.isfile(ldcpath1):
                allfiles.append(ldcpath1)

        # Counted for debugging only; parse failures do not fail this test.
        failed_parse = 0
        # Entries are (file name, line index, description, matched rules).
        ambiguous = []
        for fn in allfiles:
            with open(fn, 'r') as fd:
                lines = fd.readlines()

            name, _ = os.path.splitext(os.path.basename(fn))
            for i in range(0, len(lines), 50):
                ccgbank = lines[i]
                dprint('%s-%04d' % (name, i))
                ccg = Ccg2Drs()
                try:
                    pt = parse_ccg_derivation(ccgbank)
                    ccg.build_execution_sequence(pt)
                except Exception:
                    # Derivations that cannot be parsed are skipped.
                    failed_parse += 1
                    continue

                self.assertIsNotNone(pt)
                for op in ccg.exeque:
                    if isinstance(op, PushOp):
                        continue
                    self.assertIsInstance(op, ExecOp)
                    left = op.sub_ops[0].category
                    result = op.category
                    if len(op.sub_ops) == 2:
                        right = op.sub_ops[1].category
                    else:
                        # Unary operation: there is no right operand.
                        right = CAT_EMPTY
                    exclude = []
                    # Should not have ambiguity
                    rule = get_rule(left, right, result, exclude)
                    if rule is None and right != CAT_EMPTY:
                        # Retry the binary lookup with features stripped.
                        rule = get_rule(left.remove_features(),
                                        right.remove_features(),
                                        result.remove_features(), exclude)
                    self.assertIsNotNone(rule)
                    # NOTE(review): presumably get_rule() records each rule it
                    # returns in `exclude`, so repeated calls yield the next
                    # candidate and eventually None - confirm against get_rule.
                    rstr = ''
                    while rule is not None:
                        rstr += repr(rule) + '|'
                        rule = get_rule(left, right, result, exclude)
                    if len(exclude) > 1:
                        # Keep the file name and line index with the entry so
                        # the report below has all four values it formats.
                        ambiguous.append(
                            (name, i, '%s <- %s <{%s}> %s' %
                             (result, left, rstr, right), exclude))
        for x in ambiguous:
            dprint('ambiguous rule in %s-%04d: %s {%s}' % x)
        self.assertTrue(len(ambiguous) == 0)
Exemple #9
0
    def test6_Wsj0051_13(self):
        txt = r'''
(<T S[dcl] 0 2> 
  (<T S[dcl] 1 2> 
    (<T NP 1 2> 
      (<L NP[nb]/N DT DT The NP[nb]_273/N_273>) 
      (<L N NNS NNS bids N>) 
    ) 
    (<T S[dcl]\NP 1 2> 
      (<T (S\NP)/(S\NP) 1 2> 
        (<L , , , , ,>) 
        (<T (S\NP)/(S\NP) 0 2> 
          (<T S[dcl]/S[dcl] 1 2> 
            (<T S/(S\NP) 0 1> 
              (<L NP PRP PRP he NP>) 
            ) 
            (<L (S[dcl]\NP)/S[dcl] VBD VBD added (S[dcl]\NP_242)/S[dcl]_243>) 
          ) 
          (<L , , , , ,>) 
        ) 
      ) 
      (<T S[dcl]\NP 0 2> 
        (<L (S[dcl]\NP)/(S[adj]\NP) VBD VBD were (S[dcl]\NP_211)/(S[adj]_212\NP_211:B)_212>) 
        (<T S[adj]\NP 0 2> 
          (<L (S[adj]\NP)/PP JJ JJ contrary (S[adj]\NP_219)/PP_220>) 
          (<T PP 0 2> 
            (<L PP/NP TO TO to PP/NP_225>) 
            (<T NP 0 1> 
              (<T N 1 2> 
                (<L N/N JJ JJ common N_234/N_234>) 
                (<L N NN NN sense N>) 
              ) 
            ) 
          ) 
        ) 
      ) 
    ) 
  ) 
  (<L . . . . .>) 
) 
'''
        pt = parse_ccg_derivation(txt)
        ccg = Ccg2Drs()
        ccg.build_execution_sequence(pt)
        # Check execution queue
        actual = [repr(x) for x in ccg.exeque]
        expected = [
            '<PushOp>:(the, NP[nb]/N, DT)',
            '<PushOp>:(bids, N, NNS)',
            '<ExecOp>:(2, FA NP)',
            '<PushOp>:(,, ,, ,)',
            '<PushOp>:(he, NP, PRP)',
            '<ExecOp>:(1, TR S/(S\\NP))',
            '<PushOp>:(add, (S[dcl]\\NP)/S[dcl], VBD)',
            '<ExecOp>:(2, FC S[dcl]/S[dcl])',
            '<PushOp>:(,, ,, ,)',
            '<ExecOp>:(2, L_UNARY_TC (S\\NP)/(S\\NP))',
            '<ExecOp>:(2, RP (S\\NP)/(S\\NP))',
            '<PushOp>:(be, (S[dcl]\\NP)/(S[adj]\\NP), VBD)',
            '<PushOp>:(contrary, (S[adj]\\NP)/PP, JJ)',
            '<PushOp>:(to, PP/NP, TO)',
            '<PushOp>:(common, N/N, JJ)',
            '<PushOp>:(sense, N, NN)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(1, LP NP)',
            '<ExecOp>:(2, FA PP)',
            '<ExecOp>:(2, FA S[adj]\\NP)',
            '<ExecOp>:(2, FA S[dcl]\\NP)',
            '<ExecOp>:(2, FA S[dcl]\\NP)',
            '<ExecOp>:(2, BA S[dcl])',
            '<PushOp>:(., ., .)',
            '<ExecOp>:(2, LP S[dcl])',
        ]
        self.assertListEqual(expected, actual)
Exemple #10
0
    def test5_EasySRL_04_1850(self):
        txt = r'''
(<T S[dcl] 0 2>
  (<T S[dcl] 1 2>
    (<T NP 0 2>
      (<L NP/N DT DT The NP/N>)
      (<T N 1 2>
        (<L N/N NN NN investment N/N>)
        (<T N 0 2>
          (<L N NN NN community N>)
          (<L , , , , ,>)
        )
      )
    )
    (<T S[dcl]\NP 1 2>
      (<T (S\NP)/(S\NP) 0 2>
        (<L (S\NP)/(S\NP) RB RB however (S\NP)/(S\NP)>)
        (<T ((S\NP)/(S\NP))\((S\NP)/(S\NP)) 1 2>
          (<L , , , , ,>)
          (<L (S\NP)/(S\NP) RB RB strongly (S\NP)/(S\NP)>)
        )
      )
      (<T S[dcl]\NP 0 2>
        (<L (S[dcl]\NP)/S[em] VBZ VBZ believes (S[dcl]\NP)/S[em]>)
          (<T S[em] 0 2>
            (<L S[em]/S[dcl] IN IN that S[em]/S[dcl]>)
            (<T S[dcl] 1 2>
              (<T NP 0 2>
                 (<L NP/N DT DT the NP/N>)
                 (<L N NN NN strike N>)
              )
              (<T S[dcl]\NP 0 2>
                (<L (S[dcl]\NP)/(S[b]\NP) MD MD will (S[dcl]\NP)/(S[b]\NP)>)
                (<T S[b]\NP 0 2>
                  (<T S[b]\NP 0 2>
                    (<L (S[b]\NP)/(S[pss]\NP) VB VB be (S[b]\NP)/(S[pss]\NP)>)
                    (<L S[pss]\NP VBN VBN settled S[pss]\NP>)
                  )
                (<T (S\NP)\(S\NP) 0 2>
                  (<L ((S\NP)\(S\NP))/S[dcl] IN IN before ((S\NP)\(S\NP))/S[dcl]>)
                  (<T S[dcl] 1 2>
                    (<L NP[thr] EX EX there NP[thr]>)
                    (<T S[dcl]\NP[thr] 0 2>
                      (<L (S[dcl]\NP[thr])/NP VBZ VBZ is (S[dcl]\NP[thr])/NP>)
                      (<T NP 0 2>
                        (<T NP 0 2>
                          (<L NP/N DT DT any NP/N>)
                          (<T N 1 2>
                            (<L N/N JJ JJ lasting N/N>)
                            (<T N 0 2>
                              (<L N/PP NN NN effect N/PP>)
                              (<T PP 0 2>
                                (<L PP/NP IN IN on PP/NP>)
                                (<T NP 1 2>
                                  (<L NP/NP CC CC either NP/NP>)
                                  (<T NP 0 1>
                                    (<L N NNP NNP Boeing N>)
                                  )
                                )
                              )
                            )
                          )
                        )
                        (<T NP\NP 1 2>
                          (<L conj CC CC or conj>)
                          (<T NP 0 2>
                            (<L NP/(N/PP) PRP$ PRP$ its NP/(N/PP)>)
                            (<T N/PP 1 2>
                              (<L N/N NN NN work N/N>)
                              (<L N/PP NN NN force N/PP>)
                            )
                          )
                        )
                      )
                    )
                  )
                )
              )
            )
          )
        )
      )
    )
  )
  (<L . . . . .>)
)'''
        pt = parse_ccg_derivation(txt)
        ccg = Ccg2Drs()
        ccg.build_execution_sequence(pt)
        # Check execution queue
        actual = [repr(x) for x in ccg.exeque]
        expected = [
            '<PushOp>:(the, NP/N, DT)',
            '<PushOp>:(investment, N/N, NN)',
            '<PushOp>:(community, N, NN)',
            '<PushOp>:(,, ,, ,)',
            '<ExecOp>:(2, LP N)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA NP)',
            '<PushOp>:(however, (S\\NP)/(S\\NP), RB)',
            '<PushOp>:(,, ,, ,)',
            '<PushOp>:(strongly, (S\\NP)/(S\\NP), RB)',
            '<ExecOp>:(2, R_UNARY_TC ((S\\NP)/(S\\NP))\((S\\NP)/(S\\NP)))',
            '<ExecOp>:(2, BA (S\\NP)/(S\\NP))',
            '<PushOp>:(believe, (S[dcl]\\NP)/S[em], VBZ)',
            '<PushOp>:(that, S[em]/S[dcl], IN)',
            '<PushOp>:(the, NP/N, DT)',
            '<PushOp>:(strike, N, NN)',
            '<ExecOp>:(2, FA NP)',
            '<PushOp>:(will, (S\\NP)/(S\\NP), MD)',
            '<PushOp>:(be, (S[b]\\NP)/(S[pss]\\NP), VB)',
            '<PushOp>:(settle, S[pss]\\NP, VBN)',
            '<ExecOp>:(2, FA S[b]\\NP)',
            '<PushOp>:(before, ((S\\NP)\(S\\NP))/S[dcl], IN)',
            '<PushOp>:(there, NP[thr], EX)',
            '<PushOp>:(be, (S[dcl]\\NP[thr])/NP, VBZ)',
            '<PushOp>:(any, NP/N, DT)',
            '<PushOp>:(lasting, N/N, JJ)',
            '<PushOp>:(effect, N/PP, NN)',
            '<PushOp>:(on, PP/NP, IN)',
            '<PushOp>:(either, NP/NP, CC)',
            '<PushOp>:(Boeing, N, NNP)',
            '<ExecOp>:(1, LP NP)',
            '<ExecOp>:(2, FA NP)',
            '<ExecOp>:(2, FA PP)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA NP)',
            '<PushOp>:(or, conj, CC)',
            '<PushOp>:(its, NP/(N/PP), PRP$)',
            '<PushOp>:(work, N/N, NN)',
            '<PushOp>:(force, N/PP, NN)',
            '<ExecOp>:(2, FC N/PP)',
            '<ExecOp>:(2, FA NP)',
            '<ExecOp>:(2, R_UNARY_TC NP\\NP)',
            '<ExecOp>:(2, BA NP)',
            '<ExecOp>:(2, FA S[dcl]\\NP[thr])',
            '<ExecOp>:(2, BA S[dcl])',
            '<ExecOp>:(2, FA (S\\NP)\\(S\\NP))',
            '<ExecOp>:(2, BA S[b]\\NP)',
            '<ExecOp>:(2, FA S[dcl]\\NP)',
            '<ExecOp>:(2, BA S[dcl])',
            '<ExecOp>:(2, FA S[em])',
            '<ExecOp>:(2, FA S[dcl]\\NP)',
            '<ExecOp>:(2, FA S[dcl]\\NP)',
            '<ExecOp>:(2, BA S[dcl])',
            '<PushOp>:(., ., .)',
            '<ExecOp>:(2, LP S[dcl])',
        ]
        self.assertListEqual(expected, actual)
Exemple #11
0
    def test4_Wsj0999_11(self):
        txt = r'''
(<T S[dcl] 0 2>
  (<T S[dcl] 0 2>
    (<T S[dcl] 1 2>
      (<T NP 0 2>
        (<T NP 0 1>
          (<L N NNS NNS People N>)
        )
        (<T NP\NP 0 2>
          (<L (NP\NP)/NP IN IN on (NP_159\NP_159)/NP_160>)
          (<T NP 0 1>
            (<T N 1 2>
              (<L N/N VBN VBN fixed N_169/N_169>)
              (<L N NNS NNS incomes N>)
            )
          )
        )
      )
      (<T S[dcl]\NP 0 2>
        (<L (S[dcl]\NP)/NP VBP VBP get (S[dcl]\NP_128)/NP_129>)
        (<T NP 0 2>
          (<T NP 1 2>
            (<L NP[nb]/N DT DT a NP[nb]_136/N_136>)
            (<L N NN NN break N>)
          )
          (<T NP\NP 0 2>
            (<L (NP\NP)/NP IN IN at (NP_144\NP_144)/NP_145>)
            (<T NP 0 1>
              (<L N NNP NNP Espre N>)
            )
          )
        )
      )
    )
    (<T S[dcl][conj] 1 2>
      (<L ; ; : ; ;>)
      (<T S[dcl] 1 2>
        (<T NP 0 1>
          (<T N 1 2>
            (<L N/N IN IN over N_248/N_248>)
            (<L N CD CD 55 N>)
          )
        )
        (<T S[dcl]\NP 0 2>
          (<L (S[dcl]\NP)/NP VBZ NNS wins (S[dcl]\NP_177)/NP_178>)
          (<T NP 0 2>
            (<T NP 1 2>
              (<L NP[nb]/N DT DT a NP[nb]_206/N_206>)
              (<T N 1 2>
                (<T N/N 1 2>
                  (<L (N/N)/(N/N) CD CD 45 (N_201/N_195)_201/(N_201/N_195)_201>)
                  (<L N/N NN NN % N_187/N_187>)
                )
                (<L N NN NN discount N>)
              )
            )
            (<T NP\NP 0 2>
              (<L (NP\NP)/NP IN IN at (NP_214\NP_214)/NP_215>)
              (<T NP 0 1>
                (<T N 1 2>
                  (<L N/N NNP NNP Anaheim N_238/N_238>)
                  (<T N 1 2>
                    (<L N/N NNP NNP Imperial N_231/N_231>)
                    (<T N 1 2>
                      (<L N/N NNP NNP Health N_224/N_224>)
                      (<L N NNP NNP Spa N>)
                    )
                  )
                )
              )
            )
          )
        )
      )
    )
  )
  (<L . . . . .>)
)'''
        pt = parse_ccg_derivation(txt)
        ccg = Ccg2Drs()
        ccg.build_execution_sequence(pt)
        # Check execution queue
        actual = [repr(x) for x in ccg.exeque]
        expected = [
            '<PushOp>:(people, N, NNS)',
            '<ExecOp>:(1, LP NP)',
            '<PushOp>:(on, (NP\\NP)/NP, IN)',
            '<PushOp>:(fix, N/N, VBN)',
            '<PushOp>:(incomes, N, NNS)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(1, LP NP)',
            '<ExecOp>:(2, FA NP\\NP)',
            '<ExecOp>:(2, BA NP)',
            '<PushOp>:(get, (S[dcl]\\NP)/NP, VBP)',
            '<PushOp>:(a, NP[nb]/N, DT)',
            '<PushOp>:(break, N, NN)',
            '<ExecOp>:(2, FA NP)',
            '<PushOp>:(at, (NP\\NP)/NP, IN)',
            '<PushOp>:(Espre, N, NNP)',
            '<ExecOp>:(1, LP NP)',
            '<ExecOp>:(2, FA NP\\NP)',
            '<ExecOp>:(2, BA NP)',
            '<ExecOp>:(2, FA S[dcl]\\NP)',
            '<ExecOp>:(2, BA S[dcl])',
            '<PushOp>:(;, ;, ;)',
            '<PushOp>:(over, N/N, IN)',
            '<PushOp>:(55, N, CD)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(1, LP NP)',
            '<PushOp>:(win, (S[dcl]\\NP)/NP, VBZ)',
            '<PushOp>:(a, NP[nb]/N, DT)',
            '<PushOp>:(45, (N/N)/(N/N), CD)',
            '<PushOp>:(%, N/N, NN)',
            '<ExecOp>:(2, FA N/N)',
            '<PushOp>:(discount, N, NN)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA NP)',
            '<PushOp>:(at, (NP\\NP)/NP, IN)',
            '<PushOp>:(Anaheim, N/N, NNP)',
            '<PushOp>:(Imperial, N/N, NNP)',
            '<PushOp>:(Health, N/N, NNP)',
            '<PushOp>:(Spa, N, NNP)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(1, LP NP)',
            '<ExecOp>:(2, FA NP\\NP)',
            '<ExecOp>:(2, BA NP)',
            '<ExecOp>:(2, FA S[dcl]\\NP)',
            '<ExecOp>:(2, BA S[dcl])',
            '<ExecOp>:(2, RP S[dcl][conj])',
            '<ExecOp>:(2, RCONJ S[dcl])',
            '<PushOp>:(., ., .)',
            '<ExecOp>:(2, LP S[dcl])',
        ]
        self.assertListEqual(expected, actual)
Exemple #12
0
    def test3_Wsj0002_1(self):
        # Rudolph Agnew, 55 years old and former chairman of Consolidated Gold Fields PLC, was named a nonexecutive
        # director of this British industrial conglomerate.
        txt = r'''
(<T S[dcl] 0 2>
  (<T S[dcl] 1 2>
    (<T NP 0 2>
      (<T NP 0 2>
        (<T NP 0 2>
          (<T NP 0 1>
            (<T N 1 2>
              (<L N/N NNP NNP Rudolph N_72/N_72>)
              (<L N NNP NNP Agnew N>)
            )
          )
          (<L , , , , ,>)
        )
        (<T NP\NP 0 1>
          (<T S[adj]\NP 0 2>
            (<T S[adj]\NP 1 2>
              (<T NP 0 1>
                (<T N 1 2>
                  (<L N/N CD CD 55 N_92/N_92>)
                  (<L N NNS NNS years N>)
                )
              )
              (<L (S[adj]\NP)\NP JJ JJ old (S[adj]\NP_82)\NP_83>)
            )
            (<T S[adj]\NP[conj] 1 2>
              (<L conj CC CC and conj>)
              (<T NP 0 2>
                (<T NP 0 1>
                  (<T N 1 2>
                    (<L N/N JJ JJ former N_102/N_102>)
                    (<L N NN NN chairman N>)
                  )
                )
                (<T NP\NP 0 2>
                  (<L (NP\NP)/NP IN IN of (NP_111\NP_111)/NP_112>)
                  (<T NP 0 1>
                    (<T N 1 2>
                      (<L N/N NNP NNP Consolidated N_135/N_135>)
                      (<T N 1 2>
                        (<L N/N NNP NNP Gold N_128/N_128>)
                        (<T N 1 2>
                          (<L N/N NNP NNP Fields N_121/N_121>)
                          (<L N NNP NNP PLC N>)
                        )
                      )
                    )
                  )
                )
              )
            )
          )
        )
      )
      (<L , , , , ,>)
    )
    (<T S[dcl]\NP 0 2>
      (<L (S[dcl]\NP)/(S[pss]\NP) VBD VBD was (S[dcl]\NP_10)/(S[pss]_11\NP_10:B)_11>)
      (<T S[pss]\NP 0 2>
        (<L (S[pss]\NP)/NP VBN VBN named (S[pss]\NP_18)/NP_19>)
          (<T NP 0 2> (<T NP 1 2>
            (<L NP[nb]/N DT DT a NP[nb]_33/N_33>)
            (<T N 1 2>
              (<L N/N JJ JJ nonexecutive N_28/N_28>)
              (<L N NN NN director N>)
            )
          )
          (<T NP\NP 0 2>
            (<L (NP\NP)/NP IN IN of (NP_41\NP_41)/NP_42>)
            (<T NP 1 2>
              (<L NP[nb]/N DT DT this NP[nb]_63/N_63>)
              (<T N 1 2>
                (<L N/N JJ JJ British N_58/N_58>)
                (<T N 1 2>
                  (<L N/N JJ JJ industrial N_51/N_51>)
                  (<L N NN NN conglomerate N>)
                )
              )
            )
          )
        )
      )
    )
  )
  (<L . . . . .>)
)'''
        pt = parse_ccg_derivation(txt)
        if future_string == unicode:
            pt_old = parse_ccg_derivation_old(txt)
        else:
            pt_old = pt_to_utf8(parse_ccg_derivation_old(txt), True)
        actual = repr(pt)
        expected = repr(pt_old)
        self.assertEquals(expected, actual)
        ccg = Ccg2Drs()
        ccg.build_execution_sequence(pt)
        # Check execution queue
        actual = [repr(x) for x in ccg.exeque]
        expected = [
            '<PushOp>:(Rudolph, N/N, NNP)',
            '<PushOp>:(Agnew, N, NNP)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(1, LP NP)',
            '<PushOp>:(,, ,, ,)',
            '<ExecOp>:(2, LP NP)',
            '<PushOp>:(55, N/N, CD)',
            '<PushOp>:(years, N, NNS)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(1, LP NP)',
            '<PushOp>:(old, (S[adj]\\NP)\\NP, JJ)',
            '<ExecOp>:(2, BA S[adj]\\NP)',
            '<PushOp>:(and, conj, CC)',
            '<PushOp>:(former, N/N, JJ)',
            '<PushOp>:(chairman, N, NN)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(1, LP NP)',
            '<PushOp>:(of, (NP\\NP)/NP, IN)',
            '<PushOp>:(Consolidated, N/N, NNP)',
            '<PushOp>:(Gold, N/N, NNP)',
            '<PushOp>:(Fields, N/N, NNP)',
            '<PushOp>:(PLC, N, NNP)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(1, LP NP)',
            '<ExecOp>:(2, FA NP\\NP)',
            '<ExecOp>:(2, BA NP)',
            '<ExecOp>:(2, CONJ_TC S[adj]\\NP[conj])',
            '<ExecOp>:(2, RCONJ S[adj]\\NP)',
            '<ExecOp>:(1, L_UNARY_TC NP\\NP)',
            '<ExecOp>:(2, BA NP)',
            '<PushOp>:(,, ,, ,)',
            '<ExecOp>:(2, LP NP)',
            '<PushOp>:(be, (S[dcl]\\NP)/(S[pss]\\NP), VBD)',
            '<PushOp>:(name, (S[pss]\\NP)/NP, VBN)',
            '<PushOp>:(a, NP[nb]/N, DT)',
            '<PushOp>:(nonexecutive, N/N, JJ)',
            '<PushOp>:(director, N, NN)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA NP)',
            '<PushOp>:(of, (NP\\NP)/NP, IN)',
            '<PushOp>:(this, NP[nb]/N, DT)',
            '<PushOp>:(british, N/N, JJ)',
            '<PushOp>:(industrial, N/N, JJ)',
            '<PushOp>:(conglomerate, N, NN)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA NP)',
            '<ExecOp>:(2, FA NP\\NP)',
            '<ExecOp>:(2, BA NP)',
            '<ExecOp>:(2, FA S[pss]\\NP)',
            '<ExecOp>:(2, FA S[dcl]\\NP)',
            '<ExecOp>:(2, BA S[dcl])',
            '<PushOp>:(., ., .)',
            '<ExecOp>:(2, LP S[dcl])',
        ]
        self.assertListEqual(expected, actual)
Exemple #13
0
    def test1_Wsj0001_2(self):
        txt = r'''
(<T S[dcl] 0 2>
  (<T S[dcl] 1 2>
    (<T NP 0 1>
      (<T N 1 2>
        (<L N/N NNP NNP Mr. N_107/N_107>)
        (<L N NNP NNP Vinken N>)
      )
    )
    (<T S[dcl]\NP 0 2>
      (<L (S[dcl]\NP)/NP VBZ VBZ is (S[dcl]\NP_112)/NP_113>)
      (<T NP 0 2>
        (<T NP 0 1>
          (<L N NN NN chairman N>)
        )
        (<T NP\NP 0 2>
          (<L (NP\NP)/NP IN IN of (NP_109\NP_109)/NP_110>)
          (<T NP 0 2>
            (<T NP 0 1>
              (<T N 1 2>
                (<L N/N NNP NNP Elsevier N_107/N_107>)
                (<L N NNP NNP N.V. N>)
              )
            )
            (<T NP[conj] 1 2>
              (<L , , , , ,>)
              (<T NP 1 2>
                (<L NP[nb]/N DT DT the NP[nb]_48/N_48>)
                (<T N 1 2>
                  (<L N/N NNP NNP Dutch N_107/N_107>)
                  (<T N 1 2>
                    (<L N/N VBG VBG publishing N_107/N_107>)
                    (<L N NN NN group N>)
                  )
                )
              )
            )
          )
        )
      )
    )
  )
  (<L . . . . .>)
)'''
        pt = parse_ccg_derivation(txt)
        ccg = Ccg2Drs()
        ccg.build_execution_sequence(pt)
        # Check execution queue
        actual = [repr(x) for x in ccg.exeque]
        expected = [
            '<PushOp>:(Mr, N/N, NNP)',
            '<PushOp>:(Vinken, N, NNP)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(1, LP NP)',
            '<PushOp>:(be, (S[dcl]\\NP)/NP, VBZ)',
            '<PushOp>:(chairman, N, NN)',
            '<ExecOp>:(1, LP NP)',
            '<PushOp>:(of, (NP\\NP)/NP, IN)',
            '<PushOp>:(Elsevier, N/N, NNP)',
            '<PushOp>:(N.V, N, NNP)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(1, LP NP)',
            '<PushOp>:(,, ,, ,)',
            '<PushOp>:(the, NP[nb]/N, DT)',
            '<PushOp>:(Dutch, N/N, NNP)',
            '<PushOp>:(publish, N/N, VBG)',
            '<PushOp>:(group, N, NN)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA NP)',
            '<ExecOp>:(2, RP NP[conj])',
            '<ExecOp>:(2, RCONJ NP)',
            '<ExecOp>:(2, FA NP\\NP)',
            '<ExecOp>:(2, BA NP)',
            '<ExecOp>:(2, FA S[dcl]\\NP)',
            '<ExecOp>:(2, BA S[dcl])',
            '<PushOp>:(., ., .)',
            '<ExecOp>:(2, LP S[dcl])',
        ]
        self.assertListEqual(expected, actual)
        # Check ccgbank generation
        txt2 = '\n' + ccg.get_predarg_ccgbank(pretty=True)
        self.assertEquals(txt, txt2)
        # Check lexicon
        expected = [
            'Mr.', 'Vinken', 'is', 'chairman', 'of', 'Elsevier', 'N.V.', ',',
            'the', 'Dutch', 'publishing', 'group', '.'
        ]
        actual = [x.word for x in ccg.lexque]
        self.assertListEqual(expected, actual)
        # Check dependencies
        self.assertEquals(ccg.lexque[0].head, 1)  # Mr. -> Vinken
        self.assertEquals(ccg.lexque[1].head, 2)  # Vinken -> is
        self.assertEquals(ccg.lexque[2].head, 2)  # root
        self.assertEquals(ccg.lexque[3].head, 2)  # chairman -> is
        self.assertEquals(ccg.lexque[4].head, 3)  # of -> chairman
        self.assertEquals(ccg.lexque[5].head, 6)  # Elsevier -> N.V.
        self.assertEquals(ccg.lexque[6].head, 4)  # N.V. -> of
        self.assertEquals(ccg.lexque[8].head, 11)  # the -> group
        self.assertEquals(ccg.lexque[9].head, 11)  # Dutch -> group
        self.assertEquals(ccg.lexque[10].head, 11)  # publishing -> group
        self.assertEquals(ccg.lexque[11].head, 6)  # group -> N.V
Exemple #14
0
    def test2_Wsj0037_37(self):
        txt = r'''
(<T S[dcl] 0 2>
  (<T S[dcl] 1 2>
    (<T NP 0 2>
      (<T NP 0 1>
        (<T N 1 2>
          (<T N/N 0 2>
            (<L N/N JJR JJR More N_134/N_134>)
            (<T N/N[conj] 1 2>
              (<L conj CC CC and conj>)
              (<L N/N JJR JJR more N_141/N_141>)
            )
          )
          (<L N NNS NNS corners N>)
        )
      )
      (<T NP\NP 0 2>
        (<L (NP\NP)/NP IN IN of (NP_152\NP_152)/NP_153>)
        (<T NP 1 2>
          (<L NP[nb]/N DT DT the NP[nb]_160/N_160>)
          (<L N NN NN globe N>)
        )
      )
    )
    (<T S[dcl]\NP 0 2>
      (<L (S[dcl]\NP)/(S[ng]\NP) VBP VBP are (S[dcl]\NP_91)/(S[ng]_92\NP_91:B)_92>)
      (<T S[ng]\NP 0 2>
        (<L (S[ng]\NP)/(S[adj]\NP) VBG VBG becoming (S[ng]\NP_101)/(S[adj]_102\NP_101:B)_102>)
        (<T S[adj]\NP 0 2>
          (<L (S[adj]\NP)/PP JJ JJ free (S[adj]\NP_109)/PP_110>)
          (<T PP 0 2>
            (<L PP/NP IN IN of PP/NP_115>)
            (<T NP 0 1>
              (<T N 1 2>
                (<L N/N NN NN tobacco N_124/N_124>)
                (<L N NN NN smoke N>)
              )
            )
          )
        )
      )
    )
  )
  (<L . . . . .>)
)'''
        pt = parse_ccg_derivation(txt)
        ccg = Ccg2Drs()
        ccg.build_execution_sequence(pt)
        # Check execution queue
        actual = [repr(x) for x in ccg.exeque]
        expected = [
            '<PushOp>:(more, N/N, JJR)',
            '<PushOp>:(and, conj, CC)',
            '<PushOp>:(more, N/N, JJR)',
            '<ExecOp>:(2, RP N/N[conj])',
            '<ExecOp>:(2, RCONJ N/N)',
            '<PushOp>:(corners, N, NNS)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(1, LP NP)',
            '<PushOp>:(of, (NP\\NP)/NP, IN)',
            '<PushOp>:(the, NP[nb]/N, DT)',
            '<PushOp>:(globe, N, NN)',
            '<ExecOp>:(2, FA NP)',
            '<ExecOp>:(2, FA NP\\NP)',
            '<ExecOp>:(2, BA NP)',
            '<PushOp>:(be, (S[dcl]\\NP)/(S[ng]\\NP), VBP)',
            '<PushOp>:(become, (S[ng]\\NP)/(S[adj]\\NP), VBG)',
            '<PushOp>:(free, (S[adj]\\NP)/PP, JJ)',
            '<PushOp>:(of, PP/NP, IN)',
            '<PushOp>:(tobacco, N/N, NN)',
            '<PushOp>:(smoke, N, NN)',
            '<ExecOp>:(2, FA N)',
            '<ExecOp>:(1, LP NP)',
            '<ExecOp>:(2, FA PP)',
            '<ExecOp>:(2, FA S[adj]\\NP)',
            '<ExecOp>:(2, FA S[ng]\\NP)',
            '<ExecOp>:(2, FA S[dcl]\\NP)',
            '<ExecOp>:(2, BA S[dcl])',
            '<PushOp>:(., ., .)',
            '<ExecOp>:(2, LP S[dcl])',
        ]
        self.assertListEqual(expected, actual)
        # Check lexicon
        expected = [
            'More', 'and', 'more', 'corners', 'of', 'the', 'globe', 'are',
            'becoming', 'free', 'of', 'tobacco', 'smoke', '.'
        ]
        actual = [x.word for x in ccg.lexque]
        self.assertListEqual(expected, actual)
        # Check dependencies
        self.assertEquals(ccg.lexque[0].head, 3)  # More -> corners
        self.assertEquals(ccg.lexque[2].head, 0)  # more -> More
        self.assertEquals(ccg.lexque[3].head, 7)  # corners -> are
        self.assertEquals(ccg.lexque[4].head, 3)  # of -> corners
        self.assertEquals(ccg.lexque[5].head, 6)  # the -> globe
        self.assertEquals(ccg.lexque[6].head, 4)  # globe -> of
        self.assertEquals(ccg.lexque[7].head, 7)  # root
        self.assertEquals(ccg.lexque[8].head, 7)  # becoming -> are
        self.assertEquals(ccg.lexque[9].head, 8)  # free -> becoming
        self.assertEquals(ccg.lexque[10].head, 9)  # of -> free
        self.assertEquals(ccg.lexque[11].head, 12)  # tobacco -> smoke
        self.assertEquals(ccg.lexque[12].head, 10)  # smoke -> of
Exemple #15
0
    def test9_RuleExecutionEasySRL(self):
        # Samples every 50th line of each EasySRL ccgbank derivation file and
        # checks that applying each ExecOp's rule to its operand categories
        # gives a category that unifies with the category recorded on the op.
        # Failures are collected, printed, and asserted on at the end.
        #
        # The project root is seven directory levels above this file.
        projdir = __file__
        for _ in range(7):
            projdir = os.path.dirname(projdir)
        ldcpath = os.path.join(projdir, 'data', 'ldc', 'easysrl', 'ccgbank')

        # Collect the regular files whose name mentions 'ccg_derivation'.
        allfiles = []
        for fname in os.listdir(ldcpath):
            if 'ccg_derivation' not in fname:
                continue
            ldcpath1 = os.path.join(ldcpath, fname)
            if os.path.isfile(ldcpath1):
                allfiles.append(ldcpath1)

        # Rules that are never checked by applying them to the categories.
        skipped_rules = [RL_TCL_UNARY, RL_TCR_UNARY, RL_TC_ATOM, RL_TC_CONJ,
                         RL_LPASS, RL_RPASS, RL_TYPE_RAISE]

        # Counted for debugging only; parse failures do not fail this test.
        failed_parse = 0
        # Three entries are appended per failing rule application.
        failed_exec = []
        analysis = []
        for fn in allfiles:
            with open(fn, 'r') as fd:
                lines = fd.readlines()

            name, _ = os.path.splitext(os.path.basename(fn))
            for i in range(0, len(lines), 50):
                ccgbank = lines[i]
                dprint('%s-%04d' % (name, i))
                ccg = Ccg2Drs()
                try:
                    pt = parse_ccg_derivation(ccgbank)
                    ccg.build_execution_sequence(pt)
                except Exception:
                    # Derivations that cannot be parsed are skipped.
                    failed_parse += 1
                    continue

                for op in ccg.exeque:
                    if isinstance(op, PushOp):
                        continue
                    self.assertIsInstance(op, ExecOp)
                    left = op.sub_ops[0].category.remove_wildcards()
                    result = op.category.remove_wildcards()
                    if len(op.sub_ops) == 2:
                        right = op.sub_ops[1].category.remove_wildcards()
                    else:
                        # Unary operation: there is no right operand.
                        right = CAT_EMPTY

                    if op.rule is None or op.rule in skipped_rules:
                        # Nothing is computed for these ops, so there is no
                        # `actual` category to compare against.
                        continue

                    actual = op.rule.apply_rule_to_category(left, right)
                    if not actual.can_unify(result):
                        # Recorded rather than asserted here, so the report
                        # below lists every failure before the test fails.
                        failed_exec.append('%s-%04d: %s <!> %s' %
                                           (name, i, actual, result))
                        failed_exec.append('%s <- %s %s %s' %
                                           (actual, left, op.rule, right))
                        failed_exec.append(ccgbank)
                    elif left == CAT_NP and right == CAT_NP_NP and op.rule == RL_BA and op.head != 0:
                        # Can add analysis here
                        # Expected: NP(x) <- λx.NP(x) -BA- λxλy.NP(y)\NP(x)
                        # Actual: NP(y) <- λx.NP(x) -BA- λxλy.NP(y)\NP(x)
                        analysis.append('%s-%04d: NP <ba> NP\\NP' %
                                        (name, i))

        if len(analysis) != 0:
            dprint('-----------------------')
            print('%d rules failed analysis' % len(analysis))
            dprint('--')
            for ln in analysis:
                dprint(ln)
        if len(failed_exec) != 0:
            dprint('-----------------------')
            # Parenthesised: '%' and '/' share precedence, so the division
            # must happen before the count is formatted.
            print('%d rules failed exec' % (len(failed_exec) // 3))
            dprint('--')
            for ln in failed_exec:
                dprint(ln)

        self.assertTrue(len(failed_exec) == 0)
        self.assertTrue(len(analysis) == 0)
Exemple #16
0
    def test7_Wsj0051_30(self):
        txt = r'''
(<T S[dcl] 0 2> 
  (<T S[dcl] 1 2> 
    (<T NP 0 1> 
      (<T N 1 2> 
        (<L N NNP NNP Fujitsu N>) 
        (<T N[conj] 1 2> 
          (<L conj CC CC and conj>) 
          (<L N NNP NNP NEC N>) 
        ) 
      ) 
    ) 
    (<T S[dcl]\NP 0 2> 
      (<L (S[dcl]\NP)/S[dcl] VBD VBD said (S[dcl]\NP_146)/S[dcl]_147>) 
      (<T S[dcl] 0 2> 
        (<T S[dcl] 1 2> 
          (<L NP PRP PRP they NP>) 
          (<T S[dcl]\NP 0 2> 
            (<T (S[dcl]\NP)/(S[ng]\NP) 0 2> 
              (<L (S[dcl]\NP)/(S[ng]\NP) VBD VBD were (S[dcl]\NP_156)/(S[ng]_157\NP_156:B)_157>) 
              (<L (S\NP)\(S\NP) RB RB still (S_169\NP_164)_169\(S_169\NP_164)_169>) 
            ) 
            (<L S[ng]\NP VBG VBG investigating S[ng]\NP_174>) 
          ) 
        ) 
        (<T S[dcl][conj] 1 2> 
          (<L , , , , ,>) 
          (<T S[dcl][conj] 1 2> 
            (<L conj CC CC and conj>) 
            (<T S[em] 0 2> 
              (<L S[em]/S[dcl] IN IN that S[em]/S[dcl]_181>) 
              (<T S[dcl] 1 2> 
                (<T NP 0 2> 
                  (<T NP 0 1> 
                    (<L N NN NN knowledge N>) 
                  ) 
                  (<T NP\NP 0 2> 
                    (<L (NP\NP)/NP IN IN of (NP_207\NP_207)/NP_208>) 
                    (<T NP 0 1> 
                      (<T N 1 2> 
                        (<L N/N JJR JJR more N_224/N_224>) 
                        (<T N 1 2> 
                          (<L N/N JJ JJ such N_217/N_217>) 
                          (<L N NNS NNS bids N>) 
                        ) 
                      ) 
                    ) 
                  ) 
                ) 
                (<T S[dcl]\NP 0 2> 
                  (<L (S[dcl]\NP)/(S[b]\NP) MD MD could (S[dcl]\NP_190)/(S[b]_191\NP_190:B)_191>) 
                  (<L S[b]\NP VB VB emerge S[b]\NP_196>) 
                ) 
              ) 
            ) 
          ) 
        ) 
      ) 
    ) 
  ) 
  (<L . . . . .>)
) 
'''
        pt = parse_ccg_derivation(txt)
        ccg = Ccg2Drs()
        rule = get_rule(Category.from_cache('conj'),
                        Category.from_cache('S[em]'),
                        Category.from_cache('S[dcl][conj]'))
        self.assertEqual(rule, RL_RPASS)
        ccg.build_execution_sequence(pt)
        # Check execution queue
        actual = [repr(x) for x in ccg.exeque]
        expected = [
            '<PushOp>:(Fujitsu, N, NNP)', '<PushOp>:(and, conj, CC)',
            '<PushOp>:(NEC, N, NNP)', '<ExecOp>:(2, RP N[conj])',
            '<ExecOp>:(2, RCONJ N)', '<ExecOp>:(1, LP NP)',
            '<PushOp>:(say, (S[dcl]\\NP)/S[dcl], VBD)',
            '<PushOp>:(they, NP, PRP)',
            '<PushOp>:(be, (S[dcl]\\NP)/(S[ng]\\NP), VBD)',
            '<PushOp>:(still, (S\\NP)\\(S\\NP), RB)',
            '<ExecOp>:(2, BX (S[dcl]\\NP)/(S[ng]\\NP))',
            '<PushOp>:(investigate, S[ng]\\NP, VBG)',
            '<ExecOp>:(2, FA S[dcl]\\NP)', '<ExecOp>:(2, BA S[dcl])',
            '<PushOp>:(,, ,, ,)', '<PushOp>:(and, conj, CC)',
            '<PushOp>:(that, S[em]/S[dcl], IN)', '<PushOp>:(knowledge, N, NN)',
            '<ExecOp>:(1, LP NP)', '<PushOp>:(of, (NP\\NP)/NP, IN)',
            '<PushOp>:(more, N/N, JJR)', '<PushOp>:(such, N/N, JJ)',
            '<PushOp>:(bids, N, NNS)', '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA N)', '<ExecOp>:(1, LP NP)',
            '<ExecOp>:(2, FA NP\\NP)', '<ExecOp>:(2, BA NP)',
            '<PushOp>:(could, (S\\NP)/(S\\NP), MD)',
            '<PushOp>:(emerge, S[b]\\NP, VB)', '<ExecOp>:(2, FA S[dcl]\\NP)',
            '<ExecOp>:(2, BA S[dcl])', '<ExecOp>:(2, FA S[em])',
            '<ExecOp>:(2, RP S[dcl][conj])', '<ExecOp>:(2, RP S[dcl][conj])',
            '<ExecOp>:(2, RCONJ S[dcl])', '<ExecOp>:(2, FA S[dcl]\\NP)',
            '<ExecOp>:(2, BA S[dcl])', '<PushOp>:(., ., .)',
            '<ExecOp>:(2, LP S[dcl])'
        ]
        self.assertListEqual(expected, actual)
Exemple #17
0
def build_from_ldc_ccgbank(fn_dict, outdir, verbose=False, verify=True):
    """Populate `fn_dict` with functor templates derived from the LDC CCGbank.

    Every file found one directory level below
    ``<projdir>/data/ldc/ccgbank_1_1/data/AUTO`` (`projdir` is a module-level
    name) is read as alternating header/derivation lines. Each derivation is
    parsed and its predicate-argument categories are accumulated; a template
    is then created for each category and stored under its cleaned signature.

    Args:
        fn_dict: Mapping keyed by category signature. New templates are added
            in place; existing entries are never overwritten.
        outdir: Directory that receives ``parse_ccg_derivation_failed.dat``
            and ``functor_ldc_templates_failed.dat`` when failures occur.
        verbose: When true, the recorded failure messages are also printed.
        verify: When true, a template whose signature is already present must
            render (via `future_string`) identically to the stored one; a
            mismatch is recorded as a failed rule.

    Returns:
        The `fn_dict` object that was passed in.
    """
    print('Building function templates from LDC ccgbank...')

    allfiles = []
    ldcpath = os.path.join(projdir, 'data', 'ldc', 'ccgbank_1_1', 'data', 'AUTO')
    for dir1 in os.listdir(ldcpath):
        ldcpath1 = os.path.join(ldcpath, dir1)
        if not os.path.isdir(ldcpath1):
            continue
        for dir2 in os.listdir(ldcpath1):
            ldcpath2 = os.path.join(ldcpath1, dir2)
            if os.path.isfile(ldcpath2):
                allfiles.append(ldcpath2)

    # failed_parse is a flat list holding two entries per failure:
    # the offending derivation followed by its error message.
    failed_parse = []
    failed_rules = []
    rules = []
    progress = 0
    for fn in allfiles:
        progress = print_progress(progress, 10)
        with open(fn, 'r') as fd:
            lines = fd.readlines()
        # Odd-indexed lines carry the derivations; the headers are not needed.
        for ccgbank in lines[1::2]:
            pt = None
            try:
                pt = parse_ccg_derivation(ccgbank)
                extract_predarg_categories_from_pt(pt, rules)
            except Exception as e:
                failed_parse.append(safe_utf8_encode('CCGBANK: ' + ccgbank.strip()))
                failed_parse.append(safe_utf8_encode('Error: %s' % e))
            # Now attempt to track undefined unary rules
            if pt is not None:
                try:
                    builder = Ccg2Drs()
                    builder.build_execution_sequence(pt)
                    # Calling this will track undefined
                    builder.get_predarg_ccgbank()
                except Exception:
                    # Best effort only: a failure here must not stop the build.
                    pass

    # Integer division keeps the progress counter integral on Python 3 too.
    progress = (progress // 10) * 1000
    for predarg in rules:
        progress = print_progress(progress, 1000)
        try:
            catkey = predarg.clean(True)
            template = FunctorTemplate.create_from_category(predarg)
            if template is None:
                continue
            if catkey.signature not in fn_dict:
                fn_dict[catkey.signature] = template
            elif verify:
                f1 = fn_dict[catkey.signature]
                t1 = future_string(f1)
                t2 = future_string(template)
                assert t1 == t2, 'verify failed\n  t1=%s\n  t2=%s\n  f1=%s\n  f2=%s' % (t1, t2, f1.predarg_category, predarg)
        except Exception as e:
            # Includes the AssertionError raised by the verify check above.
            failed_rules.append(safe_utf8_encode('%s: %s' % (predarg, e)))

    print_progress(progress, done=True)

    if len(failed_parse) != 0:
        print('Warning: ldc - %d parses failed' % (len(failed_parse) // 2))
        # Binary mode: the log is assembled with a bytes separator.
        with open(os.path.join(outdir, 'parse_ccg_derivation_failed.dat'), 'wb') as fd:
            fd.write(b'\n'.join(failed_parse))
        if verbose:
            # Walk the flat list pairwise and print only the error messages.
            for m in failed_parse[1::2]:
                print(m)

    if len(failed_rules) != 0:
        print('Warning: ldc - %d rules failed' % len(failed_rules))
        with open(os.path.join(outdir, 'functor_ldc_templates_failed.dat'), 'wb') as fd:
            fd.write(b'\n'.join(failed_rules))
        if verbose:
            for m in failed_rules:
                print(m)

    return fn_dict
Exemple #18
0
    def test7_RuleUniquenessLDC(self):
        """Check that each LDC CCGbank derivation step maps to a single rule.

        Files one directory level below ``data/ldc/ccgbank_1_1/data/AUTO``
        (relative to the directory seven levels above this file) are sampled
        by pairing every 10th line with the line after it as header and
        derivation. For each ExecOp, `get_rule` is queried repeatedly with a
        shared `exclude` list until it returns None or five extra queries have
        been made. A step is reported as ambiguous when `exclude` ends up with
        more than one entry (presumably `get_rule` appends each rule it
        returns - verify against its definition).
        """
        projdir = __file__
        for _ in range(7):
            projdir = os.path.dirname(projdir)
        auto_root = os.path.join(projdir, 'data', 'ldc', 'ccgbank_1_1', 'data',
                                 'AUTO')

        derivation_files = []
        for section in os.listdir(auto_root):
            section_path = os.path.join(auto_root, section)
            if not os.path.isdir(section_path):
                continue
            for entry in os.listdir(section_path):
                entry_path = os.path.join(section_path, entry)
                if os.path.isfile(entry_path):
                    derivation_files.append(entry_path)

        failed_parse = 0
        ambiguous = []
        for path in derivation_files:
            with open(path, 'r') as fd:
                lines = fd.readlines()

            for hdr, ccgbank in zip(lines[0::10], lines[1::10]):
                dprint(hdr.strip())
                ccg = Ccg2Drs()
                try:
                    pt = parse_ccg_derivation(ccgbank)
                    ccg.build_execution_sequence(pt)
                except Exception:
                    # Unparseable derivations are counted and skipped.
                    failed_parse += 1
                    continue
                self.assertIsNotNone(pt)

                for op in ccg.exeque:
                    if isinstance(op, PushOp):
                        continue
                    self.assertIsInstance(op, ExecOp)
                    left = op.sub_ops[0].category
                    result = op.category
                    right = op.sub_ops[1].category if len(op.sub_ops) == 2 else CAT_EMPTY

                    # Should not have ambiguity
                    exclude = []
                    remaining = 5
                    matched = ''
                    rule = get_rule(left, right, result, exclude)
                    while rule is not None:
                        matched += repr(rule) + '|'
                        rule = get_rule(left, right, result, exclude)
                        remaining -= 1
                        if remaining == 0:
                            # One more lookup is made before giving up.
                            rule = get_rule(left, right, result, exclude)
                            break
                    if len(exclude) > 1:
                        description = '%s <- %s <{%s}> %s' % (result, left, matched, right)
                        ambiguous.append((description, exclude))
                    self.assertGreater(remaining, 0)

        for item in ambiguous:
            dprint('ambiguous rule: %s {%s}' % item)
        self.assertTrue(len(ambiguous) == 0)