예제 #1
0
    def test1_JsonFiles(self):
        """Parse one archived article (title plus every body sentence) end-to-end.

        Scans ``datapath`` for the single JSON article whose basename matches
        the hard-coded hash below, runs the title and each body sentence
        through the CCG service, and rebuilds the story as span JSON.  The
        serialized result is never asserted - this is a does-it-run
        regression/smoke test.
        """
        filelist = os.listdir(datapath)
        allfiles = []
        for fn in filelist:
            if not os.path.isfile(os.path.join(datapath, fn)):
                continue
            f, x = os.path.splitext(fn)
            # Restrict the run to one known article (basename is a hash).
            if x == '.json' and f == '9255a890ffe40c05876d8d402044ab11':
                allfiles.append(os.path.join(datapath, fn))

        for fn in allfiles:
            with open(fn, 'r') as fd:
                # Python 2 json.load - the encoding keyword was removed in 3.
                body = json.load(fd, encoding='utf-8')

            # The title is parsed with default options (verbnet/wiki enabled).
            smod = preprocess_sentence(body['title'])
            ccgbank = grpc.ccg_parse(self.stub, smod, grpc.DEFAULT_SESSION)
            pt = parse_ccg_derivation(ccgbank)
            ccg = process_ccg_pt(pt)

            ccgbody = {}
            ccgbody['story'] = {
                'title': [x.get_json() for x in ccg.get_span()],
                'paragraphs': []
            }
            # Non-empty stripped paragraphs (Python 2: filter/map -> lists).
            paragraphs = filter(
                lambda y: len(y) != 0,
                map(lambda x: x.strip(), body['content'].split('\n')))
            i = 0
            # NOTE(review): the [i:] / [j:] slices are evaluated once while the
            # counters are still 0, so they are whole-list copies; i and j only
            # feed the dprint trace below.
            for p in paragraphs[i:]:
                sentences = filter(lambda x: len(x.strip()) != 0,
                                   sent_tokenize(p))
                sp = []
                j = 0
                for s in sentences[j:]:
                    dprint('p:s = %d:%d' % (i, j))
                    smod = preprocess_sentence(s)
                    ccgbank = grpc.ccg_parse(self.stub, smod,
                                             grpc.DEFAULT_SESSION)
                    pt = parse_ccg_derivation(ccgbank)
                    # Body sentences skip verbnet and wiki lookups for speed.
                    ccg = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
                    sp.append([x.get_json() for x in ccg.get_span()])
                    j += 1
                ccgbody['story']['paragraphs'].append(sp)
                i += 1

            # Serialized story; intentionally unused (smoke test only).
            msgbody = json.dumps(ccgbody)

        pass
예제 #2
0
파일: conj_test.py 프로젝트: marbles-ai/ie
 def test10_OrOfVerb_OrInBrackets(self):
     """Disjoined verbs plus a parenthesized disjunction.

     'perceived or known or inferred' should collapse into a single verb
     phrase.  The select_phrases predicate keeps WDT words ('which') and
     drops function words whose DRS is empty (e.g. 'or').
     """
     text = "That which is perceived or known or inferred to have its own distinct existence (living or nonliving)"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     # nodups collapses duplicate conditions produced by the conjunctions.
     d = sentence.get_drs(nodups=True)
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     # RT_EMPTY_DRS adds 'or' to phrases
     f = sentence.select_phrases(lambda x: x.pos is POS.from_cache('WDT') or \
                                                0 == (x.mask & RT_EMPTY_DRS),
                                 contiguous=False)
     phrases = [sp.text for r, sp in f.iteritems()]
     self.assertTrue('That which' in phrases)
     self.assertTrue('have' in phrases)
     self.assertTrue('is perceived known inferred' in phrases)
     self.assertTrue('its own distinct existence' in phrases)
     # Python 2 filter returns a list; [0] is the first (referent, span) pair.
     verb1 = filter(lambda x: 'is perceived known inferred' == x[1].text,
                    f.iteritems())[0]
     verb2 = filter(lambda x: 'have' == x[1].text, f.iteritems())[0]
     agent = filter(lambda x: 'That which' == x[1].text, f.iteritems())[0]
     theme = filter(lambda x: 'its own distinct existence' == x[1].text,
                    f.iteritems())[0]
     X1 = agent[0]
     E1 = verb1[0]
     E2 = verb2[0]
     # Referents pulled from individual lexical items inside the theme span
     # (presumably 'its' and 'own'/'existence') - TODO confirm indices.
     X2 = theme[1][0].refs[1]
     X3 = theme[1][1].refs[0]
     self.assertTrue(d.find_condition(Rel('_EVENT', [E1])) is not None)
     self.assertTrue(d.find_condition(Rel('_ARG0', [E1, X1])) is not None)
     self.assertTrue(d.find_condition(Rel('_ARG1', [E1, E2])) is not None)
     # TODO: should the theme attach to X2?
     self.assertTrue(d.find_condition(Rel('_ARG1', [E2, X3])) is not None)
     self.assertTrue(d.find_condition(Rel('_POSS', [X2, X3])) is not None)
예제 #3
0
파일: conj_test.py 프로젝트: marbles-ai/ie
 def test01_AndOfSubj(self):
     """Conjoined subject: both names attach to the single 'go' event."""
     raw = "John and Paul went to the movies"
     pt = parse_ccg_derivation(
         grpc.ccg_parse(self.stub, preprocess_sentence(raw),
                        grpc.DEFAULT_SESSION))
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     drs = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(drs)
     selection = sentence.select_phrases(RT_PROPERNAME | RT_EVENT)
     texts = [span.text for _, span in selection.iteritems()]
     for wanted in ('John', 'Paul', 'went'):
         self.assertTrue(wanted in texts)

     def ref_of(wanted):
         # referent of the first phrase whose text matches exactly
         return next(p for p in selection.iteritems()
                     if p[1].text == wanted)[0]

     J = ref_of('John')
     P = ref_of('Paul')
     E = ref_of('went')
     for cond in (Rel('_EVENT', [E]), Rel('go', [E]), Rel('John', [J]),
                  Rel('Paul', [P]), Rel('_ARG0', [E, J])):
         self.assertTrue(drs.find_condition(cond) is not None)
예제 #4
0
 def test1_PP_Attachment(self):
     # NCCG attaches the PP incorrectly: the flat S_INF constituent covers
     # the whole sentence and 'with' migrates into the verbnet constituent.
     txt = "Eat spaghetti with meatballs"
     pt = parse_ccg_derivation(
         grpc.ccg_parse(self.stub, txt, grpc.DEFAULT_SESSION))
     self.assertIsNotNone(pt)
     dprint(sentence_from_pt(pt))
     sent = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     dprint(sent.get_drs().show(SHOW_LINEAR))
     actual = get_constituents_string_list(sent)
     dprint('\n'.join(actual))
     expected = [
         'S_INF(#Eat spaghetti with meatballs)',  # 0
         'NP(#spaghetti)',  # 1
         'NP(#meatballs)',  # 2
     ]
     self.assertListEqual(expected, actual)
     tree = sent.get_constituent_tree()
     dprint_constituent_tree(sent, tree)
     self.assertEqual(repr((0, [(1, []), (2, [])])), repr(tree))
     vsent = get_constituent_string(sent.get_verbnet_sentence())
     self.assertEqual('S_INF(#Eat with) NP(#spaghetti) NP(#meatballs)',
                      vsent)
예제 #5
0
파일: conj_test.py 프로젝트: marbles-ai/ie
 def test03_OrOfObj(self):
     """Disjoined object: _ARG1 currently resolves to the second disjunct."""
     raw = "To participate in games or sport"
     pt = parse_ccg_derivation(
         grpc.ccg_parse(self.stub, preprocess_sentence(raw),
                        grpc.DEFAULT_SESSION))
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     drs = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(drs)
     selection = sentence.select_phrases(RT_ENTITY | RT_EVENT)
     texts = [span.text for _, span in selection.iteritems()]
     for wanted in ('participate', 'games', 'sport'):
         self.assertTrue(wanted in texts)

     def ref_of(wanted):
         # referent of the first phrase whose text matches exactly
         return next(p for p in selection.iteritems()
                     if p[1].text == wanted)[0]

     X1 = ref_of('games')
     X2 = ref_of('sport')
     E = ref_of('participate')
     for cond in (Rel('_EVENT', [E]), Rel('participate', [E]),
                  Rel('games', [X1]), Rel('sport', [X2]),
                  Rel('_ARG1', [E, X2])):
         self.assertTrue(drs.find_condition(cond) is not None)
예제 #6
0
 def test1_EasySRL_BoyGirl2(self):
     """Drive the Ccg2Drs pipeline step by step on a canned EasySRL derivation.

     Unlike the service-based tests, this feeds a literal CCG bank string
     through build_execution_sequence/create_drs and then checks the exact
     linear DRS text plus the constituent breakdown.
     """
     txt = r'''(<T S[dcl] 1 2> (<T NP 0 2> (<L NP/N DT DT The NP/N>) (<L N NN NN boy N>) ) (<T S[dcl]\NP 0 2>
     (<L (S[dcl]\NP)/(S[b]\NP) MD MD will (S[dcl]\NP)/(S[b]\NP)>) (<T S[b]\NP 0 2>
     (<L (S[b]\NP)/(S[to]\NP) VB VB want (S[b]\NP)/(S[to]\NP)>) (<T S[to]\NP 0 2>
     (<L (S[to]\NP)/(S[b]\NP) TO TO to (S[to]\NP)/(S[b]\NP)>) (<T S[b]\NP 0 2>
     (<L (S[b]\NP)/NP VB VB believe (S[b]\NP)/NP>) (<T NP 0 2> (<L NP/N DT DT the NP/N>)
     (<L N NN NN girl N>) ) ) ) ) ) )'''
     pt = parse_ccg_derivation(txt)
     self.assertIsNotNone(pt)
     s = sentence_from_pt(pt)
     dprint(s)
     # Manual pipeline (instead of process_ccg_pt) with signature checks on.
     ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     ccg.build_execution_sequence(pt)
     ccg.create_drs()
     ccg.resolve_proper_names()
     ccg.final_rename()
     d = ccg.get_drs()
     s = d.show(SHOW_LINEAR)
     dprint(s)
     # Exact linear DRS: modal 'will' chains to 'want' which embeds 'believe'.
     x = '[X1,E2,E3,X4| boy(X1),will(E2),_MODAL(E2),want(E2),_EVENT(E2),_ARG0(E2,X1),_ARG1(E2,E3),believe(E3),_EVENT(E3),_ARG0(E3,X1),_ARG1(E3,X4),girl(X4)]'
     self.assertEqual(x, s)
     a = get_constituents_string_list(ccg)
     dprint('\n'.join(a))
     x = [
         'S(The boy #will want to believe the girl)',
         'NP(#The boy)',
         'S_INF(#want to believe the girl)',
         'S_INF(#to believe the girl)',
         'S_INF(#believe the girl)',
         'NP(#the girl)'
     ]
     self.assertListEqual(x, a)
     # The verbnet view regroups the nested S_INF chain into VP + S_INF.
     s = get_constituent_string(ccg.get_verbnet_sentence())
     self.assertEqual('NP(#The boy) VP(#will want) S_INF(#to believe) NP(#the girl)', s)
예제 #7
0
파일: conj_test.py 프로젝트: marbles-ai/ie
 def test05_AndOfVerb_AndOfObj(self):
     """Verb conjunction combined with a three-way object conjunction."""
     text = "Bell makes and distributes computers, electronics, and building products"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_PROPERNAME | RT_ENTITY | RT_EVENT
                                 | RT_ATTRIBUTE)
     phrases = [sp.text for r, sp in f.iteritems()]
     self.assertTrue('Bell' in phrases)
     # The conjoined verbs collapse into one phrase.
     self.assertTrue('makes distributes' in phrases)
     self.assertTrue('computers' in phrases)
     self.assertTrue('electronics' in phrases)
     # Note if we add RT_EMPTY_DRS to the selection criteria then this phrase becomes 'and building products'
     self.assertTrue('building products' in phrases)
     self.assertEqual(5, len(phrases))
     # Python 2 filter returns a list; [0] is the first (referent, span) pair.
     verb1 = filter(lambda x: 'makes distributes' == x[1].text,
                    f.iteritems())[0]
     agent = filter(lambda x: 'Bell' == x[1].text, f.iteritems())[0]
     theme1 = filter(lambda x: 'computers' == x[1].text, f.iteritems())[0]
     theme2 = filter(lambda x: 'electronics' == x[1].text, f.iteritems())[0]
     theme3 = filter(lambda x: 'building products' == x[1].text,
                     f.iteritems())[0]
     X1 = agent[0]
     Y1 = theme1[0]
     Y2 = theme2[0]
     Y3 = theme3[0]
     E1 = verb1[0]
     self.assertTrue(d.find_condition(Rel('_EVENT', [E1])) is not None)
     self.assertTrue(d.find_condition(Rel('_ARG0', [E1, X1])) is not None)
     # TODO: should we add proposition for multi NP's conjoined?
     self.assertTrue(d.find_condition(Rel('_ARG1', [E1, Y3])) is not None)
예제 #8
0
파일: conj_test.py 프로젝트: marbles-ai/ie
 def test02_AndOfObj(self):
     """Conjoined object: only John's _ARG1 role is asserted here."""
     raw = "He saw John and Paul"
     pt = parse_ccg_derivation(
         grpc.ccg_parse(self.stub, preprocess_sentence(raw),
                        grpc.DEFAULT_SESSION))
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     drs = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(drs)
     selection = sentence.select_phrases(RT_PROPERNAME | RT_EVENT)
     texts = [span.text for _, span in selection.iteritems()]
     for wanted in ('John', 'Paul', 'saw'):
         self.assertTrue(wanted in texts)

     def ref_of(wanted):
         # referent of the first phrase whose text matches exactly
         return next(p for p in selection.iteritems()
                     if p[1].text == wanted)[0]

     J = ref_of('John')
     P = ref_of('Paul')
     E = ref_of('saw')
     # FIXME: the wordnet lemmatizer keeps 'saw' (see/saw ambiguity), so the
     # event predicate is 'saw' rather than 'see'.
     for cond in (Rel('_EVENT', [E]), Rel('saw', [E]), Rel('John', [J]),
                  Rel('Paul', [P]), Rel('_ARG1', [E, J])):
         self.assertTrue(drs.find_condition(cond) is not None)
예제 #9
0
 def test8_Wsj0004_3(self):
     """WSJ 0004.3: verify the exact execution sequence for a derivation that
     needs an atomic type-change (conj + S[em] -> NP[conj]) before the right
     conjunction can apply.
     """
     txt = r'''
     (<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T NP 0 1> (<T N 1 2> (<L N/N NN NN Compound N_309/N_309>) 
     (<L N NNS NNS yields N>) ) ) (<T S[dcl]\NP 0 2> (<L (S[dcl]\NP)/NP VBP VBP assume (S[dcl]\NP_236)/NP_237>) 
     (<T NP 0 2> (<T NP 0 2> (<T NP 0 1> (<L N NN NN reinvestment N>) ) (<T NP\NP 0 2> 
     (<L (NP\NP)/NP IN IN of (NP_248\NP_248)/NP_249>) (<T NP 0 1> (<L N NNS NNS dividends N>) ) ) ) (<T NP[conj] 1 2> 
     (<L conj CC CC and conj>) (<T S[em] 0 2> (<L S[em]/S[dcl] IN IN that S[em]/S[dcl]_257>) (<T S[dcl] 1 2> 
     (<T NP 1 2> (<L NP[nb]/N DT DT the NP[nb]_297/N_297>) (<T N 1 2> (<L N/N JJ JJ current N_292/N_292>) 
     (<L N NN NN yield N>) ) ) (<T S[dcl]\NP 0 2> (<L S[dcl]\NP VBZ VBZ continues S[dcl]\NP_262>) 
     (<T (S\NP)\(S\NP) 0 2> (<L ((S\NP)\(S\NP))/NP IN IN for ((S_275\NP_270)_275\(S_275\NP_270)_275)/NP_276>) 
     (<T NP 1 2> (<L NP[nb]/N DT DT a NP[nb]_283/N_283>) (<L N NN NN year N>) ) ) ) ) ) ) ) ) ) (<L . . . . .>) ) '''
     pt = parse_ccg_derivation(txt)
     ccg = Ccg2Drs()
     # The combinator lookup must classify conj + S[em] -> NP[conj] as an
     # atomic type-change rule.
     rule = get_rule(Category.from_cache('conj'),
                     Category.from_cache('S[em]'),
                     Category.from_cache('NP[conj]'))
     self.assertEqual(rule, RL_TC_ATOM)
     ccg.build_execution_sequence(pt)
     # Check execution queue
     actual = [repr(x) for x in ccg.exeque]
     expected = [
         '<PushOp>:(compound, N/N, NN)',
         '<PushOp>:(yields, N, NNS)',
         '<ExecOp>:(2, FA N)',
         '<ExecOp>:(1, LP NP)',
         '<PushOp>:(assume, (S[dcl]\\NP)/NP, VBP)',
         '<PushOp>:(reinvestment, N, NN)',
         '<ExecOp>:(1, LP NP)',
         '<PushOp>:(of, (NP\\NP)/NP, IN)',
         '<PushOp>:(dividends, N, NNS)',
         '<ExecOp>:(1, LP NP)',
         '<ExecOp>:(2, FA NP\\NP)',
         '<ExecOp>:(2, BA NP)',
         '<PushOp>:(and, conj, CC)',
         '<PushOp>:(that, S[em]/S[dcl], IN)',
         '<PushOp>:(the, NP[nb]/N, DT)',
         '<PushOp>:(current, N/N, JJ)',
         '<PushOp>:(yield, N, NN)',
         '<ExecOp>:(2, FA N)',
         '<ExecOp>:(2, FA NP)',
         '<PushOp>:(continue, S[dcl]\\NP, VBZ)',
         '<PushOp>:(for, ((S\\NP)\\(S\\NP))/NP, IN)',
         '<PushOp>:(a, NP[nb]/N, DT)',
         '<PushOp>:(year, N, NN)',
         '<ExecOp>:(2, FA NP)',
         '<ExecOp>:(2, FA (S\\NP)\\(S\\NP))',
         '<ExecOp>:(2, BA S[dcl]\\NP)',
         '<ExecOp>:(2, BA S[dcl])',
         '<ExecOp>:(2, FA S[em])',
         '<ExecOp>:(2, ATOM_TC NP[conj])',
         '<ExecOp>:(2, RCONJ NP)',
         '<ExecOp>:(2, FA S[dcl]\\NP)',
         '<ExecOp>:(2, BA S[dcl])',
         '<PushOp>:(., ., .)',
         '<ExecOp>:(2, LP S[dcl])',
     ]
     self.assertListEqual(expected, actual)
예제 #10
0
 def test1_Currency_00_0194(self):
     """Currency preprocessing and role assignment on WSJ 00.0194.

     '$19.3 million' must be split to '$ 19.3 million' by preprocessing,
     each amount must form a single NP nominal, and the prepositional and
     argument relations between the spans must appear in the DRS.
     """
     text = r"Without the Cray-3 research and development expenses, the company would have been able to report a profit of $19.3 million for the first half of 1989 rather than the $5.9 million it posted."
     etext = r"Without the Cray-3 research and development expenses , the company would have been able to report a profit of $ 19.3 million for the first half of 1989 rather than the $ 5.9 million it posted"
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs(nodups=True)
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     fnps = sentence.get_np_nominals()
     nps = [sp.text for r, sp in fnps]
     self.assertTrue('the Cray-3 research and development expenses' in nps)
     self.assertTrue('the company' in nps)
     self.assertTrue('a profit' in nps)
     self.assertTrue('$ 19.3 million' in nps)
     self.assertTrue('the first half' in nps)
     self.assertTrue('the $ 5.9 million' in nps)
     self.assertTrue('1989' in nps)
     fvps = sentence.get_vp_nominals()
     vps = [sp.text for r, sp in fvps]
     self.assertTrue('would have been' in vps)
     self.assertTrue('report' in vps)
     self.assertTrue('posted' in vps)
     # Python 2 filter -> list; [0][0] is the referent of the first match.
     would_have_been = filter(lambda x: 'would have been' == x[1].text,
                              fvps)[0][0]
     report = filter(lambda x: 'report' == x[1].text, fvps)[0][0]
     posted = filter(lambda x: 'posted' == x[1].text, fvps)[0][0]
     # NOTE(review): this lambda was wrapped across the 'x[1].text' attribute
     # access by an auto-formatter; it is still one expression.
     cray_rnd = filter(
         lambda x: 'the Cray-3 research and development expenses' == x[1].
         text, fnps)[0][0]
     company = filter(lambda x: 'the company' == x[1].text, fnps)[0][0]
     profit = filter(lambda x: 'a profit' == x[1].text, fnps)[0][0]
     first_half = filter(lambda x: 'the first half' == x[1].text,
                         fnps)[0][0]
     n1989 = filter(lambda x: '1989' == x[1].text, fnps)[0][0]
     n19_3M = filter(lambda x: '$ 19.3 million' == x[1].text, fnps)[0][0]
     n5_9M = filter(lambda x: 'the $ 5.9 million' == x[1].text, fnps)[0][0]
     self.assertTrue(
         d.find_condition(Rel('without', [would_have_been, cray_rnd]))
         is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG0', [would_have_been, company]))
         is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG0', [report, company])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG1', [report, profit])) is not None)
     self.assertTrue(
         d.find_condition(Rel('of', [profit, n19_3M])) is not None)
     self.assertTrue(
         d.find_condition(Rel('for', [profit, first_half])) is not None)
     self.assertTrue(
         d.find_condition(Rel('of', [first_half, n1989])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG1', [posted, n5_9M])) is not None)
예제 #11
0
 def test2_Wsj_0056_1(self):
     # RAW 1043: degenerate sentence consisting of the single token '@'.
     txt = '@'
     pt = parse_ccg_derivation(
         grpc.ccg_parse(self.stub, txt, grpc.DEFAULT_SESSION))
     self.assertIsNotNone(pt)
     dprint(sentence_from_pt(pt))
     sent = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     dprint(sent.get_drs().show(SHOW_LINEAR))
     constituents = get_constituents_string_list(sent)
     dprint('\n'.join(constituents))
     self.assertListEqual(['S(#@)'], constituents)
예제 #12
0
 def test2_Date_21_0985(self):
     """Colon and date tokenization; dates group into single NP nominals."""
     text = r"Annualized interest rates on certain investments as reported by the Federal Reserve Board on a weekly-average basis: 1989 and Wednesday October 4, 1989."
     etext = r"Annualized interest rates on certain investments as reported by the Federal Reserve Board on a weekly-average basis : 1989 and Wednesday October 4 , 1989"
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     pt = parse_ccg_derivation(
         grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION))
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     dprint(pt_to_ccg_derivation(pt))
     dprint(sentence.get_drs())
     np_texts = [span.text for _, span in sentence.get_np_nominals()]
     for phrase in ('Annualized interest rates',
                    'certain investments',
                    'the Federal-Reserve-Board',
                    'a weekly-average basis',
                    'Wednesday October 4'):
         self.assertTrue(phrase in np_texts)
예제 #13
0
 def test2_Date_00_1228(self):
     """Dates like 'Jan. 2' survive preprocessing and form NP nominals."""
     text = r"The reduced dividend is payable Jan. 2 to stock of record Dec. 15"
     mtext = preprocess_sentence(text)
     # This sentence needs no punctuation splitting, so it round-trips.
     self.assertEqual(text, mtext)
     pt = parse_ccg_derivation(
         grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION))
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     dprint(pt_to_ccg_derivation(pt))
     dprint(sentence.get_drs())
     np_texts = [span.text for _, span in sentence.get_np_nominals()]
     for phrase in ('The reduced dividend', 'payable', 'Jan. 2', 'Dec. 15',
                    'stock', 'record'):
         self.assertTrue(phrase in np_texts)
예제 #14
0
 def test3_ApposInterrupt(self):
     """An interrupting appositive yields exactly one _AKA between the name
     and the descriptive NP."""
     text = r"Robbie, a hot-tempered tennis player, charged the umpire and tried to crack the poor man's skull with a racket."
     pt = parse_ccg_derivation(
         grpc.ccg_parse(self.stub, preprocess_sentence(text),
                        grpc.DEFAULT_SESSION))
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     drs = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(drs)
     nominals = sentence.get_np_nominals()
     texts = [span.text for _, span in nominals]
     self.assertTrue('Robbie' in texts)
     self.assertTrue('a hot-tempered tennis player' in texts)
     X = next(p for p in nominals if p[1].text == 'Robbie')[0]
     Y = next(p for p in nominals
              if p[1].text == 'a hot-tempered tennis player')[0]
     self.assertNotEqual(X, Y)
     self.assertTrue(drs.find_condition(Rel('_AKA', [X, Y])) is not None)
     # '_AKA' occurs exactly once in the printed DRS
     self.assertTrue(len(repr(drs).split('_AKA')) == 2)
예제 #15
0
 def test4_ApposInterrupt(self):
     """Appositive plus a relative-clause interrupt: 'Bell' and the company
     NP stay distinct referents joined by exactly one _AKA."""
     text = r"Bell, a telecommunications company, which is located in Los Angeles, makes and distributes electronics, computers, and building products"
     pt = parse_ccg_derivation(
         grpc.ccg_parse(self.stub, preprocess_sentence(text),
                        grpc.DEFAULT_SESSION))
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     drs = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(drs)
     nominals = sentence.get_np_nominals()
     texts = [span.text for _, span in nominals]
     self.assertTrue('Bell' in texts)
     self.assertTrue('a telecommunications company' in texts)
     X = next(p for p in nominals if p[1].text == 'Bell')[0]
     Y = next(p for p in nominals
              if p[1].text == 'a telecommunications company')[0]
     self.assertNotEqual(X, Y)
     self.assertTrue(drs.find_condition(Rel('_AKA', [X, Y])) is not None)
     # '_AKA' occurs exactly once in the printed DRS
     self.assertTrue(len(repr(drs).split('_AKA')) == 2)
예제 #16
0
 def test1_Currency_00_0195(self):
     """'$20.5 million' splits to '$ 20.5 million' and groups as one NP."""
     text = r"On the other hand, had it existed then, Cray Computer would have incurred a $20.5 million loss."
     etext = r"On the other hand , had it existed then , Cray Computer would have incurred a $ 20.5 million loss ."
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     pt = parse_ccg_derivation(
         grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION))
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     dprint(pt_to_ccg_derivation(pt))
     dprint(sentence.get_drs())
     np_texts = [span.text for _, span in sentence.get_np_nominals()]
     for phrase in ('the other hand', 'Cray-Computer', '$ 20.5 million'):
         self.assertTrue(phrase in np_texts)
     vp_texts = [span.text for _, span in sentence.get_vp_nominals()]
     for phrase in ('had', 'existed', 'would have incurred'):
         self.assertTrue(phrase in vp_texts)
예제 #17
0
 def test2_ApposInterrupt(self):
     """Possessive inside the appositive: 'Diane' surfaces as its own NP
     while the beagle NP pairs with 'Reliable' via a single _AKA."""
     text = r"Reliable, Diane's eleven-year-old beagle, chews holes in the living room carpeting as if he were still a puppy."
     pt = parse_ccg_derivation(
         grpc.ccg_parse(self.stub, preprocess_sentence(text),
                        grpc.DEFAULT_SESSION))
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     drs = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(drs)
     nominals = sentence.get_np_nominals()
     texts = [span.text for _, span in nominals]
     self.assertTrue('Reliable' in texts)
     self.assertTrue("eleven-year-old beagle" in texts)
     self.assertTrue("Diane" in texts)
     X = next(p for p in nominals if p[1].text == 'Reliable')[0]
     Y = next(p for p in nominals
              if p[1].text == "eleven-year-old beagle")[0]
     self.assertNotEqual(X, Y)
     self.assertTrue(drs.find_condition(Rel('_AKA', [X, Y])) is not None)
     # '_AKA' occurs exactly once in the printed DRS
     self.assertTrue(len(repr(drs).split('_AKA')) == 2)
예제 #18
0
 def test10_Brutus(self):
     """Passive voice: under the current role assignment the surface subject
     'Ceasar' is _ARG0 and the by-agent 'Brutus' is _ARG1."""
     text = "Ceasar was stabbed by Brutus"
     pt = parse_ccg_derivation(
         grpc.ccg_parse(self.stub, text, grpc.DEFAULT_SESSION))
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     drs = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(drs)
     fnps = sentence.get_np_nominals()
     np_texts = [span.text for _, span in fnps]
     self.assertTrue('Brutus' in np_texts)
     self.assertTrue('Ceasar' in np_texts)
     fvps = sentence.get_vp_nominals()
     vp_texts = [span.text for _, span in fvps]
     self.assertTrue('was stabbed' in vp_texts)
     E = next(p for p in fvps if p[1].text == "was stabbed")[0]
     A1 = next(p for p in fnps if p[1].text == "Brutus")[0]
     A0 = next(p for p in fnps if p[1].text == "Ceasar")[0]
     self.assertTrue(drs.find_condition(Rel('_ARG0', [E, A0])) is not None)
     self.assertTrue(drs.find_condition(Rel('_ARG1', [E, A1])) is not None)
예제 #19
0
 def test10_Ccgbank_00_0036(self):
     """Possessives, time phrases and 'according to' in one sentence: check
     preprocessing output plus the NP/VP nominal spans."""
     text = "Average maturity of the funds' investments lengthened by a day to 41 days, the longest since early August, according to Donoghue's."
     etext = "Average maturity of the funds ' investments lengthened by a day to 41 days , the longest since early August , according to Donoghue 's ."
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     pt = parse_ccg_derivation(
         grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION))
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     dprint(pt_to_ccg_derivation(pt))
     dprint(sentence.get_drs())
     np_texts = [span.text for _, span in sentence.get_np_nominals()]
     # 'Average maturity' is currently not reported as an NP nominal.
     for phrase in ('the funds', 'a day', '41 days', 'the longest',
                    'early August'):
         self.assertTrue(phrase in np_texts)
     vp_texts = [span.text for _, span in sentence.get_vp_nominals()]
     self.assertTrue('lengthened' in vp_texts)
     self.assertTrue('according' in vp_texts)
예제 #20
0
파일: conj_test.py 프로젝트: marbles-ai/ie
 def test04_AndOfVerb(self):
     """'makes and distributes' collapses into one verb phrase that shares
     the agent and the theme."""
     raw = "Bell makes and distributes computers"
     pt = parse_ccg_derivation(
         grpc.ccg_parse(self.stub, preprocess_sentence(raw),
                        grpc.DEFAULT_SESSION))
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     drs = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(drs)
     selection = sentence.select_phrases(RT_PROPERNAME | RT_ENTITY | RT_EVENT)
     texts = [span.text for _, span in selection.iteritems()]
     for wanted in ('Bell', 'makes distributes', 'computers'):
         self.assertTrue(wanted in texts)

     def ref_of(wanted):
         # referent of the first phrase whose text matches exactly
         return next(p for p in selection.iteritems()
                     if p[1].text == wanted)[0]

     E1 = ref_of('makes distributes')
     X1 = ref_of('Bell')
     X2 = ref_of('computers')
     for cond in (Rel('_EVENT', [E1]), Rel('_ARG0', [E1, X1]),
                  Rel('_ARG1', [E1, X2])):
         self.assertTrue(drs.find_condition(cond) is not None)
예제 #21
0
                lc = len(lnout)
                lnout.append(ln.strip())

                if mm not in wsjd:
                    lnout.append('ERR: cannot find mapping to %s' % mm)
                    total_err += 1
                    continue
                gold_derivation = wsjd[mm]

                e_sentence = None
                n_sentence = None
                options = CO_NO_VERBNET | CO_NO_WIKI_SEARCH | CO_VARNAMES_MATCH_WORD_INDEX
                try:
                    if estub is not None:
                        ed = grpc.ccg_parse(estub, ln)
                        ept = parse_ccg_derivation(ed)
                        e_sentence = process_ccg_pt(ept, options)

                    if nstub is not None:
                        nd = grpc.ccg_parse(nstub, ln)
                        npt = parse_ccg_derivation(nd)
                        n_sentence = process_ccg_pt(npt, options)

                    gpt = parse_ccg_derivation(gold_derivation)
                    gold_sentence = process_ccg_pt(gpt, options)
                except UnaryRuleError as e:
                    lnout.append('ERR: %s' % e)
                    total_err += 1
                    continue
                except Exception as e:
                    lnout.append('ERR: %s' % e)
예제 #22
0
def build_from_ldc_ccgbank(fn_dict, outdir, verbose=False, verify=True):
    """Build functor templates from the LDC CCGbank derivations.

    Scans every derivation file under data/ldc/ccgbank_1_1/data/AUTO,
    extracts predicate-argument categories, and registers a FunctorTemplate
    for each new category signature in `fn_dict`. Parse and rule failures
    are written to report files in `outdir`.

    Args:
        fn_dict: dict mapping category signature -> FunctorTemplate; updated
            in place.
        outdir: output directory for the failure report files.
        verbose: if True, also print each failure message to stdout.
        verify: if True, assert that a repeated signature produces an
            identical template.

    Returns:
        The updated fn_dict.
    """
    print('Building function templates from LDC ccgbank...')

    # Gather all derivation files; layout is AUTO/<section>/<file>.
    allfiles = []
    ldcpath = os.path.join(projdir, 'data', 'ldc', 'ccgbank_1_1', 'data', 'AUTO')
    dirlist1 = os.listdir(ldcpath)
    for dir1 in dirlist1:
        ldcpath1 = os.path.join(ldcpath, dir1)
        if os.path.isdir(ldcpath1):
            dirlist2 = os.listdir(ldcpath1)
            for dir2 in dirlist2:
                ldcpath2 = os.path.join(ldcpath1, dir2)
                if os.path.isfile(ldcpath2):
                    allfiles.append(ldcpath2)

    # failed_parse holds flat pairs of lines: the offending ccgbank text
    # followed by its error message.
    failed_parse = []
    failed_rules = []
    rules = []
    progress = 0
    for fn in allfiles:
        progress = print_progress(progress, 10)
        with open(fn, 'r') as fd:
            lines = fd.readlines()
        # Derivation files alternate header line / derivation line.
        for hdr, ccgbank in zip(lines[0::2], lines[1::2]):
            pt = None
            try:
                pt = parse_ccg_derivation(ccgbank)
                extract_predarg_categories_from_pt(pt, rules)
            except Exception as e:
                failed_parse.append(safe_utf8_encode('CCGBANK: ' + ccgbank.strip()))
                failed_parse.append(safe_utf8_encode('Error: %s' % e))
            # Now attempt to track undefined unary rules
            if pt is not None:
                try:
                    builder = Ccg2Drs()
                    builder.build_execution_sequence(pt)
                    # Calling this will track undefined
                    builder.get_predarg_ccgbank()
                except Exception:
                    # Best effort only - failures here are tracked elsewhere.
                    pass

    progress = (progress / 10) * 1000
    for predarg in rules:
        progress = print_progress(progress, 1000)
        try:
            catkey = predarg.clean(True)
            template = FunctorTemplate.create_from_category(predarg)
            if template is None:
                # Category carries no functor information.
                continue
            if catkey.signature not in fn_dict:
                fn_dict[catkey.signature] = template
            elif verify:
                f1 = fn_dict[catkey.signature]
                t1 = future_string(f1)
                t2 = future_string(template)
                assert t1 == t2, 'verify failed\n  t1=%s\n  t2=%s\n  f1=%s\n  f2=%s' % (t1, t2, f1.predarg_category, predarg)
        except Exception as e:
            failed_rules.append(safe_utf8_encode('%s: %s' % (predarg, e)))
            # DEBUG ?
            if False:
                try:
                    FunctorTemplate.create_from_category(predarg)
                except Exception:
                    pass

    print_progress(progress, done=True)

    if len(failed_parse) != 0:
        # Entries are stored in pairs, hence the divide by 2.
        print('Warning: ldc - %d parses failed' % (len(failed_parse)/2))
        with open(os.path.join(outdir, 'parse_ccg_derivation_failed.dat'), 'w') as fd:
            fd.write(b'\n'.join(failed_parse))
        if verbose:
            # FIX: failed_parse is a flat list of strings, not 2-tuples.
            # The old `for x, m in failed_parse:` raised ValueError on any
            # entry longer than two characters.
            for m in failed_parse:
                print(m)

    if len(failed_rules) != 0:
        print('Warning: ldc - %d rules failed' % len(failed_rules))
        with open(os.path.join(outdir, 'functor_ldc_templates_failed.dat'), 'w') as fd:
            fd.write(b'\n'.join(failed_rules))
        if verbose:
            for m in failed_rules:
                print(m)

    return fn_dict
예제 #23
0
파일: __init__.py 프로젝트: marbles-ai/ie
    def run(self):
        """Process messages from the AWS news queue.

        For each received story: split the body into paragraphs and
        sentences, CCG-parse the title and every sentence (retrying up to
        three times on connection errors), then forward the JSON result to
        the ccg queue, shrinking the paragraph list if the payload is too
        large for a queue message.
        """
        for message in receive_messages(self.aws.news_queue, MessageAttributeNames=['All']):
            global _logger
            # Attributes will be passed onto next queue
            attributes = message.message_attributes
            mhash = attributes['hash']['StringValue']
            _logger.debug('Received news_queue(%s) -> hash(%s)', message.message_id, mhash)
            body = json.loads(message.body)
            retry = 3
            ccgbank = None
            title = body['title']
            # NOTE: relies on Python 2 filter()/map() returning lists (len() below).
            paragraphs_in = filter(lambda y: len(y) != 0, map(lambda x: x.strip(), body['content'].split('\n')))
            paragraphs_out = []
            if len(paragraphs_in) == 0:
                # FIX: pass the format arguments individually. The old code
                # passed a single tuple, which broke the two-placeholder
                # %-style lazy formatting in the logging call.
                _logger.debug('No paragraphs for story %s\n%s', mhash, title)
            # Use NLTK to split paragraphs into sentences.
            for p in paragraphs_in:
                sentences = filter(lambda x: len(x.strip()) != 0, sent_tokenize(p))
                paragraphs_out.append(sentences)

            if self.state.terminate:
                break

            result = {}
            result['title'] = {}
            while retry:
                try:
                    ccgbank = grpc.ccg_parse(self.aws.stub, title, grpc.DEFAULT_SESSION)
                    pt = parse_ccg_derivation(ccgbank)
                    ccg = process_ccg_pt(pt, options=self.options)
                    result['title']['lexemes'] = [x.get_json() for x in ccg.get_span()]
                    result['title']['constituents'] = [c.get_json() for c in ccg.constituents]
                    ccgpara = []
                    result['paragraphs'] = ccgpara
                    for sentences in paragraphs_out:
                        ccgsent = []
                        ccgpara.append(ccgsent)
                        for s in sentences:
                            smod = preprocess_sentence(s)
                            ccgbank = grpc.ccg_parse(self.aws.stub, smod, grpc.DEFAULT_SESSION)
                            pt = parse_ccg_derivation(ccgbank)
                            ccg = process_ccg_pt(pt, options=self.options)
                            ccgentry = {}
                            ccgentry['lexemes'] = [x.get_json() for x in ccg.get_span()]
                            ccgentry['constituents'] = [c.get_json() for c in ccg.constituents]
                            ccgsent.append(ccgentry)
                    break   # exit while
                except requests.exceptions.ConnectionError as e:
                    # Transient network failure - back off briefly and retry.
                    time.sleep(0.25)
                    retry -= 1
                    _logger.exception('AwsNewsQueueReader.run', exc_info=e)
                    if self.state.pass_on_exceptions:
                        raise
                except Exception as e:
                    # After X reads AWS sends the item to the dead letter queue.
                    # X is configurable in AWS console.
                    retry = 0
                    _logger.exception('AwsNewsQueueReader.run', exc_info=e, rlimitby=mhash)
                    if self.state.pass_on_exceptions:
                        raise

                if self.state.terminate:
                    retry = 0
                    break

            # retry == 0 indicates failure
            if retry == 0:
                continue


            try:
                # Let the queue know that the message is processed
                message.delete()
                if self.aws.ccg_queue:
                    ireduce = -1
                    iorig = len(result['paragraphs'])

                    # Trim paragraphs until the serialized payload fits;
                    # 200KB keeps us under the queue message size cap
                    # (presumably the SQS 256KB limit - TODO confirm).
                    while True:
                        strm = StringIO.StringIO()
                        # Add indent so easier to debug
                        json.dump(result, strm, indent=2)
                        data = strm.getvalue()
                        if len(data) >= 200*1024:
                            para = result['paragraphs']
                            ireduce = max([1, (len(para) * 200 * 1024)/ len(data)])
                            ireduce = min([len(para)-1, ireduce])
                            result['paragraphs'] = para[0:ireduce]
                        else:
                            break

                        if len(result['paragraphs']) <= 1:
                            break

                    if ireduce >= 0:
                        _logger.warning('Hash(%s) ccg paragraphs reduced from %d to %d' % (mhash, iorig, ireduce))
                    response = self.aws.ccg_queue.send_message(MessageAttributes=attributes, MessageBody=data)
                    _logger.debug('Sent hash(%s) -> ccg_queue(%s)', mhash, response['MessageId'])
            except Exception as e:
                _logger.exception('AwsNewsQueueReader.run', exc_info=e, rlimitby=mhash)
                if self.state.pass_on_exceptions:
                    raise
예제 #24
0
                ccg = None
            drs = None
            pccg = None
            fol = None
            constituents = None
            orphaned = None
            conjoins = None
            functor_phrases = None
            vnconstituents = ''
            constituents = ''
            vsent = None
            sentence = None

            if options.ofmt == 'drs':
                try:
                    pt = parse_ccg_derivation(ccg)
                    pccg = pt_to_ccg_derivation(pt)
                except Exception as e:
                    print('Error: failed to parse ccgbank - %s' % str(e))
                    raise

                ops = CO_BUILD_STATES if options.wordvars else CO_ADD_STATE_PREDICATES
                ops |= CO_NO_VERBNET if options.no_vn else 0
                ops |= CO_NO_WIKI_SEARCH if options.no_wp else 0

                try:
                    sentence = process_ccg_pt(pt, ops)
                    d = sentence.get_drs()
                    fol, _ = d.to_fol()
                    fol = unicode(fol)
                    drs = d.show(SHOW_LINEAR)
예제 #25
0
 def test2_GOLD_Wsj0003_1(self):
     """Verify DRS construction from the GOLD CCGbank derivation of wsj_0003.1.

     Uses a hand-checked GOLD derivation (no parser call), builds the DRS,
     then checks the flat constituent string list and the nested
     constituent tree against expected values.
     """
     # A form of asbestos once used to make Kent cigarette filters has caused a high percentage of cancer deaths
     # among a group of workers exposed to it more than 30 years ago, researchers reported.
     # ID=wsj_0003.1 PARSER=GOLD NUMPARSE=1
     # Pretty-printed form of the derivation in `txt` below:
     # (<T S[dcl] 0 2>
     #   (<T S[dcl] 1 2>
     #       (<T S[dcl] 1 2>
     #           (<T NP 0 2>
     #               (<T NP 0 2>
     #                   (<T NP 1 2>
     #                       (<L NP[nb]/N DT DT A NP[nb]_166/N_166>)
     #                       (<L N NN NN form N>)
     #                   )
     #                   (<T NP\NP 0 2>
     #                       (<L (NP\NP)/NP IN IN of (NP_174\NP_174)/NP_175>)
     #                       (<T NP 0 1>
     #                           (<L N NN NN asbestos N>)
     #                       )
     #                   )
     #               )
     #               (<T NP\NP 0 1>
     #                   (<T S[pss]\NP 1 2>
     #                       (<L (S\NP)/(S\NP) RB RB once (S_235\NP_230)_235/(S_235\NP_230)_235>)
     #                       (<T S[pss]\NP 0 2>
     #                           (<L (S[pss]\NP)/(S[to]\NP) VBN VBN used (S[pss]\NP_187)/(S[to]_188\NP_187:B)_188>)
     #                           (<T S[to]\NP 0 2>
     #                               (<L (S[to]\NP)/(S[b]\NP) TO TO to (S[to]\NP_197)/(S[b]_198\NP_197:B)_198>)
     #                               (<T S[b]\NP 0 2>
     #                                   (<L (S[b]\NP)/NP VB VB make (S[b]\NP_205)/NP_206>)
     #                                   (<T NP 0 1>
     #                                       (<T N 1 2>
     #                                           (<L N/N NNP NNP Kent N_222/N_222>)
     #                                           (<T N 1 2>
     #                                               (<L N/N NN NN cigarette N_215/N_215>)
     #                                               (<L N NNS NNS filters N>)
     #                                           )
     #                                       )
     #                                   )
     #                               )
     #                           )
     #                       )
     #                   )
     #               )
     #           )
     #           (<T S[dcl]\NP 0 2>
     #               (<L (S[dcl]\NP)/(S[pt]\NP) VBZ VBZ has (S[dcl]\NP_23)/(S[pt]_24\NP_23:B)_24>)
     #               (<T S[pt]\NP 0 2>
     #                   (<L (S[pt]\NP)/NP VBN VBN caused (S[pt]\NP_31)/NP_32>)
     #                       (<T NP 0 2>
     #                           (<T NP 0 2>
     #                               (<T NP 1 2>
     #                                   (<L NP[nb]/N DT DT a NP[nb]_46/N_46>)
     #                                   (<T N 1 2>
     #                                       (<L N/N JJ JJ high N_41/N_41>)
     #                                       (<L N NN NN percentage N>)
     #                                   )
     #                               )
     #                               (<T NP\NP 0 2>
     #                                   (<L (NP\NP)/NP IN IN of (NP_54\NP_54)/NP_55>)
     #                                   (<T NP 0 1>
     #                                       (<T N 1 2>
     #                                           (<L N/N NN NN cancer N_64/N_64>)
     #                                           (<L N NNS NNS deaths N>)
     #                                       )
     #                                   )
     #                               )
     #                           )
     #                           (<T NP\NP 0 2>
     #                               (<L (NP\NP)/NP IN IN among (NP_73\NP_73)/NP_74>)
     #                               (<T NP 0 2>
     #                                   (<T NP 1 2>
     #                                       (<L NP[nb]/N DT DT a NP[nb]_81/N_81>)
     #                                       (<L N NN NN group N>)
     #                                   )
     #                                   (<T NP\NP 0 2>
     #                                       (<L (NP\NP)/NP IN IN of (NP_89\NP_89)/NP_90>)
     #                                       (<T NP 0 2>
     #                                           (<T NP 0 1>
     #                                               (<L N NNS NNS workers N>)
     #                                           )
     #                                           (<T NP\NP 0 1>
     #                                               (<T S[pss]\NP 0 2>
     #                                                   (<T S[pss]\NP 0 2>
     #                                                       (<L (S[pss]\NP)/PP VBN VBN exposed (S[pss]\NP_100)/PP_101>)
     #                                                       (<T PP 0 2>
     #                                                           (<L PP/NP TO TO to PP/NP_106>)
     #                                                           (<L NP PRP PRP it NP>)
     #                                                       )
     #                                                   )
     #                                                   (<T (S\NP)\(S\NP) 1 2>
     #                                                       (<T NP 0 1>
     #                                                           (<T N 1 2>
     #                                                               (<T N/N 1 2>
     #                                                                   (<T (N/N)/(N/N) 1 2>
     #                                                                       (<L S[adj]\NP RBR RBR more S[adj]\NP_153>)
     #                                                                       (<L ((N/N)/(N/N))\(S[adj]\NP) IN IN than ((N_147/N_139)_147/(N_147/N_139)_147)\(S[adj]_148\NP_142)_148>)
     #                                                                   )
     #                                                                   (<L N/N CD CD 30 N_131/N_131>)
     #                                                               )
     #                                                               (<L N NNS NNS years N>)
     #                                                           )
     #                                                       )
     #                                                       (<L ((S\NP)\(S\NP))\NP IN IN ago ((S_121\NP_116)_121\(S_121\NP_116)_121)\NP_122>)
     #                                                   )
     #                                               )
     #                                           )
     #                                       )
     #                                   )
     #                               )
     #                           )
     #                       )
     #                   )
     #               )
     #           )
     #           (<T S[dcl]\S[dcl] 1 2>
     #               (<L , , , , ,>)
     #               (<T S[dcl]\S[dcl] 1 2>
     #                   (<T NP 0 1>
     #                       (<L N NNS NNS researchers N>)
     #                   )
     #                   (<L (S[dcl]\S[dcl])\NP VBD VBD reported (S[dcl]\S[dcl]_8)\NP_9>)
     #               )
     #           )
     #       )
     #       (<L . . . . .>)
     #   )
     txt = r'''(<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T S[dcl] 1 2> (<T NP 0 2> (<T NP 0 2> (<T NP 1 2>
     (<L NP[nb]/N DT DT A NP[nb]_166/N_166>) (<L N NN NN form N>) ) (<T NP\NP 0 2>
     (<L (NP\NP)/NP IN IN of (NP_174\NP_174)/NP_175>) (<T NP 0 1> (<L N NN NN asbestos N>) ) ) ) (<T NP\NP 0 1>
     (<T S[pss]\NP 1 2> (<L (S\NP)/(S\NP) RB RB once (S_235\NP_230)_235/(S_235\NP_230)_235>) (<T S[pss]\NP 0 2>
     (<L (S[pss]\NP)/(S[to]\NP) VBN VBN used (S[pss]\NP_187)/(S[to]_188\NP_187:B)_188>) (<T S[to]\NP 0 2>
     (<L (S[to]\NP)/(S[b]\NP) TO TO to (S[to]\NP_197)/(S[b]_198\NP_197:B)_198>) (<T S[b]\NP 0 2>
     (<L (S[b]\NP)/NP VB VB make (S[b]\NP_205)/NP_206>) (<T NP 0 1> (<T N 1 2> (<L N/N NNP NNP Kent N_222/N_222>)
     (<T N 1 2> (<L N/N NN NN cigarette N_215/N_215>) (<L N NNS NNS filters N>) ) ) ) ) ) ) ) ) ) (<T S[dcl]\NP 0 2>
     (<L (S[dcl]\NP)/(S[pt]\NP) VBZ VBZ has (S[dcl]\NP_23)/(S[pt]_24\NP_23:B)_24>) (<T S[pt]\NP 0 2>
     (<L (S[pt]\NP)/NP VBN VBN caused (S[pt]\NP_31)/NP_32>) (<T NP 0 2> (<T NP 0 2> (<T NP 1 2>
     (<L NP[nb]/N DT DT a NP[nb]_46/N_46>) (<T N 1 2> (<L N/N JJ JJ high N_41/N_41>) (<L N NN NN percentage N>) ) )
     (<T NP\NP 0 2> (<L (NP\NP)/NP IN IN of (NP_54\NP_54)/NP_55>) (<T NP 0 1> (<T N 1 2>
     (<L N/N NN NN cancer N_64/N_64>) (<L N NNS NNS deaths N>) ) ) ) ) (<T NP\NP 0 2>
     (<L (NP\NP)/NP IN IN among (NP_73\NP_73)/NP_74>) (<T NP 0 2> (<T NP 1 2> (<L NP[nb]/N DT DT a NP[nb]_81/N_81>)
     (<L N NN NN group N>) ) (<T NP\NP 0 2> (<L (NP\NP)/NP IN IN of (NP_89\NP_89)/NP_90>) (<T NP 0 2> (<T NP 0 1>
     (<L N NNS NNS workers N>) ) (<T NP\NP 0 1> (<T S[pss]\NP 0 2> (<T S[pss]\NP 0 2>
     (<L (S[pss]\NP)/PP VBN VBN exposed (S[pss]\NP_100)/PP_101>) (<T PP 0 2> (<L PP/NP TO TO to PP/NP_106>)
     (<L NP PRP PRP it NP>) ) ) (<T (S\NP)\(S\NP) 1 2> (<T NP 0 1> (<T N 1 2> (<T N/N 1 2> (<T (N/N)/(N/N) 1 2>
     (<L S[adj]\NP RBR RBR more S[adj]\NP_153>)
     (<L ((N/N)/(N/N))\(S[adj]\NP) IN IN than ((N_147/N_139)_147/(N_147/N_139)_147)\(S[adj]_148\NP_142)_148>) )
     (<L N/N CD CD 30 N_131/N_131>) ) (<L N NNS NNS years N>) ) )
     (<L ((S\NP)\(S\NP))\NP IN IN ago ((S_121\NP_116)_121\(S_121\NP_116)_121)\NP_122>) ) ) ) ) ) ) ) ) ) ) )
     (<T S[dcl]\S[dcl] 1 2> (<L , , , , ,>) (<T S[dcl]\S[dcl] 1 2> (<T NP 0 1> (<L N NNS NNS researchers N>) )
     (<L (S[dcl]\S[dcl])\NP VBD VBD reported (S[dcl]\S[dcl]_8)\NP_9>) ) ) ) (<L . . . . .>) )'''
     pt = parse_ccg_derivation(txt)
     s = sentence_from_pt(pt)
     dprint(s)
     self.assertIsNotNone(pt)
     # Build the DRS without external lookups so the test is deterministic.
     ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     ccg.build_execution_sequence(pt)
     ccg.create_drs()
     ccg.final_rename()
     d = ccg.get_drs()
     s = d.show(SHOW_LINEAR)
     dprint(s)
     sent = ccg.get_verbnet_sentence()
     a = get_constituents_string_list(sent)
     # Expected constituent strings; '#' marks the head word of each span.
     x = [
         'NP(A #form)',              # 0
         'PP(#of)',                  # 1
         'NP(#asbestos)',            # 2
         'ADVP(once #used to make Kent cigarette filters)',   # 3
         'S_INF(#to make)',          # 4
         'NP(Kent cigarette #filters)',  # 5
         'VP(#has caused)',          # 6
         'NP(a high #percentage)',   # 7
         'PP(#of)',                  # 8
         'NP(cancer #deaths)',       # 9
         'PP(#among)',               #10
         'NP(a #group)',             #11
         'PP(#of)',                  #12
         'NP(#workers)',             #13
         'ADVP(#exposed to it more than 30 years ago)',  #14
         'NP(more than 30 #years)',  #15
         'NP(#researchers)',         #16
         'VP(#reported)',            #17
     ]
     dprint('\n'.join(a))
     self.assertListEqual(x, a)
     # Expected constituent tree (indices into the list above):
     # 17 VP(reported.)
     #    06 VP(has caused)
     #       00 NP(A form)
     #          01 PP(of)
     #             02 NP(asbestos)
     #          03 ADVP(once used to make Kent cigarette filters)
     #             04 S_INF(to make)
     #                05 NP(Kent cigarette filters)
     #       07 NP(a high percentage)
     #          08 PP(of)
     #             09 NP(cancer deaths)
     #          10 PP(among)
     #             11 NP(a group)
     #                12 PP(of)
     #                   13 NP(workers)
     #                      14 ADVP(exposed to it more than 30 years ago)
     #                         15 NP(more than 30 years)
     #    16 NP(researchers)
     x = (17, [(6, [(0, [(1, [(2, [])]), (3, [(4, [(5, [])])])]), (7, [(8, [(9, [])]), (10, [(11, [(12, [(13, [(14, [(15, [])])])])])])])]), (16, [])])
     a = sent.get_constituent_tree()
     dprint_constituent_tree(sent, a)
     self.assertEqual(repr(x), repr(a))
예제 #26
0
    def test2_GOLD_Wsj0001_2(self):
        """Verify DRS construction from the GOLD CCGbank derivation of wsj_0001.2.

        Builds the DRS from a hand-checked GOLD derivation, resolves proper
        names (Mr.-Vinken, Elsevier-N.V.), then checks the constituent
        string list and the constituent tree.
        """
        # Mr. Vinken is chairman of Elsevier N.V. , the Dutch publishing group .
        #
        # Predicate-argument dependencies from the CCGbank PARG file:
        # PARG
        # 1      0      N/N             1      Vinken Mr.
        # 1      2      (S[dcl]\NP)/NP  1      Vinken is
        # 3      2      (S[dcl]\NP)/NP  2      chairman is
        # 3      4      (NP\NP)/NP      1      chairman of
        # 6      4      (NP\NP)/NP      2      N.V. of
        # 6      5      N/N             1      N.V. Elsevier
        # 11     4      (NP\NP)/NP      2      group of
        # 11     8      NP[nb]/N        1      group the
        # 11     9      N/N             1      group Dutch
        # 11     10     N/N             1      group publishing
        txt = r'''
(<T S[dcl] 0 2>
    (<T S[dcl] 1 2>
        (<T NP 0 1>
            (<T N 1 2>
                (<L N/N NNP NNP Mr. N_142/N_142>)
                (<L N NNP NNP Vinken N>)
            )
        )
        (<T S[dcl]\NP 0 2>
            (<L (S[dcl]\NP)/NP VBZ VBZ is (S[dcl]\NP_87)/NP_88>)
            (<T NP 0 2>
                (<T NP 0 1>
                    (<L N NN NN chairman N>)
                )
                (<T NP\NP 0 2>
                    (<L (NP\NP)/NP IN IN of (NP_99\NP_99)/NP_100>)
                    (<T NP 0 2>
                        (<T NP 0 1>
                            (<T N 1 2>
                                (<L N/N NNP NNP Elsevier N_109/N_109>)
                                (<L N NNP NNP N.V. N>)
                            )
                        )
                        (<T NP[conj] 1 2>
                            (<L , , , , ,>)
                            (<T NP 1 2>
                                (<L NP[nb]/N DT DT the NP[nb]_131/N_131>)
                                (<T N 1 2>
                                    (<L N/N NNP NNP Dutch N_126/N_126>)
                                    (<T N 1 2>
                                        (<L N/N VBG VBG publishing N_119/N_119>)
                                        (<L N NN NN group N>)
                                    )
                                )
                            )
                        )
                    )
                )
            )
        )
    )
    (<L . . . . .>)
)'''
        pt = parse_ccg_derivation(txt)
        s = sentence_from_pt(pt)
        dprint(s)
        self.assertIsNotNone(pt)
        # Build the DRS without external lookups so the test is deterministic.
        ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
        ccg.build_execution_sequence(pt)
        ccg.create_drs()
        ccg.resolve_proper_names()
        ccg.final_rename()
        d = ccg.get_drs()
        s = d.show(SHOW_LINEAR)
        dprint(s)
        sent = ccg.get_verbnet_sentence()
        a = get_constituents_string_list(sent)
        # Expected constituent strings; '#' marks the head word of each span.
        x = [
            'NP(#Mr.-Vinken)',
            'VP(#is)',
            'NP(#chairman)',
            'PP(#of)',
            'NP(#Elsevier-N.V.)',
            'NP(the Dutch publishing #group)',
        ]
        dprint('\n'.join(a))
        self.assertListEqual(x, a)
        # Expected constituent tree (indices into the list above):
        # 01 VP(is)
        #    00 NP(Mr.-Vinken)
        #    02 NP(chairman)
        #       03 PP(of Elsevier N.V. the Dutch publishing group)
        #          04 NP(Elsevier N.V.)
        #             05 NP(the Dutch publishing group)
        x = (1, [(0, []), (2, [(3, [(4, [(5, [])])])])])
        a = sent.get_constituent_tree()
        dprint_constituent_tree(sent, a)
        self.assertEqual(repr(x), repr(a))
예제 #27
0
 def test2_GOLD_Wsj0001_1(self):
     """Verify DRS construction from the GOLD CCGbank derivation of wsj_0001.1.

     Builds the DRS from a hand-checked GOLD derivation, resolves proper
     names (Pierre-Vinken), then checks the constituent string list and
     the constituent tree.
     """
     # ID=wsj_0001.1 PARSER=GOLD NUMPARSE=1
     # Pierre Vinken, 61 years old, will join the board as a nonexecutive director Nov 29.
     # Pretty-printed form of the derivation in `txt` below:
     # (<T S[dcl] 0 2>
     #   (<T S[dcl] 1 2>
     #       (<T NP 0 2>
     #           (<T NP 0 2>
     #               (<T NP 0 2>
     #                   (<T NP 0 1>
     #                       (<T N 1 2>
     #                           (<L N/N NNP NNP Pierre N_73/N_73>)
     #                           (<L N NNP NNP Vinken N>)
     #                       )
     #                   )
     #                   (<L , , , , ,>)
     #               )
     #               (<T NP\NP 0 1>
     #                   (<T S[adj]\NP 1 2>
     #                       (<T NP 0 1>
     #                           (<T N 1 2>
     #                               (<L N/N CD CD 61 N_93/N_93>)
     #                               (<L N NNS NNS years N>)
     #                           )
     #                       )
     #                       (<L (S[adj]\NP)\NP JJ JJ old (S[adj]\NP_83)\NP_84>)
     #                   )
     #               )
     #           )
     #           (<L , , , , ,>)
     #       )
     #       (<T S[dcl]\NP 0 2>
     #           (<L (S[dcl]\NP)/(S[b]\NP) MD MD will (S[dcl]\NP_10)/(S[b]_11\NP_10:B)_11>)
     #           (<T S[b]\NP 0 2>
     #               (<T S[b]\NP 0 2>
     #                   (<T (S[b]\NP)/PP 0 2>
     #                       (<L ((S[b]\NP)/PP)/NP VB VB join ((S[b]\NP_20)/PP_21)/NP_22>)
     #                       (<T NP 1 2>
     #                           (<L NP[nb]/N DT DT the NP[nb]_29/N_29>)
     #                           (<L N NN NN board N>)
     #                       )
     #                   )
     #                   (<T PP 0 2>
     #                       (<L PP/NP IN IN as PP/NP_34>)
     #                       (<T NP 1 2>
     #                           (<L NP[nb]/N DT DT a NP[nb]_48/N_48>)
     #                           (<T N 1 2>
     #                               (<L N/N JJ JJ nonexecutive N_43/N_43>)
     #                               (<L N NN NN director N>)
     #                           )
     #                       )
     #                   )
     #               )
     #               (<T (S\NP)\(S\NP) 0 2>
     #                   (<L ((S\NP)\(S\NP))/N[num] NNP NNP Nov. ((S_61\NP_56)_61\(S_61\NP_56)_61)/N[num]_62>)
     #                   (<L N[num] CD CD 29 N[num]>)
     #               )
     #           )
     #       )
     #   )
     #   (<L . . . . .>)
     # )
     txt = r'''(<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T NP 0 2> (<T NP 0 2> (<T NP 0 2> (<T NP 0 1> (<T N 1 2>
         (<L N/N NNP NNP Pierre N_73/N_73>) (<L N NNP NNP Vinken N>) ) ) (<L , , , , ,>) ) (<T NP\NP 0 1>
         (<T S[adj]\NP 1 2> (<T NP 0 1> (<T N 1 2> (<L N/N CD CD 61 N_93/N_93>) (<L N NNS NNS years N>) ) )
         (<L (S[adj]\NP)\NP JJ JJ old (S[adj]\NP_83)\NP_84>) ) ) ) (<L , , , , ,>) ) (<T S[dcl]\NP 0 2>
         (<L (S[dcl]\NP)/(S[b]\NP) MD MD will (S[dcl]\NP_10)/(S[b]_11\NP_10:B)_11>) (<T S[b]\NP 0 2>
         (<T S[b]\NP 0 2> (<T (S[b]\NP)/PP 0 2> (<L ((S[b]\NP)/PP)/NP VB VB join ((S[b]\NP_20)/PP_21)/NP_22>)
         (<T NP 1 2> (<L NP[nb]/N DT DT the NP[nb]_29/N_29>) (<L N NN NN board N>) ) ) (<T PP 0 2>
         (<L PP/NP IN IN as PP/NP_34>) (<T NP 1 2> (<L NP[nb]/N DT DT a NP[nb]_48/N_48>) (<T N 1 2>
         (<L N/N JJ JJ nonexecutive N_43/N_43>) (<L N NN NN director N>) ) ) ) ) (<T (S\NP)\(S\NP) 0 2>
         (<L ((S\NP)\(S\NP))/N[num] NNP NNP Nov. ((S_61\NP_56)_61\(S_61\NP_56)_61)/N[num]_62>)
         (<L N[num] CD CD 29 N[num]>) ) ) ) ) (<L . . . . .>) )'''
     pt = parse_ccg_derivation(txt)
     self.assertIsNotNone(pt)
     s = sentence_from_pt(pt)
     dprint(s)
     # Build the DRS without external lookups so the test is deterministic.
     ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     ccg.build_execution_sequence(pt)
     ccg.create_drs()
     ccg.resolve_proper_names()
     ccg.final_rename()
     d = ccg.get_drs()
     s = d.show(SHOW_LINEAR)
     dprint(s)
     sent = ccg.get_verbnet_sentence()
     a = get_constituents_string_list(sent)
     # FIXME: VP(will #join) should be S_INF(will #join).
     # Issues occurs because I convert modal-verb combinator categories to modifiers. Must be fixed on functor
     # creation - Lexeme.get_production()
     # will: (S[dcl]\NP)/(S[b]/NP) -> (S\NP)/(S/NP)
     # Expected constituent strings; '#' marks the head word of each span.
     x = [
         'NP(#Pierre-Vinken)',
         'ADJP(61 years #old)',
         'NP(61 #years)',
         'VP(#will join)',
         'NP(the #board)',
         'PP(#as)',
         'NP(a nonexecutive #director)',
         'NP(#Nov. 29)'
     ]
     dprint('\n'.join(a))
     self.assertListEqual(x, a)
     # Expected constituent tree (indices into the list above):
     # 03 VP(will join)
     #    00 NP(Pierre-Vinken)
     #       01 ADJP(61 years old)
     #          02 NP(61 years)
     #    04 NP(the board)
     #    05 PP(as)
     #       06 NP(a nonexecutive director)
     #    07 NP(Nov. 29)
     x = (3, [(0, [(1, [(2, [])])]), (4, []), (5, [(6, [])]), (7, [])])
     a = sent.get_constituent_tree()
     dprint_constituent_tree(sent, a)
     self.assertEqual(repr(x), repr(a))
예제 #28
0
 def test2_GOLD_Wsj0002_1(self):
     # ID=wsj_0002.1 PARSER=GOLD NUMPARSE=1
     # Rudolph Agnew, 55 years old and former chairman of Consolidated Gold Fields PLC, was named a nonexecutive
     # director of this British industrial conglomerate.
     # (<T S[dcl] 0 2>
     #   (<T S[dcl] 1 2>
     #       (<T NP 0 2>
     #           (<T NP 0 2>
     #               (<T NP 0 2>
     #                   (<T NP 0 1>
     #                       (<T N 1 2>
     #                           (<L N/N NNP NNP Rudolph N_72/N_72>)
     #                           (<L N NNP NNP Agnew N>)
     #                       )
     #                   )
     #                   (<L , , , , ,>)
     #               )
     #               (<T NP\NP 0 1>
     #                   (<T S[adj]\NP 0 2>
     #                       (<T S[adj]\NP 1 2>
     #                           (<T NP 0 1>
     #                               (<T N 1 2>
     #                                   (<L N/N CD CD 55 N_92/N_92>)
     #                                   (<L N NNS NNS years N>)
     #                               )
     #                           )
     #                           (<L (S[adj]\NP)\NP JJ JJ old (S[adj]\NP_82)\NP_83>)
     #                       )
     #                       (<T S[adj]\NP[conj] 1 2>
     #                           (<L conj CC CC and conj>)
     #                           (<T NP 0 2>
     #                               (<T NP 0 1>
     #                                   (<T N 1 2>
     #                                       (<L N/N JJ JJ former N_102/N_102>)
     #                                       (<L N NN NN chairman N>)
     #                                   )
     #                               )
     #                               (<T NP\NP 0 2>
     #                                   (<L (NP\NP)/NP IN IN of (NP_111\NP_111)/NP_112>)
     #                                   (<T NP 0 1>
     #                                       (<T N 1 2>
     #                                           (<L N/N NNP NNP Consolidated N_135/N_135>)
     #                                           (<T N 1 2>
     #                                               (<L N/N NNP NNP Gold N_128/N_128>)
     #                                               (<T N 1 2>
     #                                                   (<L N/N NNP NNP Fields N_121/N_121>)
     #                                                   (<L N NNP NNP PLC N>)
     #                                               )
     #                                           )
     #                                       )
     #                                   )
     #                               )
     #                           )
     #                       )
     #                   )
     #               )
     #           )
     #           (<L , , , , ,>)
     #       )
     #       (<T S[dcl]\NP 0 2>
     #           (<L (S[dcl]\NP)/(S[pss]\NP) VBD VBD was (S[dcl]\NP_10)/(S[pss]_11\NP_10:B)_11>)
     #           (<T S[pss]\NP 0 2>
     #               (<L (S[pss]\NP)/NP VBN VBN named (S[pss]\NP_18)/NP_19>)
     #                   (<T NP 0 2> (<T NP 1 2>
     #                       (<L NP[nb]/N DT DT a NP[nb]_33/N_33>)
     #                       (<T N 1 2>
     #                           (<L N/N JJ JJ nonexecutive N_28/N_28>)
     #                           (<L N NN NN director N>)
     #                       )
     #                   )
     #                   (<T NP\NP 0 2>
     #                       (<L (NP\NP)/NP IN IN of (NP_41\NP_41)/NP_42>)
     #                       (<T NP 1 2>
     #                           (<L NP[nb]/N DT DT this NP[nb]_63/N_63>)
     #                           (<T N 1 2>
     #                               (<L N/N JJ JJ British N_58/N_58>)
     #                               (<T N 1 2>
     #                                   (<L N/N JJ JJ industrial N_51/N_51>)
     #                                   (<L N NN NN conglomerate N>)
     #                               )
     #                           )
     #                       )
     #                   )
     #               )
     #           )
     #       )
     #   )
     #   (<L . . . . .>)
     # )
     txt = r'''(<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T NP 0 2> (<T NP 0 2> (<T NP 0 2> (<T NP 0 1> (<T N 1 2>
         (<L N/N NNP NNP Rudolph N_72/N_72>) (<L N NNP NNP Agnew N>) ) ) (<L , , , , ,>) ) (<T NP\NP 0 1>
         (<T S[adj]\NP 0 2> (<T S[adj]\NP 1 2> (<T NP 0 1> (<T N 1 2> (<L N/N CD CD 55 N_92/N_92>)
         (<L N NNS NNS years N>) ) ) (<L (S[adj]\NP)\NP JJ JJ old (S[adj]\NP_82)\NP_83>) ) (<T S[adj]\NP[conj] 1 2>
         (<L conj CC CC and conj>) (<T NP 0 2> (<T NP 0 1> (<T N 1 2> (<L N/N JJ JJ former N_102/N_102>)
         (<L N NN NN chairman N>) ) ) (<T NP\NP 0 2> (<L (NP\NP)/NP IN IN of (NP_111\NP_111)/NP_112>) (<T NP 0 1>
         (<T N 1 2> (<L N/N NNP NNP Consolidated N_135/N_135>) (<T N 1 2> (<L N/N NNP NNP Gold N_128/N_128>)
         (<T N 1 2> (<L N/N NNP NNP Fields N_121/N_121>) (<L N NNP NNP PLC N>) ) ) ) ) ) ) ) ) ) ) (<L , , , , ,>) )
         (<T S[dcl]\NP 0 2> (<L (S[dcl]\NP)/(S[pss]\NP) VBD VBD was (S[dcl]\NP_10)/(S[pss]_11\NP_10:B)_11>)
         (<T S[pss]\NP 0 2> (<L (S[pss]\NP)/NP VBN VBN named (S[pss]\NP_18)/NP_19>) (<T NP 0 2> (<T NP 1 2>
         (<L NP[nb]/N DT DT a NP[nb]_33/N_33>) (<T N 1 2> (<L N/N JJ JJ nonexecutive N_28/N_28>)
         (<L N NN NN director N>) ) ) (<T NP\NP 0 2> (<L (NP\NP)/NP IN IN of (NP_41\NP_41)/NP_42>) (<T NP 1 2>
         (<L NP[nb]/N DT DT this NP[nb]_63/N_63>) (<T N 1 2> (<L N/N JJ JJ British N_58/N_58>) (<T N 1 2>
         (<L N/N JJ JJ industrial N_51/N_51>) (<L N NN NN conglomerate N>) ) ) ) ) ) ) ) ) (<L . . . . .>) )'''
     pt = parse_ccg_derivation(txt)
     self.assertIsNotNone(pt)
     s = sentence_from_pt(pt)
     dprint(s)
     ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     ccg.build_execution_sequence(pt)
     ccg.create_drs()
     ccg.resolve_proper_names()
     ccg.final_rename()
     d = ccg.get_drs()
     s = d.show(SHOW_LINEAR)
     dprint(s)
     sent = ccg.get_verbnet_sentence()
     a = get_constituents_string_list(sent)
     dprint('\n'.join(a))
     # Hash indicates head word in constituent
     x = [
         'NP(#Rudolph-Agnew)',
         'ADJP(55 years #old and former chairman of Consolidated-Gold-Fields-PLC)',
         'NP(55 #years)',
         'NP(former #chairman)',
         'PP(#of)',
         'NP(#Consolidated-Gold-Fields-PLC)',
         'VP(#was named)',
         'NP(a nonexecutive #director)',
         'PP(#of)',
         'NP(this British industrial #conglomerate)'
     ]
     self.assertListEqual(x, a)
     # 6 VP(was named)
     #   0 NP(Rudolph-Agnew)
     #     1 ADVP(55 years old former chairman of Consolidated-Gold-Fields-PLC)
     #       2 NP(55 years)
     #       3 NP(former chairman)
     #         4 PP(of)
     #           5 NP(Consolidated-Gold-Fields-PLC)
     #   7 NP(a nonexecutive director)
     #     8 PP(of)
     #       9 NP(this British industrial conglomerate)
     x = (6, [(0, [(1, [(2, []), (3, [(4, [(5, [])])])])]), (7, [(8, [(9, [])])])])
     a = sent.get_constituent_tree()
     dprint_constituent_tree(sent, a)
     self.assertEqual(repr(x), repr(a))
예제 #29
0
def make_drs(daemon):
    """Batch-convert CCG derivations for `daemon` into DRS data files.

    Scans data/ldc/<daemon>/ccgbank for files whose name contains
    'ccg_derivation', parses each non-comment line as a CCG derivation,
    converts it to a DRS, and writes one drs_<id>_<nnnn>.dat file per
    sentence under data/ldc/<daemon>/drs/<id>/.

    Failures are tallied and reported at the end; a bad derivation no
    longer aborts the whole run (see BUG FIX notes below).
    """
    global pypath, projdir, datapath, idsrch
    allfiles = []
    projdir = os.path.dirname(os.path.dirname(__file__))

    easysrl_path = os.path.join(projdir, 'data', 'ldc', daemon, 'drs')
    if not os.path.exists(easysrl_path):
        os.makedirs(easysrl_path)

    # Collect the ccg_derivation input files.
    ldcpath = os.path.join(projdir, 'data', 'ldc', daemon, 'ccgbank')
    dirlist1 = os.listdir(ldcpath)
    for fname in dirlist1:
        if 'ccg_derivation' not in fname:
            continue
        ldcpath1 = os.path.join(ldcpath, fname)
        if os.path.isfile(ldcpath1):
            allfiles.append(ldcpath1)

    failed_parse = 0
    failed_ccg2drs = []
    start = 0
    progress = -1
    for fn in allfiles:
        idx = idsrch.match(fn)
        if idx is None:
            continue
        idx = idx.group('id')

        outdir = os.path.join(easysrl_path, idx)
        if not os.path.exists(outdir):
            os.mkdir(outdir)

        with open(fn, 'r') as fd:
            lines = fd.readlines()

        name, _ = os.path.splitext(os.path.basename(fn))
        for i in range(start, len(lines)):
            start = 0  # resume offset applies to the first file only
            ccgbank = lines[i].strip()
            if len(ccgbank) == 0 or ccgbank[0] == '#':
                continue

            if progress < 0:
                print('%s-%04d' % (name, i))
            else:
                progress = print_progress(progress, 10)

            try:
                # CCG parser is Java so output is UTF-8.
                pt = parse_ccg_derivation(ccgbank)
                s = sentence_from_pt(pt).strip()
                pccg = pt_to_ccg_derivation(pt)
            except Exception:
                # BUG FIX: a leftover debug `raise` made this `continue`
                # unreachable, so the first bad derivation aborted the
                # whole batch and the failure report below never ran.
                failed_parse += 1
                continue

            try:
                d = process_ccg_pt(
                    pt, CO_VERIFY_SIGNATURES | CO_NO_VERBNET
                    | CO_NO_WIKI_SEARCH).get_drs()
                assert d is not None
                assert isinstance(d, DRS)
                d = d.show(SHOW_LINEAR).strip()
            except Exception as e:
                print(e)
                # BUG FIX: same unreachable-`continue` pattern as above;
                # record the failure and keep going.
                failed_ccg2drs.append((name, i, ccgbank))
                continue

            # Open in binary mode: every write below is a bytes payload
            # (UTF-8 via safe_utf8_encode), so 'wb' is correct on both
            # Python 2 and 3 ('w' only worked by accident on Python 2).
            with open(
                    os.path.join(outdir,
                                 'drs_%s_%04d.dat' % (idx, i)), 'wb') as fd:
                fd.write(b'<sentence>\n')
                fd.write(safe_utf8_encode(s))
                fd.write(b'\n</sentence>\n<drs>\n')
                fd.write(safe_utf8_encode(d))
                fd.write(b'\n</drs>\n<predarg>\n')
                fd.write(safe_utf8_encode(pccg))
                fd.write(b'\n')
                fd.write(b'</predarg>\n')

    if failed_parse != 0:
        print('%d derivations failed to parse' % failed_parse)
    if len(failed_ccg2drs) != 0:
        print('%d derivations failed to convert to DRS' % len(failed_ccg2drs))
        for x in failed_ccg2drs:
            print('%s-%04d failed: {%s}' % x)
예제 #30
0
    def test2_GOLD_Wsj0051_13(self):
        """GOLD-derivation regression test for WSJ section 00, sentence 51-13.

        Feeds a hand-checked CCGbank derivation for "The bids, he added,
        were contrary to common sense." through the Ccg2Drs pipeline and
        asserts the constituent strings extracted from the resulting
        verbnet sentence.
        """
        # Exact CCGbank derivation text; whitespace/layout is part of the
        # fixture and must not be reformatted.
        txt = r'''
(<T S[dcl] 0 2> 
  (<T S[dcl] 1 2> 
    (<T NP 1 2> 
      (<L NP[nb]/N DT DT The NP[nb]_273/N_273>) 
      (<L N NNS NNS bids N>) 
    ) 
    (<T S[dcl]\NP 1 2> 
      (<T (S\NP)/(S\NP) 1 2> 
        (<L , , , , ,>) 
        (<T (S\NP)/(S\NP) 0 2> 
          (<T S[dcl]/S[dcl] 1 2> 
            (<T S/(S\NP) 0 1> 
              (<L NP PRP PRP he NP>) 
            ) 
            (<L (S[dcl]\NP)/S[dcl] VBD VBD added (S[dcl]\NP_242)/S[dcl]_243>) 
          ) 
          (<L , , , , ,>) 
        ) 
      ) 
      (<T S[dcl]\NP 0 2> 
        (<L (S[dcl]\NP)/(S[adj]\NP) VBD VBD were (S[dcl]\NP_211)/(S[adj]_212\NP_211:B)_212>) 
        (<T S[adj]\NP 0 2> 
          (<L (S[adj]\NP)/PP JJ JJ contrary (S[adj]\NP_219)/PP_220>) 
          (<T PP 0 2> 
            (<L PP/NP TO TO to PP/NP_225>) 
            (<T NP 0 1> 
              (<T N 1 2> 
                (<L N/N JJ JJ common N_234/N_234>) 
                (<L N NN NN sense N>) 
              ) 
            ) 
          ) 
        ) 
      ) 
    ) 
  ) 
  (<L . . . . .>) 
) 
'''
        # Parse the derivation and recover the plain-text sentence for
        # debug output.
        pt = parse_ccg_derivation(txt)
        s = sentence_from_pt(pt)
        dprint(s)
        self.assertIsNotNone(pt)
        # Build the DRS without verbnet/wiki lookups, verifying category
        # signatures along the way.
        ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
        ccg.build_execution_sequence(pt)
        ccg.create_drs()
        ccg.final_rename()
        d = ccg.get_drs()
        s = d.show(SHOW_LINEAR)
        dprint(s)
        # Extract constituents and compare against the expected list.
        sent = ccg.get_verbnet_sentence()
        a = get_constituents_string_list(sent)
        # '#' marks the head word of each constituent.
        x = [
            'NP(The #bids)',
            'ADVP(he #added)',
            'VP(#were)',
            'ADJP(#contrary to common sense)',
            'PP(#to)',
            'NP(common #sense)'
        ]
        dprint('\n'.join(a))
        self.assertListEqual(x, a)