Ejemplo n.º 1
0
 def test02_AndOfObj(self):
     text = "He saw John and Paul"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_PROPERNAME | RT_EVENT)
     phrases = [sp.text for r, sp in f.iteritems()]
     self.assertTrue('John' in phrases)
     self.assertTrue('Paul' in phrases)
     self.assertTrue('saw' in phrases)
     john = filter(lambda x: 'John' == x[1].text, f.iteritems())[0]
     paul = filter(lambda x: 'Paul' == x[1].text, f.iteritems())[0]
     saw = filter(lambda x: 'saw' == x[1].text, f.iteritems())[0]
     J = john[0]
     P = paul[0]
     E = saw[0]
     # FIXME: wn lemmatizer does not convert saw to see - I guess to to ambiguity
     self.assertTrue(d.find_condition(Rel('_EVENT', [E])) is not None)
     self.assertTrue(d.find_condition(Rel('saw', [E])) is not None)
     self.assertTrue(d.find_condition(Rel('John', [J])) is not None)
     self.assertTrue(d.find_condition(Rel('Paul', [P])) is not None)
     self.assertTrue(d.find_condition(Rel('_ARG1', [E, J])) is not None)
Ejemplo n.º 2
0
 def test10_OrOfVerb_OrInBrackets(self):
     text = "That which is perceived or known or inferred to have its own distinct existence (living or nonliving)"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs(nodups=True)
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     # RT_EMPTY_DRS adds 'or' to phrases
     f = sentence.select_phrases(lambda x: x.pos is POS.from_cache('WDT') or \
                                                0 == (x.mask & RT_EMPTY_DRS),
                                 contiguous=False)
     phrases = [sp.text for r, sp in f.iteritems()]
     self.assertTrue('That which' in phrases)
     self.assertTrue('have' in phrases)
     self.assertTrue('is perceived known inferred' in phrases)
     self.assertTrue('its own distinct existence' in phrases)
     verb1 = filter(lambda x: 'is perceived known inferred' == x[1].text,
                    f.iteritems())[0]
     verb2 = filter(lambda x: 'have' == x[1].text, f.iteritems())[0]
     agent = filter(lambda x: 'That which' == x[1].text, f.iteritems())[0]
     theme = filter(lambda x: 'its own distinct existence' == x[1].text,
                    f.iteritems())[0]
     X1 = agent[0]
     E1 = verb1[0]
     E2 = verb2[0]
     X2 = theme[1][0].refs[1]
     X3 = theme[1][1].refs[0]
     self.assertTrue(d.find_condition(Rel('_EVENT', [E1])) is not None)
     self.assertTrue(d.find_condition(Rel('_ARG0', [E1, X1])) is not None)
     self.assertTrue(d.find_condition(Rel('_ARG1', [E1, E2])) is not None)
     # TODO: should the theme attach to X2?
     self.assertTrue(d.find_condition(Rel('_ARG1', [E2, X3])) is not None)
     self.assertTrue(d.find_condition(Rel('_POSS', [X2, X3])) is not None)
Ejemplo n.º 3
0
 def test01_AndOfSubj(self):
     text = "John and Paul went to the movies"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_PROPERNAME | RT_EVENT)
     phrases = [sp.text for r, sp in f.iteritems()]
     self.assertTrue('John' in phrases)
     self.assertTrue('Paul' in phrases)
     self.assertTrue('went' in phrases)
     john = filter(lambda x: 'John' == x[1].text, f.iteritems())[0]
     paul = filter(lambda x: 'Paul' == x[1].text, f.iteritems())[0]
     went = filter(lambda x: 'went' == x[1].text, f.iteritems())[0]
     J = john[0]
     P = paul[0]
     E = went[0]
     self.assertTrue(d.find_condition(Rel('_EVENT', [E])) is not None)
     self.assertTrue(d.find_condition(Rel('go', [E])) is not None)
     self.assertTrue(d.find_condition(Rel('John', [J])) is not None)
     self.assertTrue(d.find_condition(Rel('Paul', [P])) is not None)
     self.assertTrue(d.find_condition(Rel('_ARG0', [E, J])) is not None)
Ejemplo n.º 4
0
 def test03_OrOfObj(self):
     text = "To participate in games or sport"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_ENTITY | RT_EVENT)
     phrases = [sp.text for r, sp in f.iteritems()]
     self.assertTrue('participate' in phrases)
     self.assertTrue('games' in phrases)
     self.assertTrue('sport' in phrases)
     noun1 = filter(lambda x: 'games' == x[1].text, f.iteritems())[0]
     noun2 = filter(lambda x: 'sport' == x[1].text, f.iteritems())[0]
     verb = filter(lambda x: 'participate' == x[1].text, f.iteritems())[0]
     X1 = noun1[0]
     X2 = noun2[0]
     E = verb[0]
     self.assertTrue(d.find_condition(Rel('_EVENT', [E])) is not None)
     self.assertTrue(d.find_condition(Rel('participate', [E])) is not None)
     self.assertTrue(d.find_condition(Rel('games', [X1])) is not None)
     self.assertTrue(d.find_condition(Rel('sport', [X2])) is not None)
     self.assertTrue(d.find_condition(Rel('_ARG1', [E, X2])) is not None)
Ejemplo n.º 5
0
 def test05_AndOfVerb_AndOfObj(self):
     text = "Bell makes and distributes computers, electronics, and building products"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_PROPERNAME | RT_ENTITY | RT_EVENT
                                 | RT_ATTRIBUTE)
     phrases = [sp.text for r, sp in f.iteritems()]
     self.assertTrue('Bell' in phrases)
     self.assertTrue('makes distributes' in phrases)
     self.assertTrue('computers' in phrases)
     self.assertTrue('electronics' in phrases)
     # Note if we add RT_EMPTY_DRS to the selection criteria then this phrase becomes 'and building products'
     self.assertTrue('building products' in phrases)
     self.assertEqual(5, len(phrases))
     verb1 = filter(lambda x: 'makes distributes' == x[1].text,
                    f.iteritems())[0]
     agent = filter(lambda x: 'Bell' == x[1].text, f.iteritems())[0]
     theme1 = filter(lambda x: 'computers' == x[1].text, f.iteritems())[0]
     theme2 = filter(lambda x: 'electronics' == x[1].text, f.iteritems())[0]
     theme3 = filter(lambda x: 'building products' == x[1].text,
                     f.iteritems())[0]
     X1 = agent[0]
     Y1 = theme1[0]
     Y2 = theme2[0]
     Y3 = theme3[0]
     E1 = verb1[0]
     self.assertTrue(d.find_condition(Rel('_EVENT', [E1])) is not None)
     self.assertTrue(d.find_condition(Rel('_ARG0', [E1, X1])) is not None)
     # TODO: should we add proposition for multi NP's conjoined?
     self.assertTrue(d.find_condition(Rel('_ARG1', [E1, Y3])) is not None)
Ejemplo n.º 6
0
 def test1_Currency_00_0194(self):
     text = r"Without the Cray-3 research and development expenses, the company would have been able to report a profit of $19.3 million for the first half of 1989 rather than the $5.9 million it posted."
     etext = r"Without the Cray-3 research and development expenses , the company would have been able to report a profit of $ 19.3 million for the first half of 1989 rather than the $ 5.9 million it posted"
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs(nodups=True)
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     fnps = sentence.get_np_nominals()
     nps = [sp.text for r, sp in fnps]
     self.assertTrue('the Cray-3 research and development expenses' in nps)
     self.assertTrue('the company' in nps)
     self.assertTrue('a profit' in nps)
     self.assertTrue('$ 19.3 million' in nps)
     self.assertTrue('the first half' in nps)
     self.assertTrue('the $ 5.9 million' in nps)
     self.assertTrue('1989' in nps)
     fvps = sentence.get_vp_nominals()
     vps = [sp.text for r, sp in fvps]
     self.assertTrue('would have been' in vps)
     self.assertTrue('report' in vps)
     self.assertTrue('posted' in vps)
     would_have_been = filter(lambda x: 'would have been' == x[1].text,
                              fvps)[0][0]
     report = filter(lambda x: 'report' == x[1].text, fvps)[0][0]
     posted = filter(lambda x: 'posted' == x[1].text, fvps)[0][0]
     cray_rnd = filter(
         lambda x: 'the Cray-3 research and development expenses' == x[1].
         text, fnps)[0][0]
     company = filter(lambda x: 'the company' == x[1].text, fnps)[0][0]
     profit = filter(lambda x: 'a profit' == x[1].text, fnps)[0][0]
     first_half = filter(lambda x: 'the first half' == x[1].text,
                         fnps)[0][0]
     n1989 = filter(lambda x: '1989' == x[1].text, fnps)[0][0]
     n19_3M = filter(lambda x: '$ 19.3 million' == x[1].text, fnps)[0][0]
     n5_9M = filter(lambda x: 'the $ 5.9 million' == x[1].text, fnps)[0][0]
     self.assertTrue(
         d.find_condition(Rel('without', [would_have_been, cray_rnd]))
         is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG0', [would_have_been, company]))
         is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG0', [report, company])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG1', [report, profit])) is not None)
     self.assertTrue(
         d.find_condition(Rel('of', [profit, n19_3M])) is not None)
     self.assertTrue(
         d.find_condition(Rel('for', [profit, first_half])) is not None)
     self.assertTrue(
         d.find_condition(Rel('of', [first_half, n1989])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG1', [posted, n5_9M])) is not None)
Ejemplo n.º 7
0
 def test2_Date_21_0985(self):
     text = r"Annualized interest rates on certain investments as reported by the Federal Reserve Board on a weekly-average basis: 1989 and Wednesday October 4, 1989."
     etext = r"Annualized interest rates on certain investments as reported by the Federal Reserve Board on a weekly-average basis : 1989 and Wednesday October 4 , 1989"
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     fnps = sentence.get_np_nominals()
     nps = [sp.text for r, sp in fnps]
     self.assertTrue('Annualized interest rates' in nps)
     self.assertTrue('certain investments' in nps)
     self.assertTrue('the Federal-Reserve-Board' in nps)
     self.assertTrue('a weekly-average basis' in nps)
     self.assertTrue('Wednesday October 4' in nps)
Ejemplo n.º 8
0
 def test2_Date_00_1228(self):
     text = r"The reduced dividend is payable Jan. 2 to stock of record Dec. 15"
     etext = r"The reduced dividend is payable Jan. 2 to stock of record Dec. 15"
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     fnps = sentence.get_np_nominals()
     nps = [sp.text for r, sp in fnps]
     self.assertTrue('The reduced dividend' in nps)
     self.assertTrue('payable' in nps)
     self.assertTrue('Jan. 2' in nps)
     self.assertTrue('Dec. 15' in nps)
     self.assertTrue('stock' in nps)
     self.assertTrue('record' in nps)
Ejemplo n.º 9
0
 def test4_ApposInterrupt(self):
     text = r"Bell, a telecommunications company, which is located in Los Angeles, makes and distributes electronics, computers, and building products"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET|CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.get_np_nominals()
     phrases = [sp.text for r, sp in f]
     self.assertTrue('Bell' in phrases)
     self.assertTrue('a telecommunications company' in phrases)
     np1 = filter(lambda x: 'Bell' == x[1].text, f)[0]
     np2 = filter(lambda x: 'a telecommunications company' == x[1].text, f)[0]
     X = np1[0]
     Y = np2[0]
     self.assertNotEqual(X, Y)
     self.assertTrue(d.find_condition(Rel('_AKA', [X, Y])) is not None)
     self.assertTrue(len(repr(d).split('_AKA')) == 2)
Ejemplo n.º 10
0
 def test3_ApposInterrupt(self):
     text = r"Robbie, a hot-tempered tennis player, charged the umpire and tried to crack the poor man's skull with a racket."
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET|CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.get_np_nominals()
     phrases = [sp.text for r, sp in f]
     self.assertTrue('Robbie' in phrases)
     self.assertTrue('a hot-tempered tennis player' in phrases)
     robbie = filter(lambda x: 'Robbie' == x[1].text, f)[0]
     temper = filter(lambda x: 'a hot-tempered tennis player' == x[1].text, f)[0]
     X = robbie[0]
     Y = temper[0]
     self.assertNotEqual(X, Y)
     self.assertTrue(d.find_condition(Rel('_AKA', [X, Y])) is not None)
     self.assertTrue(len(repr(d).split('_AKA')) == 2)
Ejemplo n.º 11
0
 def test10_Brutus(self):
     text = "Ceasar was stabbed by Brutus"
     derivation = grpc.ccg_parse(self.stub, text, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     fnps = sentence.get_np_nominals()
     nps = [sp.text for r, sp in fnps]
     #self.assertTrue('Average maturity' in nps)
     self.assertTrue('Brutus' in nps)
     self.assertTrue('Ceasar' in nps)
     fvps = sentence.get_vp_nominals()
     vps = [sp.text for r, sp in fvps]
     self.assertTrue('was stabbed' in vps)
     E = filter(lambda x: x[1].text == "was stabbed", fvps)[0][0]
     A1 = filter(lambda x: x[1].text == "Brutus", fnps)[0][0]
     A0 = filter(lambda x: x[1].text == "Ceasar", fnps)[0][0]
     self.assertTrue(d.find_condition(Rel('_ARG0', [E, A0])) is not None)
     self.assertTrue(d.find_condition(Rel('_ARG1', [E, A1])) is not None)
Ejemplo n.º 12
0
 def test2_ApposInterrupt(self):
     text = r"Reliable, Diane's eleven-year-old beagle, chews holes in the living room carpeting as if he were still a puppy."
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET|CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.get_np_nominals()
     phrases = [sp.text for r, sp in f]
     self.assertTrue('Reliable' in phrases)
     self.assertTrue("eleven-year-old beagle" in phrases)
     self.assertTrue("Diane" in phrases)
     dog = filter(lambda x: 'Reliable' == x[1].text, f)[0]
     breed = filter(lambda x: "eleven-year-old beagle" == x[1].text, f)[0]
     X = dog[0]
     Y = breed[0]
     self.assertNotEqual(X, Y)
     self.assertTrue(d.find_condition(Rel('_AKA', [X, Y])) is not None)
     self.assertTrue(len(repr(d).split('_AKA')) == 2)
Ejemplo n.º 13
0
 def test1_Currency_00_0195(self):
     text = r"On the other hand, had it existed then, Cray Computer would have incurred a $20.5 million loss."
     etext = r"On the other hand , had it existed then , Cray Computer would have incurred a $ 20.5 million loss ."
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     fnps = sentence.get_np_nominals()
     nps = [sp.text for r, sp in fnps]
     self.assertTrue('the other hand' in nps)
     self.assertTrue('Cray-Computer' in nps)
     self.assertTrue('$ 20.5 million' in nps)
     fvps = sentence.get_vp_nominals()
     vps = [sp.text for r, sp in fvps]
     self.assertTrue('had' in vps)
     self.assertTrue('existed' in vps)
     self.assertTrue('would have incurred' in vps)
Ejemplo n.º 14
0
 def test10_Ccgbank_00_0036(self):
     text = "Average maturity of the funds' investments lengthened by a day to 41 days, the longest since early August, according to Donoghue's."
     etext = "Average maturity of the funds ' investments lengthened by a day to 41 days , the longest since early August , according to Donoghue 's ."
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     fnps = sentence.get_np_nominals()
     nps = [sp.text for r, sp in fnps]
     #self.assertTrue('Average maturity' in nps)
     self.assertTrue('the funds' in nps)
     self.assertTrue('a day' in nps)
     self.assertTrue('41 days' in nps)
     self.assertTrue('the longest' in nps)
     self.assertTrue('early August' in nps)
     fvps = sentence.get_vp_nominals()
     vps = [sp.text for r, sp in fvps]
     self.assertTrue('lengthened' in vps)
     self.assertTrue('according' in vps)
Ejemplo n.º 15
0
 def test04_AndOfVerb(self):
     text = "Bell makes and distributes computers"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_PROPERNAME | RT_ENTITY | RT_EVENT)
     phrases = [sp.text for r, sp in f.iteritems()]
     self.assertTrue('Bell' in phrases)
     self.assertTrue('makes distributes' in phrases)
     self.assertTrue('computers' in phrases)
     verb1 = filter(lambda x: 'makes distributes' == x[1].text,
                    f.iteritems())[0]
     agent = filter(lambda x: 'Bell' == x[1].text, f.iteritems())[0]
     theme = filter(lambda x: 'computers' == x[1].text, f.iteritems())[0]
     X1 = agent[0]
     X2 = theme[0]
     E1 = verb1[0]
     self.assertTrue(d.find_condition(Rel('_EVENT', [E1])) is not None)
     self.assertTrue(d.find_condition(Rel('_ARG0', [E1, X1])) is not None)
     self.assertTrue(d.find_condition(Rel('_ARG1', [E1, X2])) is not None)
Ejemplo n.º 16
0
 def test10_Ccgbank_00_0099(self):
     text = "Plans that give advertisers discounts for maintaining or increasing ad spending have become permanent fixtures at the news weeklies and underscore the fierce competition between Newsweek, Time Warner Inc.'s Time magazine, and Mortimer B. Zuckerman's U.S. News & World Report."
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.get_np_nominals()
     phrases = [sp.text for r, sp in f]
     self.assertTrue('Plans' in phrases)
     self.assertTrue('advertisers' in phrases)
     self.assertTrue('discounts' in phrases)
     self.assertTrue('ad spending' in phrases)
     self.assertTrue('permanent fixtures' in phrases)
     self.assertTrue('the news weeklies' in phrases)
     self.assertTrue('the fierce competition' in phrases)
     self.assertTrue("Newsweek" in phrases)
     self.assertTrue("Time-Warner-Inc." in phrases)
     self.assertTrue("Time-magazine" in phrases)
     self.assertTrue("Mortimer-B.-Zuckerman" in phrases)
     self.assertTrue("U.S.-News-&-World-Report" in phrases)
     vf = sentence.get_vp_nominals()
     vphrases = [sp.text for r, sp in vf]
     self.assertTrue('give' in vphrases)
     self.assertTrue('maintaining increasing' in vphrases)
     self.assertTrue('have become' in vphrases)
     self.assertTrue('underscore' in vphrases)
     give = filter(lambda x: 'give' == x[1].text, vf)[0][0]
     become = filter(lambda x: 'have become' == x[1].text, vf)[0][0]
     uscore = filter(lambda x: 'underscore' == x[1].text, vf)[0][0]
     minc = filter(lambda x: 'maintaining increasing' == x[1].text,
                   vf)[0][0]
     plans = filter(lambda x: 'Plans' == x[1].text, f)[0][0]
     advertisers = filter(lambda x: 'advertisers' == x[1].text, f)[0][0]
     discounts = filter(lambda x: 'discounts' == x[1].text, f)[0][0]
     spending = filter(lambda x: 'ad spending' == x[1].text, f)[0][0]
     fixtures = filter(lambda x: 'permanent fixtures' == x[1].text, f)[0][0]
     weeklies = filter(lambda x: 'the news weeklies' == x[1].text, f)[0][0]
     timeinc = filter(lambda x: 'Time-Warner-Inc.' == x[1].text, f)[0][0]
     timemag = filter(lambda x: 'Time-magazine' == x[1].text, f)[0][0]
     mortimer = filter(lambda x: 'Mortimer-B.-Zuckerman' == x[1].text,
                       f)[0][0]
     uswr = filter(lambda x: 'U.S.-News-&-World-Report' == x[1].text,
                   f)[0][0]
     self.assertTrue(
         d.find_condition(Rel('_ARG0', [give, plans])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG1', [give, advertisers])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG2', [give, discounts])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG0', [minc, plans])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG1', [minc, spending])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG0', [become, plans])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG1', [become, fixtures])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_POSS', [mortimer, uswr])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_POSS', [timeinc, timemag])) is not None)
Ejemplo n.º 17
0
def make_drs(daemon):
    global pypath, projdir, datapath, idsrch
    allfiles = []
    projdir = os.path.dirname(os.path.dirname(__file__))

    easysrl_path = os.path.join(projdir, 'data', 'ldc', daemon, 'drs')
    if not os.path.exists(easysrl_path):
        os.makedirs(easysrl_path)

    # Get files
    ldcpath = os.path.join(projdir, 'data', 'ldc', daemon, 'ccgbank')
    dirlist1 = os.listdir(ldcpath)
    for fname in dirlist1:
        if 'ccg_derivation' not in fname:
            continue
        ldcpath1 = os.path.join(ldcpath, fname)
        if os.path.isfile(ldcpath1):
            allfiles.append(ldcpath1)

    failed_parse = 0
    failed_ccg2drs = []
    start = 0
    progress = -1
    for fn in allfiles:
        idx = idsrch.match(fn)
        if idx is None:
            continue
        idx = idx.group('id')

        if not os.path.exists(os.path.join(easysrl_path, idx)):
            os.mkdir(os.path.join(easysrl_path, idx))

        with open(fn, 'r') as fd:
            lines = fd.readlines()

        name, _ = os.path.splitext(os.path.basename(fn))
        for i in range(start, len(lines)):
            start = 0
            ccgbank = lines[i].strip()
            if len(ccgbank) == 0 or ccgbank[0] == '#':
                continue

            if progress < 0:
                print('%s-%04d' % (name, i))
            else:
                progress = print_progress(progress, 10)

            try:
                # CCG parser is Java so output is UTF-8.
                pt = parse_ccg_derivation(ccgbank)
                s = sentence_from_pt(pt).strip()
                pccg = pt_to_ccg_derivation(pt)
            except Exception:
                failed_parse += 1
                raise
                continue

            try:
                d = process_ccg_pt(
                    pt, CO_VERIFY_SIGNATURES | CO_NO_VERBNET
                    | CO_NO_WIKI_SEARCH).get_drs()
                assert d is not None
                assert isinstance(d, DRS)
                d = d.show(SHOW_LINEAR).strip()
            except Exception as e:
                print(e)
                failed_ccg2drs.append((name, i, ccgbank))
                raise
                continue

            with open(
                    os.path.join(easysrl_path, idx,
                                 'drs_%s_%04d.dat' % (idx, i)), 'w') as fd:
                fd.write(b'<sentence>\n')
                fd.write(safe_utf8_encode(s))
                fd.write(b'\n</sentence>\n<drs>\n')
                fd.write(safe_utf8_encode(d))
                fd.write(b'\n</drs>\n<predarg>\n')
                fd.write(safe_utf8_encode(pccg))
                fd.write(b'\n')
                fd.write(b'</predarg>\n')

    if failed_parse != 0:
        print('%d derivations failed to parse' % failed_parse)
    if len(failed_ccg2drs) != 0:
        print('%d derivations failed to convert to DRS' % len(failed_ccg2drs))
        for x in failed_ccg2drs:
            print('%s-%04d failed: {%s}' % x)
Ejemplo n.º 18
0
            drs = None
            pccg = None
            fol = None
            constituents = None
            orphaned = None
            conjoins = None
            functor_phrases = None
            vnconstituents = ''
            constituents = ''
            vsent = None
            sentence = None

            if options.ofmt == 'drs':
                try:
                    pt = parse_ccg_derivation(ccg)
                    pccg = pt_to_ccg_derivation(pt)
                except Exception as e:
                    print('Error: failed to parse ccgbank - %s' % str(e))
                    raise

                ops = CO_BUILD_STATES if options.wordvars else CO_ADD_STATE_PREDICATES
                ops |= CO_NO_VERBNET if options.no_vn else 0
                ops |= CO_NO_WIKI_SEARCH if options.no_wp else 0

                try:
                    sentence = process_ccg_pt(pt, ops)
                    d = sentence.get_drs()
                    fol, _ = d.to_fol()
                    fol = unicode(fol)
                    drs = d.show(SHOW_LINEAR)
                    constituents = []