def test1_JsonFiles(self):
    """Parse one selected JSON story file end-to-end through the CCG service.

    Scans ``datapath`` for the story whose basename matches the hard-coded
    hash, parses its title and every paragraph sentence via gRPC, and
    finally checks the assembled result serializes back to JSON.
    """
    allfiles = []
    for fn in os.listdir(datapath):
        fullpath = os.path.join(datapath, fn)
        if not os.path.isfile(fullpath):
            continue
        f, x = os.path.splitext(fn)
        # Only the single story selected for this regression test.
        if x == '.json' and f == '9255a890ffe40c05876d8d402044ab11':
            allfiles.append(fullpath)

    for fn in allfiles:
        with open(fn, 'r') as fd:
            body = json.load(fd, encoding='utf-8')
        # Parse the story title first.
        smod = preprocess_sentence(body['title'])
        ccgbank = grpc.ccg_parse(self.stub, smod, grpc.DEFAULT_SESSION)
        pt = parse_ccg_derivation(ccgbank)
        ccg = process_ccg_pt(pt)
        ccgbody = {
            'story': {
                'title': [x.get_json() for x in ccg.get_span()],
                'paragraphs': []
            }
        }
        # Split content into non-empty paragraphs, then sentences.
        paragraphs = filter(lambda y: len(y) != 0,
                            map(lambda x: x.strip(), body['content'].split('\n')))
        # enumerate() replaces the original hand-rolled i/j counters and the
        # redundant no-op slices paragraphs[0:] / sentences[0:].
        for i, p in enumerate(paragraphs):
            sentences = filter(lambda x: len(x.strip()) != 0, sent_tokenize(p))
            sp = []
            for j, s in enumerate(sentences):
                dprint('p:s = %d:%d' % (i, j))
                smod = preprocess_sentence(s)
                ccgbank = grpc.ccg_parse(self.stub, smod, grpc.DEFAULT_SESSION)
                pt = parse_ccg_derivation(ccgbank)
                ccg = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
                sp.append([x.get_json() for x in ccg.get_span()])
            ccgbody['story']['paragraphs'].append(sp)
        # Must round-trip to JSON without raising; the serialized value
        # itself is not needed (was an unused local followed by dead `pass`).
        json.dumps(ccgbody)
def test05_AndOfVerb_AndOfObj(self):
    """Conjoined verbs plus a three-way conjoined object list."""
    text = "Bell makes and distributes computers, electronics, and building products"
    mtext = preprocess_sentence(text)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    f = sentence.select_phrases(RT_PROPERNAME | RT_ENTITY | RT_EVENT | RT_ATTRIBUTE)
    texts = [span.text for _, span in f.iteritems()]
    for expect in ('Bell', 'makes distributes', 'computers', 'electronics',
                   # Note if we add RT_EMPTY_DRS to the selection criteria
                   # then this phrase becomes 'and building products'
                   'building products'):
        self.assertTrue(expect in texts)
    self.assertEqual(5, len(texts))

    def pick(txt):
        # First (referent, span) pair whose phrase text matches exactly.
        return [pair for pair in f.iteritems() if pair[1].text == txt][0]

    E1 = pick('makes distributes')[0]
    X1 = pick('Bell')[0]
    Y1 = pick('computers')[0]
    Y2 = pick('electronics')[0]
    Y3 = pick('building products')[0]
    self.assertTrue(d.find_condition(Rel('_EVENT', [E1])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [E1, X1])) is not None)
    # TODO: should we add proposition for multi NP's conjoined?
    self.assertTrue(d.find_condition(Rel('_ARG1', [E1, Y3])) is not None)
def test10_OrOfVerb_OrInBrackets(self):
    """Disjoined verbs with a bracketed 'or' phrase at sentence end."""
    raw = "That which is perceived or known or inferred to have its own distinct existence (living or nonliving)"
    mtext = preprocess_sentence(raw)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs(nodups=True)
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    # RT_EMPTY_DRS adds 'or' to phrases
    def keep(x):
        return x.pos is POS.from_cache('WDT') or 0 == (x.mask & RT_EMPTY_DRS)

    f = sentence.select_phrases(keep, contiguous=False)
    texts = [span.text for _, span in f.iteritems()]
    for expect in ('That which', 'have', 'is perceived known inferred',
                   'its own distinct existence'):
        self.assertTrue(expect in texts)

    def pick(txt):
        # First (referent, span) pair whose phrase text matches exactly.
        return [pair for pair in f.iteritems() if pair[1].text == txt][0]

    verb1 = pick('is perceived known inferred')
    verb2 = pick('have')
    agent = pick('That which')
    theme = pick('its own distinct existence')
    X1 = agent[0]
    E1 = verb1[0]
    E2 = verb2[0]
    X2 = theme[1][0].refs[1]
    X3 = theme[1][1].refs[0]
    self.assertTrue(d.find_condition(Rel('_EVENT', [E1])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [E1, X1])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [E1, E2])) is not None)
    # TODO: should the theme attach to X2?
    self.assertTrue(d.find_condition(Rel('_ARG1', [E2, X3])) is not None)
    self.assertTrue(d.find_condition(Rel('_POSS', [X2, X3])) is not None)
def test01_AndOfSubj(self):
    """Conjoined proper-noun subject; first conjunct fills _ARG0."""
    raw = "John and Paul went to the movies"
    derivation = grpc.ccg_parse(self.stub, preprocess_sentence(raw), grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    f = sentence.select_phrases(RT_PROPERNAME | RT_EVENT)
    texts = [span.text for _, span in f.iteritems()]
    self.assertTrue('John' in texts)
    self.assertTrue('Paul' in texts)
    self.assertTrue('went' in texts)

    def pick(txt):
        # First (referent, span) pair whose phrase text matches exactly.
        return [pair for pair in f.iteritems() if pair[1].text == txt][0]

    J = pick('John')[0]
    P = pick('Paul')[0]
    E = pick('went')[0]
    self.assertTrue(d.find_condition(Rel('_EVENT', [E])) is not None)
    self.assertTrue(d.find_condition(Rel('go', [E])) is not None)
    self.assertTrue(d.find_condition(Rel('John', [J])) is not None)
    self.assertTrue(d.find_condition(Rel('Paul', [P])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [E, J])) is not None)
def test02_AndOfObj(self):
    """Conjoined proper-noun object; first conjunct fills _ARG1."""
    raw = "He saw John and Paul"
    derivation = grpc.ccg_parse(self.stub, preprocess_sentence(raw), grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    f = sentence.select_phrases(RT_PROPERNAME | RT_EVENT)
    texts = [span.text for _, span in f.iteritems()]
    self.assertTrue('John' in texts)
    self.assertTrue('Paul' in texts)
    self.assertTrue('saw' in texts)

    def pick(txt):
        # First (referent, span) pair whose phrase text matches exactly.
        return [pair for pair in f.iteritems() if pair[1].text == txt][0]

    J = pick('John')[0]
    P = pick('Paul')[0]
    E = pick('saw')[0]
    # FIXME: wn lemmatizer does not convert saw to see - I guess to to ambiguity
    self.assertTrue(d.find_condition(Rel('_EVENT', [E])) is not None)
    self.assertTrue(d.find_condition(Rel('saw', [E])) is not None)
    self.assertTrue(d.find_condition(Rel('John', [J])) is not None)
    self.assertTrue(d.find_condition(Rel('Paul', [P])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [E, J])) is not None)
def test03_OrOfObj(self):
    """Disjoined object nouns; second conjunct fills _ARG1."""
    raw = "To participate in games or sport"
    derivation = grpc.ccg_parse(self.stub, preprocess_sentence(raw), grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    f = sentence.select_phrases(RT_ENTITY | RT_EVENT)
    texts = [span.text for _, span in f.iteritems()]
    self.assertTrue('participate' in texts)
    self.assertTrue('games' in texts)
    self.assertTrue('sport' in texts)

    def pick(txt):
        # First (referent, span) pair whose phrase text matches exactly.
        return [pair for pair in f.iteritems() if pair[1].text == txt][0]

    X1 = pick('games')[0]
    X2 = pick('sport')[0]
    E = pick('participate')[0]
    self.assertTrue(d.find_condition(Rel('_EVENT', [E])) is not None)
    self.assertTrue(d.find_condition(Rel('participate', [E])) is not None)
    self.assertTrue(d.find_condition(Rel('games', [X1])) is not None)
    self.assertTrue(d.find_condition(Rel('sport', [X2])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [E, X2])) is not None)
def test1_Currency_00_0194(self):
    """WSJ 00_0194: currency amounts survive preprocessing and attach correctly."""
    text = r"Without the Cray-3 research and development expenses, the company would have been able to report a profit of $19.3 million for the first half of 1989 rather than the $5.9 million it posted."
    etext = r"Without the Cray-3 research and development expenses , the company would have been able to report a profit of $ 19.3 million for the first half of 1989 rather than the $ 5.9 million it posted"
    mtext = preprocess_sentence(text)
    self.assertEqual(etext, mtext)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs(nodups=True)
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    fnps = sentence.get_np_nominals()
    nps = [span.text for _, span in fnps]
    for expect in ('the Cray-3 research and development expenses',
                   'the company', 'a profit', '$ 19.3 million',
                   'the first half', 'the $ 5.9 million', '1989'):
        self.assertTrue(expect in nps)

    fvps = sentence.get_vp_nominals()
    vps = [span.text for _, span in fvps]
    for expect in ('would have been', 'report', 'posted'):
        self.assertTrue(expect in vps)

    def ref_of(txt, pairs):
        # Referent of the first span whose text matches exactly.
        return [pair for pair in pairs if pair[1].text == txt][0][0]

    would_have_been = ref_of('would have been', fvps)
    report = ref_of('report', fvps)
    posted = ref_of('posted', fvps)
    cray_rnd = ref_of('the Cray-3 research and development expenses', fnps)
    company = ref_of('the company', fnps)
    profit = ref_of('a profit', fnps)
    first_half = ref_of('the first half', fnps)
    n1989 = ref_of('1989', fnps)
    n19_3M = ref_of('$ 19.3 million', fnps)
    n5_9M = ref_of('the $ 5.9 million', fnps)
    self.assertTrue(d.find_condition(Rel('without', [would_have_been, cray_rnd])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [would_have_been, company])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [report, company])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [report, profit])) is not None)
    self.assertTrue(d.find_condition(Rel('of', [profit, n19_3M])) is not None)
    self.assertTrue(d.find_condition(Rel('for', [profit, first_half])) is not None)
    self.assertTrue(d.find_condition(Rel('of', [first_half, n1989])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [posted, n5_9M])) is not None)
def test2_Date_21_0985(self):
    """WSJ 21_0985: dates and hyphenated named entities appear as NP nominals."""
    text = r"Annualized interest rates on certain investments as reported by the Federal Reserve Board on a weekly-average basis: 1989 and Wednesday October 4, 1989."
    etext = r"Annualized interest rates on certain investments as reported by the Federal Reserve Board on a weekly-average basis : 1989 and Wednesday October 4 , 1989"
    mtext = preprocess_sentence(text)
    self.assertEqual(etext, mtext)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    fnps = sentence.get_np_nominals()
    nps = [span.text for _, span in fnps]
    for expect in ('Annualized interest rates', 'certain investments',
                   'the Federal-Reserve-Board', 'a weekly-average basis',
                   'Wednesday October 4'):
        self.assertTrue(expect in nps)
def test2_Date_00_1228(self):
    """WSJ 00_1228: abbreviated month dates (Jan. 2, Dec. 15) stay intact."""
    text = r"The reduced dividend is payable Jan. 2 to stock of record Dec. 15"
    etext = r"The reduced dividend is payable Jan. 2 to stock of record Dec. 15"
    mtext = preprocess_sentence(text)
    self.assertEqual(etext, mtext)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    fnps = sentence.get_np_nominals()
    nps = [span.text for _, span in fnps]
    for expect in ('The reduced dividend', 'payable', 'Jan. 2',
                   'Dec. 15', 'stock', 'record'):
        self.assertTrue(expect in nps)
def test3_ApposInterrupt(self):
    """Appositive interrupter links the name to its description via one _AKA."""
    raw = r"Robbie, a hot-tempered tennis player, charged the umpire and tried to crack the poor man's skull with a racket."
    derivation = grpc.ccg_parse(self.stub, preprocess_sentence(raw), grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    f = sentence.get_np_nominals()
    texts = [span.text for _, span in f]
    self.assertTrue('Robbie' in texts)
    self.assertTrue('a hot-tempered tennis player' in texts)
    X = [pair for pair in f if pair[1].text == 'Robbie'][0][0]
    Y = [pair for pair in f if pair[1].text == 'a hot-tempered tennis player'][0][0]
    self.assertNotEqual(X, Y)
    # The two referents must be distinct and linked by exactly one _AKA.
    self.assertTrue(d.find_condition(Rel('_AKA', [X, Y])) is not None)
    self.assertTrue(len(repr(d).split('_AKA')) == 2)
def test4_ApposInterrupt(self):
    """Appositive plus relative clause still yields exactly one _AKA link."""
    raw = r"Bell, a telecommunications company, which is located in Los Angeles, makes and distributes electronics, computers, and building products"
    derivation = grpc.ccg_parse(self.stub, preprocess_sentence(raw), grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    f = sentence.get_np_nominals()
    texts = [span.text for _, span in f]
    self.assertTrue('Bell' in texts)
    self.assertTrue('a telecommunications company' in texts)
    X = [pair for pair in f if pair[1].text == 'Bell'][0][0]
    Y = [pair for pair in f if pair[1].text == 'a telecommunications company'][0][0]
    self.assertNotEqual(X, Y)
    # The two referents must be distinct and linked by exactly one _AKA.
    self.assertTrue(d.find_condition(Rel('_AKA', [X, Y])) is not None)
    self.assertTrue(len(repr(d).split('_AKA')) == 2)
def test1_Currency_00_0195(self):
    """WSJ 00_0195: currency amount and company name in NP/VP nominals."""
    text = r"On the other hand, had it existed then, Cray Computer would have incurred a $20.5 million loss."
    etext = r"On the other hand , had it existed then , Cray Computer would have incurred a $ 20.5 million loss ."
    mtext = preprocess_sentence(text)
    self.assertEqual(etext, mtext)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    fnps = sentence.get_np_nominals()
    nps = [span.text for _, span in fnps]
    for expect in ('the other hand', 'Cray-Computer', '$ 20.5 million'):
        self.assertTrue(expect in nps)

    fvps = sentence.get_vp_nominals()
    vps = [span.text for _, span in fvps]
    for expect in ('had', 'existed', 'would have incurred'):
        self.assertTrue(expect in vps)
def test2_ApposInterrupt(self):
    """Appositive with possessive ('Diane's ... beagle') yields one _AKA link."""
    raw = r"Reliable, Diane's eleven-year-old beagle, chews holes in the living room carpeting as if he were still a puppy."
    derivation = grpc.ccg_parse(self.stub, preprocess_sentence(raw), grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    f = sentence.get_np_nominals()
    texts = [span.text for _, span in f]
    self.assertTrue('Reliable' in texts)
    self.assertTrue("eleven-year-old beagle" in texts)
    self.assertTrue("Diane" in texts)
    X = [pair for pair in f if pair[1].text == 'Reliable'][0][0]
    Y = [pair for pair in f if pair[1].text == "eleven-year-old beagle"][0][0]
    self.assertNotEqual(X, Y)
    # The name and its description must be distinct and linked by one _AKA.
    self.assertTrue(d.find_condition(Rel('_AKA', [X, Y])) is not None)
    self.assertTrue(len(repr(d).split('_AKA')) == 2)
def test10_Ccgbank_00_0036(self):
    """WSJ 00_0036: possessives and appositive date phrases in NP/VP nominals."""
    text = "Average maturity of the funds' investments lengthened by a day to 41 days, the longest since early August, according to Donoghue's."
    etext = "Average maturity of the funds ' investments lengthened by a day to 41 days , the longest since early August , according to Donoghue 's ."
    mtext = preprocess_sentence(text)
    self.assertEqual(etext, mtext)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    fnps = sentence.get_np_nominals()
    nps = [span.text for _, span in fnps]
    # Disabled in the original test:
    #self.assertTrue('Average maturity' in nps)
    for expect in ('the funds', 'a day', '41 days', 'the longest',
                   'early August'):
        self.assertTrue(expect in nps)

    fvps = sentence.get_vp_nominals()
    vps = [span.text for _, span in fvps]
    self.assertTrue('lengthened' in vps)
    self.assertTrue('according' in vps)
def test04_AndOfVerb(self):
    """Conjoined verbs share both the agent (_ARG0) and the theme (_ARG1)."""
    raw = "Bell makes and distributes computers"
    derivation = grpc.ccg_parse(self.stub, preprocess_sentence(raw), grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    f = sentence.select_phrases(RT_PROPERNAME | RT_ENTITY | RT_EVENT)
    texts = [span.text for _, span in f.iteritems()]
    self.assertTrue('Bell' in texts)
    self.assertTrue('makes distributes' in texts)
    self.assertTrue('computers' in texts)

    def pick(txt):
        # First (referent, span) pair whose phrase text matches exactly.
        return [pair for pair in f.iteritems() if pair[1].text == txt][0]

    E1 = pick('makes distributes')[0]
    X1 = pick('Bell')[0]
    X2 = pick('computers')[0]
    self.assertTrue(d.find_condition(Rel('_EVENT', [E1])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [E1, X1])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [E1, X2])) is not None)
def parse(self, request, context):
    """Parse a message.

    Runs ``request.text`` through the CCG pipeline and builds a
    ``GSentence`` response with one lexeme entry per token plus the
    constituent spans.  ``retry`` allows up to 3 attempts when the CCG
    service connection fails.

    Args:
        request: gRPC request carrying `text` and `options` fields.
        context: gRPC servicer context used to report status codes.

    Returns:
        infox_service_pb2.GSentence on success.

    Raises:
        RuntimeError: when the application is terminating or retries
            are exhausted; pipeline exceptions are re-raised after the
            gRPC status has been set.
    """
    retry = 3
    while retry:
        # Bail out promptly if a shutdown has been requested.
        if self.state.terminate:
            context.set_code(grpc.StatusCode.CANCELLED)
            context.set_details('Application terminating.')
            raise RuntimeError('Application terminating!')
        try:
            # EasyXXX does not handle these
            smod = preprocess_sentence(request.text)
            ccgbank = gsvc.ccg_parse(self.ccg_stub, smod, gsvc.DEFAULT_SESSION)
            pt = parse_ccg_derivation(ccgbank)
            ccg = process_ccg_pt(pt, options=request.options)
            sent = ccg.get_verbnet_sentence()
            response = infox_service_pb2.GSentence()
            # Copy each lexeme into the protobuf response.
            for lex in sent:
                glex = response.lexemes.add()
                glex.head = lex.head
                glex.idx = lex.idx
                glex.mask = lex.mask
                for r in lex.refs:
                    glex.refs.append(r.var.to_string())
                glex.pos = lex.pos.tag
                glex.word = lex.word
                glex.stem = lex.stem
                glex.category = lex.category.signature
                # Wikipedia annotations are optional per lexeme.
                if lex.wiki_data is not None:
                    glex.wikidata.title = lex.wiki_data.title
                    glex.wikidata.summary = lex.wiki_data.summary
                    glex.wikidata.page_categories.extend(
                        lex.wiki_data.page_categories)
                    glex.wikidata.url = lex.wiki_data.url
            # Copy the constituent spans.
            for c in ccg.constituents:
                gc = response.constituents.add()
                gc.span.extend(c.span.get_indexes())
                gc.vntype = c.vntype.signature
                gc.head = c.chead
            return response
        except requests.exceptions.ConnectionError as e:
            self.state.wait(0.25)
            retry -= 1
            self.logger.exception('Infox.parse', exc_info=e)
            context.set_code(grpc.StatusCode.ABORTED)
            context.set_details(e.message)
            # NOTE(review): this raise is unconditional, so the retry
            # decrement above never produces an actual retry and the
            # 'Too many retries!' path below appears unreachable —
            # confirm whether a conditional `continue` was intended.
            raise
        except Exception as e:
            # Non-connection failures are not retried.
            retry = 0
            self.logger.exception('Infox.parse', exc_info=e)
            context.set_code(grpc.StatusCode.ABORTED)
            context.set_details(e.message)
            raise
    # Reached only if the while loop exits without returning or raising.
    context.set_code(grpc.StatusCode.ABORTED)
    context.set_details('Too many retries!')
    raise RuntimeError('Too many retries!')
def run(self):
    """Process messages.

    Long-polls the AWS news queue: for each story message, CCG-parses the
    title and every sentence of every paragraph, then forwards the JSON
    result to the ccg queue (shrunk below the ~200KiB message limit if
    needed).  Messages that fail after retries are NOT deleted, so AWS can
    eventually move them to the dead-letter queue.
    """
    for message in receive_messages(self.aws.news_queue, MessageAttributeNames=['All']):
        global _logger
        # Attributes will be passed onto next queue
        attributes = message.message_attributes
        mhash = attributes['hash']['StringValue']
        _logger.debug('Received news_queue(%s) -> hash(%s)', message.message_id, mhash)
        body = json.loads(message.body)
        retry = 3
        ccgbank = None
        title = body['title']
        # Non-empty, stripped paragraphs of the story body.
        paragraphs_in = filter(lambda y: len(y) != 0,
                               map(lambda x: x.strip(), body['content'].split('\n')))
        paragraphs_out = []
        if len(paragraphs_in) == 0:
            # NOTE(review): the format string has two placeholders but the
            # arguments are passed as a single tuple, so logging will report
            # a formatting error — should be _logger.debug(fmt, mhash, title).
            _logger.debug('No paragraphs for story %s\n%s', (mhash, title))
        # Use NLTK to split paragraphs into sentences.
        for p in paragraphs_in:
            sentences = filter(lambda x: len(x.strip()) != 0, sent_tokenize(p))
            paragraphs_out.append(sentences)
            if self.state.terminate:
                break
        result = {}
        result['title'] = {}
        while retry:
            try:
                # Parse the title, then each sentence of each paragraph.
                ccgbank = grpc.ccg_parse(self.aws.stub, title, grpc.DEFAULT_SESSION)
                pt = parse_ccg_derivation(ccgbank)
                ccg = process_ccg_pt(pt, options=self.options)
                result['title']['lexemes'] = [x.get_json() for x in ccg.get_span()]
                result['title']['constituents'] = [c.get_json() for c in ccg.constituents]
                ccgpara = []
                result['paragraphs'] = ccgpara
                for sentences in paragraphs_out:
                    ccgsent = []
                    ccgpara.append(ccgsent)
                    for s in sentences:
                        smod = preprocess_sentence(s)
                        ccgbank = grpc.ccg_parse(self.aws.stub, smod, grpc.DEFAULT_SESSION)
                        pt = parse_ccg_derivation(ccgbank)
                        ccg = process_ccg_pt(pt, options=self.options)
                        ccgentry = {}
                        ccgentry['lexemes'] = [x.get_json() for x in ccg.get_span()]
                        ccgentry['constituents'] = [c.get_json() for c in ccg.constituents]
                        ccgsent.append(ccgentry)
                break # exit while
            except requests.exceptions.ConnectionError as e:
                # Transient service failure - back off briefly and retry.
                time.sleep(0.25)
                retry -= 1
                _logger.exception('AwsNewsQueueReader.run', exc_info=e)
                if self.state.pass_on_exceptions:
                    raise
            except Exception as e:
                # After X reads AWS sends the item to the dead letter queue.
                # X is configurable in AWS console.
                retry = 0
                _logger.exception('AwsNewsQueueReader.run', exc_info=e, rlimitby=mhash)
                if self.state.pass_on_exceptions:
                    raise
            if self.state.terminate:
                retry = 0
                break
        # retry == 0 indicates failure
        if retry == 0:
            continue
        try:
            # Let the queue know that the message is processed
            message.delete()
            if self.aws.ccg_queue:
                ireduce = -1
                iorig = len(result['paragraphs'])
                # Shrink oversize payloads: proportionally drop trailing
                # paragraphs until the serialized JSON fits under 200KiB.
                while True:
                    strm = StringIO.StringIO()
                    # Add indent so easier to debug
                    json.dump(result, strm, indent=2)
                    data = strm.getvalue()
                    if len(data) >= 200*1024:
                        para = result['paragraphs']
                        ireduce = max([1, (len(para) * 200 * 1024)/ len(data)])
                        ireduce = min([len(para)-1, ireduce])
                        result['paragraphs'] = para[0:ireduce]
                    else:
                        break
                    if len(result['paragraphs']) <= 1:
                        break
                if ireduce >= 0:
                    _logger.warning('Hash(%s) ccg paragraphs reduced from %d to %d' % (mhash, iorig, ireduce))
                response = self.aws.ccg_queue.send_message(MessageAttributes=attributes,
                                                           MessageBody=data)
                _logger.debug('Sent hash(%s) -> ccg_queue(%s)', mhash, response['MessageId'])
        except Exception as e:
            _logger.exception('AwsNewsQueueReader.run', exc_info=e, rlimitby=mhash)
            if self.state.pass_on_exceptions:
                raise
# NOTE(review): this chunk opens a top-level `try:` whose matching
# `except`/`finally` lies outside the visible region; the fragment is kept
# as-is with review notes only.
try:
    sessionId = grpc.DEFAULT_SESSION
    if options.ofmt is not None:
        if options.ofmt not in [
                'ccgbank', 'html', 'logic', 'extended', 'drs'
        ]:
            # NOTE(review): 'drs' is accepted by the list above but missing
            # from this error message — confirm and update the text.
            die('bad output format %s, must be ccgbank|html|logic|extended'
                % options.ofmt)
        # Create a session to match output format, default is CCGBANK
        if options.ofmt != 'ccgbank' and options.ofmt != 'drs':
            sessionId = grpc.create_session(stub, options.ofmt)
    titleSrch = re.compile(titleRe)
    if not options.book:
        # Single-sentence mode: the input sentence is the joined CLI args.
        line = preprocess_sentence(' '.join(args))
        html = None
        # FIXME: Convert to python 3. Unicode is default.
        ccg = grpc.ccg_parse(stub, line, sessionId)
        if options.ofmt == 'html':
            # HTML output consumes the raw parse; presumably `ccg = None`
            # belongs inside this branch — TODO confirm original nesting.
            html = ccg
            ccg = None
        # Result placeholders populated later (outside this view).
        drs = None
        pccg = None
        fol = None
        constituents = None
        orphaned = None
        conjoins = None
        functor_phrases = None
        vnconstituents = ''
        # NOTE(review): `constituents` is reassigned from None to '' here —
        # confirm which default the downstream code expects.
        constituents = ''
def test10_Ccgbank_00_0099(self):
    """WSJ 00_0099: conjoined verbs, gerunds, and multi-word proper nouns."""
    raw = "Plans that give advertisers discounts for maintaining or increasing ad spending have become permanent fixtures at the news weeklies and underscore the fierce competition between Newsweek, Time Warner Inc.'s Time magazine, and Mortimer B. Zuckerman's U.S. News & World Report."
    derivation = grpc.ccg_parse(self.stub, preprocess_sentence(raw), grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)

    f = sentence.get_np_nominals()
    np_texts = [span.text for _, span in f]
    for expect in ('Plans', 'advertisers', 'discounts', 'ad spending',
                   'permanent fixtures', 'the news weeklies',
                   'the fierce competition', "Newsweek", "Time-Warner-Inc.",
                   "Time-magazine", "Mortimer-B.-Zuckerman",
                   "U.S.-News-&-World-Report"):
        self.assertTrue(expect in np_texts)

    vf = sentence.get_vp_nominals()
    vp_texts = [span.text for _, span in vf]
    for expect in ('give', 'maintaining increasing', 'have become',
                   'underscore'):
        self.assertTrue(expect in vp_texts)

    def ref_of(txt, pairs):
        # Referent of the first span whose text matches exactly.
        return [pair for pair in pairs if pair[1].text == txt][0][0]

    give = ref_of('give', vf)
    become = ref_of('have become', vf)
    uscore = ref_of('underscore', vf)
    minc = ref_of('maintaining increasing', vf)
    plans = ref_of('Plans', f)
    advertisers = ref_of('advertisers', f)
    discounts = ref_of('discounts', f)
    spending = ref_of('ad spending', f)
    fixtures = ref_of('permanent fixtures', f)
    weeklies = ref_of('the news weeklies', f)
    timeinc = ref_of('Time-Warner-Inc.', f)
    timemag = ref_of('Time-magazine', f)
    mortimer = ref_of('Mortimer-B.-Zuckerman', f)
    uswr = ref_of('U.S.-News-&-World-Report', f)
    self.assertTrue(d.find_condition(Rel('_ARG0', [give, plans])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [give, advertisers])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG2', [give, discounts])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [minc, plans])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [minc, spending])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [become, plans])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [become, fixtures])) is not None)
    self.assertTrue(d.find_condition(Rel('_POSS', [mortimer, uswr])) is not None)
    self.assertTrue(d.find_condition(Rel('_POSS', [timeinc, timemag])) is not None)