def gen_cameo_event(self, jsonString):
    """Run PETRARCH2 event coding over the events read from a JSON string.

    Args:
        jsonString: Input handed unchanged to ``read_json_``.

    Returns:
        Whatever ``petrarch2.do_coding`` returns for the parsed events, or an
        empty dict when ``read_json_`` yields a falsy value.
    """
    parsed_events = read_json_(jsonString)
    # Nothing to code: short-circuit with an empty result.
    if not parsed_events:
        return {}
    return petrarch2.do_coding(parsed_events)
def test_complex1():
    """Code a sentence with nested clauses and check the single expected event.

    The sentence and its constituency parse are wrapped in the document
    structure passed to ``petrarch2.do_coding``; the events attached to
    sentence '0' of document 'test123' must equal
    ``[('TUNJUD', 'NGAEDU', '173')]``.
    """
    sentence = (
        "A Tunisian court has jailed a Nigerian student for two years for "
        "helping young militants join an armed Islamic group in Lebanon, "
        "his lawyer said Wednesday."
    )
    # NOTE(review): the tree contains a stray "f" right after "(NN group))".
    # It looks like a typo, but it is kept verbatim because the expected
    # events may depend on the exact input -- confirm before removing.
    tree = (
        "( (S (S (NP (DT A) (NNP Tunisian) (NN court)) "
        "(VP (AUXZ has) (VP (VBN jailed) "
        "(NP (DT a) (JJ Nigerian) (NN student)) "
        "(PP (IN for) (NP (CD two) (NNS years))) "
        "(PP (IN for) (S (VP (VBG helping) "
        "(S (NP (JJ young) (NNS militants)) "
        "(VP (VB join) (NP (NP (DT an) (JJ armed) (JJ Islamic) (NN group))f "
        "(PP (IN in) (NP (NNP Lebanon)))))))))))) "
        "(, ,) (NP (PRP$ his) (NN lawyer)) "
        "(VP (VBD said) (NP (NNP Wednesday))) (. .)))"
    )
    sentence_entry = {
        u'content': sentence,
        u'parsed': utilities._format_parsed_str(tree),
    }
    coding_input = {
        u'test123': {
            u'sents': {u'0': sentence_entry},
            u'meta': {u'date': u'20010101'},
        },
    }
    coded = petrarch2.do_coding(coding_input, None)
    print(coded)
    expected_events = [('TUNJUD', 'NGAEDU', '173')]
    assert coded['test123']['sents']['0']['events'] == expected_events
def test_complex1():
    """Code a sentence with nested clauses and check the single expected event.

    The sentence and its constituency parse are wrapped in the document
    structure passed to ``petrarch2.do_coding``; the events attached to
    sentence '0' of document 'test123' must equal
    ``[('TUNJUD', 'NGAEDU', '173')]``.
    """
    sentence = (
        "A Tunisian court has jailed a Nigerian student for two years for "
        "helping young militants join an armed Islamic group in Lebanon, "
        "his lawyer said Wednesday."
    )
    # NOTE(review): the tree contains a stray "f" right after "(NN group))".
    # It looks like a typo, but it is kept verbatim because the expected
    # events may depend on the exact input -- confirm before removing.
    tree = (
        "( (S (S (NP (DT A) (NNP Tunisian) (NN court)) "
        "(VP (AUXZ has) (VP (VBN jailed) "
        "(NP (DT a) (JJ Nigerian) (NN student)) "
        "(PP (IN for) (NP (CD two) (NNS years))) "
        "(PP (IN for) (S (VP (VBG helping) "
        "(S (NP (JJ young) (NNS militants)) "
        "(VP (VB join) (NP (NP (DT an) (JJ armed) (JJ Islamic) (NN group))f "
        "(PP (IN in) (NP (NNP Lebanon)))))))))))) "
        "(, ,) (NP (PRP$ his) (NN lawyer)) "
        "(VP (VBD said) (NP (NNP Wednesday))) (. .)))"
    )
    sentence_entry = {
        u'content': sentence,
        u'parsed': utilities._format_parsed_str(tree),
    }
    coding_input = {
        u'test123': {
            u'sents': {u'0': sentence_entry},
            u'meta': {u'date': u'20010101'},
        },
    }
    coded = petrarch2.do_coding(coding_input, None)
    print(coded)
    expected_events = [('TUNJUD', 'NGAEDU', '173')]
    assert coded['test123']['sents']['0']['events'] == expected_events
def get_phrases(self, text, parse):
    """Code one pre-parsed sentence and collect its noun and verb phrases.

    The sentence is wrapped in a single-document structure (document id
    'test123', sentence id '0', fixed date '20010101') and passed to
    ``petrarch2.do_coding``. Phrase data is then read from the document's
    ``meta['verbs']`` entry in the result.

    Args:
        text: Raw sentence text.
        parse: Constituency parse of ``text``; normalised with
            ``utilities._format_parsed_str`` before coding.

    Returns:
        dict with the keys:
            "nouns": first element of every entry in ``meta['verbs']['nouns']``.
            "noun_coding": second element of every such entry.
            "verbs": first value of ``meta['verbs']['eventtext']``, or "" when
                that mapping is missing or empty.
            "verb_coding": third element of the first ``eventtext`` key, or ""
                when it cannot be read.

    Raises:
        KeyError: if the coded result has no ``meta['verbs']['nouns']`` entry
            (this lookup is deliberately left unguarded, as before).
    """
    parsed = utilities._format_parsed_str(parse)
    ddict = {
        u'test123': {
            u'sents': {u'0': {u'content': text, u'parsed': parsed}},
            u'meta': {u'date': u'20010101'},
        },
    }
    return_dict = petrarch2.do_coding(ddict, None)

    verb_meta = return_dict['test123']['meta']['verbs']
    noun_pairs = verb_meta['nouns']
    nouns = [pair[0] for pair in noun_pairs]
    noun_coding = [pair[1] for pair in noun_pairs]

    # list(...) makes the indexing valid for Python 3 dict views as well as
    # Python 2 lists; IndexError covers an empty "eventtext" mapping, which
    # previously escaped the KeyError-only handler.
    try:
        verbs = list(verb_meta['eventtext'].values())[0]
    except (KeyError, IndexError):
        print("No eventtext")
        verbs = ""

    # presumably each key is a (source, target, code) tuple -- TODO confirm
    try:
        verb_coding = list(verb_meta['eventtext'].keys())[0][2]
    except (KeyError, IndexError) as e:
        print(e)
        verb_coding = ""

    phrase_dict = {
        "nouns": nouns,
        "noun_coding": noun_coding,
        "verbs": verbs,
        "verb_coding": verb_coding,
    }
    return phrase_dict
def parse_sentence(stanford_parser, date, text):
    """Parse ``text``, code its first sentence and annotate the result.

    Args:
        stanford_parser: Object whose ``parse_doc(text)`` result exposes
            ``['sentences'][0]['parse']``.
        date: Date stored in the document meta and echoed back in the result.
        text: Raw text; only the first parsed sentence is coded.

    Returns:
        The dict returned by ``petrarch2.do_coding`` with two extra top-level
        keys: 'has_events' (whether sentence '0' of document 'doc' carries an
        "events" entry) and 'sentence_date' (the ``date`` argument).
    """
    first_tree = stanford_parser.parse_doc(text)['sentences'][0]['parse']
    coding_input = {
        u'doc': {
            u'sents': {
                u'0': {
                    u'content': text,
                    u'parsed': utilities._format_parsed_str(first_tree),
                },
            },
            u'meta': {u'date': date},
        },
    }
    coded = petrarch2.do_coding(coding_input)

    # Annotate the coded result in place before handing it back.
    coded['has_events'] = "events" in coded['doc']['sents']['0']
    coded['sentence_date'] = date
    print(str(coded))
    return coded
def test_simple2():
    """Check that "Germany arrested France" codes to one DEU -> FRA event, code 173."""
    sentence = "Germany arrested France"
    tree = "(ROOT (S (NP (NNP Germany)) (VP (VBD arrested) (NP (NNP France)))))"
    sentence_entry = {
        u'content': sentence,
        u'parsed': utilities._format_parsed_str(tree),
    }
    coding_input = {
        u'test123': {
            u'sents': {u'0': sentence_entry},
            u'meta': {u'date': u'20010101'},
        },
    }
    coded = petrarch2.do_coding(coding_input, None)
    print(coded)
    expected_events = [('DEU', 'FRA', '173')]
    assert coded['test123']['sents']['0']['events'] == expected_events
def post(self):
    """Code the submitted events, falling back to the input on failure.

    Reads the 'events' entry from ``self.reqparse.parse_args()`` and passes
    it to ``petrarch2.do_coding``.

    Returns:
        The coded event dict, or the submitted events unchanged when coding
        raises; in that case the error is reported on stderr.
    """
    submitted = self.reqparse.parse_args()['events']
    try:
        return petrarch2.do_coding(submitted)
    except Exception as err:
        # Best effort: report the failure and hand the input back untouched.
        sys.stderr.write("An error occurred with PETR. {}\n".format(err))
        return submitted
def test_simple2():
    """Check that "Germany arrested France" codes to one DEU -> FRA event, code 173."""
    sentence = "Germany arrested France"
    tree = "(ROOT (S (NP (NNP Germany)) (VP (VBD arrested) (NP (NNP France)))))"
    sentence_entry = {
        u'content': sentence,
        u'parsed': utilities._format_parsed_str(tree),
    }
    coding_input = {
        u'test123': {
            u'sents': {u'0': sentence_entry},
            u'meta': {u'date': u'20010101'},
        },
    }
    coded = petrarch2.do_coding(coding_input, None)
    print(coded)
    expected_events = [('DEU', 'FRA', '173')]
    assert coded['test123']['sents']['0']['events'] == expected_events
def post(self):
    """Code the submitted events, falling back to the input on failure.

    Reads the 'events' entry from ``self.reqparse.parse_args()`` and passes
    it to ``petrarch2.do_coding``.

    Returns:
        The coded event dict, or the submitted events unchanged when coding
        raises; in that case the error is reported on stderr.
    """
    submitted = self.reqparse.parse_args()['events']
    try:
        return petrarch2.do_coding(submitted)
    except Exception as err:
        # Best effort: report the failure and hand the input back untouched.
        sys.stderr.write("An error occurred with PETR. {}\n".format(err))
        return submitted
def post(self):
    """Code the submitted events and return the events of the first sentence.

    Reads the 'events' entry from ``self.reqparse.parse_args()``, passes it to
    ``petrarch2.do_coding`` and looks up ``['sents']['0']['events']`` under
    the first key of the coded dict.

    Returns:
        The events found for sentence '0' of the first document key, or an
        empty list when there are none or when coding fails. Failures are
        logged through ``logger`` rather than raised.
    """
    args = self.reqparse.parse_args()
    event_dict = args['events']
    to_return = []
    try:
        event_dict_updated = petrarch2.do_coding(event_dict)
        # next(iter(...)) yields the first key on Python 2 and 3 alike;
        # keys()[0] is not subscriptable on Python 3 dict views.
        first_key = next(iter(event_dict_updated))
        try:
            to_return = event_dict_updated[first_key]['sents']['0']['events']
        except KeyError:
            logger.info('No events to process')
        except Exception:
            logger.exception("An error occured")
    # `except Exception` (not a bare except) so that SystemExit and
    # KeyboardInterrupt still propagate.
    except Exception:
        logger.exception("An error occurred")
    return to_return
def get_phrases(doc):
    """Extract noun/verb phrases and their codings for each sentence of a document.

    Every sentence is coded on its own with ``petrarch2.do_coding``. Phrase
    data is read from the document's ``meta['verbs']`` entry in the result.
    A sentence that cannot be coded yields empty values instead of aborting
    the whole document.

    Args:
        doc: Indexable row. ``doc[0]`` is returned as the article id,
            ``doc[1]`` is passed through ``date_formatter``, ``doc[2]`` is a
            JSON string with a 'sentences' list (each item providing 'sen_id',
            'tree' and 'sentence'), and ``doc[3]`` is used as the document id.

    Returns:
        Tuple ``(article_id, phrases_json, doc_id)`` where ``phrases_json`` is
        a JSON-encoded list holding one ``{sentence_text: phrase_dict}`` entry
        per sentence. Each ``phrase_dict`` has the keys "nouns",
        "noun_coding", "verbs" and "verb_coding".
    """
    article_id = doc[0]
    date = date_formatter(doc[1])
    doc_id = doc[3]
    sentences = json.loads(doc[2])['sentences']

    phrases_output = []
    for sentence in sentences:
        sentence_id = sentence['sen_id']
        parsed = utilities._format_parsed_str(sentence['tree'])
        sentence_text = sentence['sentence']
        # NOTE(review): date.encode() yields bytes under Python 3 -- confirm
        # which type petrarch2 expects for the meta date.
        coding_input = {
            doc_id: {
                u'sents': {
                    sentence_id: {u'content': sentence_text, u'parsed': parsed},
                },
                u'meta': {u'date': date.encode()},
            },
        }

        # Reset per sentence so a failure never reports the previous
        # sentence's nouns or verb coding.
        nouns = []
        noun_coding = []
        verbs = ""
        verb_coding = ""
        try:
            coded = petrarch2.do_coding(coding_input)
            verb_meta = coded[doc_id]['meta']['verbs']
            noun_pairs = verb_meta['nouns']
            nouns = [pair[0] for pair in noun_pairs]
            noun_coding = [pair[1] for pair in noun_pairs]
            # First (key, value) pair of the event text mapping; works with
            # Python 2 lists and Python 3 views alike.
            # presumably keys are (source, target, code) tuples -- TODO confirm
            event_key, event_text = next(iter(verb_meta['eventtext'].items()))
            verbs = event_text
            verb_coding = event_key[2]
        except Exception as e:
            # Best effort per sentence: report and keep whatever was collected.
            print("No eventtext")
            print(e)
            verbs = ""
            verb_coding = ""

        phrase_dict = {
            "nouns": nouns,
            "noun_coding": noun_coding,
            "verbs": verbs,
            "verb_coding": verb_coding,
        }
        phrases_output.append({sentence_text: phrase_dict})

    return (article_id, json.dumps(phrases_output), doc_id)
def test_simple():
    """Code "Germany invaded France" and print the coded result and its events.

    No assertion is made; the function only fails if coding raises or if
    sentence '0' of document 'test123' has no 'events' entry.
    """
    sentence = "Germany invaded France"
    tree = "(ROOT (S (NP (NNP Germany)) (VP (VBD invaded) (NP (NNP France)))))"
    sentence_entry = {
        u'content': sentence,
        u'parsed': utilities._format_parsed_str(tree),
    }
    coding_input = {
        u'test123': {
            u'sents': {u'0': sentence_entry},
            u'meta': {u'date': u'20010101'},
        },
    }
    coded = petrarch2.do_coding(coding_input)
    print(coded)
    print(coded['test123']['sents']['0']['events'])
for row in rows: phrases_output=[] date=date_formatter(row[1]) doc_id=row[3] corenlpJsonData=json.loads(row[2]) sentences=corenlpJsonData['sentences'] for sentence in sentences: sen_phrases_dict = {} sentenceJson= json.loads(json.dumps(sentence)) sentenceId=sentenceJson['sen_id'] sentenceTree=sentenceJson['tree'] sentenceData=sentenceJson['sentence'] parsed=utilities._format_parsed_str(sentenceTree) dict = {doc_id: {u'sents': {sentenceId: {u'content': sentenceData, u'parsed': parsed}}, u'meta': {u'date': date.encode()}}} try: return_dict = petrarch2.do_coding(dict, None) n = return_dict[doc_id]['meta']['verbs']['nouns'] nouns = [i[0] for i in n] noun_coding = [i[1] for i in n] verbs = return_dict[doc_id]['meta']['verbs']['eventtext'].values()[0] except: print "No eventtext" verbs = "" try: verb_coding = return_dict[doc_id]['meta']['verbs']['eventtext'].keys()[0][2] except KeyError as e: print e verb_coding = "" phrase_dict = {"nouns": nouns, "noun_coding": noun_coding, "verbs": verbs,