def test_date_check():
    """Check that the coding of "Carl XVI Gustaf" depends on the sentence date.

    The same parse is coded three times with different dates; the 2015 date
    is expected to yield SWEGOV and the 1972 and 1901 dates SWEELI.
    """
    parse = "(S (NP (NNP CARL ) (NN XVI ) (NNP GUSTAF ) ) )"
    expected_by_date = (
        ("20150813", ["SWEGOV"]),
        ("19720813", ["SWEELI"]),
        ("19010813", ["SWEELI"]),
    )
    for date_string, expected_codes in expected_by_date:
        sentence = ptree.Sentence(
            parse,
            "Carl XVI Gustaf",
            PETRreader.dstr_to_ordate(date_string),
        )
        noun_phrase = sentence.tree.children[0]
        assert noun_phrase.get_meaning() == expected_codes
def test_reflexive():
    """Check the coding of a reflexive pronoun in the main clause.

    The NP holding HIMSELF (second child of the top-level VP) is expected
    to code as USAGOV.
    """
    raw_parse = "(S (NP (NNP Obama ) ) (VP (VBD asked ) (NP (PRP himself ) ) (SBAR (WHADVP (WRB why ) ) (S (NP (NNP Biden ) ) (VP (VBD was ) (ADJP (VBN tired ) ) ) ) ) ) )"
    parse = raw_parse.upper()
    coding_date = PETRreader.dstr_to_ordate("20150813")
    sentence = ptree.Sentence(
        parse, "Obama asked himself why Biden was tired", coding_date)

    verb_phrase = sentence.tree.children[1]
    reflexive_np = verb_phrase.children[1]
    assert reflexive_np.get_meaning() == ["USAGOV"]
def test_noun_meaning3():
    """Check that a conjoined noun phrase yields the codes of both conjuncts.

    The order of the returned codes is not checked; both sides are sorted
    before comparison.
    """
    parse = "(S (NP (NP (NNP BARACK ) (NNP OBAMA ) ) (CC AND ) (NP (NNP VLADIMIR ) (NNP PUTIN ) ) ) )"
    sentence = ptree.Sentence(parse, "Barack Obama and Vladimir Putin", "081315")
    compound_np = sentence.tree.children[0]

    found_codes = sorted(compound_np.get_meaning())
    assert found_codes == ["RUSGOV", "USAGOV"]
def test_reflexive2():
    """Check the coding of a reflexive pronoun inside a subordinate clause.

    The NP holding HIMSELF sits in the embedded clause whose subject is
    PUTIN, and is expected to code as RUSGOV.
    """
    parse = "(S (NP (NNP Obama ) ) (VP (VBD knew ) (SBAR (IN that ) (S (NP (NNP Putin ) ) (VP (VBD liked ) (NP (PRP himself ) ) ) ) ) ) ) ".upper()

    # The sentence text previously read "Obama knew that Biden liked him",
    # which contradicted the parse above (PUTIN / HIMSELF). It now matches.
    test = ptree.Sentence(
        parse,
        "Obama knew that Putin liked himself",
        PETRreader.dstr_to_ordate("20150813"),
    )

    # Path: S -> VP -> SBAR -> S -> VP -> NP (HIMSELF)
    main_vp = test.tree.children[1]
    sbar = main_vp.children[1]
    embedded_clause = sbar.children[1]
    embedded_vp = embedded_clause.children[1]
    phrase = embedded_vp.children[1]
    assert phrase.get_meaning() == ["RUSGOV"]
def test_personal1():
    """Check the coding of a personal pronoun in a subordinate clause.

    The NP holding HE (subject of the embedded clause) is expected to code
    as USAGOV.
    """
    raw_parse = "(S (NP (NNP Obama ) ) (VP (VBD said ) (SBAR (S (NP (PRP he ) ) (VP (VBD was ) (ADJP (VBN tired ) ) ) ) ) ) ) "
    parse = raw_parse.upper()
    print('This is a test')
    coding_date = PETRreader.dstr_to_ordate("20150813")
    sentence = ptree.Sentence(parse, "Obama said he was tired", coding_date)

    # Path: S -> VP -> SBAR -> S -> NP (HE)
    main_vp = sentence.tree.children[1]
    sbar = main_vp.children[1]
    embedded_clause = sbar.children[0]
    pronoun_np = embedded_clause.children[0]
    assert pronoun_np.get_meaning() == ["USAGOV"]
def test_noun_meaning5():
    """Check meaning and head word of a noun phrase with a PP modifier.

    "THE US COMMANDER IN IRAQ" is expected to code as USAMIL with
    COMMANDER as its head word. The tree is also dumped to stdout.
    """
    parse = "(S (NP (NP (DT THE ) (NNP US ) (NN COMMANDER ) ) (PP (IN IN ) (NP (NNP IRAQ ) ) ) ) )"
    sentence = ptree.Sentence(parse, "The US commander in Iraq", "081315")
    noun_phrase = sentence.tree.children[0]
    sentence.tree.print_to_stdout("")

    meaning = noun_phrase.get_meaning()
    assert meaning == ['USAMIL']
    head_info = noun_phrase.get_head()
    assert head_info[0] == "COMMANDER"
def test_noun_meaning4():
    """Check meaning and head of a noun phrase with an embedded PP.

    "THE REBELS FROM SYRIA" is expected to code as SYRREB, with REBELS as
    the head word and the noun phrase itself as the head phrase.
    """
    parse = "(S (NP (DT THE ) (NNP REBELS ) (PP (IN FROM ) (NP (NNP SYRIA ) ) ) ) )"
    sentence = ptree.Sentence(parse, "The rebels from Syria", "081315")
    noun_phrase = sentence.tree.children[0]

    meaning = noun_phrase.get_meaning()
    assert meaning == ['SYRREB']
    # get_head() is deliberately queried once per assertion, as before.
    head_word = noun_phrase.get_head()[0]
    assert head_word == "REBELS"
    head_phrase = noun_phrase.get_head()[1]
    assert head_phrase == noun_phrase
def test_prepmeaning():
    """Check meaning, head and preposition of a prepositional phrase.

    "TO THE TURKISH MARKET" is expected to code as TUR, expose MARKET as
    its head and TO as its preposition.
    """
    parse = "(S (PP (IN TO ) (NP (DT THE ) (JJ TURKISH ) (NN MARKET ) ) ) )"

    # The sentence text previously read "to the market", dropping the word
    # the expected 'TUR' code comes from. It now matches the parse.
    test = ptree.Sentence(parse, "to the Turkish market", "081315")
    phrase = test.tree.children[0]

    # Statement order is kept: .head is read only after get_meaning() ran.
    assert phrase.get_meaning() == ['TUR']
    assert phrase.head == "MARKET"
    assert phrase.get_prep() == "TO"
def test_noun_meaning1():
    """Check head and meaning of the noun phrase "THE ISLAMIC STATE".

    The head word is expected to be STATE, the head phrase the noun phrase
    itself, and the meaning IMGMUSISI.
    """
    # NOTE(review): this parse has one more "(" than ")" (S is never
    # closed). Kept exactly as is; confirm whether that is intentional.
    parse = "(S (NP (DT THE ) (JJ ISLAMIC ) (NN STATE ) ) "
    sentence = ptree.Sentence(parse, "The Islamic State", "081315")
    noun_phrase = sentence.tree.children[0]

    head_word, head_phrase = noun_phrase.get_head()
    assert head_word == "STATE"
    assert head_phrase == noun_phrase

    meaning = noun_phrase.get_meaning()
    assert meaning == ["IMGMUSISI"]
def test_noun_meaning2():
    """Check head and meaning of a multi-word organization name.

    "THE NORTH ATLANTIC TREATY ORGANIZATION" is expected to have
    ORGANIZATION as head word, itself as head phrase, and to code as
    IGOWSTNAT.
    """
    parse = "(S (NP (DT THE ) (JJ NORTH ) (NN ATLANTIC ) (NN TREATY ) (NN ORGANIZATION ) ) ) "
    sentence = ptree.Sentence(
        parse, "The North Atlantic Treaty Organization", "081315")
    noun_phrase = sentence.tree.children[0]

    head_word, head_phrase = noun_phrase.get_head()
    assert head_word == "ORGANIZATION"
    assert head_phrase == noun_phrase

    meaning = noun_phrase.get_meaning()
    assert meaning == ["IGOWSTNAT"]
def do_coding(event_dict):
    """
    Main coding loop

    Walks the stories of event_dict in sorted key order. Every sentence that
    has a 'parsed' entry is turned into a PETRtree.Sentence, coded with
    get_events(), and the resulting events/metadata are written back into
    event_dict. Sentences without a 'parsed' entry are logged and skipped.
    check_discards() may drop a single sentence (code 1) or the whole story
    (any other positive code); a discarded story gets its 'sents' set to None.
    A summary of the run is printed at the end.

    Note that entering any character other than 'Enter' at the prompt will
    stop the program: this is deliberate.
    <14.02.28>: Bug: PETRglobals.PauseByStory actually pauses after the first
                sentence of the *next* story

    Args:
        event_dict: dict keyed by story ID. Each story is read for
            ['meta']['date'] and ['sents']; each sentence entry is read for
            'content' and, optionally, 'parsed', 'date' and 'config'.

    Returns:
        The same event_dict object, updated in place.
    """
    NStory = 0
    NSent = 0
    NEvents = 0
    NEmpty = 0
    NDiscardSent = 0
    NDiscardStory = 0

    logger = logging.getLogger('petr_log')
    times = 0
    sents = 0
    for key, story in sorted(event_dict.items()):
        NStory += 1
        SkipStory = False
        print('\n\nProcessing story {}'.format(key))
        StoryDate = story['meta']['date']
        for sent in story['sents']:
            NSent += 1
            sent_entry = story['sents'][sent]
            # Built before the parse check so the "no parse" log line below
            # always names the current sentence (it used to be assigned only
            # inside the parsed branch: NameError or a stale ID otherwise).
            SentenceID = '{}_{}'.format(key, sent)

            if 'parsed' not in sent_entry:
                logger.info(
                    '{} has no parse information. Passing.'.format(SentenceID))
                continue

            if 'config' in sent_entry:
                for config in sent_entry['config'].values():
                    change_Config_Options(config)

            SentenceText = sent_entry['content']
            # A sentence-level date overrides the story date.
            SentenceDate = sent_entry.get('date', StoryDate)
            Date = PETRreader.dstr_to_ordate(SentenceDate)

            print("\n", SentenceID)
            disc = check_discards(SentenceText)
            if disc[0] > 0:
                if disc[0] == 1:
                    print("Discard sentence:", disc[1])
                    logger.info('\tSentence discard. {}'.format(disc[1]))
                    NDiscardSent += 1
                    continue
                else:
                    print("Discard story:", disc[1])
                    logger.info('\tStory discard. {}'.format(disc[1]))
                    SkipStory = True
                    NDiscardStory += 1
                    break

            t1 = time.time()
            sentence = PETRtree.Sentence(
                sent_entry['parsed'], SentenceText, Date)
            print(sentence.txt)
            # this is the entry point into the processing in PETRtree
            coded_events, meta = sentence.get_events()
            code_time = time.time() - t1

            if PETRglobals.NullVerbs or PETRglobals.NullActors:
                story['meta'] = meta
                story['text'] = sentence.txt
            elif PETRglobals.NullActors:
                # NOTE(review): unreachable -- NullActors is already handled
                # by the branch above. Left untouched because the intended
                # behaviour cannot be confirmed from this function alone; if
                # it is ever made reachable, len(coded_events) below must
                # cope with None.
                story['events'] = coded_events
                coded_events = None  # skips additional processing
                story['text'] = sentence.txt
            else:
                # 16.04.30 pas: we're using the key value 'meta' at two very
                # different levels of event_dict -- here (story level) and in
                # the sentence-level assignment below -- and this is
                # potentially confusing, so it probably would be useful to
                # change one of those
                story['meta']['verbs'] = meta

            del sentence
            times += code_time
            sents += 1

            if coded_events:
                sent_entry['events'] = coded_events
                sent_entry['meta'] = meta

                if (PETRglobals.WriteActorText
                        or PETRglobals.WriteEventText
                        or PETRglobals.WriteActorRoot):
                    text_dict = utilities.extract_phrases(
                        sent_entry, SentenceID)
                    if text_dict:
                        sent_meta = sent_entry['meta']
                        sent_meta['actortext'] = {}
                        sent_meta['eventtext'] = {}
                        sent_meta['actorroot'] = {}
                        for evt in coded_events:
                            # 16.04.30 pas bypasses problems with expansion
                            # of compounds
                            if evt in text_dict:
                                sent_meta['actortext'][evt] = text_dict[evt][:2]
                                sent_meta['eventtext'][evt] = text_dict[evt][2]
                                sent_meta['actorroot'][evt] = text_dict[evt][3:5]

            if coded_events and PETRglobals.IssueFileName != "":
                event_issues = get_issues(SentenceText)
                if event_issues:
                    sent_entry['issues'] = event_issues

            if PETRglobals.PauseBySentence:
                if len(input("Press Enter to continue...")) > 0:
                    sys.exit()

            NEvents += len(coded_events)
            if len(coded_events) == 0:
                NEmpty += 1

        if SkipStory:
            story['sents'] = None

    print("\nSummary:")
    print(
        "Stories read:",
        NStory,
        "   Sentences coded:",
        NSent,
        "  Events generated:",
        NEvents)
    print(
        "Discards:  Sentence",
        NDiscardSent,
        "  Story",
        NDiscardStory,
        "  Sentences without events:",
        NEmpty)
    # Guard against division by zero when no sentence was coded.
    print("Average Coding time = ", times / sents if sents else 0)
    return event_dict