def replace_subgraph_for_person_or_organization(amr: AMR, node_var):
    """
    Collapse the wiki/name subgraph hanging off a person or organization node.

    The ``wiki`` and ``name`` relations of ``node_var`` are removed together
    with the name node and its ``opN`` literals, and the concept of
    ``node_var`` is upper-cased (person -> PERSON,
    organization -> ORGANIZATION).

    Eg.
        p / person :wiki "Deng_Xiaoping"
            :name (n / name :op1 "Deng"~e.1 :op2 "Xiaoping"~e.2)
            :ARG0-of (h / have-rel-role-91 :ARG2 (c / comrade~e.0)))
    becomes
        p / PERSON
            :ARG0-of (h / have-rel-role-91 :ARG2 (c / comrade~e.0)))

    :param amr: graph to rewrite; it is modified in place
    :param node_var: variable of the person/organization node
    :return: the same ``amr`` object that was passed in
    """
    # .get(...) is used on purpose instead of plain ['wiki'] / ['name']
    # indexing: the plain form sometimes gave wrong results.
    wiki_literal = amr[node_var].get('wiki')[0][0]
    name_node = amr[node_var].get('name')[0][0]
    name_op_literals = []
    for op_targets in amr[name_node].values():
        name_op_literals.append(op_targets[0][0])

    # drop the relation alignments first, while the relations still exist
    update_relation_to_tokens(amr, node_var, ['wiki', 'name'])
    update_relation_to_tokens(amr, name_node, amr[name_node].keys())

    # drop the relations and the name node from the graph dictionary
    del amr[node_var]['wiki']
    del amr[node_var]['name']
    del amr[name_node]

    # The membership guard is needed because several nodes can share the same
    # '-' wiki literal, so it may already have been removed.
    if wiki_literal in amr.keys():
        keep_literal = False
        if wiki_literal == '-':
            # '-' doubles as the polarity literal: keep it while any node
            # still carries a polarity relation.
            keep_literal = any(
                'polarity' in relations.keys() for relations in amr.values())
        if not keep_literal:
            del amr[wiki_literal]

    for op_literal in name_op_literals:
        if op_literal in amr.keys():
            del amr[op_literal]

    # the name node no longer has a concept
    del amr.node_to_concepts[name_node]

    # drop token alignments of every node that was removed
    stale_nodes = name_op_literals + [name_node, wiki_literal]
    for stale_node in stale_nodes:
        if stale_node in amr.node_to_tokens.keys():
            del amr.node_to_tokens[stale_node]

    # person -> PERSON, organization -> ORGANIZATION
    upper_concept = amr.node_to_concepts[node_var].upper()
    amr.node_to_concepts[node_var] = upper_concept
    return amr
def test_parse_example_1():
    """Parse a small aligned AMR and compare every graph dictionary."""
    amr_str = """(r / recommend-01~e.1 :ARG1 (a / advocate-01~e.4 :ARG1 (i / it~e.0) :manner~e.2 (v / vigorous~e.3)))"""
    parsed = AMR.parse_string(amr_str)

    expected = AMR()
    expected.node_to_concepts = {
        'i': 'it',
        'v': 'vigorous',
        'a': 'advocate-01',
        'r': 'recommend-01',
    }
    expected.node_to_tokens = {
        'i': ['0'],
        'v': ['3'],
        'a': ['4'],
        'r': ['1'],
    }
    expected.relation_to_tokens = {'manner': [('2', 'a')]}
    expected['i'] = {}
    expected['v'] = {}
    expected['a'] = {'ARG1': [('i',)], 'manner': [('v',)]}
    expected['r'] = {'ARG1': [('a',)]}
    expected.roots = ['r']

    assert_amr_graph_dictionaries(expected, parsed)
def test_train_pre_processing_ex_person_with_polarity():
    """
    Pre-process a sentence with three person nodes, one of which has a
    polarity relation; the expectation keeps that node (p3) untouched and
    replaces the other two with PERSON.
    """
    sentence = 'It is Santorum that is the by far major nonRomney candidate and Newt would appear to be the spoiler .'

    # input graph
    amr = AMR()
    amr.roots = ['a']
    amr.reentrance_triples = []
    amr.node_to_concepts = {
        'b': 'by-far', 'm': 'major-02', 'n2': 'name', 'p3': 'person',
        'n': 'name', 'p2': 'person', 'c': 'candidate', 'n3': 'name',
        'p4': 'person', 's': 'spoil-01', 'a2': 'appear-02', 'a': 'and',
    }
    amr.node_to_tokens = {
        'b': ['6', '7'], 'Santorum': [('2', 'n')], 'm': ['8'],
        'Newt': [('12', 'n3')], 'c': ['10'], 'a2': ['14'], 'a': ['11'],
    }
    amr.relation_to_tokens = {'domain': [('1', 'c'), ('4', 'c')]}
    amr['b'] = {}
    amr['m'] = {'degree': [('b',)]}
    amr['Romney'] = {}
    amr['n2'] = {'op1': [('Romney',)]}
    amr['-'] = {}
    amr['p3'] = {'polarity': [('-',)], 'wiki': [('Mitt_Romney',)], 'name': [('n2',)]}
    amr['Mitt_Romney'] = {}
    amr['Santorum'] = {}
    amr['n'] = {'op1': [('Santorum',)]}
    amr['Rick_Santorum'] = {}
    amr['p2'] = {'wiki': [('Rick_Santorum',)], 'name': [('n',)]}
    amr['c'] = {'ARG1-of': [('m',)], 'mod': [('p3',)], 'domain': [('p2',)]}
    amr['Newt'] = {}
    amr['n3'] = {'op1': [('Newt',)]}
    amr['Newt_Gingrich'] = {}
    amr['p4'] = {'wiki': [('Newt_Gingrich',)], 'name': [('n3',)]}
    amr['s'] = {'ARG0': [('p4',)]}
    amr['a2'] = {'ARG1': [('s',)]}
    amr['a'] = {'op1': [('c',)], 'op2': [('a2',)]}

    actual_amr, actual_sentence, actual_metadata = train_pre_processing(amr, sentence)

    # expected
    expected_sentence = 'It is PERSON that is the by far major nonRomney candidate and PERSON would appear to be the spoiler .'
    expected_metadata = {2: ['Santorum'], 12: ['Newt']}
    expected = AMR()
    expected.roots = ['a']
    expected.reentrance_triples = []
    expected.node_to_concepts = {
        'b': 'by-far', 'm': 'major-02', 'n2': 'name', 'p3': 'person',
        'p2': 'PERSON', 'c': 'candidate', 'p4': 'PERSON', 's': 'spoil-01',
        'a2': 'appear-02', 'a': 'and',
    }
    expected.node_to_tokens = {
        'b': ['6', '7'], 'p2': ['2'], 'm': ['8'], 'p4': ['12'],
        'c': ['10'], 'a2': ['14'], 'a': ['11'],
    }
    expected.relation_to_tokens = {'domain': [('1', 'c'), ('4', 'c')]}
    expected['b'] = {}
    expected['m'] = {'degree': [('b',)]}
    expected['Romney'] = {}
    expected['n2'] = {'op1': [('Romney',)]}
    expected['-'] = {}
    expected['p3'] = {'polarity': [('-',)], 'wiki': [('Mitt_Romney',)], 'name': [('n2',)]}
    expected['Mitt_Romney'] = {}
    expected['p2'] = {}
    expected['c'] = {'ARG1-of': [('m',)], 'mod': [('p3',)], 'domain': [('p2',)]}
    expected['p4'] = {}
    expected['s'] = {'ARG0': [('p4',)]}
    expected['a2'] = {'ARG1': [('s',)]}
    expected['a'] = {'op1': [('c',)], 'op2': [('a2',)]}

    assert expected_sentence == actual_sentence
    assert_amr_graph_dictionaries(expected, actual_amr)
    assert actual_metadata == expected_metadata
def test_parse_example_with_reentrancy():
    """Parse an AMR in which the variable ``w`` is referenced from several parents."""
    amr_str = """(r / receive-01~e.4 :ARG0 (w / we~e.0) :ARG1 (t / thing~e.7 :ARG0-of~e.7 (r2 / remind-01~e.7 :ARG1 (p / pay-01~e.6 :ARG0 w) :ARG2 w)) :ARG2~e.8 (h / hospital~e.10) :time (n / now~e.2) :time (a / already~e.3))"""
    parsed = AMR.parse_string(amr_str)

    expected = AMR()
    expected.node_to_concepts = {
        'w': 'we', 'p': 'pay-01', 'r2': 'remind-01', 't': 'thing',
        'h': 'hospital', 'n': 'now', 'a': 'already', 'r': 'receive-01',
    }
    expected.node_to_tokens = {
        'p': ['6'], 'r2': ['7'], 'w': ['0'], 't': ['7'],
        'h': ['10'], 'n': ['2'], 'a': ['3'], 'r': ['4'],
    }
    expected.relation_to_tokens = {
        'ARG0-of': [('7', 't')],
        'ARG2': [('8', 'r')],
    }
    expected['w'] = {}
    expected['p'] = {'ARG0': [('w',)]}
    expected['r2'] = {'ARG1': [('p',)], 'ARG2': [('w',)]}
    expected['t'] = {'ARG0-of': [('r2',)]}
    expected['h'] = {}
    expected['n'] = {}
    expected['a'] = {}
    expected['r'] = {
        'ARG0': [('w',)],
        'ARG1': [('t',)],
        'ARG2': [('h',)],
        'time': [('n',), ('a',)],
    }
    expected.roots = ['r']

    assert_amr_graph_dictionaries(expected, parsed)
def test_parse_example2_with_2polarities():
    """Parse an AMR where two different nodes carry a ``:polarity -`` literal."""
    amr_str = """(c / contrast-01~e.0 :ARG2 (a2 / authorize-01~e.6 :ARG1 (o2 / or~e.9 :op1 (a / approve-01 :ARG0 p :ARG1 (p2 / pay-01 :purpose (e2 / employ-01 :polarity -~e.2,11))) :op2 (d / deny-01 :ARG0 p :ARG1 p2)) :ARG2 (p / person :ARG0-of (r / represent-01 :ARG1 (o / organization :mod (e / employ-01 :polarity -~e.2,11))))))"""
    parsed = AMR.parse_string(amr_str)

    expected = AMR()
    expected.node_to_concepts = {
        'e2': 'employ-01', 'p2': 'pay-01', 'a': 'approve-01',
        'd': 'deny-01', 'o2': 'or', 'e': 'employ-01',
        'o': 'organization', 'r': 'represent-01', 'p': 'person',
        'a2': 'authorize-01', 'c': 'contrast-01',
    }
    expected.node_to_tokens = {
        '-': [('2', 'e2'), ('11', 'e2'), ('2', 'e'), ('11', 'e')],
        'o2': ['9'],
        'a2': ['6'],
        'c': ['0'],
    }
    expected.relation_to_tokens = {}
    expected['-'] = {}
    expected['e2'] = {'polarity': [('-',)]}
    expected['p2'] = {'purpose': [('e2',)]}
    expected['p'] = {'ARG0-of': [('r',)]}
    expected['a'] = {'ARG0': [('p',)], 'ARG1': [('p2',)]}
    expected['d'] = {'ARG0': [('p',)], 'ARG1': [('p2',)]}
    expected['o2'] = {'op1': [('a',)], 'op2': [('d',)]}
    expected['e'] = {'polarity': [('-',)]}
    expected['o'] = {'mod': [('e',)]}
    expected['r'] = {'ARG1': [('o',)]}
    expected['a2'] = {'ARG1': [('o2',)], 'ARG2': [('p',)]}
    expected['c'] = {'ARG2': [('a2',)]}
    expected.roots = ['c']

    assert_amr_graph_dictionaries(expected, parsed)
def test_train_pre_processing_ex_organization():
    """Pre-process a sentence whose organization node has a ``-`` wiki literal."""
    sentence = "Some propaganda activities of ZF have soon become viewed as jokes by the people ."

    # input graph
    amr = AMR()
    amr.roots = ['b']
    amr.reentrance_triples = [('b', 'ARG1', 'a')]
    amr.node_to_concepts = {
        'p2': 'person', 'n': 'name', 'o': 'organization',
        'p': 'propaganda', 's': 'some', 'a': 'activity-06',
        'j': 'joke-01', 't': 'thing', 'v': 'view-02', 's2': 'soon',
        'b': 'become-01',
    }
    amr.node_to_tokens = {
        'ZF': [('4', 'n')], 'p': ['1'], 's': ['0'], 'j': ['10'],
        'p2': ['13'], 'a': ['2'], 't': ['10'], 'v': ['8'], 's2': ['6'],
        'b': ['7'],
    }
    amr.relation_to_tokens = {
        'ARG0': [('3', 'a'), ('11', 'v')],
        'ARG2-of': [('10', 't')],
        'ARG2': [('9', 'v')],
        'time': [('9', 'b')],
    }
    amr['p2'] = {}
    amr['ZF'] = {}
    amr['n'] = {'op1': [('ZF',)]}
    amr['-'] = {}
    amr['o'] = {'wiki': [('-',)], 'name': [('n',)]}
    amr['p'] = {}
    amr['s'] = {}
    amr['a'] = {'ARG0': [('o',)], 'ARG1': [('p',)], 'quant': [('s',)]}
    amr['j'] = {}
    amr['t'] = {'ARG2-of': [('j',)]}
    amr['v'] = {'ARG0': [('p2',)], 'ARG1': [('a',)], 'ARG2': [('t',)]}
    amr['s2'] = {}
    amr['b'] = {'ARG1': [('a',)], 'ARG2': [('v',)], 'time': [('s2',)]}

    actual_amr, actual_sentence, actual_metadata = train_pre_processing(amr, sentence)

    # expected
    expected_sentence = "Some propaganda activities of ORGANIZATION have soon become viewed as jokes by the people ."
    expected_metadata = {4: ['ZF']}
    expected = AMR()
    expected.roots = ['b']
    expected.reentrance_triples = [('b', 'ARG1', 'a')]
    expected.node_to_concepts = {
        'p2': 'person', 'o': 'ORGANIZATION', 'p': 'propaganda',
        's': 'some', 'a': 'activity-06', 'j': 'joke-01', 't': 'thing',
        'v': 'view-02', 's2': 'soon', 'b': 'become-01',
    }
    expected.node_to_tokens = {
        'o': ['4'], 'p': ['1'], 's': ['0'], 'j': ['10'], 'p2': ['13'],
        'a': ['2'], 't': ['10'], 'v': ['8'], 's2': ['6'], 'b': ['7'],
    }
    expected.relation_to_tokens = {
        'ARG0': [('3', 'a'), ('11', 'v')],
        'ARG2-of': [('10', 't')],
        'ARG2': [('9', 'v')],
        'time': [('9', 'b')],
    }
    expected['p2'] = {}
    expected['o'] = {}
    expected['p'] = {}
    expected['s'] = {}
    expected['a'] = {'ARG0': [('o',)], 'ARG1': [('p',)], 'quant': [('s',)]}
    expected['j'] = {}
    expected['t'] = {'ARG2-of': [('j',)]}
    expected['v'] = {'ARG0': [('p2',)], 'ARG1': [('a',)], 'ARG2': [('t',)]}
    expected['s2'] = {}
    expected['b'] = {'ARG1': [('a',)], 'ARG2': [('v',)], 'time': [('s2',)]}

    assert_amr_graph_dictionaries(expected, actual_amr)
    assert actual_sentence == expected_sentence
    assert actual_metadata == expected_metadata
def generate_test_data(file_path, verbose=True):
    """
    Build ``(sentence, concepts_metadata)`` pairs from an AMR data file.

    Every record returned by ``input_file_parser.extract_data_records`` is
    validated by parsing its AMR string and then passed through
    ``NamedEntitiesReplacer.process_sentence``. Records for which either
    step raises are logged and left out of the result.

    :param file_path: path handed to ``input_file_parser.extract_data_records``
    :param verbose: when ``False``, logging up to WARNING level is disabled
    :return: list of ``(new_sentence, concepts_metadata)`` tuples, where
        ``concepts_metadata`` maps ``named_entity[0]`` to ``named_entity[1]``
        for each named entity reported for the sentence
    """
    if verbose is False:
        # NOTE: logging.disable is process-wide and is not re-enabled here.
        logging.disable(logging.WARNING)

    sentence_amr_triples = input_file_parser.extract_data_records(file_path)
    test_data = []
    for i in tqdm(range(len(sentence_amr_triples))):
        sentence, amr_str, _amr_id = sentence_amr_triples[i]
        try:
            logging.warning("Started processing example %d", i)
            # Parsed only for validation: a record whose AMR cannot be
            # parsed is skipped through the except branch below.
            AMR.parse_string(amr_str)
            new_sentence, named_entities = \
                NamedEntitiesReplacer.process_sentence(sentence)
            concepts_metadata = {
                named_entity[0]: named_entity[1]
                for named_entity in named_entities
            }
            test_data.append((new_sentence, concepts_metadata))
        except Exception as e:
            # Best effort: report the failing record and carry on.
            logging.warning(e)
            logging.warning("Failed at: %d", i)
            logging.warning("%s\n", sentence)
    return test_data
def test_create_from_amr_example_2():
    """Concept ordering for an AMR that contains a polarity literal."""
    amr_str = """(a / and~e.0 :op2 (p / possible-01~e.8 :ARG1 (a3 / avoid-01~e.10 :ARG0 (h / he~e.7) :ARG1 (c / censure-01~e.12 :ARG1 h)) :ARG1-of (a2 / actual-02~e.9) :manner (p2 / promise-01~e.5 :polarity~e.2 -~e.2 :ARG0 h :mod (a4 / any~e.4))))"""
    amr = AMR.parse_string(amr_str)
    # the customized AMR is built from the same graph before the concepts
    custom_amr = CustomizedAMR()
    custom_amr.create_custom_AMR(amr)

    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_2', amr)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_2'
    expected.ordered_concepts = [
        Concept('a', 'and'),
        Concept('-', '-', 0),
        Concept('a4', 'any'),
        Concept('p2', 'promise-01'),
        Concept('h', 'he'),
        Concept('p', 'possible-01'),
        Concept('a2', 'actual-02'),
        Concept('a3', 'avoid-01'),
        Concept('c', 'censure-01'),
    ]
    assert_identified_concepts(expected, actual)
def test_create_from_amr_example_reentrancy():
    """Concept ordering for an AMR with a re-entrant ``w`` node."""
    amr_str = """(r / receive-01~e.4 :ARG0 (w / we~e.0) :ARG1 (t / thing~e.7 :ARG0-of~e.7 (r2 / remind-01~e.7 :ARG1 (p / pay-01~e.6 :ARG0 w) :ARG2 w)) :ARG2~e.8 (h / hospital~e.10) :time (n / now~e.2) :time (a / already~e.3))"""
    amr = AMR.parse_string(amr_str)

    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_reentrancy', amr)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_reentrancy'
    expected.ordered_concepts = [
        Concept('w', 'we'),
        Concept('n', 'now'),
        Concept('a', 'already'),
        Concept('r', 'receive-01'),
        Concept('p', 'pay-01'),
        Concept('r2', 'remind-01'),
        Concept('t', 'thing'),
        Concept('h', 'hospital'),
    ]
    assert_identified_concepts(expected, actual)
def test_generate_parent_list_vector_with_2_polarites():
    """Parent list vector for an AMR containing two polarity literals."""
    amr_str = """(a / and~e.0 :op2 (p2 / practice-01~e.13 :ARG1 (l / loan-01~e.12 :ARG2 (p / person~e.11 :ARG0-of~e.11 (s / study-01~e.11))) :mod (s2 / sane~e.10 :polarity~e.10 -~e.10) :ARG1-of (i2 / identical-01~e.16 :ARG2~e.19 (p3 / practice-01~e.24 :ARG1 (l2 / loan-01~e.23 :ARG1 (m / mortgage-01~e.22)) :mod (s3 / sane~e.21 :polarity~e.21 -~e.21)) :manner (w / way~e.18 :mod (e / every~e.18))) :ARG0-of (c2 / cause-01~e.3,8 :ARG1 (b / be-located-at-91~e.5,7 :ARG1 (t / they~e.4) :ARG2 (t2 / there~e.6)) :mod (o / only~e.2))))"""
    amr = AMR.parse_string(amr_str)
    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_2_polarities', amr)
    add_false_root(concepts)

    actual = generate_parent_list_vector(amr, concepts)

    # a o c2 t b t2 - s2 s p  l  p2 i2 e  w  -  s3 m  l2 p3
    # 1 2 3  4 5 6  7 8  9 10 11 12 13 14 15 16 17 18 19 20
    expected = [
        [-1], [0], [3], [12], [5], [3], [5], [8], [12], [10], [11],
        [12], [1], [12], [15], [13], [17], [20], [19], [20], [13],
    ]
    failure_message = str(actual) + ' should be' + str(expected)
    assert actual == expected, failure_message
def read_original_graphs(file_type, filter_path="deft", cache=True):
    """
    Return a list of (amr_id, sentence, AMR, CustomizedAMR) quadruples.

    For every matching corpus file the quadruples are read with ``js.load``
    from that file's dump when one exists and caching is allowed; otherwise
    they are generated from the corpus file and written to the dump with
    ``js.dump``. Records whose AMR fails to parse or customize are skipped
    and only counted.

    :param file_type: data set partition (training, dev or test)
    :param filter_path: substring a file name must contain to be used;
        ``None`` falls back to "deft"
    :param cache: load from the dump file if true, else recompute from the
        original file and save a new dump
    :return: the quadruples of all selected files, in sorted file order
    """
    if filter_path is None:
        filter_path = "deft"
    dir_path = AMR_ALIGNMENTS_SPLIT + "/" + file_type

    corpus_files = sorted(
        name for name in listdir(dir_path)
        if "dump" not in name and filter_path in name)

    all_graphs = []
    for file_name in corpus_files:
        original_file_path = dir_path + "/" + file_name
        dump_file_path = dir_path + "/original_graphs_dumps/" + file_name + ".dump"
        print(original_file_path)

        if cache and path.exists(dump_file_path):
            print("cache")
            with open(dump_file_path, "rb") as dump_file:
                all_graphs += js.load(dump_file)
            continue

        print("generate")
        file_data = input_file_parser.extract_data_records(original_file_path)
        file_graphs = []
        failed_count = 0
        for record in file_data:
            try:
                graph = AMR.parse_string(record[1])
                custom_graph = CustomizedAMR()
                custom_graph.create_custom_AMR(graph)
                file_graphs.append((record[2], record[0], graph, custom_graph))
            except Exception:
                # best effort: unparsable records are only counted
                failed_count += 1

        dump_dir = path.dirname(dump_file_path)
        if not path.exists(dump_dir):
            makedirs(dump_dir)
        with open(dump_file_path, "wb") as dump_file:
            js.dump(file_graphs, dump_file)
        all_graphs += file_graphs
        print(("%d / %d in %s" % (failed_count, len(file_data), original_file_path)))
    return all_graphs
def test_train_pre_processing_ex_person_reentrancy():
    """Pre-process a sentence whose person node is re-entrant (two parents)."""
    sentence = 'Now , Wang Shi said , these responses have had effects on me .'
    amr: AMR = AMR()
    amr.roots = ['s']
    amr.reentrance_triples = [('e', 'ARG1', 'p')]
    amr.node_to_concepts = {
        'n2': 'name', 'p': 'person', 'r': 'respond-01', 't': 'this',
        't2': 'thing', 'e': 'effect-03', 'n': 'now', 's': 'say-01',
    }
    amr.node_to_tokens = {
        'Wang': [('2', 'n2')], 'Shi': [('3', 'n2')], 'r': ['7'],
        't': ['6'], 't2': ['7'], 'p': ['12'], 'e': ['10'], 'n': ['0'],
        's': ['4'],
    }
    amr.relation_to_tokens = {'ARG2-of': [('7', 't2')], 'ARG1': [('11', 'e')]}
    amr['Wang'] = {}
    amr['n2'] = {'op1': [('Wang',)], 'op2': [('Shi',)]}
    amr['Shi'] = {}
    amr['Wang_Shi_(entrepreneur)'] = {}
    amr['p'] = {'wiki': [('Wang_Shi_(entrepreneur)',)], 'name': [('n2',)]}
    amr['r'] = {}
    amr['t'] = {}
    amr['t2'] = {'ARG2-of': [('r',)], 'mod': [('t',)]}
    amr['e'] = {'ARG0': [('t2',)], 'ARG1': [('p',)]}
    amr['n'] = {}
    amr['s'] = {'ARG0': [('p',)], 'ARG1': [('e',)], 'time': [('n',)]}

    generated_amr, generated_sentence, generated_metadata = train_pre_processing(amr, sentence)

    # expected
    expected_sentence = 'Now , PERSON said , these responses have had effects on me .'
    expected_metadata = {2: ['Wang', 'Shi']}
    expected_amr: AMR = AMR()
    expected_amr.roots = ['s']
    expected_amr.reentrance_triples = [('e', 'ARG1', 'p')]
    expected_amr.node_to_concepts = {
        'p': 'PERSON', 'r': 'respond-01', 't': 'this', 't2': 'thing',
        'e': 'effect-03', 'n': 'now', 's': 'say-01',
    }
    # This literal used to list the key 'p' twice (['2'] first, ['11']
    # later). Python keeps only the last value of a repeated key, so the
    # expectation that was actually compared has always been ['11']; the
    # dead ['2'] entry is removed so the literal says what it means.
    # NOTE(review): 'p' is aligned both to the replaced name (token 2) and to
    # token 11 - confirm whether the expectation should be ['11', '2'].
    expected_amr.node_to_tokens = {
        'p': ['11'], 'r': ['6'], 't': ['5'], 't2': ['6'], 'e': ['9'],
        'n': ['0'], 's': ['3'],
    }
    expected_amr.relation_to_tokens = {'ARG2-of': [('6', 't2')], 'ARG1': [('10', 'e')]}
    expected_amr['p'] = {}
    expected_amr['r'] = {}
    expected_amr['t'] = {}
    expected_amr['t2'] = {'ARG2-of': [('r',)], 'mod': [('t',)]}
    expected_amr['e'] = {'ARG0': [('t2',)], 'ARG1': [('p',)]}
    expected_amr['n'] = {}
    expected_amr['s'] = {'ARG0': [('p',)], 'ARG1': [('e',)], 'time': [('n',)]}

    assert_amr_graph_dictionaries(expected_amr, generated_amr)
    assert generated_sentence == expected_sentence
    assert generated_metadata == expected_metadata
def test_pre_and_post_processing_eg_2():
    """
    Round trip: pre-process, build the parent vector, post-process and
    regenerate the AMR, then compare it to the expected string with smatch.
    """
    sentence = 'It is Santorum that is the by far major nonRomney candidate and Newt would appear to be the spoiler .'
    amr_str = """(a / and~e.11 :op1 (c / candidate~e.10 :ARG1-of (m / major-02~e.8 :degree (b / by-far~e.6,7)) :mod (p3 / person~e.9 :polarity -~e.9 :wiki "Mitt_Romney"~e.9 :name (n2 / name~e.9 :op1 "Romney"~e.9)) :domain~e.1,4 (p2 / person :wiki "Rick_Santorum" :name (n / name :op1 "Santorum"~e.2))) :op2 (a2 / appear-02~e.14 :ARG1 (s / spoil-01~e.18 :ARG0 (p4 / person :wiki "Newt_Gingrich" :name (n3 / name :op1 "Newt"~e.12)))))"""
    amr = AMR.parse_string(amr_str)
    amr, new_sentence, metadata = train_pre_processing(amr, sentence)

    concepts = IdentifiedConcepts()
    concepts.create_from_amr('', amr)
    add_false_root(concepts)
    parents = generate_parent_list_vector(amr, concepts)
    post_processing_on_parent_vector(concepts, parents, new_sentence, metadata)

    # relation label for every (parent concept, child concept) pair
    relations_dict = {
        ('and', 'candidate'): 'op1',
        ('and', 'appear-02'): 'op2',
        ('candidate', 'major-02'): 'ARG1-of',
        ('candidate', 'person'): 'mod',
        ('major-02', 'by-far'): 'degree',
        ('person', '-'): 'polarity',
        ('person', 'Mitt_Romney'): 'wiki',
        ('person', 'name'): 'name',
        ('person', 'Santorum'): 'wiki',
        ('name', 'Romney'): 'op1',
        ('name', 'Santorum'): 'op1',
        ('appear-02', 'spoil-01'): 'ARG1',
        ('spoil-01', 'person'): 'ARG0',
        ('person', 'Newt'): 'wiki',
        ('name', 'Newt'): 'op1',
    }
    root_node = generate_amr_node_for_vector_of_parents(
        concepts, parents, relations_dict)
    actual_amr_str = root_node.amr_print_with_reentrancy()

    expected_amr_str = """(a / and~e.11 :op1 (c / candidate~e.10 :ARG1-of (m / major-02~e.8 :degree (b / by-far~e.6,7)) :mod (p3 / person~e.9 :polarity -~e.9 :wiki "Mitt_Romney"~e.9 :name (n2 / name~e.9 :op1 "Romney"~e.9)) :mod~e.1,4 (p2 / person :wiki "Santorum" :name (n / name :op1 "Santorum"~e.2))) :op2 (a2 / appear-02~e.14 :ARG1 (s / spoil-01~e.18 :ARG0 (p4 
/ person :wiki "Newt" :name (n3 / name :op1 "Newt"~e.12)))))"""
    score = calculate_smatch(actual_amr_str, expected_amr_str)
    assert score == 1
def test_parse_example_with_polarity():
    """Parse an AMR with a polarity literal and a numeric ``:year`` literal."""
    amr_str = """(y2 / year~e.4 :time-of~e.5 (r / recover-01~e.7 :ARG1-of (e / expect-01 :polarity -~e.6)) :ARG1-of (p / possible-01~e.1) :domain~e.2 (d / date-entity :year~e.4 2012~e.0))"""
    parsed = AMR.parse_string(amr_str)

    expected = AMR()
    expected.node_to_concepts = {
        'y2': 'year',
        'r': 'recover-01',
        'e': 'expect-01',
        'p': 'possible-01',
        'd': 'date-entity',
    }
    expected.node_to_tokens = {
        'y2': ['4'],
        'r': ['7'],
        '-': [('6', 'e')],
        'p': ['1'],
        '2012': [('0', 'd')],
    }
    expected.relation_to_tokens = {
        'time-of': [('5', 'y2')],
        'domain': [('2', 'y2')],
        'year': [('4', 'd')],
    }
    expected['y2'] = {
        'time-of': [('r',)],
        'ARG1-of': [('p',)],
        'domain': [('d',)],
    }
    expected['r'] = {'ARG1-of': [('e',)]}
    expected['e'] = {'polarity': [('-',)]}
    expected['-'] = {}
    expected['p'] = {}
    expected['d'] = {'year': [('2012',)]}
    expected['2012'] = {}
    expected.roots = ['y2']

    assert_amr_graph_dictionaries(expected, parsed)
def test_generate_amr_node_for_vector_of_parents_example_1():
    """
    Rebuild an AMR from its parent list vector and check with smatch that it
    matches the AMR it was derived from.

    The unused ``amr_str1`` local (a variant of the same graph that was
    assigned but never read) has been removed.
    """
    amr_str = """(s / suppose-01~e.1 :ARG0 (i / i~e.0) :ARG1 (p / possible-01~e.3 :ARG1 (a / add-02~e.4 :ARG0 (y / you~e.2) :ARG1 (p2 / probation~e.5 :ARG1-of (c / contrast-01~e.7 :ARG2 (r / replace-01~e.12 :ARG1 p2 :ARG2~e.13 (t / time~e.15 :mod (j / jail~e.14)) :mod (j2 / just~e.10)))))))"""
    amr: AMR = AMR.parse_string(amr_str)
    identified_concepts = IdentifiedConcepts()
    identified_concepts.create_from_amr('amr_1', amr)
    add_false_root(identified_concepts)
    vector_of_parents = generate_parent_list_vector(amr, identified_concepts)

    # i s y p a p2 c j2 r j  t
    # 1 2 3 4 5 6  7 8  9 10 11
    # relation label for every (parent concept, child concept) pair
    relations_dict = {
        ('suppose-01', 'i'): 'ARG0',
        ('suppose-01', 'possible-01'): 'ARG1',
        ('possible-01', 'add-02'): 'ARG1',
        ('add-02', 'you'): 'ARG0',
        ('add-02', 'probation'): 'ARG1',
        ('probation', 'contrast-01'): 'ARG1-of',
        ('contrast-01', 'replace-01'): 'ARG2',
        ('replace-01', 'probation'): 'ARG1',
        ('replace-01', 'time'): 'ARG2',
        ('replace-01', 'just'): 'mod',
        ('time', 'jail'): 'mod',
    }
    amr_node: Node = generate_amr_node_for_vector_of_parents(
        identified_concepts, vector_of_parents, relations_dict)
    generated_amr_str = amr_node.amr_print_with_reentrancy()

    smatch = calculate_smatch(generated_amr_str, amr_str)
    assert smatch == 1
def modify_node_to_tokens_alignment(amr: AMR, alignment_mapping: Dict[int, int]):
    """
    Rewrite every token index in ``amr.node_to_tokens`` through
    ``alignment_mapping``.

    An alignment entry is either a token index string (``'7'``) or a
    ``(token, parent)`` tuple (``('2', 'n')``); in both cases the token is
    replaced with ``str(alignment_mapping[int(token)])`` and, for tuples, the
    parent is kept. The existing ``node_to_tokens`` dictionary is updated in
    place (each key gets a new list).

    :param amr: object exposing a ``node_to_tokens`` mapping
    :param alignment_mapping: old token index -> new token index
    :raises KeyError: if a token index is missing from ``alignment_mapping``;
        the entry being processed is then left unchanged
    """
    # Snapshot the items so values can be reassigned while looping.
    for node, alignments in list(amr.node_to_tokens.items()):
        remapped = []
        for alignment in alignments:
            # isinstance (not an exact type check) so tuple subclasses such
            # as namedtuples are also treated as (token, parent) pairs.
            if isinstance(alignment, tuple):
                token, parent = alignment
                remapped.append((str(alignment_mapping[int(token)]), parent))
            else:
                remapped.append(str(alignment_mapping[int(alignment)]))
        # Assign only once the whole list is built, so a failed lookup does
        # not leave this entry half-rewritten.
        amr.node_to_tokens[node] = remapped
def test_generate_parent_vector_example_2():
    """Parent vectors for an AMR with re-entrant nodes, asking for up to 2 vectors."""
    amr_str = """(m / man~e.2 :ARG1-of (m2 / marry-01~e.1) :ARG0-of (l / love-01~e.9 :ARG1~e.10 (y / you~e.11) :ARG1-of (r / real-04~e.6) :condition-of~e.4 (a3 / and~e.16 :op1 (g / go-06~e.14 :ARG2 (a / ahead~e.15) :mod (j / just~e.13)) :op2 (o2 / or~e.22 :op1 (f / file-01~e.17 :ARG4~e.18 (d / divorce-01~e.19) :time (n / now~e.20)) :op2 (m3 / move-01~e.25 :ARG2 (o / out-06~e.26 :ARG2~e.27 (h / house~e.29 :poss~e.28 m~e.28)) :time n~e.30 :mod (a2 / at-least~e.23,24))))))"""
    amr = AMR.parse_string(amr_str)

    concepts = IdentifiedConcepts()
    concepts.ordered_concepts = [
        Concept('', 'ROOT'),          # 0
        Concept('m2', 'marry-01'),    # 1
        Concept('m', 'man'),          # 2
        Concept('r', 'real-04'),      # 3
        Concept('l', 'love-01'),      # 4
        Concept('y', 'you'),          # 5
        Concept('j', 'just'),         # 6
        Concept('g', 'go-06'),        # 7
        Concept('a', 'ahead'),        # 8
        Concept('a3', 'and'),         # 9
        Concept('f', 'file-01'),      # 10
        Concept('d', 'divorce-01'),   # 11
        Concept('n', 'now'),          # 12
        Concept('o2', 'or'),          # 13
        Concept('a2', 'at-least'),    # 14
        Concept('m3', 'move-01'),     # 15
        Concept('o', 'out-06'),       # 16
        Concept('h', 'house'),        # 17
    ]

    actual = generate_parent_vectors(amr, concepts, 2)

    # the two vectors differ only at index 12 ('now': parent 10 vs parent 15)
    expected = [
        (-1, 2, 0, 4, 2, 4, 7, 9, 7, 4, 13, 10, 10, 9, 15, 13, 15, 16),
        (-1, 2, 0, 4, 2, 4, 7, 9, 7, 4, 13, 10, 15, 9, 15, 13, 15, 16),
    ]
    assert_parent_vectors(expected, actual)
def test_create_from_amr_example_4():
    """An AMR with unaligned concepts yields ``ordered_concepts = None``."""
    amr_str = """(i / intensify-01~e.7 :li~e.0 -1~e.0 :ARG1 (c / contradiction~e.3) :ARG0-of (m / make-02~e.9 :ARG1 (c2 / control-01~e.12,13,14 :polarity - :ARG1 (s / situation~e.11))) :ARG1-of (b / bind-02~e.5))"""
    amr = AMR.parse_string(amr_str)

    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_3', amr)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_3'
    # None is expected: not all concepts are aligned and the unalignment
    # tolerance is left at its default (0)
    expected.ordered_concepts = None
    assert_identified_concepts(expected, actual)
def test_create_from_custom_amr_example_1():
    """Concept ordering for an AMR assembled by hand instead of parsed."""
    amr = AMR()
    amr.node_to_concepts = {
        'i': 'it',
        'v': 'vigorous',
        'a': 'advocate-01',
        'r': 'recommend-01',
    }
    amr.node_to_tokens = {'i': ['0'], 'v': ['3'], 'a': ['4'], 'r': ['1']}
    amr.relation_to_tokens = {'manner': [('2', 'a')]}
    amr['i'] = {}
    amr['v'] = {}
    amr['a'] = {'ARG1': [('i',)], 'manner': [('v',)]}
    amr['r'] = {'ARG1': [('a',)]}

    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_1', amr)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_1'
    expected.ordered_concepts = [
        Concept('i', 'it'),
        Concept('r', 'recommend-01'),
        Concept('v', 'vigorous'),
        Concept('a', 'advocate-01'),
    ]
    assert_identified_concepts(expected, actual)
def test_create_custom_AMR_example_no_exception():
    """Smoke test: building a CustomizedAMR from a large AMR must not raise."""
    amr_str = """(p / pay-01~e.16 :ARG0 (p2 / person :mod (s / star~e.2 :mod (p3 / pop~e.1)) :mod (d3 / debt~e.13 :consist-of (m / monetary-quantity :quant 240000000~e.10,11 :unit (d2 / dollar~e.9)) :ARG1-of (s2 / say-01~e.6))) :ARG1 (m2 / monetary-quantity :quant 6~e.17 :unit (f / figure~e.18)) :ARG2~e.26 (a / and~e.30 :op1 (d / doctor~e.29 :mod (v / voodoo~e.28) :mod (a2 / another~e.27)) :op2 (w / woman~e.34 :wiki - :name (n / name~e.35 :op1 "Samia"~e.36) :mod (m3 / mystery~e.32) :mod (c2 / country :wiki "Egypt" :name (n2 / name~e.35 :op1 "Egypt"~e.33)) :ARG1-of (c3 / come-01~e.39 :ARG4~e.40 p2~e.41 :accompanier~e.42 (l / letter~e.44 :mod~e.45 (g / greet-01~e.46 :ARG0~e.47 (p7 / person :ARG0-of (h2 / have-org-role-91 :ARG1 c6 :ARG2 (p4 / prince~e.53 :ARG1-of (r2 / rank-01~e.51 :ARG1-of (h / high-02~e.49)))) :ARG0-of (p5 / purport-01 :ARG1 (p6 / person :wiki "Mohammed_bin_Nawwaf_bin_Abdulaziz" :name (n4 / name~e.35 :op1 "Nawaf"~e.56 :op2 "Bin"~e.57 :op3 "Abdulaziz"~e.58 :op4 "Al"~e.59 :op5 "Saud"~e.61) :ARG0-of (h3 / have-org-role-91~e.68 :ARG1 (c6 / country :wiki "Saudi_Arabia" :name (n5 / name~e.35 :op1 "Saudi"~e.69 :op2 "Arabia"~e.70)) :ARG2 (c5 / chief~e.65 :topic~e.66 (i / intelligence~e.67)) :time (n6 / now~e.63)))))))))) :ARG3~e.19 (c / cleanse-01~e.22 :manner (r / ritual~e.21) :ARG0-of (u / use-01~e.23 :ARG1 (b / blood~e.25 :mod (s3 / sheep~e.24)))))"""
    parsed = AMR.parse_string(amr_str)
    # no assertion: the test passes as long as nothing raises
    customized = CustomizedAMR()
    customized.create_custom_AMR(parsed)
def test__create_from_amr_with_2_polarites():
    """Concept ordering when the AMR has two polarity literals (third Concept argument 0 and 1)."""
    amr_str = """(a / and~e.0 :op2 (p2 / practice-01~e.13 :ARG1 (l / loan-01~e.12 :ARG2 (p / person~e.11 :ARG0-of~e.11 (s / study-01~e.11))) :mod (s2 / sane~e.10 :polarity~e.10 -~e.10) :ARG1-of (i2 / identical-01~e.16 :ARG2~e.19 (p3 / practice-01~e.24 :ARG1 (l2 / loan-01~e.23 :ARG1 (m / mortgage-01~e.22)) :mod (s3 / sane~e.21 :polarity~e.21 -~e.21)) :manner (w / way~e.18 :mod (e / every~e.18))) :ARG0-of (c2 / cause-01~e.3,8 :ARG1 (b / be-located-at-91~e.5,7 :ARG1 (t / they~e.4) :ARG2 (t2 / there~e.6)) :mod (o / only~e.2))))"""
    amr = AMR.parse_string(amr_str)

    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_2_polarities', amr)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_2_polarities'
    expected.ordered_concepts = [
        Concept('a', 'and'),
        Concept('o', 'only'),
        Concept('c2', 'cause-01'),
        Concept('t', 'they'),
        Concept('b', 'be-located-at-91'),
        Concept('t2', 'there'),
        Concept('-', '-', 0),
        Concept('s2', 'sane'),
        Concept('s', 'study-01'),
        Concept('p', 'person'),
        Concept('l', 'loan-01'),
        Concept('p2', 'practice-01'),
        Concept('i2', 'identical-01'),
        Concept('e', 'every'),
        Concept('w', 'way'),
        Concept('-', '-', 1),
        Concept('s3', 'sane'),
        Concept('m', 'mortgage-01'),
        Concept('l2', 'loan-01'),
        Concept('p3', 'practice-01'),
    ]
    assert_identified_concepts(expected, actual)
def test_generate_parent_list_vector_ex_1():
    """Parent list vector for the small recommend/advocate AMR."""
    amr_str = """(r / recommend-01~e.1 :ARG1 (a / advocate-01~e.4 :ARG1 (i / it~e.0) :manner~e.2 (v / vigorous~e.3)))"""
    amr = AMR.parse_string(amr_str)
    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_id_1', amr)
    add_false_root(concepts)

    actual = generate_parent_list_vector(amr, concepts)

    # i r v a
    # 1 2 3 4
    expected = [[-1], [4], [0], [4], [2]]
    failure_message = str(actual) + ' should be' + str(expected)
    assert actual == expected, failure_message
def map_to_amr_dataset_dict(dataset_dict):
    """
    Parse the AMR string of every record in every data set.

    Input:  {dataset: [sentence, amr_str, amr_id]}
    Output: {dataset: [sentence, amr: AMR, amr_id]}

    Each output record is a new ``(sentence, amr, amr_id)`` tuple; the input
    dictionary and its lists are left untouched.
    """
    # TODO: util for amr_str -> custom_amr
    return {
        dataset: [
            (record[0], AMR.parse_string(record[1]), record[2])
            for record in records
        ]
        for dataset, records in dataset_dict.items()
    }
def test_generate_parent_vector_example_1():
    """
    Parent vector for the small recommend/advocate AMR.

    This test used to be called ``test_generate_parent_vector_example_2``,
    the same name as an earlier test in this file (the man/marry AMR). Python
    keeps only the last definition of a module-level name, so the earlier
    test was silently replaced and never ran. The new name follows the other
    tests in this file that use this AMR (``..._example_1`` / ``..._ex_1``).
    """
    amr_str = """(r / recommend-01~e.1 :ARG1 (a / advocate-01~e.4 :ARG1 (i / it~e.0) :manner~e.2 (v / vigorous~e.3)))"""
    amr: AMR = AMR.parse_string(amr_str)
    identified_concepts = IdentifiedConcepts()
    identified_concepts.ordered_concepts = [
        Concept('', 'ROOT'),
        Concept('i', 'it'),
        Concept('r', 'recommend-01'),
        Concept('v', 'vigorous'),
        Concept('a', 'advocate-01'),
    ]
    generated_parent_vector = generate_parent_vectors(amr, identified_concepts)
    expected_parent_vector = [[-1, 4, 0, 4, 2]]
    assert_parent_vectors(expected_parent_vector, generated_parent_vector)
def test_create_from_amr_example_1():
    """Concept ordering for the small recommend/advocate AMR."""
    amr_str = """(r / recommend-01~e.1 :ARG1 (a / advocate-01~e.4 :ARG1 (i / it~e.0) :manner~e.2 (v / vigorous~e.3)))"""
    amr = AMR.parse_string(amr_str)

    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_1', amr)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_1'
    expected.ordered_concepts = [
        Concept('i', 'it'),
        Concept('r', 'recommend-01'),
        Concept('v', 'vigorous'),
        Concept('a', 'advocate-01'),
    ]
    assert_identified_concepts(expected, actual)
def modify_sentence_and_alignment_for_person_or_organization(
        amr: AMR, sentence: str, node_var, to_remove_tokens,
        metadata_dict: Dict[int, List[str]]):
    """
    Replace the tokens of a person/organization name in the sentence with the
    upper-cased concept of node_var and re-index the alignments.

    :param amr: AMR whose node_to_tokens entry for node_var is updated here;
        it is also handed to modify_node_to_tokens_alignment and
        modify_relation_to_tokens_alignment together with the index mapping
    :param sentence: whitespace separated sentence
    :param node_var: variable of the person/organization node
    :param to_remove_tokens: list of tokens forming the name to be replaced
    :param metadata_dict: token index -> replaced tokens; re-keyed in place
        with the new indexes and extended with one entry per replacement
    :return: the sentence with the name replaced by the upper-cased concept
    """
    sentence_tokens = sentence.split()
    n = len(sentence_tokens)
    k = len(to_remove_tokens)
    # start index of every token-level occurrence of to_remove_tokens
    removal_indexes = [
        i for i in range(n - k + 1)
        if sentence_tokens[i:i + k] == to_remove_tokens
    ]

    # create a mapping between old and new alignment
    alignment_mapping = construct_alignment_mapping(n, k, removal_indexes)

    # modify alignment
    # metadata alignment: first collect every entry whose index changes ...
    old_metadata_dict = deepcopy(metadata_dict)
    moved_entries = []
    for old_index, values in old_metadata_dict.items():
        new_index = alignment_mapping[old_index]
        if new_index != old_index:
            moved_entries.append((old_index, new_index, values))
    # ... then delete all old keys before inserting any new one. Deleting and
    # inserting in a single pass loses an entry whenever a new index equals
    # the old index of an entry that is visited later.
    for old_index, _, _ in moved_entries:
        del metadata_dict[old_index]
    for _, new_index, values in moved_entries:
        metadata_dict[new_index] = values

    # node_to_tokens
    modify_node_to_tokens_alignment(amr, alignment_mapping)
    # make sure the new PERSON/ORGANIZATION node is aligned
    # NOTE(review): the indexes are appended whether or not the entry already
    # existed, and they are positions in the original sentence (not passed
    # through alignment_mapping) - confirm both are intended.
    if node_var not in amr.node_to_tokens:
        amr.node_to_tokens[node_var] = []
    for removal_index in removal_indexes:
        amr.node_to_tokens[node_var].append(str(removal_index))

    # relation_to_tokens
    modify_relation_to_tokens_alignment(amr, alignment_mapping)

    # modify sentence
    new_token = amr.node_to_concepts[node_var].upper()
    # make sure all occurrences of to_remove_tokens are removed
    # NOTE(review): str.replace works on the joined string, so it can also
    # match text that does not start/end on a token boundary, which
    # removal_indexes does not account for.
    new_sentence = ' '.join(sentence_tokens)
    substring_to_replace = ' '.join(to_remove_tokens)
    replacement_indexes = get_indices_of_sublist_in_list(
        sentence_tokens, to_remove_tokens)
    new_sentence = new_sentence.replace(substring_to_replace, new_token)

    # construct metadata
    for replacement_index in replacement_indexes:
        # need to use alignment_mapping in case the same token list occurs
        # more than once
        metadata_dict[alignment_mapping[
            replacement_index]] = substring_to_replace.split()
    return new_sentence
def test_generate_parent_list_vector_with_polarity():
    """
    Check generate_parent_list_vector on an AMR that contains a polarity
    literal (-) and a date-entity with a numeric literal (2012).
    """
    parsed_amr = AMR.parse_string(
        """(y2 / year~e.4 :time-of~e.5 (r / recover-01~e.7 :ARG1-of (e / expect-01~e.6 :polarity -~e.6)) :ARG1-of (p / possible-01~e.1) :domain~e.2 (d / date-entity~e.4 :year~e.4 2012~e.0))"""
    )

    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_polarity', parsed_amr)
    add_false_root(concepts)

    # concept positions used by the expected vector (position 0 is
    # presumably the false root added above):
    #   2012  p  d  y2  -  e  r
    #   1     2  3  4   5  6  7
    expected = [[-1], [3], [4], [4], [0], [6], [7], [4]]
    actual = generate_parent_list_vector(parsed_amr, concepts)

    assert actual == expected, str(actual) + ' should be' + str(expected)
def test_create_from_amr_example_3():
    """
    Check IdentifiedConcepts.create_from_amr on an AMR in which several
    nodes carry no alignment: the ordered concepts are expected to be None.
    """
    parsed_amr = AMR.parse_string(
        """(d / difficult~e.5 :domain~e.4 (r / reach-01~e.7 :ARG1 (c / consensus~e.0 :topic~e.1 (c2 / country :wiki "India" :name (n / name :op1 "India"~e.2))) :time~e.8 (m / meet-03~e.11 :ARG0 (o / organization :wiki "Nubolt12_632_6421.19clear_Suppliers_Group" :name (n2 / name :op1 "NSG"~e.10)) :time~e.12 (d2 / date-entity :year 2007~e.14 :month~e.13 11~e.13))))"""
    )
    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_3', parsed_amr)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_3'
    # None is expected because not all concepts are aligned and the
    # unalignment tolerance is left at its default (0)
    expected.ordered_concepts = None

    assert_identified_concepts(expected, actual)
def generate_dataset_statistics(sentence_amr_str_triples: List[Tuple[str, str, str]],
                                filters):
    """
    Count how many instances survive preprocessing and how many of those
    survive the given filters.

    Each (sentence, amr_str, amr_id) triple is parsed, run through the
    preprocessing steps and converted to a CustomizedAMR. Instances for which
    any of these stages raises are skipped (best effort) and only reported
    through a debug log message.

    :param sentence_amr_str_triples: (sentence, amr_str, amr_id) triples
    :param filters: filters added one by one to CustomizedAMRDataFiltering
    :return: (instances, filtered_instances) - the number of instances that
        were successfully preprocessed and the number of those returned by
        the filtering
    """
    # local import so the block does not depend on the module's import list
    import logging
    logger = logging.getLogger(__name__)

    sentence_amr_id = []
    amr_preprocessing_fails = 0
    for sentence, amr_str, amr_id in sentence_amr_str_triples:
        try:
            amr = AMR.parse_string(amr_str)
            preprocessing_steps: List[PreprocessingStep] = [
                HaveOrgPreprocessingStep(),
                NamedEntitiesPreprocessingStep(),
                DateEntitiesPreprocessingStep(),
                TemporalQuantitiesPreprocessingStep(),
                QuantitiesPreprocessingStep()
            ]
            # NOTE(review): the preprocessed sentence is discarded and the
            # original sentence is stored below - confirm this is intended.
            new_amr, _, _ = apply_preprocessing_steps_on_instance(
                amr, sentence, preprocessing_steps)
            custom_amr = amr_data.CustomizedAMR()
            custom_amr.create_custom_AMR(new_amr)
            sentence_amr_id.append((sentence, custom_amr, amr_id))
        except Exception:
            # deliberate best effort: a failing instance must not abort the
            # statistics, but it should not vanish without a trace either
            amr_preprocessing_fails += 1
            logger.debug('preprocessing failed for amr %s', amr_id,
                         exc_info=True)
    if amr_preprocessing_fails:
        logger.debug('%d instances failed amr parsing/preprocessing',
                     amr_preprocessing_fails)

    # apply filters
    filtering = CustomizedAMRDataFiltering(sentence_amr_id)
    for f in filters:
        filtering.add_filter(f)
    new_sentence_amr_pairs = filtering.execute()

    # number of instances that passed preprocessing / that passed the filters
    instances = len(sentence_amr_id)
    filtered_instances = len(new_sentence_amr_pairs)
    return instances, filtered_instances
def test_generate_parent_list_vector_reentrancy_ex_2():
    """
    Check generate_parent_list_vector on an AMR with a reentrancy: the
    variable i is used under both foolish (:domain) and do-02 (:ARG0), so its
    entry in the expected vector lists two parents.
    """
    parsed_amr = AMR.parse_string(
        """(f / foolish~e.3 :mode~e.7 interrogative~e.7 :domain~e.0,2 (i / i~e.1) :condition~e.4 (d / do-02~e.5 :ARG0 i :ARG1 (t / this~e.6)))"""
    )

    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_2_reentrancy', parsed_amr)
    add_false_root(concepts)

    # concept positions used by the expected vector (position 0 is
    # presumably the false root added above):
    #   i  f  d  t  interrogative
    #   1  2  3  4  5
    expected = [[-1], [3, 2], [0], [2], [3], [2]]
    actual = generate_parent_list_vector(parsed_amr, concepts)

    assert actual == expected, str(actual) + ' should be' + str(expected)