def test_parse_example_1():
    """Parse a small aligned AMR string and compare it with a hand-built AMR."""
    source = """(r / recommend-01~e.1 :ARG1 (a / advocate-01~e.4 :ARG1 (i / it~e.0) :manner~e.2 (v / vigorous~e.3)))"""
    actual = AMR.parse_string(source)

    expected = AMR()
    expected.node_to_concepts = {
        'i': 'it',
        'v': 'vigorous',
        'a': 'advocate-01',
        'r': 'recommend-01',
    }
    expected.node_to_tokens = {
        'i': ['0'],
        'v': ['3'],
        'a': ['4'],
        'r': ['1'],
    }
    expected.relation_to_tokens = {'manner': [('2', 'a')]}
    # Outgoing edges per node variable, assigned in the same order as before.
    outgoing_edges = [
        ('i', {}),
        ('v', {}),
        ('a', {'ARG1': [('i',)], 'manner': [('v',)]}),
        ('r', {'ARG1': [('a',)]}),
    ]
    for variable, edges in outgoing_edges:
        expected[variable] = edges
    expected.roots = ['r']

    assert_amr_graph_dictionaries(expected, actual)
def generate_test_data(file_path, verbose=True):
    """Build (sentence, concepts_metadata) test instances from a data file.

    Every record returned by ``input_file_parser.extract_data_records`` is
    first parsed with ``AMR.parse_string`` (the parsed graph is not used, the
    call only rejects records whose AMR cannot be parsed) and then passed
    through ``NamedEntitiesReplacer.process_sentence``.  Records that raise at
    any of these steps are logged and skipped.

    :param file_path: path handed to ``input_file_parser.extract_data_records``
    :param verbose: when exactly ``False``, logging up to WARNING level is
        disabled globally via ``logging.disable`` (it is not re-enabled here)
    :return: list of ``(new_sentence, concepts_metadata)`` tuples, where
        ``concepts_metadata`` maps ``named_entity[0]`` to ``named_entity[1]``
        for every named entity reported for the sentence
    """
    if verbose is False:
        logging.disable(logging.WARNING)

    sentence_amr_triples = input_file_parser.extract_data_records(file_path)
    test_data = []
    for i, (sentence, amr_str, _amr_id) in enumerate(tqdm(sentence_amr_triples)):
        try:
            # logging.warn() is a deprecated alias that was removed in
            # Python 3.13; logging.warning() is the supported spelling.
            logging.warning("Started processing example %d", i)
            # Parsing is kept purely as a validity check on the AMR string.
            AMR.parse_string(amr_str)
            new_sentence, named_entities = NamedEntitiesReplacer.process_sentence(sentence)
            concepts_metadata = {
                named_entity[0]: named_entity[1]
                for named_entity in named_entities
            }
            test_data.append((new_sentence, concepts_metadata))
        except Exception as e:
            # Best effort: a failing record is reported and left out of the result.
            logging.warning(e)
            logging.warning("Failed at: %d", i)
            logging.warning("%s\n", sentence)
    return test_data
def test_generate_parent_list_vector_with_2_polarites():
    """Parent list vector for an AMR that contains two polarity ('-') nodes."""
    source = """(a / and~e.0 :op2 (p2 / practice-01~e.13 :ARG1 (l / loan-01~e.12 :ARG2 (p / person~e.11 :ARG0-of~e.11 (s / study-01~e.11))) :mod (s2 / sane~e.10 :polarity~e.10 -~e.10) :ARG1-of (i2 / identical-01~e.16 :ARG2~e.19 (p3 / practice-01~e.24 :ARG1 (l2 / loan-01~e.23 :ARG1 (m / mortgage-01~e.22)) :mod (s3 / sane~e.21 :polarity~e.21 -~e.21)) :manner (w / way~e.18 :mod (e / every~e.18))) :ARG0-of (c2 / cause-01~e.3,8 :ARG1 (b / be-located-at-91~e.5,7 :ARG1 (t / they~e.4) :ARG2 (t2 / there~e.6)) :mod (o / only~e.2))))"""
    graph = AMR.parse_string(source)
    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_2_polarities', graph)
    add_false_root(concepts)

    actual = generate_parent_list_vector(graph, concepts)

    # concept order used by the expected vector:
    # a o c2 t b t2 - s2 s p  l  p2 i2 e  w  -  s3 m  l2 p3
    # 1 2 3  4 5 6  7 8  9 10 11 12 13 14 15 16 17 18 19 20
    expected = [
        [-1], [0], [3], [12], [5], [3], [5], [8], [12], [10], [11],
        [12], [1], [12], [15], [13], [17], [20], [19], [20], [13],
    ]
    assert actual == expected, f"{actual} should be{expected}"
def test_create_from_amr_example_2():
    """Ordered concepts for an AMR with a polarity node and a re-used variable."""
    source = """(a / and~e.0 :op2 (p / possible-01~e.8 :ARG1 (a3 / avoid-01~e.10 :ARG0 (h / he~e.7) :ARG1 (c / censure-01~e.12 :ARG1 h)) :ARG1-of (a2 / actual-02~e.9) :manner (p2 / promise-01~e.5 :polarity~e.2 -~e.2 :ARG0 h :mod (a4 / any~e.4))))"""
    graph = AMR.parse_string(source)
    # The customized AMR is built but not inspected by this test.
    customized = CustomizedAMR()
    customized.create_custom_AMR(graph)

    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_2', graph)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_2'
    concept_specs = [
        ('a', 'and'),
        ('-', '-', 0),
        ('a4', 'any'),
        ('p2', 'promise-01'),
        ('h', 'he'),
        ('p', 'possible-01'),
        ('a2', 'actual-02'),
        ('a3', 'avoid-01'),
        ('c', 'censure-01'),
    ]
    expected.ordered_concepts = [Concept(*spec) for spec in concept_specs]

    assert_identified_concepts(expected, actual)
def test_create_from_amr_example_reentrancy():
    """Ordered concepts for an AMR in which variable ``w`` is referenced repeatedly."""
    source = """(r / receive-01~e.4 :ARG0 (w / we~e.0) :ARG1 (t / thing~e.7 :ARG0-of~e.7 (r2 / remind-01~e.7 :ARG1 (p / pay-01~e.6 :ARG0 w) :ARG2 w)) :ARG2~e.8 (h / hospital~e.10) :time (n / now~e.2) :time (a / already~e.3))"""
    graph = AMR.parse_string(source)

    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_reentrancy', graph)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_reentrancy'
    concept_specs = [
        ('w', 'we'),
        ('n', 'now'),
        ('a', 'already'),
        ('r', 'receive-01'),
        ('p', 'pay-01'),
        ('r2', 'remind-01'),
        ('t', 'thing'),
        ('h', 'hospital'),
    ]
    expected.ordered_concepts = [Concept(*spec) for spec in concept_specs]

    assert_identified_concepts(expected, actual)
def read_original_graphs(file_type, filter_path="deft", cache=True):
    """
    Returns a list of (amr_id, sentence, AMR, CustomizedAMR) quadruples.

    For every matching corpus file the quadruples are loaded from its dump
    file when caching is allowed and the dump exists; otherwise they are
    generated from the original file and written to a fresh dump.
    :param file_type - data set partition (training, dev or test)
    :param filter_path - substring a file name must contain to be used
            (None falls back to "deft")
    :param cache - allow to load from dump file if true, else calculate from
            original file and save new dump
    """
    corpus_filter = "deft" if filter_path is None else filter_path
    partition_dir = AMR_ALIGNMENTS_SPLIT + "/" + file_type
    all_records = []
    corpus_files = sorted(
        name for name in listdir(partition_dir)
        if "dump" not in name and corpus_filter in name)

    for file_name in corpus_files:
        source_path = partition_dir + "/" + file_name
        dump_path = partition_dir + "/original_graphs_dumps/" + file_name + ".dump"
        print(source_path)

        if cache and path.exists(dump_path):
            print("cache")
            with open(dump_path, "rb") as dump_file:
                all_records += js.load(dump_file)
            continue

        print("generate")
        raw_records = input_file_parser.extract_data_records(source_path)
        file_records = []
        failures = 0
        for record in raw_records:
            try:
                graph = AMR.parse_string(record[1])
                customized = CustomizedAMR()
                customized.create_custom_AMR(graph)
                file_records.append((record[2], record[0], graph, customized))
            except Exception:
                # Records that cannot be parsed/customized are only counted.
                failures += 1

        dump_dir = path.dirname(dump_path)
        if not path.exists(dump_dir):
            makedirs(dump_dir)
        with open(dump_path, "wb") as dump_file:
            js.dump(file_records, dump_file)
        all_records += file_records
        print(("%d / %d in %s" % (failures, len(raw_records), source_path)))

    return all_records
def test_pre_and_post_processing_eg_2():
    """Round trip through train pre-processing and parent-vector post-processing.

    The AMR rebuilt from the parent vector must reach a smatch of 1 against
    the expected AMR string.
    """
    sentence = 'It is Santorum that is the by far major nonRomney candidate and Newt would appear to be the spoiler .'
    source = """(a / and~e.11 :op1 (c / candidate~e.10 :ARG1-of (m / major-02~e.8 :degree (b / by-far~e.6,7)) :mod (p3 / person~e.9 :polarity -~e.9 :wiki "Mitt_Romney"~e.9 :name (n2 / name~e.9 :op1 "Romney"~e.9)) :domain~e.1,4 (p2 / person :wiki "Rick_Santorum" :name (n / name :op1 "Santorum"~e.2))) :op2 (a2 / appear-02~e.14 :ARG1 (s / spoil-01~e.18 :ARG0 (p4 / person :wiki "Newt_Gingrich" :name (n3 / name :op1 "Newt"~e.12)))))"""
    parsed = AMR.parse_string(source)
    processed_amr, processed_sentence, metadata = train_pre_processing(parsed, sentence)

    concepts = IdentifiedConcepts()
    concepts.create_from_amr('', processed_amr)
    add_false_root(concepts)
    parents = generate_parent_list_vector(processed_amr, concepts)
    post_processing_on_parent_vector(concepts, parents, processed_sentence, metadata)

    # (parent concept, child concept) -> relation label
    relations = {
        ('and', 'candidate'): 'op1',
        ('and', 'appear-02'): 'op2',
        ('candidate', 'major-02'): 'ARG1-of',
        ('candidate', 'person'): 'mod',
        ('major-02', 'by-far'): 'degree',
        ('person', '-'): 'polarity',
        ('person', 'Mitt_Romney'): 'wiki',
        ('person', 'name'): 'name',
        ('person', 'Santorum'): 'wiki',
        ('name', 'Romney'): 'op1',
        ('name', 'Santorum'): 'op1',
        ('appear-02', 'spoil-01'): 'ARG1',
        ('spoil-01', 'person'): 'ARG0',
        ('person', 'Newt'): 'wiki',
        ('name', 'Newt'): 'op1',
    }
    rebuilt_root = generate_amr_node_for_vector_of_parents(concepts, parents, relations)
    rebuilt = rebuilt_root.amr_print_with_reentrancy()

    # The line break inside the expected string is part of the original literal.
    expected = """(a / and~e.11 :op1 (c / candidate~e.10 :ARG1-of (m / major-02~e.8 :degree (b / by-far~e.6,7)) :mod (p3 / person~e.9 :polarity -~e.9 :wiki "Mitt_Romney"~e.9 :name (n2 / name~e.9 :op1 "Romney"~e.9)) :mod~e.1,4 (p2 / person :wiki "Santorum" :name (n / name :op1 "Santorum"~e.2))) :op2 (a2 / appear-02~e.14 :ARG1 (s / spoil-01~e.18 :ARG0 (p4 \n/ person :wiki "Newt" :name (n3 / name :op1 "Newt"~e.12)))))"""
    score = calculate_smatch(rebuilt, expected)
    assert score == 1
def test_parse_example_with_reentrancy():
    """Parse an AMR that re-uses variable ``w`` and compare with a hand-built AMR."""
    source = """(r / receive-01~e.4 :ARG0 (w / we~e.0) :ARG1 (t / thing~e.7 :ARG0-of~e.7 (r2 / remind-01~e.7 :ARG1 (p / pay-01~e.6 :ARG0 w) :ARG2 w)) :ARG2~e.8 (h / hospital~e.10) :time (n / now~e.2) :time (a / already~e.3))"""
    actual = AMR.parse_string(source)

    expected = AMR()
    expected.node_to_concepts = {
        'w': 'we',
        'p': 'pay-01',
        'r2': 'remind-01',
        't': 'thing',
        'h': 'hospital',
        'n': 'now',
        'a': 'already',
        'r': 'receive-01',
    }
    expected.node_to_tokens = {
        'p': ['6'],
        'r2': ['7'],
        'w': ['0'],
        't': ['7'],
        'h': ['10'],
        'n': ['2'],
        'a': ['3'],
        'r': ['4'],
    }
    expected.relation_to_tokens = {
        'ARG0-of': [('7', 't')],
        'ARG2': [('8', 'r')],
    }
    # Outgoing edges per node variable, assigned in the same order as before.
    outgoing_edges = [
        ('w', {}),
        ('p', {'ARG0': [('w',)]}),
        ('r2', {'ARG1': [('p',)], 'ARG2': [('w',)]}),
        ('t', {'ARG0-of': [('r2',)]}),
        ('h', {}),
        ('n', {}),
        ('a', {}),
        ('r', {
            'ARG0': [('w',)],
            'ARG1': [('t',)],
            'ARG2': [('h',)],
            'time': [('n',), ('a',)],
        }),
    ]
    for variable, edges in outgoing_edges:
        expected[variable] = edges
    expected.roots = ['r']

    assert_amr_graph_dictionaries(expected, actual)
def test_parse_example2_with_2polarities():
    """Parse an AMR with two ``:polarity -`` edges and compare with a hand-built AMR."""
    source = """(c / contrast-01~e.0 :ARG2 (a2 / authorize-01~e.6 :ARG1 (o2 / or~e.9 :op1 (a / approve-01 :ARG0 p :ARG1 (p2 / pay-01 :purpose (e2 / employ-01 :polarity -~e.2,11))) :op2 (d / deny-01 :ARG0 p :ARG1 p2)) :ARG2 (p / person :ARG0-of (r / represent-01 :ARG1 (o / organization :mod (e / employ-01 :polarity -~e.2,11))))))"""
    actual = AMR.parse_string(source)

    expected = AMR()
    expected.node_to_concepts = {
        'e2': 'employ-01',
        'p2': 'pay-01',
        'a': 'approve-01',
        'd': 'deny-01',
        'o2': 'or',
        'e': 'employ-01',
        'o': 'organization',
        'r': 'represent-01',
        'p': 'person',
        'a2': 'authorize-01',
        'c': 'contrast-01',
    }
    expected.node_to_tokens = {
        # the shared '-' constant carries (token, parent variable) pairs
        '-': [('2', 'e2'), ('11', 'e2'), ('2', 'e'), ('11', 'e')],
        'o2': ['9'],
        'a2': ['6'],
        'c': ['0'],
    }
    expected.relation_to_tokens = {}
    # Outgoing edges per node variable, assigned in the same order as before.
    outgoing_edges = [
        ('-', {}),
        ('e2', {'polarity': [('-',)]}),
        ('p2', {'purpose': [('e2',)]}),
        ('p', {'ARG0-of': [('r',)]}),
        ('a', {'ARG0': [('p',)], 'ARG1': [('p2',)]}),
        ('d', {'ARG0': [('p',)], 'ARG1': [('p2',)]}),
        ('o2', {'op1': [('a',)], 'op2': [('d',)]}),
        ('e', {'polarity': [('-',)]}),
        ('o', {'mod': [('e',)]}),
        ('r', {'ARG1': [('o',)]}),
        ('a2', {'ARG1': [('o2',)], 'ARG2': [('p',)]}),
        ('c', {'ARG2': [('a2',)]}),
    ]
    for variable, edges in outgoing_edges:
        expected[variable] = edges
    expected.roots = ['c']

    assert_amr_graph_dictionaries(expected, actual)
def test_generate_amr_node_for_vector_of_parents_example_1():
    """Rebuild an AMR from its parent list vector and compare it via smatch.

    The AMR string is parsed, turned into identified concepts plus a parent
    list vector, converted back into a node tree with the given relation
    labels, printed, and scored against the original string; the score must
    be exactly 1.
    """
    amr_str = """(s / suppose-01~e.1 :ARG0 (i / i~e.0) :ARG1 (p / possible-01~e.3 :ARG1 (a / add-02~e.4 :ARG0 (y / you~e.2) :ARG1 (p2 / probation~e.5 :ARG1-of (c / contrast-01~e.7 :ARG2 (r / replace-01~e.12 :ARG1 p2 :ARG2~e.13 (t / time~e.15 :mod (j / jail~e.14)) :mod (j2 / just~e.10)))))))"""
    amr = AMR.parse_string(amr_str)
    identified_concepts = IdentifiedConcepts()
    identified_concepts.create_from_amr('amr_1', amr)
    add_false_root(identified_concepts)
    vector_of_parents = generate_parent_list_vector(amr, identified_concepts)
    # transform parent vectors to vector of parents
    # i s y p a p2 c j2 r j  t
    # 1 2 3 4 5 6  7 8  9 10 11
    # (parent concept, child concept) -> relation label
    relations_dict = {
        ('suppose-01', 'i'): 'ARG0',
        ('suppose-01', 'possible-01'): 'ARG1',
        ('possible-01', 'add-02'): 'ARG1',
        ('add-02', 'you'): 'ARG0',
        ('add-02', 'probation'): 'ARG1',
        ('probation', 'contrast-01'): 'ARG1-of',
        ('contrast-01', 'replace-01'): 'ARG2',
        ('replace-01', 'probation'): 'ARG1',
        ('replace-01', 'time'): 'ARG2',
        ('replace-01', 'just'): 'mod',
        ('time', 'jail'): 'mod',
    }
    amr_node = generate_amr_node_for_vector_of_parents(
        identified_concepts, vector_of_parents, relations_dict)
    generated_amr_str = amr_node.amr_print_with_reentrancy()
    smatch = calculate_smatch(generated_amr_str, amr_str)
    assert smatch == 1
def test_generate_parent_vector_example_2():
    """Parent vectors for a re-entrant AMR, requesting up to 2 vectors.

    The two expected vectors differ only at index 12 (concept ``n``), which
    is attached to index 10 in the first and to index 15 in the second.
    """
    source = """(m / man~e.2 :ARG1-of (m2 / marry-01~e.1) :ARG0-of (l / love-01~e.9 :ARG1~e.10 (y / you~e.11) :ARG1-of (r / real-04~e.6) :condition-of~e.4 (a3 / and~e.16 :op1 (g / go-06~e.14 :ARG2 (a / ahead~e.15) :mod (j / just~e.13)) :op2 (o2 / or~e.22 :op1 (f / file-01~e.17 :ARG4~e.18 (d / divorce-01~e.19) :time (n / now~e.20)) :op2 (m3 / move-01~e.25 :ARG2 (o / out-06~e.26 :ARG2~e.27 (h / house~e.29 :poss~e.28 m~e.28)) :time n~e.30 :mod (a2 / at-least~e.23,24))))))"""
    graph = AMR.parse_string(source)

    concept_specs = [
        ('', 'ROOT'),         # 0
        ('m2', 'marry-01'),   # 1
        ('m', 'man'),         # 2
        ('r', 'real-04'),     # 3
        ('l', 'love-01'),     # 4
        ('y', 'you'),         # 5
        ('j', 'just'),        # 6
        ('g', 'go-06'),       # 7
        ('a', 'ahead'),       # 8
        ('a3', 'and'),        # 9
        ('f', 'file-01'),     # 10
        ('d', 'divorce-01'),  # 11
        ('n', 'now'),         # 12
        ('o2', 'or'),         # 13
        ('a2', 'at-least'),   # 14
        ('m3', 'move-01'),    # 15
        ('o', 'out-06'),      # 16
        ('h', 'house'),       # 17
    ]
    concepts = IdentifiedConcepts()
    concepts.ordered_concepts = [Concept(*spec) for spec in concept_specs]

    actual = generate_parent_vectors(graph, concepts, 2)

    expected = [
        (-1, 2, 0, 4, 2, 4, 7, 9, 7, 4, 13, 10, 10, 9, 15, 13, 15, 16),
        (-1, 2, 0, 4, 2, 4, 7, 9, 7, 4, 13, 10, 15, 9, 15, 13, 15, 16),
    ]
    assert_parent_vectors(expected, actual)
def test_create_from_amr_example_4():
    """``create_from_amr`` on an AMR with an unaligned ``:polarity -`` node."""
    source = """(i / intensify-01~e.7 :li~e.0 -1~e.0 :ARG1 (c / contradiction~e.3) :ARG0-of (m / make-02~e.9 :ARG1 (c2 / control-01~e.12,13,14 :polarity - :ARG1 (s / situation~e.11))) :ARG1-of (b / bind-02~e.5))"""
    graph = AMR.parse_string(source)

    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_3', graph)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_3'
    # return None as not all concepts are aligned + unalignment tolerance is default (0)
    expected.ordered_concepts = None

    assert_identified_concepts(expected, actual)
def test_create_custom_AMR_example_no_exception():
    """Smoke test: a large AMR must parse and convert without raising.

    There are no assertions; the test passes when no exception escapes.
    """
    source = """(p / pay-01~e.16 :ARG0 (p2 / person :mod (s / star~e.2 :mod (p3 / pop~e.1)) :mod (d3 / debt~e.13 :consist-of (m / monetary-quantity :quant 240000000~e.10,11 :unit (d2 / dollar~e.9)) :ARG1-of (s2 / say-01~e.6))) :ARG1 (m2 / monetary-quantity :quant 6~e.17 :unit (f / figure~e.18)) :ARG2~e.26 (a / and~e.30 :op1 (d / doctor~e.29 :mod (v / voodoo~e.28) :mod (a2 / another~e.27)) :op2 (w / woman~e.34 :wiki - :name (n / name~e.35 :op1 "Samia"~e.36) :mod (m3 / mystery~e.32) :mod (c2 / country :wiki "Egypt" :name (n2 / name~e.35 :op1 "Egypt"~e.33)) :ARG1-of (c3 / come-01~e.39 :ARG4~e.40 p2~e.41 :accompanier~e.42 (l / letter~e.44 :mod~e.45 (g / greet-01~e.46 :ARG0~e.47 (p7 / person :ARG0-of (h2 / have-org-role-91 :ARG1 c6 :ARG2 (p4 / prince~e.53 :ARG1-of (r2 / rank-01~e.51 :ARG1-of (h / high-02~e.49)))) :ARG0-of (p5 / purport-01 :ARG1 (p6 / person :wiki "Mohammed_bin_Nawwaf_bin_Abdulaziz" :name (n4 / name~e.35 :op1 "Nawaf"~e.56 :op2 "Bin"~e.57 :op3 "Abdulaziz"~e.58 :op4 "Al"~e.59 :op5 "Saud"~e.61) :ARG0-of (h3 / have-org-role-91~e.68 :ARG1 (c6 / country :wiki "Saudi_Arabia" :name (n5 / name~e.35 :op1 "Saudi"~e.69 :op2 "Arabia"~e.70)) :ARG2 (c5 / chief~e.65 :topic~e.66 (i / intelligence~e.67)) :time (n6 / now~e.63)))))))))) :ARG3~e.19 (c / cleanse-01~e.22 :manner (r / ritual~e.21) :ARG0-of (u / use-01~e.23 :ARG1 (b / blood~e.25 :mod (s3 / sheep~e.24)))))"""
    graph = AMR.parse_string(source)
    customized = CustomizedAMR()
    customized.create_custom_AMR(graph)
def test__create_from_amr_with_2_polarites():
    """Ordered concepts for an AMR with two '-' nodes (third Concept argument 0 and 1)."""
    source = """(a / and~e.0 :op2 (p2 / practice-01~e.13 :ARG1 (l / loan-01~e.12 :ARG2 (p / person~e.11 :ARG0-of~e.11 (s / study-01~e.11))) :mod (s2 / sane~e.10 :polarity~e.10 -~e.10) :ARG1-of (i2 / identical-01~e.16 :ARG2~e.19 (p3 / practice-01~e.24 :ARG1 (l2 / loan-01~e.23 :ARG1 (m / mortgage-01~e.22)) :mod (s3 / sane~e.21 :polarity~e.21 -~e.21)) :manner (w / way~e.18 :mod (e / every~e.18))) :ARG0-of (c2 / cause-01~e.3,8 :ARG1 (b / be-located-at-91~e.5,7 :ARG1 (t / they~e.4) :ARG2 (t2 / there~e.6)) :mod (o / only~e.2))))"""
    graph = AMR.parse_string(source)

    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_2_polarities', graph)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_2_polarities'
    concept_specs = [
        ('a', 'and'),
        ('o', 'only'),
        ('c2', 'cause-01'),
        ('t', 'they'),
        ('b', 'be-located-at-91'),
        ('t2', 'there'),
        ('-', '-', 0),
        ('s2', 'sane'),
        ('s', 'study-01'),
        ('p', 'person'),
        ('l', 'loan-01'),
        ('p2', 'practice-01'),
        ('i2', 'identical-01'),
        ('e', 'every'),
        ('w', 'way'),
        ('-', '-', 1),
        ('s3', 'sane'),
        ('m', 'mortgage-01'),
        ('l2', 'loan-01'),
        ('p3', 'practice-01'),
    ]
    expected.ordered_concepts = [Concept(*spec) for spec in concept_specs]

    assert_identified_concepts(expected, actual)
def map_to_amr_dataset_dict(dataset_dict):
    """
    Replace every AMR string in a dataset dictionary with its parsed AMR.

    Takes as input a dictionary of the form: {dataset: [sentence, amr_str, amr_id]}
    Outputs a dictionary of the form: {dataset: [sentence, amr: AMR, amr_id]}
    (each entry of the output lists is a 3-tuple)
    """

    # TODO: util for amr_str -> custom_amr
    def _with_parsed_amr(record):
        # Parse first, then assemble the tuple, mirroring the original order.
        parsed = AMR.parse_string(record[1])
        return record[0], parsed, record[2]

    return {
        dataset: [_with_parsed_amr(record) for record in records]
        for dataset, records in dataset_dict.items()
    }
def test_generate_parent_list_vector_ex_1():
    """Parent list vector for a small AMR without re-entrancies."""
    source = """(r / recommend-01~e.1 :ARG1 (a / advocate-01~e.4 :ARG1 (i / it~e.0) :manner~e.2 (v / vigorous~e.3)))"""
    graph = AMR.parse_string(source)
    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_id_1', graph)
    add_false_root(concepts)

    actual = generate_parent_list_vector(graph, concepts)

    # concept order used by the expected vector:
    # i r v a
    # 1 2 3 4
    expected = [[-1], [4], [0], [4], [2]]
    assert actual == expected, f"{actual} should be{expected}"
def test_create_from_amr_example_1():
    """Ordered concepts for a small, fully aligned AMR."""
    source = """(r / recommend-01~e.1 :ARG1 (a / advocate-01~e.4 :ARG1 (i / it~e.0) :manner~e.2 (v / vigorous~e.3)))"""
    graph = AMR.parse_string(source)

    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_1', graph)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_1'
    concept_specs = [
        ('i', 'it'),
        ('r', 'recommend-01'),
        ('v', 'vigorous'),
        ('a', 'advocate-01'),
    ]
    expected.ordered_concepts = [Concept(*spec) for spec in concept_specs]

    assert_identified_concepts(expected, actual)
def test_generate_parent_vector_example_2():
    """Single parent vector for a small AMR without re-entrancies.

    NOTE(review): another test visible in this source has the same function
    name; if both live in one module the later definition shadows the
    earlier one, so only one of them would run - confirm and rename.
    """
    source = """(r / recommend-01~e.1 :ARG1 (a / advocate-01~e.4 :ARG1 (i / it~e.0) :manner~e.2 (v / vigorous~e.3)))"""
    graph = AMR.parse_string(source)

    concept_specs = [
        ('', 'ROOT'),
        ('i', 'it'),
        ('r', 'recommend-01'),
        ('v', 'vigorous'),
        ('a', 'advocate-01'),
    ]
    concepts = IdentifiedConcepts()
    concepts.ordered_concepts = [Concept(*spec) for spec in concept_specs]

    actual = generate_parent_vectors(graph, concepts)

    expected = [[-1, 4, 0, 4, 2]]
    assert_parent_vectors(expected, actual)
def test_create_from_amr_example_3():
    """``create_from_amr`` on an AMR whose named-entity nodes lack alignments."""
    source = """(d / difficult~e.5 :domain~e.4 (r / reach-01~e.7 :ARG1 (c / consensus~e.0 :topic~e.1 (c2 / country :wiki "India" :name (n / name :op1 "India"~e.2))) :time~e.8 (m / meet-03~e.11 :ARG0 (o / organization :wiki "Nubolt12_632_6421.19clear_Suppliers_Group" :name (n2 / name :op1 "NSG"~e.10)) :time~e.12 (d2 / date-entity :year 2007~e.14 :month~e.13 11~e.13))))"""
    graph = AMR.parse_string(source)

    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_3', graph)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_3'
    # return None as not all concepts are aligned + unalignment tolerance is default (0)
    expected.ordered_concepts = None

    assert_identified_concepts(expected, actual)
def test_generate_parent_list_vector_with_polarity():
    """Parent list vector for an AMR with a polarity node and a literal year."""
    source = """(y2 / year~e.4 :time-of~e.5 (r / recover-01~e.7 :ARG1-of (e / expect-01~e.6 :polarity -~e.6)) :ARG1-of (p / possible-01~e.1) :domain~e.2 (d / date-entity~e.4 :year~e.4 2012~e.0))"""
    graph = AMR.parse_string(source)
    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_polarity', graph)
    add_false_root(concepts)

    actual = generate_parent_list_vector(graph, concepts)

    # concept order used by the expected vector:
    # 2012 p d y2 - e r
    # 1    2 3 4  5 6 7
    expected = [[-1], [3], [4], [4], [0], [6], [7], [4]]
    assert actual == expected, f"{actual} should be{expected}"
def generate_dataset_statistics(sentence_amr_str_triples: List[Tuple[str, str, str]], filters):
    """Count the instances that survive AMR preprocessing and then the filters.

    Each (sentence, amr_str, amr_id) triple is parsed, run through the
    preprocessing steps and converted to a ``CustomizedAMR``; triples that
    raise anywhere in that chain are dropped.  The surviving instances are
    then passed through ``CustomizedAMRDataFiltering`` with every filter
    from ``filters`` added.

    :param sentence_amr_str_triples: (sentence, amr_str, amr_id) triples
    :param filters: filters handed one by one to ``add_filter``
    :return: ``(instances, filtered_instances)`` - the number of triples that
        passed parsing/preprocessing and the number left after filtering
    """
    parsed_instances = []
    amr_preprocessing_fails = 0
    for sentence, amr_str, amr_id in sentence_amr_str_triples:
        try:
            parsed = AMR.parse_string(amr_str)
            steps = [
                HaveOrgPreprocessingStep(),
                NamedEntitiesPreprocessingStep(),
                DateEntitiesPreprocessingStep(),
                TemporalQuantitiesPreprocessingStep(),
                QuantitiesPreprocessingStep(),
            ]
            preprocessed_amr, _new_sentence, _ = apply_preprocessing_steps_on_instance(
                parsed, sentence, steps)
            customized = amr_data.CustomizedAMR()
            customized.create_custom_AMR(preprocessed_amr)
            # NOTE(review): the stored tuple keeps the original sentence, not
            # the preprocessed one - confirm this is intended.
            parsed_instances.append((sentence, customized, amr_id))
        except Exception:
            amr_preprocessing_fails += 1

    # apply filters
    filtering = CustomizedAMRDataFiltering(parsed_instances)
    for data_filter in filters:
        filtering.add_filter(data_filter)
    surviving_instances = filtering.execute()

    return len(parsed_instances), len(surviving_instances)
def test_generate_parent_list_vector_reentrancy_ex_2():
    """Parent list vector where concept ``i`` has two parents."""
    source = """(f / foolish~e.3 :mode~e.7 interrogative~e.7 :domain~e.0,2 (i / i~e.1) :condition~e.4 (d / do-02~e.5 :ARG0 i :ARG1 (t / this~e.6)))"""
    graph = AMR.parse_string(source)
    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_2_reentrancy', graph)
    add_false_root(concepts)

    actual = generate_parent_list_vector(graph, concepts)

    # concept order used by the expected vector:
    # i f d t interrogative
    # 1 2 3 4 5
    expected = [[-1], [3, 2], [0], [2], [3], [2]]
    assert actual == expected, f"{actual} should be{expected}"
def test_pre_and_post_processing_for_organization():
    """Round trip through pre/post-processing for an AMR with an organization.

    The AMR rebuilt from the parent vector must reach a smatch of 1 against
    the original AMR string.
    """
    sentence = 'Some propaganda activities of ZF have soon become viewed as jokes by the people .'
    source = """(b / become-01~e.7 :ARG1 a :ARG2 (v / view-02~e.8 :ARG0~e.11 (p2 / person~e.13) :ARG1 (a / activity-06~e.2 :ARG0~e.3 (o / organization :wiki ZF :name (n / name :op1 "ZF"~e.4)) :ARG1 (p / propaganda~e.1) :quant (s / some~e.0)) :ARG2~e.9 (t / thing~e.10 :ARG2-of~e.10 (j / joke-01~e.10))) :time~e.9 (s2 / soon~e.6))"""
    parsed = AMR.parse_string(source)
    processed_amr, processed_sentence, metadata = train_pre_processing(parsed, sentence)

    concepts = IdentifiedConcepts()
    concepts.create_from_amr('', processed_amr)
    add_false_root(concepts)
    parents = generate_parent_list_vector(processed_amr, concepts)
    post_processing_on_parent_vector(concepts, parents, processed_sentence, metadata)

    # (parent concept, child concept) -> relation label
    relations = {
        ('become-01', 'activity-06'): 'ARG1',
        ('become-01', 'view-02'): 'ARG2',
        ('become-01', 'soon'): 'time',
        ('view-02', 'person'): 'ARG0',
        ('view-02', 'activity-06'): 'ARG1',
        ('view-02', 'thing'): 'ARG2',
        ('activity-06', 'organization'): 'ARG0',
        ('activity-06', 'propaganda'): 'ARG1',
        ('activity-06', 'some'): 'quant',
        ('organization', 'ZF'): 'wiki',
        ('organization', 'name'): 'name',
        ('name', 'ZF'): 'op1',
        ('thing', 'joke-01'): 'ARG2-of',
    }
    rebuilt_root = generate_amr_node_for_vector_of_parents(concepts, parents, relations)
    rebuilt = rebuilt_root.amr_print_with_reentrancy()

    score = calculate_smatch(rebuilt, source)
    assert score == 1
def test_generate_parent_list_vector_reentrancy_ex_3():
    """Parent list vector where ``c2`` and ``i`` each have two parents."""
    source = """(c2 / convince-01~e.1 :ARG0 (i / i~e.0) :ARG1~e.2 (s / she~e.2) :concession-of~e.4 (s2 / shallow~e.10 :ARG1-of (c / conviction-02~e.6 :ARG0~e.5 i~e.5 :ARG2 c2)))"""
    graph = AMR.parse_string(source)
    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_3_reentrancy', graph)
    add_false_root(concepts)

    actual = generate_parent_list_vector(graph, concepts)

    # concept order noted by the original author:
    # c2 s i c s2
    # 1  2 3 4 5
    expected = [[-1], [0, 4], [1], [4, 1], [5], [1]]
    assert actual == expected, f"{actual} should be{expected}"
def test_parse_example_with_polarity():
    """Parse an AMR with a polarity constant and a literal year value."""
    source = """(y2 / year~e.4 :time-of~e.5 (r / recover-01~e.7 :ARG1-of (e / expect-01 :polarity -~e.6)) :ARG1-of (p / possible-01~e.1) :domain~e.2 (d / date-entity :year~e.4 2012~e.0))"""
    actual = AMR.parse_string(source)

    expected = AMR()
    expected.node_to_concepts = {
        'y2': 'year',
        'r': 'recover-01',
        'e': 'expect-01',
        'p': 'possible-01',
        'd': 'date-entity',
    }
    expected.node_to_tokens = {
        'y2': ['4'],
        'r': ['7'],
        # constants carry (token, parent variable) pairs
        '-': [('6', 'e')],
        'p': ['1'],
        '2012': [('0', 'd')],
    }
    expected.relation_to_tokens = {
        'time-of': [('5', 'y2')],
        'domain': [('2', 'y2')],
        'year': [('4', 'd')],
    }
    # Outgoing edges per node variable, assigned in the same order as before.
    outgoing_edges = [
        ('y2', {
            'time-of': [('r',)],
            'ARG1-of': [('p',)],
            'domain': [('d',)],
        }),
        ('r', {'ARG1-of': [('e',)]}),
        ('e', {'polarity': [('-',)]}),
        ('-', {}),
        ('p', {}),
        ('d', {'year': [('2012',)]}),
        ('2012', {}),
    ]
    for variable, edges in outgoing_edges:
        expected[variable] = edges
    expected.roots = ['y2']

    assert_amr_graph_dictionaries(expected, actual)
def generate_statistics(self, file_path):
    """Accumulate sentence-length and failure statistics for one data file.

    Every record's sentence length (number of single-space separated pieces)
    is counted in ``self.sentence_lengths_all``.  Records whose length lies
    in ``[self.min_sentence_len, self.max_sentence_len)`` are parsed and
    handed to ``self.generate_statistics_for_a_sentence``; a failure there is
    counted under ``amr_parse_fail``.  Any other exception aborts the file,
    is counted under ``amr_pair_extraction_fail`` and printed.

    :param file_path: path handed to ``input_file_parser.extract_data_records``
    """
    try:
        records = input_file_parser.extract_data_records(file_path)
        for index, (sentence, amr_str, amr_id) in enumerate(records):
            sentence_len = len(sentence.split(" "))
            self.sentence_lengths_all[sentence_len] += 1
            if not (self.min_sentence_len <= sentence_len < self.max_sentence_len):
                continue
            try:
                parsed = AMR.parse_string(amr_str)
                self.generate_statistics_for_a_sentence(
                    index, amr_id, parsed, sentence, amr_str)
            except Exception:
                self.histogram_sentence_fails[amr_parse_fail] += 1
                self.sentence_failed += 1
    except Exception as error:
        # these exceptions maybe shouldn't be counted, I mean at least not added to sentence fails :)
        self.histogram_sentence_fails[amr_pair_extraction_fail] += 1
        self.sentence_failed += 1
        print(error)
def create_stanford_ner_preprocessing_histogram():
    """
    Count, over the training dataset, how many person, organization,
    percentage-entity and date-entity (with child rel time) nodes there are,
    to see which preprocessing is worth implementing.

    :return: dict with the keys 'person', 'organization', 'percentage-entity'
        and 'time date-entity', each mapped to the summed ``get_no_nodes`` count
    """
    # histogram key -> (concept, relation) arguments passed to get_no_nodes
    counted_patterns = {
        'person': ('person', 'name'),
        'organization': ('organization', 'name'),
        'percentage-entity': ('percentage-entity', 'value'),
        'time date-entity': ('date-entity', 'time'),
    }
    histogram = {key: 0 for key in counted_patterns}

    training_dataset_dict = read_dataset_dict('training')
    for _dataset, records in training_dataset_dict.items():
        for _sentence, amr_str, _amr_id in records:
            parsed = AMR.parse_string(amr_str)
            for key, (concept, relation) in counted_patterns.items():
                histogram[key] += get_no_nodes(parsed, concept, relation)
    return histogram
def test_generate_parent_list_vector_reentrancy():
    """Parent list vector where concept ``w`` has three parents."""
    source = """(r / receive-01~e.4 :ARG0 (w / we~e.0) :ARG1 (t / thing~e.7 :ARG0-of~e.7 (r2 / remind-01~e.7 :ARG1 (p / pay-01~e.6 :ARG0 w) :ARG2 w)) :ARG2~e.8 (h / hospital~e.10) :time (n / now~e.2) :time (a / already~e.3))"""
    graph = AMR.parse_string(source)
    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_2_polarities', graph)
    add_false_root(concepts)

    actual = generate_parent_list_vector(graph, concepts)

    # concept order used by the expected vector:
    # w n a r p r2 t h
    # 1 2 3 4 5 6  7 8
    expected = [[-1], [5, 6, 4], [4], [4], [0], [6], [7], [4], [4]]
    assert actual == expected, f"{actual} should be{expected}"
for date_relation, quantity in zip(date_relations, quantities): date_entity_node.add_child(Node(quantity, quantity), date_relation) return date_entity_node if __name__ == "__main__": parser_parameters = ParserParameters(max_len=50, with_enhanced_dep_info=False, with_target_semantic_labels=False, with_reattach=True, with_gold_concept_labels=True, with_gold_relation_labels=True) sentence = "It looks like we will also bring in whales ." amr = AMR.parse_string(""" (l / look-02~e.1 :ARG1~e.2 (b / bring-01~e.6 :ARG0 (w / we~e.3) :ARG1~e.7 (w2 / whale~e.8) :mod (a / also~e.5))) """) custom_AMR = CustomizedAMR() custom_AMR.create_custom_AMR(amr) actions = ActionSequenceGenerator.generate_action_sequence(custom_AMR, sentence) acts_i = [a.index for a in actions] act = ActionConceptTransfer() act.load_from_action_objects(actions) actions_re = act.populate_new_actions(acts_i) tokens = tokenizer_util.text_to_sequence(sentence)
def get_custom_amr(amr_str):
    """Parse ``amr_str`` and return the ``CustomizedAMR`` built from it.

    :param amr_str: AMR string handed to ``AMR.parse_string``
    :return: an ``amr_data.CustomizedAMR`` on which ``create_custom_AMR`` was
        called with the parsed AMR
    """
    parsed = AMR.parse_string(amr_str)
    customized = amr_data.CustomizedAMR()
    customized.create_custom_AMR(parsed)
    return customized