Ejemplo n.º 1
0
def test_parse_example_1():
    """Parse a small aligned AMR and compare it with a hand-built AMR.

    The expected graph is filled in field by field (concepts, node/token
    alignments, relation/token alignments, outgoing edges and roots) and is
    compared to the parsed graph via ``assert_amr_graph_dictionaries``.
    """
    amr_str = """(r / recommend-01~e.1
                    :ARG1 (a / advocate-01~e.4
                        :ARG1 (i / it~e.0)
                        :manner~e.2 (v / vigorous~e.3)))"""
    parsed_amr: AMR = AMR.parse_string(amr_str)

    expected_amr: AMR = AMR()
    expected_amr.node_to_concepts = dict(
        i='it', v='vigorous', a='advocate-01', r='recommend-01')
    expected_amr.node_to_tokens = dict(i=['0'], v=['3'], a=['4'], r=['1'])
    expected_amr.relation_to_tokens = dict(manner=[('2', 'a')])

    # Outgoing edges per variable: relation name -> list of 1-tuples holding
    # the child variable.
    outgoing_edges = (
        ('i', {}),
        ('v', {}),
        ('a', {'ARG1': [('i', )], 'manner': [('v', )]}),
        ('r', {'ARG1': [('a', )]}),
    )
    for variable, edges in outgoing_edges:
        expected_amr[variable] = edges

    expected_amr.roots = ['r']
    assert_amr_graph_dictionaries(expected_amr, parsed_amr)
Ejemplo n.º 2
0
def generate_test_data(file_path, verbose=True):
    """Extract (sentence, concepts metadata) test records from a data file.

    Every record returned by ``input_file_parser.extract_data_records`` is
    unpacked as ``(sentence, amr_str, amr_id)``. The AMR string is parsed only
    so that records whose AMR cannot be read are rejected; the sentence is
    then handed to ``NamedEntitiesReplacer.process_sentence`` and the named
    entities it reports are collected into a metadata dictionary
    (``entity[0] -> entity[1]``).

    :param file_path: path passed to ``input_file_parser.extract_data_records``
    :param verbose: when exactly ``False``, logging at WARNING level and below
        is disabled before processing starts
    :return: list of ``(new_sentence, concepts_metadata)`` tuples, one for each
        record processed without an exception; failing records are logged at
        WARNING level and skipped
    """
    if verbose is False:
        # NOTE: logging.disable is process-wide and is not undone on return.
        logging.disable(logging.WARNING)

    sentence_amr_triples = input_file_parser.extract_data_records(file_path)
    test_data = []

    for i, (sentence, amr_str, _amr_id) in enumerate(
            tqdm(sentence_amr_triples)):
        try:
            logging.warning("Started processing example %d", i)
            # Parsed purely for validation: an unparsable AMR drops the record.
            AMR.parse_string(amr_str)

            (new_sentence, named_entities
             ) = NamedEntitiesReplacer.process_sentence(sentence)
            concepts_metadata = {
                named_entity[0]: named_entity[1]
                for named_entity in named_entities
            }

            test_data.append((new_sentence, concepts_metadata))
        except Exception as e:
            # Best effort by design: report the failing record and carry on.
            logging.warning(e)
            logging.warning("Failed at: %d", i)
            logging.warning("%s\n", sentence)

    return test_data
Ejemplo n.º 3
0
def test_generate_parent_list_vector_with_2_polarites():
    """Check the parent list vector of an AMR containing two '-' polarities.

    Concepts are identified from the AMR, a false root is added, and the
    generated parent list vector is compared with the hand-written one.
    """
    amr_str = """(a / and~e.0
                      :op2 (p2 / practice-01~e.13
                            :ARG1 (l / loan-01~e.12
                                  :ARG2 (p / person~e.11
                                        :ARG0-of~e.11 (s / study-01~e.11)))
                            :mod (s2 / sane~e.10 :polarity~e.10 -~e.10)
                            :ARG1-of (i2 / identical-01~e.16
                                  :ARG2~e.19 (p3 / practice-01~e.24
                                        :ARG1 (l2 / loan-01~e.23
                                              :ARG1 (m / mortgage-01~e.22))
                                        :mod (s3 / sane~e.21 :polarity~e.21 -~e.21))
                                  :manner (w / way~e.18
                                        :mod (e / every~e.18)))
                            :ARG0-of (c2 / cause-01~e.3,8 
                                  :ARG1 (b / be-located-at-91~e.5,7
                                        :ARG1 (t / they~e.4)
                                        :ARG2 (t2 / there~e.6))
                                  :mod (o / only~e.2))))"""
    amr: AMR = AMR.parse_string(amr_str)
    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_2_polarities', amr)
    add_false_root(concepts)
    actual = generate_parent_list_vector(amr, concepts)

    # a o c2 t b t2 - s2 s p  l  p2 i2 e  w  -  s3 m  l2 p3
    # 1 2 3  4 5 6  7 8  9 10 11 12 13 14 15 16 17 18 19 20
    single_parents = (-1, 0, 3, 12, 5, 3, 5, 8, 12, 10, 11, 12, 1, 12, 15,
                      13, 17, 20, 19, 20, 13)
    # Each position holds a one-element list with the index of its parent.
    expected = [[parent] for parent in single_parents]

    assert actual == expected, f"{actual} should be{expected}"
Ejemplo n.º 4
0
def test_create_from_amr_example_2():
    """Check the ordered concepts identified for an AMR with a polarity.

    A ``CustomizedAMR`` is also built from the parsed AMR before the concepts
    are identified; the identified concepts are compared with a hand-written
    ``IdentifiedConcepts`` instance.
    """
    amr_str = """(a / and~e.0 
      :op2 (p / possible-01~e.8 
            :ARG1 (a3 / avoid-01~e.10 
                  :ARG0 (h / he~e.7) 
                  :ARG1 (c / censure-01~e.12 
                        :ARG1 h)) 
            :ARG1-of (a2 / actual-02~e.9) 
            :manner (p2 / promise-01~e.5 :polarity~e.2 -~e.2 
                  :ARG0 h 
                  :mod (a4 / any~e.4))))"""
    amr = AMR.parse_string(amr_str)
    customized = CustomizedAMR()
    customized.create_custom_AMR(amr)

    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_2', amr)

    wanted = IdentifiedConcepts()
    wanted.amr_id = 'amr_id_2'
    wanted.ordered_concepts = [
        Concept('a', 'and'),
        Concept('-', '-', 0),
    ] + [
        Concept(variable, name) for variable, name in (
            ('a4', 'any'),
            ('p2', 'promise-01'),
            ('h', 'he'),
            ('p', 'possible-01'),
            ('a2', 'actual-02'),
            ('a3', 'avoid-01'),
            ('c', 'censure-01'),
        )
    ]
    assert_identified_concepts(wanted, actual)
Ejemplo n.º 5
0
def test_create_from_amr_example_reentrancy():
    """Check the ordered concepts identified for an AMR that reuses variable ``w``.

    The variable ``w`` is referenced again under ``p`` and ``r2``; the expected
    concept list contains it once.
    """
    amr_str = """(r / receive-01~e.4
                      :ARG0 (w / we~e.0)
                      :ARG1 (t / thing~e.7
                            :ARG0-of~e.7 (r2 / remind-01~e.7
                                  :ARG1 (p / pay-01~e.6
                                        :ARG0 w)
                                  :ARG2 w))
                      :ARG2~e.8 (h / hospital~e.10)
                      :time (n / now~e.2)
                      :time (a / already~e.3))"""
    amr = AMR.parse_string(amr_str)
    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_reentrancy', amr)

    wanted = IdentifiedConcepts()
    wanted.amr_id = 'amr_id_reentrancy'
    variable_concept_pairs = (
        ('w', 'we'),
        ('n', 'now'),
        ('a', 'already'),
        ('r', 'receive-01'),
        ('p', 'pay-01'),
        ('r2', 'remind-01'),
        ('t', 'thing'),
        ('h', 'hospital'),
    )
    wanted.ordered_concepts = [
        Concept(variable, name) for variable, name in variable_concept_pairs
    ]
    assert_identified_concepts(wanted, actual)
Ejemplo n.º 6
0
def read_original_graphs(file_type, filter_path="deft", cache=True):
    """
        Returns a list of (amr_id, sentence, AMR, CustomizedAMR) quadruples.

        For every data file in the partition directory whose name contains
        filter_path (and does not contain "dump"), the quadruples are loaded
        from the file's dump when caching is allowed and the dump exists;
        otherwise they are generated from the original file and a new dump is
        written. AMRs that raise while being parsed or customized are skipped
        and only counted; the count is printed per generated file.

        :param file_type - data set partition (training, dev or test)
        :param filter_path - filtering criteria for data files (None means "deft")
        :param cache - allow to load from dump file if true, else calculate from original file and save new dump
        :return list of quadruples gathered from all matching files, in sorted file-name order
    """
    if filter_path is None:
        filter_path = "deft"
    dir_path = AMR_ALIGNMENTS_SPLIT + "/" + file_type

    parsed_data = []

    directory_content = listdir(dir_path)
    original_corpus = sorted(
        [x for x in directory_content if "dump" not in x and filter_path in x])

    for file_name in original_corpus:
        original_file_path = dir_path + "/" + file_name
        dump_file_path = dir_path + "/original_graphs_dumps/" + file_name + ".dump"
        print(original_file_path)

        if cache and path.exists(dump_file_path):
            print("cache")
            # NOTE(review): `js` is used with binary file modes, so it is
            # presumably a pickle-like serializer -- confirm, and only load
            # dumps from trusted locations.
            with open(dump_file_path, "rb") as dump_file:
                parsed_data.extend(js.load(dump_file))
        else:
            print("generate")
            file_data = input_file_parser.extract_data_records(
                original_file_path)

            parsed_file_data = []
            failed_amrs_in_file = 0

            for amr_triple in file_data:
                try:
                    camr_graph = AMR.parse_string(amr_triple[1])

                    custom_amr_graph = CustomizedAMR()
                    custom_amr_graph.create_custom_AMR(camr_graph)

                    parsed_file_data.append((amr_triple[2], amr_triple[0],
                                             camr_graph, custom_amr_graph))
                except Exception:
                    # Best effort: a failing AMR is dropped and only counted.
                    failed_amrs_in_file += 1

            # exist_ok avoids the check-then-create race of a separate
            # path.exists() test when the dump directory appears concurrently.
            makedirs(path.dirname(dump_file_path), exist_ok=True)
            with open(dump_file_path, "wb") as dump_file:
                js.dump(parsed_file_data, dump_file)
            parsed_data.extend(parsed_file_data)

            print(("%d / %d in %s" %
                   (failed_amrs_in_file, len(file_data), original_file_path)))

    return parsed_data
def test_pre_and_post_processing_eg_2():
    """Round-trip an AMR through pre-processing and post-processing.

    The AMR and sentence go through ``train_pre_processing``; concepts and the
    parent list vector are derived from the result, post-processed, and turned
    back into an AMR node. The printed node must reach a smatch of 1 against
    the expected AMR string.
    """
    sentence = 'It is Santorum that is the by far major nonRomney candidate and Newt would appear to be the spoiler .'
    amr_str = """(a / and~e.11
      :op1 (c / candidate~e.10
            :ARG1-of (m / major-02~e.8
                  :degree (b / by-far~e.6,7))
            :mod (p3 / person~e.9 :polarity -~e.9 :wiki "Mitt_Romney"~e.9
                  :name (n2 / name~e.9 :op1 "Romney"~e.9))
            :domain~e.1,4 (p2 / person :wiki "Rick_Santorum"
                  :name (n / name :op1 "Santorum"~e.2)))
      :op2 (a2 / appear-02~e.14
            :ARG1 (s / spoil-01~e.18
                  :ARG0 (p4 / person :wiki "Newt_Gingrich"
                        :name (n3 / name :op1 "Newt"~e.12)))))"""
    amr: AMR = AMR.parse_string(amr_str)
    amr, new_sentence, metadata = train_pre_processing(amr, sentence)

    concepts = IdentifiedConcepts()
    concepts.create_from_amr('', amr)
    add_false_root(concepts)
    parents = generate_parent_list_vector(amr, concepts)
    post_processing_on_parent_vector(concepts, parents, new_sentence,
                                     metadata)

    # (parent concept, child concept) -> relation label
    relations_dict = dict([
        (('and', 'candidate'), 'op1'),
        (('and', 'appear-02'), 'op2'),
        (('candidate', 'major-02'), 'ARG1-of'),
        (('candidate', 'person'), 'mod'),
        (('major-02', 'by-far'), 'degree'),
        (('person', '-'), 'polarity'),
        (('person', 'Mitt_Romney'), 'wiki'),
        (('person', 'name'), 'name'),
        (('person', 'Santorum'), 'wiki'),
        (('name', 'Romney'), 'op1'),
        (('name', 'Santorum'), 'op1'),
        (('appear-02', 'spoil-01'), 'ARG1'),
        (('spoil-01', 'person'), 'ARG0'),
        (('person', 'Newt'), 'wiki'),
        (('name', 'Newt'), 'op1'),
    ])
    amr_node: Node = generate_amr_node_for_vector_of_parents(
        concepts, parents, relations_dict)
    generated_amr_str = amr_node.amr_print_with_reentrancy()

    expected_amr_str = """(a / and~e.11
      :op1 (c / candidate~e.10
            :ARG1-of (m / major-02~e.8
                  :degree (b / by-far~e.6,7))
            :mod (p3 / person~e.9 :polarity -~e.9 :wiki "Mitt_Romney"~e.9
                  :name (n2 / name~e.9 :op1 "Romney"~e.9))
            :mod~e.1,4 (p2 / person :wiki "Santorum"
                  :name (n / name :op1 "Santorum"~e.2)))
      :op2 (a2 / appear-02~e.14
            :ARG1 (s / spoil-01~e.18
                  :ARG0 (p4 / person :wiki "Newt"
                        :name (n3 / name :op1 "Newt"~e.12)))))"""

    assert calculate_smatch(generated_amr_str, expected_amr_str) == 1
Ejemplo n.º 8
0
def test_parse_example_with_reentrancy():
    """Parse an AMR that reuses variable ``w`` and compare with a hand-built AMR.

    The expected graph lists ``w`` as a child of ``p``, ``r2`` and ``r``.
    """
    amr_str = """(r / receive-01~e.4
                      :ARG0 (w / we~e.0)
                      :ARG1 (t / thing~e.7
                            :ARG0-of~e.7 (r2 / remind-01~e.7
                                  :ARG1 (p / pay-01~e.6
                                        :ARG0 w)
                                  :ARG2 w))
                      :ARG2~e.8 (h / hospital~e.10)
                      :time (n / now~e.2)
                      :time (a / already~e.3))"""
    parsed_amr: AMR = AMR.parse_string(amr_str)

    expected_amr: AMR = AMR()
    expected_amr.node_to_concepts = dict([
        ('w', 'we'),
        ('p', 'pay-01'),
        ('r2', 'remind-01'),
        ('t', 'thing'),
        ('h', 'hospital'),
        ('n', 'now'),
        ('a', 'already'),
        ('r', 'receive-01'),
    ])
    expected_amr.node_to_tokens = dict([
        ('p', ['6']),
        ('r2', ['7']),
        ('w', ['0']),
        ('t', ['7']),
        ('h', ['10']),
        ('n', ['2']),
        ('a', ['3']),
        ('r', ['4']),
    ])
    expected_amr.relation_to_tokens = dict([
        ('ARG0-of', [('7', 't')]),
        ('ARG2', [('8', 'r')]),
    ])

    # Outgoing edges per variable: relation name -> list of 1-tuples holding
    # the child variable.
    outgoing_edges = (
        ('w', {}),
        ('p', {'ARG0': [('w', )]}),
        ('r2', {'ARG1': [('p', )], 'ARG2': [('w', )]}),
        ('t', {'ARG0-of': [('r2', )]}),
        ('h', {}),
        ('n', {}),
        ('a', {}),
        ('r', {
            'ARG0': [('w', )],
            'ARG1': [('t', )],
            'ARG2': [('h', )],
            'time': [('n', ), ('a', )]
        }),
    )
    for variable, edges in outgoing_edges:
        expected_amr[variable] = edges

    expected_amr.roots = ['r']
    assert_amr_graph_dictionaries(expected_amr, parsed_amr)
Ejemplo n.º 9
0
def test_parse_example2_with_2polarities():
    """Parse an AMR with two '-' polarity literals and compare with a hand-built AMR.

    In the expected graph both polarities share the single ``'-'`` entry, whose
    token alignments are stored as ``(token, parent variable)`` pairs.
    """
    amr_str = """(c / contrast-01~e.0
                      :ARG2 (a2 / authorize-01~e.6
                            :ARG1 (o2 / or~e.9
                                  :op1 (a / approve-01
                                        :ARG0 p
                                        :ARG1 (p2 / pay-01
                                              :purpose (e2 / employ-01 :polarity -~e.2,11)))
                                  :op2 (d / deny-01
                                        :ARG0 p
                                        :ARG1 p2))
                            :ARG2 (p / person
                                  :ARG0-of (r / represent-01
                                        :ARG1 (o / organization
                                              :mod (e / employ-01 :polarity -~e.2,11))))))"""
    parsed_amr: AMR = AMR.parse_string(amr_str)

    expected_amr: AMR = AMR()
    expected_amr.node_to_concepts = dict([
        ('e2', 'employ-01'),
        ('p2', 'pay-01'),
        ('a', 'approve-01'),
        ('d', 'deny-01'),
        ('o2', 'or'),
        ('e', 'employ-01'),
        ('o', 'organization'),
        ('r', 'represent-01'),
        ('p', 'person'),
        ('a2', 'authorize-01'),
        ('c', 'contrast-01'),
    ])
    expected_amr.node_to_tokens = dict([
        ('-', [('2', 'e2'), ('11', 'e2'), ('2', 'e'), ('11', 'e')]),
        ('o2', ['9']),
        ('a2', ['6']),
        ('c', ['0']),
    ])
    expected_amr.relation_to_tokens = {}

    # Outgoing edges per variable: relation name -> list of 1-tuples holding
    # the child variable.
    outgoing_edges = (
        ('-', {}),
        ('e2', {'polarity': [('-', )]}),
        ('p2', {'purpose': [('e2', )]}),
        ('p', {'ARG0-of': [('r', )]}),
        ('a', {'ARG0': [('p', )], 'ARG1': [('p2', )]}),
        ('d', {'ARG0': [('p', )], 'ARG1': [('p2', )]}),
        ('o2', {'op1': [('a', )], 'op2': [('d', )]}),
        ('e', {'polarity': [('-', )]}),
        ('o', {'mod': [('e', )]}),
        ('r', {'ARG1': [('o', )]}),
        ('a2', {'ARG1': [('o2', )], 'ARG2': [('p', )]}),
        ('c', {'ARG2': [('a2', )]}),
    )
    for variable, edges in outgoing_edges:
        expected_amr[variable] = edges

    expected_amr.roots = ['c']
    assert_amr_graph_dictionaries(expected_amr, parsed_amr)
Ejemplo n.º 10
0
def test_generate_amr_node_for_vector_of_parents_example_1():
    """Rebuild an AMR node from a parent list vector and compare with the input.

    Concepts and the parent list vector are derived from the parsed AMR, an
    AMR node is regenerated from them using a hand-written relations
    dictionary, and the printed node must reach a smatch of 1 against the
    original AMR string.
    """
    amr_str = """(s / suppose-01~e.1 
                      :ARG0 (i / i~e.0) 
                      :ARG1 (p / possible-01~e.3 
                            :ARG1 (a / add-02~e.4 
                                  :ARG0 (y / you~e.2) 
                                  :ARG1 (p2 / probation~e.5 
                                        :ARG1-of (c / contrast-01~e.7 
                                              :ARG2 (r / replace-01~e.12 
                                                    :ARG1 p2 
                                                    :ARG2~e.13 (t / time~e.15 
                                                          :mod (j / jail~e.14)) 
                                                    :mod (j2 / just~e.10)))))))"""
    amr: AMR = AMR.parse_string(amr_str)
    identified_concepts = IdentifiedConcepts()
    identified_concepts.create_from_amr('amr_1', amr)
    add_false_root(identified_concepts)
    vector_of_parents = generate_parent_list_vector(amr, identified_concepts)
    # transf parent vectors to vector of parents
    # i s y p a p2 c j2 r j  t
    # 1 2 3 4 5 6  7 8  9 10 11
    # (parent concept, child concept) -> relation label
    relations_dict = {
        ('suppose-01', 'i'): 'ARG0',
        ('suppose-01', 'possible-01'): 'ARG1',
        ('possible-01', 'add-02'): 'ARG1',
        ('add-02', 'you'): 'ARG0',
        ('add-02', 'probation'): 'ARG1',
        ('probation', 'contrast-01'): 'ARG1-of',
        ('contrast-01', 'replace-01'): 'ARG2',
        ('replace-01', 'probation'): 'ARG1',
        ('replace-01', 'time'): 'ARG2',
        ('replace-01', 'just'): 'mod',
        ('time', 'jail'): 'mod'
    }
    amr_node: Node = generate_amr_node_for_vector_of_parents(
        identified_concepts, vector_of_parents, relations_dict)
    generated_amr_str = amr_node.amr_print_with_reentrancy()
    smatch = calculate_smatch(generated_amr_str, amr_str)
    assert smatch == 1
Ejemplo n.º 11
0
def test_generate_parent_vector_example_2():
    """Check the parent vectors generated for an AMR with hand-ordered concepts.

    The ordered concepts (with a leading ROOT entry) are supplied by hand and
    ``generate_parent_vectors`` is called with ``2`` as its third argument; two
    parent vectors are expected.
    """
    amr_str = """(m / man~e.2 
      :ARG1-of (m2 / marry-01~e.1) 
      :ARG0-of (l / love-01~e.9 
            :ARG1~e.10 (y / you~e.11) 
            :ARG1-of (r / real-04~e.6) 
            :condition-of~e.4 (a3 / and~e.16 
                  :op1 (g / go-06~e.14 
                        :ARG2 (a / ahead~e.15) 
                        :mod (j / just~e.13)) 
                  :op2 (o2 / or~e.22 
                        :op1 (f / file-01~e.17 
                              :ARG4~e.18 (d / divorce-01~e.19) 
                              :time (n / now~e.20)) 
                        :op2 (m3 / move-01~e.25 
                              :ARG2 (o / out-06~e.26 
                                    :ARG2~e.27 (h / house~e.29 
                                          :poss~e.28 m~e.28)) 
                              :time n~e.30 
                              :mod (a2 / at-least~e.23,24))))))"""
    amr: AMR = AMR.parse_string(amr_str)

    # The position of each pair is the index referred to by the parent vectors.
    variable_concept_pairs = (
        ('', 'ROOT'),  # 0
        ('m2', 'marry-01'),  # 1
        ('m', 'man'),  # 2
        ('r', 'real-04'),  # 3
        ('l', 'love-01'),  # 4
        ('y', 'you'),  # 5
        ('j', 'just'),  # 6
        ('g', 'go-06'),  # 7
        ('a', 'ahead'),  # 8
        ('a3', 'and'),  # 9
        ('f', 'file-01'),  # 10
        ('d', 'divorce-01'),  # 11
        ('n', 'now'),  # 12
        ('o2', 'or'),  # 13
        ('a2', 'at-least'),  # 14
        ('m3', 'move-01'),  # 15
        ('o', 'out-06'),  # 16
        ('h', 'house'),  # 17
    )
    concepts = IdentifiedConcepts()
    concepts.ordered_concepts = [
        Concept(variable, name) for variable, name in variable_concept_pairs
    ]

    actual = generate_parent_vectors(amr, concepts, 2)
    # The two vectors differ only at index 12 ('n'): parent 10 vs parent 15.
    expected = [
        (-1, 2, 0, 4, 2, 4, 7, 9, 7, 4, 13, 10, 10, 9, 15, 13, 15, 16),
        (-1, 2, 0, 4, 2, 4, 7, 9, 7, 4, 13, 10, 15, 9, 15, 13, 15, 16),
    ]
    assert_parent_vectors(expected, actual)
Ejemplo n.º 12
0
def test_create_from_amr_example_4():
    """Expect no ordered concepts for an AMR that is not fully aligned."""
    amr_str = """(i / intensify-01~e.7 :li~e.0 -1~e.0 
                    :ARG1 (c / contradiction~e.3) 
                    :ARG0-of (m / make-02~e.9 
                        :ARG1 (c2 / control-01~e.12,13,14 :polarity - 
                              :ARG1 (s / situation~e.11))) 
                    :ARG1-of (b / bind-02~e.5))"""
    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_3', AMR.parse_string(amr_str))

    wanted = IdentifiedConcepts()
    wanted.amr_id = 'amr_id_3'
    # None is expected because not all concepts are aligned and the
    # unalignment tolerance is left at its default (0).
    wanted.ordered_concepts = None
    assert_identified_concepts(wanted, actual)
Ejemplo n.º 13
0
def test_create_custom_AMR_example_no_exception():
    """Smoke test: building a CustomizedAMR from a large AMR must not raise.

    There are no assertions; the test passes when parsing and
    ``create_custom_AMR`` both complete.
    """
    amr_str = """(p / pay-01~e.16 
      :ARG0 (p2 / person 
            :mod (s / star~e.2 
                  :mod (p3 / pop~e.1)) 
            :mod (d3 / debt~e.13 
                  :consist-of (m / monetary-quantity :quant 240000000~e.10,11 
                        :unit (d2 / dollar~e.9)) 
                  :ARG1-of (s2 / say-01~e.6))) 
      :ARG1 (m2 / monetary-quantity :quant 6~e.17 
            :unit (f / figure~e.18)) 
      :ARG2~e.26 (a / and~e.30 
            :op1 (d / doctor~e.29 
                  :mod (v / voodoo~e.28) 
                  :mod (a2 / another~e.27)) 
            :op2 (w / woman~e.34 :wiki - 
                  :name (n / name~e.35 :op1 "Samia"~e.36) 
                  :mod (m3 / mystery~e.32) 
                  :mod (c2 / country :wiki "Egypt" 
                        :name (n2 / name~e.35 :op1 "Egypt"~e.33)) 
                  :ARG1-of (c3 / come-01~e.39 
                        :ARG4~e.40 p2~e.41 
                        :accompanier~e.42 (l / letter~e.44 
                              :mod~e.45 (g / greet-01~e.46 
                                    :ARG0~e.47 (p7 / person 
                                          :ARG0-of (h2 / have-org-role-91 
                                                :ARG1 c6 
                                                :ARG2 (p4 / prince~e.53 
                                                      :ARG1-of (r2 / rank-01~e.51 
                                                            :ARG1-of (h / high-02~e.49)))) 
                                          :ARG0-of (p5 / purport-01 
                                                :ARG1 (p6 / person :wiki "Mohammed_bin_Nawwaf_bin_Abdulaziz" 
                                                      :name (n4 / name~e.35 :op1 "Nawaf"~e.56 :op2 "Bin"~e.57 :op3 "Abdulaziz"~e.58 :op4 "Al"~e.59 :op5 "Saud"~e.61) 
                                                      :ARG0-of (h3 / have-org-role-91~e.68 
                                                            :ARG1 (c6 / country :wiki "Saudi_Arabia" 
                                                                  :name (n5 / name~e.35 :op1 "Saudi"~e.69 :op2 "Arabia"~e.70)) 
                                                            :ARG2 (c5 / chief~e.65 
                                                                  :topic~e.66 (i / intelligence~e.67)) 
                                                            :time (n6 / now~e.63)))))))))) 
      :ARG3~e.19 (c / cleanse-01~e.22 
            :manner (r / ritual~e.21) 
            :ARG0-of (u / use-01~e.23 
                  :ARG1 (b / blood~e.25 
                        :mod (s3 / sheep~e.24)))))"""
    parsed = AMR.parse_string(amr_str)
    customized = CustomizedAMR()
    customized.create_custom_AMR(parsed)
Ejemplo n.º 14
0
def test__create_from_amr_with_2_polarites():
    """Check the ordered concepts identified for an AMR with two '-' polarities.

    The two polarity literals are expected as separate ``Concept('-', '-', k)``
    entries whose third argument is 0 and 1 respectively.
    """
    amr_str = """(a / and~e.0
                      :op2 (p2 / practice-01~e.13
                            :ARG1 (l / loan-01~e.12
                                  :ARG2 (p / person~e.11
                                        :ARG0-of~e.11 (s / study-01~e.11)))
                            :mod (s2 / sane~e.10 :polarity~e.10 -~e.10)
                            :ARG1-of (i2 / identical-01~e.16
                                  :ARG2~e.19 (p3 / practice-01~e.24
                                        :ARG1 (l2 / loan-01~e.23
                                              :ARG1 (m / mortgage-01~e.22))
                                        :mod (s3 / sane~e.21 :polarity~e.21 -~e.21))
                                  :manner (w / way~e.18
                                        :mod (e / every~e.18)))
                            :ARG0-of (c2 / cause-01~e.3,8 
                                  :ARG1 (b / be-located-at-91~e.5,7
                                        :ARG1 (t / they~e.4)
                                        :ARG2 (t2 / there~e.6))
                                  :mod (o / only~e.2))))"""
    amr: AMR = AMR.parse_string(amr_str)
    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_2_polarities', amr)

    # Each entry is the positional argument tuple passed to Concept.
    concept_args = (
        ('a', 'and'),
        ('o', 'only'),
        ('c2', 'cause-01'),
        ('t', 'they'),
        ('b', 'be-located-at-91'),
        ('t2', 'there'),
        ('-', '-', 0),
        ('s2', 'sane'),
        ('s', 'study-01'),
        ('p', 'person'),
        ('l', 'loan-01'),
        ('p2', 'practice-01'),
        ('i2', 'identical-01'),
        ('e', 'every'),
        ('w', 'way'),
        ('-', '-', 1),
        ('s3', 'sane'),
        ('m', 'mortgage-01'),
        ('l2', 'loan-01'),
        ('p3', 'practice-01'),
    )
    wanted = IdentifiedConcepts()
    wanted.amr_id = 'amr_2_polarities'
    wanted.ordered_concepts = [Concept(*args) for args in concept_args]
    assert_identified_concepts(wanted, actual)
Ejemplo n.º 15
0
def map_to_amr_dataset_dict(dataset_dict):
    """
    Replace the AMR string of every data item with a parsed AMR.

    Input:  {dataset: [(sentence, amr_str, amr_id), ...]}
    Output: {dataset: [(sentence, amr: AMR, amr_id), ...]}

    Items are read by position (0, 1, 2) and re-emitted as tuples; dataset
    keys and item order are kept as in the input.
    """
    # TODO: util for amr_str -> custom_amr
    return {
        dataset: [(item[0], AMR.parse_string(item[1]), item[2])
                  for item in items]
        for dataset, items in dataset_dict.items()
    }
Ejemplo n.º 16
0
def test_generate_parent_list_vector_ex_1():
    """Check the parent list vector generated for a small four-concept AMR.

    Concepts are identified from the AMR and a false root is added before the
    vector is generated.
    """
    amr_str = """(r / recommend-01~e.1
                    :ARG1 (a / advocate-01~e.4
                        :ARG1 (i / it~e.0)
                        :manner~e.2 (v / vigorous~e.3)))"""
    amr = AMR.parse_string(amr_str)
    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_id_1', amr)
    add_false_root(concepts)
    actual = generate_parent_list_vector(amr, concepts)

    # i r v a
    # 1 2 3 4
    # Each position holds a one-element list with the index of its parent.
    expected = [[parent] for parent in (-1, 4, 0, 4, 2)]

    assert actual == expected, f"{actual} should be{expected}"
Ejemplo n.º 17
0
def test_create_from_amr_example_1():
    """Check the ordered concepts identified for a small fully aligned AMR."""
    amr_str = """(r / recommend-01~e.1
                    :ARG1 (a / advocate-01~e.4
                        :ARG1 (i / it~e.0)
                        :manner~e.2 (v / vigorous~e.3)))"""
    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_1', AMR.parse_string(amr_str))

    wanted = IdentifiedConcepts()
    wanted.amr_id = 'amr_id_1'
    variable_concept_pairs = (
        ('i', 'it'),
        ('r', 'recommend-01'),
        ('v', 'vigorous'),
        ('a', 'advocate-01'),
    )
    wanted.ordered_concepts = [
        Concept(variable, name) for variable, name in variable_concept_pairs
    ]
    assert_identified_concepts(wanted, actual)
Ejemplo n.º 18
0
def test_generate_parent_vector_example_2():
    """Check the single parent vector generated for a small four-concept AMR.

    The ordered concepts (with a leading ROOT entry) are supplied by hand.
    """
    # NOTE(review): another function with this exact name appears earlier in
    # this listing; if both live in one module the later definition shadows
    # the earlier one -- confirm they belong to different files.
    amr_str = """(r / recommend-01~e.1
                    :ARG1 (a / advocate-01~e.4
                        :ARG1 (i / it~e.0)
                        :manner~e.2 (v / vigorous~e.3)))"""
    amr: AMR = AMR.parse_string(amr_str)

    variable_concept_pairs = (
        ('', 'ROOT'),
        ('i', 'it'),
        ('r', 'recommend-01'),
        ('v', 'vigorous'),
        ('a', 'advocate-01'),
    )
    concepts = IdentifiedConcepts()
    concepts.ordered_concepts = [
        Concept(variable, name) for variable, name in variable_concept_pairs
    ]

    actual = generate_parent_vectors(amr, concepts)
    expected = [[-1, 4, 0, 4, 2]]
    assert_parent_vectors(expected, actual)
Ejemplo n.º 19
0
def test_create_from_amr_example_3():
    """Expect no ordered concepts for an AMR that is not fully aligned."""
    amr_str = """(d / difficult~e.5
          :domain~e.4 (r / reach-01~e.7
                :ARG1 (c / consensus~e.0
                      :topic~e.1 (c2 / country :wiki "India"
                            :name (n / name :op1 "India"~e.2)))
                :time~e.8 (m / meet-03~e.11
                      :ARG0 (o / organization :wiki "Nubolt12_632_6421.19clear_Suppliers_Group"
                            :name (n2 / name :op1 "NSG"~e.10))
                      :time~e.12 (d2 / date-entity :year 2007~e.14 :month~e.13 11~e.13))))"""
    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_3', AMR.parse_string(amr_str))

    wanted = IdentifiedConcepts()
    wanted.amr_id = 'amr_id_3'
    # None is expected because not all concepts are aligned and the
    # unalignment tolerance is left at its default (0).
    wanted.ordered_concepts = None
    assert_identified_concepts(wanted, actual)
Ejemplo n.º 20
0
def test_generate_parent_list_vector_with_polarity():
    """Check the parent list vector of an AMR containing a '-' polarity literal.

    Concepts are identified from the AMR and a false root is added before the
    vector is generated.
    """
    amr_str = """(y2 / year~e.4
                      :time-of~e.5 (r / recover-01~e.7
                            :ARG1-of (e / expect-01~e.6 :polarity -~e.6))
                      :ARG1-of (p / possible-01~e.1)
                      :domain~e.2 (d / date-entity~e.4 :year~e.4 2012~e.0))"""
    amr: AMR = AMR.parse_string(amr_str)
    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_polarity', amr)
    add_false_root(concepts)
    actual = generate_parent_list_vector(amr, concepts)

    # 2012 p d y2 - e r
    # 1    2 3 4  5 6 7
    # Each position holds a one-element list with the index of its parent.
    expected = [[parent] for parent in (-1, 3, 4, 4, 0, 6, 7, 4)]

    assert actual == expected, f"{actual} should be{expected}"
Ejemplo n.º 21
0
def generate_dataset_statistics(sentence_amr_str_triples: List[Tuple[str, str,
                                                                     str]],
                                filters):
    """Count instances that survive preprocessing and, after that, filtering.

    Each (sentence, amr_str, amr_id) triple is parsed, run through the
    preprocessing steps and wrapped in a CustomizedAMR. Triples for which any
    of these stages raises are skipped (best effort); the number of skipped
    triples is reported through the logging module instead of being dropped
    silently.

    Args:
        sentence_amr_str_triples: (sentence, amr_str, amr_id) triples.
        filters: filters that are added, in order, to a
            CustomizedAMRDataFiltering before it is executed.

    Returns:
        A tuple (instances, filtered_instances): the number of triples that
        passed parsing/preprocessing, and the number of entries returned by
        the filtering.
    """
    import logging

    sentence_amr_id = []
    amr_preprocessing_fails = 0

    for sentence, amr_str, amr_id in sentence_amr_str_triples:
        try:
            amr = AMR.parse_string(amr_str)
            # The steps are re-created for every instance, as in the original
            # code, in case a step keeps per-instance state.
            preprocessing_steps: List[PreprocessingStep] = [
                HaveOrgPreprocessingStep(),
                NamedEntitiesPreprocessingStep(),
                DateEntitiesPreprocessingStep(),
                TemporalQuantitiesPreprocessingStep(),
                QuantitiesPreprocessingStep()
            ]
            new_amr, _, _ = apply_preprocessing_steps_on_instance(
                amr, sentence, preprocessing_steps)

            custom_amr = amr_data.CustomizedAMR()
            custom_amr.create_custom_AMR(new_amr)
        except Exception:
            # Best effort: a failing instance is skipped, but counted.
            amr_preprocessing_fails += 1
        else:
            # NOTE(review): the original (not the preprocessed) sentence is
            # stored next to the preprocessed AMR -- confirm this is intended.
            sentence_amr_id.append((sentence, custom_amr, amr_id))

    if amr_preprocessing_fails:
        logging.warning(
            "%d of %d instances failed AMR parsing/preprocessing",
            amr_preprocessing_fails, len(sentence_amr_str_triples))

    # apply filters
    filtering = CustomizedAMRDataFiltering(sentence_amr_id)
    for f in filters:
        filtering.add_filter(f)
    new_sentence_amr_pairs = filtering.execute()

    return len(sentence_amr_id), len(new_sentence_amr_pairs)
Ejemplo n.º 22
0
def test_generate_parent_list_vector_reentrancy_ex_2():
    """Parent-list vector when one variable ('i') is referenced twice."""
    amr_text = """(f / foolish~e.3 
                      :mode~e.7 interrogative~e.7
                      :domain~e.0,2 (i / i~e.1)
                      :condition~e.4 (d / do-02~e.5
                                        :ARG0 i
                                        :ARG1 (t / this~e.6)))"""
    graph = AMR.parse_string(amr_text)

    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_2_reentrancy', graph)
    add_false_root(concepts)

    actual = generate_parent_list_vector(graph, concepts)

    # Concept positions assumed by the expected vector:
    #   i f d t interrogative
    #   1 2 3 4 5
    expected = [[-1], [3, 2], [0], [2], [3], [2]]

    failure_note = str(actual) + ' should be' + str(expected)
    assert actual == expected, failure_note
def test_pre_and_post_processing_for_organization():
    """Round-trip an AMR with an organization entity through pre/post-processing.

    The AMR string regenerated from the parent vector must reach a smatch
    score of exactly 1 against the gold AMR string.
    """
    sentence = 'Some propaganda activities of ZF have soon become viewed as jokes by the people .'
    gold_amr_text = """(b / become-01~e.7
      :ARG1 a
      :ARG2 (v / view-02~e.8
            :ARG0~e.11 (p2 / person~e.13)
            :ARG1 (a / activity-06~e.2
                  :ARG0~e.3 (o / organization :wiki ZF
                        :name (n / name :op1 "ZF"~e.4))
                  :ARG1 (p / propaganda~e.1)
                  :quant (s / some~e.0))
            :ARG2~e.9 (t / thing~e.10
                  :ARG2-of~e.10 (j / joke-01~e.10)))
      :time~e.9 (s2 / soon~e.6))"""

    # Pre-processing replaces the graph and the sentence and yields metadata
    # that the post-processing step consumes later.
    graph: AMR = AMR.parse_string(gold_amr_text)
    graph, processed_sentence, metadata = train_pre_processing(graph, sentence)

    concepts = IdentifiedConcepts()
    concepts.create_from_amr('', graph)
    add_false_root(concepts)

    parents = generate_parent_list_vector(graph, concepts)
    post_processing_on_parent_vector(concepts, parents, processed_sentence,
                                     metadata)

    # (parent concept, child concept) -> relation label
    relation_by_edge = {
        ('become-01', 'activity-06'): 'ARG1',
        ('become-01', 'view-02'): 'ARG2',
        ('become-01', 'soon'): 'time',
        ('view-02', 'person'): 'ARG0',
        ('view-02', 'activity-06'): 'ARG1',
        ('view-02', 'thing'): 'ARG2',
        ('activity-06', 'organization'): 'ARG0',
        ('activity-06', 'propaganda'): 'ARG1',
        ('activity-06', 'some'): 'quant',
        ('organization', 'ZF'): 'wiki',
        ('organization', 'name'): 'name',
        ('name', 'ZF'): 'op1',
        ('thing', 'joke-01'): 'ARG2-of'
    }

    root_node: Node = generate_amr_node_for_vector_of_parents(
        concepts, parents, relation_by_edge)
    regenerated_amr_text = root_node.amr_print_with_reentrancy()

    score = calculate_smatch(regenerated_amr_text, gold_amr_text)
    assert score == 1
Ejemplo n.º 24
0
def test_generate_parent_list_vector_reentrancy_ex_3():
    """Parent-list vector when both 'i' and the root 'c2' are re-referenced."""
    amr_text = """(c2 / convince-01~e.1 
                      :ARG0 (i / i~e.0) 
                      :ARG1~e.2 (s / she~e.2) 
                      :concession-of~e.4 (s2 / shallow~e.10 
                            :ARG1-of (c / conviction-02~e.6 
                                  :ARG0~e.5 i~e.5 
                                  :ARG2 c2)))"""
    graph = AMR.parse_string(amr_text)

    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_3_reentrancy', graph)
    add_false_root(concepts)

    actual = generate_parent_list_vector(graph, concepts)

    # Concept positions noted by the original author:
    #   c2 s i c s2
    #   1  2 3 4 5
    expected = [[-1], [0, 4], [1], [4, 1], [5], [1]]

    failure_note = str(actual) + ' should be' + str(expected)
    assert actual == expected, failure_note
Ejemplo n.º 25
0
def test_parse_example_with_polarity():
    """AMR.parse_string on a graph holding a polarity and a numeric constant.

    The expected AMR is assembled by hand (concepts, token alignments,
    relation alignments, per-node children and roots) and compared with the
    parsed one through assert_amr_graph_dictionaries.
    """
    amr_text = """(y2 / year~e.4
                      :time-of~e.5 (r / recover-01~e.7
                            :ARG1-of (e / expect-01 :polarity -~e.6))
                      :ARG1-of (p / possible-01~e.1)
                      :domain~e.2 (d / date-entity :year~e.4 2012~e.0))"""
    actual: AMR = AMR.parse_string(amr_text)

    concepts = {
        'y2': 'year',
        'r': 'recover-01',
        'e': 'expect-01',
        'p': 'possible-01',
        'd': 'date-entity'
    }
    # Variables map to plain token lists; the constants '-' and '2012' map to
    # (token, parent variable) pairs.
    node_alignments = {
        'y2': ['4'],
        'r': ['7'],
        '-': [('6', 'e')],
        'p': ['1'],
        '2012': [('0', 'd')]
    }
    relation_alignments = {
        'time-of': [('5', 'y2')],
        'domain': [('2', 'y2')],
        'year': [('4', 'd')]
    }
    # node -> {relation: [(child,)]}, listed in the order they are stored.
    children_by_node = {
        'y2': {
            'time-of': [('r', )],
            'ARG1-of': [('p', )],
            'domain': [('d', )]
        },
        'r': {'ARG1-of': [('e', )]},
        'e': {'polarity': [('-', )]},
        '-': {},
        'p': {},
        'd': {'year': [('2012', )]},
        '2012': {},
    }

    expected: AMR = AMR()
    expected.node_to_concepts = concepts
    expected.node_to_tokens = node_alignments
    expected.relation_to_tokens = relation_alignments
    for node, children in children_by_node.items():
        expected[node] = children
    expected.roots = ['y2']

    assert_amr_graph_dictionaries(expected, actual)
 def generate_statistics(self, file_path):
     """Collect statistics for every record extracted from file_path.

     For each (sentence, amr_str, amr_id) record the sentence length (number
     of pieces after splitting on a single space) is tallied in
     self.sentence_lengths_all. Sentences whose length lies in
     [self.min_sentence_len, self.max_sentence_len) are parsed and handed to
     self.generate_statistics_for_a_sentence; any exception raised there is
     counted under amr_parse_fail. Any other exception stops the loop, is
     counted under amr_pair_extraction_fail and is printed.
     """
     try:
         records = input_file_parser.extract_data_records(file_path)
         for position, (sentence, amr_str, amr_id) in enumerate(records):
             length = len(sentence.split(" "))
             self.sentence_lengths_all[length] += 1

             # Only sentences inside the configured length window are parsed.
             if not (self.min_sentence_len <= length < self.max_sentence_len):
                 continue

             try:
                 parsed = AMR.parse_string(amr_str)
                 self.generate_statistics_for_a_sentence(
                     position, amr_id, parsed, sentence, amr_str)
             except Exception:
                 self.histogram_sentence_fails[amr_parse_fail] += 1
                 self.sentence_failed += 1
     except Exception as error:
         # Original author's note: these exceptions maybe shouldn't be
         # counted, or at least not added to the sentence fails.
         self.histogram_sentence_fails[amr_pair_extraction_fail] += 1
         self.sentence_failed += 1
         print(error)
Ejemplo n.º 27
0
def create_stanford_ner_preprocessing_histogram():
    """
        Count how many person, organization, percentage-entity and date-entity
        (with child relation time) nodes occur in the training dataset, to
        judge which preprocessing is worth implementing.

        Returns a dict mapping each histogram key to the summed result of
        get_no_nodes over all training AMRs.
    """
    # (histogram key, concept, relation) -- the last two are passed to
    # get_no_nodes; presumably the concept and one of its child relations.
    counted = (
        ('person', 'person', 'name'),
        ('organization', 'organization', 'name'),
        ('percentage-entity', 'percentage-entity', 'value'),
        ('time date-entity', 'date-entity', 'time'),
    )

    training_dataset_dict = read_dataset_dict('training')
    histogram = {key: 0 for key, _, _ in counted}

    for records in training_dataset_dict.values():
        for _, amr_str, _ in records:
            amr: AMR = AMR.parse_string(amr_str)
            for key, concept, relation in counted:
                histogram[key] += get_no_nodes(amr, concept, relation)
    return histogram
Ejemplo n.º 28
0
def test_generate_parent_list_vector_reentrancy():
    """Parent-list vector when variable 'w' is referenced from three places."""
    amr_text = """(r / receive-01~e.4
                      :ARG0 (w / we~e.0)
                      :ARG1 (t / thing~e.7
                            :ARG0-of~e.7 (r2 / remind-01~e.7
                                  :ARG1 (p / pay-01~e.6
                                        :ARG0 w)
                                  :ARG2 w))
                      :ARG2~e.8 (h / hospital~e.10)
                      :time (n / now~e.2)
                      :time (a / already~e.3))"""
    graph = AMR.parse_string(amr_text)

    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_2_polarities', graph)
    add_false_root(concepts)

    actual = generate_parent_list_vector(graph, concepts)

    # Concept positions assumed by the expected vector:
    #   w n a r p r2 t h
    #   1 2 3 4 5 6  7 8
    expected = [[-1], [5, 6, 4], [4], [4], [0], [6], [7], [4], [4]]

    failure_note = str(actual) + ' should be' + str(expected)
    assert actual == expected, failure_note
Ejemplo n.º 29
0
        for date_relation, quantity in zip(date_relations, quantities):
            date_entity_node.add_child(Node(quantity, quantity), date_relation)
        return date_entity_node


if __name__ == "__main__":
    # Manual driver: runs the action-sequence pipeline on one hard-coded
    # sentence/AMR pair.
    # NOTE(review): parser_parameters, actions_re and tokens are not used in
    # the visible part of this block -- the excerpt may be truncated.

    parser_parameters = ParserParameters(max_len=50, with_enhanced_dep_info=False,
                                         with_target_semantic_labels=False, with_reattach=True,
                                         with_gold_concept_labels=True, with_gold_relation_labels=True)

    sentence = "It looks like we will also bring in whales ."
    amr = AMR.parse_string("""
    (l / look-02~e.1
          :ARG1~e.2 (b / bring-01~e.6
                :ARG0 (w / we~e.3)
                :ARG1~e.7 (w2 / whale~e.8)
                :mod (a / also~e.5)))
    """)

    # Wrap the parsed graph in a CustomizedAMR, the form consumed by the
    # action-sequence generator below.
    custom_AMR = CustomizedAMR()
    custom_AMR.create_custom_AMR(amr)

    # Generate the actions for this pair and keep the `index` attribute of
    # each of them.
    actions = ActionSequenceGenerator.generate_action_sequence(custom_AMR, sentence)
    acts_i = [a.index for a in actions]

    # Load the action objects into an ActionConceptTransfer, then build new
    # actions from the bare indices (presumably restoring the concept
    # information from the loaded actions -- confirm against the class).
    act = ActionConceptTransfer()
    act.load_from_action_objects(actions)
    actions_re = act.populate_new_actions(acts_i)

    # Convert the raw sentence with the project's tokenizer utility.
    tokens = tokenizer_util.text_to_sequence(sentence)
Ejemplo n.º 30
0
def get_custom_amr(amr_str):
    """Parse amr_str and return it wrapped in an amr_data.CustomizedAMR."""
    parsed = AMR.parse_string(amr_str)
    wrapper = amr_data.CustomizedAMR()
    wrapper.create_custom_AMR(parsed)
    return wrapper