Python AMR 예제들, models.amr_graph.AMR Python 예제들

예제 #1

0

파일 보기

파일: stanford_train_preprocessing_util.py 프로젝트: keg-tucn/AMR

def replace_subgraph_for_person_or_organization(amr: AMR, node_var):
    """
    Collapse the named-entity subgraph hanging off ``node_var`` into a placeholder.

    The ``wiki`` and ``name`` relations of the node are dropped, together with the
    name node and the literals they point to, and the node concept is upper-cased
    (person -> PERSON, organization -> ORGANIZATION).

    Eg.
        p / person
               :wiki "Deng_Xiaoping"
               :name (n / name :op1 "Deng"~e.1 :op2 "Xiaoping"~e.2)
               :ARG0-of (h / have-rel-role-91 :ARG2 (c / comrade~e.0)))
    becomes
        p / PERSON
               :ARG0-of (h / have-rel-role-91 :ARG2 (c / comrade~e.0)))

    :param amr: graph to rewrite; it is modified in place
    :param node_var: variable of the person / organization node
    :return: the same ``amr`` object
    """
    # .get(...) is used instead of direct indexing on purpose: the original
    # author noted that direct indexing sometimes gives wrong results.
    wiki_literal = amr[node_var].get('wiki')[0][0]
    name_node = amr[node_var].get('name')[0][0]
    name_op_literals = [
        targets[0][0] for targets in amr[name_node].values()
    ]

    # Drop the relation alignments first, while the relations still exist.
    update_relation_to_tokens(amr, node_var, ['wiki', 'name'])
    update_relation_to_tokens(amr, name_node, amr[name_node].keys())

    # Detach the subgraph from the adjacency dictionary.
    del amr[node_var]['wiki']
    del amr[node_var]['name']
    del amr[name_node]

    # The wiki literal may already be gone when several entities share it
    # (typically the "-" literal).
    if wiki_literal in amr:
        if wiki_literal != '-':
            del amr[wiki_literal]
        elif not any('polarity' in relations for relations in amr.values()):
            # "-" doubles as the polarity literal: only drop it when no
            # polarity relation is left anywhere in the graph.
            del amr[wiki_literal]

    for literal in name_op_literals:
        if literal in amr:
            del amr[literal]

    # The name node no longer exists, so it has no concept either.
    del amr.node_to_concepts[name_node]

    # Forget the token alignments of every removed node / literal.
    for removed in (*name_op_literals, name_node, wiki_literal):
        amr.node_to_tokens.pop(removed, None)

    # person -> PERSON, organization -> ORGANIZATION
    amr.node_to_concepts[node_var] = amr.node_to_concepts[node_var].upper()
    return amr

예제 #2

0

파일 보기

def test_parse_example_1():
    """Parse a small aligned AMR and compare it with a hand-built graph."""
    amr_text = """(r / recommend-01~e.1
                    :ARG1 (a / advocate-01~e.4
                        :ARG1 (i / it~e.0)
                        :manner~e.2 (v / vigorous~e.3)))"""
    parsed: AMR = AMR.parse_string(amr_text)

    expected: AMR = AMR()
    expected.node_to_concepts = {'i': 'it', 'v': 'vigorous', 'a': 'advocate-01', 'r': 'recommend-01'}
    expected.node_to_tokens = {'i': ['0'], 'v': ['3'], 'a': ['4'], 'r': ['1']}
    # only :manner carries its own alignment (~e.2); it is recorded with parent 'a'
    expected.relation_to_tokens = {'manner': [('2', 'a')]}
    for variable, relations in (
            ('i', {}),
            ('v', {}),
            ('a', {'ARG1': [('i',)], 'manner': [('v',)]}),
            ('r', {'ARG1': [('a',)]}),
    ):
        expected[variable] = relations
    expected.roots = ['r']

    assert_amr_graph_dictionaries(expected, parsed)

예제 #3

0

파일 보기

파일: standford_pre_post_processing_test.py 프로젝트: keg-tucn/AMR

def test_train_pre_processing_ex_person_with_polarity():
    """
    Pre-process a sentence with three persons, one of which also has a :polarity.

    The expected graph collapses p2 and p4 into PERSON and leaves p3 (the node
    that also carries the polarity) with its wiki/name subgraph intact.
    """
    sentence = 'It is Santorum that is the by far major nonRomney candidate and Newt would appear to be the spoiler .'

    # input graph
    amr: AMR = AMR()
    amr.roots = ['a']
    amr.reentrance_triples = []
    amr.node_to_concepts = {
        'b': 'by-far', 'm': 'major-02', 'n2': 'name', 'p3': 'person',
        'n': 'name', 'p2': 'person', 'c': 'candidate', 'n3': 'name',
        'p4': 'person', 's': 'spoil-01', 'a2': 'appear-02', 'a': 'and',
    }
    amr.node_to_tokens = {
        'b': ['6', '7'], 'Santorum': [('2', 'n')], 'm': ['8'],
        'Newt': [('12', 'n3')], 'c': ['10'], 'a2': ['14'], 'a': ['11'],
    }
    amr.relation_to_tokens = {'domain': [('1', 'c'), ('4', 'c')]}
    for variable, relations in (
            ('b', {}),
            ('m', {'degree': [('b',)]}),
            ('Romney', {}),
            ('n2', {'op1': [('Romney',)]}),
            ('-', {}),
            ('p3', {'polarity': [('-',)], 'wiki': [('Mitt_Romney',)], 'name': [('n2',)]}),
            ('Mitt_Romney', {}),
            ('Santorum', {}),
            ('n', {'op1': [('Santorum',)]}),
            ('Rick_Santorum', {}),
            ('p2', {'wiki': [('Rick_Santorum',)], 'name': [('n',)]}),
            ('c', {'ARG1-of': [('m',)], 'mod': [('p3',)], 'domain': [('p2',)]}),
            ('Newt', {}),
            ('n3', {'op1': [('Newt',)]}),
            ('Newt_Gingrich', {}),
            ('p4', {'wiki': [('Newt_Gingrich',)], 'name': [('n3',)]}),
            ('s', {'ARG0': [('p4',)]}),
            ('a2', {'ARG1': [('s',)]}),
            ('a', {'op1': [('c',)], 'op2': [('a2',)]}),
    ):
        amr[variable] = relations

    generated_amr, generated_sentence, generated_metadata = train_pre_processing(amr, sentence)

    # expected outcome
    expected_sentence = 'It is PERSON that is the by far major nonRomney candidate and PERSON would appear to be the spoiler .'
    expected_metadata = {2: ['Santorum'], 12: ['Newt']}
    expected_amr: AMR = AMR()
    expected_amr.roots = ['a']
    expected_amr.reentrance_triples = []
    expected_amr.node_to_concepts = {
        'b': 'by-far', 'm': 'major-02', 'n2': 'name', 'p3': 'person',
        'p2': 'PERSON', 'c': 'candidate', 'p4': 'PERSON', 's': 'spoil-01',
        'a2': 'appear-02', 'a': 'and',
    }
    expected_amr.node_to_tokens = {
        'b': ['6', '7'], 'p2': ['2'], 'm': ['8'], 'p4': ['12'],
        'c': ['10'], 'a2': ['14'], 'a': ['11'],
    }
    expected_amr.relation_to_tokens = {'domain': [('1', 'c'), ('4', 'c')]}
    for variable, relations in (
            ('b', {}),
            ('m', {'degree': [('b',)]}),
            ('Romney', {}),
            ('n2', {'op1': [('Romney',)]}),
            ('-', {}),
            ('p3', {'polarity': [('-',)], 'wiki': [('Mitt_Romney',)], 'name': [('n2',)]}),
            ('Mitt_Romney', {}),
            ('p2', {}),
            ('c', {'ARG1-of': [('m',)], 'mod': [('p3',)], 'domain': [('p2',)]}),
            ('p4', {}),
            ('s', {'ARG0': [('p4',)]}),
            ('a2', {'ARG1': [('s',)]}),
            ('a', {'op1': [('c',)], 'op2': [('a2',)]}),
    ):
        expected_amr[variable] = relations

    assert expected_sentence == generated_sentence
    assert_amr_graph_dictionaries(expected_amr, generated_amr)
    assert generated_metadata == expected_metadata

예제 #4

0

파일 보기

def test_parse_example_with_reentrancy():
    """Parse an AMR in which variable ``w`` is referenced from several parents."""
    amr_text = """(r / receive-01~e.4
                      :ARG0 (w / we~e.0)
                      :ARG1 (t / thing~e.7
                            :ARG0-of~e.7 (r2 / remind-01~e.7
                                  :ARG1 (p / pay-01~e.6
                                        :ARG0 w)
                                  :ARG2 w))
                      :ARG2~e.8 (h / hospital~e.10)
                      :time (n / now~e.2)
                      :time (a / already~e.3))"""
    parsed: AMR = AMR.parse_string(amr_text)

    expected: AMR = AMR()
    expected.node_to_concepts = {
        'w': 'we', 'p': 'pay-01', 'r2': 'remind-01', 't': 'thing',
        'h': 'hospital', 'n': 'now', 'a': 'already', 'r': 'receive-01',
    }
    expected.node_to_tokens = {
        'p': ['6'], 'r2': ['7'], 'w': ['0'], 't': ['7'],
        'h': ['10'], 'n': ['2'], 'a': ['3'], 'r': ['4'],
    }
    # aligned relations are stored as (token, parent variable) pairs
    expected.relation_to_tokens = {'ARG0-of': [('7', 't')], 'ARG2': [('8', 'r')]}
    for variable, relations in (
            ('w', {}),
            ('p', {'ARG0': [('w',)]}),
            ('r2', {'ARG1': [('p',)], 'ARG2': [('w',)]}),
            ('t', {'ARG0-of': [('r2',)]}),
            ('h', {}),
            ('n', {}),
            ('a', {}),
            ('r', {
                'ARG0': [('w',)],
                'ARG1': [('t',)],
                'ARG2': [('h',)],
                # the two :time children share one relation entry
                'time': [('n',), ('a',)],
            }),
    ):
        expected[variable] = relations
    expected.roots = ['r']

    assert_amr_graph_dictionaries(expected, parsed)

예제 #5

0

파일 보기

def test_parse_example2_with_2polarities():
    """Parse an AMR with two ``:polarity -`` literals that share the same alignment."""
    amr_text = """(c / contrast-01~e.0
                      :ARG2 (a2 / authorize-01~e.6
                            :ARG1 (o2 / or~e.9
                                  :op1 (a / approve-01
                                        :ARG0 p
                                        :ARG1 (p2 / pay-01
                                              :purpose (e2 / employ-01 :polarity -~e.2,11)))
                                  :op2 (d / deny-01
                                        :ARG0 p
                                        :ARG1 p2))
                            :ARG2 (p / person
                                  :ARG0-of (r / represent-01
                                        :ARG1 (o / organization
                                              :mod (e / employ-01 :polarity -~e.2,11))))))"""
    parsed: AMR = AMR.parse_string(amr_text)

    expected: AMR = AMR()
    expected.node_to_concepts = {
        'e2': 'employ-01', 'p2': 'pay-01', 'a': 'approve-01', 'd': 'deny-01',
        'o2': 'or', 'e': 'employ-01', 'o': 'organization', 'r': 'represent-01',
        'p': 'person', 'a2': 'authorize-01', 'c': 'contrast-01',
    }
    expected.node_to_tokens = {
        # one (token, parent) pair per aligned token and per polarity parent
        '-': [('2', 'e2'), ('11', 'e2'), ('2', 'e'), ('11', 'e')],
        'o2': ['9'],
        'a2': ['6'],
        'c': ['0'],
    }
    expected.relation_to_tokens = {}
    for variable, relations in (
            ('-', {}),
            ('e2', {'polarity': [('-',)]}),
            ('p2', {'purpose': [('e2',)]}),
            ('p', {'ARG0-of': [('r',)]}),
            ('a', {'ARG0': [('p',)], 'ARG1': [('p2',)]}),
            ('d', {'ARG0': [('p',)], 'ARG1': [('p2',)]}),
            ('o2', {'op1': [('a',)], 'op2': [('d',)]}),
            ('e', {'polarity': [('-',)]}),
            ('o', {'mod': [('e',)]}),
            ('r', {'ARG1': [('o',)]}),
            ('a2', {'ARG1': [('o2',)], 'ARG2': [('p',)]}),
            ('c', {'ARG2': [('a2',)]}),
    ):
        expected[variable] = relations
    expected.roots = ['c']

    assert_amr_graph_dictionaries(expected, parsed)

예제 #6

0

파일 보기

파일: standford_pre_post_processing_test.py 프로젝트: keg-tucn/AMR

def test_train_pre_processing_ex_organization():
    """
    Pre-process a sentence whose organization has ``:wiki -``.

    The expected graph replaces the organization subgraph with ORGANIZATION and
    no longer contains the ``-`` literal, the name node or the name literal.
    """
    sentence = "Some propaganda activities of ZF have soon become viewed as jokes by the people ."

    # input graph
    amr: AMR = AMR()
    amr.roots = ['b']
    amr.reentrance_triples = [('b', 'ARG1', 'a')]
    amr.node_to_concepts = {
        'p2': 'person', 'n': 'name', 'o': 'organization', 'p': 'propaganda',
        's': 'some', 'a': 'activity-06', 'j': 'joke-01', 't': 'thing',
        'v': 'view-02', 's2': 'soon', 'b': 'become-01',
    }
    amr.node_to_tokens = {
        'ZF': [('4', 'n')], 'p': ['1'], 's': ['0'], 'j': ['10'], 'p2': ['13'],
        'a': ['2'], 't': ['10'], 'v': ['8'], 's2': ['6'], 'b': ['7'],
    }
    amr.relation_to_tokens = {
        'ARG0': [('3', 'a'), ('11', 'v')],
        'ARG2-of': [('10', 't')],
        'ARG2': [('9', 'v')],
        'time': [('9', 'b')],
    }
    for variable, relations in (
            ('p2', {}),
            ('ZF', {}),
            ('n', {'op1': [('ZF',)]}),
            ('-', {}),
            ('o', {'wiki': [('-',)], 'name': [('n',)]}),
            ('p', {}),
            ('s', {}),
            ('a', {'ARG0': [('o',)], 'ARG1': [('p',)], 'quant': [('s',)]}),
            ('j', {}),
            ('t', {'ARG2-of': [('j',)]}),
            ('v', {'ARG0': [('p2',)], 'ARG1': [('a',)], 'ARG2': [('t',)]}),
            ('s2', {}),
            ('b', {'ARG1': [('a',)], 'ARG2': [('v',)], 'time': [('s2',)]}),
    ):
        amr[variable] = relations

    generated_amr, generated_sentence, generated_metadata = train_pre_processing(amr, sentence)

    # expected outcome
    expected_sentence = "Some propaganda activities of ORGANIZATION have soon become viewed as jokes by the people ."
    expected_metadata = {4: ['ZF']}
    expected_amr: AMR = AMR()
    expected_amr.roots = ['b']
    expected_amr.reentrance_triples = [('b', 'ARG1', 'a')]
    expected_amr.node_to_concepts = {
        'p2': 'person', 'o': 'ORGANIZATION', 'p': 'propaganda',
        's': 'some', 'a': 'activity-06', 'j': 'joke-01', 't': 'thing',
        'v': 'view-02', 's2': 'soon', 'b': 'become-01',
    }
    expected_amr.node_to_tokens = {
        'o': ['4'], 'p': ['1'], 's': ['0'], 'j': ['10'], 'p2': ['13'],
        'a': ['2'], 't': ['10'], 'v': ['8'], 's2': ['6'], 'b': ['7'],
    }
    expected_amr.relation_to_tokens = {
        'ARG0': [('3', 'a'), ('11', 'v')],
        'ARG2-of': [('10', 't')],
        'ARG2': [('9', 'v')],
        'time': [('9', 'b')],
    }
    for variable, relations in (
            ('p2', {}),
            ('o', {}),
            ('p', {}),
            ('s', {}),
            ('a', {'ARG0': [('o',)], 'ARG1': [('p',)], 'quant': [('s',)]}),
            ('j', {}),
            ('t', {'ARG2-of': [('j',)]}),
            ('v', {'ARG0': [('p2',)], 'ARG1': [('a',)], 'ARG2': [('t',)]}),
            ('s2', {}),
            ('b', {'ARG1': [('a',)], 'ARG2': [('v',)], 'time': [('s2',)]}),
    ):
        expected_amr[variable] = relations

    assert_amr_graph_dictionaries(expected_amr, generated_amr)
    assert generated_sentence == expected_sentence
    assert generated_metadata == expected_metadata

예제 #7

0

파일 보기

def generate_test_data(file_path, verbose=True):
    """
    Build (sentence, concepts metadata) pairs for every usable record of a file.

    Each record's AMR string is parsed only as a validity check; records whose
    AMR fails to parse, or whose named-entity replacement fails, are logged and
    left out of the result.

    :param file_path: file handed to ``input_file_parser.extract_data_records``
    :param verbose: when exactly ``False``, logging up to WARNING level is
        disabled process-wide (it is not re-enabled afterwards)
    :return: list of ``(new_sentence, concepts_metadata)`` tuples, where
        ``concepts_metadata`` maps the first element of every named entity
        returned by ``NamedEntitiesReplacer.process_sentence`` to its second
    """
    if verbose is False:
        logging.disable(logging.WARNING)

    sentence_amr_triples = input_file_parser.extract_data_records(file_path)
    test_data = []

    for i, (sentence, amr_str, _amr_id) in enumerate(tqdm(sentence_amr_triples)):
        try:
            # logging.warn() is deprecated (removed in Python 3.13)
            logging.warning("Started processing example %d", i)

            # Result unused on purpose: the call only filters out records
            # whose AMR cannot be parsed.
            AMR.parse_string(amr_str)

            new_sentence, named_entities = NamedEntitiesReplacer.process_sentence(sentence)
            concepts_metadata = {
                named_entity[0]: named_entity[1]
                for named_entity in named_entities
            }
            test_data.append((new_sentence, concepts_metadata))
        except Exception as e:
            # Best effort: a failing record is reported and skipped.
            logging.warning(e)
            logging.warning("Failed at: %d", i)
            logging.warning("%s\n", sentence)

    return test_data

예제 #8

0

파일 보기

def test_create_from_amr_example_2():
    """Concepts identified from an AMR with a polarity literal come out in the expected order."""
    amr_text = """(a / and~e.0 
      :op2 (p / possible-01~e.8 
            :ARG1 (a3 / avoid-01~e.10 
                  :ARG0 (h / he~e.7) 
                  :ARG1 (c / censure-01~e.12 
                        :ARG1 h)) 
            :ARG1-of (a2 / actual-02~e.9) 
            :manner (p2 / promise-01~e.5 :polarity~e.2 -~e.2 
                  :ARG0 h 
                  :mod (a4 / any~e.4))))"""
    amr = AMR.parse_string(amr_text)
    # kept from the original test: the customized AMR is built but not inspected
    custom_amr = CustomizedAMR()
    custom_amr.create_custom_AMR(amr)

    generated = IdentifiedConcepts()
    generated.create_from_amr('amr_id_2', amr)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_2'
    expected.ordered_concepts = [
        Concept(*concept_args) for concept_args in (
            ('a', 'and'),
            ('-', '-', 0),
            ('a4', 'any'),
            ('p2', 'promise-01'),
            ('h', 'he'),
            ('p', 'possible-01'),
            ('a2', 'actual-02'),
            ('a3', 'avoid-01'),
            ('c', 'censure-01'),
        )
    ]
    assert_identified_concepts(expected, generated)

예제 #9

0

파일 보기

def test_create_from_amr_example_reentrancy():
    """Concepts identified from an AMR with a re-entrant variable appear once, in the expected order."""
    amr_text = """(r / receive-01~e.4
                      :ARG0 (w / we~e.0)
                      :ARG1 (t / thing~e.7
                            :ARG0-of~e.7 (r2 / remind-01~e.7
                                  :ARG1 (p / pay-01~e.6
                                        :ARG0 w)
                                  :ARG2 w))
                      :ARG2~e.8 (h / hospital~e.10)
                      :time (n / now~e.2)
                      :time (a / already~e.3))"""
    amr = AMR.parse_string(amr_text)

    generated = IdentifiedConcepts()
    generated.create_from_amr('amr_id_reentrancy', amr)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_reentrancy'
    expected.ordered_concepts = [
        Concept(variable, concept) for variable, concept in (
            ('w', 'we'),
            ('n', 'now'),
            ('a', 'already'),
            ('r', 'receive-01'),
            ('p', 'pay-01'),
            ('r2', 'remind-01'),
            ('t', 'thing'),
            ('h', 'hospital'),
        )
    ]
    assert_identified_concepts(expected, generated)

예제 #10

0

파일 보기

def test_generate_parent_list_vector_with_2_polarites():
    """The parent-list vector of an AMR with two polarity literals matches the hand-computed one."""
    amr_text = """(a / and~e.0
                      :op2 (p2 / practice-01~e.13
                            :ARG1 (l / loan-01~e.12
                                  :ARG2 (p / person~e.11
                                        :ARG0-of~e.11 (s / study-01~e.11)))
                            :mod (s2 / sane~e.10 :polarity~e.10 -~e.10)
                            :ARG1-of (i2 / identical-01~e.16
                                  :ARG2~e.19 (p3 / practice-01~e.24
                                        :ARG1 (l2 / loan-01~e.23
                                              :ARG1 (m / mortgage-01~e.22))
                                        :mod (s3 / sane~e.21 :polarity~e.21 -~e.21))
                                  :manner (w / way~e.18
                                        :mod (e / every~e.18)))
                            :ARG0-of (c2 / cause-01~e.3,8 
                                  :ARG1 (b / be-located-at-91~e.5,7
                                        :ARG1 (t / they~e.4)
                                        :ARG2 (t2 / there~e.6))
                                  :mod (o / only~e.2))))"""
    amr: AMR = AMR.parse_string(amr_text)
    concepts = IdentifiedConcepts()
    concepts.create_from_amr('amr_2_polarities', amr)
    add_false_root(concepts)
    generated = generate_parent_list_vector(amr, concepts)

    # Concept order used for the expected vector (index 0 is the false root):
    # a o c2 t b t2 - s2 s p  l  p2 i2 e  w  -  s3 m  l2 p3
    # 1 2 3  4 5 6  7 8  9 10 11 12 13 14 15 16 17 18 19 20
    expected = [
        [-1], [0], [3], [12], [5], [3], [5],
        [8], [12], [10], [11], [12], [1], [12],
        [15], [13], [17], [20], [19], [20], [13],
    ]
    assertion_message = f"{generated} should be{expected}"
    assert generated == expected, assertion_message

예제 #11

0

파일 보기

파일: dataset_loader.py 프로젝트: keg-tucn/AMR

def read_original_graphs(file_type, filter_path="deft", cache=True):
    """
        Returns a list of (amr_id, sentence, AMR, CustomizedAMR) quadruples
        Loads the list from a dump file if present, else generates it and saves it to a dump file
        AMRs that fail to parse or customize are skipped; only their count is printed per file
        :param file_type - data set partition (training, dev or test)
        :param filter_path - filtering criteria for data files (None falls back to "deft")
        :param cache - allow to load from dump file if true, else calculate from original file and save new dump
    """
    if filter_path is None:
        filter_path = "deft"
    dir_path = AMR_ALIGNMENTS_SPLIT + "/" + file_type

    parsed_data = []

    # dump artefacts live next to the corpus files, so they are filtered out by name
    original_corpus = sorted(
        name for name in listdir(dir_path)
        if "dump" not in name and filter_path in name)

    for file_name in original_corpus:
        original_file_path = dir_path + "/" + file_name
        dump_file_path = dir_path + "/original_graphs_dumps/" + file_name + ".dump"
        print(original_file_path)

        if cache and path.exists(dump_file_path):
            print("cache")
            # NOTE(review): `js` is defined outside this block; if it is a
            # pickle-style serializer, dump files must come from a trusted source.
            with open(dump_file_path, "rb") as dump_file:
                parsed_data += js.load(dump_file)
            continue

        print("generate")
        file_data = input_file_parser.extract_data_records(original_file_path)

        parsed_file_data = []
        failed_amrs_in_file = 0

        for amr_triple in file_data:
            try:
                camr_graph = AMR.parse_string(amr_triple[1])

                custom_amr_graph = CustomizedAMR()
                custom_amr_graph.create_custom_AMR(camr_graph)

                parsed_file_data.append((amr_triple[2], amr_triple[0],
                                         camr_graph, custom_amr_graph))
            except Exception:
                # best effort: a broken AMR is counted and skipped, not fatal
                failed_amrs_in_file += 1

        # exist_ok avoids the check-then-create race of exists() + makedirs()
        makedirs(path.dirname(dump_file_path), exist_ok=True)
        with open(dump_file_path, "wb") as dump_file:
            js.dump(parsed_file_data, dump_file)
        parsed_data += parsed_file_data

        print(("%d / %d in %s" %
               (failed_amrs_in_file, len(file_data), original_file_path)))

    return parsed_data

예제 #12

0

파일 보기

파일: standford_pre_post_processing_test.py 프로젝트: keg-tucn/AMR

def test_train_pre_processing_ex_person_reentrancy():
    """
    Pre-process a sentence whose person node is re-entrant (referenced by both
    ``s`` and ``e``) and whose name spans two tokens ("Wang Shi").

    The two name tokens are replaced by one PERSON token, so the expected
    alignments of tokens after the name are shifted down by one.
    """
    sentence = 'Now , Wang Shi said , these responses have had effects on me .'
    amr: AMR = AMR()
    amr.roots = ['s']
    amr.reentrance_triples = [('e', 'ARG1', 'p')]
    amr.node_to_concepts = {'n2': 'name', 'p': 'person', 'r': 'respond-01', 't': 'this',
                            't2': 'thing', 'e': 'effect-03', 'n': 'now', 's': 'say-01'}
    amr.node_to_tokens = {'Wang': [('2', 'n2')], 'Shi': [('3', 'n2')], 'r': ['7'], 't': ['6'],
                          't2': ['7'], 'p': ['12'], 'e': ['10'], 'n': ['0'], 's': ['4']}
    amr.relation_to_tokens = {'ARG2-of': [('7', 't2')], 'ARG1': [('11', 'e')]}
    amr['Wang'] = {}
    amr['n2'] = {'op1': [('Wang',)], 'op2': [('Shi',)]}
    amr['Shi'] = {}
    amr['Wang_Shi_(entrepreneur)'] = {}
    amr['p'] = {'wiki': [('Wang_Shi_(entrepreneur)',)], 'name': [('n2',)]}
    amr['r'] = {}
    amr['t'] = {}
    amr['t2'] = {'ARG2-of': [('r',)], 'mod': [('t',)]}
    amr['e'] = {'ARG0': [('t2',)], 'ARG1': [('p',)]}
    amr['n'] = {}
    amr['s'] = {'ARG0': [('p',)], 'ARG1': [('e',)], 'time': [('n',)]}
    generated_amr, generated_sentence, generated_metadata = train_pre_processing(amr, sentence)

    # expected
    expected_sentence = 'Now , PERSON said , these responses have had effects on me .'
    expected_metadata = {2: ['Wang', 'Shi']}
    expected_amr: AMR = AMR()
    expected_amr.roots = ['s']
    expected_amr.reentrance_triples = [('e', 'ARG1', 'p')]
    expected_amr.node_to_concepts = {'p': 'PERSON', 'r': 'respond-01', 't': 'this',
                                     't2': 'thing', 'e': 'effect-03', 'n': 'now', 's': 'say-01'}
    # The literal used to list 'p' twice ('p': ['2'] and 'p': ['11']); Python
    # keeps only the last value of a duplicated key, so the effective
    # expectation has always been 'p': ['11'].  The dead entry is removed.
    # NOTE(review): confirm whether token 2 (the PERSON placeholder) should
    # also be expected for 'p'.
    expected_amr.node_to_tokens = {'p': ['11'], 'r': ['6'], 't': ['5'],
                                   't2': ['6'], 'e': ['9'], 'n': ['0'], 's': ['3']}
    expected_amr.relation_to_tokens = {'ARG2-of': [('6', 't2')], 'ARG1': [('10', 'e')]}
    expected_amr['p'] = {}
    expected_amr['r'] = {}
    expected_amr['t'] = {}
    expected_amr['t2'] = {'ARG2-of': [('r',)], 'mod': [('t',)]}
    expected_amr['e'] = {'ARG0': [('t2',)], 'ARG1': [('p',)]}
    expected_amr['n'] = {}
    expected_amr['s'] = {'ARG0': [('p',)], 'ARG1': [('e',)], 'time': [('n',)]}

    assert_amr_graph_dictionaries(expected_amr, generated_amr)
    assert generated_sentence == expected_sentence
    assert generated_metadata == expected_metadata

예제 #13

0

파일 보기

파일: stanford_pre_post_processing_integration_test.py 프로젝트: keg-tucn/AMR

def test_pre_and_post_processing_eg_2():
    """
    Round trip: pre-process an AMR, derive its vector of parents, post-process it,
    rebuild the AMR from the vector and compare it with the expected string via smatch.
    """
    sentence = 'It is Santorum that is the by far major nonRomney candidate and Newt would appear to be the spoiler .'
    amr_text = """(a / and~e.11
      :op1 (c / candidate~e.10
            :ARG1-of (m / major-02~e.8
                  :degree (b / by-far~e.6,7))
            :mod (p3 / person~e.9 :polarity -~e.9 :wiki "Mitt_Romney"~e.9
                  :name (n2 / name~e.9 :op1 "Romney"~e.9))
            :domain~e.1,4 (p2 / person :wiki "Rick_Santorum"
                  :name (n / name :op1 "Santorum"~e.2)))
      :op2 (a2 / appear-02~e.14
            :ARG1 (s / spoil-01~e.18
                  :ARG0 (p4 / person :wiki "Newt_Gingrich"
                        :name (n3 / name :op1 "Newt"~e.12)))))"""

    # pre-processing
    amr: AMR = AMR.parse_string(amr_text)
    amr, new_sentence, metadata = train_pre_processing(amr, sentence)

    # concepts -> vector of parents -> post-processing
    concepts = IdentifiedConcepts()
    concepts.create_from_amr('', amr)
    add_false_root(concepts)
    parents = generate_parent_list_vector(amr, concepts)
    post_processing_on_parent_vector(concepts, parents, new_sentence, metadata)

    # (parent concept, child concept) -> relation label used when rebuilding
    relations_dict = {
        ('and', 'candidate'): 'op1',
        ('and', 'appear-02'): 'op2',
        ('candidate', 'major-02'): 'ARG1-of',
        ('candidate', 'person'): 'mod',
        ('major-02', 'by-far'): 'degree',
        ('person', '-'): 'polarity',
        ('person', 'Mitt_Romney'): 'wiki',
        ('person', 'name'): 'name',
        ('person', 'Santorum'): 'wiki',
        ('name', 'Romney'): 'op1',
        ('name', 'Santorum'): 'op1',
        ('appear-02', 'spoil-01'): 'ARG1',
        ('spoil-01', 'person'): 'ARG0',
        ('person', 'Newt'): 'wiki',
        ('name', 'Newt'): 'op1',
    }
    rebuilt_root: Node = generate_amr_node_for_vector_of_parents(
        concepts, parents, relations_dict)
    generated_amr_str = rebuilt_root.amr_print_with_reentrancy()

    expected_amr_str = """(a / and~e.11
      :op1 (c / candidate~e.10
            :ARG1-of (m / major-02~e.8
                  :degree (b / by-far~e.6,7))
            :mod (p3 / person~e.9 :polarity -~e.9 :wiki "Mitt_Romney"~e.9
                  :name (n2 / name~e.9 :op1 "Romney"~e.9))
            :mod~e.1,4 (p2 / person :wiki "Santorum"
                  :name (n / name :op1 "Santorum"~e.2)))
      :op2 (a2 / appear-02~e.14
            :ARG1 (s / spoil-01~e.18
                  :ARG0 (p4 / person :wiki "Newt"
                        :name (n3 / name :op1 "Newt"~e.12)))))"""

    assert calculate_smatch(generated_amr_str, expected_amr_str) == 1

예제 #14

0

파일 보기

def test_parse_example_with_polarity():
    """Parse an AMR containing a polarity literal and a numeric literal, both aligned."""
    amr_text = """(y2 / year~e.4
                      :time-of~e.5 (r / recover-01~e.7
                            :ARG1-of (e / expect-01 :polarity -~e.6))
                      :ARG1-of (p / possible-01~e.1)
                      :domain~e.2 (d / date-entity :year~e.4 2012~e.0))"""
    parsed: AMR = AMR.parse_string(amr_text)

    expected: AMR = AMR()
    expected.node_to_concepts = {
        'y2': 'year', 'r': 'recover-01', 'e': 'expect-01',
        'p': 'possible-01', 'd': 'date-entity',
    }
    expected.node_to_tokens = {
        'y2': ['4'],
        'r': ['7'],
        # literals are aligned as (token, parent variable) pairs
        '-': [('6', 'e')],
        'p': ['1'],
        '2012': [('0', 'd')],
    }
    expected.relation_to_tokens = {
        'time-of': [('5', 'y2')],
        'domain': [('2', 'y2')],
        'year': [('4', 'd')],
    }
    for variable, relations in (
            ('y2', {'time-of': [('r',)], 'ARG1-of': [('p',)], 'domain': [('d',)]}),
            ('r', {'ARG1-of': [('e',)]}),
            ('e', {'polarity': [('-',)]}),
            ('-', {}),
            ('p', {}),
            ('d', {'year': [('2012',)]}),
            ('2012', {}),
    ):
        expected[variable] = relations
    expected.roots = ['y2']

    assert_amr_graph_dictionaries(expected, parsed)

예제 #15

0

파일 보기

파일: trainer_util_integration_test.py 프로젝트: keg-tucn/AMR

def test_generate_amr_node_for_vector_of_parents_example_1():
    """
    Rebuild an AMR from its vector of parents plus a relation lookup and check
    that it matches the original AMR string with a smatch score of 1.

    The unused ``amr_str1`` variant (same graph with reordered children and a
    different root variable) was removed; it was never referenced.
    """
    amr_str = """(s / suppose-01~e.1 
                      :ARG0 (i / i~e.0) 
                      :ARG1 (p / possible-01~e.3 
                            :ARG1 (a / add-02~e.4 
                                  :ARG0 (y / you~e.2) 
                                  :ARG1 (p2 / probation~e.5 
                                        :ARG1-of (c / contrast-01~e.7 
                                              :ARG2 (r / replace-01~e.12 
                                                    :ARG1 p2 
                                                    :ARG2~e.13 (t / time~e.15 
                                                          :mod (j / jail~e.14)) 
                                                    :mod (j2 / just~e.10)))))))"""
    amr: AMR = AMR.parse_string(amr_str)
    identified_concepts = IdentifiedConcepts()
    identified_concepts.create_from_amr('amr_1', amr)
    add_false_root(identified_concepts)
    vector_of_parents = generate_parent_list_vector(amr, identified_concepts)
    # transf parent vectors to vector of parents
    # i s y p a p2 c j2 r j  t
    # 1 2 3 4 5 6  7 8  9 10 11
    # (parent concept, child concept) -> relation label used when rebuilding
    relations_dict = {
        ('suppose-01', 'i'): 'ARG0',
        ('suppose-01', 'possible-01'): 'ARG1',
        ('possible-01', 'add-02'): 'ARG1',
        ('add-02', 'you'): 'ARG0',
        ('add-02', 'probation'): 'ARG1',
        ('probation', 'contrast-01'): 'ARG1-of',
        ('contrast-01', 'replace-01'): 'ARG2',
        ('replace-01', 'probation'): 'ARG1',
        ('replace-01', 'time'): 'ARG2',
        ('replace-01', 'just'): 'mod',
        ('time', 'jail'): 'mod'
    }
    amr_node: Node = generate_amr_node_for_vector_of_parents(
        identified_concepts, vector_of_parents, relations_dict)
    generated_amr_str = amr_node.amr_print_with_reentrancy()
    smatch = calculate_smatch(generated_amr_str, amr_str)
    assert smatch == 1

예제 #16

0

파일 보기

파일: stanford_train_preprocessing_util.py 프로젝트: keg-tucn/AMR

def modify_node_to_tokens_alignment(amr: AMR, alignment_mapping: Dict[int,
                                                                      int]):
    """
    Rewrite every token index stored in ``amr.node_to_tokens`` through
    ``alignment_mapping``.

    Entries are either a token string or a ``(token, parent)`` tuple; in both
    cases the token is converted to ``int``, looked up in the mapping and stored
    back as a string, while the parent of a tuple entry is kept unchanged.

    :param amr: object whose ``node_to_tokens`` mapping is updated in place
        (each key gets a freshly built list)
    :param alignment_mapping: old token index -> new token index
    :raises KeyError: if a token index is missing from ``alignment_mapping``
    """
    def remap(token):
        # tokens are stored as strings, the mapping is keyed by int
        return str(alignment_mapping[int(token)])

    # Snapshot the items so the mapping is not iterated while being assigned to.
    for key, node_tokens_list in list(amr.node_to_tokens.items()):
        remapped_tokens = []
        for node_token in node_tokens_list:
            if isinstance(node_token, tuple):
                token, parent = node_token
                remapped_tokens.append((remap(token), parent))
            else:
                remapped_tokens.append(remap(node_token))
        amr.node_to_tokens[key] = remapped_tokens

예제 #17

0

파일 보기

def test_generate_parent_vector_example_2():
    """
    Check the parent vectors generated for an AMR in which variables are
    reused (`m` and `n` appear a second time without a new definition).

    The concept order is fixed by hand (ROOT first). Two vectors are expected;
    they differ only at position 12 (`now`), whose parent is `file-01` (10) in
    the first vector and `move-01` (15) in the second.
    """
    amr_text = """(m / man~e.2 
      :ARG1-of (m2 / marry-01~e.1) 
      :ARG0-of (l / love-01~e.9 
            :ARG1~e.10 (y / you~e.11) 
            :ARG1-of (r / real-04~e.6) 
            :condition-of~e.4 (a3 / and~e.16 
                  :op1 (g / go-06~e.14 
                        :ARG2 (a / ahead~e.15) 
                        :mod (j / just~e.13)) 
                  :op2 (o2 / or~e.22 
                        :op1 (f / file-01~e.17 
                              :ARG4~e.18 (d / divorce-01~e.19) 
                              :time (n / now~e.20)) 
                        :op2 (m3 / move-01~e.25 
                              :ARG2 (o / out-06~e.26 
                                    :ARG2~e.27 (h / house~e.29 
                                          :poss~e.28 m~e.28)) 
                              :time n~e.30 
                              :mod (a2 / at-least~e.23,24))))))"""
    parsed_amr = AMR.parse_string(amr_text)
    # The position of a pair in this tuple is the index used in the
    # expected parent vectors below.
    concept_specs = (
        ('', 'ROOT'),  # 0
        ('m2', 'marry-01'),  # 1
        ('m', 'man'),  # 2
        ('r', 'real-04'),  # 3
        ('l', 'love-01'),  # 4
        ('y', 'you'),  # 5
        ('j', 'just'),  # 6
        ('g', 'go-06'),  # 7
        ('a', 'ahead'),  # 8
        ('a3', 'and'),  # 9
        ('f', 'file-01'),  # 10
        ('d', 'divorce-01'),  # 11
        ('n', 'now'),  # 12
        ('o2', 'or'),  # 13
        ('a2', 'at-least'),  # 14
        ('m3', 'move-01'),  # 15
        ('o', 'out-06'),  # 16
        ('h', 'house'),  # 17
    )
    concepts = IdentifiedConcepts()
    concepts.ordered_concepts = [
        Concept(variable, name) for variable, name in concept_specs
    ]
    # NOTE(review): the third argument (2) presumably limits how many parent
    # vectors are produced - confirm against generate_parent_vectors.
    actual_vectors = generate_parent_vectors(parsed_amr, concepts, 2)
    expected_vectors = [
        (-1, 2, 0, 4, 2, 4, 7, 9, 7, 4, 13, 10, 10, 9, 15, 13, 15, 16),
        (-1, 2, 0, 4, 2, 4, 7, 9, 7, 4, 13, 10, 15, 9, 15, 13, 15, 16)
    ]
    assert_parent_vectors(expected_vectors, actual_vectors)

예제 #18

0

파일 보기

def test_create_from_amr_example_4():
    """
    create_from_amr is expected to leave ordered_concepts as None for an AMR
    in which not every concept is aligned.
    """
    amr_text = """(i / intensify-01~e.7 :li~e.0 -1~e.0 
                    :ARG1 (c / contradiction~e.3) 
                    :ARG0-of (m / make-02~e.9 
                        :ARG1 (c2 / control-01~e.12,13,14 :polarity - 
                              :ARG1 (s / situation~e.11))) 
                    :ARG1-of (b / bind-02~e.5))"""
    parsed_amr = AMR.parse_string(amr_text)
    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_3', parsed_amr)

    # Not all concepts are aligned and the unalignment tolerance keeps its
    # default (0), so no ordered concept list is produced.
    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_3'
    expected.ordered_concepts = None
    assert_identified_concepts(expected, actual)

예제 #19

0

파일 보기

파일: concept_test.py 프로젝트: keg-tucn/AMR

def test_create_from_custom_amr_example_1():
    """
    Build an AMR by hand (no string parsing) and check the extracted concept
    order.

    The expected order (it, recommend-01, vigorous, advocate-01) follows the
    ascending token indices stored in node_to_tokens (0, 1, 3, 4).
    """
    amr: AMR = AMR()
    amr.node_to_concepts = {
        'i': 'it',
        'v': 'vigorous',
        'a': 'advocate-01',
        'r': 'recommend-01'
    }
    amr.node_to_tokens = {'i': ['0'], 'v': ['3'], 'a': ['4'], 'r': ['1']}
    amr.relation_to_tokens = {'manner': [('2', 'a')]}
    # Outgoing edges per variable: relation -> list of 1-tuples with the child.
    outgoing_edges = {
        'i': {},
        'v': {},
        'a': {'ARG1': [('i',)], 'manner': [('v',)]},
        'r': {'ARG1': [('a',)]},
    }
    for variable, edges in outgoing_edges.items():
        amr[variable] = edges

    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_1', amr)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_1'
    expected.ordered_concepts = [
        Concept(variable, name)
        for variable, name in (('i', 'it'), ('r', 'recommend-01'),
                               ('v', 'vigorous'), ('a', 'advocate-01'))
    ]
    assert_identified_concepts(expected, actual)

예제 #20

0

파일 보기

def test_create_custom_AMR_example_no_exception():
    """
    Smoke test: building a CustomizedAMR from this large AMR must not raise.

    Nothing is asserted about the result. In the AMR below the variable `c6`
    is referenced (:ARG1 c6) before the place where it is defined.
    """
    amr_text = """(p / pay-01~e.16 
      :ARG0 (p2 / person 
            :mod (s / star~e.2 
                  :mod (p3 / pop~e.1)) 
            :mod (d3 / debt~e.13 
                  :consist-of (m / monetary-quantity :quant 240000000~e.10,11 
                        :unit (d2 / dollar~e.9)) 
                  :ARG1-of (s2 / say-01~e.6))) 
      :ARG1 (m2 / monetary-quantity :quant 6~e.17 
            :unit (f / figure~e.18)) 
      :ARG2~e.26 (a / and~e.30 
            :op1 (d / doctor~e.29 
                  :mod (v / voodoo~e.28) 
                  :mod (a2 / another~e.27)) 
            :op2 (w / woman~e.34 :wiki - 
                  :name (n / name~e.35 :op1 "Samia"~e.36) 
                  :mod (m3 / mystery~e.32) 
                  :mod (c2 / country :wiki "Egypt" 
                        :name (n2 / name~e.35 :op1 "Egypt"~e.33)) 
                  :ARG1-of (c3 / come-01~e.39 
                        :ARG4~e.40 p2~e.41 
                        :accompanier~e.42 (l / letter~e.44 
                              :mod~e.45 (g / greet-01~e.46 
                                    :ARG0~e.47 (p7 / person 
                                          :ARG0-of (h2 / have-org-role-91 
                                                :ARG1 c6 
                                                :ARG2 (p4 / prince~e.53 
                                                      :ARG1-of (r2 / rank-01~e.51 
                                                            :ARG1-of (h / high-02~e.49)))) 
                                          :ARG0-of (p5 / purport-01 
                                                :ARG1 (p6 / person :wiki "Mohammed_bin_Nawwaf_bin_Abdulaziz" 
                                                      :name (n4 / name~e.35 :op1 "Nawaf"~e.56 :op2 "Bin"~e.57 :op3 "Abdulaziz"~e.58 :op4 "Al"~e.59 :op5 "Saud"~e.61) 
                                                      :ARG0-of (h3 / have-org-role-91~e.68 
                                                            :ARG1 (c6 / country :wiki "Saudi_Arabia" 
                                                                  :name (n5 / name~e.35 :op1 "Saudi"~e.69 :op2 "Arabia"~e.70)) 
                                                            :ARG2 (c5 / chief~e.65 
                                                                  :topic~e.66 (i / intelligence~e.67)) 
                                                            :time (n6 / now~e.63)))))))))) 
      :ARG3~e.19 (c / cleanse-01~e.22 
            :manner (r / ritual~e.21) 
            :ARG0-of (u / use-01~e.23 
                  :ARG1 (b / blood~e.25 
                        :mod (s3 / sheep~e.24)))))"""
    parsed_amr = AMR.parse_string(amr_text)
    # The test passes as long as neither parsing nor conversion raises.
    customized = CustomizedAMR()
    customized.create_custom_AMR(parsed_amr)

예제 #21

0

파일 보기

def test__create_from_amr_with_2_polarites():
    """
    Check concept extraction for an AMR that contains two `:polarity -`
    literals, aligned to tokens 10 and 21.

    Both literals are expected in the ordered concepts as separate '-'
    entries, each directly before the `sane` concept it belongs to.
    """
    amr_text = """(a / and~e.0
                      :op2 (p2 / practice-01~e.13
                            :ARG1 (l / loan-01~e.12
                                  :ARG2 (p / person~e.11
                                        :ARG0-of~e.11 (s / study-01~e.11)))
                            :mod (s2 / sane~e.10 :polarity~e.10 -~e.10)
                            :ARG1-of (i2 / identical-01~e.16
                                  :ARG2~e.19 (p3 / practice-01~e.24
                                        :ARG1 (l2 / loan-01~e.23
                                              :ARG1 (m / mortgage-01~e.22))
                                        :mod (s3 / sane~e.21 :polarity~e.21 -~e.21))
                                  :manner (w / way~e.18
                                        :mod (e / every~e.18)))
                            :ARG0-of (c2 / cause-01~e.3,8 
                                  :ARG1 (b / be-located-at-91~e.5,7
                                        :ARG1 (t / they~e.4)
                                        :ARG2 (t2 / there~e.6))
                                  :mod (o / only~e.2))))"""
    parsed_amr = AMR.parse_string(amr_text)
    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_2_polarities', parsed_amr)

    # NOTE(review): the third Concept argument (0 / 1) presumably numbers the
    # occurrences of the '-' literal - confirm against the Concept class.
    concept_specs = [
        ('a', 'and'),
        ('o', 'only'),
        ('c2', 'cause-01'),
        ('t', 'they'),
        ('b', 'be-located-at-91'),
        ('t2', 'there'),
        ('-', '-', 0),
        ('s2', 'sane'),
        ('s', 'study-01'),
        ('p', 'person'),
        ('l', 'loan-01'),
        ('p2', 'practice-01'),
        ('i2', 'identical-01'),
        ('e', 'every'),
        ('w', 'way'),
        ('-', '-', 1),
        ('s3', 'sane'),
        ('m', 'mortgage-01'),
        ('l2', 'loan-01'),
        ('p3', 'practice-01'),
    ]
    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_2_polarities'
    expected.ordered_concepts = [Concept(*spec) for spec in concept_specs]
    assert_identified_concepts(expected, actual)

예제 #22

0

파일 보기

def test_generate_parent_list_vector_ex_1():
    """
    Check the parent-list vector of a small AMR without re-entrancies.

    Concepts are taken from the AMR via create_from_amr and a false root is
    added before the vector is generated; every concept is expected to have
    exactly one parent.
    """
    amr_str = """(r / recommend-01~e.1
                    :ARG1 (a / advocate-01~e.4
                        :ARG1 (i / it~e.0)
                        :manner~e.2 (v / vigorous~e.3)))"""
    amr = AMR.parse_string(amr_str)
    identified_concepts = IdentifiedConcepts()
    identified_concepts.create_from_amr('amr_id_1', amr)
    add_false_root(identified_concepts)
    generated_parent_list_vector = generate_parent_list_vector(
        amr, identified_concepts)
    # Concept order after adding the false root (index 0):
    # i r v a
    # 1 2 3 4
    expected_parent_list_vector = [[-1], [4], [0], [4], [2]]
    # The message now separates "should be" from the expected value.
    assertion_message = (f'{generated_parent_list_vector} should be '
                         f'{expected_parent_list_vector}')
    assert generated_parent_list_vector == expected_parent_list_vector, assertion_message

예제 #23

0

파일 보기

def map_to_amr_dataset_dict(dataset_dict):
    """
    Parse the AMR string of every record in a dataset dictionary.

    Input:  {dataset: [sentence, amr_str, amr_id]}
    Output: {dataset: [sentence, amr: AMR, amr_id]}

    A new dictionary is returned; each record becomes a 3-tuple whose middle
    element is the result of AMR.parse_string.
    """

    def with_parsed_amr(record):
        # TODO: util for amr_str -> custom_amr
        parsed = AMR.parse_string(record[1])
        return record[0], parsed, record[2]

    return {
        dataset_name: [with_parsed_amr(record) for record in records]
        for dataset_name, records in dataset_dict.items()
    }

예제 #24

0

파일 보기

def test_generate_parent_vector_example_2():
    """
    Check the single parent vector generated for a small AMR whose concept
    order (ROOT first) is supplied by hand.
    """
    amr_text = """(r / recommend-01~e.1
                    :ARG1 (a / advocate-01~e.4
                        :ARG1 (i / it~e.0)
                        :manner~e.2 (v / vigorous~e.3)))"""
    parsed_amr = AMR.parse_string(amr_text)
    concepts = IdentifiedConcepts()
    # The position in this list is the index used in the expected vector.
    concepts.ordered_concepts = [
        Concept(variable, name)
        for variable, name in (
            ('', 'ROOT'),
            ('i', 'it'),
            ('r', 'recommend-01'),
            ('v', 'vigorous'),
            ('a', 'advocate-01'),
        )
    ]
    actual_vectors = generate_parent_vectors(parsed_amr, concepts)
    expected_vectors = [[-1, 4, 0, 4, 2]]
    assert_parent_vectors(expected_vectors, actual_vectors)

예제 #25

0

파일 보기

def test_create_from_amr_example_1():
    """
    Check the concepts extracted from a small, fully aligned AMR.

    The expected order (it, recommend-01, vigorous, advocate-01) follows the
    ascending token alignments in the AMR string (e.0, e.1, e.3, e.4).
    """
    amr_text = """(r / recommend-01~e.1
                    :ARG1 (a / advocate-01~e.4
                        :ARG1 (i / it~e.0)
                        :manner~e.2 (v / vigorous~e.3)))"""
    parsed_amr = AMR.parse_string(amr_text)
    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_1', parsed_amr)

    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_1'
    expected.ordered_concepts = [
        Concept(variable, name)
        for variable, name in (
            ('i', 'it'),
            ('r', 'recommend-01'),
            ('v', 'vigorous'),
            ('a', 'advocate-01'),
        )
    ]
    assert_identified_concepts(expected, actual)

예제 #26

0

파일 보기

파일: stanford_train_preprocessing_util.py 프로젝트: keg-tucn/AMR

def modify_sentence_and_alignment_for_person_or_organization(
        amr: AMR, sentence: str, node_var, to_remove_tokens,
        metadata_dict: Dict[int, List[str]]):
    """
    Replace every occurrence of to_remove_tokens in the sentence with the
    upper-cased concept of node_var and shift all alignments accordingly.

    Updated in place:
      * metadata_dict - existing keys are re-keyed through the alignment
        mapping, then the removed tokens are stored under the (mapped) index
        of every replaced occurrence
      * amr.node_to_tokens and amr.relation_to_tokens - re-aligned by the
        helper functions; node_var receives an alignment if it had none

    Returns the new sentence, with tokens joined by single spaces.
    """
    sentence_tokens = sentence.split()
    n = len(sentence_tokens)
    k = len(to_remove_tokens)
    # start index of every occurrence of to_remove_tokens in the sentence
    removal_indexes = [
        i for i in range(n - k + 1)
        if sentence_tokens[i:i + k] == to_remove_tokens
    ]
    # create a mapping between old and new alignment
    alignment_mapping = construct_alignment_mapping(n, k, removal_indexes)

    # metadata alignment
    # Re-key into fresh dictionaries instead of deleting/inserting while
    # walking the keys: when one entry moves onto the old index of another
    # entry that is visited later (e.g. 5 -> 4 and 4 -> 3), in-place re-keying
    # deletes the freshly moved entry again. As before, a moved entry takes
    # precedence over an entry that stays at the same index.
    kept_entries = {}
    moved_entries = {}
    for old_index, values in metadata_dict.items():
        new_index = alignment_mapping[old_index]
        if new_index != old_index:
            moved_entries[new_index] = values
        else:
            kept_entries[new_index] = values
    metadata_dict.clear()
    metadata_dict.update(kept_entries)
    metadata_dict.update(moved_entries)

    # node_to_tokens
    modify_node_to_tokens_alignment(amr, alignment_mapping)
    # make sure the new PERSON/ORGANIZATION node is aligned
    # NOTE(review): the indices stored here are the old (pre-mapping) start
    # indices - confirm this is intended when there are several occurrences.
    if node_var not in amr.node_to_tokens:
        amr.node_to_tokens[node_var] = [
            str(removal_index) for removal_index in removal_indexes
        ]
    # relation_to_tokens
    modify_relation_to_tokens_alignment(amr, alignment_mapping)

    # modify sentence
    new_token = amr.node_to_concepts[node_var].upper()
    # make sure all occurrences of to_remove_tokens are removed
    # NOTE(review): str.replace works on characters, so it can also hit text
    # inside a longer token - confirm the inputs cannot trigger this.
    new_sentence = ' '.join(sentence_tokens)
    substring_to_replace = ' '.join(to_remove_tokens)
    replacement_indexes = get_indices_of_sublist_in_list(
        sentence_tokens, to_remove_tokens)
    new_sentence = new_sentence.replace(substring_to_replace, new_token)
    # construct metadata
    for replacement_index in replacement_indexes:
        # need to use alignment_mapping in case the same token list occurs
        # more than once
        metadata_dict[alignment_mapping[
            replacement_index]] = substring_to_replace.split()
    return new_sentence

예제 #27

0

파일 보기

def test_generate_parent_list_vector_with_polarity():
    """
    Check the parent-list vector of an AMR that contains a `:polarity -`
    literal and a numeric literal (2012).

    Concepts are taken from the AMR via create_from_amr and a false root is
    added before the vector is generated.
    """
    amr_str = """(y2 / year~e.4
                      :time-of~e.5 (r / recover-01~e.7
                            :ARG1-of (e / expect-01~e.6 :polarity -~e.6))
                      :ARG1-of (p / possible-01~e.1)
                      :domain~e.2 (d / date-entity~e.4 :year~e.4 2012~e.0))"""
    amr: AMR = AMR.parse_string(amr_str)
    identified_concepts = IdentifiedConcepts()
    identified_concepts.create_from_amr('amr_polarity', amr)
    add_false_root(identified_concepts)
    generated_parent_list_vector = generate_parent_list_vector(
        amr, identified_concepts)
    # Concept order after adding the false root (index 0):
    # 2012 p d y2 - e r
    # 1    2 3 4  5 6 7
    expected_parent_list_vector = [[-1], [3], [4], [4], [0], [6], [7], [4]]
    # The message now separates "should be" from the expected value.
    assertion_message = (f'{generated_parent_list_vector} should be '
                         f'{expected_parent_list_vector}')
    assert generated_parent_list_vector == expected_parent_list_vector, assertion_message

예제 #28

0

파일 보기

def test_create_from_amr_example_3():
    """
    create_from_amr is expected to leave ordered_concepts as None for an AMR
    with named entities in which not every concept is aligned.
    """
    amr_text = """(d / difficult~e.5
          :domain~e.4 (r / reach-01~e.7
                :ARG1 (c / consensus~e.0
                      :topic~e.1 (c2 / country :wiki "India"
                            :name (n / name :op1 "India"~e.2)))
                :time~e.8 (m / meet-03~e.11
                      :ARG0 (o / organization :wiki "Nubolt12_632_6421.19clear_Suppliers_Group"
                            :name (n2 / name :op1 "NSG"~e.10))
                      :time~e.12 (d2 / date-entity :year 2007~e.14 :month~e.13 11~e.13))))"""
    parsed_amr = AMR.parse_string(amr_text)
    actual = IdentifiedConcepts()
    actual.create_from_amr('amr_id_3', parsed_amr)

    # Not all concepts are aligned and the unalignment tolerance keeps its
    # default (0), so no ordered concept list is produced.
    expected = IdentifiedConcepts()
    expected.amr_id = 'amr_id_3'
    expected.ordered_concepts = None
    assert_identified_concepts(expected, actual)

예제 #29

0

파일 보기

파일: data_stats.py 프로젝트: keg-tucn/AMR

def generate_dataset_statistics(sentence_amr_str_triples: List[Tuple[str, str,
                                                                     str]],
                                filters):
    """
    Count how many instances survive AMR preprocessing and how many of those
    survive the given filters.

    Every (sentence, amr_str, amr_id) triple is parsed, run through the
    preprocessing steps and converted to a CustomizedAMR. Triples for which
    any of these steps raises are skipped; each failure is logged at debug
    level.

    Returns a pair (instances, filtered_instances):
      * instances - number of triples that were parsed and preprocessed
        successfully
      * filtered_instances - number of items returned by
        CustomizedAMRDataFiltering after all filters were added
    """
    import logging

    logger = logging.getLogger(__name__)

    sentence_amr_id = []
    amr_preprocessing_fails = 0

    for sentence, amr_str, amr_id in sentence_amr_str_triples:
        try:
            amr = AMR.parse_string(amr_str)
            # fresh step objects for every instance, as before
            preprocessing_steps: List[PreprocessingStep] = [
                HaveOrgPreprocessingStep(),
                NamedEntitiesPreprocessingStep(),
                DateEntitiesPreprocessingStep(),
                TemporalQuantitiesPreprocessingStep(),
                QuantitiesPreprocessingStep()
            ]
            new_amr, _, _ = apply_preprocessing_steps_on_instance(
                amr, sentence, preprocessing_steps)

            custom_amr = amr_data.CustomizedAMR()
            custom_amr.create_custom_AMR(new_amr)
        except Exception:
            # Best effort: a broken instance must not abort the statistics,
            # but it should leave a trace instead of vanishing silently.
            amr_preprocessing_fails += 1
            logger.debug('Skipping AMR %s: parsing/preprocessing failed',
                         amr_id,
                         exc_info=True)
        else:
            # the original (not the preprocessed) sentence is kept
            sentence_amr_id.append((sentence, custom_amr, amr_id))

    if amr_preprocessing_fails:
        logger.debug('%d instance(s) failed AMR parsing/preprocessing',
                     amr_preprocessing_fails)

    # apply filters
    filtering = CustomizedAMRDataFiltering(sentence_amr_id)
    for f in filters:
        filtering.add_filter(f)
    new_sentence_amr_pairs = filtering.execute()

    return len(sentence_amr_id), len(new_sentence_amr_pairs)

예제 #30

0

파일 보기

def test_generate_parent_list_vector_reentrancy_ex_2():
    """
    Check the parent-list vector of an AMR with a re-entrancy: `i` is the
    :domain of `foolish` and is referenced again as :ARG0 of `do-02`, so it
    is expected to have two parents.

    Concepts are taken from the AMR via create_from_amr and a false root is
    added before the vector is generated.
    """
    amr_str = """(f / foolish~e.3 
                      :mode~e.7 interrogative~e.7
                      :domain~e.0,2 (i / i~e.1)
                      :condition~e.4 (d / do-02~e.5
                                        :ARG0 i
                                        :ARG1 (t / this~e.6)))"""
    amr = AMR.parse_string(amr_str)
    identified_concepts = IdentifiedConcepts()
    identified_concepts.create_from_amr('amr_2_reentrancy', amr)
    add_false_root(identified_concepts)
    generated_parent_list_vector = generate_parent_list_vector(
        amr, identified_concepts)
    # Concept order after adding the false root (index 0):
    # i f d t interrogative
    # 1 2 3 4 5
    expected_parent_list_vector = [[-1], [3, 2], [0], [2], [3], [2]]
    # The message now separates "should be" from the expected value.
    assertion_message = (f'{generated_parent_list_vector} should be '
                         f'{expected_parent_list_vector}')
    assert generated_parent_list_vector == expected_parent_list_vector, assertion_message