예제 #1
0
    def __init__(self, sense_table, wiki_table, rel_vocab):
        """Create the AMR codec and load the two tab-separated lookup tables.

        Args:
            sense_table: Path to a text file holding one ``key<TAB>value``
                pair per line; loaded into ``self.sense_table``.
            wiki_table: Path to a text file in the same format; loaded into
                ``self.wiki_table``.
            rel_vocab: Stored unchanged on ``self.rel_vocab``.

        Raises:
            OSError: If either table file cannot be opened.
            ValueError: If a stripped line does not split into exactly two
                tab-separated fields.
        """
        def load_table(path):
            # The context manager closes the handle as soon as the table is
            # read, instead of leaving it open until garbage collection.
            table = dict()
            with open(path) as handle:
                for line in handle:
                    key, value = line.strip().split('\t')
                    # A later line with the same key overwrites the earlier one.
                    table[key] = value
            return table

        self.amr = penman.AMRCodec()
        self.sense_table = load_table(sense_table)
        self.wiki_table = load_table(wiki_table)
        self.rel_vocab = rel_vocab
예제 #2
0
def test_AMRCodec():
    """Check AMRCodec relation inversion, encoding and decoding."""
    codec = penman.AMRCodec()

    # Each pair is (relation, expected result of inverting it).
    inversion_cases = [
        ('ARG0', 'ARG0-of'),
        ('ARG0-of', 'ARG0'),
        ('domain', 'mod'),
        ('mod', 'domain'),
        ('consist-of', 'consist-of-of'),
        ('consist-of-of', 'consist-of'),
    ]
    for relation, expected in inversion_cases:
        assert codec.invert_relation(relation) == expected

    # 'instance' has no inverse form.
    with pytest.raises(penman.PenmanError):
        codec.invert_relation('instance')

    # Plain encoding of a graph with a re-entrant variable ('b').
    want_graph = penman.Graph([
        ('w', 'instance', 'want-01'),
        ('w', 'ARG0', 'b'),
        ('w', 'ARG1', 'g'),
        ('b', 'instance', 'boy'),
        ('g', 'instance', 'go'),
        ('g', 'ARG0', 'b'),
    ])
    want_text = ('(w / want-01\n'
                 '   :ARG0 (b / boy)\n'
                 '   :ARG1 (g / go\n'
                 '            :ARG0 b))')
    assert codec.encode(want_graph) == want_text

    # Re-rooting a graph flips the relation to its inverse in the output.
    # Each case is (triples, text with default top, alternate top, its text).
    reroot_cases = [
        (
            [('g', 'instance', 'gold'), ('g', 'consist-of-of', 'r'),
             ('r', 'instance', 'ring')],
            '(g / gold\n' '   :consist-of-of (r / ring))',
            'r',
            '(r / ring\n' '   :consist-of (g / gold))',
        ),
        (
            [('w', 'instance', 'white'), ('w', 'domain', 'c'),
             ('c', 'instance', 'cat')],
            '(w / white\n' '   :domain (c / cat))',
            'c',
            '(c / cat\n' '   :mod (w / white))',
        ),
    ]
    for triples, default_text, other_top, other_text in reroot_cases:
        graph = penman.Graph(triples)
        assert codec.encode(graph) == default_text
        assert codec.encode(graph, top=other_top) == other_text

    single_node = codec.decode('(g / go)')
    assert single_node.triples() == [('g', 'instance', 'go')]

    # example adapted from https://github.com/goodmami/penman/issues/17
    null_edge = codec.decode('(g / go :null_edge (x20 / 876-9))')
    assert null_edge.triples() == [
        ('g', 'instance', 'go'),
        ('x20', 'instance', '876-9'),
        ('g', 'null_edge', 'x20'),
    ]

    # Inputs the AMR codec must refuse to decode.
    malformed_inputs = [
        '(g)',                       # no concept or relations
        '(g :ARG0 b)',               # no concept
        '(g :ARG0 (b / boy) / go)',  # concept after relations
        '(1 / one)',                 # bad variable form
        '(g / go : (b / boy))',      # anonymous relation
    ]
    for text in malformed_inputs:
        with pytest.raises(penman.DecodeError):
            codec.decode(text)
예제 #3
0
import penman
import networkx as nx

from stog.data.vocabulary import DEFAULT_PADDING_TOKEN, DEFAULT_OOV_TOKEN
from stog.data.dataset_readers.amr_parsing.graph_repair import GraphRepair
from stog.utils.string import find_similar_token, is_abstract_token, is_english_punct
from stog.utils import logging

# Module-level logger obtained from the project's logging helper.
logger = logging.init_logger()

# Disable inverting ':mod' relation.
# The two pops remove the 'domain' entry from AMRCodec._inversions and the
# 'mod' entry from AMRCodec._deinversions. Both tables are accessed on the
# AMRCodec class itself, not on an instance, so the class is patched in place.
# NOTE(review): dict.pop without a default raises KeyError if the key is
# already absent (e.g. if this module body runs twice) — confirm acceptable.
penman.AMRCodec._inversions.pop('domain')
penman.AMRCodec._deinversions.pop('mod')
from penman import Triple

# Shared codec instance, constructed with indent=6.
amr_codec = penman.AMRCodec(indent=6)

# NOTE(review): `re` is used below but no `import re` is visible in this
# excerpt — confirm it is imported earlier in the file.
# Matches a hyphen followed by exactly two digits at the end of a string
# (presumably a word-sense suffix such as "-01").
WORDSENSE_RE = re.compile(r'-\d\d$')
# Matches a string that starts and ends with a double quote.
QUOTED_RE = re.compile(r'^".*"$')


class AMR:
    def __init__(self,
                 id=None,
                 sentence=None,
                 graph=None,
                 tokens=None,
                 lemmas=None,
                 pos_tags=None,
                 ner_tags=None,
                 abstract_map=None,
예제 #4
0
def delinearize(linearized_amr):
    """Rebuild a PENMAN string from a space-separated, parenthesis-free AMR.

    Tokens are read left to right:

    * a token starting with ``:`` names the relation to the next concept;
    * the first concept becomes the root node;
    * a concept that follows a relation becomes a child of the node on top
      of the stack and is pushed onto it; a concept seen before reuses its
      variable, producing a re-entrancy;
    * a concept with no pending relation must repeat the concept on top of
      the stack, which closes (pops) that node.

    Parsing is best-effort: it stops at the first malformed token (an empty
    token, one containing a parenthesis, two relations in a row, a concept
    that neither follows a relation nor closes the current node) or once the
    root is closed, and encodes whatever was built up to that point.

    Args:
        linearized_amr: The linearized AMR, tokens separated by single spaces.

    Returns:
        The graph serialized by ``penman.AMRCodec().encode`` with the first
        concept as top.

    Raises:
        ValueError: If parsing stops before any concept was read, so there is
            no root to encode.
    """
    def get_var(tok, concept_var):
        # Variable name = first letter of the concept when it is a lowercase
        # ASCII letter, otherwise 'x'; a numeric suffix starting at 1 is
        # appended when the bare letter is already taken.
        taken = set(concept_var.values())
        base = tok[0] if tok[0] in 'abcdefghijklmnopqrstuvwxyz' else 'x'
        if base not in taken:
            return base
        count = 1
        while base + str(count) in taken:
            count += 1
        return base + str(count)

    stack = []          # (variable, concept) of every node still open
    concept_var = {}    # concept token -> variable assigned to it
    triplets = []
    rel = None          # relation waiting for its target concept
    root = None

    for tok in linearized_amr.split(' '):
        # Empty tokens (empty input, doubled spaces) are malformed just like
        # stray parentheses: stop instead of indexing into an empty string.
        if not tok or '(' in tok or ')' in tok:
            break

        if tok.startswith(':'):
            if rel:
                # Two relations in a row.
                break
            rel = tok[1:]
            continue

        if not stack:
            # First concept: it becomes the root. A relation seen before the
            # root is deliberately left pending, as in the original logic.
            var = get_var(tok, concept_var)
            triplets.append((var, 'instance', tok))
            concept_var[tok] = var
            stack.append((var, tok))
            root = var
            continue

        top_var, top_concept = stack[-1]
        if rel:
            var = concept_var.get(tok)
            if var is None:
                var = get_var(tok, concept_var)
                triplets.append((var, 'instance', tok))
                concept_var[tok] = var
            triplets.append((top_var, rel, var))
            stack.append((var, tok))
            rel = None
        elif top_concept == tok:
            # Repeating the open concept closes it.
            stack.pop()
            if not stack:
                break
        else:
            # Two unrelated concepts in a row.
            break

    if root is None:
        raise ValueError(
            'no root concept found in linearized AMR: %r' % (linearized_amr,))

    # Build the graph once, after parsing, rather than on every token.
    codec = penman.AMRCodec()
    graph = penman.Graph(triplets)
    return codec.encode(graph, top=root)
예제 #5
0
 def __init__(self, rel_vocab):
     """Create the AMR codec and keep the given relation vocabulary.

     Args:
         rel_vocab: Stored unchanged on ``self.rel_vocab``.
     """
     codec = penman.AMRCodec()
     self.amr, self.rel_vocab = codec, rel_vocab