def __init__(self, sense_table, wiki_table, rel_vocab):
    """Create the AMR codec and load the sense and wiki lookup tables.

    Args:
        sense_table: Path to a text file with one ``key<TAB>value`` pair
            per line; loaded into ``self.sense_table``.
        wiki_table: Path to a text file in the same format; loaded into
            ``self.wiki_table``.
        rel_vocab: Stored unchanged as ``self.rel_vocab``.

    Raises:
        ValueError: If a stripped line does not split into exactly two
            tab-separated fields.
    """

    def _load_table(path):
        # Read a two-column, tab-separated file into a dict. When a key
        # occurs more than once, the last occurrence wins.
        table = dict()
        # The context manager closes the file handle even when a
        # malformed line raises part-way through the file.
        with open(path) as handle:
            for line in handle:
                key, value = line.strip().split('\t')
                table[key] = value
        return table

    self.amr = penman.AMRCodec()
    self.sense_table = _load_table(sense_table)
    self.wiki_table = _load_table(wiki_table)
    self.rel_vocab = rel_vocab
def test_AMRCodec():
    """Check AMRCodec relation inversion, encoding and decoding."""
    codec = penman.AMRCodec()

    # Every relation must map to its inverse and back again.
    inversion_pairs = [
        ('ARG0', 'ARG0-of'),
        ('domain', 'mod'),
        ('consist-of', 'consist-of-of'),
    ]
    for forward, backward in inversion_pairs:
        assert codec.invert_relation(forward) == backward
        assert codec.invert_relation(backward) == forward

    with pytest.raises(penman.PenmanError):
        codec.invert_relation('instance')

    # Encoding a graph that contains a re-entrant variable (b).
    want_graph = penman.Graph([
        ('w', 'instance', 'want-01'),
        ('w', 'ARG0', 'b'),
        ('w', 'ARG1', 'g'),
        ('b', 'instance', 'boy'),
        ('g', 'instance', 'go'),
        ('g', 'ARG0', 'b'),
    ])
    want_text = ('(w / want-01\n'
                 ' :ARG0 (b / boy)\n'
                 ' :ARG1 (g / go\n'
                 ' :ARG0 b))')
    assert codec.encode(want_graph) == want_text

    # Choosing a different top flips the relation that links the two nodes.
    gold_graph = penman.Graph([
        ('g', 'instance', 'gold'),
        ('g', 'consist-of-of', 'r'),
        ('r', 'instance', 'ring'),
    ])
    gold_on_top = ('(g / gold\n'
                   ' :consist-of-of (r / ring))')
    ring_on_top = ('(r / ring\n'
                   ' :consist-of (g / gold))')
    assert codec.encode(gold_graph) == gold_on_top
    assert codec.encode(gold_graph, top='r') == ring_on_top

    white_graph = penman.Graph([
        ('w', 'instance', 'white'),
        ('w', 'domain', 'c'),
        ('c', 'instance', 'cat'),
    ])
    white_on_top = ('(w / white\n'
                    ' :domain (c / cat))')
    cat_on_top = ('(c / cat\n'
                  ' :mod (w / white))')
    assert codec.encode(white_graph) == white_on_top
    assert codec.encode(white_graph, top='c') == cat_on_top

    # Decoding well-formed input.
    simple = codec.decode('(g / go)')
    assert simple.triples() == [('g', 'instance', 'go')]

    # example adapted from https://github.com/goodmami/penman/issues/17
    null_edge = codec.decode('(g / go :null_edge (x20 / 876-9))')
    assert null_edge.triples() == [
        ('g', 'instance', 'go'),
        ('x20', 'instance', '876-9'),
        ('g', 'null_edge', 'x20'),
    ]

    # Decoding malformed input must raise DecodeError.
    malformed_inputs = [
        '(g)',                       # no concept or relations
        '(g :ARG0 b)',               # no concept
        '(g :ARG0 (b / boy) / go)',  # concept after relations
        '(1 / one)',                 # bad variable form
        '(g / go : (b / boy))',      # anonymous relation
    ]
    for text in malformed_inputs:
        with pytest.raises(penman.DecodeError):
            codec.decode(text)
import penman import networkx as nx from stog.data.vocabulary import DEFAULT_PADDING_TOKEN, DEFAULT_OOV_TOKEN from stog.data.dataset_readers.amr_parsing.graph_repair import GraphRepair from stog.utils.string import find_similar_token, is_abstract_token, is_english_punct from stog.utils import logging logger = logging.init_logger() # Disable inverting ':mod' relation. penman.AMRCodec._inversions.pop('domain') penman.AMRCodec._deinversions.pop('mod') from penman import Triple amr_codec = penman.AMRCodec(indent=6) WORDSENSE_RE = re.compile(r'-\d\d$') QUOTED_RE = re.compile(r'^".*"$') class AMR: def __init__(self, id=None, sentence=None, graph=None, tokens=None, lemmas=None, pos_tags=None, ner_tags=None, abstract_map=None,
def delinearize(linearized_amr):
    """Turn a space-separated linearized AMR back into a PENMAN string.

    Tokens are read left to right. A token starting with ``:`` is a
    relation; any other token is a concept. The first concept becomes the
    root. A concept that follows a relation is attached under the concept
    on top of the stack and pushed; a concept already seen reuses its
    variable instead of getting a new instance triple. A concept that
    repeats the one on top of the stack closes that subtree.

    Parsing is best-effort: it stops at the first malformed token (one
    that is empty or contains a parenthesis, two relations in a row, or an
    unexpected concept) and encodes whatever was collected up to there.

    Args:
        linearized_amr: The linearized graph, tokens separated by single
            spaces.

    Returns:
        The string produced by ``penman.AMRCodec().encode`` for the
        collected triples, with the first concept as top.

    Raises:
        ValueError: If parsing stops before any concept was read, so
            there is no root to encode from.
    """

    def get_var(tok, concept_var):
        # Pick an unused variable name: the concept's first character if
        # it is a lowercase ASCII letter, otherwise 'x'; a numeric suffix
        # is appended when that name is already taken.
        used = set(concept_var.values())
        first = tok[0]
        stem = first if first in 'abcdefghijklmnopqrstuvwxyz' else 'x'
        if stem not in used:
            return stem
        count = 1
        while stem + str(count) in used:
            count += 1
        return stem + str(count)

    stack = []          # (variable, concept) pairs for the open subtrees
    concept_var = {}    # concept -> variable already assigned to it
    triplets = []
    rel = None          # relation waiting for its target concept
    root = None

    for tok in linearized_amr.split(' '):
        # An empty token (empty input or doubled spaces) is treated as
        # malformed, like a parenthesis, instead of crashing on tok[0].
        if not tok or '(' in tok or ')' in tok:
            break

        if tok.startswith(':'):
            if rel:
                # Two relations in a row.
                break
            rel = tok[1:]
            continue

        if not stack:
            # First concept: it becomes the root of the graph.
            var = get_var(tok, concept_var)
            triplets.append((var, 'instance', tok))
            concept_var[tok] = var
            stack.append((var, tok))
            root = var
            continue

        top_var, top_concept = stack[-1]
        if rel:
            if tok in concept_var:
                var = concept_var[tok]
            else:
                var = get_var(tok, concept_var)
                triplets.append((var, 'instance', tok))
                concept_var[tok] = var
            triplets.append((top_var, rel, var))
            stack.append((var, tok))
            rel = None
        elif top_concept == tok:
            # The repeated concept closes the current subtree.
            stack.pop()
            if not stack:
                break
        else:
            # Two different concepts with no relation between them.
            break

    if root is None:
        # Previously this surfaced as an UnboundLocalError on `root`.
        raise ValueError(
            'cannot delinearize %r: no root concept found' % (linearized_amr,))

    codec = penman.AMRCodec()
    graph = penman.Graph(triplets)
    return codec.encode(graph, top=root)
def __init__(self, rel_vocab):
    """Store the relation vocabulary and create a penman AMR codec."""
    self.rel_vocab = rel_vocab
    self.amr = penman.AMRCodec()