def test_interpret(amr_model):
    """Check :func:`interpret` with the default model and with *amr_model*."""
    # a single node yields only its instance triple
    single_node = codec.parse('(a / A)')
    expected_single = Graph([('a', ':instance', 'A')], top='a')
    assert interpret(single_node) == expected_single

    nested = codec.parse('(a / A :consist-of (b / B))')

    # without an explicit model the edge is expected as (b, :consist, a)
    expected_default = Graph(
        [
            ('a', ':instance', 'A'),
            ('b', ':consist', 'a'),
            ('b', ':instance', 'B'),
        ],
        top='a',
    )
    assert interpret(nested) == expected_default

    # with the AMR model the edge is expected as (a, :consist-of, b)
    expected_amr = Graph(
        [
            ('a', ':instance', 'A'),
            ('a', ':consist-of', 'b'),
            ('b', ':instance', 'B'),
        ],
        top='a',
    )
    assert interpret(nested, model=amr_model) == expected_amr
def reify_edges(g: Graph, model: Model) -> Graph:
    """
    Reify all edges in *g* that have reifications in *model*.

    Args:
        g: a :class:`~penman.graph.Graph` object
        model: a model defining reifications; if ``None``, a new
            :class:`Model` instance is used
    Returns:
        A new :class:`~penman.graph.Graph` object with reified edges.
    Example:
        >>> from penman.codec import PENMANCodec
        >>> from penman.models.amr import model
        >>> from penman.transform import reify_edges
        >>> codec = PENMANCodec(model=model)
        >>> g = codec.decode('(c / chapter :mod 7)')
        >>> g = reify_edges(g, model)
        >>> print(codec.encode(g))
        (c / chapter
           :ARG1-of (_ / have-mod-91
                       :ARG2 7))
    """
    known_vars = g.variables()
    if model is None:
        model = Model()

    epidata = dict(g.epidata)
    reified: List[BasicTriple] = []

    for triple in g.triples:
        # edges without a reification are copied over untouched
        if not model.is_role_reifiable(triple[1]):
            reified.append(triple)
            continue

        in_triple, node_triple, out_triple = model.reify(triple, known_vars)
        # swap the incoming/outgoing triples when the edge appears
        # inverted in *g*
        if appears_inverted(g, triple):
            in_triple, out_triple = out_triple, in_triple
        reified.extend((in_triple, node_triple, out_triple))

        # register the new node's variable so that later reifications
        # see it as taken
        node_var = node_triple[0]
        known_vars.add(node_var)

        # epigraphical markers: the incoming triple pushes the new
        # node and the markers of the replaced triple are split
        # between the node triple and the outgoing triple
        epidata[in_triple] = [Push(node_var)]
        node_epis, out_epis = _edge_markers(epidata.pop(triple, []))
        epidata[node_triple] = node_epis
        epidata[out_triple] = out_epis
        # we don't know where to put the final POP without configuring
        # the tree; maybe this should be a tree operation?

    g = Graph(reified, epidata=epidata, metadata=g.metadata)
    logger.info('Reified edges: %s', g)
    return g
def reify_attributes(g: Graph) -> Graph:
    """
    Reify all attributes in *g*.

    Every non-instance triple whose target is not a variable of *g* is
    replaced by an edge to a newly created node whose concept is the
    former attribute value.

    Args:
        g: a :class:`~penman.graph.Graph` object
    Returns:
        A new :class:`~penman.graph.Graph` object with reified attributes.
    Example:
        >>> from penman.codec import PENMANCodec
        >>> from penman.models.amr import model
        >>> from penman.transform import reify_attributes
        >>> codec = PENMANCodec(model=model)
        >>> g = codec.decode('(c / chapter :mod 7)')
        >>> g = reify_attributes(g)
        >>> print(codec.encode(g))
        (c / chapter
           :mod (_ / 7))
    """
    known_vars = g.variables()
    epidata = dict(g.epidata)
    reified: List[BasicTriple] = []
    suffix = 2  # next numeric suffix to try once '_' is taken

    for triple in g.triples:
        source, role, target = triple
        is_attribute = role != CONCEPT_ROLE and target not in known_vars
        if not is_attribute:
            reified.append(triple)
            continue

        # pick an unused variable: '_', then '_2', '_3', ...
        new_var = '_'
        while new_var in known_vars:
            new_var = f'_{suffix}'
            suffix += 1
        known_vars.add(new_var)

        role_triple = (source, role, new_var)
        node_triple = (new_var, CONCEPT_ROLE, target)
        reified.extend((role_triple, node_triple))

        # epigraphical markers: split the old markers between the two
        # new triples, then push the new node on the role triple and
        # pop it again on the node triple
        role_epis, node_epis = _attr_markers(epidata.pop(triple, []))
        epidata[role_triple] = role_epis + [Push(new_var)]
        epidata[node_triple] = node_epis + [POP]

    g = Graph(reified, epidata=epidata, metadata=g.metadata)
    logger.info('Reified attributes: %s', g)
    return g
def test_errors(self, mini_amr):
    """Check ``Model.errors`` for an empty model and one built from *mini_amr*."""
    empty_model = Model()
    amr = Model.from_dict(mini_amr)

    # basic roles
    graph = Graph([('a', ':instance', 'alpha')])
    assert empty_model.errors(graph) == {}

    graph = Graph([('a', ':instance', 'alpha'), ('a', ':mod', '1')])
    invalid_mod = {('a', ':mod', '1'): ['invalid role']}
    assert empty_model.errors(graph) == invalid_mod
    assert amr.errors(graph) == {}

    # regex role names
    graph = Graph(
        [
            ('n', ':instance', 'name'),
            ('n', ':op1', 'Foo'),
            ('n', ':op2', 'Bar'),
        ]
    )
    assert amr.errors(graph) == {}

    # disconnected graph
    graph = Graph([('a', ':instance', 'alpha'), ('b', ':instance', 'beta')])
    unreachable_b = {('b', ':instance', 'beta'): ['unreachable']}
    assert empty_model.errors(graph) == unreachable_b
    assert amr.errors(graph) == unreachable_b
def test_issue_90():
    """Regression test for https://github.com/goodmami/penman/issues/90"""
    # the concept of node i2 is the string 'i', which is also the
    # variable of the top node
    triples = [
        ('i', ':instance', 'iota'),
        ('i2', ':instance', 'i'),
        ('i', ':ARG0', 'i2'),
    ]
    graph = Graph(triples, top='i')
    expected = Tree(
        ('i', [('/', 'iota'),
               (':ARG0', ('i2', [('/', 'i')]))])
    )
    assert reconfigure(graph) == expected
def indicate_branches(g: Graph, model: Model) -> Graph:
    """
    Insert TOP triples in *g* indicating the tree structure.

    Note:
        This depends on *g* containing the epigraphical layout markers
        from parsing; it will not work with programmatically
        constructed Graph objects or those whose epigraphical data
        were removed.

    Args:
        g: a :class:`~penman.graph.Graph` object
        model: a model defining the TOP role
    Returns:
        A new :class:`~penman.graph.Graph` object with TOP roles
        indicating tree branches.
    Example:
        >>> from penman.codec import PENMANCodec
        >>> from penman.models.amr import model
        >>> from penman.transform import indicate_branches
        >>> codec = PENMANCodec(model=model)
        >>> g = codec.decode('''
        ... (w / want-01
        ...    :ARG0 (b / boy)
        ...    :ARG1 (g / go-02
        ...       :ARG0 b))''')
        >>> g = indicate_branches(g, model)
        >>> print(codec.encode(g))
        (w / want-01
           :TOP b
           :ARG0 (b / boy)
           :TOP g
           :ARG1 (g / go-02
                    :ARG0 b))
    """
    branched: List[BasicTriple] = []
    for triple in g.triples:
        # find the first Push marker attached to this triple, if any
        push = None
        for epi in g.epidata.get(triple, []):
            if isinstance(epi, Push):
                push = epi
                break

        if push is not None:
            if push.variable == triple[2]:
                # the target is pushed: TOP goes from source to target
                branched.append((triple[0], model.top_role, triple[2]))
            elif push.variable == triple[0]:
                # the source is pushed: TOP goes from target to source
                assert isinstance(triple[2], str)
                branched.append((triple[2], model.top_role, triple[0]))

        # the original triple always follows any inserted TOP triple
        branched.append(triple)

    g = Graph(branched, epidata=g.epidata, metadata=g.metadata)
    logger.info('Indicated branches: %s', g)
    return g
def node_contexts(g: Graph) -> List[Union[Variable, None]]:
    """
    Return the list of node contexts corresponding to triples in *g*.

    If a node context is unknown, the value ``None`` is substituted.
    Once the context can no longer be determined (the current context
    is not part of a triple, or the POP markers have exhausted the
    context stack), all remaining triples get ``None``.

    Args:
        g: a :class:`~penman.graph.Graph` object
    Returns:
        A list with one entry per triple of *g*: the variable of the
        node context, or ``None`` if it is unknown.
    Example:
        >>> from penman import decode, layout
        >>> g = decode('''
        ...   (a / alpha
        ...      :attr val
        ...      :ARG0 (b / beta :ARG0 (g / gamma))
        ...      :ARG0-of g)''')
        >>> for ctx, trp in zip(layout.node_contexts(g), g.triples):
        ...     print(ctx, ':', trp)
        ...
        a : ('a', ':instance', 'alpha')
        a : ('a', ':attr', 'val')
        a : ('a', ':ARG0', 'b')
        b : ('b', ':instance', 'beta')
        b : ('b', ':ARG0', 'g')
        g : ('g', ':instance', 'gamma')
        a : ('g', ':ARG0', 'a')
    """
    variables = g.variables()
    stack = [g.top]
    contexts: List[Union[Variable, None]] = [None] * len(g.triples)
    for i, triple in enumerate(g.triples):
        if not stack:
            # earlier POPs removed every context (without over-popping),
            # so the remaining contexts are unknown
            break
        current = stack[-1]

        # the context must be the source or, for edges, the target
        eligible: List[Variable] = [triple[0]]
        if triple[1] != CONCEPT_ROLE and triple[2] in variables:
            eligible.append(cast(Variable, triple[2]))

        if current not in eligible:
            break
        contexts[i] = current

        pushed = get_pushed_variable(g, triple)
        if pushed:
            stack.append(pushed)

        try:
            # triples without epigraphical data simply have no POPs
            for epi in g.epidata.get(triple, []):
                if epi is POP:
                    stack.pop()
        except IndexError:
            break  # more POPs than contexts in stack
    return contexts
def interpret(t: Tree, model: Model = None) -> Graph:
    """
    Interpret tree *t* as a graph using *model*.

    Tree interpretation is the process of transforming the nodes and
    edges of a tree into a directed graph. A semantic model determines
    which edges are inverted and how to deinvert them. If *model* is
    not provided, the default model will be used.

    If the same triple occurs more than once, only the epigraphical
    data of its first occurrence is kept and a warning is logged for
    each later occurrence.

    Args:
        t: the :class:`~penman.tree.Tree` to interpret
        model: the :class:`~penman.model.Model` used to interpret *t*
    Returns:
        The interpreted :class:`~penman.graph.Graph`.
    Example:

        >>> from penman.tree import Tree
        >>> from penman import layout
        >>> t = Tree(
        ...   ('b', [
        ...     ('/', 'bark-01'),
        ...     ('ARG0', ('d', [
        ...       ('/', 'dog')]))]))
        >>> g = layout.interpret(t)
        >>> for triple in g.triples:
        ...     print(triple)
        ...
        ('b', ':instance', 'bark-01')
        ('b', ':ARG0', 'd')
        ('d', ':instance', 'dog')

    """
    if model is None:
        model = _default_model

    node_vars = set(var for var, _ in t.nodes())
    top, triples, epidata = _interpret_node(t.node, node_vars, model)

    # map each triple to the epigraphical data of its first occurrence
    epimap = {}
    for triple, epis in epidata:
        if triple not in epimap:
            epimap[triple] = epis
            continue
        logger.warning(
            f'ignoring epigraph data for duplicate triple: {triple}'
        )

    g = Graph(triples, top=top, epidata=epimap, metadata=t.metadata)
    logger.info('Interpreted: %s', g)
    return g
def dereify_edges(g: Graph, model: Model) -> Graph:
    """
    Dereify edges in *g* that have reifications in *model*.

    Args:
        g: a :class:`~penman.graph.Graph` object
        model: a model defining reifications; if ``None``, a new
            :class:`Model` instance is used
    Returns:
        A new :class:`~penman.graph.Graph` object with dereified
        edges.
    Example:
        >>> from penman.codec import PENMANCodec
        >>> from penman.models.amr import model
        >>> from penman.transform import dereify_edges
        >>> codec = PENMANCodec(model=model)
        >>> g = codec.decode(
        ...   '(c / chapter'
        ...   '   :ARG1-of (_ / have-mod-91'
        ...   '              :ARG2 7))')
        >>> g = dereify_edges(g, model)
        >>> print(codec.encode(g))
        (c / chapter
           :mod 7)
    """
    if model is None:
        model = Model()

    # the agenda is keyed by the source variable of the triples to
    # replace
    agenda = _dereify_agenda(g, model)
    epidata_out = dict(g.epidata)
    kept: List[BasicTriple] = []

    for triple in g.triples:
        source = triple[0]
        if source not in agenda:
            kept.append(triple)
            continue

        first, dereified, epidata = agenda[source]
        # only insert at the first triple so the dereification
        # appears in the correct location
        if triple == first:
            kept.append(dereified)
            epidata_out[dereified] = epidata
        # the replaced triple's own epigraphical data is discarded
        epidata_out.pop(triple, None)

    g = Graph(kept, epidata=epidata_out, metadata=g.metadata)
    logger.info('Dereified edges: %s', g)
    return g
def appears_inverted(g: Graph, triple: BasicTriple) -> bool:
    """
    Return ``True`` if *triple* appears inverted in serialization.

    More specifically, this function returns ``True`` if *triple* has
    a :class:`Push` epigraphical marker in graph *g* whose associated
    variable is the source variable of *triple*, or, when no variable
    is pushed by *triple*, if the target of *triple* is its node
    context as reported by :func:`node_contexts`. This should be
    accurate when testing a triple in a graph interpreted using
    :func:`interpret` (including :meth:`PENMANCodec.decode
    <penman.codec.PENMANCodec.decode>`, etc.), but it does not
    guarantee that a new serialization of *g* will express *triple* as
    inverted as it can change if the graph or its epigraphical markers
    are modified, if a new top is chosen, etc.

    Args:
        g: a :class:`~penman.graph.Graph` containing *triple*
        triple: the triple that does or does not appear inverted
    Returns:
        ``True`` if *triple* appears inverted in graph *g*.
    """
    known_vars = g.variables()

    # attributes and instance triples should never be inverted
    if triple[1] == CONCEPT_ROLE or triple[2] not in known_vars:
        return False

    # edges may appear inverted when their source is pushed...
    pushed = get_pushed_variable(g, triple)
    if pushed is not None:
        return pushed == triple[0]

    # ... or when their target is the current node context
    for context, candidate in zip(node_contexts(g), g.triples):
        if context is None:
            break  # we can no longer guess the node context
        if candidate == triple:
            return triple[2] == context
    return False
def deserialize(self, gstring):
    """
    Parse the graph string *gstring* into triples and rebuild a penman graph.

    The string is tokenized with ``self.graph_tokenize`` and each token is
    classified with ``self.token_type``.  Complete (source, role, target)
    triples are appended to ``self.triples``.  Malformed input is tolerated
    where possible: problems are logged and parsing continues.  After parsing,
    targets that are neither variables nor valid-looking attributes are
    wrapped in double quotes, and the triples are encoded and re-decoded with
    penman.

    On success ``self.gstring`` holds the encoded string and ``self.pgraph``
    the decoded graph.  If the conversion raises, both are set to ``None``.

    Args:
        gstring: the serialized graph string to parse
    """
    node_stack = []     # list of previously instantiated nodes
    node_depth = 0      # number of left parens encountered and not destacked
    triple = []

    def read_concept(token, prev_token):
        # Resolve a concept token to its variable, stack it if it opens a
        # new node, and log mismatches between parens and node creation.
        variable, concept, is_new_node = self.get_var_concept(token)
        if is_new_node:
            node_stack.append(variable)
            if prev_token != '(':
                logger.warning('gid=%s Missing starting paren for node %s/%s',
                               self.gid, variable, concept)
        elif prev_token == '(':
            logger.warning('gid=%s Start paren present but %s is not a new concept',
                           self.gid, concept)
        return variable

    # Tokenize (the system has a lot of unbalanced parentheses)
    tokens = self.graph_tokenize(gstring)
    # Loop through all tokens and parse the string
    for tnum, token in enumerate(tokens):
        #### Big case statement to classify parts of the graph string ####
        ttype = self.token_type(token)
        # The first token has no predecessor; don't wrap around to the last token
        prev_token = tokens[tnum - 1] if tnum > 0 else None
        # Mostly ignored but can be used for error checking
        if token == '(':
            node_depth += 1
        # Find the source for the triple
        elif len(triple) == 0 and ttype == TType.concept:
            # This path should only happen for a new graph.  Make a somewhat arbitrary choice to
            # either stop parsing or to clear out the existing triples to prevent disconnected graphs.
            if len(self.triples) > 0:
                logger.error('gid=%s Initial node constructed when triples not empty.', self.gid)
                if len(self.triples) > len(tokens) / 4:  # if > half done (on average ~2 tokens per triple)
                    break
                self.triples = []
            triple.append(read_concept(token, prev_token))
        elif len(triple) == 0 and ttype == TType.role:
            # A role with no explicit source attaches to the node on top of the stack
            if not node_stack:
                logger.warning('gid=%s Role %s found before any node was created',
                               self.gid, token)
                continue
            triple.append(node_stack[-1])
            triple.append(token)
        # Look for the role (aka edge)
        elif len(triple) == 1 and ttype == TType.role:
            triple.append(token)
        # Look for the target
        elif len(triple) == 2 and ttype == TType.attrib:
            triple.append(token)
        elif len(triple) == 2 and ttype == TType.concept:
            triple.append(read_concept(token, prev_token))
        # De-stack the root nodes based on closing parens, but don't destack past the top var
        # Log an error if we're trying to empty the stack and it's not the very last token
        elif token == ')':
            if len(node_stack) > 1:
                node_stack.pop()
                node_depth -= 1
            elif tnum < len(tokens) - 1:
                logger.warning('gid=%s Trying to destack past top node', self.gid)
        # Unknown situation (should never get here)
        else:
            logger.warning('gid=%s Unhandled token %s', self.gid, token)
        #### Save the triple if complete ####
        if len(triple) == 3:
            self.triples.append(tuple(triple))
            triple = []

    # Do a little post-processing check on the triples and fix attribs if needed
    # I haven't found instances that require this but it could be useful
    bare_attribs = {'-', '+', 'interrogative', 'imperative', 'expressive'}
    for i, trip in enumerate(self.triples):
        if trip[1] == self.INSTANCE:
            continue
        target = trip[2]
        # Variables are left untouched
        if self.re_var.fullmatch(target) or self.re_ii.fullmatch(target):
            continue
        # Attributes that already use valid syntax are left untouched
        if (target.startswith('"') and target.endswith('"')) or self.is_num(target) \
                or target in bare_attribs:
            continue
        # Otherwise enforce attribute syntax by stripping inner quotes and quoting the whole value
        new_target = '"' + target.replace('"', '') + '"'
        self.triples[i] = (trip[0], trip[1], new_target)
        logger.warning('gid=%s Replacing attrib %s with %s', self.gid, target, new_target)

    # Now convert to a penman graph and then back to a string
    pgraph = Graph(self.triples)
    # Catch malformed graphs, including disconnected ones, incorrectly quoted attribs, etc..
    # Exception (not a bare except) so KeyboardInterrupt / SystemExit still propagate.
    try:
        self.gstring = penman.encode(pgraph, indent=6, model=NoOpModel())
        self.pgraph = penman.decode(self.gstring, model=NoOpModel())
    except Exception:
        self.gstring = None
        self.pgraph = None