Ejemplo n.º 1
0
def test_interpret(amr_model):
    """Check tree interpretation with the default model and with AMR."""
    # a lone node yields just its instance triple
    tree = codec.parse('(a / A)')
    actual = interpret(tree)
    expected = Graph([('a', ':instance', 'A')], top='a')
    assert actual == expected

    tree = codec.parse('(a / A :consist-of (b / B))')

    # with the default model the edge is expected to come out as
    # (b, :consist, a)
    actual = interpret(tree)
    expected = Graph(
        [
            ('a', ':instance', 'A'),
            ('b', ':consist', 'a'),
            ('b', ':instance', 'B'),
        ],
        top='a',
    )
    assert actual == expected

    # with the AMR model the role is expected to stay as :consist-of
    actual = interpret(tree, model=amr_model)
    expected = Graph(
        [
            ('a', ':instance', 'A'),
            ('a', ':consist-of', 'b'),
            ('b', ':instance', 'B'),
        ],
        top='a',
    )
    assert actual == expected
Ejemplo n.º 2
0
def reify_edges(g: Graph, model: Model) -> Graph:
    """
    Reify every edge of *g* whose role is reifiable according to *model*.

    Each reifiable triple is replaced by three triples (incoming edge,
    new node, outgoing edge) produced by ``model.reify()``; all other
    triples are copied unchanged. The input graph is not modified.

    Args:
        g: a :class:`~penman.graph.Graph` object
        model: a model defining reifications; if ``None``, an empty
            :class:`Model` is used
    Returns:
        A new :class:`~penman.graph.Graph` object with reified edges.
    Example:
        >>> from penman.codec import PENMANCodec
        >>> from penman.models.amr import model
        >>> from penman.transform import reify_edges
        >>> codec = PENMANCodec(model=model)
        >>> g = codec.decode('(c / chapter :mod 7)')
        >>> g = reify_edges(g, model)
        >>> print(codec.encode(g))
        (c / chapter
           :ARG1-of (_ / have-mod-91
                       :ARG2 7))
    """
    known_vars = g.variables()
    if model is None:
        model = Model()
    epidata = dict(g.epidata)
    triples: List[BasicTriple] = []
    for triple in g.triples:
        if not model.is_role_reifiable(triple[1]):
            triples.append(triple)
            continue

        in_triple, node_triple, out_triple = model.reify(triple, known_vars)
        if appears_inverted(g, triple):
            # keep the serialized orientation of the original edge
            in_triple, out_triple = out_triple, in_triple
        triples += [in_triple, node_triple, out_triple]

        # remember the new variable so later reifications avoid it
        new_var = node_triple[0]
        known_vars.add(new_var)

        # redistribute the epigraphical markers of the replaced triple;
        # the final POP is not placed here because that would require
        # configuring the tree
        epidata[in_triple] = [Push(new_var)]
        node_epis, out_epis = _edge_markers(epidata.pop(triple, []))
        epidata[node_triple] = node_epis
        epidata[out_triple] = out_epis

    reified = Graph(triples, epidata=epidata, metadata=g.metadata)
    logger.info('Reified edges: %s', reified)
    return reified
Ejemplo n.º 3
0
def reify_attributes(g: Graph) -> Graph:
    """
    Turn every attribute of *g* into a node of its own.

    A triple counts as an attribute when its role is not the concept
    role and its target is not a variable of *g*. Each such triple is
    replaced by an edge to a fresh variable plus an instance triple
    whose concept is the original attribute value. The input graph is
    not modified.

    Args:
        g: a :class:`~penman.graph.Graph` object
    Returns:
        A new :class:`~penman.graph.Graph` object with reified
        attributes.
    Example:
        >>> from penman.codec import PENMANCodec
        >>> from penman.models.amr import model
        >>> from penman.transform import reify_attributes
        >>> codec = PENMANCodec(model=model)
        >>> g = codec.decode('(c / chapter :mod 7)')
        >>> g = reify_attributes(g)
        >>> print(codec.encode(g))
        (c / chapter
           :mod (_ / 7))
    """
    variables = g.variables()
    epidata = dict(g.epidata)
    triples: List[BasicTriple] = []
    suffix = 2  # next numeric suffix to try once '_' is taken
    for triple in g.triples:
        source, role, target = triple
        if role == CONCEPT_ROLE or target in variables:
            # instance triples and edges pass through untouched
            triples.append(triple)
            continue

        # pick a variable not yet used: '_', then '_2', '_3', ...
        var = '_'
        while var in variables:
            var = f'_{suffix}'
            suffix += 1
        variables.add(var)

        role_triple = (source, role, var)
        node_triple = (var, CONCEPT_ROLE, target)
        triples += [role_triple, node_triple]

        # split the old markers between the two new triples and wrap
        # the new node in its own Push/POP pair
        role_epis, node_epis = _attr_markers(epidata.pop(triple, []))
        epidata[role_triple] = role_epis + [Push(var)]
        epidata[node_triple] = node_epis + [POP]

    reified = Graph(triples, epidata=epidata, metadata=g.metadata)
    logger.info('Reified attributes: %s', reified)
    return reified
Ejemplo n.º 4
0
 def test_errors(self, mini_amr):
     """Check ``errors()`` on an empty model and on the mini-AMR model."""
     empty_model = Model()
     amr = Model.from_dict(mini_amr)

     # basic roles
     g = Graph([('a', ':instance', 'alpha')])
     assert empty_model.errors(g) == {}

     mod_triple = ('a', ':mod', '1')
     g = Graph([('a', ':instance', 'alpha'), mod_triple])
     assert empty_model.errors(g) == {mod_triple: ['invalid role']}
     assert amr.errors(g) == {}

     # regex role names
     g = Graph([
         ('n', ':instance', 'name'),
         ('n', ':op1', 'Foo'),
         ('n', ':op2', 'Bar'),
     ])
     assert amr.errors(g) == {}

     # disconnected graph: both models are expected to flag the
     # unreachable node
     stray_triple = ('b', ':instance', 'beta')
     g = Graph([('a', ':instance', 'alpha'), stray_triple])
     assert empty_model.errors(g) == {stray_triple: ['unreachable']}
     assert amr.errors(g) == {stray_triple: ['unreachable']}
Ejemplo n.º 5
0
def test_issue_90():
    """Regression test: a concept named like a variable ('i')."""
    # https://github.com/goodmami/penman/issues/90

    triples = [
        ('i', ':instance', 'iota'),
        ('i2', ':instance', 'i'),
        ('i', ':ARG0', 'i2'),
    ]
    g = Graph(triples, top='i')
    actual = reconfigure(g)

    # the concept 'i' of node i2 is expected to stay a concept
    i2_node = ('i2', [('/', 'i')])
    expected = Tree(('i', [('/', 'iota'), (':ARG0', i2_node)]))
    assert actual == expected
Ejemplo n.º 6
0
def indicate_branches(g: Graph, model: Model) -> Graph:
    """
    Mark the tree structure of *g* with explicit TOP triples.

    For every triple that carries a :class:`Push` marker, a triple
    using ``model.top_role`` is inserted immediately before it,
    pointing from the other end of the edge to the pushed variable.

    Note:
        This depends on *g* containing the epigraphical layout markers
        from parsing; it will not work with programmatically
        constructed Graph objects or those whose epigraphical data
        were removed.

    Args:
        g: a :class:`~penman.graph.Graph` object
        model: a model defining the TOP role
    Returns:
        A new :class:`~penman.graph.Graph` object with TOP roles
        indicating tree branches.
    Example:
        >>> from penman.codec import PENMANCodec
        >>> from penman.models.amr import model
        >>> from penman.transform import indicate_branches
        >>> codec = PENMANCodec(model=model)
        >>> g = codec.decode('''
        ... (w / want-01
        ...    :ARG0 (b / boy)
        ...    :ARG1 (g / go-02
        ...             :ARG0 b))''')
        >>> g = indicate_branches(g, model)
        >>> print(codec.encode(g))
        (w / want-01
           :TOP b
           :ARG0 (b / boy)
           :TOP g
           :ARG1 (g / go-02
                    :ARG0 b))
    """
    triples: List[BasicTriple] = []
    for t in g.triples:
        # only the first Push marker on a triple is considered
        push = None
        for epi in g.epidata.get(t, []):
            if isinstance(epi, Push):
                push = epi
                break

        if push is not None:
            if push.variable == t[2]:
                # the target starts the branch
                triples.append((t[0], model.top_role, t[2]))
            elif push.variable == t[0]:
                # the source starts the branch, so the TOP triple runs
                # from the target back to the source
                assert isinstance(t[2], str)
                triples.append((t[2], model.top_role, t[0]))
        triples.append(t)

    branched = Graph(triples, epidata=g.epidata, metadata=g.metadata)
    logger.info('Indicated branches: %s', branched)
    return branched
Ejemplo n.º 7
0
def node_contexts(g: Graph) -> List[Union[Variable, None]]:
    """
    Return the list of node contexts corresponding to triples in *g*.

    The node context of a triple is the variable on top of the layout
    stack when the triple is visited. The stack starts with the top of
    *g*, grows when a triple pushes a variable, and shrinks for every
    ``POP`` marker in the triple's epigraphical data.

    If a node context is unknown, the value ``None`` is substituted.
    Once the context can no longer be determined (the stack top is
    neither the source nor the variable target of a triple, or the
    stack has been emptied by POP markers), that triple and all
    following ones are left as ``None``. Triples without an entry in
    the epigraphical data are treated as having no markers.

    Args:
        g: a :class:`~penman.graph.Graph` object
    Returns:
        A list with one entry per triple of *g*, in triple order.
    Example:
        >>> from penman import decode, layout
        >>> g = decode('''
        ...   (a / alpha
        ...      :attr val
        ...      :ARG0 (b / beta :ARG0 (g / gamma))
        ...      :ARG0-of g)''')
        >>> for ctx, trp in zip(layout.node_contexts(g), g.triples):
        ...     print(ctx, ':', trp)
        ...
        a : ('a', ':instance', 'alpha')
        a : ('a', ':attr', 'val')
        a : ('a', ':ARG0', 'b')
        b : ('b', ':instance', 'beta')
        b : ('b', ':ARG0', 'g')
        g : ('g', ':instance', 'gamma')
        a : ('g', ':ARG0', 'a')
    """
    variables = g.variables()
    stack = [g.top]
    contexts: List[Union[Variable, None]] = [None] * len(g.triples)
    for i, triple in enumerate(g.triples):
        if not stack:
            break  # every context was popped; nothing left to guess from

        # the context must be the source or, for edges, the target
        eligible: List[Variable] = [triple[0]]
        if triple[1] != CONCEPT_ROLE and triple[2] in variables:
            eligible.append(cast(Variable, triple[2]))

        if stack[-1] not in eligible:
            break
        contexts[i] = stack[-1]

        pushed = get_pushed_variable(g, triple)
        if pushed:
            stack.append(pushed)

        try:
            # triples lacking epigraphical data simply have no POPs
            for epi in g.epidata.get(triple, []):
                if epi is POP:
                    stack.pop()
        except IndexError:
            break  # more POPs than contexts in stack
    return contexts
Ejemplo n.º 8
0
def interpret(t: Tree, model: Model = None) -> Graph:
    """
    Build a graph from tree *t* according to *model*.

    Interpreting a tree means turning its nodes and edges into the
    triples of a directed graph. The semantic model decides which
    edges are inverted and how they are deinverted. When *model* is
    ``None`` the module's default model is used. If the same triple
    occurs more than once, only the epigraphical data of its first
    occurrence is kept and a warning is logged for the others.

    Args:
        t: the :class:`~penman.tree.Tree` to interpret
        model: the :class:`~penman.model.Model` used to interpret *t*
    Returns:
        The interpreted :class:`~penman.graph.Graph`.
    Example:

        >>> from penman.tree import Tree
        >>> from penman import layout
        >>> t = Tree(
        ...   ('b', [
        ...     ('/', 'bark-01'),
        ...     ('ARG0', ('d', [
        ...       ('/', 'dog')]))]))
        >>> g = layout.interpret(t)
        >>> for triple in g.triples:
        ...     print(triple)
        ...
        ('b', ':instance', 'bark-01')
        ('b', ':ARG0', 'd')
        ('d', ':instance', 'dog')

    """
    if model is None:
        model = _default_model

    variables = {var for var, _branches in t.nodes()}
    top, triples, epidata = _interpret_node(t.node, variables, model)

    # first occurrence of a triple wins; later duplicates are reported
    epimap = {}
    for triple, epis in epidata:
        if triple not in epimap:
            epimap[triple] = epis
            continue
        logger.warning(
            f'ignoring epigraph data for duplicate triple: {triple}'
        )

    graph = Graph(triples, top=top, epidata=epimap, metadata=t.metadata)
    logger.info('Interpreted: %s', graph)
    return graph
Ejemplo n.º 9
0
def dereify_edges(g: Graph, model: Model) -> Graph:
    """
    Collapse reified nodes of *g* back into plain edges using *model*.

    The nodes to collapse come from the dereification agenda computed
    for *g* and *model*. All triples whose source is such a node are
    dropped, and the single dereified triple is inserted at the
    position of the node's first triple. The input graph is not
    modified.

    Args:
        g: a :class:`~penman.graph.Graph` object
        model: a model defining reifications; if ``None``, an empty
            :class:`Model` is used
    Returns:
        A new :class:`~penman.graph.Graph` object with dereified
        edges.
    Example:
        >>> from penman.codec import PENMANCodec
        >>> from penman.models.amr import model
        >>> from penman.transform import dereify_edges
        >>> codec = PENMANCodec(model=model)
        >>> g = codec.decode(
        ...   '(c / chapter'
        ...   '   :ARG1-of (_ / have-mod-91'
        ...   '               :ARG2 7))')
        >>> g = dereify_edges(g, model)
        >>> print(codec.encode(g))
        (c / chapter
           :mod 7)
    """
    if model is None:
        model = Model()
    agenda = _dereify_agenda(g, model)
    epidata = dict(g.epidata)
    triples: List[BasicTriple] = []
    for triple in g.triples:
        source = triple[0]
        if source not in agenda:
            triples.append(triple)
            continue

        first, dereified, dereified_epis = agenda[source]
        if triple == first:
            # emit the replacement only once, where the reified node
            # first appeared, so it lands in the right location
            triples.append(dereified)
            epidata[dereified] = dereified_epis
        # markers of the replaced triple no longer apply
        epidata.pop(triple, None)

    dereified_graph = Graph(triples, epidata=epidata, metadata=g.metadata)
    logger.info('Dereified edges: %s', dereified_graph)
    return dereified_graph
Ejemplo n.º 10
0
def appears_inverted(g: Graph, triple: BasicTriple) -> bool:
    """
    Tell whether *triple* is written inverted in the serialization.

    Instance triples and attributes are never inverted. An edge is
    reported as inverted when it carries a :class:`Push` marker in *g*
    for its own source variable, or, if it pushes nothing, when its
    target is the node context in which it occurs. This should be
    accurate for graphs produced by :func:`interpret` (including
    :meth:`PENMANCodec.decode <penman.codec.PENMANCodec.decode>`,
    etc.), but it is no guarantee about a future serialization of *g*:
    the result can change if the graph or its epigraphical markers are
    modified, if a new top is chosen, etc.

    Args:
        g: a :class:`~penman.graph.Graph` containing *triple*
        triple: the triple that does or does not appear inverted
    Returns:
        ``True`` if *triple* appears inverted in graph *g*.
    """
    variables = g.variables()
    is_edge = triple[1] != CONCEPT_ROLE and triple[2] in variables
    if not is_edge:
        # attributes and instance triples should never be inverted
        return False

    # an edge that pushes a variable is inverted iff it pushes its source
    pushed = get_pushed_variable(g, triple)
    if pushed is not None:
        return pushed == triple[0]

    # otherwise it is inverted iff its target is the current node context
    for context, candidate in zip(node_contexts(g), g.triples):
        if context is None:
            break  # we can no longer guess the node context
        if candidate == triple:
            return triple[2] == context
    return False
Ejemplo n.º 11
0
 def deserialize(self, gstring):
     """Parse a linearized graph string into triples and a penman graph.

     The string is tokenized with ``self.graph_tokenize()`` and scanned
     left to right, assembling (source, role, target) triples that are
     appended to ``self.triples``. Attribute targets that are neither
     quoted, numeric nor one of a few known constants are then wrapped
     in double quotes. Finally the triples are round-tripped through
     penman to produce ``self.gstring`` and ``self.pgraph``; both are
     set to ``None`` if penman rejects the graph.

     The parser is deliberately tolerant: irregularities such as
     missing or surplus parentheses are logged and skipped rather than
     raised.

     Args:
         gstring: the serialized graph string to parse
     Returns:
         None; results are stored on ``self`` (``triples``, ``gstring``
         and ``pgraph``).
     """
     node_stack = []     # list of previously instantiated nodes
     node_depth = 0      # number of left parens encountered and not destacked
     triple     = []
     # Tokenize (the system has a lot of unbalanced parentheses)
     tokens = self.graph_tokenize(gstring)
     last_tnum = len(tokens) - 1
     # Loop through all tokens and parse the string
     for tnum, token in enumerate(tokens):
         #### Big case statement to classify parts of the graph string ####
         ttype = self.token_type(token)
         # None for the first token, so the paren checks below never wrap
         # around to the end of the token list
         prev_token = tokens[tnum-1] if tnum > 0 else None
         # Mostly ignored but can be used for error checking
         if token == '(':
             node_depth += 1
         # Find the source for the triple
         elif len(triple) == 0 and ttype == TType.concept:
             # This path should only happen for a new graph. Make a somewhat arbitrary choice to
             # either stop parsing or to clear out the existing triples to prevent disconnected graphs.
             if len(self.triples) > 0:
                 logger.error('gid=%s Initial node constructed when triples not empty.', self.gid)
                 if len(self.triples) > len(tokens)/4:    # if > half done (on average ~2 tokens per triple)
                     break
                 else:
                     self.triples = []
             variable, concept, is_new_node = self.get_var_concept(token)
             triple.append(variable)
             if is_new_node:
                 node_stack.append( variable )
             # Some error logging
             if is_new_node and prev_token != '(':
                 logger.warning('gid=%s Missing starting paren for node %s/%s', self.gid, variable, concept)
             if not is_new_node and prev_token == '(':
                 logger.warning('gid=%s Start paren present but %s is not a new concept', self.gid, concept)
         # A role with no pending source attaches to the node on top of the stack
         elif len(triple) == 0 and ttype == TType.role:
             variable = node_stack[-1]
             triple.append(variable)
             triple.append(token)
         # Look for the role (aka edge)
         elif len(triple) == 1 and ttype == TType.role:
             triple.append(token)
         # Look for the target
         elif len(triple) == 2 and ttype == TType.attrib:
             triple.append(token)
         elif len(triple) == 2 and ttype == TType.concept:
             variable, concept, is_new_node = self.get_var_concept(token)
             if is_new_node:
                 node_stack.append( variable )
             # Some error logging
             if is_new_node and prev_token != '(':
                 logger.warning('gid=%s Missing starting paren for node %s/%s', self.gid, variable, concept)
             if not is_new_node and prev_token == '(':
                 logger.warning('gid=%s Start paren present but %s is not a new concept', self.gid, concept)
             triple.append(variable)
         # De-stack the root nodes based on closing parens, but don't destack past the top var
         # Log an error if we're trying to empty the stack and it's not the very last token
         elif token == ')':
             if len(node_stack) > 1:
                 node_stack.pop()
                 node_depth -= 1
             elif tnum < last_tnum:
                 logger.warning('gid=%s Trying to destack past top node', self.gid)
         # Unknown situation (should never get here)
         else:
             logger.warning('gid=%s Unhandled token %s', self.gid, token)
         #### Save the triple if complete ####
         if len(triple) == 3:
             self.triples.append( tuple(triple) )
             triple = []
     # Do a little post-processing check on the triples and fix attribs if needed
     # I haven't found instances that requires this but it could be useful
     bare_attribs = {'-', '+', 'interrogative', 'imperative', 'expressive'}
     for i, triple in enumerate(self.triples):
         if triple[1] == self.INSTANCE:
             continue
         target = triple[2]
         # Variables are left alone
         if self.re_var.fullmatch(target) or self.re_ii.fullmatch(target):
             continue
         # Well-formed attributes (quoted, numeric or known constants) are left alone
         if (target.startswith('"') and target.endswith('"')) or self.is_num(target) or \
            target in bare_attribs:
             continue
         # Anything else is forced into quoted-attribute syntax
         new_target = '"' + target.replace('"', '') + '"'
         self.triples[i] = (triple[0], triple[1], new_target)
         logger.warning('gid=%s Replacing attrib %s with %s', self.gid, target, new_target)
     # Now convert to a penman graph and then back to a string
     pgraph = Graph(self.triples)
     # Catch malformed graphs, including disconnected ones, incorrectly quoted attribs, etc.
     # Only ordinary errors are swallowed; KeyboardInterrupt/SystemExit still propagate.
     try:
         self.gstring = penman.encode(pgraph, indent=6, model=NoOpModel())
         self.pgraph  = penman.decode(self.gstring, model=NoOpModel())
     except Exception:
         self.gstring = None
         self.pgraph  = None