Ejemplo n.º 1
0
 def sort_key(branch: Branch):
     """Return the two-part sort key for *branch*.

     The first part tells whether the branch target (or, for a
     non-atomic target, its first element) is in ``variables``; the
     second part is ``key(role)``, or ``True`` when ``key`` is ``None``.
     Both ``variables`` and ``key`` come from the surrounding scope.
     """
     role, target = branch
     # a non-atomic target is looked up by its first element
     candidate = target if is_atomic(target) else target[0]
     in_variables = candidate in variables
     role_rank = key(role) if key is not None else True
     return (in_variables, role_rank)
Ejemplo n.º 2
0
def _interpret_node(t: Node, variables: Set[Variable], model: Model):
    """
    Flatten tree node *t* into triples and their epigraphical data.

    Each ``(role, target)`` edge of *t* yields one triple. Non-atomic
    targets are interpreted recursively and their triples are placed
    right after the triple that introduces them.

    Args:
        t: a ``(var, edges)`` pair
        variables: an inverted-role triple with an atomic target is
            only inverted when that target is in this set
        model: provides ``is_role_inverted()``, ``invert()``, and
            ``deinvert()``
    Returns:
        A ``(var, triples, epidata)`` tuple where *epidata* is a dict
        mapping each triple to the list of epidata collected for it.
    """
    has_concept = False
    triples = []
    epidata = {}
    var, edges = t
    for role, target in edges:
        # epidata gathered for the triple built from this edge
        epis: List[Epidatum] = []

        if role == '/':
            role = CONCEPT_ROLE
            has_concept = True
        elif '~' in role:
            # everything after the first ~ is the role's alignment
            role, _, alignment = role.partition('~')
            epis.append(RoleAlignment.from_string(alignment))

        # atomic targets
        if is_atomic(target):
            # remove any alignments
            if target and '~' in target:
                if target.startswith('"'):
                    # need to handle alignments on strings differently
                    # because strings may contain ~ inside the quotes
                    pivot = target.rindex('"') + 1
                    if pivot < len(target):
                        # NOTE(review): the text passed here is everything
                        # after the last quote (presumably starting with
                        # ~), while the non-string branch below passes
                        # the text after the ~ -- confirm from_string()
                        # accepts both forms
                        epis.append(Alignment.from_string(target[pivot:]))
                        target = target[:pivot]
                else:
                    target, _, alignment = target.partition('~')
                    epis.append(Alignment.from_string(alignment))
            triple = (var, role, target)
            if model.is_role_inverted(role):
                if target in variables:
                    triple = model.invert(triple)
                else:
                    # the triple is kept as-is; only a warning is logged
                    logger.warning('cannot deinvert attribute: %r', triple)
            triples.append(triple)
            epidata[triple] = epis
        # nested nodes
        else:
            # target[0] is the first element of the nested node
            triple = model.deinvert((var, role, target[0]))
            triples.append(triple)
            epidata[triple] = epis

            # recurse to nested nodes
            epidata[triple].append(Push(target[0]))
            _, _triples, _epis = _interpret_node(target, variables, model)
            triples.extend(_triples)
            epidata.update(_epis)
            # POP goes on the last triple gathered so far
            epidata[triples[-1]].append(POP)

    if not has_concept:
        # no / edge was seen: add a concept triple with a None target
        # at the front of the triples
        instance = (var, CONCEPT_ROLE, None)
        triples.insert(0, instance)
        epidata[instance] = []

    return var, triples, epidata
Ejemplo n.º 3
0
def _canonicalize_node(node: Node, model: Model) -> Node:
    """
    Return a new node like *node* but with canonicalized roles.

    Every edge role is passed through ``model.canonicalize_role()``;
    non-atomic targets are canonicalized recursively. The input node
    is not modified.

    Args:
        node: a ``(var, edges)`` pair
        model: provides ``canonicalize_role()``
    Returns:
        A ``(var, edges)`` pair with a newly built edge list.
    """
    var, edges = node
    canonical_edges = []
    for role, tgt in edges:
        # alignments aren't parsed off yet, so handle them superficially:
        # split at the first ~ and glue the suffix back on unchanged
        role, tilde, alignment = role.partition('~')
        if not is_atomic(tgt):
            tgt = _canonicalize_node(tgt, model)
        canonical_role = model.canonicalize_role(role) + tilde + alignment
        canonical_edges.append((canonical_role, tgt))
    return (var, canonical_edges)
Ejemplo n.º 4
0
def _rearrange(node: Node, key: Callable[[Branch], Any]) -> None:
    """
    Sort the branches of *node* in place by *key*, recursively.

    A leading ``/`` branch stays in first position; only the branches
    after it are sorted. Non-atomic targets among those branches are
    rearranged before the branches themselves are sorted.
    """
    _, branches = node
    # a leading / branch is pinned at the front and excluded from sorting
    pinned = 1 if branches and branches[0][0] == '/' else 0
    head = branches[:pinned]
    tail = branches[pinned:]
    for _, subtree in tail:
        if is_atomic(subtree):
            continue
        _rearrange(subtree, key=key)
    tail.sort(key=key)
    branches[:] = head + tail
Ejemplo n.º 5
0
def _process_epigraph(node):
    """
    Format epigraph data onto roles and targets, in place.

    Each ``(role, target, epis)`` edge of *node* is replaced by a
    ``(role, target)`` pair. Markers with ``mode == 1`` are appended to
    the role, markers with ``mode == 2`` are appended to atomic
    targets, and any other marker is reported with a warning.
    Non-atomic targets are processed recursively.
    """
    _, edges = node
    for index, (label, tgt, markers) in enumerate(edges):
        # decided once, before any marker text is attached to the target
        tgt_is_atom = is_atomic(tgt)
        for marker in markers:
            if marker.mode == 1:  # role epidata
                label = str(label) + str(marker)
                continue
            if marker.mode == 2 and tgt_is_atom:  # target epidata
                tgt = str(tgt) + str(marker)
                continue
            logger.warning('epigraphical marker ignored: %r', marker)
        if not tgt_is_atom:
            _process_epigraph(tgt)
        edges[index] = (label, tgt)
Ejemplo n.º 6
0
def _interpret_node(t: Node, variables: Set[Variable], model: Model):
    """
    Flatten tree node *t* into triples and their epigraphical data.

    Each ``(role, target)`` edge of *t* yields one triple. Non-atomic
    targets are interpreted recursively and their triples are placed
    right after the triple that introduces them.

    Args:
        t: a ``(var, edges)`` pair
        variables: an inverted-role triple with an atomic target is
            only inverted when that target is in this set
        model: provides ``is_role_inverted()``, ``invert()``, and
            ``deinvert()``
    Returns:
        A ``(var, triples, epidata)`` tuple where *epidata* is a dict
        mapping each triple to the list of epidata collected for it.
    """
    has_concept = False
    triples = []
    epidata = {}
    var, edges = t
    for role, target in edges:
        # epidata gathered for the triple built from this edge
        epis: List[Epidatum] = []

        if role == '/':
            role = CONCEPT_ROLE
            has_concept = True
        elif '~' in role:
            # everything after the first ~ is the role's alignment
            role, _, alignment = role.partition('~')
            epis.append(RoleAlignment.from_string(alignment))

        # atomic targets
        if is_atomic(target):
            if target and '~' in target:
                if target.startswith('"'):
                    # a quoted string may contain ~ inside the quotes, so
                    # only text after the closing quote can be an
                    # alignment; splitting at the first ~ would truncate
                    # the string itself
                    pivot = target.rindex('"') + 1
                    suffix = target[pivot:]
                    if suffix.startswith('~'):
                        epis.append(Alignment.from_string(suffix[1:]))
                        target = target[:pivot]
                else:
                    target, _, alignment = target.partition('~')
                    epis.append(Alignment.from_string(alignment))
            triple = (var, role, target)
            if model.is_role_inverted(role):
                if target in variables:
                    triple = model.invert(triple)
                else:
                    # the triple is kept as-is; only a warning is logged
                    logger.warning('cannot deinvert attribute: %r', triple)
            triples.append(triple)
            epidata[triple] = epis
        # nested nodes
        else:
            # target[0] is the first element of the nested node
            triple = model.deinvert((var, role, target[0]))
            triples.append(triple)
            epidata[triple] = epis

            # recurse to nested nodes
            epidata[triple].append(Push(target[0]))
            _, _triples, _epis = _interpret_node(target, variables, model)
            triples.extend(_triples)
            epidata.update(_epis)
            # POP goes on the last triple gathered so far
            epidata[triples[-1]].append(POP)

    if not has_concept:
        # no / edge was seen: add a concept triple with a None target
        # at the front of the triples
        instance = (var, CONCEPT_ROLE, None)
        triples.insert(0, instance)
        epidata[instance] = []

    return var, triples, epidata
Ejemplo n.º 7
0
def _interpret_node(t: Node, variables: Set[Variable], model: Model):
    """
    Flatten tree node *t* into triples and their epigraphical data.

    Each ``(role, target)`` edge of *t* yields one triple. Non-atomic
    targets are interpreted recursively and their triples are placed
    right after the triple that introduces them.

    Args:
        t: a ``(var, edges)`` pair
        variables: an inverted-role triple with an atomic target is
            only inverted when that target is in this set
        model: provides ``is_role_inverted()``, ``invert()``, and
            ``deinvert()``
    Returns:
        A ``(var, triples, epidata)`` tuple where *epidata* is a list
        of ``(triple, epis)`` pairs in the same order as *triples*.
    """
    has_concept = False
    triples = []
    epidata = []
    var, edges = t
    for role, target in edges:
        # epidata gathered for the triple built from this edge
        epis: List[Epidatum] = []

        # _process_role() yields the role to use plus epidata for it
        role, role_epis = _process_role(role)
        epis.extend(role_epis)
        has_concept |= role == CONCEPT_ROLE

        # atomic targets
        if is_atomic(target):
            # _process_atomic() yields the target to use plus its epidata
            target, target_epis = _process_atomic(target)
            epis.extend(target_epis)
            triple = (var, role, target)
            if model.is_role_inverted(role):
                if target in variables:
                    triple = model.invert(triple)
                else:
                    # the triple is kept as-is; only a warning is logged
                    logger.warning('cannot deinvert attribute: %r', triple)
            triples.append(triple)
            epidata.append((triple, epis))
        # nested nodes
        else:
            # target[0] is the first element of the nested node
            triple = model.deinvert((var, role, target[0]))
            triples.append(triple)

            epis.append(Push(target[0]))
            epidata.append((triple, epis))

            # recurse to nested nodes
            _, _triples, _epis = _interpret_node(target, variables, model)
            triples.extend(_triples)
            _epis[-1][1].append(POP)  # POP from last triple of nested node
            epidata.extend(_epis)

    if not has_concept:
        # no concept edge was seen: add a concept triple with a None
        # target at the front of BOTH lists so that epidata stays in
        # triple order; appending it to epidata would make the caller's
        # _epis[-1] (the POP site) the instance triple instead of the
        # node's last triple
        instance = (var, CONCEPT_ROLE, None)
        triples.insert(0, instance)
        epidata.insert(0, (instance, []))

    return var, triples, epidata
Ejemplo n.º 8
0
    def _format_edge(self, edge, indent, column, vars):
        """
        Format tree *edge* into a PENMAN string.
        """
        role, target = edge

        if role != '/' and not role.startswith(':'):
            role = ':' + role

        if indent == -1:
            column += len(role) + 1  # +1 for :

        sep = ' '
        if not target:
            target = sep = ''
        elif not is_atomic(target):
            target = self._format_node(target, indent, column, vars)

        return f'{role}{sep}{target!s}'
Ejemplo n.º 9
0
def _format_node(node,
                 indent: Optional[int],
                 column: int,
                 vars: set) -> str:
    """
    Format tree *node* into a PENMAN string.
    """
    var, edges = node
    if not var:
        return '()'  # empty node
    if not edges:
        return f'({var!s})'  # var-only node

    # determine appropriate joiner based on value of indent
    if indent is None:
        joiner = ' '
    else:
        if indent == -1:
            column += len(str(var)) + 2  # +2 for ( and a space
        else:
            column += indent
        joiner = '\n' + ' ' * column

    # format the edges and join them
    # if vars is non-empty, all initial attributes are compactly
    # joined on the same line, otherwise they use joiner
    parts: List[str] = []
    compact = bool(vars)
    for edge in edges:
        target = edge[1]
        if compact and (not is_atomic(target) or target in vars):
            compact = False
            if parts:
                parts = [' '.join(parts)]
        parts.append(_format_edge(edge, indent, column, vars))
    # check if all edges can be compactly written
    if compact:
        parts = [' '.join(parts)]

    return f'({var!s} {joiner.join(parts)})'
Ejemplo n.º 10
0
def test_is_atomic():
    """Strings, ``None`` and floats are atomic; a (var, edges) node is not."""
    for atom in ('a', None, 3.14):
        assert tree.is_atomic(atom)
    nested = ('a', [('/', 'alpha')])
    assert not tree.is_atomic(nested)