def sort_key(branch: Branch):
    """
    Return the sort key for *branch*, a ``(role, target)`` pair.

    The key is a 2-tuple: whether the target (or, for a nested node,
    its variable) is in ``variables``, followed by ``key(role)`` (or
    ``True`` when ``key`` is ``None``). Neither ``variables`` nor
    ``key`` is defined in this block; both are resolved from the
    surrounding scope.
    """
    role, target = branch
    # a nested node is identified by its first element (the variable)
    subject = target if is_atomic(target) else target[0]
    in_variables = subject in variables
    role_order = True if key is None else key(role)
    return (in_variables, role_order)
def _interpret_node(t: Node, variables: Set[Variable], model: Model):
    """
    Flatten tree node *t* into triples with their epigraphical data.

    Args:
        t: a ``(var, edges)`` pair; each edge is a ``(role, target)``
            pair whose target is either atomic or a nested node
        variables: collection of variables; an atomic target of an
            inverted role is only inverted when it is in this
            collection
        model: object providing ``is_role_inverted()``, ``invert()``,
            and ``deinvert()``
    Returns:
        A ``(var, triples, epidata)`` tuple where *triples* is the list
        of triples in traversal order and *epidata* is a dict mapping
        each triple to its list of epigraphical markers.
    """
    has_concept = False
    triples = []
    # NOTE(review): epidata is keyed by triple, so two identical triples
    # share (and overwrite) a single entry
    epidata = {}
    var, edges = t
    for role, target in edges:
        epis: List[Epidatum] = []

        if role == '/':
            role = CONCEPT_ROLE
            has_concept = True
        elif '~' in role:
            role, _, alignment = role.partition('~')
            epis.append(RoleAlignment.from_string(alignment))

        # atomic targets
        if is_atomic(target):
            # remove any alignments; only strings can carry one, so other
            # atoms (None, numbers) are kept as-is instead of raising a
            # TypeError on the '~' membership test
            if isinstance(target, str) and '~' in target:
                if target.startswith('"'):
                    # need to handle alignments on strings differently
                    # because strings may contain ~ inside the quotes
                    pivot = target.rindex('"') + 1
                    if pivot < len(target):
                        epis.append(Alignment.from_string(target[pivot:]))
                        target = target[:pivot]
                else:
                    target, _, alignment = target.partition('~')
                    epis.append(Alignment.from_string(alignment))
            triple = (var, role, target)
            if model.is_role_inverted(role):
                if target in variables:
                    triple = model.invert(triple)
                else:
                    logger.warning('cannot deinvert attribute: %r', triple)
            triples.append(triple)
            epidata[triple] = epis
        # nested nodes
        else:
            triple = model.deinvert((var, role, target[0]))
            triples.append(triple)
            epidata[triple] = epis

            # recurse to nested nodes; the edge leading into the nested
            # node gets a Push marker and the last triple produced for
            # the nested node gets POP
            epidata[triple].append(Push(target[0]))
            _, _triples, _epis = _interpret_node(target, variables, model)
            triples.extend(_triples)
            epidata.update(_epis)
            epidata[triples[-1]].append(POP)

    if not has_concept:
        # no '/' edge was seen: add an instance triple with no concept
        # at the front of the triples
        instance = (var, CONCEPT_ROLE, None)
        triples.insert(0, instance)
        epidata[instance] = []

    return var, triples, epidata
def _canonicalize_node(node: Node, model: Model) -> Node: var, edges = node canonical_edges = [] for i, edge in enumerate(edges): role, tgt = edge # alignments aren't parsed off yet, so handle them superficially role, tilde, alignment = role.partition('~') if not is_atomic(tgt): tgt = _canonicalize_node(tgt, model) canonical_role = model.canonicalize_role(role) + tilde + alignment canonical_edges.append((canonical_role, tgt)) return (var, canonical_edges)
def _rearrange(node: Node, key: Callable[[Branch], Any]) -> None: _, branches = node if branches and branches[0][0] == '/': first = branches[0:1] rest = branches[1:] else: first = [] rest = branches[:] for _, target in rest: if not is_atomic(target): _rearrange(target, key=key) branches[:] = first + sorted(rest, key=key)
def _process_epigraph(node): """Format epigraph data onto roles and targets.""" _, edges = node for i, (role, target, epis) in enumerate(edges): atomic_target = is_atomic(target) for epi in epis: if epi.mode == 1: # role epidata role = f'{role!s}{epi!s}' elif epi.mode == 2 and atomic_target: # target epidata target = f'{target!s}{epi!s}' else: logger.warning('epigraphical marker ignored: %r', epi) if not atomic_target: _process_epigraph(target) edges[i] = (role, target)
def _interpret_node(t: Node, variables: Set[Variable], model: Model):
    """
    Flatten tree node *t* into triples with their epigraphical data.

    Args:
        t: a ``(var, edges)`` pair; each edge is a ``(role, target)``
            pair whose target is either atomic or a nested node
        variables: collection of variables; an atomic target of an
            inverted role is only inverted when it is in this
            collection
        model: object providing ``is_role_inverted()``, ``invert()``,
            and ``deinvert()``
    Returns:
        A ``(var, triples, epidata)`` tuple where *triples* is the list
        of triples in traversal order and *epidata* is a dict mapping
        each triple to its list of epigraphical markers.
    """
    has_concept = False
    triples = []
    # NOTE(review): epidata is keyed by triple, so two identical triples
    # share (and overwrite) a single entry
    epidata = {}
    var, edges = t
    for role, target in edges:
        epis: List[Epidatum] = []

        if role == '/':
            role = CONCEPT_ROLE
            has_concept = True
        elif '~' in role:
            role, _, alignment = role.partition('~')
            epis.append(RoleAlignment.from_string(alignment))

        # atomic targets
        if is_atomic(target):
            # remove any alignments; only strings can carry one, so other
            # atoms (None, numbers) are kept as-is instead of raising a
            # TypeError on the '~' membership test
            if isinstance(target, str) and '~' in target:
                if target.startswith('"'):
                    # quoted strings may contain ~ inside the quotes, so
                    # only text after the closing quote is an alignment
                    pivot = target.rindex('"') + 1
                    if pivot < len(target):
                        epis.append(Alignment.from_string(target[pivot:]))
                        target = target[:pivot]
                else:
                    target, _, alignment = target.partition('~')
                    epis.append(Alignment.from_string(alignment))
            triple = (var, role, target)
            if model.is_role_inverted(role):
                if target in variables:
                    triple = model.invert(triple)
                else:
                    logger.warning('cannot deinvert attribute: %r', triple)
            triples.append(triple)
            epidata[triple] = epis
        # nested nodes
        else:
            triple = model.deinvert((var, role, target[0]))
            triples.append(triple)
            epidata[triple] = epis

            # recurse to nested nodes; the edge leading into the nested
            # node gets a Push marker and the last triple produced for
            # the nested node gets POP
            epidata[triple].append(Push(target[0]))
            _, _triples, _epis = _interpret_node(target, variables, model)
            triples.extend(_triples)
            epidata.update(_epis)
            epidata[triples[-1]].append(POP)

    if not has_concept:
        # no '/' edge was seen: add an instance triple with no concept
        # at the front of the triples
        instance = (var, CONCEPT_ROLE, None)
        triples.insert(0, instance)
        epidata[instance] = []

    return var, triples, epidata
def _interpret_node(t: Node, variables: Set[Variable], model: Model):
    """
    Flatten tree node *t* into triples with their epigraphical data.

    Args:
        t: a ``(var, edges)`` pair; each edge is a ``(role, target)``
            pair whose target is either atomic or a nested node
        variables: collection of variables; an atomic target of an
            inverted role is only inverted when it is in this
            collection
        model: object providing ``is_role_inverted()``, ``invert()``,
            and ``deinvert()``
    Returns:
        A ``(var, triples, epidata)`` tuple where *triples* is the list
        of triples in traversal order and *epidata* is a list of
        ``(triple, epis)`` pairs in the same order as *triples*.
    """
    has_concept = False
    triples = []
    epidata = []
    var, edges = t
    for role, target in edges:
        epis: List[Epidatum] = []

        # _process_role is defined elsewhere; it yields the cleaned role
        # and any epidata that was attached to it
        role, role_epis = _process_role(role)
        epis.extend(role_epis)
        has_concept |= role == CONCEPT_ROLE

        # atomic targets
        if is_atomic(target):
            target, target_epis = _process_atomic(target)
            epis.extend(target_epis)
            triple = (var, role, target)
            if model.is_role_inverted(role):
                if target in variables:
                    triple = model.invert(triple)
                else:
                    logger.warning('cannot deinvert attribute: %r', triple)
            triples.append(triple)
            epidata.append((triple, epis))
        # nested nodes
        else:
            triple = model.deinvert((var, role, target[0]))
            triples.append(triple)

            epis.append(Push(target[0]))
            epidata.append((triple, epis))

            # recurse to nested nodes
            _, _triples, _epis = _interpret_node(target, variables, model)
            triples.extend(_triples)
            _epis[-1][1].append(POP)  # POP from last triple of nested node
            epidata.extend(_epis)

    if not has_concept:
        instance = (var, CONCEPT_ROLE, None)
        # insert at the front of BOTH lists so epidata stays aligned with
        # triples; appending here would make the implicit instance entry
        # the last epidata item, and the caller's POP would then land on
        # it instead of on the node's actual last triple
        triples.insert(0, instance)
        epidata.insert(0, (instance, []))

    return var, triples, epidata
def _format_edge(self, edge, indent, column, vars): """ Format tree *edge* into a PENMAN string. """ role, target = edge if role != '/' and not role.startswith(':'): role = ':' + role if indent == -1: column += len(role) + 1 # +1 for : sep = ' ' if not target: target = sep = '' elif not is_atomic(target): target = self._format_node(target, indent, column, vars) return f'{role}{sep}{target!s}'
def _format_node(node, indent: Optional[int], column: int, vars: set) -> str: """ Format tree *node* into a PENMAN string. """ var, edges = node if not var: return '()' # empty node if not edges: return f'({var!s})' # var-only node # determine appropriate joiner based on value of indent if indent is None: joiner = ' ' else: if indent == -1: column += len(str(var)) + 2 # +2 for ( and a space else: column += indent joiner = '\n' + ' ' * column # format the edges and join them # if vars is non-empty, all initial attributes are compactly # joined on the same line, otherwise they use joiner parts: List[str] = [] compact = bool(vars) for edge in edges: target = edge[1] if compact and (not is_atomic(target) or target in vars): compact = False if parts: parts = [' '.join(parts)] parts.append(_format_edge(edge, indent, column, vars)) # check if all edges can be compactly written if compact: parts = [' '.join(parts)] return f'({var!s} {joiner.join(parts)})'
def test_is_atomic():
    """Strings, ``None`` and floats are atomic; a node tuple is not."""
    for atom in ('a', None, 3.14):
        assert tree.is_atomic(atom)

    nested = ('a', [('/', 'alpha')])
    assert not tree.is_atomic(nested)