def _set_doc_parts(doc, parts):
    """
    Update a document so that it has annotations from all the given
    subdocuments.

    The `units`, `relations` and `schemas` attributes of `doc` are each
    overwritten with the result of `concat_l` applied to the same-named
    attributes of every item in `parts`.

    Note that no attention is paid to annotation ids, spans, etc.
    It's up to you to ensure that everything is kosher.

    Parameters
    ----------
    doc:
        document to update in place
    parts:
        iterable of subdocuments, each providing `units`, `relations`
        and `schemas`; a one-shot iterator is accepted
    """
    # `parts` is traversed once per attribute below; snapshot it first so
    # that a generator argument is not already exhausted by the time the
    # relations and schemas are collected
    parts = list(parts)
    doc.units = concat_l(x.units for x in parts)
    doc.relations = concat_l(x.relations for x in parts)
    doc.schemas = concat_l(x.schemas for x in parts)
def overlapping_structs(inputs, k):
    """
    Return items for structural annotations that have overlaps

    `overlapping` is queried once per entry of `stac.STRUCTURE_TYPES`,
    each time with a predicate that only accepts annotations of that
    type; the per-type results are then joined with `concat_l`.
    """
    def has_type(wanted):
        """Build a predicate accepting annotations of the wanted type"""
        # `wanted` is fixed per call, so each predicate keeps its own type
        return lambda anno: anno.type == wanted

    per_type = (overlapping(inputs, k, has_type(ty))
                for ty in stac.STRUCTURE_TYPES)
    return concat_l(per_type)
def reflow(text, width=40):
    """
    Wrap some text to the given width, while making sure that every
    original linebreak is still in place
    """
    wrapped = []
    for line in text.split("\n"):
        pieces = textwrap.wrap(line, width)
        # a line that wraps to nothing still has to show up as one
        # blank line, otherwise the original linebreak would be lost
        wrapped.append(pieces if pieces else [''])
    return concat_l(wrapped)
def _maybe_lozenge(gra, node):
    """Return (if applicable) lozenge nodes/edges starting from the
    given node

    If the given node looks like the start of a lozenge, return all of
    the nodes and edges participating in the lozenge. If not, return
    None

    Parameters
    ----------
    gra: educe.stac.graph.Graph

    node: string
        hypergraph node name

    Returns
    -------
    nodes: frozenset(string)
        nodes in lozenge (if whole tuple not None)

    edges: frozenset
        edges in lozenge (if whole tuple not None)
    """
    top_edges = _outgoing(gra, node)
    if len(top_edges) < 2:
        # a lozenge needs the top node to fan out
        return None

    # NOTE(review): element 1 of `rel_links` is presumably the target
    # of the edge; confirm against the graph API
    mid_nodes = [gra.rel_links(edge)[1] for edge in top_edges]
    if len(set(mid_nodes)) != len(mid_nodes):
        # the top edges must all point to different nodes
        return None

    mid_edge_lists = [_outgoing(gra, mid) for mid in mid_nodes]
    # for each mid point: the set of bottoms it points to
    bottoms_per_mid = [frozenset(gra.rel_links(edge)[1] for edge in edges)
                       for edges in mid_edge_lists]
    # we're happy as long as all the mid points share some bottom
    shared = frozenset.intersection(*bottoms_per_mid)
    if not shared:
        return None

    loz_nodes = frozenset([node] + mid_nodes) | shared
    loz_edges = frozenset(top_edges + concat_l(mid_edge_lists))
    return loz_nodes, loz_edges
def nplike_trees(current, edu):
    """Any trees within an EDU that look like NPs (smallest match)"""
    span = edu.text_span()
    candidates = enclosed_trees(span, current.parses.trees)
    return concat_l(tree.topdown_smallest(is_nplike)
                    for tree in candidates)