def _merge_edus(tcache, span, doc):
    """
    Find any EDUs within the given span in the document
    and merge them into a single one.

    The EDUs should stretch from the beginning to the end of
    the span (gaps OK).

    The output EDU should have the same ID in all documents

    :param tcache: handed through unchanged to `_actually_merge`
    :param span: text span that the EDUs to merge must cover
    :param doc: document in which to look for (and merge) the EDUs

    Terminates the program via `sys.exit` (i.e. raises `SystemExit`)
    if no EDUs are found in the span, or if the combined span of the
    EDUs found is not equal to `span`.
    """
    edus = edus_in_span(doc, span)
    if not edus:
        sys.exit("No EDUs in span %s" % span)

    # Only the combined extent is compared against the requested span,
    # which is why gaps between individual EDUs are tolerated.
    espan = Span.merge_all(x.text_span() for x in edus)
    if espan != span:
        sys.exit("EDUs do not cover full span %s [only %s]" %
                 (span, espan))

    _actually_merge(tcache, edus, doc)
def fill(self, current, edu1, edu2, target=None):
    """
    Evaluate every key in `self.keys` for the pair (`edu1`, `edu2`)
    and store each result under the key's name.

    :param current: context object; its `doc` attribute is searched
                    for turns and EDUs, and it is passed on to each
                    key function
    :param edu1: first EDU of the pair
    :param edu2: second EDU of the pair
    :param target: mapping that receives the results; when None the
                   results are written into `self`
    """
    dest = target if target is not None else self
    document = current.doc
    covering_span = edu1.text_span().merge(edu2.text_span())

    # region running from the end of edu1's turn to the start of
    # edu2's turn, used to pick out the turns lying between them
    between_span = Span(edu1.turn.text_span().char_end,
                        edu2.turn.text_span().char_start)
    between_turns = turns_in_span(document, between_span)

    # EDUs within the covering span, minus the two endpoints themselves;
    # an endpoint identified as ROOT is not removed (per the original
    # comment it would not be in the list anyway)
    between_edus = edus_in_span(document, covering_span)
    for endpoint in (edu1, edu2):
        if endpoint.identifier() != ROOT:
            between_edus.remove(endpoint)

    gap = EduGap(inner_edus=between_edus,
                 turns_between=between_turns,
                 sf_cache=self.sf_cache)

    for key in self.keys:
        value = key.function(current, gap, edu1, edu2)
        dest[key.name] = value