def select_spans(markup):
    """Yield flat ``Span`` objects for the typed objects in *markup*.

    Iterates ``markup.objects`` and dispatches on each object's ``type``:

    * ``'Person'`` -- one span covering the object's own ``start``/``stop``.
    * ``'Org'`` -- one span per non-overlapping ``'org_name'`` sub-span.
    * ``'LocOrg'`` / ``'Location'`` -- one span per non-overlapping
      ``'loc_name'`` sub-span.

    Objects of any other type produce nothing. Every yielded span is
    labelled with the *object's* type, not the sub-span's type.

    Args:
        markup: Object exposing an ``objects`` iterable; each item needs
            ``type`` and, depending on that type, ``start``/``stop`` or
            ``spans`` (items with ``type``, ``start``, ``stop``).

    Yields:
        ``Span(start, stop, type)`` instances.
    """
    # Local names avoid shadowing the ``object`` and ``type`` builtins.
    for obj in markup.objects:
        obj_type = obj.type

        if obj_type == 'Person':
            yield Span(obj.start, obj.stop, obj_type)
            continue

        # For the remaining types only the "name" sub-spans are kept.
        if obj_type == 'Org':
            name_type = 'org_name'
        elif obj_type in ('LocOrg', 'Location'):
            name_type = 'loc_name'
        else:
            continue

        name_spans = [part for part in obj.spans if part.type == name_type]
        for part in filter_overlapping_spans(name_spans):
            yield Span(part.start, part.stop, obj_type)
def adapt_factru(markup):
    """Build a ``Markup`` from *markup* using the FACTRU_TYPES adaptation.

    Spans are collected with ``select_spans``, overlapping ones are
    filtered out, and the survivors are passed through ``adapt_spans``
    together with the text and ``FACTRU_TYPES``.

    Args:
        markup: Source markup exposing ``objects`` and ``text``.

    Returns:
        ``Markup(markup.text, spans)`` where ``spans`` is a list.
    """
    selected = list(select_spans(markup))

    # Spans selected from different objects may still overlap, e.g.
    # (transliterated; "Georgian Foreign Ministry"):
    #
    #   mid Gruzii
    #   ORG-------
    #       LOC---
    non_overlapping = list(filter_overlapping_spans(selected))

    text = markup.text
    adapted = list(adapt_spans(non_overlapping, text, FACTRU_TYPES))
    return Markup(text, adapted)
def adapt_ne5(markup):
    """Build a ``Markup`` from *markup* using the NE5_TYPES adaptation.

    Overlapping spans are filtered out, span bounds are stripped with
    ``strip_spans_bounds`` and ``QUOTES``, and the result is passed
    through ``adapt_spans`` together with the text and ``NE5_TYPES``.

    Args:
        markup: Source markup exposing ``spans`` and ``text``.

    Returns:
        ``Markup(markup.text, spans)`` where ``spans`` is a list.
    """
    # Known ne5 annotation problems (examples transliterated).
    #
    # Overlapping spans ("Brazhsky district of the Moscow region");
    # NOTE(review): diagram offsets were reconstructed from the dash
    # counts -- confirm against the corpus:
    #
    #   Brazhsky rayon Podmoskovya
    #   --------------
    #            -----------------
    #
    # Span that apparently includes the surrounding quotes
    # ('company "Soyuzkaliy"'):
    #
    #   kompaniya "Soyuzkaliy"
    #             ------------
    non_overlapping = list(filter_overlapping_spans(markup.spans))

    text = markup.text
    stripped = strip_spans_bounds(non_overlapping, text, QUOTES)
    adapted = adapt_spans(stripped, text, NE5_TYPES)
    return Markup(text, list(adapted))
def adapt_natasha(markup):
    """Build a ``Markup`` from *markup* using the NATASHA_TYPES adaptation.

    Overlapping spans are filtered out and the survivors are passed
    through ``adapt_spans`` together with the text and ``NATASHA_TYPES``.

    Args:
        markup: Source markup exposing ``spans`` and ``text``.

    Returns:
        ``Markup(markup.text, spans)`` where ``spans`` is a list.
    """
    non_overlapping = list(filter_overlapping_spans(markup.spans))

    text = markup.text
    adapted = list(adapt_spans(non_overlapping, text, NATASHA_TYPES))
    return Markup(text, adapted)
def parse_bsnlp(record):
    """Turn a BSNLP *record* into a ``BsnlpMarkup``.

    Spans are located with ``find_spans`` from the record's text and
    substrings, then overlapping ones are filtered out.

    Args:
        record: Object exposing ``text`` and ``substrings``.

    Returns:
        ``BsnlpMarkup(record.text, spans)`` where ``spans`` is a list.
    """
    text = record.text
    candidates = find_spans(text, record.substrings)
    non_overlapping = list(filter_overlapping_spans(candidates))
    return BsnlpMarkup(text, non_overlapping)