Exemple #1
0
def select_spans(markup):
    """Yield a ``Span(start, stop, type)`` for selected objects of *markup*.

    Iterates ``markup.objects`` and, depending on each object's ``type``:

    * ``'Person'`` -- yields one span using the object's own ``start`` and
      ``stop``.
    * ``'Org'`` -- yields one span per ``'org_name'`` sub-span of the object
      that survives ``filter_overlapping_spans``.
    * ``'LocOrg'`` / ``'Location'`` -- same, but for ``'loc_name'`` sub-spans.

    Objects of any other type are skipped. Every yielded span carries the
    type of the enclosing object, not the type of the sub-span.
    """
    # Locals are deliberately not called ``object`` / ``type`` so the
    # builtins of the same name stay usable inside this function.
    for item in markup.objects:
        kind = item.type
        if kind == 'Person':
            yield Span(item.start, item.stop, kind)
            continue

        # Pick which kind of sub-span names the entity for this object type.
        if kind == 'Org':
            name_type = 'org_name'
        elif kind in ('LocOrg', 'Location'):
            name_type = 'loc_name'
        else:
            continue

        names = [part for part in item.spans if part.type == name_type]
        for part in filter_overlapping_spans(names):
            yield Span(part.start, part.stop, kind)
Exemple #2
0
def adapt_factru(markup):
    """Build a ``Markup`` from *markup* with selected, adapted spans.

    Spans are picked with ``select_spans``, de-overlapped with
    ``filter_overlapping_spans`` and then passed through ``adapt_spans``
    together with the text and ``FACTRU_TYPES``.
    """
    selected = list(select_spans(markup))

    # Spans selected from different objects may still overlap, e.g.
    # "мид Грузии" ("Georgia's Foreign Ministry"):
    #   мид Грузии
    #   ORG-------
    #       LOC---
    non_overlapping = list(filter_overlapping_spans(selected))

    adapted = list(adapt_spans(non_overlapping, markup.text, FACTRU_TYPES))
    return Markup(markup.text, adapted)
Exemple #3
0
def adapt_ne5(markup):
    """Build a ``Markup`` from *markup* with cleaned-up, adapted spans.

    The spans of *markup* are de-overlapped, passed through
    ``strip_spans_bounds`` with ``QUOTES`` (presumably trimming quote
    characters from span edges -- confirm against the helper) and finally
    through ``adapt_spans`` with ``NE5_TYPES``.
    """
    # Known ne5 annotation bug: spans may overlap, e.g.
    # "Бражский район Подмосковья" ("Brazhsky district of the Moscow region"):
    #   Бражский район Подмосковья
    #   --------------
    #            -----------------
    non_overlapping = list(filter_overlapping_spans(markup.spans))

    # Spans may include the surrounding quotes, e.g.
    # 'компания "Союзкалий"' ('the company "Soyuzkaliy"'):
    #   компания "Союзкалий"
    #            -----------
    stripped = strip_spans_bounds(non_overlapping, markup.text, QUOTES)

    adapted = adapt_spans(stripped, markup.text, NE5_TYPES)
    return Markup(markup.text, list(adapted))
Exemple #4
0
def adapt_natasha(markup):
    """Build a ``Markup`` from *markup* with de-overlapped, adapted spans.

    The spans of *markup* go through ``filter_overlapping_spans`` and then
    through ``adapt_spans`` together with the text and ``NATASHA_TYPES``.
    """
    non_overlapping = list(filter_overlapping_spans(markup.spans))
    adapted = list(adapt_spans(non_overlapping, markup.text, NATASHA_TYPES))
    return Markup(markup.text, adapted)
Exemple #5
0
def parse_bsnlp(record):
    """Build a ``BsnlpMarkup`` for *record*.

    Spans are located with ``find_spans`` from the record's text and
    substrings, then de-overlapped with ``filter_overlapping_spans``.
    """
    candidates = find_spans(record.text, record.substrings)
    non_overlapping = list(filter_overlapping_spans(candidates))
    return BsnlpMarkup(record.text, non_overlapping)