def adapt_gareev(markup):
    """Return a new Markup with Gareev-corpus spans cleaned up and adapted.

    The corpus annotations contain extra spaces and dots inside spans,
    and some spans include the surrounding quotes, e.g. the whole of
    ``News Corp .`` or ``<< Rusal >>`` (guillemets plus padding spaces)
    is marked as one span.

    The spans go through three project helpers in order:
    ``strip_spans`` with ``DOT + SPACES``, ``strip_spans_bounds`` with
    ``QUOTES + SPACES``, then ``adapt_spans`` with ``GAREEV_TYPES``.
    NOTE(review): the exact semantics of these helpers are defined
    elsewhere -- presumably they trim the given characters and remap
    span types; confirm against their definitions.

    The input ``markup`` is only read (``.text`` and ``.spans``); the
    result is a freshly built ``Markup`` holding the same text and a
    list of the processed spans.
    """
    text = markup.text
    # Step 1: get rid of stray dots / spaces in the spans.
    without_dots = strip_spans(markup.spans, text, DOT + SPACES)
    # Step 2: handle quotes (and their padding spaces) at span bounds.
    without_quotes = strip_spans_bounds(without_dots, text, QUOTES + SPACES)
    # Step 3: adapt spans using the Gareev type table.
    adapted = adapt_spans(without_quotes, text, GAREEV_TYPES)
    return Markup(text, list(adapted))
def adapt_ne5(markup):
    """Return a new Markup with NE5-corpus spans cleaned up and adapted.

    Works around two quirks of the NE5 annotations:

    * an "ne5 bug" where spans overlap, e.g. in the phrase
      "Brazhsky rayon Podmoskovya" one span covers the first word(s)
      while a second span starts inside the first and runs to the end;
    * spans that include the quotes around a name, e.g. in
      'kompaniya "Soyuzkaliy"' the span covers the quote marks too.

    The spans go through three project helpers in order:
    ``filter_overlapping_spans``, ``strip_spans_bounds`` with
    ``QUOTES``, then ``adapt_spans`` with ``NE5_TYPES``.
    NOTE(review): which of two overlapping spans is kept is decided by
    ``filter_overlapping_spans`` (defined elsewhere) -- confirm there.

    The input ``markup`` is only read (``.text`` and ``.spans``); the
    result is a freshly built ``Markup`` holding the same text and a
    list of the processed spans.
    """
    text = markup.text
    # Materialise the filtered spans before passing them on, exactly as
    # the original code did.
    non_overlapping = list(filter_overlapping_spans(markup.spans))
    # Handle quotes at span bounds (no SPACES here, unlike other corpora).
    without_quotes = strip_spans_bounds(non_overlapping, text, QUOTES)
    # Adapt spans using the NE5 type table.
    adapted = adapt_spans(without_quotes, text, NE5_TYPES)
    return Markup(text, list(adapted))
def adapt_wikiner(markup):
    """Return a new Markup with WikiNER-corpus spans cleaned up and adapted.

    Example from the corpus: in the tokenised phrase
    'bolshevistskoy gazete " Pravda " .' the annotated span apparently
    includes the quotes and the padding spaces around the name
    (NOTE(review): the original underline position was lost in
    formatting -- confirm against the data).

    The spans go through two project helpers in order:
    ``strip_spans_bounds`` with ``QUOTES + SPACES``, then
    ``adapt_spans`` with ``WIKINER_TYPES``.

    The input ``markup`` is only read (``.text`` and ``.spans``); the
    result is a freshly built ``Markup`` holding the same text and a
    list of the processed spans.
    """
    text = markup.text
    # Handle quotes (and their padding spaces) at span bounds.
    without_quotes = strip_spans_bounds(markup.spans, text, QUOTES + SPACES)
    # Adapt spans using the WikiNER type table.
    adapted = adapt_spans(without_quotes, text, WIKINER_TYPES)
    return Markup(text, list(adapted))