def map_(cls, batches, host, port):
    """Annotate every section of every batch, one batch per request.

    For each batch the section texts are collected, sent through ``post``
    together with ``cls``, ``host`` and ``port``, and the response is
    turned into markups by ``parse``.  Each section is then paired with
    its markup and ``section.annotated(markup.spans)`` is yielded.

    Each batch is iterated twice (once for the texts, once for pairing),
    so it must be re-iterable.
    """
    for batch in batches:
        batch_texts = [section.text for section in batch]
        response = post(cls, batch_texts, host, port)
        # NOTE(review): parse is invoked with three positional arguments
        # here - confirm the parse in scope accepts ``cls`` first.
        # Materialised before pairing so parsing finishes before anything
        # is yielded for this batch.
        parsed = list(parse(cls, batch_texts, response))
        for section, markup in strict_zip(batch, parsed):
            yield section.annotated(markup.spans)
def parse(texts, data):
    """Yield one ``DeeppavlovMarkup`` per (text, response item) pair.

    ``texts`` and ``data`` are walked in lockstep via ``strict_zip``; each
    item of ``data`` must unpack into exactly ``(chunks, tags)``.  Texts
    that are empty or whitespace-only produce a markup with no spans.
    """
    for text, item in strict_zip(texts, data):
        chunks, tags = item
        # see patch_texts
        if text.strip():
            located = list(find_tokens(chunks, text))
            found = list(bio_spans(located, tags))
        else:
            # Blank text: chunks/tags are ignored and no spans are built.
            found = []
        yield DeeppavlovMarkup(text, found)
def eval_score(tokens, guess, etalon, type):
    """Build a ``Score`` comparing guess and etalon tags for one type.

    Spans of ``type`` are selected from ``guess`` and ``etalon`` with
    ``select_type_spans`` and converted to per-token tags with
    ``spans_io``.  The two tag sequences are then compared pairwise
    (via ``strict_zip``) using the first element returned by
    ``parse_bio``:

    * guess part is ``I``  -> ``prec_total`` grows; if the etalon part is
      ``O`` then ``prec_errors`` grows too.
    * etalon part is ``I`` -> ``recall_total`` grows; if the guess part is
      ``O`` then ``recall_errors`` grows too.

    Returns the populated ``Score``.
    """

    def tags_for(markup):
        # Same two-step conversion for both sides: spans of the requested
        # type, then one tag per token.
        selected = list(select_type_spans(markup, type))
        return list(spans_io(tokens, selected))

    predicted_tags = tags_for(guess)
    reference_tags = tags_for(etalon)

    result = Score()
    for predicted_tag, reference_tag in strict_zip(predicted_tags, reference_tags):
        predicted_part, _ = parse_bio(predicted_tag)
        reference_part, _ = parse_bio(reference_tag)

        if predicted_part == I:
            result.prec_total += 1
            if reference_part == O:
                result.prec_errors += 1

        if reference_part == I:
            result.recall_total += 1
            if predicted_part == O:
                result.recall_errors += 1

    return result
def eval_markups(guesses, etalons, types=TYPES):
    """Accumulate per-type scores over paired guess/etalon markups.

    ``guesses`` and ``etalons`` are walked in lockstep via ``strict_zip``.
    For each pair, ``eval_markup`` yields ``(type, score)`` items; every
    score is merged with ``update`` into the running ``Score`` kept for
    that type.

    Returns a plain dict mapping each type seen to its accumulated
    ``Score``.
    """
    totals = {}
    for predicted, reference in strict_zip(guesses, etalons):
        for label, partial in eval_markup(predicted, reference, types):
            # Start from a fresh Score the first time a type is seen.
            if label not in totals:
                totals[label] = Score()
            totals[label].update(partial)
    return totals