def get_evalb_summary(text):
    """Collect the "key = value" entries from the summary sections of text.

    A line equal to '-- All --' switches collection to the first
    dictionary; a line starting with '-- len' (e.g. "-- len<40 --")
    switches it to the second. Lines seen before any such header are
    ignored. Within a section, every line containing exactly one '=' is
    stored as ``stripped key -> try_parse_float(value)``; all other lines
    (including the headers themselves) are skipped.

    Returns a ``(all_summary, short_summary)`` tuple of dictionaries.
    """
    all_summary = {}
    short_summary = {}
    current = None
    for line in text.splitlines():
        line = line.strip()

        # These lines tell us which dictionary to fill.
        if line == '-- All --':
            current = all_summary
        elif line.startswith('-- len'):  # e.g. "-- len<40 --"
            current = short_summary

        # No section header seen yet, so there is nowhere to store entries.
        if current is None:
            continue

        # Only lines with exactly one '=' are entries. Checking the shape
        # explicitly (instead of a bare ``except``) skips the same lines
        # without also hiding KeyboardInterrupt or genuine bugs.
        pieces = line.split('=')
        if len(pieces) != 2:
            continue
        key, value = pieces
        key = key.strip()
        # NOTE(review): try_parse_float is defined elsewhere in this file;
        # any exception it raises now propagates instead of being dropped.
        current[key] = try_parse_float(value)
    return all_summary, short_summary
def per_sentence_scores(stringiter):
    """Yield a dictionary of scores for each sentence-statistics line.

    ``stringiter`` is any iterable of text lines. A line on which
    ``length_unmatch_re`` matches has the matched text removed and is
    joined (with a single space) to the following line before parsing.
    Each line matching ``sentence_stats_re`` yields a dictionary holding
    the regex's named groups passed through ``try_parse_float``, plus
    'precision' and 'recall' (scaled by 100) and 'fscore'. Lines that do
    not match are skipped.

    If the input ends immediately after a length-unmatch line, the
    generator simply stops.
    """
    # Use one shared iterator so that continuation lines fetched with
    # next() come from the same stream the for loop is consuming. This
    # also lets callers pass a plain list, and avoids the Python 2-only
    # ``.next()`` method.
    lines = iter(stringiter)
    exhausted = object()
    for line in lines:
        # attempt to clean up length "unmatch"es
        if length_unmatch_re.search(line):
            line = length_unmatch_re.sub('', line).rstrip() + ' '
            continuation = next(lines, exhausted)
            if continuation is exhausted:
                # No following line to merge with. End the generator
                # explicitly: a StopIteration escaping here would become
                # a RuntimeError under PEP 479.
                return
            line += continuation.lstrip()

        match = sentence_stats_re.match(line)
        if not match:
            continue

        scores = {}
        for name, raw in match.groupdict().items():
            scores[name] = try_parse_float(raw)
        scores['precision'], scores['recall'] = calc_precision_recall(
            scores['matchedbracket'],
            scores['goldbracket'],
            scores['testbracket'])
        # Scale both values by 100 before computing the F-score from them.
        scores['precision'] *= 100
        scores['recall'] *= 100
        scores['fscore'] = calc_fscore(scores['precision'],
                                       scores['recall'])
        yield scores