def main():
    """Use as a CLI tool like conlleval.pl"""
    usage = "usage: %(prog)s [--span_type {bio,iobes,iob}] [-d delimiterTag] [-v] < file"
    parser = argparse.ArgumentParser(
        description="Calculate Span level F1 from the CoNLL-2000 shared task.",
        usage=usage)
    parser.add_argument("--span_type", default="iobes", choices={"iobes", "iob", "bio"},
                        help="What tag annotation scheme is this file using.")
    parser.add_argument("--delimiterTag", "-d", default=" ",
                        help="The separator between items in the file.")
    parser.add_argument("--verbose", "-v", action="store_true",
                        help="Output warnings when there is an illegal transition.")
    args = parser.parse_args()

    golds, preds = _read_conll_file(sys.stdin, args.delimiterTag)
    acc, tokens = _get_accuracy(golds, preds)
    golds, preds = _get_entites(golds, preds, args.span_type, args.verbose)
    metrics = per_entity_f1(golds, preds)
    metrics['acc'] = acc
    metrics['tokens'] = tokens
    print(conlleval_output(metrics))
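# A minimal usage sketch (not part of the original module): score an in-memory
# CoNLL-formatted string with the same helpers main() drives from stdin above.
# It assumes _read_conll_file, _get_accuracy, _get_entites, per_entity_f1 and
# conlleval_output are in scope and behave exactly as they are used in main();
# the function name score_conll_string is hypothetical.
import io


def score_conll_string(conll_text, span_type="iobes", delim=" ", verbose=False):
    # Wrap the raw string so it can be read like the stdin stream main() consumes
    golds, preds = _read_conll_file(io.StringIO(conll_text), delim)
    acc, tokens = _get_accuracy(golds, preds)
    golds, preds = _get_entites(golds, preds, span_type, verbose)
    metrics = per_entity_f1(golds, preds)
    metrics['acc'] = acc
    metrics['tokens'] = tokens
    return conlleval_output(metrics)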
def _test(self, ts, **kwargs):
    self.model.eval()
    total_sum = 0
    total_correct = 0
    gold_spans = []
    pred_spans = []
    metrics = {}
    steps = len(ts)
    conll_output = kwargs.get('conll_output', None)
    txts = kwargs.get('txts', None)
    handle = None
    # Only write a CONLL file if both an output path and the raw texts are provided
    if conll_output is not None and txts is not None:
        handle = open(conll_output, "w")

    pg = create_progress_bar(steps)
    for batch_dict in pg(ts):
        inputs = self.model.make_input(batch_dict)
        y = inputs.pop('y')
        lengths = inputs['lengths']
        ids = inputs['ids']
        pred = self.model(inputs)
        correct, count, golds, guesses = self.process_output(
            pred, y.data, lengths, ids, handle, txts)
        total_correct += correct
        total_sum += count
        gold_spans.extend(golds)
        pred_spans.extend(guesses)

    total_acc = total_correct / float(total_sum)
    metrics['acc'] = total_acc
    metrics['f1'] = span_f1(gold_spans, pred_spans)
    if self.verbose:
        # TODO: Add programmatic access to these metrics?
        conll_metrics = per_entity_f1(gold_spans, pred_spans)
        conll_metrics['acc'] = total_acc * 100
        conll_metrics['tokens'] = total_sum.item()
        logger.info(conlleval_output(conll_metrics))
    if handle is not None:
        handle.close()
    return metrics
def _test(self, ts, **kwargs):
    self.model.train = False
    total_correct = 0
    total_sum = 0
    gold_spans = []
    pred_spans = []
    metrics = {}
    steps = len(ts)
    conll_output = kwargs.get('conll_output', None)
    txts = kwargs.get('txts', None)
    handle = None
    # Only write a CONLL file if both an output path and the raw texts are provided
    if conll_output is not None and txts is not None:
        handle = open(conll_output, "w")

    pg = create_progress_bar(steps)
    for batch_dict in pg(ts):
        lengths = batch_dict[self.model.lengths_key]
        ids = batch_dict['ids']
        y = batch_dict['y']
        pred = self.model.predict(batch_dict)
        correct, count, golds, guesses = self.process_output(
            pred, y, lengths, ids, handle, txts)
        total_correct += correct
        total_sum += count
        gold_spans.extend(golds)
        pred_spans.extend(guesses)

    total_acc = total_correct / float(total_sum)
    metrics['acc'] = total_acc
    metrics['f1'] = span_f1(gold_spans, pred_spans)
    # Only print the detailed per-entity report if requested
    if self.verbose:
        conll_metrics = per_entity_f1(gold_spans, pred_spans)
        conll_metrics['acc'] = total_acc * 100
        conll_metrics['tokens'] = total_sum
        logger.info(conlleval_output(conll_metrics))
    if handle is not None:
        handle.close()
    return metrics
def test(self, ts, conll_output=None, txts=None):
    total_correct = total_sum = 0
    gold_spans = []
    pred_spans = []
    steps = len(ts)
    pg = create_progress_bar(steps)
    metrics = {}
    # Only write a CONLL file if both an output path and the raw texts are provided
    handle = None
    if conll_output is not None and txts is not None:
        handle = open(conll_output, "w")

    try:
        for batch_dict in pg(ts):
            correct, count, golds, guesses = self.process_batch(
                batch_dict, handle, txts)
            total_correct += correct
            total_sum += count
            gold_spans.extend(golds)
            pred_spans.extend(guesses)

        total_acc = total_correct / float(total_sum)
        metrics['f1'] = span_f1(gold_spans, pred_spans)
        metrics['acc'] = total_acc
        # Only print the detailed per-entity report if requested
        if self.verbose:
            conll_metrics = per_entity_f1(gold_spans, pred_spans)
            conll_metrics['acc'] = total_acc * 100
            conll_metrics['tokens'] = total_sum
            logger.info(conlleval_output(conll_metrics))
    finally:
        if handle is not None:
            handle.close()
    return metrics
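# A brief usage sketch (not part of the original code): how a caller might use
# test() above to both collect metrics and dump a CONLL file. The names
# `evaluator`, `dev_ts` and `dev_txts` are hypothetical stand-ins for an
# instance of the defining class, its iterable of batch dicts, and the raw
# texts that process_batch() expects when writing the dump.
#
#   metrics = evaluator.test(dev_ts, conll_output="dev.conll.txt", txts=dev_txts)
#   print("span f1: {:.4f}, token acc: {:.4f}".format(metrics['f1'], metrics['acc']))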