def main(args, classify):
    init.argcheck(args, 4, 4, 'Identify errors in parser output',
                  '<gold> <test> <prefix_for_output_files>')

    # Output setup
    out_dict = {
        'out': sys.stdout,
        'err': sys.stderr,
        'gold_trees': sys.stdout,
        'test_trees': sys.stdout,
        'error_counts': sys.stdout
    }
    prefix = args[3]
    out_dict['out'] = open(prefix + '.out', 'w')
    out_dict['err'] = open(prefix + '.log', 'w')
    out_dict['gold_trees'] = open(prefix + '.gold_trees', 'w')
    out_dict['test_trees'] = open(prefix + '.test_trees', 'w')
    out_dict['error_counts'] = open(prefix + '.error_counts', 'w')
    out_dict['init_errors'] = open(prefix + '.init_errors', 'w')
    init.header(args, out_dict.values())

    # Classification
    print >> out_dict['out'], "Printing tree transformations"
    print >> out_dict['err'], "Printing tree transformations"
    gold_in = open(args[1])
    test_in = sys.stdin if args[2] == '-' else open(args[2])
    sent_no = 0
    error_counts = defaultdict(lambda: [])
    while True:
        sent_no += 1
        gold_text = gold_in.readline()
        test_text = test_in.readline()
        if gold_text == '' and test_text == '':
            print >> out_dict['err'], "End of both input files"
            break
        elif gold_text == '':
            print >> out_dict['err'], "End of gold input"
            break
        elif test_text == '':
            print >> out_dict['err'], "End of test input"
            break

        print >> out_dict['out'], "Sentence {}:".format(sent_no)
        print >> out_dict['err'], "Sentence {}:".format(sent_no)
        print >> out_dict['init_errors'], "Sentence {}:".format(sent_no)
        compare(gold_text.strip(), test_text.strip(), out_dict, error_counts, classify)
        print >> out_dict['init_errors'], "\n"

    # Results
    counts_to_print = []
    for error in error_counts:
        if error == 'UNSET init':
            continue
        counts_to_print.append(
            (len(error_counts[error]), sum(error_counts[error]), error))
    counts_to_print.sort(reverse=True)
    for error in counts_to_print:
        print >> out_dict['error_counts'], "{} {} {}".format(*error)
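# --- Sketch, not part of the tool above ----------------------------------
# A minimal, self-contained illustration of the aggregation pattern used in
# the "Results" section of main(): error_counts maps an error label to a list
# with one count per occurrence, and the summary is sorted by
# (number of occurrences, total count), largest first.  The labels and
# numbers below are invented example data.
from collections import defaultdict

def summarise_error_counts(error_counts):
    counts_to_print = []
    for error in error_counts:
        if error == 'UNSET init':
            continue
        counts_to_print.append(
            (len(error_counts[error]), sum(error_counts[error]), error))
    counts_to_print.sort(reverse=True)
    return counts_to_print

example_counts = defaultdict(list)
example_counts['PP Attachment'] += [2, 3, 2]   # three occurrences, 7 nodes in total
example_counts['NP Internal'] += [1, 1]        # two occurrences, 2 nodes in total
for occurrences, total, error in summarise_error_counts(example_counts):
    print("{} {} {}".format(occurrences, total, error))
# 3 7 PP Attachment
# 2 2 NP Internal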
def main():
    # TODO, shift to a uniform style of module documentation, then just skip all of this!
    desc = __doc__.split("\n")
    arg_info = desc[1]
    further_desc = "\n".join(desc[2:])
    desc = desc[0]
    init.argcheck(sys.argv, 3, 7, desc, arg_info, further_desc)

    out = open(sys.argv[1] + ".table", "w")
    log = open(sys.argv[1] + ".table.log", "w")
    init.header(sys.argv, log)

    data = get_data(sys.argv[2])
    mapping = {}
    if len(sys.argv) > 3:
        mapping = get_mapping(sys.argv[3])
    system_order_file = None
    if len(sys.argv) > 5:
        system_order_file = sys.argv[5]
    error_order_file = None
    if len(sys.argv) > 4:
        error_order_file = sys.argv[4]
    system_order, error_order, extra_info = get_order(data, system_order_file, error_order_file)
    print >> log, "System order:", system_order
    print >> log, "Error order:", error_order

    print_top(error_order, extra_info, out)
    print_data(system_order, error_order, data, mapping, extra_info, out)
    print_bottom(error_order, extra_info, out)

    if len(sys.argv) < 6:
        system_out = open(sys.argv[1] + ".table.system_order", "w")
        print_system_order(system_order, extra_info, system_out)
        system_out.close()
    if len(sys.argv) < 5:
        error_out = open(sys.argv[1] + ".table.error_order", "w")
        print_error_order(error_order, error_out)
        error_out.close()
    if len(sys.argv) < 4:
        mapping_out = open(sys.argv[1] + ".table.name_mapping", "w")
        print_mapping(mapping, data, mapping_out)
        mapping_out.close()

    out.close()
    log.close()
def main():
    formats = {
        'bart': read_bart,
        'cherrypicker': read_cherrypicker,
        'conll': read_conll,
        'ims': read_ims,
        # 'opennlp': read_opennlp,
        'reconcile': read_reconcile,
        # 'relaxcor': read_relaxcor,
        'stanford_xml': read_stanford_xml,
        'stanford': read_stanford,
        'uiuc': read_uiuc,
    }

    try:
        _opts, args = getopt.gnu_getopt(sys.argv[1:], '', [])
        output_prefix, fmt, auto_src, gold_src = args
    except (getopt.GetoptError, ValueError):
        print('Translate a system output into the CoNLL format')
        print('./%s <prefix> <[%s]> <dir | file> <gold dir>' %
              (sys.argv[0], ','.join(formats)))
        return

    if fmt not in formats:
        print("Invalid format. Valid options are:")
        print('\n'.join(formats))
        return

    with open(output_prefix + '.out', 'w') as out:
        with open(output_prefix + '.log', 'w') as log:
            init.header(sys.argv, log)
            auto, gold = formats[fmt](auto_src, gold_src)
            for doc in auto:
                for part in auto[doc]:
                    for mention in auto[doc][part]['mentions']:
                        if mention[1] >= mention[2]:
                            info = "Invalid mention span {} from {} {}".format(
                                str(mention), doc, part)
                            info += '\n' + gold[doc][part]['text'][mention[0]]
                            raise Exception(info)
            coreference_rendering.print_conll_style(auto, gold, out)
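# Sketch only: the reader names below (read_fake_a, read_fake_b) are invented
# and not part of the tool above.  It shows the dispatch-table pattern that
# main() relies on: every reader shares the signature
# (auto_src, gold_src) -> (auto, gold), so selecting a format is one dict
# lookup and supporting a new format is one more dict entry.
def read_fake_a(auto_src, gold_src):
    # Stand-in reader returning empty auto/gold structures.
    return {}, {}

def read_fake_b(auto_src, gold_src):
    return {}, {}

sketch_readers = {
    'fake_a': read_fake_a,
    'fake_b': read_fake_b,
}

fmt = 'fake_a'
if fmt not in sketch_readers:
    raise ValueError("Invalid format, valid options are: " + ', '.join(sketch_readers))
auto, gold = sketch_readers[fmt]('system_output_dir', 'gold_dir')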
    'bart': read_bart,
    'cherrypicker': read_cherrypicker,
    'conll': read_conll,
    'ims': read_ims,
    ### 'opennlp': read_opennlp,
    'reconcile': read_reconcile,
    ### 'relaxcor': read_relaxcor,
    'stanford_xml': read_stanford_xml,
    'stanford': read_stanford,
    'uiuc': read_uiuc
}
init.argcheck(sys.argv, 5, 6,
              "Translate a system output into the CoNLL format",
              "<prefix> <[{}]> <dir | file> <gold dir>".format(','.join(formats.keys())))

out = open(sys.argv[1] + '.out', 'w')
log = open(sys.argv[1] + '.log', 'w')
init.header(sys.argv, log)

auto_src = sys.argv[3]
gold_src = sys.argv[4]
if sys.argv[2] not in formats:
    print "Invalid format. Valid options are:"
    print '\n'.join(formats.keys())
    sys.exit(1)
auto, gold = formats[sys.argv[2]](auto_src, gold_src)
for doc in auto:
    for part in auto[doc]:
        for mention in auto[doc][part]['mentions']:
            if mention[1] >= mention[2]:
                info = "Invalid mention span %s from %s %s" % (str(mention), doc, part)
                info += '\n' + gold[doc][part]['text'][mention[0]]
auto = coreference_reading.read_conll_coref_system_output(sys.argv[3])
gold = coreference_reading.read_conll_matching_files(auto, sys.argv[2])

out_cluster_errors = open(sys.argv[1] + '.cluster_errors', 'w')
out_cluster_context = open(sys.argv[1] + '.cluster_context', 'w')
out_cluster_missing = open(sys.argv[1] + '.cluster_missing', 'w')
out_cluster_extra = open(sys.argv[1] + '.cluster_extra', 'w')
out_mention_list = open(sys.argv[1] + '.mention_list', 'w')
out_mention_text = open(sys.argv[1] + '.mention_text', 'w')
out_files = [out_cluster_errors, out_cluster_context, out_cluster_missing,
             out_cluster_extra, out_mention_list, out_mention_text]
init.header(sys.argv, out_files)

for function, outfile in [
    (coreference_rendering.print_mention_text, out_mention_text),
    (coreference_rendering.print_mention_list, out_mention_list),
    (coreference_rendering.print_cluster_errors, out_cluster_errors),
    (coreference_rendering.print_cluster_errors, out_cluster_context),
    (coreference_rendering.print_cluster_extra, out_cluster_extra),
    (coreference_rendering.print_cluster_missing, out_cluster_missing)
]:
    instructions = function.__doc__.split('\n')
    instructions = ['# ' + inst for inst in instructions]
    print >> outfile, '\n'.join(instructions)

# Define an order
order = []
    'error: span mismatch': open(sys.argv[1] + '.corrected.span_errors', 'w'),
    'error: split': open(sys.argv[1] + '.corrected.confused_entities', 'w'),
    'error: extra mention': open(sys.argv[1] + '.corrected.extra_mention', 'w'),
    'error: extra entity': open(sys.argv[1] + '.corrected.extra_entity', 'w'),
    'error: merge': open(sys.argv[1] + '.corrected.divided', 'w'),
    'error: missing mention': open(sys.argv[1] + '.corrected.missing_mention', 'w'),
    'error: missing entity': open(sys.argv[1] + '.corrected.missing_entity', 'w'),
    'error: extra mention prog': open(sys.argv[1] + '.corrected.extra_mention_prog', 'w'),
    'error: extra entity prog': open(sys.argv[1] + '.corrected.extra_entity_prog', 'w'),
    'error: merge prog': open(sys.argv[1] + '.corrected.divided_prog', 'w'),
    'error: missing mention prog': open(sys.argv[1] + '.corrected.missing_mention_prog', 'w'),
    'error: missing entity prog': open(sys.argv[1] + '.corrected.missing_entity_prog', 'w')
}

# Header info
init.header(sys.argv, out['out'])
init.header(sys.argv, out['short out'])
init.header(sys.argv, out['properties'])
init.header(sys.argv, out['summary'])
print >> out['properties'], '''# Each line below describes a single error.
# The fields included for the seven error types are:
#   span mismatch
#     System span (sentence, start, end)
#     Gold span (sentence, start, end)
#     Is the gold span a node in the gold parse?
#     Extra text to left
#     Missing text to left
#     Extra text to right
#     Missing text to right
#     Nodes spanning extra text to left
#     Nodes spanning missing text to left
    'ims': read_ims,
    ### 'opennlp': read_opennlp,
    'reconcile': read_reconcile,
    ### 'relaxcor': read_relaxcor,
    'stanford_xml': read_stanford_xml,
    'stanford': read_stanford,
    'uiuc': read_uiuc
}
init.argcheck(sys.argv, 5, 5,
              "Translate a system output into the CoNLL format",
              "<prefix> <[{}]> <dir | file> <gold dir>".format(','.join(formats.keys())))

out = open(sys.argv[1] + '.out', 'w')
log = open(sys.argv[1] + '.log', 'w')
init.header(sys.argv, log)

auto_src = sys.argv[3]
gold_src = sys.argv[4]
if sys.argv[2] not in formats:
    print "Invalid format. Valid options are:"
    print '\n'.join(formats.keys())
    sys.exit(1)
auto, gold = formats[sys.argv[2]](auto_src, gold_src)
for doc in auto:
    for part in auto[doc]:
        for mention in auto[doc][part]['mentions']:
            if mention[1] >= mention[2]:
                info = "Invalid mention span {} from {} {}".format(
                    str(mention), doc, part)
"labels_to_remove": [[str], ["TOP", "ROOT", "S1", "-NONE-", ",", ":", "``", "''", "."], "Remove nodes with the given labels, keep subtrees, but remove" "parents that now have a span of size 0"], "words_to_remove": [[str], [], "Remove nodes with the given words, and do as for labels"], "equivalent_labels": [[(str, str)], [("ADVP", "PRT")], "Labels to treat as equivalent"], "equivalent_words": [[(str, str)], [], "Words to treat as equivalent"], } # Provide current execution info out = sys.stdout init.header(sys.argv, out) # Handle options test_in = None gold_in = None if len(sys.argv) == 1: # TODO sys.exit() elif len(sys.argv) == 2: # TODO sys.exit() elif len(sys.argv) == 3: # Run with defaults, assume the two arguments are the gold and test files options['gold'][1] = sys.argv[1] options['test'][1] = sys.argv[2]
def main():
    try:
        opts, args = getopt.gnu_getopt(sys.argv[1:], '',
                                       ['resolvespanerrors', 'lang='])
        output_prefix, gold_dir, test_file = args
    except (getopt.GetoptError, ValueError):
        print('Print coreference resolution errors')
        print(('./%s <prefix> <gold_dir> <test_file> '
               '[--resolvespanerrors] [--lang=<en|nl>]' % sys.argv[0]))
        return
    opts = dict(opts)
    lang = opts.get('--lang', 'en')

    auto = coreference_reading.read_conll_coref_system_output(test_file)
    gold = coreference_reading.read_conll_matching_files(auto, gold_dir, lang)

    out_cluster_errors = open(output_prefix + '.cluster_errors', 'w')
    out_cluster_context = open(output_prefix + '.cluster_context', 'w')
    out_cluster_missing = open(output_prefix + '.cluster_missing', 'w')
    out_cluster_extra = open(output_prefix + '.cluster_extra', 'w')
    out_mention_list = open(output_prefix + '.mention_list', 'w')
    out_mention_text = open(output_prefix + '.mention_text', 'w')
    out_files = [
        out_cluster_errors, out_cluster_context, out_cluster_missing,
        out_cluster_extra, out_mention_list, out_mention_text
    ]
    for out in out_files:
        init.header(sys.argv, out)

    for function, outfile in [
        (coreference_rendering.print_mention_text, out_mention_text),
        (coreference_rendering.print_mention_list, out_mention_list),
        (coreference_rendering.print_cluster_errors, out_cluster_errors),
        (coreference_rendering.print_cluster_errors, out_cluster_context),
        (coreference_rendering.print_cluster_extra, out_cluster_extra),
        (coreference_rendering.print_cluster_missing, out_cluster_missing)
    ]:
        instructions = function.__doc__.split('\n')
        instructions = ['# ' + inst for inst in instructions]
        print('\n'.join(instructions), file=outfile)

    # Define an order
    order = []
    for doc in auto:
        for part in auto[doc]:
            order.append((doc, part))
    order.sort()

    for doc, part in order:
        # Setup
        for out in out_files:
            print("\n# %s %s\n" % (doc, part), file=out)
        text = gold[doc][part]['text']
        gold_parses = gold[doc][part]['parses']
        gold_heads = gold[doc][part]['heads']
        gold_mentions = gold[doc][part]['mentions']
        gold_clusters = gold[doc][part]['clusters']
        auto_mentions = auto[doc][part]['mentions']
        auto_clusters = auto[doc][part]['clusters']
        gold_cluster_set = coreference.set_of_clusters(gold_clusters)
        auto_cluster_set = coreference.set_of_clusters(auto_clusters)
        gold_mention_set = coreference.set_of_mentions(gold_clusters)
        auto_mention_set = coreference.set_of_mentions(auto_clusters)
        if '--resolvespanerrors' in opts:
            coreference_rendering.match_boundaries(
                gold_mention_set, auto_mention_set, auto_mentions,
                auto_clusters, auto_cluster_set, text, gold_parses, gold_heads)

        # Coloured mention output
        coreference_rendering.print_mention_list(
            out_mention_list, gold_mentions, auto_mention_set, gold_parses,
            gold_heads, text)
        coreference_rendering.print_mention_text(
            out_mention_text, gold_mentions, auto_mention_set, gold_parses,
            gold_heads, text)

        # Coloured cluster output, grouped
        groups = coreference.confusion_groups(gold_mentions, auto_mentions,
                                              gold_clusters, auto_clusters)
        covered = coreference_rendering.print_cluster_errors(
            groups, out_cluster_errors, out_cluster_context, text,
            gold_parses, gold_heads, auto_clusters, gold_clusters,
            gold_mentions)
        print("Entirely missing or extra\n", file=out_cluster_errors)
        print("Entirely missing or extra\n", file=out_cluster_context)
        coreference_rendering.print_cluster_missing(
            out_cluster_errors, out_cluster_context, out_cluster_missing,
            text, gold_cluster_set, covered, gold_parses, gold_heads)
        coreference_rendering.print_cluster_extra(
            out_cluster_errors, out_cluster_context, out_cluster_extra, text,
            auto_cluster_set, covered, gold_parses, gold_heads)
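# A self-contained sketch (the argument values are invented) of the
# getopt.gnu_getopt pattern used in main() above: gnu_getopt lets long options
# appear before or after the positional arguments, returns the options as
# (name, value) pairs, and leaves the positionals in their original order.
import getopt

argv = ['out_prefix', '--lang=nl', 'gold_dir', 'system.conll', '--resolvespanerrors']
opts, args = getopt.gnu_getopt(argv, '', ['resolvespanerrors', 'lang='])
opts = dict(opts)                        # {'--lang': 'nl', '--resolvespanerrors': ''}
output_prefix, gold_dir, test_file = args
lang = opts.get('--lang', 'en')          # 'nl'
resolve = '--resolvespanerrors' in opts  # True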
            get_nonzero_span(node)[0]
        ))
    else:
        items.append((
            node.word.split('-')[0] + "_" + ref.label.split('-')[0]
            + "_" + node.parent.label.split("-")[0],
            get_nonzero_span(node)[0],
            get_nonzero_span(ref)
        ))
    return items


if __name__ == '__main__':
    # Provide current execution info
    out = sys.stdout
    init.header(sys.argv, out)

    # Handle Arguments
    parser = argparse.ArgumentParser(
        description='Evaluate parser output on trace recovery performance')
    # Input
    parser.add_argument('gold', help='Gold standard correct data')
    parser.add_argument('test', help='System output')
    parser.add_argument("--gold_format", choices=['ptb', 'ontonotes'], default='ptb',
                        help="Input format for the gold file: PTB (single or multiple lines per parse), OntoNotes (one file)")
    parser.add_argument("--test_format", choices=['ptb', 'ontonotes'], default='ptb',
                        help="Input format for the test file: PTB (single or multiple lines per parse), OntoNotes (one file)")
    # Scoring modification
    parser.add_argument("--null_only", action='store_true',
                        help="Whether to only score the null itself (not coindexation)")
    parser.add_argument("--unlabelled_score", action='store_true',
                        help="NOT IMPLEMENTED, Labeled or unlabelled score")
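# A brief sketch (file names are placeholders, the parser is a cut-down copy)
# of how the argparse configuration above behaves when parsing a command line:
# gold and test are positional paths, and the optional flags fall back to
# their defaults when omitted.
import argparse

sketch_parser = argparse.ArgumentParser(description='Trace recovery evaluation (sketch)')
sketch_parser.add_argument('gold')
sketch_parser.add_argument('test')
sketch_parser.add_argument('--gold_format', choices=['ptb', 'ontonotes'], default='ptb')
sketch_parser.add_argument('--null_only', action='store_true')

args = sketch_parser.parse_args(['gold.mrg', 'test.mrg', '--null_only'])
print("{} {} {} {}".format(args.gold, args.test, args.gold_format, args.null_only))
# gold.mrg test.mrg ptb True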