def read_stanford(auto_src, gold_src):
    """Load Stanford system output together with its matching gold data.

    Stanford writes CoNLL style output with all fields present, so the
    regular CoNLL document reader is used on it.

    Returns a pair ``(auto, gold)``: the documents read from ``auto_src``
    and the gold documents from ``gold_src`` that match them.
    """
    system_docs = coreference_reading.read_conll_doc(
        auto_src, None, True, False, False, True)
    gold_docs = coreference_reading.read_conll_matching_files(
        system_docs, gold_src)
    return system_docs, gold_docs
def read_stanford(auto_src, gold_src):
    """Read Stanford output (CoNLL style, all fields) and matching gold files.

    The system output is read as a normal CoNLL document; the gold data is
    then looked up in ``gold_src`` for the documents that were read.
    Returns the tuple ``(auto, gold)``.
    """
    # Positional flags forwarded unchanged to the CoNLL document reader.
    reader_args = (None, True, False, False, True)
    predicted = coreference_reading.read_conll_doc(auto_src, *reader_args)
    return (
        predicted,
        coreference_reading.read_conll_matching_files(predicted, gold_src),
    )
def read_conll(auto_src, gold_src):
    """Read CoNLL style system output and the matching gold files.

    In this format the last field is the relevant one. Returns the tuple
    ``(auto, gold)`` of system documents and their matching gold documents.
    """
    system_docs = coreference_reading.read_conll_doc(
        auto_src, None, False, False, False, True, False)
    gold_docs = coreference_reading.read_conll_matching_files(
        system_docs, gold_src)
    return system_docs, gold_docs
#!/usr/bin/env python # -*- coding: utf-8 -*- # vim: set ts=2 sw=2 noet: import sys from nlp_util import coreference_reading, coreference_rendering, coreference, init if __name__ == '__main__': init.argcheck(sys.argv, 4, 5, "Print coreference resolution errors", "<prefix> <gold_dir> <test> [resolve span errors first? T | F]") auto = coreference_reading.read_conll_coref_system_output(sys.argv[3]) gold = coreference_reading.read_conll_matching_files(auto, sys.argv[2]) out_cluster_errors = open(sys.argv[1] + '.cluster_errors', 'w') out_cluster_context = open(sys.argv[1] + '.cluster_context', 'w') out_cluster_missing = open(sys.argv[1] + '.cluster_missing', 'w') out_cluster_extra = open(sys.argv[1] + '.cluster_extra', 'w') out_mention_list = open(sys.argv[1] + '.mention_list', 'w') out_mention_text = open(sys.argv[1] + '.mention_text', 'w') out_files = [out_cluster_errors, out_cluster_context, out_cluster_missing, out_cluster_extra, out_mention_list, out_mention_text] init.header(sys.argv, out_files) for function, outfile in [ (coreference_rendering.print_mention_text, out_mention_text), (coreference_rendering.print_mention_list, out_mention_list), (coreference_rendering.print_cluster_errors, out_cluster_errors),
def main():
    """Print coreference resolution errors for one system output file.

    Command line (read from ``sys.argv``)::

        <prefix> <gold_dir> <test_file> [--resolvespanerrors] [--lang=<en|nl>]

    The system output in ``test_file`` is read, the matching gold files are
    loaded from ``gold_dir`` (``--lang`` defaults to ``'en'``), and six
    report files are written, named ``<prefix>`` plus one of
    ``.cluster_errors``, ``.cluster_context``, ``.cluster_missing``,
    ``.cluster_extra``, ``.mention_list`` and ``.mention_text``.

    With ``--resolvespanerrors``,
    ``coreference_rendering.match_boundaries`` is called on each document
    part before anything is printed for it.

    If the options cannot be parsed, or there are not exactly three
    positional arguments, a usage message is printed and the function
    returns without doing any work.
    """
    # Local import: the file-level import block is not visible from here.
    import contextlib

    try:
        opts, args = getopt.gnu_getopt(
            sys.argv[1:], '', ['resolvespanerrors', 'lang='])
        output_prefix, gold_dir, test_file = args
    except (getopt.GetoptError, ValueError):
        print('Print coreference resolution errors')
        print(('./%s <prefix> <gold_dir> <test_file> '
               '[--resolvespanerrors] [--lang=<en|nl>]' % sys.argv[0]))
        return
    opts = dict(opts)
    lang = opts.get('--lang', 'en')

    # Inputs are read before any output file is created, so a failure here
    # does not leave empty report files behind.
    auto = coreference_reading.read_conll_coref_system_output(test_file)
    gold = coreference_reading.read_conll_matching_files(auto, gold_dir, lang)

    # ExitStack closes (and therefore flushes) every output file on all exit
    # paths, including a failed later open() or an exception while rendering.
    with contextlib.ExitStack() as stack:

        def open_output(suffix):
            return stack.enter_context(open(output_prefix + suffix, 'w'))

        out_cluster_errors = open_output('.cluster_errors')
        out_cluster_context = open_output('.cluster_context')
        out_cluster_missing = open_output('.cluster_missing')
        out_cluster_extra = open_output('.cluster_extra')
        out_mention_list = open_output('.mention_list')
        out_mention_text = open_output('.mention_text')
        out_files = [
            out_cluster_errors, out_cluster_context, out_cluster_missing,
            out_cluster_extra, out_mention_list, out_mention_text,
        ]
        for out in out_files:
            init.header(sys.argv, out)

        # Each file starts with the docstring of the function that fills it,
        # written as '#' comment lines.
        for function, outfile in [
            (coreference_rendering.print_mention_text, out_mention_text),
            (coreference_rendering.print_mention_list, out_mention_list),
            (coreference_rendering.print_cluster_errors, out_cluster_errors),
            (coreference_rendering.print_cluster_errors, out_cluster_context),
            (coreference_rendering.print_cluster_extra, out_cluster_extra),
            (coreference_rendering.print_cluster_missing, out_cluster_missing),
        ]:
            # __doc__ is None when docstrings are stripped (python -OO).
            instructions = (function.__doc__ or '').split('\n')
            instructions = ['# ' + inst for inst in instructions]
            print('\n'.join(instructions), file=outfile)

        # Define an order
        order = sorted((doc, part) for doc in auto for part in auto[doc])

        for doc, part in order:
            # Setup
            for out in out_files:
                print("\n# %s %s\n" % (doc, part), file=out)

            gold_part = gold[doc][part]
            auto_part = auto[doc][part]

            text = gold_part['text']
            gold_parses = gold_part['parses']
            gold_heads = gold_part['heads']
            gold_mentions = gold_part['mentions']
            gold_clusters = gold_part['clusters']
            auto_mentions = auto_part['mentions']
            auto_clusters = auto_part['clusters']

            gold_cluster_set = coreference.set_of_clusters(gold_clusters)
            auto_cluster_set = coreference.set_of_clusters(auto_clusters)
            gold_mention_set = coreference.set_of_mentions(gold_clusters)
            auto_mention_set = coreference.set_of_mentions(auto_clusters)

            if '--resolvespanerrors' in opts:
                coreference_rendering.match_boundaries(
                    gold_mention_set, auto_mention_set, auto_mentions,
                    auto_clusters, auto_cluster_set, text, gold_parses,
                    gold_heads)

            # Coloured mention output
            coreference_rendering.print_mention_list(
                out_mention_list, gold_mentions, auto_mention_set,
                gold_parses, gold_heads, text)
            coreference_rendering.print_mention_text(
                out_mention_text, gold_mentions, auto_mention_set,
                gold_parses, gold_heads, text)

            # Coloured cluster output, grouped
            groups = coreference.confusion_groups(
                gold_mentions, auto_mentions, gold_clusters, auto_clusters)

            covered = coreference_rendering.print_cluster_errors(
                groups, out_cluster_errors, out_cluster_context, text,
                gold_parses, gold_heads, auto_clusters, gold_clusters,
                gold_mentions)
            print("Entirely missing or extra\n", file=out_cluster_errors)
            print("Entirely missing or extra\n", file=out_cluster_context)
            coreference_rendering.print_cluster_missing(
                out_cluster_errors, out_cluster_context, out_cluster_missing,
                text, gold_cluster_set, covered, gold_parses, gold_heads)
            coreference_rendering.print_cluster_extra(
                out_cluster_errors, out_cluster_context, out_cluster_extra,
                text, auto_cluster_set, covered, gold_parses, gold_heads)