def main(args):
    """Score a system run against the gold relations and write a report.

    Args:
        args: argv-style sequence. ``args[1]`` is the directory containing
            the gold ``relations.json``, ``args[2]`` the directory containing
            the system ``output.json`` and ``args[3]`` the directory that
            receives ``evaluation.prototext``. Both input files are read as
            one JSON object per line.

    The process exits with status 1 when ``validate_relation_list`` rejects
    the predicted relations.
    """
    import sys

    input_dataset = args[1]
    input_run = args[2]
    output_dir = args[3]

    # Context managers close the inputs as soon as they have been parsed.
    with open('%s/relations.json' % input_dataset) as gold_file:
        gold_relations = [json.loads(line) for line in gold_file]
    with open('%s/output.json' % input_run) as predicted_file:
        predicted_relations = [json.loads(line) for line in predicted_file]

    language = identify_language(gold_relations)
    if not validate_relation_list(predicted_relations, language):
        sys.exit(1)

    # The report file is closed even if one of the evaluations raises.
    with open('%s/evaluation.prototext' % output_dir, 'w') as output_file:
        print('Evaluation for all discourse relations')
        write_results('All',
                      evaluate(gold_relations, predicted_relations),
                      output_file)

        print('Evaluation for explicit discourse relations only')
        explicit_gold_relations = [
            x for x in gold_relations if x['Type'] == 'Explicit']
        explicit_predicted_relations = [
            x for x in predicted_relations if x['Type'] == 'Explicit']
        write_results('Explicit only',
                      evaluate(explicit_gold_relations,
                               explicit_predicted_relations),
                      output_file)

        print('Evaluation for non-explicit discourse relations only '
              '(Implicit, EntRel, AltLex)')
        non_explicit_gold_relations = [
            x for x in gold_relations if x['Type'] != 'Explicit']
        non_explicit_predicted_relations = [
            x for x in predicted_relations if x['Type'] != 'Explicit']
        write_results('Non-explicit only',
                      evaluate(non_explicit_gold_relations,
                               non_explicit_predicted_relations),
                      output_file)
def main(args):
    """Score a system run with exact and partial matching and write a report.

    Args:
        args: argv-style sequence. ``args[1]`` is the directory containing
            the gold ``relations.json``, ``args[2]`` the directory containing
            the system ``output.json`` and ``args[3]`` the directory that
            receives ``evaluation.prototext``. Both input files are read as
            one JSON object per line.

    Results are written for all relations, explicit relations only and
    non-explicit relations only, first via ``evaluate``/``write_results``
    and then via ``partial_evaluate``/``write_partial_match_results``.

    The process exits with status 1 when ``validate_relation_list`` rejects
    the predicted relations.
    """
    import sys

    input_dataset = args[1]
    input_run = args[2]
    output_dir = args[3]
    # Third argument handed to partial_evaluate for every subset
    # (presumably the partial-match threshold -- confirm against
    # partial_evaluate).
    partial_match_threshold = 0.7

    # Context managers close the inputs as soon as they have been parsed.
    with open('%s/relations.json' % input_dataset) as gold_file:
        gold_relations = [json.loads(line) for line in gold_file]
    with open('%s/output.json' % input_run) as predicted_file:
        predicted_relations = [json.loads(line) for line in predicted_file]

    language = identify_language(gold_relations)
    if not validate_relation_list(predicted_relations, language):
        sys.exit(1)

    # The report file is closed even if one of the evaluations raises.
    with open('%s/evaluation.prototext' % output_dir, 'w') as output_file:
        print('Evaluation for all discourse relations')
        write_results('All',
                      evaluate(gold_relations, predicted_relations),
                      output_file)

        print('Evaluation for explicit discourse relations only')
        explicit_gold_relations = [
            x for x in gold_relations if x['Type'] == 'Explicit']
        explicit_predicted_relations = [
            x for x in predicted_relations if x['Type'] == 'Explicit']
        write_results('Explicit only',
                      evaluate(explicit_gold_relations,
                               explicit_predicted_relations),
                      output_file)

        print('Evaluation for non-explicit discourse relations only '
              '(Implicit, EntRel, AltLex)')
        non_explicit_gold_relations = [
            x for x in gold_relations if x['Type'] != 'Explicit']
        non_explicit_predicted_relations = [
            x for x in predicted_relations if x['Type'] != 'Explicit']
        write_results('Non-explicit only',
                      evaluate(non_explicit_gold_relations,
                               non_explicit_predicted_relations),
                      output_file)

        print('\nPartial Evaluation for all discourse relations')
        write_partial_match_results(
            'All (partial match)',
            partial_evaluate(gold_relations, predicted_relations,
                             partial_match_threshold),
            output_file)

        print('\nPartial Evaluation for explicit discourse relations')
        write_partial_match_results(
            'Explicit only (partial match)',
            partial_evaluate(explicit_gold_relations,
                             explicit_predicted_relations,
                             partial_match_threshold),
            output_file)

        print('\nPartial Evaluation for non-explicit discourse relations only '
              '(Implicit, EntRel, AltLex)')
        write_partial_match_results(
            'Non-explicit only (partial match)',
            partial_evaluate(non_explicit_gold_relations,
                             non_explicit_predicted_relations,
                             partial_match_threshold),
            output_file)
def main(args):
    """Score a system run against the gold relations and write a report.

    Args:
        args: argv-style sequence. ``args[1]`` is the directory containing
            the gold ``relations.json``, ``args[2]`` the directory containing
            the system ``output.json`` and ``args[3]`` the directory that
            receives ``evaluation.prototext``. Both input files are read as
            one JSON object per line.

    The process exits with status 1 (after writing a message to stderr) when
    the two files hold a different number of relations or when
    ``validate_relation_list`` rejects the predicted relations.
    """
    input_dataset = args[1]
    input_run = args[2]
    output_dir = args[3]

    # Context managers close the inputs as soon as they have been parsed.
    with open('%s/relations.json' % input_dataset) as gold_file:
        gold_relations = [json.loads(line) for line in gold_file]
    with open('%s/output.json' % input_run) as predicted_file:
        predicted_relations = [json.loads(line) for line in predicted_file]

    if len(gold_relations) != len(predicted_relations):
        # Both placeholders must be %s: a bare "% i" is parsed as a
        # space-flagged integer conversion and swallows the "i" of
        # "instances".
        err_message = 'Gold standard has %s instances; predicted %s instances' % \
            (len(gold_relations), len(predicted_relations))
        sys.stderr.write(err_message + '\n')
        sys.exit(1)

    language = identify_language(gold_relations)
    if not validate_relation_list(predicted_relations, language):
        sys.stderr.write('Invalid format\n')
        sys.exit(1)

    # Order both lists by ID before handing them to use_gold_standard_types.
    gold_relations = sorted(gold_relations, key=lambda x: x['ID'])
    predicted_relations = sorted(predicted_relations, key=lambda x: x['ID'])
    use_gold_standard_types(gold_relations, predicted_relations)

    # The report file is closed even if one of the evaluations raises.
    with open('%s/evaluation.prototext' % output_dir, 'w') as output_file:
        print('Evaluation for all discourse relations')
        write_results('All',
                      evaluate(gold_relations, predicted_relations),
                      output_file)

        print('Evaluation for explicit discourse relations only')
        explicit_gold_relations = [
            x for x in gold_relations if x['Type'] == 'Explicit']
        explicit_predicted_relations = [
            x for x in predicted_relations if x['Type'] == 'Explicit']
        write_results('Explicit only',
                      evaluate(explicit_gold_relations,
                               explicit_predicted_relations),
                      output_file)

        print('Evaluation for non-explicit discourse relations only '
              '(Implicit, EntRel, AltLex)')
        non_explicit_gold_relations = [
            x for x in gold_relations if x['Type'] != 'Explicit']
        non_explicit_predicted_relations = [
            x for x in predicted_relations if x['Type'] != 'Explicit']
        write_results('Non-explicit only',
                      evaluate(non_explicit_gold_relations,
                               non_explicit_predicted_relations),
                      output_file)
def main(args):
    """Score a system run against the gold relations and write a report.

    Args:
        args: argv-style sequence. ``args[1]`` is the directory containing
            the gold ``relations.json``, ``args[2]`` the directory containing
            the system ``output.json`` and ``args[3]`` the directory that
            receives ``evaluation.prototext``. Both input files are read as
            one JSON object per line.

    Gold lines are parsed from their first ``{`` onwards; a gold line that
    cannot be parsed is reported on stdout and skipped.

    The process exits with status 1 (after writing a message to stderr) when
    the two files yield a different number of relations or when
    ``validate_relation_list`` rejects the predicted relations.
    """
    input_dataset = args[1]
    input_run = args[2]
    output_dir = args[3]

    relation_file = '%s/relations.json' % input_dataset
    gold_relations = []
    with open(relation_file) as gold_file:
        # file_line is the zero-based index of the line within the file.
        for file_line, x in enumerate(gold_file):
            try:
                # Drop anything that precedes the JSON object on the line.
                gold_relations.append(json.loads(x[x.index('{'):]))
            except ValueError:
                # ValueError covers both a missing '{' (str.index) and
                # malformed JSON (json.loads) without also swallowing
                # KeyboardInterrupt or SystemExit.
                print("Error reading json file on line %s" % file_line)
                print(x)

    with open('%s/output.json' % input_run) as predicted_file:
        predicted_relations = [json.loads(line) for line in predicted_file]

    if len(gold_relations) != len(predicted_relations):
        # Both placeholders must be %s: a bare "% i" is parsed as a
        # space-flagged integer conversion and swallows the "i" of
        # "instances".
        err_message = 'Gold standard has %s instances; predicted %s instances' % \
            (len(gold_relations), len(predicted_relations))
        sys.stderr.write(err_message + '\n')
        sys.exit(1)

    language = identify_language(gold_relations)
    if not validate_relation_list(predicted_relations, language):
        sys.stderr.write('Invalid format\n')
        sys.exit(1)

    # Order both lists by ID before handing them to use_gold_standard_types.
    gold_relations = sorted(gold_relations, key=lambda x: x['ID'])
    predicted_relations = sorted(predicted_relations, key=lambda x: x['ID'])
    use_gold_standard_types(gold_relations, predicted_relations)

    # The report file is closed even if one of the evaluations raises.
    with open('%s/evaluation.prototext' % output_dir, 'w') as output_file:
        print('Evaluation for all discourse relations')
        write_results('All',
                      evaluate(gold_relations, predicted_relations),
                      output_file)

        print('Evaluation for explicit discourse relations only')
        explicit_gold_relations = [
            x for x in gold_relations if x['Type'] == 'Explicit']
        explicit_predicted_relations = [
            x for x in predicted_relations if x['Type'] == 'Explicit']
        write_results('Explicit only',
                      evaluate(explicit_gold_relations,
                               explicit_predicted_relations),
                      output_file)

        print('Evaluation for non-explicit discourse relations only '
              '(Implicit, EntRel, AltLex)')
        non_explicit_gold_relations = [
            x for x in gold_relations if x['Type'] != 'Explicit']
        non_explicit_predicted_relations = [
            x for x in predicted_relations if x['Type'] != 'Explicit']
        write_results('Non-explicit only',
                      evaluate(non_explicit_gold_relations,
                               non_explicit_predicted_relations),
                      output_file)
write_proto_text('%s Arg 1 Arg2 extraction recall' % prefix, r, output_file) write_proto_text('%s Arg 1 Arg2 extraction f1' % prefix, f, output_file) p, r, f = sense_cm.compute_average_prf() write_proto_text('%s Sense precision' % prefix, p, output_file) write_proto_text('%s Sense recall' % prefix, r, output_file) write_proto_text('%s Sense f1' % prefix, f, output_file) if __name__ == '__main__': input_dataset = sys.argv[1] input_run = sys.argv[2] output_dir = sys.argv[3] gold_relations = [json.loads(x) for x in open('%s/pdtb-data.json' % input_dataset)] predicted_relations = [json.loads(x) for x in open('%s/output.json' % input_run)] all_correct = validate_relation_list(predicted_relations) if not all_correct: exit(1) output_file = open('%s/evaluation.prototext' % output_dir, 'w') print 'Evaluation for all discourse relations' write_results('All', evaluate(gold_relations, predicted_relations), output_file) print 'Evaluation for explicit discourse relations only' explicit_gold_relations = [x for x in gold_relations if x['Type'] == 'Explicit'] explicit_predicted_relations = [x for x in predicted_relations if x['Type'] == 'Explicit'] write_results('Explicit only', evaluate(explicit_gold_relations, explicit_predicted_relations), output_file) print 'Evaluation for non-explicit discourse relations only (Implicit, EntRel, AltLex)' non_explicit_gold_relations = [x for x in gold_relations if x['Type'] != 'Explicit'] non_explicit_predicted_relations = [x for x in predicted_relations if x['Type'] != 'Explicit']
write_proto_text('%s Sense recall' % prefix, r, output_file) write_proto_text('%s Sense f1' % prefix, f, output_file) if __name__ == '__main__': input_dataset = sys.argv[1] input_run = sys.argv[2] output_dir = sys.argv[3] gold_relations = [ json.loads(x) for x in open('%s/pdtb-data.json' % input_dataset) ] predicted_relations = [ json.loads(x) for x in open('%s/output.json' % input_run) ] all_correct = validate_relation_list(predicted_relations) if not all_correct: exit(1) output_file = open('%s/evaluation.prototext' % output_dir, 'w') print 'Evaluation for all discourse relations' write_results('All', evaluate(gold_relations, predicted_relations), output_file) print 'Evaluation for explicit discourse relations only' explicit_gold_relations = [ x for x in gold_relations if x['Type'] == 'Explicit' ] explicit_predicted_relations = [ x for x in predicted_relations if x['Type'] == 'Explicit' ]