def evaluate_and_write(args, model, tasks, splits_to_write, cuda_device):
    """Evaluate a model on dev and/or test, then write predictions and results.

    Args:
        args: config object; reads batch_size, run_dir, exp_dir,
            write_strict_glue_format, and (optionally, via .get) run_name.
        model: the model to evaluate.
        tasks: list of task objects, each with a .name attribute.
        splits_to_write: collection of split names ("val" and/or "test")
            whose predictions should be written to disk.
        cuda_device: device passed through to evaluate.evaluate.

    Returns:
        dict mapping task-prefixed metric names to single-element lists of
        validation metric values, suitable for logging results of all tasks
        at once.
    """
    val_results, val_preds = evaluate.evaluate(model, tasks, args.batch_size, cuda_device, "val")
    if "val" in splits_to_write:
        evaluate.write_preds(
            tasks, val_preds, args.run_dir, "val",
            strict_glue_format=args.write_strict_glue_format,
        )
    if "test" in splits_to_write:
        _, te_preds = evaluate.evaluate(model, tasks, args.batch_size, cuda_device, "test")
        evaluate.write_preds(
            tasks, te_preds, args.run_dir, "test",
            strict_glue_format=args.write_strict_glue_format,
        )
    run_name = args.get("run_name", os.path.basename(args.run_dir))
    results_tsv = os.path.join(args.exp_dir, "results.tsv")
    log.info("Writing results for split 'val' to %s", results_tsv)
    evaluate.write_results(val_results, results_tsv, run_name=run_name)

    # For logging results of all tasks at once: metrics such as micro/macro
    # averages don't already carry a task identifier in their key, so prefix
    # them with the joined task names; keys that already contain the prefix
    # are left as-is. Values are wrapped in one-element lists for aggregation.
    tasks_id_prefix = "_".join(t.name for t in tasks) + "_"
    return {
        (k if tasks_id_prefix in k else tasks_id_prefix + k): [v]
        for k, v in val_results.items()
    }
def evaluate_and_write(args, model, tasks, splits_to_write, cuda_device):
    """Run dev (and optionally test) evaluation, then record predictions and results.

    Always evaluates on "val"; also evaluates on "test" when requested.
    Predictions are written only for the splits named in splits_to_write,
    and validation metrics are appended to <exp_dir>/results.tsv.
    """
    strict = args.write_strict_glue_format
    val_results, val_preds = evaluate.evaluate(model, tasks, args.batch_size, cuda_device, "val")
    if "val" in splits_to_write:
        evaluate.write_preds(tasks, val_preds, args.run_dir, "val", strict_glue_format=strict)
    if "test" in splits_to_write:
        _, test_preds = evaluate.evaluate(model, tasks, args.batch_size, cuda_device, "test")
        evaluate.write_preds(tasks, test_preds, args.run_dir, "test", strict_glue_format=strict)
    results_path = os.path.join(args.exp_dir, "results.tsv")
    run_name = args.get("run_name", os.path.basename(args.run_dir))
    log.info("Writing results for split 'val' to %s", results_path)
    evaluate.write_results(val_results, results_path, run_name=run_name)
def evaluate_and_write(args, model, tasks, splits_to_write, mode=None, do_write=False):
    """Evaluate on dev (and optionally test); optionally pickle and write results.

    Args:
        args: config object; reads batch_size, cuda, run_dir, exp_dir,
            write_strict_glue_format, records_pickle_path, and (via .get)
            optionally run_name.
        model: the model to evaluate.
        tasks: list of task objects.
        splits_to_write: split names ("val" and/or "test") whose predictions
            should be written to disk.
        mode: forwarded to evaluate.pickle_results when pickling is enabled.
        do_write: when True, also append validation metrics to
            <exp_dir>/results.tsv.
    """
    strict = args.write_strict_glue_format
    val_results, val_preds = evaluate.evaluate(model, tasks, args.batch_size, args.cuda, "val")
    if "val" in splits_to_write:
        evaluate.write_preds(tasks, val_preds, args.run_dir, "val", strict_glue_format=strict)
    if "test" in splits_to_write:
        _, test_preds = evaluate.evaluate(model, tasks, args.batch_size, args.cuda, "test")
        evaluate.write_preds(tasks, test_preds, args.run_dir, "test", strict_glue_format=strict)
    # val_results is the all_metrics dict: keys look like
    # "taskname_metricname" mapping to metric values.
    if args.records_pickle_path:
        evaluate.pickle_results(val_results, path=args.records_pickle_path, mode=mode)
    if do_write:
        run_name = args.get("run_name", os.path.basename(args.run_dir))
        results_tsv = os.path.join(args.exp_dir, "results.tsv")
        log.info("Writing results for split 'val' to %s", results_tsv)
        evaluate.write_results(val_results, results_tsv, run_name=run_name)