def evaluate_and_write(args, model, tasks, splits_to_write, cuda_device):
    """ Evaluate a model on dev and/or test, then write predictions """
    val_results, val_preds = evaluate.evaluate(model, tasks, args.batch_size, cuda_device, "val")
    if "val" in splits_to_write:
        evaluate.write_preds(
            tasks, val_preds, args.run_dir, "val", strict_glue_format=args.write_strict_glue_format
        )
    if "test" in splits_to_write:
        _, te_preds = evaluate.evaluate(model, tasks, args.batch_size, cuda_device, "test")
        evaluate.write_preds(
            tasks, te_preds, args.run_dir, "test", strict_glue_format=args.write_strict_glue_format
        )

    run_name = args.get("run_name", os.path.basename(args.run_dir))
    results_tsv = os.path.join(args.exp_dir, "results.tsv")
    log.info("Writing results for split 'val' to %s", results_tsv)
    evaluate.write_results(val_results, results_tsv, run_name=run_name)

    # Collect results of all tasks at once for logging: keys that don't already
    # contain the joined task-name prefix (e.g. micro and macro averages) get
    # it added, so every metric key carries a task identifier.
    tasks_id_prefix = "_".join([t.name for t in tasks]) + "_"
    current_tasks_val_results = dict(
        (tasks_id_prefix + k, [v]) if tasks_id_prefix not in k else (k, [v])
        for k, v in val_results.items()
    )
    return current_tasks_val_results
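
# Hypothetical standalone sketch (not part of jiant) showing the prefixing
# above for a single task named "sst": "sst_accuracy" already contains the
# prefix "sst_" and is kept, while the aggregate "micro_avg" gains it.
def _demo_prefix_val_results():
    tasks_id_prefix = "sst_"
    val_results = {"sst_accuracy": 0.91, "micro_avg": 0.88}
    prefixed = dict(
        (tasks_id_prefix + k, [v]) if tasks_id_prefix not in k else (k, [v])
        for k, v in val_results.items()
    )
    assert prefixed == {"sst_accuracy": [0.91], "sst_micro_avg": [0.88]}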
def evaluate_and_write(args, model, tasks, splits_to_write, cuda_device):
    """ Evaluate a model on dev and/or test, then write predictions """
    val_results, val_preds = evaluate.evaluate(model, tasks, args.batch_size, cuda_device, "val")
    if "val" in splits_to_write:
        evaluate.write_preds(
            tasks, val_preds, args.run_dir, "val", strict_glue_format=args.write_strict_glue_format
        )
    if "test" in splits_to_write:
        _, te_preds = evaluate.evaluate(model, tasks, args.batch_size, cuda_device, "test")
        evaluate.write_preds(
            tasks, te_preds, args.run_dir, "test", strict_glue_format=args.write_strict_glue_format
        )

    run_name = args.get("run_name", os.path.basename(args.run_dir))
    results_tsv = os.path.join(args.exp_dir, "results.tsv")
    log.info("Writing results for split 'val' to %s", results_tsv)
    evaluate.write_results(val_results, results_tsv, run_name=run_name)
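
# Hedged usage sketch: `args`, `model`, and `tasks` are assumed to come from
# jiant's usual config/build pipeline (not shown here). "val" metrics are
# always computed and appended to results.tsv; splits_to_write only controls
# which prediction files get written.
#
#   evaluate_and_write(args, model, tasks, splits_to_write=["val", "test"], cuda_device=0)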
def evaluate_and_write(args, model, tasks, splits_to_write, mode=None, do_write=False):
    """ Evaluate a model on dev and/or test, then write predictions """
    val_results, val_preds = evaluate.evaluate(model, tasks, args.batch_size, args.cuda, "val")
    if "val" in splits_to_write:
        evaluate.write_preds(
            tasks, val_preds, args.run_dir, "val", strict_glue_format=args.write_strict_glue_format
        )
    if "test" in splits_to_write:
        _, te_preds = evaluate.evaluate(model, tasks, args.batch_size, args.cuda, "test")
        evaluate.write_preds(
            tasks, te_preds, args.run_dir, "test", strict_glue_format=args.write_strict_glue_format
        )

    # val_results is all_metrics, a dict with keys like "taskname_metricname"
    # mapping to metric values.
    if args.records_pickle_path:
        evaluate.pickle_results(val_results, path=args.records_pickle_path, mode=mode)

    if do_write:
        run_name = args.get("run_name", os.path.basename(args.run_dir))
        results_tsv = os.path.join(args.exp_dir, "results.tsv")
        log.info("Writing results for split 'val' to %s", results_tsv)
        evaluate.write_results(val_results, results_tsv, run_name=run_name)
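
# The branch above assumes evaluate.pickle_results serializes the metrics
# dict; a minimal standalone sketch of that behavior (hypothetical, not
# jiant's actual implementation) could look like:
import pickle


def pickle_results_sketch(results, path, mode=None):
    # Store the metrics dict alongside an optional mode tag (e.g. "pretrain"
    # vs. "target_train") so runs can be told apart when unpickling.
    with open(path, "wb") as f:
        pickle.dump({"mode": mode, "results": results}, f)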
def infer_jiant(exp_dir, task, items, batch_size=4):
    # use cached tokenizer
    path = join(exp_dir, 'transformers_cache')
    with env(PYTORCH_TRANSFORMERS_CACHE=path):
        reload(transformers.file_utils)

    # use terra model for lidirus
    run_dir = join(
        exp_dir,
        TERRA if task == LIDIRUS else task
    )

    loggers = [
        LOGGER,
        pytorch_pretrained_bert.modeling.logger,
        transformers.file_utils.logger,
        transformers.configuration_utils.logger,
        transformers.modeling_utils.logger,
        transformers.tokenization_utils.logger,
        allennlp.nn.initializers.logger
    ]
    with no_loggers(loggers):
        path = join(run_dir, 'params.conf')
        args = params_from_file(path)
        cuda_device = parse_cuda_list_arg('auto')

    args.local_log_path = join(run_dir, 'log.log')
    args.exp_dir = args.project_dir = exp_dir
    args.run_dir = run_dir

    log('Build tasks')
    with no_loggers(loggers), TemporaryDirectory() as dir:
        args.exp_dir = args.data_dir = dir  # hide pkl, preproc
        dump_task(dir, task, items=[])  # mock empty train, val, test
        if task in (TERRA, LIDIRUS):
            dump_task(dir, LIDIRUS if task == TERRA else TERRA, items=[])
        _, tasks, vocab, word_embs = build_tasks(args, cuda_device)

    log('Build model, load transformers pretrain')
    with no_loggers(loggers):
        args.exp_dir = exp_dir  # use transformers cache
        model = build_model(args, vocab, word_embs, tasks, cuda_device)

    path = join(run_dir, 'model.th')
    log(f'Load state {path!r}')
    load_model_state(model, path, cuda_device)

    log(f'Build mock task, infer via eval, batch_size={batch_size}')
    with no_loggers(loggers), TemporaryDirectory() as dir:
        args.exp_dir = args.data_dir = dir
        dump_task(dir, task, items)

        if task in (TERRA, LIDIRUS):
            # choose one at inference
            args.pretrain_tasks = task
            args.target_tasks = task

        _, tasks, _, _ = build_tasks(args, cuda_device)
        _, preds = evaluate.evaluate(
            model, tasks,
            batch_size, cuda_device,
            'test'
        )
        evaluate.write_preds(
            tasks, preds,
            dir, 'test',
            args.write_strict_glue_format
        )
        return list(load_preds(dir, task))
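
# Hedged usage sketch: `items` are raw task records in Russian SuperGLUE JSON
# form, and `exp_dir` is assumed to contain per-task run dirs with params.conf
# and model.th plus a transformers_cache, as the code above expects. LIDIRUS
# is answered with the TERRA model, so both tasks share the same run dir.
#
#   preds = infer_jiant('exps/rubert', TERRA, items, batch_size=8)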