Example no. 1
def evaluate_and_write(args, model, tasks, splits_to_write, cuda_device):
    """ Evaluate a model on dev and/or test, then write predictions """
    val_results, val_preds = evaluate.evaluate(model, tasks, args.batch_size,
                                               cuda_device, "val")
    if "val" in splits_to_write:
        evaluate.write_preds(tasks,
                             val_preds,
                             args.run_dir,
                             "val",
                             strict_glue_format=args.write_strict_glue_format)
    if "test" in splits_to_write:
        _, te_preds = evaluate.evaluate(model, tasks, args.batch_size,
                                        cuda_device, "test")
        evaluate.write_preds(tasks,
                             te_preds,
                             args.run_dir,
                             "test",
                             strict_glue_format=args.write_strict_glue_format)

    run_name = args.get("run_name", os.path.basename(args.run_dir))
    results_tsv = os.path.join(args.exp_dir, "results.tsv")
    log.info("Writing results for split 'val' to %s", results_tsv)
    evaluate.write_results(val_results, results_tsv, run_name=run_name)

    # Collect the results of all tasks at once for logging; keys such as the micro
    # and macro averages don't already carry a task identifier, so prefix them.
    tasks_id_prefix = "_".join([t.name for t in tasks]) + "_"
    current_tasks_val_results = dict(
        (tasks_id_prefix + k, [v]) if tasks_id_prefix not in k else (k, [v])
        for k, v in val_results.items())

    return current_tasks_val_results
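
# Illustrative sketch (not part of the original function): what the key-prefixing
# comprehension above produces. Task names and metric values here are invented.
val_results = {"sst_accuracy": 0.91, "mrpc_f1": 0.88, "micro_avg": 0.90}
tasks_id_prefix = "sst_mrpc_"
current_tasks_val_results = dict(
    (tasks_id_prefix + k, [v]) if tasks_id_prefix not in k else (k, [v])
    for k, v in val_results.items())
# {'sst_mrpc_sst_accuracy': [0.91], 'sst_mrpc_mrpc_f1': [0.88], 'sst_mrpc_micro_avg': [0.9]}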

    def test_write_preds_does_run(self):
        evaluate.write_preds(self.glue_tasks,
                             self.val_preds,
                             self.temp_dir,
                             "test",
                             strict_glue_format=True)
        assert (os.path.exists(self.temp_dir + "/STS-B.tsv")
                and os.path.exists(self.temp_dir + "/WiC.jsonl"))

    def test_write_preds_glue(self):
        evaluate.write_preds(self.glue_tasks,
                             self.val_preds,
                             self.temp_dir,
                             "test",
                             strict_glue_format=True)
        stsb_predictions = pd.read_csv(self.temp_dir + "/STS-B.tsv", sep="\t")
        assert "index" in stsb_predictions.columns and "prediction" in stsb_predictions.columns
        assert stsb_predictions.iloc[0]["prediction"] == 5.00
        assert stsb_predictions.iloc[1]["prediction"] == 1.7

    def test_write_preds_superglue(self):
        """
        Ensure that SuperGLUE test predictions are written in the correct file format.
        """
        evaluate.write_preds([self.wic],
                             self.val_preds,
                             self.temp_dir,
                             "test",
                             strict_glue_format=True)
        wic_predictions = pd.read_json(self.temp_dir + "/WiC.jsonl",
                                       lines=True)
        assert "idx" in wic_predictions.columns and "label" in wic_predictions.columns
        assert wic_predictions.iloc[0]["label"] == "false"
        assert wic_predictions.iloc[1]["label"] == "true"
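
# Illustrative fixture sketch (an assumption, not taken from the original test file):
# the kind of setup the methods above rely on. self.glue_tasks, self.wic and
# self.val_preds come from jiant task objects and model predictions, whose exact
# structure depends on evaluate.evaluate(), so only the scratch-directory handling
# is spelled out here.
import shutil
import tempfile
import unittest


class TestWritePreds(unittest.TestCase):
    def setUp(self):
        # Hypothetical setup: a scratch directory that write_preds can write into.
        self.temp_dir = tempfile.mkdtemp()
        # self.glue_tasks, self.wic and self.val_preds would be built here (omitted).

    def tearDown(self):
        # Remove the prediction files written during each test.
        shutil.rmtree(self.temp_dir)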
Example no. 5
def evaluate_and_write(args, model, tasks, splits_to_write, cuda_device):
    """ Evaluate a model on dev and/or test, then write predictions """
    val_results, val_preds = evaluate.evaluate(model, tasks, args.batch_size, cuda_device, "val")
    if "val" in splits_to_write:
        evaluate.write_preds(
            tasks, val_preds, args.run_dir, "val", strict_glue_format=args.write_strict_glue_format
        )
    if "test" in splits_to_write:
        _, te_preds = evaluate.evaluate(model, tasks, args.batch_size, cuda_device, "test")
        evaluate.write_preds(
            tasks, te_preds, args.run_dir, "test", strict_glue_format=args.write_strict_glue_format
        )

    run_name = args.get("run_name", os.path.basename(args.run_dir))
    results_tsv = os.path.join(args.exp_dir, "results.tsv")
    log.info("Writing results for split 'val' to %s", results_tsv)
    evaluate.write_results(val_results, results_tsv, run_name=run_name)
Example no. 6
def evaluate_and_write(args,
                       model,
                       tasks,
                       splits_to_write,
                       mode=None,
                       do_write=False):
    """ Evaluate a model on dev and/or test, then write predictions """
    val_results, val_preds = evaluate.evaluate(model, tasks, args.batch_size,
                                               args.cuda, "val")
    if "val" in splits_to_write:
        evaluate.write_preds(tasks,
                             val_preds,
                             args.run_dir,
                             "val",
                             strict_glue_format=args.write_strict_glue_format)
    if "test" in splits_to_write:
        _, te_preds = evaluate.evaluate(model, tasks, args.batch_size,
                                        args.cuda, "test")
        evaluate.write_preds(tasks,
                             te_preds,
                             args.run_dir,
                             "test",
                             strict_glue_format=args.write_strict_glue_format)

    # val_results is the all_metrics dict; its keys look like "taskname_metricname"
    # and map to metric values.

    if args.records_pickle_path:
        evaluate.pickle_results(val_results,
                                path=args.records_pickle_path,
                                mode=mode)

    if do_write:
        run_name = args.get("run_name", os.path.basename(args.run_dir))
        results_tsv = os.path.join(args.exp_dir, "results.tsv")
        log.info("Writing results for split 'val' to %s", results_tsv)
        evaluate.write_results(val_results, results_tsv, run_name=run_name)
Example no. 7
def infer_jiant(exp_dir, task, items, batch_size=4):
    """ Load the trained jiant model for `task` from `exp_dir`, run it on `items`, and return predictions. """
    # use cached tokenizer
    path = join(exp_dir, 'transformers_cache')
    with env(PYTORCH_TRANSFORMERS_CACHE=path):
        reload(transformers.file_utils)

    # use terra model for lidirus
    run_dir = join(
        exp_dir,
        TERRA if task == LIDIRUS else task
    )

    loggers = [
        LOGGER,
        pytorch_pretrained_bert.modeling.logger,
        transformers.file_utils.logger,
        transformers.configuration_utils.logger,
        transformers.modeling_utils.logger,
        transformers.tokenization_utils.logger,
        allennlp.nn.initializers.logger
    ]
    with no_loggers(loggers):
        path = join(run_dir, 'params.conf')
        args = params_from_file(path)
        cuda_device = parse_cuda_list_arg('auto')

    args.local_log_path = join(run_dir, 'log.log')
    args.exp_dir = args.project_dir = exp_dir
    args.run_dir = run_dir

    log('Build tasks')
    with no_loggers(loggers), TemporaryDirectory() as dir:
        args.exp_dir = args.data_dir = dir  # hide pkl, preproc
        dump_task(dir, task, items=[])  # mock empty train, val, test
        if task in (TERRA, LIDIRUS):
            dump_task(dir, LIDIRUS if task == TERRA else TERRA, items=[])
        _, tasks, vocab, word_embs = build_tasks(args, cuda_device)

    log('Build model, load transformers pretrain')
    with no_loggers(loggers):
        args.exp_dir = exp_dir  # use transformers cache
        model = build_model(args, vocab, word_embs, tasks, cuda_device)

    path = join(run_dir, 'model.th')
    log(f'Load state {path!r}')
    load_model_state(model, path, cuda_device)

    log(f'Build mock task, infer via eval, batch_size={batch_size}')
    with no_loggers(loggers), TemporaryDirectory() as dir:
        args.exp_dir = args.data_dir = dir
        dump_task(dir, task, items)

        if task in (TERRA, LIDIRUS):
            # choose one at inference
            args.pretrain_tasks = task
            args.target_tasks = task

        _, tasks, _, _ = build_tasks(args, cuda_device)
        _, preds = evaluate.evaluate(
            model, tasks,
            batch_size, cuda_device, 'test'
        )
        evaluate.write_preds(
            tasks, preds, dir,
            'test', args.write_strict_glue_format
        )

        return list(load_preds(dir, task))
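
# Illustrative usage sketch (an assumption, not from the original module): TERRA is the
# task constant already used above; the item fields are placeholders standing in for the
# TERRa (Russian SuperGLUE) record format, and the experiment directory path is hypothetical.
items = [
    {'premise': '...', 'hypothesis': '...', 'idx': 0},
]
preds = infer_jiant('/path/to/exp', TERRA, items, batch_size=4)
print(preds)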