Example #1
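All three examples depend on module aliases (module_data, module_arch, module_metric, module_vis) and helpers (compute_dims, compute_trn_config, ctxt_mgr / valid_samples, verbose, merge, Strategy) that are imported elsewhere in the source project. A minimal sketch of what those imports might look like is given below; the module paths are assumptions inferred from the aliases, not taken from the original files, except for mergedeep, whose merge/Strategy API matches the calls used here.

import copy
import random

import numpy as np
import torch
from mergedeep import merge, Strategy   # merge(dest, src, strategy=Strategy.REPLACE)

# Assumed project-local modules, named to match the aliases used in the examples;
# the real package layout of the source repository may differ.
import data_loader.data_loaders as module_data   # data-loader factory
import model.model as module_arch                # model architectures
import model.metric as module_metric             # retrieval metrics
import utils.visualizer as module_vis            # ranking visualiser
from utils.util import compute_dims, compute_trn_config, ctxt_mgr, verbose
# Example #2 additionally uses update_src_web_video_dir and compress_predictions,
# and Example #3 uses valid_samples instead of ctxt_mgr; their locations are not
# shown on this page.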
def evaluation(config, logger=None, trainer=None):

    if logger is None:
        logger = config.get_logger('test')

    if getattr(config._args, "eval_from_training_config", False):
        eval_conf = copy.deepcopy(config)
        merge(eval_conf._config,
              config["eval_settings"],
              strategy=Strategy.REPLACE)
        config = eval_conf

    logger.info("Running evaluation with configuration:")
    logger.info(config)

    expert_dims, raw_input_dims = compute_dims(config)
    trn_config = compute_trn_config(config)

    # Set the random initial seeds
    seed = config["seed"]
    logger.info(f"Setting experiment random seed to {seed}")
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # We use cls defaults for backwards compatibility with the MMIT configs.  In the
    # long run this should be handled by the json configs themselves
    cls_defaults = ["train", "val", "tiny", "challenge"]

    data_loaders = config.init(
        name='data_loader',
        module=module_data,
        logger=logger,
        raw_input_dims=raw_input_dims,
        text_feat=config["experts"]["text_feat"],
        text_dim=config["experts"]["text_dim"],
        text_agg=config["experts"]["text_agg"],
        use_zeros_for_missing=config["experts"].get("use_zeros_for_missing",
                                                    False),
        task=config.get("task", "retrieval"),
        cls_partitions=config.get("cls_partitions", cls_defaults),
    )

    model = config.init(
        name='arch',
        module=module_arch,
        trn_config=trn_config,
        expert_dims=expert_dims,
        text_dim=config["experts"]["text_dim"],
        disable_nan_checks=config["disable_nan_checks"],
        task=config.get("task", "retrieval"),
        ce_shared_dim=config["experts"].get("ce_shared_dim", None),
        feat_aggregation=config["data_loader"]["args"]["feat_aggregation"],
        trn_cat=config["data_loader"]["args"].get("trn_cat", 0),
    )
    logger.info(model)

    metrics = [getattr(module_metric, met) for met in config['metrics']]
    visualizer = config.init(
        name='visualizer',
        module=module_vis,
        exp_name=config._exper_name,
        web_dir=config._web_log_dir,
    )
    ckpt_path = config._args.resume
    logger.info(f"Loading checkpoint: {ckpt_path} ...")
    checkpoint = torch.load(ckpt_path)
    state_dict = checkpoint['state_dict']
    if config['n_gpu'] > 1:
        model = torch.nn.DataParallel(model)
    model.load_state_dict(state_dict)

    # prepare model for testing.  Note that some datasets fail to fit the retrieval
    # set on the GPU, so we run them on the CPU
    if torch.cuda.is_available() and not config.get("disable_gpu", True):
        device = "cuda"
    else:
        device = "cpu"
    logger.info(f"Running evaluation on {device}")

    model = model.to(device)
    model.eval()

    with torch.no_grad():
        samples, meta = data_loaders["retrieval"]

        # To use the nan-checks safely, we need to make temporary copies of the data
        disable_nan_checks = config._config["disable_nan_checks"]
        with ctxt_mgr(samples, device, disable_nan_checks) as valid:
            output = model(**valid)

        sims = output["cross_view_conf_matrix"].data.cpu().float().numpy()
        dataset = data_loaders.dataset_name
        nested_metrics = {}
        for metric in metrics:
            metric_name = metric.__name__
            res = metric(sims, query_masks=meta["query_masks"])
            verbose(epoch=0, metrics=res, name=dataset, mode=metric_name)
            if trainer is not None:
                if not trainer.mini_train:
                    trainer.writer.set_step(step=0, mode="val")
                # avoid TensorBoard folding by prefixing the metric name
                metric_name_ = f"test_{metric_name}"
                trainer.log_metrics(res, metric_name=metric_name_, mode="val")
            nested_metrics[metric_name] = res

    if data_loaders.num_test_captions == 1:
        visualizer.visualize_ranking(
            sims=sims,
            meta=meta,
            epoch=0,
            nested_metrics=nested_metrics,
        )
    log = {}
    for subkey, subval in nested_metrics.items():
        for subsubkey, subsubval in subval.items():
            log[f"test_{subkey}_{subsubkey}"] = subsubval
    for key, value in log.items():
        logger.info(" {:15s}: {}".format(str(key), value))
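For orientation, this is roughly how such an evaluation function tends to be driven from a command-line entry point. The ConfigParser wrapper and its interface (get_logger, init, _args, dictionary-style access) are assumptions inferred from the calls inside evaluation, not confirmed against the source project.

if __name__ == "__main__":
    import argparse
    from parse_config import ConfigParser   # hypothetical config wrapper module

    parser = argparse.ArgumentParser(description="Run retrieval evaluation")
    parser.add_argument("--config", help="path to the experiment JSON config")
    parser.add_argument("--resume", help="checkpoint path, read via config._args.resume")
    parser.add_argument("--eval_from_training_config", action="store_true",
                        help="merge config['eval_settings'] over the training config")
    args = parser.parse_args()

    # The wrapper is assumed to expose config.init(...), config.get_logger(...),
    # config._args and config["..."] lookups, as used by evaluation() above.
    config = ConfigParser(args)
    evaluation(config)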
Example #2
def evaluation(config, logger=None, trainer=None):

    if logger is None:
        logger = config.get_logger('test')

    if getattr(config._args, "eval_from_training_config", False):
        eval_conf = copy.deepcopy(config)
        merge(eval_conf._config,
              config["eval_settings"],
              strategy=Strategy.REPLACE)
        config = eval_conf

    logger.info("Running evaluation with configuration:")
    logger.info(config)

    expert_dims, raw_input_dims = compute_dims(config)
    trn_config = compute_trn_config(config)

    # Set the random initial seeds
    seed = config["seed"]
    logger.info(f"Setting experiment random seed to {seed}")
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    update_src_web_video_dir(config)
    visualizer = config.init(
        name='visualizer',
        module=module_vis,
        exp_name=config._exper_name,
        web_dir=config._web_log_dir,
    )

    data_loaders = config.init(
        name='data_loader',
        module=module_data,
        logger=logger,
        raw_input_dims=raw_input_dims,
        challenge_mode=config.get("challenge_mode", False),
        text_feat=config["experts"]["text_feat"],
        text_dim=config["experts"]["text_dim"],
        text_agg=config["experts"]["text_agg"],
        use_zeros_for_missing=config["experts"].get("use_zeros_for_missing",
                                                    False),
        task=config.get("task", "retrieval"),
        eval_only=True,
    )

    model = config.init(
        name='arch',
        module=module_arch,
        trn_config=trn_config,
        expert_dims=expert_dims,
        text_dim=config["experts"]["text_dim"],
        disable_nan_checks=config["disable_nan_checks"],
        task=config.get("task", "retrieval"),
        ce_shared_dim=config["experts"].get("ce_shared_dim", None),
        feat_aggregation=config["data_loader"]["args"]["feat_aggregation"],
        trn_cat=config["data_loader"]["args"].get("trn_cat", 0),
    )
    logger.info(model)

    metrics = [getattr(module_metric, met) for met in config['metrics']]
    ckpt_path = config._args.resume
    logger.info(f"Loading checkpoint: {ckpt_path} ...")
    checkpoint = torch.load(ckpt_path)
    state_dict = checkpoint['state_dict']
    if config['n_gpu'] > 1:
        model = torch.nn.DataParallel(model)
    model.load_state_dict(state_dict)
    challenge_mode = config.get("challenge_mode", False)
    challenge_msg = (
        "\n"
        "Evaluation ran on challenge features. To obtain a score, upload the similarity"
        "matrix for each dataset to the test server after running the "
        "`misc/cvpr2020-challenge/prepare_submission.py` script and following the "
        "instructions at: "
        "https://www.robots.ox.ac.uk/~vgg/challenges/video-pentathlon/"
        "\n")

    # prepare model for testing.  Note that some datasets fail to fit the retrieval
    # set on the GPU, so we run them on the CPU
    if torch.cuda.is_available() and not config.get("disable_gpu", True):
        device = "cuda"
    else:
        device = "cpu"
    logger.info(f"Running evaluation on {device}")

    model = model.to(device)
    model.eval()

    with torch.no_grad():
        samples, meta = data_loaders["retrieval"]

        # To use the nan-checks safely, we need to make temporary copies of the data
        disable_nan_checks = config._config["disable_nan_checks"]
        with ctxt_mgr(samples, device, disable_nan_checks) as valid:
            output = model(**valid)

        sims = output["cross_view_conf_matrix"].data.cpu().float().numpy()
        dataset = data_loaders.dataset_name
        if challenge_mode:
            split = data_loaders.dataloaders["dataset"].split_name
            prediction_path = config._log_dir / f"{dataset}-{split}-predictions.csv"
            compressed_preds = compress_predictions(
                query_masks=meta["query_masks"],
                sims=sims,
            )
            np.savetxt(prediction_path,
                       compressed_preds,
                       delimiter=',',
                       fmt="%d")
            print(f"Saved similarity matrix predictions to {prediction_path}")
            print(challenge_msg)
            return

        nested_metrics = {}
        for metric in metrics:
            metric_name = metric.__name__
            res = metric(sims, query_masks=meta["query_masks"])
            verbose(epoch=0, metrics=res, name=dataset, mode=metric_name)
            if trainer is not None:
                if not trainer.mini_train:
                    trainer.writer.set_step(step=0, mode="val")
                # avoid TensorBoard folding by prefixing the metric name
                metric_name_ = f"test_{metric_name}"
                trainer.log_metrics(res, metric_name=metric_name_, mode="val")
            nested_metrics[metric_name] = res

    if data_loaders.num_test_captions == 1:
        visualizer.visualize_ranking(
            sims=sims,
            meta=meta,
            epoch=0,
            nested_metrics=nested_metrics,
        )
    log = {}
    for subkey, subval in nested_metrics.items():
        for subsubkey, subsubval in subval.items():
            log[f"test_{subkey}_{subsubkey}"] = subsubval
    for key, value in log.items():
        logger.info(" {:15s}: {}".format(str(key), value))
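The challenge branch in this variant saves the compressed predictions as an integer CSV via np.savetxt. If you want to sanity-check the file before uploading, it can be read back with np.loadtxt; the path below is illustrative, the real name follows the <dataset>-<split>-predictions.csv pattern used above.

import numpy as np

# Illustrative path; the actual file lives in the experiment's log directory.
preds = np.loadtxt("logs/exp/MSRVTT-test-predictions.csv", delimiter=",", dtype=int)
print(preds.shape)   # one row per query; the column layout is whatever
                     # compress_predictions produced above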
Example #3
def evaluation(config, logger=None):

    if logger is None:
        logger = config.get_logger('test')

    logger.info("Running evaluation with configuration:")
    logger.info(config)

    expert_dims, raw_input_dims = compute_dims(config)

    # Set the random initial seeds
    seed = config["seed"]
    logger.info(f"Setting experiment random seed to {seed}")
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    data_loaders = config.init(
        name='data_loader',
        module=module_data,
        raw_input_dims=raw_input_dims,
        text_feat=config["experts"]["text_feat"],
        text_dim=config["experts"]["text_dim"],
    )
    model = config.init(
        name='arch',
        module=module_arch,
        expert_dims=expert_dims,
        text_dim=config["experts"]["text_dim"],
        disable_nan_checks=config["disable_nan_checks"],
    )
    logger.info(model)

    metrics = [getattr(module_metric, met) for met in config['metrics']]
    visualizer = config.init(
        name='visualizer',
        module=module_vis,
        exp_name=config._exper_name,
        log_dir=config._web_log_dir,
    )
    ckpt_path = config._args.resume
    logger.info(f"Loading checkpoint: {ckpt_path} ...")
    checkpoint = torch.load(ckpt_path)
    state_dict = checkpoint['state_dict']
    if config['n_gpu'] > 1:
        model = torch.nn.DataParallel(model)
    model.load_state_dict(state_dict)

    # prepare model for testing.  Note that some datasets fail to fit the retrieval
    # set on the GPU, so we run them on the CPU
    if torch.cuda.is_available() and not config.get("disable_gpu", False):
        device = "cuda"
    else:
        device = "cpu"
    logger.info(f"Running evaluation on {device}")

    model = model.to(device)
    model.eval()

    with torch.no_grad():
        samples, meta = data_loaders["retrieval"]

        # To use the nan-checks safely, we need to make temporary copies of the data
        disable_nan_checks = config._config["disable_nan_checks"]
        with valid_samples(samples, device, disable_nan_checks) as valid:
            output = model(**valid)

        sims = output["cross_view_conf_matrix"].data.cpu().float().numpy()
        dataset = data_loaders.dataset_name
        nested_metrics = {}
        for metric in metrics:
            metric_name = metric.__name__
            res = metric(sims, query_masks=meta["query_masks"])
            verbose(epoch=0, metrics=res, name=dataset, mode=metric_name)
            nested_metrics[metric_name] = res

    if data_loaders.num_test_captions == 1:
        visualizer.visualize_ranking(
            sims=sims,
            meta=meta,
            epoch=0,
            nested_metrics=nested_metrics,
        )
    log = {}
    for subkey, subval in nested_metrics.items():
        for subsubkey, subsubval in subval.items():
            log[f"test_{subkey}_{subsubkey}"] = subsubval
    for key, value in log.items():
        logger.info(' {:15s}: {}'.format(str(key), value))
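All three variants call each metric as metric(sims, query_masks=meta["query_masks"]) and expect a dict of scalar results back, which is then flattened into test_<metric>_<key> log entries. As a sketch of that interface, here is a toy recall@k that assumes a square similarity matrix in which query i matches item i; the project's real metrics additionally use query_masks to handle multiple captions per video.

import numpy as np

def toy_recall_at_k(sims, query_masks=None, ks=(1, 5, 10)):
    """Toy metric matching the interface assumed above: rows are queries,
    columns are gallery items, and query i is taken to match item i."""
    assert sims.shape[0] == sims.shape[1], "toy version assumes one query per item"
    gt_scores = np.diag(sims)[:, None]
    # 0-indexed rank of the ground-truth item for each query
    ranks = (sims > gt_scores).sum(axis=1)
    return {f"R{k}": float(100 * np.mean(ranks < k)) for k in ks}

# e.g. res = toy_recall_at_k(sims) plugs into the same nested_metrics / log
# flattening used at the end of each example.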