def process_eval_image(
    cfg,
    fname_in,
    roi,
    fname_out,
    spatial_levels,
    image_helper,
    model,
    pca,
    eval_dataset_name,
):
    if is_revisited_dataset(eval_dataset_name):
        img = image_helper.load_and_prepare_revisited_image(fname_in, roi=roi)
    elif is_instre_dataset(eval_dataset_name):
        img = image_helper.load_and_prepare_instre_image(fname_in)
    else:
        img = image_helper.load_and_prepare_image(fname_in, roi=roi)
    v = torch.autograd.Variable(img.unsqueeze(0))
    vc = v.cuda()
    # The model output is always a list.
    activation_map = model(vc)[0].cpu()
    # process the features: rmac | l2 norm
    if cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "rmac":
        descriptors = get_rmac_descriptors(activation_map,
                                           spatial_levels,
                                           pca=pca)
    elif cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "l2_norm":
        # otherwise, simply L2-normalize the features
        descriptors = F.normalize(activation_map, p=2, dim=0)
    else:
        descriptors = activation_map
    save_file(descriptors.data.numpy(), fname_out)
    return descriptors.data.numpy()
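A minimal sketch of how the helper above might be driven from a query loop. The dataset accessors, output directory, and config fields used here are illustrative assumptions and are not defined by the snippet itself.

# Hypothetical driver loop; eval_dataset, image_helper, model, pca and
# features_dir are placeholders assumed to be constructed elsewhere.
for idx in range(eval_dataset.get_num_query_images()):    # assumed accessor
    fname_in = eval_dataset.get_query_filename(idx)       # assumed accessor
    roi = eval_dataset.get_query_roi(idx)                  # assumed accessor
    process_eval_image(
        cfg,
        fname_in,
        roi,
        fname_out=f"{features_dir}/query_{idx}.npy",
        spatial_levels=cfg.IMG_RETRIEVAL.SPATIAL_LEVELS,           # assumed config field
        image_helper=image_helper,
        model=model,
        pca=pca,
        eval_dataset_name=cfg.IMG_RETRIEVAL.EVAL_DATASET_NAME,     # assumed config field
    )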
Example #2
def get_transforms(cfg, dataset_name):
    # Setup the data transforms (basic) that we apply on the train/eval dataset.
    if is_instre_dataset(dataset_name) or is_whiten_dataset(dataset_name):
        transforms = torchvision.transforms.Compose([
            MultigrainResize(int((256 / 224) * cfg.IMG_RETRIEVAL.RESIZE_IMG)),
            torchvision.transforms.CenterCrop(cfg.IMG_RETRIEVAL.RESIZE_IMG),
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225]),
        ])
    else:
        transforms = [
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225]),
        ]

        if cfg.IMG_RETRIEVAL.CENTER_CROP:
            transforms = [
                torchvision.transforms.Resize(
                    int((256 / 224) * cfg.IMG_RETRIEVAL.RESIZE_IMG)),
                torchvision.transforms.CenterCrop(
                    cfg.IMG_RETRIEVAL.RESIZE_IMG),
            ] + transforms

        transforms = torchvision.transforms.Compose(transforms)

    return transforms
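For reference, a minimal sketch of applying the returned transform to a PIL image before a forward pass; the dataset name and image path are placeholders.

from PIL import Image

transforms = get_transforms(cfg, dataset_name="roxford5k")   # illustrative dataset name
img = Image.open("/path/to/query.jpg").convert("RGB")        # placeholder path
tensor = transforms(img)        # (3, H, W) float tensor, ImageNet-normalized
batch = tensor.unsqueeze(0)     # add a batch dimension before model(batch)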
def get_eval_dataset(cfg, root_dataset_path, eval_dataset_name, eval_binary_path):
    eval_data_path = f"{root_dataset_path}/{eval_dataset_name}"
    assert PathManager.exists(eval_data_path), f"Unknown path: {eval_data_path}"

    num_samples = (
        None
        if cfg.IMG_RETRIEVAL.NUM_DATABASE_SAMPLES == -1
        else cfg.IMG_RETRIEVAL.NUM_DATABASE_SAMPLES
    )

    if is_revisited_dataset(eval_dataset_name):
        eval_dataset = RevisitedInstanceRetrievalDataset(
            eval_dataset_name, root_dataset_path, num_samples=num_samples
        )
    elif is_instre_dataset(eval_dataset_name):
        eval_dataset = InstreDataset(eval_data_path, num_samples=num_samples)
    elif is_copdays_dataset(eval_dataset_name):
        eval_dataset = CopyDaysDataset(
            data_path=eval_data_path,
            num_samples=num_samples,
            use_distractors=cfg.IMG_RETRIEVAL.USE_DISTRACTORS,
        )
    else:
        eval_dataset = InstanceRetrievalDataset(
            eval_data_path, eval_binary_path, num_samples=num_samples
        )
    return eval_dataset
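A short usage sketch for the factory above; the paths are placeholders and the dataset accessors shown are assumptions about the returned object, not guaranteed by this snippet.

eval_dataset = get_eval_dataset(
    cfg,
    root_dataset_path="/datasets/instance_retrieval",    # placeholder path
    eval_dataset_name="roxford5k",                        # illustrative name
    eval_binary_path="/datasets/roxford5k_gnd.pkl",       # placeholder path
)
num_database = eval_dataset.get_num_images()              # assumed accessor
num_queries = eval_dataset.get_num_query_images()         # assumed accessor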
Example #4
def process_eval_image(
    cfg,
    fname_in,
    roi,
    fname_out,
    spatial_levels,
    image_helper,
    model,
    pca,
    eval_dataset_name,
    verbose=False,
):
    if is_revisited_dataset(eval_dataset_name):
        img = image_helper.load_and_prepare_revisited_image(fname_in, roi=roi)
    elif is_instre_dataset(eval_dataset_name):
        img = image_helper.load_and_prepare_instre_image(fname_in)
    else:
        img = image_helper.load_and_prepare_image(fname_in, roi=roi)

    v = torch.autograd.Variable(img.unsqueeze(0))
    vc = v.cuda()
    # The model output is always a list.
    activation_map = model(vc)[0].cpu()

    if verbose:
        print(f"Eval image raw activation map shape: { activation_map.shape }")

    # process the features: rmac | gem
    if cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "rmac":
        descriptors = get_rmac_descriptors(
            activation_map,
            spatial_levels,
            pca=pca,
            normalize=cfg.IMG_RETRIEVAL.NORMALIZE_FEATURES,
        )
    elif cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "gem":
        descriptors = gem(
            activation_map,
            p=cfg.IMG_RETRIEVAL.GEM_POOL_POWER,
            add_bias=True,
        )
    else:
        descriptors = activation_map

    # Optionally l2 normalize the features.
    if (cfg.IMG_RETRIEVAL.NORMALIZE_FEATURES
            and cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE != "rmac"):
        # RMAC performs normalization within the algorithm, hence we skip it here.
        descriptors = l2n(descriptors, dim=1)

    # Optionally apply pca.
    if pca and cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE != "rmac":
        # RMAC performs pca within the algorithm, hence we skip it here.
        descriptors = pca.apply(descriptors)

    if fname_out:
        save_file(descriptors.data.numpy(), fname_out, verbose=False)
    return descriptors.data.numpy()
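The gem helper referenced above is not shown on this page. As a reference for what generalized-mean (GeM) pooling computes, here is a minimal sketch over a (C, H, W) activation map based on the standard formulation (spatial mean of the p-th powers, followed by the 1/p root); it is not the repository's exact implementation, and the add_bias flag used above is not modeled.

import torch

def gem_pool(activation_map: torch.Tensor, p: float = 3.0, eps: float = 1e-6) -> torch.Tensor:
    # activation_map: (C, H, W). Clamp to keep fractional powers well defined,
    # raise to p, average over the spatial grid, then take the 1/p root.
    clamped = activation_map.clamp(min=eps)
    pooled = clamped.pow(p).mean(dim=(-2, -1)).pow(1.0 / p)
    return pooled.unsqueeze(0)  # (1, C), so l2n(descriptors, dim=1) normalizes per image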
def get_eval_dataset(cfg, root_dataset_path, eval_dataset_name, eval_binary_path):
    eval_data_path = f"{root_dataset_path}/{eval_dataset_name}"
    assert PathManager.exists(eval_data_path), f"Unknown path: {eval_data_path}"

    num_samples = 20 if cfg.IMG_RETRIEVAL.DEBUG_MODE else None

    if is_revisited_dataset(eval_dataset_name):
        eval_dataset = RevisitedInstanceRetrievalDataset(
            eval_dataset_name, root_dataset_path
        )
    elif is_instre_dataset(eval_dataset_name):
        eval_dataset = InstreDataset(eval_data_path, num_samples=num_samples)
    else:
        eval_dataset = InstanceRetrievalDataset(
            eval_data_path, eval_binary_path, num_samples=num_samples
        )
    return eval_dataset
Example #6
def process_eval_image(
    cfg,
    fname_in,
    roi,
    fname_out,
    spatial_levels,
    image_helper,
    model,
    pca,
    eval_dataset_name,
    verbose=False,
):
    with PerfTimer("read_sample", PERF_STATS):
        if is_revisited_dataset(eval_dataset_name):
            img = image_helper.load_and_prepare_revisited_image(fname_in,
                                                                roi=roi)
        elif is_instre_dataset(eval_dataset_name):
            img = image_helper.load_and_prepare_instre_image(fname_in)
        else:
            img = image_helper.load_and_prepare_image(fname_in, roi=roi)

    with PerfTimer("extract_features", PERF_STATS):
        # The model output is always a list.
        img_scalings = cfg.IMG_RETRIEVAL.IMG_SCALINGS or [1]
        activation_maps = extract_activation_maps(img, model, img_scalings)

    if verbose:
        print(
            f"Example eval image raw activation map shape: { activation_maps[0].shape }"  # NOQA
        )
    with PerfTimer("post_process_features", PERF_STATS):
        # process the features: rmac | gem
        if cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "rmac":
            descriptors = get_rmac_descriptors(
                activation_maps[0],
                spatial_levels,
                pca=pca,
                normalize=cfg.IMG_RETRIEVAL.NORMALIZE_FEATURES,
            )
        elif cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "gem":
            descriptors = get_average_gem(
                activation_maps,
                p=cfg.IMG_RETRIEVAL.GEM_POOL_POWER,
                add_bias=True,
            )
        else:
            descriptors = torch.mean(torch.stack(activation_maps), dim=0)
            descriptors = descriptors.reshape(descriptors.shape[0], -1)

        # Optionally l2 normalize the features.
        if (cfg.IMG_RETRIEVAL.NORMALIZE_FEATURES
                and cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE != "rmac"):
            # RMAC performs normalization within the algorithm, hence we skip it here.
            descriptors = l2n(descriptors, dim=1)

        # Optionally apply pca.
        if pca and cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE != "rmac":
            # RMAC performs pca within the algorithm, hence we skip it here.
            descriptors = pca.apply(descriptors)

    if fname_out:
        save_file(descriptors.data.numpy(), fname_out, verbose=False)

    return descriptors.data.numpy()
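The extract_activation_maps helper is also not shown here. As an illustration of what multi-scale extraction could look like, here is a sketch that bilinearly rescales the input per scaling factor and reuses the model(x)[0] output convention from the examples above; this is an assumption-based sketch, not the repository's implementation.

import torch
import torch.nn.functional as F

@torch.no_grad()
def extract_activation_maps_sketch(img, model, img_scalings):
    # img: (3, H, W) preprocessed image tensor; img_scalings: e.g. [1, 0.5, 1.5]
    activation_maps = []
    batch = img.unsqueeze(0).cuda()
    for scale in img_scalings:
        scaled = batch if scale == 1 else F.interpolate(
            batch, scale_factor=scale, mode="bilinear", align_corners=False
        )
        # the model output is always a list; keep the first feature map per scale
        activation_maps.append(model(scaled)[0].cpu())
    return activation_maps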