Example #1
def build_detection_val_loader(cfg, dataset_name: str, mapper=None):
    dataset_dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=[
            cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)]
        ]
        if cfg.MODEL.LOAD_PROPOSALS
        else None,
    )
    # dataset_dicts = get_detection_dataset_dicts(
    #     [dataset_name],
    #     filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
    #     min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
    #     if cfg.MODEL.KEYPOINT_ON
    #     else 0,
    #     proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
    # )
    dataset = DatasetFromList(dataset_dicts)

    if mapper is None:
        # NOTE: is_train=True applies training-time transforms and keeps
        # annotations; for pure inference one would normally pass
        # DatasetMapper(cfg, False) as in the other examples below.
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)
    sampler = InferenceSampler(len(dataset))
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader
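For orientation, here is a minimal, hypothetical driver for a loader like this one (`get_cfg` is detectron2's standard config factory; the dataset name and worker count are illustrative, and the dataset must already be registered in the `DatasetCatalog`):

from detectron2.config import get_cfg

cfg = get_cfg()                 # default config; MODEL.LOAD_PROPOSALS is False
cfg.DATALOADER.NUM_WORKERS = 2  # illustrative value
loader = build_detection_val_loader(cfg, "coco_2017_val")  # hypothetical dataset name
for batch in loader:
    sample = batch[0]  # trivial_batch_collator yields plain lists of mapped dicts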
Example #2
    def test_build_iterable_dataloader_inference(self):
        # Test that build_detection_test_loader supports iterable dataset
        N = 50
        ds = DatasetFromList(list(range(N)))
        ds = ToIterableDataset(ds, InferenceSampler(len(ds)))
        dl = build_detection_test_loader(dataset=ds, mapper=lambda x: x, num_workers=3)
        self._check_is_range(dl, N)
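Here `ToIterableDataset` wraps the map-style `DatasetFromList` and the `InferenceSampler` into a single PyTorch `IterableDataset`, which is why no separate `sampler` argument is passed to `build_detection_test_loader`.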
Example #3
    def test_build_batch_dataloader_inference(self):
        # Test that build_batch_data_loader can be used for inference
        N = 96
        ds = DatasetFromList(list(range(N)))
        sampler = InferenceSampler(len(ds))
        dl = build_batch_data_loader(ds, sampler, 8, num_workers=3)
        self._check_is_range(dl, N)
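Every loader in these examples hands `trivial_batch_collator` to the `DataLoader`. In detectron2 this collator is a one-liner that returns the sampled batch unchanged, so the model receives a plain list of dataset dicts rather than collated tensors:

def trivial_batch_collator(batch):
    """
    A batch collator that does nothing.
    """
    return batch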
Example #4
def build_detection_test_loader(cfg, dataset_name, mapper=None):
    dataset_dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=[
            cfg.DATASETS.PROPOSAL_FILES_TEST[list(
                cfg.DATASETS.TEST).index(dataset_name)]
        ] if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts)
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    dataset = MapDataset(dataset, mapper)

    sampler = InferenceSampler(len(dataset))
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler,
                                                          1,
                                                          drop_last=False)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader
Example #5
    def test_build_dataloader_inference(self):
        N = 50
        ds = DatasetFromList(list(range(N)))
        sampler = InferenceSampler(len(ds))
        dl = build_detection_test_loader(
            dataset=ds, sampler=sampler, mapper=lambda x: x, num_workers=3
        )
        self._check_is_range(dl, N)
Example #6
def do_test(cfg, model):
    results = OrderedDict()
    for dataset_name in cfg.DATASETS.TEST:
        # data_loader = build_detection_test_loader(cfg, dataset_name)
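        # NOTE: a non-empty string literal is always truthy, so "if '<label>':"
        # blocks like the ones below always execute; they merely serve as named,
        # foldable sections in the author's editor.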
        if 'build_detection_test_loader':
            if 'get_detection_dataset_dicts':
                descs_valid: List[Dict] = DatasetCatalog.get(dataset_name)
            # validation dataset is too large.
            random.seed(2020)
            descs_valid = random.sample(descs_valid, k=10)
            dataset = DatasetFromList(descs_valid)
            if 'DatasetMapper':
                mapper = make_mapper(dataset_name,
                                     is_train=False,
                                     augmentations=None)
            dataset = MapDataset(dataset, mapper)

            sampler = InferenceSampler(len(dataset))
            # Always use 1 image per worker during inference since this is the
            # standard when reporting inference time in papers.
            batch_sampler = torch.utils.data.sampler.BatchSampler(
                sampler, 1, drop_last=False)

            data_loader = torch.utils.data.DataLoader(
                dataset,
                num_workers=cfg.DATALOADER.NUM_WORKERS,
                batch_sampler=batch_sampler,
                collate_fn=trivial_batch_collator,
            )

        evaluator = get_evaluator2(
            cfg, dataset_name,
            os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name))
        global EVALUATOR
        EVALUATOR = evaluator

        results_i = inference_on_dataset(model, data_loader, evaluator)
        results[dataset_name] = results_i
        # TODO: Multiprocessing?
        if comm.is_main_process():
            logger.info("Evaluation results for {} in csv format:".format(
                dataset_name))
            if 'print_csv_format(results_i)':
                for tsk, res in results_i.items():
                    global RES
                    RES = res
                    res_df = pd.DataFrame(pd.Series(res, name='value'))
                    res_df = res_df[res_df['value'].notna()]
                    # res_df = res_df[res_df['value'] > 0]
                    res_df.index = res_df.index.map(
                        lambda x: '/'.join(x.split('/')[1:]))
                    pd.set_option('display.max_rows', None)
                    print(res_df)
                    pd.reset_option('display.max_rows')
Example #7
def do_test(cfg, model):
    for dataset_name in cfg.DATASETS.TEST:
        # data_loader = build_detection_test_loader(cfg, dataset_name)
        if 'build_detection_test_loader':
            if dataset_name == 'coco_2017_val':
                dicts_valid: List[Dict] = DatasetCatalog.get(dataset_name)
                if "filter_empty and has_instances":
                    ...
                ds_valid = DatasetFromList(dicts_valid, copy=False)
                mapper = DatasetMapper(cfg, is_train=False)
            else:  # Open-Image-Dataset
                if 'get_detection_dataset_dicts':
                    descs_get: List[Dict] = DatasetCatalog.get(dataset_name)
                # validation dataset is too large.
                random.seed(2020)
                # NOTE: random.choices samples *with* replacement, so the same
                # image may be drawn more than once; random.sample (as in the
                # previous example) would avoid duplicates.
                descs_valid = random.choices(descs_get, k=N_IMAGES_PER_TEST)
                # TODO: clear cache.
                ds_valid = DatasetFromList(descs_valid)
                if 'DatasetMapper':
                    mapper = make_mapper(dataset_name, is_train=False, augmentations=None)

            ds_valid = MapDataset(ds_valid, mapper)

            sampler = InferenceSampler(len(ds_valid))
            # Always use 1 image per worker during inference since this is the
            # standard when reporting inference time in papers.
            batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)

            data_loader = torch.utils.data.DataLoader(
                ds_valid,
                num_workers=cfg.DATALOADER.NUM_WORKERS,
                batch_sampler=batch_sampler,
                collate_fn=trivial_batch_collator,
            )

        evaluator = get_evaluator2(
            cfg, dataset_name, os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
        )

        results_i = inference_on_dataset(model, data_loader, evaluator)
        if comm.is_main_process():
            logger.info("Evaluation results for {} in csv format:".format(dataset_name))
            # print_csv_format(results_i)
            for tsk, res in results_i.items():
                res_df = pd.DataFrame(pd.Series(res, name='value'))
                res_df = res_df[res_df['value'].notna()]
                res_df.index = res_df.index.map(lambda x: '/'.join(x.split('/')[1:]))
                pd.set_option('display.max_rows', None)
                print(res_df)
                pd.reset_option('display.max_rows')
Example #8
    def test_build_dataloader_inference(self):
        N = 50
        ds = DatasetFromList(list(range(N)))
        sampler = InferenceSampler(len(ds))
        # test that parallel loader works correctly
        dl = build_detection_test_loader(
            dataset=ds, sampler=sampler, mapper=lambda x: x, num_workers=3
        )
        self._check_is_range(dl, N)

        # test that batch_size works correctly
        dl = build_detection_test_loader(
            dataset=ds, sampler=sampler, mapper=lambda x: x, batch_size=4, num_workers=0
        )
        self._check_is_range(dl, N)
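Unlike Example #5, this test also exercises the `batch_size` argument that newer detectron2 versions expose on `build_detection_test_loader` (the default remains 1).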
Example #9
def build_detection_test_loader(cfg, dataset_name, mapper=None):
    """
    Similar to `build_detection_train_loader`.
    But this function uses the given `dataset_name` argument (instead of the names in cfg),
    and uses batch size 1.

    Args:
        cfg: a detectron2 CfgNode
        dataset_name (str): a name of the dataset that's available in the DatasetCatalog
        mapper (callable): a callable which takes a sample (dict) from dataset
            and returns the format to be consumed by the model.
            By default it will be `DatasetMapper(cfg, False)`.

    Returns:
        DataLoader: a torch DataLoader, that loads the given detection
            dataset, with test-time transformation and batching.
    """
    _add_category_whitelists_to_metadata(cfg)
    _add_category_maps_to_metadata(cfg)
    dataset_dicts = combine_detection_dataset_dicts(
        [dataset_name],
        keep_instance_predicate=_get_test_keep_instance_predicate(cfg),
        proposal_files=[
            cfg.DATASETS.PROPOSAL_FILES_TEST[list(
                cfg.DATASETS.TEST).index(dataset_name)]
        ] if cfg.MODEL.LOAD_PROPOSALS else None,
    )

    dataset = DatasetFromList(dataset_dicts)
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    dataset = MapDataset(dataset, mapper)

    sampler = InferenceSampler(len(dataset))
    # Always use 1 image per worker during inference since this is the
    # standard when reporting inference time in papers.
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler,
                                                          1,
                                                          drop_last=False)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader
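Note how the proposal file is chosen: `dataset_name` is looked up in `cfg.DATASETS.TEST` and the resulting index is reused in `cfg.DATASETS.PROPOSAL_FILES_TEST`, so the two lists must be ordered consistently; `list.index` raises a `ValueError` if the name is not among the test datasets.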
Example #10
def build_hand_test_loader(cfg):
    """
    """
    dataset_dicts, num_per_epoch = load_hand("test", cfg.HAND_PROJECT.DATA.ANNOT_SUBSET_TRAIN, cfg.HAND_PROJECT.DATA.BASE_PATH, selects=[1])

    dataset = DatasetFromList(dataset_dicts)
    mapper = DatasetMapper(cfg, False)
    dataset = MapDataset(dataset, mapper)

    sampler = InferenceSampler(len(dataset))
    # Always use 1 image per worker during inference since this is the
    # standard when reporting inference time in papers.
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)

    # data_loader = torch.utils.data.DataLoader(dataset, num_workers=cfg.DATALOADER.NUM_WORKERS, batch_sampler=batch_sampler, collate_fn=trivial_batch_collator)
    data_loader = torch.utils.data.DataLoader(dataset, num_workers=0, batch_sampler=batch_sampler, collate_fn=trivial_batch_collator)
    return data_loader
Example #11
def build_batch_test_loader(dataset, *, mapper, sampler=None, num_workers=0):
    """
    Similar to `build_detection_train_loader`, but uses a fixed batch size of 4,
    and :class:`InferenceSampler`. This sampler coordinates all workers to
    produce the exact set of all samples.
    This interface is experimental.
    Args:
        dataset (list or torch.utils.data.Dataset): a list of dataset dicts,
            or a map-style pytorch dataset. They can be obtained by using
            :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
        mapper (callable): a callable which takes a sample (dict) from dataset
            and returns the format to be consumed by the model.
            When using cfg, the default choice is ``DatasetMapper(cfg, is_train=False)``.
        sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces
            indices to be applied on ``dataset``. Default to :class:`InferenceSampler`,
            which splits the dataset across all workers.
        num_workers (int): number of parallel data loading workers
    Returns:
        DataLoader: a torch DataLoader, that loads the given detection
        dataset, with test-time transformation and batching.
    Examples:
    ::
        data_loader = build_batch_test_loader(
            DatasetRegistry.get("my_test"),
            mapper=DatasetMapper(...))
    """
    if isinstance(dataset, list):
        dataset = DatasetFromList(dataset, copy=False)
    if mapper is not None:
        dataset = MapDataset(dataset, mapper)
    if sampler is None:
        sampler = InferenceSampler(len(dataset))
    # NOTE: the stock detectron2 test loader uses 1 image per worker (the
    # standard when reporting inference time in papers); this variant
    # deliberately batches 4 images at a time.
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 4, drop_last=False)
    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=num_workers,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader
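Because `trivial_batch_collator` returns the raw list, a batch size of 4 simply yields a list of four mapped dicts per iteration; detectron2-style models accept such lists directly, so no tensor collation is required.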
Example #12
    def test_local_indices(self):
        sizes = [0, 16, 2, 42]
        world_sizes = [5, 2, 3, 4]

        expected_results = [
            [range(0) for _ in range(5)],
            [range(8), range(8, 16)],
            [range(1), range(1, 2), range(0)],
            [range(11), range(11, 22), range(22, 32), range(32, 42)],
        ]

        for size, world_size, expected_result in zip(sizes, world_sizes,
                                                     expected_results):
            with self.subTest(f"size={size}, world_size={world_size}"):
                local_indices = [
                    InferenceSampler._get_local_indices(size, world_size, r)
                    for r in range(world_size)
                ]
                self.assertEqual(local_indices, expected_result)
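For reference, the sharding behavior this test encodes can be reproduced with the sketch below (my reconstruction from the expected results above; the library's actual implementation may differ in detail): each rank gets a contiguous shard, and the first `size % world_size` ranks receive one extra index.

def get_local_indices(total_size: int, world_size: int, rank: int) -> range:
    # contiguous shards; the first (total_size % world_size) ranks get one
    # extra element, matching expected_results in the test above
    shard_size = total_size // world_size
    left = total_size % world_size
    begin = shard_size * rank + min(rank, left)
    end = begin + shard_size + (1 if rank < left else 0)
    return range(begin, end)

# e.g. total_size=42, world_size=4 ->
#   range(0, 11), range(11, 22), range(22, 32), range(32, 42)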
Example #13
        cfg = get_cfg()
        cfg.merge_from_file(args.config_file)
        cfg.merge_from_list(args.opts)
        cfg.freeze()
        default_setup(
            cfg, args
        )  # if you don't like any of the default setup, write your own setup code

    if 'build dataloader':
        file_paths = glob.glob(DS_DIR + 'test/*.jpg')
        # import numpy as np
        # file_paths = np.random.choice(file_paths, size=200, replace=False)
        ds = DatasetFromList(file_paths)
        ds = MapDataset(ds, map_func=kaggle_mapper)

        sampler = InferenceSampler(len(ds))
        batch_sampler = torch.utils.data.sampler.BatchSampler(sampler,
                                                              BATCH_SIZE,
                                                              drop_last=False)

        data_loader = torch.utils.data.DataLoader(
            ds,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
        )

    if 'create model and load weights':
        meta_arch = cfg.MODEL.META_ARCHITECTURE
        model = META_ARCH_REGISTRY.get(meta_arch)(cfg)
        model.eval()
        model.to(torch.device(cfg.MODEL.DEVICE))
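To complete the picture, a hypothetical inference loop over this loader (assuming `kaggle_mapper` produces the standard detectron2 input format, i.e. one dict per image):

with torch.no_grad():
    for batch in data_loader:
        outputs = model(batch)  # list of input dicts in, list of prediction dicts out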