def build_detection_val_loader(cfg, dataset_name: str, mapper=None):
    """
    Build a one-image-per-batch DataLoader over a single validation dataset.

    Args:
        cfg: a detectron2 CfgNode.
        dataset_name (str): a dataset name registered in DatasetCatalog.
            When ``cfg.MODEL.LOAD_PROPOSALS`` is set, it must also appear in
            ``cfg.DATASETS.TEST`` so the matching proposal file can be found.
        mapper (callable or None): maps a dataset dict to model input.
            Defaults to ``DatasetMapper(cfg, True)``.
            NOTE(review): ``is_train=True`` applies training-time transforms
            during validation — confirm this is intentional; the sibling
            test loader uses ``DatasetMapper(cfg, False)``.

    Returns:
        torch.utils.data.DataLoader: yields length-1 lists of dataset dicts
        (``trivial_batch_collator`` performs no stacking).
    """
    dataset_dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=[
            cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)]
        ]
        if cfg.MODEL.LOAD_PROPOSALS
        else None,
    )
    dataset = DatasetFromList(dataset_dicts)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    # InferenceSampler partitions the index range across distributed workers
    # so every sample is visited exactly once; batch size 1 is the standard
    # convention when reporting inference time.
    sampler = InferenceSampler(len(dataset))
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader
def test_build_iterable_dataloader_inference(self):
    # build_detection_test_loader must accept an already-iterable dataset.
    total = 50
    base = DatasetFromList(list(range(total)))
    iterable = ToIterableDataset(base, InferenceSampler(len(base)))
    loader = build_detection_test_loader(
        dataset=iterable, mapper=lambda x: x, num_workers=3
    )
    self._check_is_range(loader, total)
def test_build_batch_dataloader_inference(self):
    # build_batch_data_loader should also serve inference-style iteration.
    total = 96
    source = DatasetFromList(list(range(total)))
    inf_sampler = InferenceSampler(len(source))
    loader = build_batch_data_loader(source, inf_sampler, 8, num_workers=3)
    self._check_is_range(loader, total)
def build_detection_test_loader(cfg, dataset_name, mapper=None):
    """
    Build a DataLoader over ``dataset_name`` for inference: one image per
    batch, test-time mapping, and an InferenceSampler covering every sample.

    Args:
        cfg: a detectron2 CfgNode.
        dataset_name (str): dataset registered in DatasetCatalog; must be in
            ``cfg.DATASETS.TEST`` when proposals are loaded.
        mapper (callable or None): dataset-dict -> model-input mapping;
            defaults to ``DatasetMapper(cfg, False)``.

    Returns:
        torch.utils.data.DataLoader yielding length-1 lists of mapped dicts.
    """
    proposal_files = None
    if cfg.MODEL.LOAD_PROPOSALS:
        test_index = list(cfg.DATASETS.TEST).index(dataset_name)
        proposal_files = [cfg.DATASETS.PROPOSAL_FILES_TEST[test_index]]

    dataset_dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=proposal_files,
    )

    test_mapper = DatasetMapper(cfg, False) if mapper is None else mapper
    mapped = MapDataset(DatasetFromList(dataset_dicts), test_mapper)

    inf_sampler = InferenceSampler(len(mapped))
    one_per_batch = torch.utils.data.sampler.BatchSampler(
        inf_sampler, 1, drop_last=False
    )
    return torch.utils.data.DataLoader(
        mapped,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=one_per_batch,
        collate_fn=trivial_batch_collator,
    )
def test_build_dataloader_inference(self):
    # An explicit InferenceSampler plus identity mapper should yield 0..N-1.
    total = 50
    source = DatasetFromList(list(range(total)))
    inf_sampler = InferenceSampler(len(source))
    loader = build_detection_test_loader(
        dataset=source,
        sampler=inf_sampler,
        mapper=lambda item: item,
        num_workers=3,
    )
    self._check_is_range(loader, total)
def do_test(cfg, model):
    """
    Evaluate ``model`` on every dataset in ``cfg.DATASETS.TEST``.

    For each dataset this inlines the equivalent of
    ``build_detection_test_loader``: fetch the dataset dicts, subsample them,
    map with a test-time mapper, and iterate one image per batch. The
    per-dataset metrics are printed on the main process and collected.

    Args:
        cfg: a detectron2 CfgNode.
        model: the model to evaluate (passed to ``inference_on_dataset``).

    Returns:
        OrderedDict: dataset name -> results from ``inference_on_dataset``.
    """
    results = OrderedDict()
    for dataset_name in cfg.DATASETS.TEST:
        descs_valid: List[Dict] = DatasetCatalog.get(dataset_name)
        # The validation dataset is too large: evaluate on a fixed random
        # sample of 10 images, seeded so the subset is reproducible.
        random.seed(2020)
        descs_valid = random.sample(descs_valid, k=10)
        dataset = DatasetFromList(descs_valid)
        mapper = make_mapper(dataset_name, is_train=False, augmentations=None)
        dataset = MapDataset(dataset, mapper)

        sampler = InferenceSampler(len(dataset))
        # Always use 1 image per worker during inference since this is the
        # standard when reporting inference time in papers.
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, 1, drop_last=False)
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
        )

        evaluator = get_evaluator2(
            cfg, dataset_name,
            os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name))
        # Expose the evaluator globally for interactive inspection/debugging.
        global EVALUATOR
        EVALUATOR = evaluator
        results_i = inference_on_dataset(model, data_loader, evaluator)
        results[dataset_name] = results_i  # TODO: Multiprocessing?

        if comm.is_main_process():
            logger.info("Evaluation results for {} in csv format:".format(
                dataset_name))
            # Pretty-print each task's metrics as a pandas frame instead of
            # detectron2's print_csv_format.
            for tsk, res in results_i.items():
                global RES
                RES = res  # keep last result reachable for debugging
                res_df = pd.DataFrame(pd.Series(res, name='value'))
                res_df = res_df[res_df['value'].notna()]
                # Drop the task prefix (everything before the first '/').
                res_df.index = res_df.index.map(
                    lambda x: '/'.join(x.split('/')[1:]))
                pd.set_option('display.max_rows', None)
                print(res_df)
                pd.reset_option('display.max_rows')
    # BUGFIX: the collected results were built but never returned.
    return results
def do_test(cfg, model):
    """
    Evaluate ``model`` on every dataset in ``cfg.DATASETS.TEST`` and print
    per-task metrics on the main process.

    COCO validation uses the full dataset with the standard test-time
    ``DatasetMapper``; any other dataset (Open Images) is randomly
    subsampled to ``N_IMAGES_PER_TEST`` images and mapped with
    ``make_mapper``.

    Args:
        cfg: a detectron2 CfgNode.
        model: the model to evaluate (passed to ``inference_on_dataset``).
    """
    for dataset_name in cfg.DATASETS.TEST:
        # Inlined equivalent of build_detection_test_loader(cfg, dataset_name).
        if dataset_name == 'coco_2017_val':
            dicts_valid: List[Dict] = DatasetCatalog.get(dataset_name)
            ds_valid = DatasetFromList(dicts_valid, copy=False)
            mapper = DatasetMapper(cfg, is_train=False)
        else:  # Open-Image-Dataset
            descs_get: List[Dict] = DatasetCatalog.get(dataset_name)
            # The validation dataset is too large: draw a seeded random
            # sample. NOTE(review): random.choices samples WITH replacement,
            # so duplicates are possible — confirm this is intended.
            random.seed(2020)
            descs_valid = random.choices(descs_get, k=N_IMAGES_PER_TEST)
            # TODO: clear cache.
            ds_valid = DatasetFromList(descs_valid)
            mapper = make_mapper(dataset_name, is_train=False,
                                 augmentations=None)

        ds_valid = MapDataset(ds_valid, mapper)

        sampler = InferenceSampler(len(ds_valid))
        # Always use 1 image per worker during inference since this is the
        # standard when reporting inference time in papers.
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, 1, drop_last=False)
        data_loader = torch.utils.data.DataLoader(
            ds_valid,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
        )

        evaluator = get_evaluator2(
            cfg, dataset_name,
            os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
        )
        results_i = inference_on_dataset(model, data_loader, evaluator)

        if comm.is_main_process():
            logger.info(
                "Evaluation results for {} in csv format:".format(dataset_name))
            # Pretty-print each task's metrics as a pandas frame instead of
            # detectron2's print_csv_format.
            for tsk, res in results_i.items():
                res_df = pd.DataFrame(pd.Series(res, name='value'))
                res_df = res_df[res_df['value'].notna()]
                # Drop the task prefix (everything before the first '/').
                res_df.index = res_df.index.map(
                    lambda x: '/'.join(x.split('/')[1:]))
                pd.set_option('display.max_rows', None)
                print(res_df)
                pd.reset_option('display.max_rows')
def test_build_dataloader_inference(self):
    total = 50
    source = DatasetFromList(list(range(total)))
    inf_sampler = InferenceSampler(len(source))

    # Parallel loading must still cover exactly 0..N-1.
    parallel_loader = build_detection_test_loader(
        dataset=source,
        sampler=inf_sampler,
        mapper=lambda item: item,
        num_workers=3,
    )
    self._check_is_range(parallel_loader, total)

    # A non-unit batch_size must not drop or duplicate samples.
    batched_loader = build_detection_test_loader(
        dataset=source,
        sampler=inf_sampler,
        mapper=lambda item: item,
        batch_size=4,
        num_workers=0,
    )
    self._check_is_range(batched_loader, total)
def build_detection_test_loader(cfg, dataset_name, mapper=None):
    """
    Similar to `build_detection_train_loader`.
    But this function uses the given `dataset_name` argument (instead of the
    names in cfg), and uses batch size 1.

    Args:
        cfg: a detectron2 CfgNode
        dataset_name (str): a name of the dataset that's available in the
            DatasetCatalog
        mapper (callable): a callable which takes a sample (dict) from dataset
            and returns the format to be consumed by the model. By default it
            will be `DatasetMapper(cfg, False)`.

    Returns:
        DataLoader: a torch DataLoader, that loads the given detection dataset,
        with test-time transformation and batching.
    """
    _add_category_whitelists_to_metadata(cfg)
    _add_category_maps_to_metadata(cfg)

    proposal_files = None
    if cfg.MODEL.LOAD_PROPOSALS:
        test_index = list(cfg.DATASETS.TEST).index(dataset_name)
        proposal_files = [cfg.DATASETS.PROPOSAL_FILES_TEST[test_index]]

    dataset_dicts = combine_detection_dataset_dicts(
        [dataset_name],
        keep_instance_predicate=_get_test_keep_instance_predicate(cfg),
        proposal_files=proposal_files,
    )

    test_mapper = DatasetMapper(cfg, False) if mapper is None else mapper
    mapped = MapDataset(DatasetFromList(dataset_dicts), test_mapper)

    # Always use 1 image per worker during inference since this is the
    # standard when reporting inference time in papers.
    inf_sampler = InferenceSampler(len(mapped))
    one_per_batch = torch.utils.data.sampler.BatchSampler(
        inf_sampler, 1, drop_last=False
    )
    return torch.utils.data.DataLoader(
        mapped,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=one_per_batch,
        collate_fn=trivial_batch_collator,
    )
def build_hand_test_loader(cfg):
    """
    Build a one-image-per-batch DataLoader over the hand dataset's test split.

    NOTE(review): the "test" split is loaded with
    ``cfg.HAND_PROJECT.DATA.ANNOT_SUBSET_TRAIN`` — confirm the TRAIN
    annotation subset is really intended here.

    Args:
        cfg: a detectron2 CfgNode with a ``HAND_PROJECT.DATA`` section.

    Returns:
        torch.utils.data.DataLoader: yields length-1 lists of mapped dataset
        dicts via ``trivial_batch_collator``.
    """
    dataset_dicts, num_per_epoch = load_hand(
        "test",
        cfg.HAND_PROJECT.DATA.ANNOT_SUBSET_TRAIN,
        cfg.HAND_PROJECT.DATA.BASE_PATH,
        selects=[1],
    )
    dataset = DatasetFromList(dataset_dicts)
    mapper = DatasetMapper(cfg, False)
    dataset = MapDataset(dataset, mapper)

    sampler = InferenceSampler(len(dataset))
    # Always use 1 image per worker during inference since this is the
    # standard when reporting inference time in papers.
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)

    # num_workers is pinned to 0 (single-process loading) rather than
    # cfg.DATALOADER.NUM_WORKERS — presumably a debugging choice; verify.
    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=0,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader
def build_batch_test_loader(dataset, *, mapper, sampler=None, num_workers=0, batch_size=4):
    """
    Similar to `build_detection_train_loader`, but uses
    :class:`InferenceSampler` by default. This sampler coordinates all workers
    to produce the exact set of all samples.
    This interface is experimental.

    Note: the docstring previously claimed a batch size of 1, while the code
    batched 4 samples at a time; the batch size is now an explicit parameter
    defaulting to the prior behavior (4).

    Args:
        dataset (list or torch.utils.data.Dataset): a list of dataset dicts,
            or a map-style pytorch dataset. They can be obtained by using
            :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
        mapper (callable): a callable which takes a sample (dict) from dataset
           and returns the format to be consumed by the model.
           When using cfg, the default choice is ``DatasetMapper(cfg, is_train=False)``.
        sampler (torch.utils.data.sampler.Sampler or None): a sampler that
            produces indices to be applied on ``dataset``. Default to
            :class:`InferenceSampler`, which splits the dataset across all
            workers.
        num_workers (int): number of parallel data loading workers
        batch_size (int): number of samples per batch (default 4, matching
            the previous hard-coded value).

    Returns:
        DataLoader: a torch DataLoader, that loads the given detection dataset,
        with test-time transformation and batching.

    Examples:
    ::
        data_loader = build_detection_test_loader(
            DatasetRegistry.get("my_test"),
            mapper=DatasetMapper(...))

        # or, instantiate with a CfgNode:
        data_loader = build_detection_test_loader(cfg, "my_test")
    """
    if isinstance(dataset, list):
        dataset = DatasetFromList(dataset, copy=False)
    if mapper is not None:
        dataset = MapDataset(dataset, mapper)
    if sampler is None:
        sampler = InferenceSampler(len(dataset))
    batch_sampler = torch.utils.data.sampler.BatchSampler(
        sampler, batch_size, drop_last=False
    )
    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=num_workers,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader
def test_local_indices(self):
    # Each case: (dataset size, world size, per-rank index ranges expected
    # from InferenceSampler's partitioning).
    cases = [
        (0, 5, [range(0) for _ in range(5)]),
        (16, 2, [range(8), range(8, 16)]),
        (2, 3, [range(1), range(1, 2), range(0)]),
        (42, 4, [range(11), range(11, 22), range(22, 32), range(32, 42)]),
    ]
    for size, world_size, expected in cases:
        with self.subTest(f"size={size}, world_size={world_size}"):
            actual = [
                InferenceSampler._get_local_indices(size, world_size, rank)
                for rank in range(world_size)
            ]
            self.assertEqual(actual, expected)
# Build the config, the test-set DataLoader, and the model for inference.
cfg = get_cfg()
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.freeze()
# If you don't like any of the default setup, write your own setup code.
default_setup(cfg, args)

# --- build dataloader -------------------------------------------------------
file_paths = glob.glob(DS_DIR + 'test/*.jpg')
ds = DatasetFromList(file_paths)
ds = MapDataset(ds, map_func=kaggle_mapper)
# InferenceSampler covers every file exactly once across workers.
sampler = InferenceSampler(len(ds))
batch_sampler = torch.utils.data.sampler.BatchSampler(
    sampler, BATCH_SIZE, drop_last=False)
data_loader = torch.utils.data.DataLoader(
    ds,
    batch_sampler=batch_sampler,
    collate_fn=trivial_batch_collator,
)

# --- create model and load weights ------------------------------------------
meta_arch = cfg.MODEL.META_ARCHITECTURE
model = META_ARCH_REGISTRY.get(meta_arch)(cfg)
model.eval()
model.to(torch.device(cfg.MODEL.DEVICE))