def get_coco_train(batch_size, opts, cfg_file):
    """Build a training dataloader over the COCO datasets named in the config.

    Args:
        batch_size (int): number of images per batch.
        opts: config overrides, forwarded to `setup`.
        cfg_file: config file path, forwarded to `setup`.

    Returns:
        torch.utils.data.DataLoader: yields ``list[dict]`` batches produced by
        ``DatasetMapper(cfg, True)``.
    """
    cfg = setup(opts, cfg_file)
    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)
    mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)
    sampler = samplers.TrainingSampler(len(dataset))
    # drop_last so the batch always has the same size
    batch_sampler = torch.utils.data.sampler.BatchSampler(
        sampler, batch_size, drop_last=True)
    train_loader = torch.utils.data.DataLoader(
        dataset,
        # Consistency fix: was hard-coded to 4; every other loader in this
        # file honors the config value.
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
        worker_init_fn=worker_init_reset_seed,
    )
    return train_loader
def get_coco_test(batch_size, opts, cfg_file):
    """Build an inference dataloader over the COCO-2017 separated-panoptic val split.

    Args:
        batch_size (int): number of images per batch.
        opts: config overrides, forwarded to `setup`.
        cfg_file: config file path, forwarded to `setup`.

    Returns:
        torch.utils.data.DataLoader: deterministic-order loader with
        test-time mapping applied.
    """
    cfg = setup(opts, cfg_file)
    dataset_name = 'coco_2017_val_panoptic_separated'
    dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=None,
    )
    mapped = MapDataset(DatasetFromList(dicts), DatasetMapper(cfg, False))
    batch_sampler = torch.utils.data.sampler.BatchSampler(
        samplers.InferenceSampler(len(mapped)), batch_size, drop_last=False)
    return torch.utils.data.DataLoader(
        mapped,
        num_workers=4,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
def build_detection_test_loader(cfg, dataset_name, batch_size, mapper=None):
    """Build a test-time loader for `dataset_name` with the given `batch_size`.

    Args:
        cfg: a detectron2 CfgNode.
        dataset_name (str): a dataset name registered in the DatasetCatalog.
        batch_size (int): number of images per batch.
        mapper (callable): sample dict -> model input format; defaults to
            `DatasetMapper(cfg, False)`.

    Returns:
        DataLoader: loads the dataset in deterministic order with
        test-time transformation and batching.
    """
    dataset_dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=[
            cfg.DATASETS.PROPOSAL_FILES_TEST[list(
                cfg.DATASETS.TEST).index(dataset_name)]
        ] if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts)
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    dataset = MapDataset(dataset, mapper)
    sampler = samplers.InferenceSampler(len(dataset))
    # Always use 1 image per worker during inference since this is the
    # standard when reporting inference time in papers.
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, batch_size, drop_last=False)
    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        # Fix: `_trivial_batch_collator` is not defined at module scope (it
        # only exists as a local inside `build_sequence_loader`); the sibling
        # implementation of this function uses `trivial_batch_collator`.
        collate_fn=trivial_batch_collator,
    )
    return data_loader
def build_sequence_loader(cfg, dataset_name, mapper, total_samples, batch_size=1):
    """
    Similar to `build_detection_test_loader` in the way that its sampler
    samples dataset_dicts in order and only loops once.
    """
    mapped = MapDataset(DatasetFromList(DatasetCatalog.get(dataset_name)), mapper)
    # Subsample evenly so roughly `total_samples` items are visited.
    stride = max(1, int(len(mapped) / total_samples))
    batch_sampler = BatchSampler(
        IntervalSampler(len(mapped), stride), batch_size, drop_last=False)

    def _trivial_batch_collator(batch):
        return batch

    return torch.utils.data.DataLoader(
        mapped,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=_trivial_batch_collator,
    )
def do_test(cfg, model):
    """Evaluate `model` on each dataset in cfg.DATASETS.TEST and print results.

    NOTE: the string conditions (`if 'build_detection_test_loader':` etc.) are
    always truthy; they only serve as inline section labels.
    Side effects: sets module globals EVALUATOR and RES (debug hooks).
    """
    results = OrderedDict()
    for dataset_name in cfg.DATASETS.TEST:
        # data_loader = build_detection_test_loader(cfg, dataset_name)
        if 'build_detection_test_loader':
            if 'get_detection_dataset_dicts':
                descs_valid: List[Dict] = DatasetCatalog.get(dataset_name)
                # validation dataset is too large.
                # Fixed seed so the k=10 subsample is reproducible across runs.
                random.seed(2020)
                descs_valid = random.sample(descs_valid, k=10)
                dataset = DatasetFromList(descs_valid)
            if 'DatasetMapper':
                mapper = make_mapper(dataset_name, is_train=False, augmentations=None)
                dataset = MapDataset(dataset, mapper)
            sampler = InferenceSampler(len(dataset))
            # Always use 1 image per worker during inference since this is the
            # standard when reporting inference time in papers.
            batch_sampler = torch.utils.data.sampler.BatchSampler(
                sampler, 1, drop_last=False)
            data_loader = torch.utils.data.DataLoader(
                dataset,
                num_workers=cfg.DATALOADER.NUM_WORKERS,
                batch_sampler=batch_sampler,
                collate_fn=trivial_batch_collator,
            )
        evaluator = get_evaluator2(
            cfg, dataset_name,
            os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name))
        # Expose the evaluator globally (debug/inspection hook).
        global EVALUATOR
        EVALUATOR = evaluator
        results_i = inference_on_dataset(model, data_loader, evaluator)
        results[dataset_name] = results_i  # TODO: Multiprocessing?
        if comm.is_main_process():
            logger.info("Evaluation results for {} in csv format:".format(
                dataset_name))
            if 'print_csv_format(results_i)':
                for tsk, res in results_i.items():
                    # Expose the last result dict globally (debug hook).
                    global RES
                    RES = res
                    # Render per-task metrics as a one-column DataFrame,
                    # dropping NaN rows and the task prefix from metric names.
                    res_df = pd.DataFrame(pd.Series(res, name='value'))
                    res_df = res_df[res_df['value'].notna()]
                    # res_df = res_df[res_df['value'] > 0]
                    res_df.index = res_df.index.map(
                        lambda x: '/'.join(x.split('/')[1:]))
                    pd.set_option('display.max_rows', None)
                    print(res_df)
                    pd.reset_option('display.max_rows')
def test_iter_style(self):
    """MapDataset over an IterableDataset stays iterable and maps each item."""
    class _Source(torch.utils.data.IterableDataset):
        def __iter__(self):
            return iter([1, 2, 3])

    mapped = MapDataset(_Source(), TestMapDataset.map_func)
    self.assertIsInstance(mapped, torch.utils.data.IterableDataset)
    self.assertEqual(list(mapped), [2, 6])
def build_detection_train_loader(
    dataset,
    *,
    mapper,
    sampler=None,
    total_batch_size,
    aspect_ratio_grouping=True,
    num_workers=0,
):
    """
    Build a dataloader for object detection with some default features.
    This interface is experimental.

    Args:
        dataset (list or torch.utils.data.Dataset): a list of dataset dicts
            (from :func:`DatasetCatalog.get` or
            :func:`get_detection_dataset_dicts`) or a map-style pytorch dataset.
        mapper (callable): maps a sample (dict) from ``dataset`` to the format
            consumed by the model; with cfg the default choice is
            ``DatasetMapper(cfg, is_train=True)``.
        sampler (torch.utils.data.sampler.Sampler or None): produces indices
            into ``dataset``. Defaults to :class:`TrainingSampler`, which
            coordinates a random shuffle sequence across all workers.
        total_batch_size (int): total batch size across all workers; batching
            simply puts data into a list.
        aspect_ratio_grouping (bool): group images with similar aspect ratio
            for efficiency; requires each dataset element to be a dict with
            "width" and "height" keys.
        num_workers (int): number of parallel data loading workers.

    Returns:
        torch.utils.data.DataLoader: each output is a ``list[mapped_element]``
        of length ``total_batch_size / num_workers``, where ``mapped_element``
        is produced by the ``mapper``.
    """
    ds = DatasetFromList(dataset, copy=False) if isinstance(dataset, list) else dataset
    if mapper is not None:
        ds = MapDataset(ds, mapper)
    chosen_sampler = TrainingSampler(len(ds)) if sampler is None else sampler
    assert isinstance(chosen_sampler, torch.utils.data.sampler.Sampler)
    return build_batch_data_loader(
        ds,
        chosen_sampler,
        total_batch_size,
        aspect_ratio_grouping=aspect_ratio_grouping,
        num_workers=num_workers,
    )
def do_test(cfg, model):
    """Evaluate `model` on each dataset in cfg.DATASETS.TEST and print metrics.

    NOTE: the string conditions (`if 'build_detection_test_loader':` etc.) are
    always truthy; they serve only as inline section labels.
    """
    for dataset_name in cfg.DATASETS.TEST:
        # data_loader = build_detection_test_loader(cfg, dataset_name)
        if 'build_detection_test_loader':
            if dataset_name == 'coco_2017_val':
                dicts_valid: List[Dict] = DatasetCatalog.get(dataset_name)
                if "filter_empty and has_instances":
                    ...
                ds_valid = DatasetFromList(dicts_valid, copy=False)
                mapper = DatasetMapper(cfg, is_train=False)
            else:  # Open-Image-Dataset
                if 'get_detection_dataset_dicts':
                    descs_get: List[Dict] = DatasetCatalog.get(dataset_name)
                    # validation dataset is too large.
                    # Fixed seed so the sampled subset is reproducible.
                    random.seed(2020)
                    descs_valid = random.choices(descs_get, k=N_IMAGES_PER_TEST)
                    # TODO: clear cache.
                    ds_valid = DatasetFromList(descs_valid)
                if 'DatasetMapper':
                    mapper = make_mapper(dataset_name, is_train=False, augmentations=None)
            ds_valid = MapDataset(ds_valid, mapper)
            sampler = InferenceSampler(len(ds_valid))
            # Always use 1 image per worker during inference since this is the
            # standard when reporting inference time in papers.
            batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)
            data_loader = torch.utils.data.DataLoader(
                ds_valid,
                num_workers=cfg.DATALOADER.NUM_WORKERS,
                batch_sampler=batch_sampler,
                collate_fn=trivial_batch_collator,
            )
        evaluator = get_evaluator2(
            cfg, dataset_name, os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
        )
        results_i = inference_on_dataset(model, data_loader, evaluator)
        if comm.is_main_process():
            logger.info("Evaluation results for {} in csv format:".format(dataset_name))
            # print_csv_format(results_i)
            for tsk, res in results_i.items():
                # Render per-task metrics as a one-column DataFrame, dropping
                # NaN rows and the task prefix from the metric names.
                res_df = pd.DataFrame(pd.Series(res, name='value'))
                res_df = res_df[res_df['value'].notna()]
                res_df.index = res_df.index.map(lambda x: '/'.join(x.split('/')[1:]))
                pd.set_option('display.max_rows', None)
                print(res_df)
                pd.reset_option('display.max_rows')
def build_hand_train_loader(cfg):
    """Build the hand-dataset training loader.

    Returns:
        tuple: (dataloader, num_per_epoch) where num_per_epoch comes from
        `load_hand`.
    """
    dataset_dicts, num_per_epoch = load_hand(
        cfg.HAND_PROJECT.DATA.MODE,
        cfg.HAND_PROJECT.DATA.ANNOT_SUBSET_TRAIN,
        cfg.HAND_PROJECT.DATA.BASE_PATH,
        selects=cfg.HAND_PROJECT.DATA.SELECTS)
    # pdb.set_trace()
    mapped = MapDataset(DatasetFromList(dataset_dicts, copy=False),
                        DatasetMapper(cfg, True))
    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logging.getLogger(__name__).info("Using training sampler {}".format(sampler_name))
    loader = build_batch_data_loader(
        mapped,
        TrainingSampler(len(mapped)),
        cfg.SOLVER.IMS_PER_BATCH,
        num_workers=cfg.DATALOADER.NUM_WORKERS)
    return loader, num_per_epoch
def build_simple_dataloader(dataset_name: list, batch_size):
    """Build a deterministic (unshuffled) loader for debugging.

    Args:
        dataset_name (list): dataset names to load via
            `get_detection_dataset_dicts`.
        batch_size: total batch size across workers.
    """
    dicts = get_detection_dataset_dicts(dataset_name)
    cfg = get_cfg()
    cfg["aug_kwargs"] = {}
    mapped = MapDataset(DatasetFromList(dicts, copy=False),
                        AlbumentationsMapper(cfg, False))
    # shuffle=False keeps sample order reproducible in debugging mode
    sampler = TrainingSampler(len(mapped), shuffle=False, seed=42)
    return build_batch_data_loader(dataset=mapped, sampler=sampler,
                                   total_batch_size=batch_size)
def build_detection_train_loader_with_train_sampler(cfg, mapper, seed=42, shuffle=True):
    """Build a train loader over cfg.DATASETS.TRAIN using a plain TrainingSampler.

    Args:
        cfg: a detectron2 CfgNode.
        mapper (callable): sample dict -> model input format.
        seed (int): sampler seed, for reproducibility.
        shuffle (bool): whether the sampler shuffles indices.
    """
    dataset_dicts = get_detection_dataset_dicts(cfg.DATASETS.TRAIN)
    dataset = DatasetFromList(dataset_dicts, copy=False)
    dataset = MapDataset(dataset, mapper)
    logger = logging.getLogger(__name__)
    # Bug fix: the message was hard-coded to claim "shuffle=False" even though
    # the sampler is built with the caller-supplied `shuffle` (default True).
    logger.info("Using training sampler TrainingSampler with shuffle={}, seed={}".format(
        shuffle, seed))
    sampler = TrainingSampler(len(dataset), shuffle=shuffle, seed=seed)
    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
def build_x_train_loader(
    dataset, *, mapper, sampler=None, total_batch_size, aspect_ratio_grouping=True, num_workers=0
):
    """Build a batched training dataloader.

    Mirrors `build_detection_train_loader`: wraps a list into a dataset,
    applies `mapper` if given, and defaults the sampler to TrainingSampler.
    """
    ds = DatasetFromList(dataset, copy=False) if isinstance(dataset, list) else dataset
    if mapper is not None:
        ds = MapDataset(ds, mapper)
    chosen = TrainingSampler(len(ds)) if sampler is None else sampler
    assert isinstance(chosen, torch.utils.data.sampler.Sampler)
    return build_batch_data_loader(
        ds,
        chosen,
        total_batch_size,
        aspect_ratio_grouping=aspect_ratio_grouping,
        num_workers=num_workers,
    )
def build_hand_test_loader(cfg):
    """Build the hand-dataset inference loader (batch size 1, main-process loading).

    NOTE(review): this passes ANNOT_SUBSET_TRAIN while loading mode "test" —
    confirm that subset choice is intentional.
    """
    dataset_dicts, num_per_epoch = load_hand(
        "test",
        cfg.HAND_PROJECT.DATA.ANNOT_SUBSET_TRAIN,
        cfg.HAND_PROJECT.DATA.BASE_PATH,
        selects=[1])
    mapped = MapDataset(DatasetFromList(dataset_dicts), DatasetMapper(cfg, False))
    # Always use 1 image per worker during inference since this is the
    # standard when reporting inference time in papers.
    batch_sampler = torch.utils.data.sampler.BatchSampler(
        InferenceSampler(len(mapped)), 1, drop_last=False)
    # num_workers=0 loads in the main process (the cfg.DATALOADER.NUM_WORKERS
    # variant was deliberately left commented out in the original).
    return torch.utils.data.DataLoader(
        mapped,
        num_workers=0,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator)
def build_detection_test_loader(cfg, dataset_name, batch_size, mapper=None):
    """
    Similar to `build_detection_train_loader`. But this function uses the
    given `dataset_name` argument (instead of the names in cfg), and uses
    the given `batch_size` (note: a batch size of 1 is the standard when
    reporting inference time in papers).

    Args:
        cfg: a detectron2 CfgNode
        dataset_name (str): a name of the dataset that's available in the
            DatasetCatalog
        batch_size (int): number of images per batch
        mapper (callable): a callable which takes a sample (dict) from dataset
           and returns the format to be consumed by the model.
           By default it will be `DatasetMapper(cfg, False)`.

    Returns:
        DataLoader: a torch DataLoader, that loads the given detection
        dataset, with test-time transformation and batching.
    """
    dataset_dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=[
            cfg.DATASETS.PROPOSAL_FILES_TEST[list(
                cfg.DATASETS.TEST).index(dataset_name)]
        ] if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts)
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    dataset = MapDataset(dataset, mapper)
    sampler = samplers.InferenceSampler(len(dataset))
    # Always use 1 image per worker during inference since this is the
    # standard when reporting inference time in papers.
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, batch_size, drop_last=False)
    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader
def derender_dataset(cfg, dataset_names, attributes, for_inference=False):
    """Load, flatten, filter and map the derender datasets into one MapDataset.

    Args:
        cfg: project config node.
        dataset_names: iterable of catalog names to concatenate.
        attributes: attribute spec handed to DerenderMapper.
        for_inference (bool): skip the "attributes" field requirement when True.
    """
    print("reading datasets {}".format(dataset_names))
    start = time.time()
    per_name = [DatasetCatalog.get(name) for name in dataset_names]
    dataset_dicts = list(itertools.chain.from_iterable(per_name))
    # Box source depends on whether predicted boxes are in use.
    required_fields = ["pred_box" if cfg.DATASETS.USE_PREDICTED_BOXES else "bbox"]
    if not for_inference:
        required_fields.append("attributes")
    _, dataset_dicts = image_based_to_annotation_based(dataset_dicts, required_fields)
    mapper = DerenderMapper(cfg.DATASETS.USE_PREDICTED_BOXES,
                            attributes,
                            for_inference,
                            use_depth=cfg.DATASETS.USE_DEPTH)
    dataset = MapDataset(DatasetFromList(dataset_dicts, copy=False), mapper)
    print("done after {}".format(time.time() - start))
    return dataset
def build_detection_test_loader(dataset, *, mapper, num_workers=0):
    """
    Similar to `build_detection_train_loader`, but uses a batch size of 1.
    This interface is experimental.

    Args:
        dataset (list or torch.utils.data.Dataset): dataset dicts (from
            :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`)
            or a map-style pytorch dataset.
        mapper (callable): maps a sample (dict) from ``dataset`` to the format
            consumed by the model; with cfg the default choice is
            ``DatasetMapper(cfg, is_train=False)``.
        num_workers (int): number of parallel data loading workers.

    Returns:
        DataLoader: a torch DataLoader that loads the given detection dataset
        with test-time transformation and batching.

    Examples:
    ::
        data_loader = build_detection_test_loader(
            DatasetRegistry.get("my_test"),
            mapper=DatasetMapper(...))

        # or, instantiate with a CfgNode:
        data_loader = build_detection_test_loader(cfg, "my_test")
    """
    ds = DatasetFromList(dataset, copy=False) if isinstance(dataset, list) else dataset
    if mapper is not None:
        ds = MapDataset(ds, mapper)
    # Always use 1 image per worker during inference since this is the
    # standard when reporting inference time in papers.
    batch_sampler = torch.utils.data.sampler.BatchSampler(
        InferenceSampler(len(ds)), 1, drop_last=False)
    return torch.utils.data.DataLoader(
        ds,
        num_workers=num_workers,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
def build_classification_test_loader(cfg, dataset_name, mapper=None):
    """Build an inference loader (batch size 1) over cfg.DATASETS.TEST.

    NOTE(review): `dataset_name` is accepted but never used — the dicts come
    from cfg.DATASETS.TEST; confirm whether that is intended.
    """
    dicts = get_classification_dataset_dicts(cfg.DATASETS.TEST)
    ds = DatasetFromList(dicts, copy=False)
    # False means not is_training
    chosen_mapper = ClsDatasetMapper(cfg, False) if mapper is None else mapper
    ds = MapDataset(ds, chosen_mapper)
    batch_sampler = torch.utils.data.sampler.BatchSampler(
        samplers.InferenceSampler(len(ds)), 1, drop_last=False)
    return torch.utils.data.DataLoader(
        ds,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
def build_train_dataloader(cfg):  # like 'build_detection_train_loader'
    """Build the training dataloader for either COCO or Open Images.

    NOTE: the string conditions ('get_detection_dataset_dicts', 'rebalancing',
    'DatasetMapper') are always truthy; they serve only as section labels.
    Side effect: stores the loader in the module global DATA_LOADER.
    """
    if 'coco_2017_train' in cfg.DATASETS.TRAIN:
        descs_train: List[Dict] = DatasetCatalog.get("coco_2017_train")
        ds_train = DatasetFromList(descs_train, copy=False)
        mapper = DatasetMapper(cfg, True)
    else:  # Open-Image-Dataset
        if 'get_detection_dataset_dicts':
            all_descs_train: List[Dict] = DatasetCatalog.get("oid_train")
        if 'rebalancing':
            # Re-sample the dataset by image id; sample_image_ids() decides the
            # class balance, and this maps each chosen id back to its record.
            image_id_vs_idx = {}
            for idx, desc in enumerate(all_descs_train):
                image_id_vs_idx[desc['image_id']] = idx
            descs_train = list(map(
                lambda img_id: all_descs_train[image_id_vs_idx[img_id]],
                sample_image_ids()))
            print('_' * 50 + f'train dataset len: {len(descs_train)}')
        ds_train = DatasetFromList(descs_train, copy=False)
        if 'DatasetMapper':
            # Color jitter on top of the config-driven augmentations.
            augs = [RandomContrast(0.8, 1.2),
                    RandomBrightness(0.8, 1.2),
                    RandomSaturation(0.8, 1.2)]
            augs.extend(build_augmentation(cfg, is_train=True))
            mapper = make_mapper('oid_train', is_train=True,
                                 augmentations=T.AugmentationList(augs))
    ds_train = MapDataset(ds_train, mapper)
    sampler = TrainingSampler(len(ds_train))
    data_loader = build_batch_data_loader(
        ds_train,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
    # Debug/inspection hook.
    global DATA_LOADER
    DATA_LOADER = data_loader
    return data_loader
# NOTE: this snippet relies on `dicts_valid`, `cfg`, and `dataset_name` being
# defined by the enclosing scope. The string condition below is always truthy
# (it acts as a section label), so the `else` branch is unreachable as written.
if "filter_empty and has_instances":
    ...
    dataset = DatasetFromList(dicts_valid, copy=False)
    mapper = DatasetMapper(cfg, is_train=False)
else:  # Open-Image-Dataset
    if 'get_detection_dataset_dicts':
        descs_valid: List[Dict] = DatasetCatalog.get(dataset_name)
        # # validation dataset is too large.
        # descs_valid = random.choices(descs_valid, k=200)
        dataset = DatasetFromList(descs_valid)
    if 'DatasetMapper':
        mapper = make_mapper(dataset_name, is_train=False, augmentations=None)
dataset = MapDataset(dataset, mapper)
# NOTE(review): RandomSampler shuffles; inference loaders elsewhere in this
# file use InferenceSampler — confirm the randomness is intended here.
sampler = RandomSampler(dataset)
# Always use 1 image per worker during inference since this is the
# standard when reporting inference time in papers.
batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)
data_loader = torch.utils.data.DataLoader(
    dataset,
    num_workers=cfg.DATALOADER.NUM_WORKERS,
    batch_sampler=batch_sampler,
    collate_fn=trivial_batch_collator,
)
def write_with_inferred_attributes(cfg, split, attributes_key):
    """Run the trained derender predictor over `split` and write the predicted
    attributes into each annotation under `attributes_key`, then dump the
    updated dataset to its standard-format json file.
    """
    timer = CodeTimer(
        "adding inferred attributes split:{}, attributes_key:{}".format(
            split, attributes_key))
    module_cfg = os.path.join(cfg.TRAINED_DERENDER.EXP_DIR, "cfg.yaml")
    module_cfg = load_cfg_from_file(module_cfg)
    module_cfg.MODEL.WEIGHTS = cfg.TRAINED_DERENDER.ATTRIBUTES_WEIGHTS_MAP[
        attributes_key]
    # Dataset-specific batch/worker tuning.
    module_cfg.DATALOADER.OBJECTS_PER_BATCH = 1000 if cfg.BASE_NAME == "intphys" else 450
    module_cfg.DATALOADER.NUM_WORKERS = 8 if cfg.BASE_NAME == "adept" else module_cfg.DATALOADER.NUM_WORKERS
    if cfg.DEBUG:
        # Small single-process batches while debugging.
        module_cfg.DATALOADER.NUM_WORKERS = 0
        module_cfg.DEBUG = True
        module_cfg.DATALOADER.OBJECTS_PER_BATCH = 50
    predictor = DerenderPredictor(module_cfg)

    # if not cfg.DEBUG:
    #     gpu_ids = [_ for _ in range(torch.cuda.device_count())]
    #     predictor.derenderer = torch.nn.parallel.DataParallel(predictor.derenderer, gpu_ids)

    dataset_name, standard_format_json_file = get_dataset_name_and_json(
        cfg, split)
    dataset = DatasetCatalog.get(dataset_name)
    required_fields = [
        "pred_box"
    ] if cfg.TRAINED_DERENDER.USE_INFERRED_BOXES else ["bbox"]
    # filtered_idx maps loader order back to (image, annotation) indices.
    filtered_idx, \
        mapped_dataset = image_based_to_annotation_based(dataset, required_fields)
    mapped_dataset = DatasetFromList(mapped_dataset, copy=False)
    mapper = DerenderMapper(cfg.TRAINED_DERENDER.USE_INFERRED_BOXES,
                            predictor.attributes,
                            for_inference=True,
                            use_depth=cfg.TRAINED_DERENDER.USE_DEPTH)
    mapped_dataset = MapDataset(mapped_dataset, mapper)
    data_loader = DataLoader(
        dataset=mapped_dataset,
        batch_size=module_cfg.DATALOADER.OBJECTS_PER_BATCH,
        num_workers=module_cfg.DATALOADER.NUM_WORKERS,
        shuffle=False)

    fil_pointer = 0
    with torch.no_grad():
        for inputs in data_loader:
            inputs = to_cuda(inputs)
            outputs = predictor(inputs)
            batch_size = list(outputs.values())[0].shape[0]
            # Write each predicted attribute dict back onto its source
            # annotation; shuffle=False keeps loader order aligned with
            # filtered_idx.
            for oix, (img_idx, an_idx) in zip(
                    range(batch_size),
                    filtered_idx[fil_pointer:fil_pointer + batch_size]):
                dataset[img_idx]["annotations"][an_idx][attributes_key] = \
                    {k: v[oix].item() for k, v in
                     outputs.items()}
                # {k: v[oix].item() if v[oix].size == 1
                #  else [float(el) for el in v[oix]]
                #  for k,v in outputs.items()}
            fil_pointer = fil_pointer + batch_size
    dataset = [fix_for_serialization(d) for d in dataset]
    with open(standard_format_json_file, "w") as f:
        json.dump(dataset, f, indent=4)
    timer.done()
def test_pickleability(self):
    """A MapDataset survives a pickle round trip and still applies its map."""
    dataset = MapDataset(DatasetFromList([1, 2, 3]), lambda x: x * 2)
    roundtripped = pickle.loads(pickle.dumps(dataset))
    self.assertEqual(roundtripped[0], 2)
# model.to(torch.device(cfg.MODEL.DEVICE)) if 'do-train': ... if 'build_detection_train_loader': # all dataset_dicts w.r.t cfg.DATASETS.TRAIN will be flattened. dataset_dicts = get_detection_dataset_dicts( cfg.DATASETS.TRAIN, filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS, min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE if cfg.MODEL.KEYPOINT_ON else 0, proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, ) dataset = DatasetFromList(dataset_dicts, copy=False) dataset = MapDataset(dataset, DatasetMapper(cfg, True)) sampler_name = cfg.DATALOADER.SAMPLER_TRAIN logger = logging.getLogger(__name__) logger.info("Using training sampler {}".format(sampler_name)) # TODO avoid if-else? if sampler_name == "TrainingSampler": sampler = TrainingSampler(len(dataset)) else: raise ValueError("Unknown training sampler: {}".format(sampler_name)) data_loader = build_batch_data_loader( dataset, sampler, cfg.SOLVER.IMS_PER_BATCH, aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING, num_workers=cfg.DATALOADER.NUM_WORKERS,
def test_map_style(self):
    """Map-style MapDataset supports indexing with mapped values."""
    mapped = MapDataset(DatasetFromList([1, 2, 3]), TestMapDataset.map_func)
    self.assertEqual(mapped[0], 2)
    self.assertEqual(mapped[2], 6)
    self.assertIn(mapped[1], [2, 6])
def build_classification_train_loader(cfg, mapper=None):
    """
    Build a classification data loader from cfg.

    Returns:
        list[dict]: Each dict contains,

        * image: Tensor, image in (C, H, W) format.
        * label (optional): int, groundtruth class
    """
    # NOTE(review): "workers" here means distributed processes (world size),
    # not DataLoader worker threads — confirm against the training launcher.
    num_workers = get_world_size()
    images_per_batch = cfg.SOLVER.IMS_PER_BATCH
    assert (
        images_per_batch % num_workers == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        images_per_batch, num_workers)
    assert (
        images_per_batch >= num_workers
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        images_per_batch, num_workers)
    images_per_worker = images_per_batch // num_workers

    dataset_dicts = get_classification_dataset_dicts(cfg.DATASETS.TRAIN)
    dataset = DatasetFromList(dataset_dicts, copy=False)
    if mapper is None:
        mapper = ClsDatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = samplers.TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        sampler = samplers.RepeatFactorTrainingSampler(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
        data_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=sampler,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=None,
            collate_fn=operator.itemgetter(
                0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker)
    else:
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, images_per_worker,
            drop_last=True)  # drop_last so the batch always have the same size
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
            worker_init_fn=worker_init_reset_seed,
        )

    return data_loader
# model.to(torch.device(cfg.MODEL.DEVICE))
# NOTE: the string conditions below are always truthy; they serve only as
# section labels for the script's stages (setup / dataloader / model).
if 'setup(args)':
    cfg = get_cfg()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(
        cfg, args
    )  # if you don't like any of the default setup, write your own setup code
if 'build dataloader':
    file_paths = glob.glob(DS_DIR + 'test/*.jpg')
    # import numpy as np
    # file_paths = np.random.choice(file_paths, size=200, replace=False)
    ds = DatasetFromList(file_paths)
    ds = MapDataset(ds, map_func=kaggle_mapper)
    sampler = InferenceSampler(len(ds))
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, BATCH_SIZE, drop_last=False)
    data_loader = torch.utils.data.DataLoader(
        ds,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
if 'create model and load weights':
    meta_arch = cfg.MODEL.META_ARCHITECTURE
    model = META_ARCH_REGISTRY.get(meta_arch)(cfg)