def test_training_sampler_seed(self):
    seed_all_rng(42)
    sampler = TrainingSampler(30)
    data = list(itertools.islice(sampler, 65))

    seed_all_rng(42)
    sampler = TrainingSampler(30)
    seed_all_rng(999)  # should be ineffective
    data2 = list(itertools.islice(sampler, 65))
    self.assertEqual(data, data2)
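# Companion sketch (not from the test above): TrainingSampler also accepts an
# explicit `seed` argument, which pins the index stream without touching the
# global RNG state at construction time.
import itertools

from detectron2.data.samplers import TrainingSampler

sampler_a = TrainingSampler(30, seed=42)
sampler_b = TrainingSampler(30, seed=42)
assert list(itertools.islice(sampler_a, 65)) == list(itertools.islice(sampler_b, 65))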
def build_inference_based_loader(
    cfg: CfgNode, dataset_cfg: CfgNode, model: torch.nn.Module
) -> InferenceBasedLoader:
    """
    Constructs a data loader based on inference results of a model.
    """
    dataset = build_bootstrap_dataset(dataset_cfg.DATASET, dataset_cfg.IMAGE_LOADER)
    training_sampler = TrainingSampler(len(dataset))
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=dataset_cfg.IMAGE_LOADER.BATCH_SIZE,
        sampler=training_sampler,
        num_workers=dataset_cfg.IMAGE_LOADER.NUM_WORKERS,
        collate_fn=trivial_batch_collator,
    )
    return InferenceBasedLoader(
        model,
        data_loader=data_loader,
        data_sampler=build_data_sampler(dataset_cfg.DATA_SAMPLER),
        data_filter=build_data_filter(dataset_cfg.FILTER),
        shuffle=True,
        batch_size=dataset_cfg.INFERENCE.OUTPUT_BATCH_SIZE,
        inference_batch_size=dataset_cfg.INFERENCE.INPUT_BATCH_SIZE,
    )
def build_custom_train_loader(cfg, mapper=None):
    """
    Modified from detectron2.data.build.build_detection_train_loader, but
    supports different samplers.
    """
    source_aware = cfg.DATALOADER.SOURCE_AWARE
    if source_aware:
        dataset_dicts = get_detection_dataset_dicts_with_source(
            cfg.DATASETS.TRAIN,
            filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
            min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
            if cfg.MODEL.KEYPOINT_ON
            else 0,
            proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
            if cfg.MODEL.LOAD_PROPOSALS
            else None,
        )
        sizes = [0 for _ in range(len(cfg.DATASETS.TRAIN))]
        for d in dataset_dicts:
            sizes[d['dataset_source']] += 1
        print('dataset sizes', sizes)
    else:
        dataset_dicts = get_detection_dataset_dicts(
            cfg.DATASETS.TRAIN,
            filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
            min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
            if cfg.MODEL.KEYPOINT_ON
            else 0,
            proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
            if cfg.MODEL.LOAD_PROPOSALS
            else None,
        )
    dataset = DatasetFromList(dataset_dicts, copy=False)
    assert mapper is not None, "a mapper must be provided"  # e.g. DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    # TODO avoid if-else?
    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(dataset))
    elif sampler_name == "MultiDatasetSampler":
        assert source_aware
        sampler = MultiDatasetSampler(cfg, sizes, dataset_dicts)
    elif sampler_name == "RepeatFactorTrainingSampler":
        repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
        )
        sampler = RepeatFactorTrainingSampler(repeat_factors)
    elif sampler_name == "ClassAwareSampler":
        sampler = ClassAwareSampler(dataset_dicts)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
def build_inference_based_loader(
    cfg: CfgNode,
    dataset_cfg: CfgNode,
    model: torch.nn.Module,
    embedder: Optional[torch.nn.Module] = None,
) -> InferenceBasedLoader:
    """
    Constructs a data loader based on inference results of a model.
    """
    dataset = build_bootstrap_dataset(dataset_cfg.DATASET, dataset_cfg.IMAGE_LOADER)
    meta = MetadataCatalog.get(dataset_cfg.DATASET)
    training_sampler = TrainingSampler(len(dataset))
    data_loader = torch.utils.data.DataLoader(
        dataset,  # pyre-ignore[6]
        batch_size=dataset_cfg.IMAGE_LOADER.BATCH_SIZE,
        sampler=training_sampler,
        num_workers=dataset_cfg.IMAGE_LOADER.NUM_WORKERS,
        collate_fn=trivial_batch_collator,
        worker_init_fn=worker_init_reset_seed,
    )
    return InferenceBasedLoader(
        model,
        data_loader=data_loader,
        data_sampler=build_data_sampler(cfg, dataset_cfg.DATA_SAMPLER, embedder),
        data_filter=build_data_filter(dataset_cfg.FILTER),
        shuffle=True,
        batch_size=dataset_cfg.INFERENCE.OUTPUT_BATCH_SIZE,
        inference_batch_size=dataset_cfg.INFERENCE.INPUT_BATCH_SIZE,
        category_to_class_mapping=meta.category_to_class_mapping,
    )
def build_detection_train_loader(cfg):
    """Builds a data loader for the baseline trainer, with support for training
    on a subset of the labeled data only.

    Most of the code comes from `d2.data.build.build_detection_train_loader()`;
    see it for more details.
    """
    # CSD: check that the config is supported
    assert cfg.DATALOADER.SAMPLER_TRAIN == "TrainingSampler", "Unsupported training sampler: {}".format(
        cfg.DATALOADER.SAMPLER_TRAIN
    )

    # Original code
    dataset = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON
        else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
    )

    # CSD: subsample the dataset if needed
    dataset = check_subsample_dataset(dataset, cfg)

    if comm.is_main_process():  # Log counts
        logger = setup_logger(name=__name__)
        logger.debug("Number of images in the dataset: {}".format(len(dataset)))
        _log_api_usage("dataset." + cfg.DATASETS.TRAIN[0])

    # Original code; the sampler is created once, after the dataset is wrapped
    mapper = DatasetMapper(cfg, True)
    dataset = DatasetFromList(dataset, copy=False)
    dataset = MapDataset(dataset, mapper)
    sampler = TrainingSampler(len(dataset))
    assert isinstance(sampler, torch.utils.data.sampler.Sampler)

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
def build_detection_train_loader(cfg: CfgNode, mapper=None):
    """
    A data loader is created in a way similar to that of Detectron2.
    The main differences are:
     - it allows combining datasets with different but compatible object category sets

    The data loader is created by the following steps:
    1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
    2. Start workers to work on the dicts. Each worker will:
       * Map each metadata dict into another format to be consumed by the model.
       * Batch them by simply putting dicts into a list.
    The batched ``list[mapped_dict]`` is what this dataloader will return.

    Args:
        cfg (CfgNode): the config
        mapper (callable): a callable which takes a sample (dict) from dataset
            and returns the format to be consumed by the model.
            By default it will be `DatasetMapper(cfg, True)`.

    Returns:
        an infinite iterator of training data
    """
    _add_category_whitelists_to_metadata(cfg)
    _add_category_maps_to_metadata(cfg)
    dataset_dicts = combine_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        keep_instance_predicate=_get_train_keep_instance_predicate(cfg),
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
        )
        sampler = RepeatFactorTrainingSampler(repeat_factors)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))
    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
def build_detection_train_loader(cfg, mapper=None):
    """
    A data loader is created by the following steps:

    1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
    2. Coordinate a random shuffle order shared among all processes (all GPUs).
    3. Each process spawns a few workers to process the dicts. Each worker will:
       * Map each metadata dict into another format to be consumed by the model.
       * Batch them by simply putting dicts into a list.

    The batched ``list[mapped_dict]`` is what this dataloader will yield.

    Args:
        cfg (CfgNode): the config
        mapper (callable): a callable which takes a sample (dict) from dataset and
            returns the format to be consumed by the model.
            By default it will be ``DatasetMapper(cfg, True)``.

    Returns:
        an infinite iterator of training data
    """
    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON
        else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    # TODO avoid if-else?
    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
        )
        sampler = RepeatFactorTrainingSampler(repeat_factors)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))
    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
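# Hypothetical usage sketch for the loader above. The model-zoo config path is
# illustrative, and the COCO datasets it names must be registered locally.
from detectron2 import model_zoo
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
train_loader = build_detection_train_loader(cfg)
batch = next(iter(train_loader))  # list[dict], one mapped dict per image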
def build_detection_semisup_train_loader(cfg, mapper=None):
    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON
        else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS
        else None,
    )

    # Divide into labeled and unlabeled sets according to the supervision percentage
    label_dicts, unlabel_dicts = divide_label_unlabel(
        dataset_dicts,
        cfg.DATALOADER.SUP_PERCENT,
        cfg.DATALOADER.RANDOM_DATA_SEED,
        cfg.DATALOADER.RANDOM_DATA_SEED_PATH,
    )

    dataset = DatasetFromList(label_dicts, copy=False)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
            label_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
        )
        sampler = RepeatFactorTrainingSampler(repeat_factors)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    # Log the labeled-set size and the supervision percentage
    logger.info("Number of training samples " + str(len(dataset)))
    logger.info("Supervision percentage " + str(cfg.DATALOADER.SUP_PERCENT))

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
def build_simple_dataloader(dataset_name: list, batch_size):
    dataset_dicts = get_detection_dataset_dicts(dataset_name)
    dataset = DatasetFromList(dataset_dicts, copy=False)

    cfg = get_cfg()
    cfg["aug_kwargs"] = {}
    dataset = MapDataset(dataset, AlbumentationsMapper(cfg, False))

    # set shuffle to False in debugging mode
    sampler = TrainingSampler(len(dataset), shuffle=False, seed=42)
    dataloader = build_batch_data_loader(
        dataset=dataset, sampler=sampler, total_batch_size=batch_size
    )
    return dataloader
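# Usage sketch; "coco_2017_val" is illustrative and assumed to be registered.
# With shuffle=False and a fixed seed, repeated runs yield identical batches,
# which is the point of this debugging loader.
debug_loader = build_simple_dataloader(["coco_2017_val"], batch_size=2)
debug_batch = next(iter(debug_loader))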
def build_hand_train_loader(cfg):
    dataset_dicts, num_per_epoch = load_hand(
        cfg.HAND_PROJECT.DATA.MODE,
        cfg.HAND_PROJECT.DATA.ANNOT_SUBSET_TRAIN,
        cfg.HAND_PROJECT.DATA.BASE_PATH,
        selects=cfg.HAND_PROJECT.DATA.SELECTS,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)
    mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    sampler = TrainingSampler(len(dataset))
    return (
        build_batch_data_loader(
            dataset,
            sampler,
            cfg.SOLVER.IMS_PER_BATCH,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
        ),
        num_per_epoch,
    )
def build_detection_train_loader_with_train_sampler(cfg, mapper, seed=42, shuffle=True):
    dataset_dicts = get_detection_dataset_dicts(cfg.DATASETS.TRAIN)
    dataset = DatasetFromList(dataset_dicts, copy=False)
    dataset = MapDataset(dataset, mapper)

    logger = logging.getLogger(__name__)
    logger.info("Using TrainingSampler with shuffle={} and seed={}".format(shuffle, seed))
    sampler = TrainingSampler(len(dataset), shuffle=shuffle, seed=seed)
    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
def build_x_train_loader(
    dataset, *, mapper, sampler=None, total_batch_size, aspect_ratio_grouping=True, num_workers=0
):
    if isinstance(dataset, list):
        dataset = DatasetFromList(dataset, copy=False)
    if mapper is not None:
        dataset = MapDataset(dataset, mapper)
    if sampler is None:
        sampler = TrainingSampler(len(dataset))
    assert isinstance(sampler, torch.utils.data.sampler.Sampler)
    return build_batch_data_loader(
        dataset,
        sampler,
        total_batch_size,
        aspect_ratio_grouping=aspect_ratio_grouping,
        num_workers=num_workers,
    )
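# Minimal calling sketch; `dataset_dicts` and `cfg` are assumed to exist
# (e.g. from get_detection_dataset_dicts and get_cfg). Leaving sampler=None
# falls back to an infinite TrainingSampler over the mapped dataset.
loader = build_x_train_loader(
    dataset_dicts,
    mapper=DatasetMapper(cfg, True),
    total_batch_size=16,
)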
def test_to_iterable(self):
    sampler = TrainingSampler(100, seed=10)
    dataset = DatasetFromList(list(range(100)))
    dataset = ToIterableDataset(dataset, sampler)
    data_loader = data.DataLoader(dataset, num_workers=0, collate_fn=operator.itemgetter(0))
    output = list(itertools.islice(data_loader, 100))
    self.assertEqual(set(output), set(range(100)))

    data_loader = data.DataLoader(
        dataset,
        num_workers=2,
        collate_fn=operator.itemgetter(0),
        worker_init_fn=worker_init_reset_seed,
        # reset seed should not affect behavior of TrainingSampler
    )
    output = list(itertools.islice(data_loader, 100))
    # multiple workers should not lead to duplicate or different data
    self.assertEqual(set(output), set(range(100)))
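# The same pattern outside a test, as a sketch: wrapping a map-style dataset and
# an infinite sampler in ToIterableDataset lets DataLoader workers shard the
# sampler stream instead of each worker replaying it from the start.
from torch.utils.data import DataLoader

from detectron2.data.common import DatasetFromList, ToIterableDataset
from detectron2.data.samplers import TrainingSampler

ds = ToIterableDataset(DatasetFromList(list(range(100))), TrainingSampler(100, seed=10))
loader = DataLoader(ds, batch_size=4, num_workers=2)
first_batch = next(iter(loader))  # tensor of 4 indices; the stream is infinite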
def build_train_loader(cfg, mapper=None):
    if mapper is None:
        mapper = get_dataset_mapper(cfg.DATASETS.TRAIN[0])(cfg, True)
    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON
        else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    # TODO avoid if-else?
    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
        )
        sampler = RepeatFactorTrainingSampler(repeat_factors)
    elif sampler_name == "RatioFactorTrainingSampler":
        repeat_factors = repeat_factors_from_ratios(dataset_dicts)
        sampler = RepeatFactorTrainingSampler(repeat_factors)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))
    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
def _train_loader_from_config(cfg, mapper=None, *, dataset=None, sampler=None):
    if dataset is None:
        dataset = get_detection_dataset_dicts(
            cfg.DATASETS.TRAIN,
            filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
            min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
            if cfg.MODEL.KEYPOINT_ON
            else 0,
            proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
            if cfg.MODEL.LOAD_PROPOSALS
            else None,
        )

    if mapper is None:
        mapper = DatasetMapper(cfg, True)

    if sampler is None:
        sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
        logger = logging.getLogger("detectron2.trainer")
        logger.info("Using training sampler {}".format(sampler_name))
        if sampler_name == "TrainingSampler":
            sampler = TrainingSampler(len(dataset))
        elif sampler_name == "RepeatFactorTrainingSampler":
            repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
                dataset, cfg.DATALOADER.REPEAT_THRESHOLD
            )
            sampler = RepeatFactorTrainingSampler(repeat_factors)
        else:
            raise ValueError("Unknown training sampler: {}".format(sampler_name))

    return {
        "dataset": dataset,
        "sampler": sampler,
        "mapper": mapper,
        "total_batch_size": cfg.SOLVER.IMS_PER_BATCH,
        "aspect_ratio_grouping": cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        "num_workers": cfg.DATALOADER.NUM_WORKERS,
    }
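# Sketch of how a from-config helper like this is consumed: the returned dict
# mirrors the keyword signature of detectron2's explicit-arguments
# build_detection_train_loader (not the cfg-based variants above), so it can
# be splatted straight in.
loader = build_detection_train_loader(**_train_loader_from_config(cfg))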
def build_train_dataloader(cfg):  # like 'build_detection_train_loader'
    if 'coco_2017_train' in cfg.DATASETS.TRAIN:
        descs_train: List[Dict] = DatasetCatalog.get("coco_2017_train")
        ds_train = DatasetFromList(descs_train, copy=False)
        mapper = DatasetMapper(cfg, True)
    else:  # Open-Image-Dataset
        # get_detection_dataset_dicts
        all_descs_train: List[Dict] = DatasetCatalog.get("oid_train")

        # rebalancing
        image_id_vs_idx = {}
        for idx, desc in enumerate(all_descs_train):
            image_id_vs_idx[desc['image_id']] = idx
        descs_train = list(
            map(lambda img_id: all_descs_train[image_id_vs_idx[img_id]], sample_image_ids())
        )
        print('_' * 50 + f'train dataset len: {len(descs_train)}')

        ds_train = DatasetFromList(descs_train, copy=False)

        # DatasetMapper
        augs = [RandomContrast(0.8, 1.2), RandomBrightness(0.8, 1.2), RandomSaturation(0.8, 1.2)]
        augs.extend(build_augmentation(cfg, is_train=True))
        mapper = make_mapper('oid_train', is_train=True, augmentations=T.AugmentationList(augs))

    ds_train = MapDataset(ds_train, mapper)
    sampler = TrainingSampler(len(ds_train))
    data_loader = build_batch_data_loader(
        ds_train,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
    global DATA_LOADER
    DATA_LOADER = data_loader
    return data_loader
def build_ss_train_loader(cfg, mapper):
    """Builds a semi-supervised data loader that yields both labeled and unlabeled images.

    Data can be loaded in two modes (defined in `cfg.DATASETS.MODE`):
      - "CROSS_DATASET": labeled and unlabeled images come from two disparate datasets, e.g.
        VOCtrain and VOCtest
      - "RANDOM_SPLIT": labeled and unlabeled images come from the same dataset by splitting it
        into the labeled and unlabeled parts
    For more details see `build_ss_datasets()`.

    Each batch consists of `cfg.SOLVER.IMS_PER_BATCH_LABELED` labeled and
    `cfg.SOLVER.IMS_PER_BATCH_UNLABELED` unlabeled images, which can be modified in
    `csd/config/config.py` or in a custom `configs/*.yaml` config file supplied to your
    training script.

    The actual x-flips happen inside `AspectRatioGroupedSSDataset`, which is instantiated by
    `build_ss_batch_data_loader`.

    The returned tuple contains (1) a tuple of lists with dicts for labeled and unlabeled images
    and (2) a DataLoader with infinite sampling, yielding a pair of batches with labeled and
    unlabeled images with the same aspect ratio within each batch. Specifically, the returned
    DataLoader yields a tuple of lists:
    ([labeled_img, labeled_img_xflip], [unlabeled_im, unlabeled_img_xflip]).
    """
    # Load labeled and unlabeled dataset dicts (either use two separate ones or perform a random split)
    labeled_dataset_dicts, unlabeled_dataset_dicts = build_ss_datasets(cfg)

    # Log the dataset sizes
    if comm.is_main_process():
        logger = setup_logger(name=__name__)
        logger.debug(
            "Number of images in the labeled and unlabeled datasets: {}, {}".format(
                len(labeled_dataset_dicts), len(unlabeled_dataset_dicts)
            )
        )

        # Print updated metadata counts
        print_instances_class_histogram(
            labeled_dataset_dicts, MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).thing_classes
        )

    # Map metadata into actual objects (note: data augmentations also take place here)
    labeled_dataset = MapDataset(labeled_dataset_dicts, mapper)
    unlabeled_dataset = MapDataset(unlabeled_dataset_dicts, mapper)

    # Define data samplers
    assert cfg.DATALOADER.SAMPLER_TRAIN == "TrainingSampler", "Unsupported training sampler: {}".format(
        cfg.DATALOADER.SAMPLER_TRAIN
    )
    labeled_sampler = TrainingSampler(len(labeled_dataset))
    unlabeled_sampler = TrainingSampler(len(unlabeled_dataset))

    # Initialize the actual dataloaders
    return (
        labeled_dataset_dicts,
        unlabeled_dataset_dicts,
    ), build_ss_batch_data_loader(
        (labeled_dataset, unlabeled_dataset),
        (labeled_sampler, unlabeled_sampler),
        cfg.SOLVER.IMS_PER_BATCH_LABELED,
        cfg.SOLVER.IMS_PER_BATCH_UNLABELED,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
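# Usage sketch following the docstring above; `cfg` and `mapper` are assumed to
# be defined. The second element is an infinite loader yielding paired
# labeled/unlabeled batches.
(label_dicts, unlabel_dicts), ss_loader = build_ss_train_loader(cfg, mapper)
labeled_batch, unlabeled_batch = next(iter(ss_loader))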
dataset_dicts = get_detection_dataset_dicts(
    cfg.DATASETS.TRAIN,
    filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
    min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
    if cfg.MODEL.KEYPOINT_ON
    else 0,
    proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
)
dataset = DatasetFromList(dataset_dicts, copy=False)
dataset = MapDataset(dataset, DatasetMapper(cfg, True))

sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
logger = logging.getLogger(__name__)
logger.info("Using training sampler {}".format(sampler_name))
# TODO avoid if-else?
if sampler_name == "TrainingSampler":
    sampler = TrainingSampler(len(dataset))
else:
    raise ValueError("Unknown training sampler: {}".format(sampler_name))
data_loader = build_batch_data_loader(
    dataset,
    sampler,
    cfg.SOLVER.IMS_PER_BATCH,
    aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
    num_workers=cfg.DATALOADER.NUM_WORKERS,
)

# logger.info("Starting training from iteration {}".format(start_iter))
# with EventStorage(start_iter) as storage:
#     for data, iteration in zip(data_loader, range(start_iter, max_iter)):
#         iteration = iteration + 1
#         storage.step()
def test_build_iterable_dataloader_train(self):
    kwargs = self._get_kwargs()
    ds = DatasetFromList(kwargs.pop("dataset"))
    ds = ToIterableDataset(ds, TrainingSampler(len(ds)))
    dl = build_detection_train_loader(dataset=ds, **kwargs)
    next(iter(dl))
def build_detection_semisup_train_loader_two_crops(cfg, mapper=None):
    if cfg.DATASETS.CROSS_DATASET:  # cross-dataset (e.g., coco-additional)
        label_dicts = get_detection_dataset_dicts(
            cfg.DATASETS.TRAIN_LABEL,
            filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
            min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
            if cfg.MODEL.KEYPOINT_ON
            else 0,
            proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
            if cfg.MODEL.LOAD_PROPOSALS
            else None,
        )
        unlabel_dicts = get_detection_dataset_dicts(
            cfg.DATASETS.TRAIN_UNLABEL,
            filter_empty=False,
            min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
            if cfg.MODEL.KEYPOINT_ON
            else 0,
            proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
            if cfg.MODEL.LOAD_PROPOSALS
            else None,
        )
    else:  # different degree of supervision (e.g., COCO-supervision)
        dataset_dicts = get_detection_dataset_dicts(
            cfg.DATASETS.TRAIN,
            filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
            min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
            if cfg.MODEL.KEYPOINT_ON
            else 0,
            proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
            if cfg.MODEL.LOAD_PROPOSALS
            else None,
        )

        # Divide into labeled and unlabeled sets according to supervision percentage
        label_dicts, unlabel_dicts = divide_label_unlabel(
            dataset_dicts,
            cfg.DATALOADER.SUP_PERCENT,
            cfg.DATALOADER.RANDOM_DATA_SEED,
            cfg.DATALOADER.RANDOM_DATA_SEED_PATH,
        )

    label_dataset = DatasetFromList(label_dicts, copy=False)
    # exclude the labeled set from the unlabeled dataset
    unlabel_dataset = DatasetFromList(unlabel_dicts, copy=False)
    # include the labeled set in the unlabeled dataset
    # unlabel_dataset = DatasetFromList(dataset_dicts, copy=False)

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    label_dataset = MapDataset(label_dataset, mapper)
    unlabel_dataset = MapDataset(unlabel_dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        label_sampler = TrainingSampler(len(label_dataset))
        unlabel_sampler = TrainingSampler(len(unlabel_dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        raise NotImplementedError("{} not yet supported.".format(sampler_name))
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))
    return build_semisup_batch_data_loader_two_crop(
        (label_dataset, unlabel_dataset),
        (label_sampler, unlabel_sampler),
        cfg.SOLVER.IMG_PER_BATCH_LABEL,
        cfg.SOLVER.IMG_PER_BATCH_UNLABEL,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
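# Usage sketch; the exact per-iteration structure is defined by
# build_semisup_batch_data_loader_two_crop, so this only pulls one item.
two_crop_loader = build_detection_semisup_train_loader_two_crops(cfg)
data = next(iter(two_crop_loader))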