def build_detection_val_loader(cfg, dataset_name: str, mapper=None):
    """Build a batch-size-1 loader over ``dataset_name`` for validation.

    Args:
        cfg: config node; reads ``DATASETS.TEST``,
            ``DATASETS.PROPOSAL_FILES_TEST``, ``MODEL.LOAD_PROPOSALS`` and
            ``DATALOADER.NUM_WORKERS``.
        dataset_name: name of the dataset to load.
        mapper: callable applied to every dataset dict. Defaults to
            ``DatasetMapper(cfg, True)``.

    Returns:
        A ``torch.utils.data.DataLoader`` batching one sample at a time and
        collating with ``trivial_batch_collator``.
    """
    proposal_files = None
    if cfg.MODEL.LOAD_PROPOSALS:
        # The proposal file is taken at the position that `dataset_name`
        # occupies in DATASETS.TEST.
        position = list(cfg.DATASETS.TEST).index(dataset_name)
        proposal_files = [cfg.DATASETS.PROPOSAL_FILES_TEST[position]]

    records = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=proposal_files,
    )
    listed = DatasetFromList(records)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    mapped = MapDataset(listed, mapper)

    single_item_batches = torch.utils.data.sampler.BatchSampler(
        InferenceSampler(len(mapped)), 1, drop_last=False)
    return torch.utils.data.DataLoader(
        mapped,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=single_item_batches,
        collate_fn=trivial_batch_collator,
    )
def build_detection_test_loader(cfg, dataset_name, mapper=None):
    """Build a batch-size-1 inference loader over ``dataset_name``.

    Args:
        cfg: config node; reads ``DATASETS.TEST``,
            ``DATASETS.PROPOSAL_FILES_TEST``, ``MODEL.LOAD_PROPOSALS`` and
            ``DATALOADER.NUM_WORKERS``.
        dataset_name: name of the dataset to load.
        mapper: callable applied to every dataset dict. Defaults to
            ``DatasetMapper(cfg, False)``.

    Returns:
        A ``torch.utils.data.DataLoader`` batching one sample at a time and
        collating with ``trivial_batch_collator``.
    """
    if cfg.MODEL.LOAD_PROPOSALS:
        # Select the proposal file sitting at the same index as the dataset
        # name inside DATASETS.TEST.
        test_names = list(cfg.DATASETS.TEST)
        proposal_files = [
            cfg.DATASETS.PROPOSAL_FILES_TEST[test_names.index(dataset_name)]
        ]
    else:
        proposal_files = None

    records = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=proposal_files,
    )
    listed = DatasetFromList(records)
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    mapped = MapDataset(listed, mapper)

    inference_sampler = InferenceSampler(len(mapped))
    one_per_batch = torch.utils.data.sampler.BatchSampler(
        inference_sampler, 1, drop_last=False)
    loader = torch.utils.data.DataLoader(
        mapped,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=one_per_batch,
        collate_fn=trivial_batch_collator,
    )
    return loader
def do_test(cfg, model):
    """Run inference and evaluation on every dataset in ``cfg.DATASETS.TEST``.

    Args:
        cfg: config node; reads ``DATASETS.TEST``, ``INPUT.TEST_INPUT_TYPE``
            and ``OUTPUT_DIR``.
        model: the model handed to ``inference_on_dataset``.

    Returns:
        An ``OrderedDict`` mapping dataset name to its evaluation results, or
        the bare results object when exactly one dataset was evaluated.

    Raises:
        AssertionError: if a dataset's ``evaluator_type`` is neither
            ``"lvis"`` nor ``"coco"``.
    """
    results = OrderedDict()
    for dataset_name in cfg.DATASETS.TEST:
        if cfg.INPUT.TEST_INPUT_TYPE == 'default':
            mapper = None
        else:
            mapper = DatasetMapper(
                cfg, False,
                augmentations=build_custom_augmentation(cfg, False))
        data_loader = build_detection_test_loader(
            cfg, dataset_name, mapper=mapper)
        output_folder = os.path.join(
            cfg.OUTPUT_DIR, "inference_{}".format(dataset_name))

        evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
        if evaluator_type == "lvis":
            evaluator = LVISEvaluator(dataset_name, cfg, True, output_folder)
        elif evaluator_type == 'coco':
            evaluator = COCOEvaluator(dataset_name, cfg, True, output_folder)
        else:
            # Raised explicitly instead of `assert 0`: assert statements are
            # removed under `python -O`, which would let execution continue
            # with `evaluator` unbound.
            raise AssertionError(evaluator_type)

        results[dataset_name] = inference_on_dataset(
            model, data_loader, evaluator)
        if comm.is_main_process():
            logger.info("Evaluation results for {} in csv format:".format(
                dataset_name))
            print_csv_format(results[dataset_name])

    if len(results) == 1:
        # Unwrap so single-dataset callers get the results directly.
        results = list(results.values())[0]
    return results
def build_hooks(self) -> List[HookBase]:
    """Extend the parent's hooks with a validation-loss hook when configured.

    The ``LossEvalHook`` is added only when ``cfg.DATASETS.VALIDATION`` is a
    non-empty string. It is inserted just before the last parent hook.

    Returns:
        List[HookBase]: the (possibly augmented) list of hooks.
    """
    # Default hooks come from the parent class.
    hooks = super().build_hooks()

    if self.cfg.DATASETS.VALIDATION != "":
        mapper_spec = DatasetMapper.from_config(cfg=self.cfg, is_train=True)
        # detectron2's `DatasetMapper.from_config` returns the constructor
        # keyword arguments, not a mapper instance; build the callable mapper
        # from them. Anything that is not a dict is passed through unchanged.
        if isinstance(mapper_spec, dict):
            data_set_mapper = DatasetMapper(**mapper_spec)
        else:
            data_set_mapper = mapper_spec

        data_loader = build_detection_test_loader(
            cfg=self.cfg,
            dataset_name=self.cfg.DATASETS.VALIDATION,
            mapper=data_set_mapper)
        loss_eval_hook = LossEvalHook(
            eval_period=self.cfg.VALIDATION.VALIDATION_PERIOD,
            model=self.model,
            data_loader=data_loader)
        # `list.insert` only accepts positional arguments; the keyword form
        # raises TypeError.
        hooks.insert(-1, loss_eval_hook)

    return hooks
def build_test_loader(cls, cfg, dataset_name):
    """Build a test loader that feeds ground-truth boxes as proposals.

    Every dataset dict is extended in place with ``proposal_boxes``,
    ``proposal_objectness_logits`` and ``proposal_bbox_mode`` derived from its
    annotations.

    Args:
        cfg: config node; only ``INPUT.FORMAT`` is read.
        dataset_name: dataset name(s) passed to
            ``get_detection_dataset_dicts``.

    Returns:
        Whatever ``build_detection_test_loader`` returns for the augmented
        dataset dicts.
    """
    # One dict per sample.
    records = detectron2.data.get_detection_dataset_dicts(dataset_name)

    for record in records:
        # One row per annotation; rows are tagged below as XYWH_ABS, i.e.
        # (x0, y0, w, h) in absolute coordinates.
        gt_boxes = np.array([ann["bbox"] for ann in record["annotations"]])
        record["proposal_boxes"] = gt_boxes
        # A constant logit of 10 (sigmoid(10) is about 0.99995) per box.
        record["proposal_objectness_logits"] = np.full(
            (gt_boxes.shape[0], ), 10)
        record["proposal_bbox_mode"] = detectron2.structures.BoxMode.XYWH_ABS
    print("Proposal boxes for test data added.")

    test_mapper = DatasetMapper(
        is_train=False,
        augmentations=[],
        image_format=cfg.INPUT.FORMAT,
        precomputed_proposal_topk=500)
    return build_detection_test_loader(records, mapper=test_mapper)
def build_detection_meta_loader(cfg, mapper=None):
    """Build the meta-set loader from the training data.

    Samples are drawn with ``ClassBalancedTrainingSampler`` using repeat
    factors from
    ``ClassBalancedTrainingSampler.repeat_factors_by_inverse_category_frequency``.

    Args:
        cfg: config node.
        mapper: callable applied to every dataset dict. Defaults to
            ``DatasetMapper(cfg, True)``.

    Returns:
        The loader produced by ``build_batch_data_loader``.
    """
    if cfg.MODEL.KEYPOINT_ON:
        min_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
    else:
        min_keypoints = 0
    if cfg.MODEL.LOAD_PROPOSALS:
        proposal_files = cfg.DATASETS.PROPOSAL_FILES_TRAIN
    else:
        proposal_files = None

    records = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=min_keypoints,
        proposal_files=proposal_files,
    )
    listed = DatasetFromList(records, copy=False)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    mapped = MapDataset(listed, mapper)

    logging.getLogger(__name__).info(
        "Using training sampler Class Balanced Sampler")
    factors = ClassBalancedTrainingSampler.repeat_factors_by_inverse_category_frequency(
        records)
    balanced_sampler = ClassBalancedTrainingSampler(factors)

    return build_batch_data_loader(
        mapped,
        balanced_sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
def build_train_loader(cls, cfg):
    """Build a train loader that feeds ground-truth boxes as proposals.

    Only the first entry of ``cfg.DATASETS.TRAIN`` is loaded. Every dataset
    dict is extended in place with ``proposal_boxes``,
    ``proposal_objectness_logits`` and ``proposal_bbox_mode`` derived from its
    annotations.

    Args:
        cfg: config node; reads ``DATASETS.TRAIN``, ``INPUT.FORMAT``,
            ``SOLVER.IMS_PER_BATCH``, ``DATALOADER.ASPECT_RATIO_GROUPING`` and
            ``DATALOADER.NUM_WORKERS``.

    Returns:
        Whatever ``build_detection_train_loader`` returns for the augmented
        dataset dicts.
    """
    # One dict per sample.
    records = detectron2.data.get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN[0])

    for record in records:
        # One row per annotation; rows are tagged below as XYWH_ABS, i.e.
        # (x0, y0, w, h) in absolute coordinates.
        gt_boxes = np.array([ann["bbox"] for ann in record["annotations"]])
        record["proposal_boxes"] = gt_boxes
        # A constant logit of 10 (sigmoid(10) is about 0.99995) per box.
        record["proposal_objectness_logits"] = np.full(
            (gt_boxes.shape[0], ), 10)
        record["proposal_bbox_mode"] = detectron2.structures.BoxMode.XYWH_ABS
    print("Proposal boxes added.")

    train_mapper = DatasetMapper(
        is_train=True,
        augmentations=[],
        image_format=cfg.INPUT.FORMAT,
        precomputed_proposal_topk=500)
    return build_detection_train_loader(
        records,
        mapper=train_mapper,
        total_batch_size=cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS)
def build_detection_val_loader(cfg, dataset_name, total_batch_size, mapper=None):
    """Build a distributed validation loader over ``dataset_name``.

    The total batch size is split evenly across processes; samples are
    assigned with an unshuffled ``DistributedSampler`` and the last, possibly
    smaller, batch is kept.

    Args:
        cfg: a detectron2 CfgNode.
        dataset_name (str): a name of the dataset that's available in the
            DatasetCatalog.
        total_batch_size (int): batch size summed over all processes; must be
            positive and divisible by the world size.
        mapper (callable): a callable which takes a sample (dict) from the
            dataset and returns the format to be consumed by the model.
            By default it will be ``DatasetMapper(cfg, True)``.

    Returns:
        DataLoader: a torch DataLoader over the mapped dataset, collating
        with ``trivial_batch_collator``.
    """
    num_processes = comm.get_world_size()
    assert (
        total_batch_size > 0 and total_batch_size % num_processes == 0
    ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(
        total_batch_size, num_processes)
    per_process_batch = total_batch_size // num_processes

    proposal_files = None
    if cfg.MODEL.LOAD_PROPOSALS:
        # The proposal file is taken at the position that `dataset_name`
        # occupies in DATASETS.TEST.
        position = list(cfg.DATASETS.TEST).index(dataset_name)
        proposal_files = [cfg.DATASETS.PROPOSAL_FILES_TEST[position]]

    records = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=proposal_files,
    )
    listed = DatasetFromList(records)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    mapped = MapDataset(listed, mapper)

    unshuffled = DistributedSampler(mapped, shuffle=False)
    # drop_last=False: the final batch may be smaller than the others.
    batches = torch.utils.data.sampler.BatchSampler(
        unshuffled, per_process_batch, drop_last=False)
    return torch.utils.data.DataLoader(
        mapped,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batches,
        collate_fn=trivial_batch_collator,
    )
def build_train_loader(cls, cfg):
    """Build the training loader with a training-mode ``DatasetMapper``.

    Returns:
        iterable: the result of ``build_detection_train_loader`` called with
        ``DatasetMapper(cfg, True)`` as the mapper.
    """
    return build_detection_train_loader(cfg, DatasetMapper(cfg, True))
def build_train_loader(cls, cfg):
    """Build the training loader via ``build_detection_train_loader``.

    Overwrite this method if a different data loader is wanted.

    Returns:
        iterable: the loader built with ``DatasetMapper(cfg, True)``.
    """
    train_mapper = DatasetMapper(cfg, True)
    return build_detection_train_loader(cfg, mapper=train_mapper)
def build_detection_test_query_loader(cfg, dataset_name, mapper=None):
    """Build an inference loader over ``dataset_name`` for query images.

    The per-process batch size is ``SOLVER.IMS_PER_BATCH`` divided by the
    world size (not a fixed batch size of 1).

    Args:
        cfg: a detectron2 CfgNode.
        dataset_name (str): a name of the dataset that's available in the
            DatasetCatalog.
        mapper (callable): a callable which takes a sample (dict) from the
            dataset and returns the format to be consumed by the model.
            By default it will be ``DatasetMapper(cfg, False)``.

    Returns:
        DataLoader: a torch DataLoader over the mapped dataset, collating
        with ``trivial_batch_collator``.
    """
    world = get_world_size()
    batch_total = cfg.SOLVER.IMS_PER_BATCH
    assert (
        batch_total % world == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        batch_total, world)
    assert (
        batch_total >= world
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        batch_total, world)
    per_process_batch = batch_total // world

    proposal_files = None
    if cfg.MODEL.LOAD_PROPOSALS:
        # The proposal file is taken at the position that `dataset_name`
        # occupies in DATASETS.TEST.
        position = list(cfg.DATASETS.TEST).index(dataset_name)
        proposal_files = [cfg.DATASETS.PROPOSAL_FILES_TEST[position]]

    records = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=proposal_files,
    )
    listed = DatasetFromList(records)
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    mapped = MapDataset(listed, mapper)

    inference_sampler = samplers.InferenceSampler(len(mapped))
    batches = torch.utils.data.sampler.BatchSampler(
        inference_sampler, per_process_batch, drop_last=False)
    return torch.utils.data.DataLoader(
        mapped,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batches,
        collate_fn=trivial_batch_collator,
    )
def build_detection_train_loader(cfg, mapper=None):
    """Build the training data loader described by ``cfg``.

    Steps:

    1. Load the dataset dicts for ``cfg.DATASETS.TRAIN``.
    2. Wrap them in ``DatasetFromList`` / ``MapDataset`` with ``mapper``.
    3. Pick the sampler named by ``cfg.DATALOADER.SAMPLER_TRAIN``.
    4. Hand everything to ``build_batch_data_loader``.

    Args:
        cfg (CfgNode): the config.
        mapper (callable): a callable which takes a sample (dict) from the
            dataset and returns the format to be consumed by the model.
            By default it will be ``DatasetMapper(cfg, True)``.

    Returns:
        The loader produced by ``build_batch_data_loader``.

    Raises:
        ValueError: if ``cfg.DATALOADER.SAMPLER_TRAIN`` is not a known sampler.
    """
    if cfg.MODEL.KEYPOINT_ON:
        min_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
    else:
        min_keypoints = 0
    if cfg.MODEL.LOAD_PROPOSALS:
        proposal_files = cfg.DATASETS.PROPOSAL_FILES_TRAIN
    else:
        proposal_files = None

    records = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=min_keypoints,
        proposal_files=proposal_files,
    )
    listed = DatasetFromList(records, copy=False)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    mapped = MapDataset(listed, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logging.getLogger(__name__).info(
        "Using training sampler {}".format(sampler_name))

    if sampler_name not in ("TrainingSampler", "RepeatFactorTrainingSampler"):
        raise ValueError("Unknown training sampler: {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(mapped))
    else:
        factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
            records, cfg.DATALOADER.REPEAT_THRESHOLD)
        sampler = RepeatFactorTrainingSampler(factors)

    return build_batch_data_loader(
        mapped,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
def build_hooks(self):
    """Add a ``LossEvalHook`` on the first test dataset to the parent's hooks.

    The hook runs every ``cfg.TEST.EVAL_PERIOD`` and is inserted just before
    the last parent hook.

    Returns:
        The list of hooks including the loss-evaluation hook.
    """
    hooks = super().build_hooks()
    # is_train=True because the labels are needed.
    labelled_mapper = DatasetMapper(self.cfg, True)
    val_loader = build_detection_val_loader(
        self.cfg,
        self.cfg.DATASETS.TEST[0],
        labelled_mapper,
    )
    val_loss_hook = LossEvalHook(
        self.cfg.TEST.EVAL_PERIOD,
        self.model,
        val_loader,
    )
    hooks.insert(-1, val_loss_hook)
    return hooks
# --------------------
def build_mapper(cls, cfg, is_train=True):
    """Build a ``DatasetMapper`` with extra colour augmentations.

    The list returned by ``detection_utils.build_augmentation`` is extended
    with ``T.RandomBrightness(0.9, 1.1)`` and ``MyColorAugmentation()``.

    Args:
        cfg: config node forwarded to the augmentation builder and the mapper.
        is_train: forwarded to the augmentation builder and the mapper.

    Returns:
        DatasetMapper: mapper using the extended augmentation list.
    """
    augmentations = detection_utils.build_augmentation(cfg, is_train)
    # NOTE(review): the two colour augmentations below are appended even when
    # is_train is False - confirm this is intended for evaluation.
    # TODO: define the remaining augmentation sequence (crop / rotation /
    # custom resize were left disabled).
    augmentations.extend([
        T.RandomBrightness(0.9, 1.1),
        MyColorAugmentation(),
    ])
    return DatasetMapper(cfg, is_train=is_train, augmentations=augmentations)
def build_detection_semisup_train_loader(cfg, mapper=None):
    """Build a training loader over the labeled split of the training data.

    The training dicts are split by ``divide_label_unlabel`` according to
    ``DATALOADER.SUP_PERCENT``; only the labeled part is loaded here.

    Args:
        cfg: config node.
        mapper: callable applied to every dataset dict. Defaults to
            ``DatasetMapper(cfg, True)``.

    Returns:
        The loader produced by ``build_batch_data_loader``.

    Raises:
        ValueError: if ``cfg.DATALOADER.SAMPLER_TRAIN`` is not a known sampler.
    """
    if cfg.MODEL.KEYPOINT_ON:
        min_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
    else:
        min_keypoints = 0
    if cfg.MODEL.LOAD_PROPOSALS:
        proposal_files = cfg.DATASETS.PROPOSAL_FILES_TRAIN
    else:
        proposal_files = None

    all_records = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=min_keypoints,
        proposal_files=proposal_files,
    )

    # Split according to the supervision percentage; the unlabeled part is
    # not used by this loader.
    labeled_records, _unlabeled_records = divide_label_unlabel(
        all_records,
        cfg.DATALOADER.SUP_PERCENT,
        cfg.DATALOADER.RANDOM_DATA_SEED,
        cfg.DATALOADER.RANDOM_DATA_SEED_PATH,
    )

    listed = DatasetFromList(labeled_records, copy=False)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    mapped = MapDataset(listed, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))

    if sampler_name not in ("TrainingSampler", "RepeatFactorTrainingSampler"):
        raise ValueError("Unknown training sampler: {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(mapped))
    else:
        factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
            labeled_records, cfg.DATALOADER.REPEAT_THRESHOLD)
        sampler = RepeatFactorTrainingSampler(factors)

    # Report the size of the labeled set and the supervision percentage.
    logger.info("Number of training samples " + str(len(mapped)))
    logger.info("Supervision percentage " + str(cfg.DATALOADER.SUP_PERCENT))

    return build_batch_data_loader(
        mapped,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
def build_train_loader(cls, cfg):
    """Register the ``fruits_nuts`` COCO dataset and build the train loader.

    The dataset is registered only if it is not registered yet, so this
    method can be called more than once without the catalog rejecting a
    duplicate registration.

    Returns:
        iterable: the result of ``build_detection_train_loader`` called with
        ``DatasetMapper(cfg, True)`` as the mapper.
    """
    # Function-scope import so the guard below does not depend on the
    # module-level import list.
    from detectron2.data import DatasetCatalog

    if "fruits_nuts" not in DatasetCatalog.list():
        register_coco_instances(
            "fruits_nuts", {},
            "./fruit_nuts/trainval.json",
            "./fruit_nuts/images")

    return build_detection_train_loader(cfg, DatasetMapper(cfg, True))
def build_weighted_detection_train_loader(cfg: CfgNode, mapper=None):
    """Build a training loader that repeats samples with per-dataset factors.

    Every dataset in ``cfg.DATASETS.TRAIN`` is loaded on its own and each of
    its samples gets the dataset's repeat factor from
    ``get_train_datasets_repeat_factors(cfg)``.

    Args:
        cfg: config node.
        mapper: callable applied to every dataset dict. Defaults to
            ``DatasetMapper(cfg, True)``.

    Returns:
        The loader produced by ``build_batch_data_loader``.
    """
    dataset_repeat_factors = get_train_datasets_repeat_factors(cfg)

    # Insertion-ordered so dataset dicts and repeat factors stay aligned.
    dataset_name_to_dicts = OrderedDict()
    for position, name in enumerate(cfg.DATASETS.TRAIN):
        if name in dataset_name_to_dicts:
            # A repeated name would be collapsed by the mapping anyway.
            continue
        # Each call loads a single dataset, so it must receive only the
        # proposal file belonging to that dataset, not the whole list.
        proposal_files = (
            [cfg.DATASETS.PROPOSAL_FILES_TRAIN[position]]
            if cfg.MODEL.LOAD_PROPOSALS else None
        )
        dataset_name_to_dicts[name] = get_detection_dataset_dicts(
            [name],
            filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
            min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
            if cfg.MODEL.KEYPOINT_ON
            else 0,
            proposal_files=proposal_files,
        )

    # One repeat factor per sample, built from the same mapping as the
    # sample list so both always have the same length and order.
    dataset_dicts = []
    repeat_factors = []
    for name, dicts in dataset_name_to_dicts.items():
        dataset_dicts.extend(dicts)
        repeat_factors.extend([dataset_repeat_factors[name]] * len(dicts))

    dataset = DatasetFromList(dataset_dicts, copy=False)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    logger.info(
        "Using WeightedTrainingSampler with repeat_factors={}".format(
            cfg.DATASETS.TRAIN_REPEAT_FACTOR
        )
    )
    sampler = RepeatFactorTrainingSampler(torch.tensor(repeat_factors))

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
def build_detection_train_loader(cfg):
    """Build the baseline trainer's loader, optionally on a labeled subset.

    Mirrors detectron2's ``build_detection_train_loader`` with one addition:
    the dataset dicts go through ``check_subsample_dataset`` before being
    wrapped.

    Args:
        cfg: config node; ``DATALOADER.SAMPLER_TRAIN`` must be
            ``"TrainingSampler"``.

    Returns:
        The loader produced by ``build_batch_data_loader``.

    Raises:
        AssertionError: if another training sampler is configured.
    """
    # CSD: only the plain TrainingSampler is supported here.
    assert cfg.DATALOADER.SAMPLER_TRAIN == "TrainingSampler", "Unsupported training sampler: {}".format(
        cfg.DATALOADER.SAMPLER_TRAIN)

    records = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS else None,
    )

    # CSD: subsample the dataset if needed.
    records = check_subsample_dataset(records, cfg)

    if comm.is_main_process():
        # Log the image count once, from the main process only.
        logger = setup_logger(name=__name__)
        logger.debug("Number of images in the dataset: {}".format(
            len(records)))
    _log_api_usage("dataset." + cfg.DATASETS.TRAIN[0])

    mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(DatasetFromList(records, copy=False), mapper)
    # Build the sampler exactly once, on the final dataset; the earlier
    # version also built and discarded one on the raw list.
    sampler = TrainingSampler(len(dataset))

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
def build_custom_train_loader(cfg, mapper=None):
    """Build a training loader that also supports ``ClassAwareSampler``.

    Adapted from detectron2's training-loader builder, with one extra
    sampler option.

    Args:
        cfg: config node.
        mapper: callable applied to every dataset dict. Defaults to
            ``DatasetMapper(cfg, True)``.

    Returns:
        The loader produced by ``build_batch_data_loader``.

    Raises:
        ValueError: if ``cfg.DATALOADER.SAMPLER_TRAIN`` is not a known sampler.
    """
    if cfg.MODEL.KEYPOINT_ON:
        min_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
    else:
        min_keypoints = 0
    if cfg.MODEL.LOAD_PROPOSALS:
        proposal_files = cfg.DATASETS.PROPOSAL_FILES_TRAIN
    else:
        proposal_files = None

    records = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=min_keypoints,
        proposal_files=proposal_files,
    )
    listed = DatasetFromList(records, copy=False)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    mapped = MapDataset(listed, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logging.getLogger(__name__).info(
        "Using training sampler {}".format(sampler_name))

    known_samplers = (
        "TrainingSampler",
        "RepeatFactorTrainingSampler",
        "ClassAwareSampler",
    )
    if sampler_name not in known_samplers:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(mapped))
    elif sampler_name == "RepeatFactorTrainingSampler":
        factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
            records, cfg.DATALOADER.REPEAT_THRESHOLD)
        sampler = RepeatFactorTrainingSampler(factors)
    else:
        sampler = ClassAwareSampler(records)

    return build_batch_data_loader(
        mapped,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
def build_classification_test_loader(cfg, dataset_name, mapper=None):
    """Build a batch-size-1 inference loader for a classification dataset.

    Args:
        cfg: a detectron2 CfgNode.
        dataset_name (str): a name of the dataset that's available in the
            DatasetCatalog.
        mapper (callable): a callable which takes a sample (dict) from the
            dataset and returns the format to be consumed by the model.
            By default it will be ``DatasetMapper(cfg, False)``.

    Returns:
        DataLoader: a torch DataLoader over the mapped dataset, collating
        with ``trivial_batch_collator``.
    """
    records = get_classification_dataset_dicts([dataset_name])
    listed = DatasetFromList(records)
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    mapped = MapDataset(listed, mapper)

    # One image per batch during inference.
    one_per_batch = torch.utils.data.sampler.BatchSampler(
        samplers.InferenceSampler(len(mapped)), 1, drop_last=False)
    return torch.utils.data.DataLoader(
        mapped,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=one_per_batch,
        collate_fn=trivial_batch_collator,
    )
def my_build_detection_train_loader(cfg,
                                    mapper=None,
                                    isShuffleData=True,
                                    curriculum_fraction=0):
    """Build a training loader with optional shuffling and curriculum subset.

    Args:
        cfg (CfgNode): the config.
        mapper (callable): a callable which takes a sample (dict) from the
            dataset and returns the format to be consumed by the model.
            By default it will be ``DatasetMapper(cfg, True)``.
        isShuffleData (bool): forwarded as ``shuffle`` to ``TrainingSampler``.
        curriculum_fraction: when 0, the ``TrainingSampler`` is sized to the
            whole dataset; otherwise to ``round(len(dataset) * fraction)``.

    Returns:
        The training data loader: an ``AspectRatioGroupedDataset`` when
        ``DATALOADER.ASPECT_RATIO_GROUPING`` is set, else a torch DataLoader
        with fixed-size batches.

    Raises:
        ValueError: if ``cfg.DATALOADER.SAMPLER_TRAIN`` is not a known sampler.
    """
    world = get_world_size()
    batch_total = cfg.SOLVER.IMS_PER_BATCH
    assert (
        batch_total % world == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        batch_total, world)
    assert (
        batch_total >= world
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        batch_total, world)
    per_process_batch = batch_total // world

    if cfg.MODEL.KEYPOINT_ON:
        min_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
    else:
        min_keypoints = 0
    if cfg.MODEL.LOAD_PROPOSALS:
        proposal_files = cfg.DATASETS.PROPOSAL_FILES_TRAIN
    else:
        proposal_files = None

    records = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=min_keypoints,
        proposal_files=proposal_files,
    )
    listed = DatasetFromList(records, copy=False)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    mapped = MapDataset(listed, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logging.getLogger(__name__).info(
        "Using training sampler {}".format(sampler_name))

    if sampler_name == "TrainingSampler":
        if curriculum_fraction == 0:
            # Default fraction: size the sampler to the whole dataset.
            sample_count = len(mapped)
        else:
            # Otherwise size it to that fraction of the dataset.
            sample_count = int(round(len(mapped) * curriculum_fraction))
        sampler = samplers.TrainingSampler(sample_count, shuffle=isShuffleData)
    elif sampler_name == "RepeatFactorTrainingSampler":
        sampler = samplers.RepeatFactorTrainingSampler(
            records, cfg.DATALOADER.REPEAT_THRESHOLD)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
        # Yield individual mapped dicts (no batching here); the grouping
        # wrapper forms the batches.
        per_item_loader = torch.utils.data.DataLoader(
            mapped,
            sampler=sampler,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=None,
            collate_fn=operator.itemgetter(0),
            worker_init_fn=worker_init_reset_seed,
        )
        return AspectRatioGroupedDataset(per_item_loader, per_process_batch)

    # drop_last=True so every batch has the same size.
    fixed_batches = torch.utils.data.sampler.BatchSampler(
        sampler, per_process_batch, drop_last=True)
    return torch.utils.data.DataLoader(
        mapped,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=fixed_batches,
        collate_fn=trivial_batch_collator,
        worker_init_fn=worker_init_reset_seed,
    )
def get_lvis_train_dataloader(cfg, h, w):
    """Build the training loader with ``wrapper`` bound around the default mapper.

    Args:
        cfg: config node.
        h: forwarded to ``wrapper`` as ``h``.
        w: forwarded to ``wrapper`` as ``w``.

    Returns:
        The loader returned by ``build_detection_train_loader``.
    """
    wrapped_mapper = partial(
        wrapper,
        default_m=DatasetMapper(cfg, is_train=True),
        h=h,
        w=w,
    )
    return build_detection_train_loader(cfg, mapper=wrapped_mapper)
def build_hooks(self):
    """Assemble the training hooks for the student/teacher trainer.

    Order: iteration timer, LR scheduler, optional PreciseBN, periodic
    checkpointer (main process), student and teacher evaluation hooks,
    optional validation-loss hooks for both models, periodic writer (main
    process).

    Returns:
        list: the hooks; the PreciseBN slot is ``None`` when disabled.
    """
    cfg = self.cfg.clone()
    cfg.defrost()
    cfg.DATALOADER.NUM_WORKERS = 0  # save some memory and time for PreciseBN

    ret = [
        hooks.IterationTimer(),
        hooks.LRScheduler(self.optimizer, self.scheduler),
    ]
    if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model):
        # Runs at the same frequency as (but before) evaluation, on a fresh
        # data loader so training is not affected.
        ret.append(
            hooks.PreciseBN(
                cfg.TEST.EVAL_PERIOD,
                self.model,
                self.build_train_loader(cfg),
                cfg.TEST.PRECISE_BN.NUM_ITER,
            ))
    else:
        ret.append(None)

    # PreciseBN goes before the checkpointer because it updates the model,
    # which then needs to be saved. If checkpointing uses a different
    # frequency, some checkpoints may hold more precise statistics than
    # others.
    if comm.is_main_process():
        ret.append(
            hooks.PeriodicCheckpointer(self.checkpointer,
                                       cfg.SOLVER.CHECKPOINT_PERIOD))

    def test_and_save_results_student():
        student_results = self.test(self.cfg, self.model)
        self._last_eval_results_student = student_results
        # Suffix the keys so they are told apart from the teacher's.
        return {
            name + "_student": student_results[name]
            for name in student_results.keys()
        }

    def test_and_save_results_teacher():
        self._last_eval_results_teacher = self.test(self.cfg,
                                                    self.model_teacher)
        return self._last_eval_results_teacher

    for evaluate in (test_and_save_results_student,
                     test_and_save_results_teacher):
        ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, evaluate))

    if cfg.TEST.VAL_LOSS:
        # Skipping this saves training time. One hook per model, each with
        # its own loader over the first test dataset.
        for network, label in ((self.model, "student"),
                               (self.model_teacher, "")):
            ret.append(
                LossEvalHook(
                    cfg.TEST.EVAL_PERIOD,
                    network,
                    build_detection_test_loader(
                        self.cfg,
                        self.cfg.DATASETS.TEST[0],
                        DatasetMapper(self.cfg, True),
                    ),
                    model_output="loss_proposal",
                    model_name=label,
                ))

    if comm.is_main_process():
        # Writers run last so that evaluation metrics are written.
        ret.append(hooks.PeriodicWriter(self.build_writers(), period=20))
    return ret
def get_lvis_test_dataloader(cfg, h, w):
    """Build the ``lvis_v0.5_val`` test loader with ``wrapper`` around the mapper.

    Args:
        cfg: config node.
        h: forwarded to ``wrapper`` as ``h``.
        w: forwarded to ``wrapper`` as ``w``.

    Returns:
        The loader returned by ``build_detection_test_loader``.
    """
    wrapped_mapper = partial(
        wrapper,
        default_m=DatasetMapper(cfg, is_train=False),
        h=h,
        w=w,
    )
    return build_detection_test_loader(
        cfg, 'lvis_v0.5_val', mapper=wrapped_mapper)
def build_classification_train_loader(cfg, mapper=None, multiplier=1):
    """Build the training loader for the classifier, optionally on a sampled set.

    When ``cfg.DATASETS.WEAK_CLASSIFIER_SAMPLE_NUM`` is positive, the
    datasets in ``cfg.DATASETS.CLASSIFIER_TRAIN`` are split into per-class
    records, sampled per class, and registered under the name
    ``"classifier_train_sampled"``. Otherwise the configured datasets are
    used as they are.

    Args:
        cfg (CfgNode): the config.
        mapper (callable): a callable which takes a sample (dict) from the
            dataset and returns the format to be consumed by the model.
            By default it will be `DatasetMapper(cfg, True)`.
        multiplier: factor applied to the per-process batch size (the result
            is truncated to an int).

    Returns:
        The training data loader: an ``AspectRatioGroupedDataset`` when
        ``DATALOADER.ASPECT_RATIO_GROUPING`` is set, else a torch DataLoader
        with fixed-size batches.

    Raises:
        ValueError: if ``cfg.DATALOADER.SAMPLER_TRAIN`` is not a known sampler.
    """
    num_workers = get_world_size()
    images_per_batch = cfg.SOLVER.IMS_PER_BATCH
    sample_num = cfg.DATASETS.WEAK_CLASSIFIER_SAMPLE_NUM
    assert (
        images_per_batch % num_workers == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        images_per_batch, num_workers)
    assert (
        images_per_batch >= num_workers
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        images_per_batch, num_workers)
    images_per_worker = images_per_batch // num_workers
    images_per_worker = int(images_per_worker * multiplier)

    if sample_num > 0:
        # Seeds NumPy's global RNG, so the sampling below depends on the
        # exact order of the np.random calls.
        np.random.seed(cfg.DATASETS.SAMPLE_SEED)
        print("Setting sampling seed:", cfg.DATASETS.SAMPLE_SEED)
        dataset_names = cfg.DATASETS.CLASSIFIER_TRAIN
        if isinstance(dataset_names, str):
            dataset_names = [dataset_names]
        dataset_dicts = [
            DatasetCatalog.get(dataset_name) for dataset_name in dataset_names
        ]
        dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))

        # class id -> list of records, each holding only that class's
        # annotations of one image.
        label_to_annotation_dict = {
            e: []
            for e in range(cfg.MODEL.ROI_HEADS.NUM_CLASSES)
        }
        for e in dataset_dicts:
            per_label_record = {}
            for ann in e['annotations']:
                if ann['category_id'] in per_label_record:
                    per_label_record[ann['category_id']]['annotations'].append(
                        ann)
                else:
                    # First annotation of this class in this image: copy the
                    # record and keep only this annotation.
                    record = copy.deepcopy(e)
                    # filter annotations
                    annotations_filtered = [ann]
                    record['annotations'] = annotations_filtered
                    per_label_record[ann['category_id']] = record
            for key in per_label_record.keys():
                label_to_annotation_dict[key].append(per_label_record[key])

        label_to_annotation_dict_sampled = {}
        for id_class, ann_list in label_to_annotation_dict.items():
            if id_class in cfg.DATASETS.FEWSHOT.BASE_CLASSES_ID:
                # Base classes.
                if not cfg.DATASETS.OVER_SAMPLE:
                    if cfg.DATASETS.BASE_MULTIPLIER > 0:
                        # Try sampling without replacement first; on any
                        # error retry with replacement.
                        # NOTE(review): bare `except` also hides unrelated
                        # errors - consider narrowing it.
                        try:
                            ann_list_sampled = np.random.choice(
                                ann_list,
                                size=int(sample_num *
                                         cfg.DATASETS.BASE_MULTIPLIER),
                                replace=False)
                        except:
                            ann_list_sampled = np.random.choice(
                                ann_list,
                                size=int(sample_num *
                                         cfg.DATASETS.BASE_MULTIPLIER),
                                replace=True)
                    else:
                        ann_list_sampled = ann_list
                else:
                    print("BASE OVER SAMPLING")
                    ann_list_sampled = ann_list
                label_to_annotation_dict_sampled[id_class] = ann_list_sampled
            else:
                # All remaining classes.
                if not cfg.DATASETS.OVER_SAMPLE:
                    # NOTE(review): this branch is gated on BASE_MULTIPLIER
                    # although it handles non-base classes - confirm.
                    if cfg.DATASETS.BASE_MULTIPLIER > 0:
                        try:
                            ann_list_sampled = np.random.choice(
                                ann_list, size=sample_num, replace=False)
                        except:
                            ann_list_sampled = np.random.choice(
                                ann_list, size=sample_num, replace=True)
                        # NOTE(review): placement after the try/except is
                        # assumed - confirm against the formatted source.
                        if cfg.DATASETS.NOVEL_MULTIPLER > 0:
                            ann_list_sampled = np.repeat(
                                ann_list_sampled,
                                cfg.DATASETS.NOVEL_MULTIPLER)
                    # NOTE(review): pairing of this `else` with the
                    # BASE_MULTIPLIER check is assumed - confirm.
                    else:
                        ann_list_sampled = []
                else:
                    try:
                        ann_list_sampled_temp = np.random.choice(
                            ann_list, size=sample_num, replace=False)
                        if not cfg.DATASETS.SAMPLE_WITH_REPLACEMENT:
                            print("OVER SAMPLING")
                            # Resample the picked records up to the original
                            # list length.
                            ann_list_sampled = np.random.choice(
                                ann_list_sampled_temp,
                                size=len(ann_list),
                                replace=True)
                        else:
                            # Draws a fresh subset, replacing the one above.
                            ann_list_sampled_temp = np.random.choice(
                                ann_list, size=sample_num, replace=False)
                            num_repeat = len(ann_list) // len(
                                ann_list_sampled_temp)
                            num_remainder = len(ann_list) % len(
                                ann_list_sampled_temp)
                            # Repeat the subset, then top up with random
                            # picks to reach len(ann_list).
                            ann_list_sampled = np.repeat(
                                ann_list_sampled_temp, num_repeat)
                            if num_remainder > 0:
                                ann_list_sampled = np.hstack(
                                    (ann_list_sampled,
                                     np.random.choice(ann_list_sampled_temp,
                                                      size=num_remainder,
                                                      replace=True)))
                            print("OVER SAMPLING FIXED NEW",
                                  len(ann_list_sampled_temp),
                                  len(ann_list_sampled))
                    except:
                        # Any failure above falls back to the full list.
                        ann_list_sampled = ann_list
                label_to_annotation_dict_sampled[id_class] = ann_list_sampled

        dataset_dicts = []
        for k, v in label_to_annotation_dict_sampled.items():
            dataset_dicts.extend(v)

        # Register the sampled records as a new dataset, reusing the class
        # names of the first source dataset.
        DatasetCatalog.register("classifier_train_sampled",
                                lambda: dataset_dicts)
        MetadataCatalog.get("classifier_train_sampled").set(
            thing_classes=MetadataCatalog.get(dataset_names[0]).thing_classes,
            evaluator_type='pascal_voc')
        dataset_name = ('classifier_train_sampled', )
        # print([(x['image_id'], len(x['annotations'])) for x in dataset_dicts[:50]])
        # print_instances_class_histogram_1(dataset_dicts, MetadataCatalog.get(dataset_names[0]).thing_classes)
    else:
        dataset_name = cfg.DATASETS.CLASSIFIER_TRAIN

    dataset_dicts = get_detection_dataset_dicts(
        dataset_name,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_CLASSIFIER_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS else None)
    dataset = DatasetFromList(dataset_dicts, copy=False)

    # # filtering
    # dataset_filtered = []
    # for sample in dataset:
    #     e_class_ids = set([e['category_id'] for e in sample['annotations']])
    #     for e_class_ids_ in e_class_ids:
    #         if e_class_ids_ in cfg.DATASETS.FEWSHOT.NOVEL_CLASSES_ID:
    #             dataset_filtered.append(sample)
    #             break
    # dataset = dataset_filtered

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = samplers.TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        sampler = samplers.RepeatFactorTrainingSampler(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
        data_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=sampler,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=None,
            collate_fn=operator.itemgetter(
                0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker)
    else:
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, images_per_worker, drop_last=True)
        # drop_last so the batch always have the same size
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
            worker_init_fn=worker_init_reset_seed,
        )

    return data_loader
def build_detection_query_loader(cfg,
                                 dataset_names_tuple,
                                 dataset_proposal_files_tuple,
                                 mapper=None,
                                 is_train=True):
    """
    Build the data loader for the query set.

    Adapted from detectron2.data.build_detection_train_loader.

    Args:
        cfg: config node. Read for SOLVER.IMS_PER_BATCH, the DATALOADER.*
            options, MODEL.KEYPOINT_ON and MODEL.LOAD_PROPOSALS.
        dataset_names_tuple: dataset names to load. They are passed in
            explicitly because the meta-setup takes them from different
            variables and the cfg cannot be modified (CfgNode is immutable).
        dataset_proposal_files_tuple: proposal files forwarded to
            get_detection_dataset_dicts; only used when
            cfg.MODEL.LOAD_PROPOSALS is set.
        mapper: callable applied to every dataset dict. When None, a
            DatasetMapper(cfg, is_train=is_train) is created.
        is_train: when true, the dataset dicts are run through
            duplicate_data_acc_to_annotation_categories, so an image with
            five annotations appears five times with different annotations.

    Returns:
        A torch DataLoader, wrapped in AspectRatioGroupedDataset when
        cfg.DATALOADER.ASPECT_RATIO_GROUPING is enabled.

    Raises:
        ValueError: if cfg.DATALOADER.SAMPLER_TRAIN names an unknown sampler.
    """
    world_size = get_world_size()
    batch_size = cfg.SOLVER.IMS_PER_BATCH

    divisible_msg = "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({})."
    assert batch_size % world_size == 0, divisible_msg.format(
        batch_size, world_size)
    larger_msg = "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({})."
    assert batch_size >= world_size, larger_msg.format(
        batch_size, world_size)
    images_per_worker = batch_size // world_size

    # Resolve the config-dependent arguments up front, in call order.
    filter_empty = cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS
    if cfg.MODEL.KEYPOINT_ON:
        min_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
    else:
        min_keypoints = 0
    if cfg.MODEL.LOAD_PROPOSALS:
        proposal_files = dataset_proposal_files_tuple
    else:
        proposal_files = None

    records = get_detection_dataset_dicts(
        dataset_names_tuple,
        filter_empty=filter_empty,
        min_keypoints=min_keypoints,
        proposal_files=proposal_files,
    )
    logger = logging.getLogger(__name__)

    # Train: split annotations class-wise
    if is_train:
        print("Query dataset num instances before annotation-wise duplication: {}".format(
            len(records)))
        records = duplicate_data_acc_to_annotation_categories(records)
        print("Query dataset num instances after annotation-wise duplication: {}".format(
            len(records)))

    mapped_dataset = MapDataset(
        DatasetFromList(records, copy=False),
        DatasetMapper(cfg, is_train=is_train) if mapper is None else mapper,
    )

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name not in ("TrainingSampler", "RepeatFactorTrainingSampler"):
        raise ValueError("Unknown training sampler: {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = samplers.TrainingSampler(len(mapped_dataset))
    else:
        sampler = samplers.RepeatFactorTrainingSampler(
            records, cfg.DATALOADER.REPEAT_THRESHOLD)

    if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
        # Don't batch here: yield individual mapped dicts and let the
        # aspect-ratio wrapper group them.
        per_item_loader = torch.utils.data.DataLoader(
            mapped_dataset,
            sampler=sampler,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=None,
            collate_fn=operator.itemgetter(0),
            worker_init_fn=worker_init_reset_seed,
        )
        return AspectRatioGroupedDataset(per_item_loader, images_per_worker)

    # drop_last so the batch always has the same size
    fixed_size_batches = torch.utils.data.sampler.BatchSampler(
        sampler, images_per_worker, drop_last=True)
    return torch.utils.data.DataLoader(
        mapped_dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=fixed_size_batches,
        collate_fn=trivial_batch_collator,
        worker_init_fn=worker_init_reset_seed,
    )
def build_detection_train_loader_drop_ids(cfg, drop_image_ids, mapper=None):
    """
    A rewrite of the detectron2.data.build.build_detection_train_loader
    function that additionally drops the images whose ``image_id`` is listed
    in ``drop_image_ids``.

    Args:
        cfg: config node. Read for SOLVER.IMS_PER_BATCH, DATASETS.TRAIN,
            DATASETS.PROPOSAL_FILES_TRAIN, the DATALOADER.* options,
            MODEL.KEYPOINT_ON and MODEL.LOAD_PROPOSALS.
        drop_image_ids: container supporting ``in``; every dataset dict whose
            ``image_id`` is contained in it is excluded.
        mapper: callable applied to every dataset dict. When None, a
            DatasetMapper(cfg, True) is created.

    Returns:
        A torch DataLoader, wrapped in AspectRatioGroupedDataset when
        cfg.DATALOADER.ASPECT_RATIO_GROUPING is enabled. The upstream
        function describes this as an infinite iterator of training data.

    Raises:
        ValueError: if cfg.DATALOADER.SAMPLER_TRAIN names an unknown sampler.
    """
    num_workers = get_world_size()
    images_per_batch = cfg.SOLVER.IMS_PER_BATCH
    assert (
        images_per_batch % num_workers == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        images_per_batch, num_workers)
    assert (
        images_per_batch >= num_workers
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        images_per_batch, num_workers)
    images_per_worker = images_per_batch // num_workers

    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS else None,
    )

    # Filter once and reuse the filtered list for BOTH the dataset and the
    # sampler. The sampler emits indices into the dataset, so it has to be
    # built from the same list the dataset wraps.
    kept_dicts = [
        dd for dd in dataset_dicts if dd['image_id'] not in drop_image_ids
    ]
    dataset = DatasetFromList(kept_dicts, copy=False)

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = samplers.TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        # Built from the filtered dicts so indices and repeat factors line up
        # with `dataset` (the unfiltered list is longer and differently
        # ordered once images have been dropped).
        sampler = samplers.RepeatFactorTrainingSampler(
            kept_dicts, cfg.DATALOADER.REPEAT_THRESHOLD)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
        data_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=sampler,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=None,
            collate_fn=operator.itemgetter(
                0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        data_loader = AspectRatioGroupedDataset(data_loader,
                                                images_per_worker)
    else:
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, images_per_worker, drop_last=True)
        # drop_last so the batch always has the same size
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
            worker_init_fn=worker_init_reset_seed,
        )

    return data_loader
def test_recognition(model_yaml, checkpoint, dataset, annotations, imagedir, outdir=None, use_rpn=False, record_individual_scores=False):
    """
    Computes detections and uses them to compute recognition accuracies.

    Arguments:
        model_yaml: Path to model config in yaml format.
        checkpoint: Checkpoint handed to DetectionCheckpointer.load to restore the model weights.
        dataset: Dataset name, used to register the test set and to name output files.
        annotations: Path to ground truth annotations (json file with COCO-style annotations).
        imagedir: Path to image directory.
        outdir: Directory where output files are stored. When None is passed, the output directory specified in the
                model's config file is used. The directory (including missing parents) is created if needed.
        use_rpn: If True, the region proposal network of the model is used instead of bounding boxes from the ground truth.
        record_individual_scores: Forwarded unchanged to detection2accuracy.
    """
    # Register testset
    register_coco_instances(dataset, {}, annotations, imagedir)

    # Load model
    cfg = get_cfg()
    with open(model_yaml) as f:
        cfg = cfg.load_cfg(f)
    print("Numer of classes: {}".format(cfg.MODEL.ROI_HEADS.NUM_CLASSES))
    cfg.DATASETS.TEST = (dataset,)
    model = build_model(cfg)

    # Create outdir
    if outdir is None:
        outdir = cfg.OUTPUT_DIR
    # parents=True so a nested output path whose parent does not exist yet is created as well
    pathlib.Path(outdir).mkdir(parents=True, exist_ok=True)
    print("Evaluation output directory: " + outdir)

    # Create data loader
    if not use_rpn:
        # returns a list of dicts. Every entry in the list corresponds to one sample, represented by a dict.
        dataset_dicts = detectron2.data.get_detection_dataset_dicts(dataset)

        # add proposal boxes
        for s in dataset_dicts:
            # One row per annotation, copied from ann["bbox"]: a kx4 matrix for k ground-truth boxes.
            boxes = np.array([ann["bbox"] for ann in s["annotations"]])
            if boxes.size == 0:
                # keep the array 2-D even when a sample carries no annotations
                boxes = boxes.reshape(0, 4)
            s["proposal_boxes"] = boxes
            s["proposal_objectness_logits"] = np.full((boxes.shape[0],), 10)  # logit of 10 is 99.999...%
            # 1 # (x0, y0, w, h) in absolute floating points coordinates
            s["proposal_bbox_mode"] = detectron2.structures.BoxMode.XYWH_ABS
        print("Proposal boxes added.")

        model.proposal_generator = None  # deactivate such that precomputed proposals are used
        print("Region proposal deactivated, ground truth bounding boxes are used.")

        # NOTE(review): this call passes the dataset dicts as the first positional argument and no dataset name,
        # unlike the (cfg, dataset_name) call in the else-branch -- confirm which build_detection_test_loader
        # is in scope here.
        val_loader = build_detection_test_loader(dataset_dicts,
                                                 mapper=DatasetMapper(is_train=False,
                                                                      augmentations=[],
                                                                      image_format=cfg.INPUT.FORMAT,
                                                                      precomputed_proposal_topk=500))
    else:
        val_loader = build_detection_test_loader(cfg, dataset)

    # load model state (weights) from checkpoint
    DetectionCheckpointer(model).load(checkpoint)

    # evaluate detections
    evaluator = COCOEvaluator(dataset, ("bbox",), False, output_dir=outdir)
    result = inference_on_dataset(model, val_loader, evaluator)
    print_csv_format(result)

    with open(os.path.join(outdir, "evaluation_" + dataset + ".json"), "w") as outfile:
        json.dump(result, outfile)

    # compute accuracies
    detection2accuracy(detections=os.path.join(outdir, "coco_instances_results.json"),
                       groundtruth=annotations,
                       outdir=outdir,
                       record_individual_scores=record_individual_scores)
def build_train_loader(cls, cfg):
    """
    Build the training data loader.

    The mapper is a DatasetMapper created in training mode whose
    augmentations come from build_train_aug(cfg); it is handed to
    build_detection_train_loader together with cfg.
    """
    train_augmentations = build_train_aug(cfg)
    train_mapper = DatasetMapper(cfg, True, augmentations=train_augmentations)
    return build_detection_train_loader(cfg, mapper=train_mapper)
def build_test_loader(cls, cfg, dataset_name):
    """
    Build the test data loader for ``dataset_name``.

    Uses a DatasetMapper created with the training flag set to False and
    passes it to build_detection_test_loader along with cfg and the name.
    """
    inference_mapper = DatasetMapper(cfg, False)
    return build_detection_test_loader(
        cfg, dataset_name, mapper=inference_mapper)