def test_mxnet_iterator_last_batch_pad_last_batch():
    num_gpus = 1
    batch_size = 100
    iters = 0

    pipes, data_size = create_pipeline(
        lambda gpu: COCOReaderPipeline(batch_size=batch_size, num_threads=4, shard_id=gpu,
                                       num_gpus=num_gpus, data_paths=data_sets[0],
                                       random_shuffle=True, stick_to_shard=False,
                                       shuffle_after_epoch=False, pad_last_batch=True),
        batch_size, num_gpus)

    dali_train_iter = MXNetIterator(pipes, [("ids", MXNetIterator.DATA_TAG)],
                                    size=pipes[0].epoch_size("Reader"),
                                    fill_last_batch=True)

    img_ids_list, img_ids_list_set, mirrored_data, _, _ = \
        gather_ids(dali_train_iter, lambda x: x.data[0].squeeze().asnumpy(),
                   lambda x: x.pad, data_size)

    assert len(img_ids_list) > data_size
    assert len(img_ids_list_set) == data_size
    assert len(set(mirrored_data)) == 1

    dali_train_iter.reset()
    next_img_ids_list, next_img_ids_list_set, next_mirrored_data, _, _ = \
        gather_ids(dali_train_iter, lambda x: x.data[0].squeeze().asnumpy(),
                   lambda x: x.pad, data_size)

    assert len(next_img_ids_list) > data_size
    assert len(next_img_ids_list_set) == data_size
    assert len(set(next_mirrored_data)) == 1
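# The test above relies on a `gather_ids` helper defined elsewhere in the DALI test suite.
# The following is only a minimal sketch of what such a helper might do, written to make the
# assertions readable; the name, return layout, and details are assumptions, not the actual
# DALI utility (the real helper returns two further values that are ignored by the caller).
# It walks one epoch of the iterator, collects every batch's sample ids, and records the ids
# that fall in the padded tail of the last batch; with pad_last_batch=True the reader repeats
# the final sample there, which is why the test expects that set to contain a single id.
def gather_ids_sketch(dali_iter, data_getter, pad_getter, data_size):
    img_ids = []
    padded_ids = []
    for batch in dali_iter:
        ids = data_getter(batch[0])        # ids of the samples in this batch
        img_ids.extend(ids.tolist())
        pad = pad_getter(batch[0])         # number of padded samples in this batch
        if pad:
            padded_ids.extend(ids[-pad:].tolist())
    return img_ids, set(img_ids), padded_ids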
def _get_dali_dataloader(net, train_dataset, val_dataset, data_shape,
                         global_batch_size, num_workers, devices, ctx, horovod):
    width, height = data_shape, data_shape
    with autograd.train_mode():
        _, _, anchors = net(mx.nd.zeros((1, 3, height, width), ctx=ctx))
    anchors = anchors.as_in_context(mx.cpu())

    if horovod:
        batch_size = global_batch_size // hvd.size()
        pipelines = [SSDDALIPipeline(device_id=hvd.local_rank(), batch_size=batch_size,
                                     data_shape=data_shape, anchors=anchors,
                                     num_workers=num_workers,
                                     dataset_reader=train_dataset[0])]
    else:
        num_devices = len(devices)
        batch_size = global_batch_size // num_devices
        pipelines = [SSDDALIPipeline(device_id=device_id, batch_size=batch_size,
                                     data_shape=data_shape, anchors=anchors,
                                     num_workers=num_workers,
                                     dataset_reader=train_dataset[i])
                     for i, device_id in enumerate(devices)]

    epoch_size = train_dataset[0].size()
    if horovod:
        epoch_size //= hvd.size()
    train_loader = DALIGenericIterator(
        pipelines,
        [('data', DALIGenericIterator.DATA_TAG),
         ('bboxes', DALIGenericIterator.LABEL_TAG),
         ('label', DALIGenericIterator.LABEL_TAG)],
        epoch_size, auto_reset=True)

    # validation
    if not horovod or hvd.rank() == 0:
        val_batchify_fn = Tuple(Stack(), Pad(pad_val=-1))
        val_loader = gluon.data.DataLoader(
            val_dataset.transform(SSDDefaultValTransform(width, height)),
            global_batch_size, False, batchify_fn=val_batchify_fn,
            last_batch='keep', num_workers=num_workers)
    else:
        val_loader = None
    return train_loader, val_loader
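# The first lines of the function above hide a useful trick: gluoncv SSD models only expose
# their anchor boxes when run in train mode, so a single dummy forward pass is used to pull
# the anchors out for the DALI pipeline. A standalone sketch of just that step; the model
# name and input size are assumptions chosen for illustration.
import mxnet as mx
from mxnet import autograd
from gluoncv import model_zoo

net = model_zoo.get_model('ssd_512_resnet50_v1_voc', pretrained_base=False)
net.initialize()
with autograd.train_mode():
    # in train mode the SSD forward pass returns (class predictions, box predictions, anchors)
    _, _, anchors = net(mx.nd.zeros((1, 3, 512, 512)))
anchors = anchors.as_in_context(mx.cpu())
print(anchors.shape)  # (1, num_anchors, 4)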
def get_inference_iterator(pipeline):
    dali_iterator = DALIGenericIterator(
        pipelines=pipeline,
        output_map=[('data', DALIGenericIterator.DATA_TAG),
                    ('original_shape', DALIGenericIterator.LABEL_TAG),
                    ('id', DALIGenericIterator.LABEL_TAG)],
        size=pipeline.shard_size,
        auto_reset=True,
        squeeze_labels=False,
        dynamic_shape=True,
        fill_last_batch=False,
        last_batch_padded=True)
    # double buffering will not work with variable tensor shape
    iterator = SSDIterator(iterator=dali_iterator, double_buffer=False, remove_padding=True)
    return iterator
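# SSDIterator(remove_padding=True) builds on the fact that, with fill_last_batch=False and
# last_batch_padded=True, the per-GPU mx.io.DataBatch reports how many trailing samples of
# the final batch are padding via its `pad` attribute. Below is an illustrative sketch of
# that trimming idea only; it is not the actual SSDIterator implementation, and the wrapper
# name and output layout are assumptions.
def strip_padding_sketch(dali_iter):
    for batch in dali_iter:
        data_batch = batch[0]                      # per-GPU mx.io.DataBatch
        pad = data_batch.pad or 0                  # padded samples at the end of this batch
        arrays = list(data_batch.data) + list(data_batch.label)
        if pad:
            # drop the padded tail so only real samples reach the model
            arrays = [arr[:arr.shape[0] - pad] for arr in arrays]
        yield arrays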
def get_training_iterator(pipeline, batch_size, synthetic=False):
    iterator = DALIGenericIterator(
        pipelines=[pipeline],
        output_map=[('data', DALIGenericIterator.DATA_TAG),
                    ('bboxes', DALIGenericIterator.LABEL_TAG),
                    ('label', DALIGenericIterator.LABEL_TAG)],
        size=pipeline.shard_size,
        auto_reset=True)

    iterator = SSDIterator(iterator=iterator, double_buffer=True, remove_padding=False)

    if batch_size != pipeline.batch_size:
        assert not pipeline.batch_size % batch_size, "batch size must divide the pipeline batch size"
        iterator = RateMatchInputIterator(ssd_iterator=iterator,
                                          input_batch_size=pipeline.batch_size,
                                          output_batch_size=batch_size)

    if synthetic:
        iterator = SyntheticInputIterator(iterator=iterator)

    return iterator
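# RateMatchInputIterator serves training batches that are smaller than the DALI pipeline's
# batch by slicing each large pipeline batch into several output batches, which is why the
# divisibility assert above is required. The class below is a minimal, self-contained sketch
# of that rate-matching idea; the name and slicing details are assumptions, not the real class.
class RateMatchSketch:
    def __init__(self, source, input_batch_size, output_batch_size):
        assert input_batch_size % output_batch_size == 0, \
            "output batch size must divide the pipeline batch size"
        self._source = iter(source)
        self._in_bs = input_batch_size
        self._out_bs = output_batch_size
        self._buffer = None
        self._offset = 0

    def __iter__(self):
        return self

    def __next__(self):
        # refill once the previously fetched pipeline batch has been fully consumed
        if self._buffer is None or self._offset >= self._in_bs:
            self._buffer = next(self._source)
            self._offset = 0
        start, end = self._offset, self._offset + self._out_bs
        self._offset = end
        # slice every array in the buffered batch down to the requested output batch size
        return [x[start:end] for x in self._buffer]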
def get_dataloader(self):
    logging.info('getting data loader.')
    num_devices = len(self.ctx)
    thread_batch_size = self.batch_size // num_devices

    print("train dataloader")
    train_pipelines = [
        SSDTrainPipeline(split=self.train_split,
                         batch_size=thread_batch_size,
                         data_shape=self.input_shape[0],
                         num_shards=num_devices,
                         device_id=i,
                         anchors=self.anchors,
                         num_workers=16)
        for i in range(num_devices)]
    epoch_size = train_pipelines[0].size()
    train_loader = DALIGenericIterator(
        train_pipelines,
        [('data', DALIGenericIterator.DATA_TAG),
         ('bboxes', DALIGenericIterator.LABEL_TAG),
         ('label', DALIGenericIterator.LABEL_TAG)],
        epoch_size, auto_reset=True)

    print("val dataloader")
    val_pipelines = [
        ValPipeline(split=self.val_split,
                    batch_size=thread_batch_size,
                    data_shape=self.input_shape[0],
                    num_shards=num_devices,
                    device_id=i,
                    num_workers=16)
        for i in range(num_devices)]
    epoch_size = val_pipelines[0].size()
    val_loader = ValLoader(val_pipelines, epoch_size, thread_batch_size, self.input_shape)

    print('load dataloader done')
    return train_loader, val_loader