def get_data_iterators(batch_size, dali_num_threads, train_dir, file_list, dali_nvjpeg_memory_padding, type_config, channel_last, comm, training=True):
    r'''Creates and returns a single DALI data iterator.

    The dataset is partitioned in distributed training mode according to
    comm rank and number of processes.

    Args:
        batch_size (int): Per-process batch size.
        dali_num_threads (int): Number of DALI worker threads.
        train_dir (str): Directory containing the image files.
        file_list (str): Path to the file list consumed by the reader.
        dali_nvjpeg_memory_padding (int): nvJPEG device memory padding.
        type_config: dtype passed to the pipeline.
        channel_last (bool): Whether the pipeline emits NHWC layout.
        comm: Communicator providing `rank` and `n_procs`.
        training (bool): Selects TrainPipeline (True) or ValPipeline (False).

    Returns:
        A DaliIterator with its `size` attribute set to the per-shard
        number of samples.
    '''
    # Fix: was `cls_name`, which misleadingly held the class object itself,
    # and the result was called `train_pipe` even for the validation case.
    pipeline_cls = TrainPipeline if training else ValPipeline
    pipe = pipeline_cls(batch_size, dali_num_threads, comm.rank,
                        train_dir, file_list,
                        dali_nvjpeg_memory_padding,
                        seed=comm.rank + 1,
                        num_shards=comm.n_procs,
                        channel_last=channel_last,
                        dtype=type_config)
    data = dali_iterator.DaliIterator(pipe)
    # NOTE(review): floor division drops remainder samples on uneven shards;
    # sibling helpers use int_div_ceil for validation — confirm this is the
    # intended behavior for callers of this function.
    data.size = pipe.epoch_size("Reader") // comm.n_procs
    return data
def get_val_data_iterator(args, comm, channels, spatial_size=(224, 224), norm_config='default'):
    '''Build the sharded DALI validation iterator.

    Normalization mean/std come from `norm_config`; a missing std falls
    back to all-ones (i.e. no scaling). The iterator's `size` is the
    ceiling of the per-shard sample count so every sample is covered.
    '''
    mean, std = get_normalize_config(norm_config)
    std = [1., 1., 1.] if std is None else std
    val_pipe = ValPipeline(
        args.batch_size, args.dali_num_threads, comm.rank,
        args.val_dir, args.val_list, args.dali_nvjpeg_memory_padding,
        seed=comm.rank + 1,
        device_id=int(comm.ctx.device_id),
        num_shards=comm.n_procs,
        channel_last=args.channel_last,
        spatial_size=spatial_size,
        dtype=args.type_config,
        mean=list(mean),
        std=list(std),
        pad_output=get_pad_output_by_channels(channels))
    vdata = dali_iterator.DaliIterator(val_pipe)
    vdata.size = int_div_ceil(val_pipe.epoch_size("Reader"), comm.n_procs)
    return vdata
def get_data_iterators(args, comm, stream_event_handler):
    '''Creates and returns DALI data iterators for both datasets of
    training and validation.

    The datasets are partitioned in distributed training mode according
    to comm rank and number of processes.

    Args:
        args: Namespace providing batch_size, dali_num_threads,
            train_dir/train_list, val_dir/val_list,
            dali_nvjpeg_memory_padding, channel_last, type_config.
        comm: Communicator providing `rank` and `n_procs`.
        stream_event_handler: Unused here; kept for interface
            compatibility with existing callers.

    Returns:
        (train_iterator, val_iterator) tuple of DaliIterator objects,
        each with its `size` attribute set.
    '''
    # Pipelines and Iterators for training
    train_pipe = TrainPipeline(args.batch_size, args.dali_num_threads,
                               comm.rank,
                               args.train_dir, args.train_list,
                               args.dali_nvjpeg_memory_padding,
                               seed=comm.rank + 1,
                               num_shards=comm.n_procs,
                               channel_last=args.channel_last,
                               dtype=args.type_config)
    data = dali_iterator.DaliIterator(train_pipe)
    # Training may drop the remainder (floor): partial shards are fine here.
    data.size = train_pipe.epoch_size("Reader") // comm.n_procs
    # Pipelines and Iterators for validation
    val_pipe = ValPipeline(args.batch_size, args.dali_num_threads,
                           comm.rank,
                           args.val_dir, args.val_list,
                           args.dali_nvjpeg_memory_padding,
                           seed=comm.rank + 1,
                           num_shards=comm.n_procs,
                           channel_last=args.channel_last,
                           dtype=args.type_config)
    vdata = dali_iterator.DaliIterator(val_pipe)
    # Fix: was floor division, which silently skipped up to n_procs - 1
    # validation samples when the dataset size is not divisible by the
    # number of shards. Use ceiling division, consistent with
    # get_val_data_iterator / get_train_data_iterator.
    vdata.size = int_div_ceil(val_pipe.epoch_size("Reader"), comm.n_procs)
    return data, vdata
def imagenet_iterator(config, comm, train=True):
    '''Return an ImageNet data iterator.

    Uses a sharded DALI pipeline when `config['dataset']['dali']` is
    truthy; otherwise falls back to the cached file-based iterator.

    Args:
        config (dict): Expects keys `dataset.dali`, `dataset.path`,
            `dataset.val_path`, `dataset.dali_threads`,
            `dataset.cache_dir`, and `train.batch_size`.
        comm: Communicator providing `rank` and `n_procs`.
        train (bool): Selects the training vs. validation dataset path
            and is forwarded to the pipeline.

    Returns:
        An iterator object with `size` and `batch_size` attributes
        (DALI path), or the cache-backed iterator.
    '''
    if config['dataset']['dali']:
        # Train and validation differ only in which path is read.
        path_key = 'path' if train else 'val_path'
        pipe = DataPipeline(config['dataset'][path_key],
                            config['train']['batch_size'],
                            config['dataset']['dali_threads'],
                            comm.rank,
                            num_gpus=comm.n_procs,
                            seed=1,
                            train=train)
        data_iterator_ = dali_iterator.DaliIterator(pipe)
        # Fix: np.ceil returns a numpy float, but `size` is an iteration
        # count and must be an integer (as in the sibling helpers).
        # -(-a // b) is exact integer ceiling division.
        data_iterator_.size = -(-pipe.epoch_size("Reader") // comm.n_procs)
        data_iterator_.batch_size = config['train']['batch_size']
        return data_iterator_
    else:
        return data_iterator_cache(config['dataset']['cache_dir'],
                                   config['train']['batch_size'],
                                   shuffle=True,
                                   normalize=True)
def get_train_data_iterator(args, comm, channels, norm_config='default'):
    '''Build the sharded DALI training iterator.

    Normalization mean/std come from `norm_config`; a missing std falls
    back to all-ones (i.e. no scaling). The iterator's `size` is the
    ceiling of the per-shard sample count.
    '''
    mean, std = get_normalize_config(norm_config)
    std = [1., 1., 1.] if std is None else std
    # NOTE(review): unlike get_val_data_iterator, no device_id is passed
    # to the pipeline here — confirm the default is intended.
    pipe = TrainPipeline(
        args.batch_size, args.dali_num_threads, comm.rank,
        args.train_dir, args.train_list, args.dali_nvjpeg_memory_padding,
        seed=comm.rank + 1,
        num_shards=comm.n_procs,
        channel_last=args.channel_last,
        dtype=args.type_config,
        mean=list(mean),
        std=list(std),
        pad_output=get_pad_output_by_channels(channels))
    iterator = dali_iterator.DaliIterator(pipe)
    iterator.size = int_div_ceil(pipe.epoch_size("Reader"), comm.n_procs)
    return iterator