Example #1
def get_data_iterators(batch_size,
                       dali_num_threads,
                       train_dir,
                       file_list,
                       dali_nvjpeg_memory_padding,
                       type_config,
                       channel_last,
                       comm,
                       training=True):
    r'''Creates and returns a DALI data iterator for either training or
    validation, depending on the ``training`` flag.

    In distributed training mode the dataset is partitioned (sharded)
    according to the communicator rank and the number of processes.
    '''
    pipeline_cls = TrainPipeline if training else ValPipeline
    # Build the pipeline for the selected mode and wrap it in an iterator
    pipe = pipeline_cls(batch_size,
                        dali_num_threads,
                        comm.rank,
                        train_dir,
                        file_list,
                        dali_nvjpeg_memory_padding,
                        seed=comm.rank + 1,
                        num_shards=comm.n_procs,
                        channel_last=channel_last,
                        dtype=type_config)

    data = dali_iterator.DaliIterator(pipe)
    data.size = pipe.epoch_size("Reader") // comm.n_procs

    return data
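Note that this example sizes each shard with floor division, so up to n_procs - 1 trailing samples are silently dropped every epoch, whereas Examples #2 and #5 round up with int_div_ceil so every sample is visited (at the cost of a few duplicates). A quick illustration with made-up numbers:

# Made-up numbers: 10,001 samples sharded across 8 processes.
epoch_size, n_procs = 10001, 8
floor_size = epoch_size // n_procs                  # 1250; 8 * 1250 = 10000, one sample dropped
ceil_size = (epoch_size + n_procs - 1) // n_procs   # 1251; 8 * 1251 = 10008, some samples repeated
print(floor_size, ceil_size)                        # 1250 1251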
Example #2
def get_val_data_iterator(args,
                          comm,
                          channels,
                          spatial_size=(224, 224),
                          norm_config='default'):
    # Pipelines and Iterators for validation
    mean, std = get_normalize_config(norm_config)
    if std is None:
        std = [1., 1., 1.]  # identity scale when no std is configured
    pad_output = get_pad_output_by_channels(channels)
    val_pipe = ValPipeline(args.batch_size,
                           args.dali_num_threads,
                           comm.rank,
                           args.val_dir,
                           args.val_list,
                           args.dali_nvjpeg_memory_padding,
                           seed=comm.rank + 1,
                           device_id=int(comm.ctx.device_id),
                           num_shards=comm.n_procs,
                           channel_last=args.channel_last,
                           spatial_size=spatial_size,
                           dtype=args.type_config,
                           mean=list(mean),
                           std=list(std),
                           pad_output=pad_output)
    vdata = dali_iterator.DaliIterator(val_pipe)
    vdata.size = int_div_ceil(val_pipe.epoch_size("Reader"), comm.n_procs)
    return vdata
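int_div_ceil is not defined in the snippet; a minimal sketch of the integer ceiling division its name and usage suggest (the implementation is an assumption):

def int_div_ceil(a, b):
    # Integer-only ceil(a / b), avoiding float rounding issues
    return (a + b - 1) // b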
Example #3
def get_data_iterators(args, comm, stream_event_handler):
    '''
    Creates and returns DALI data iterators for both the training and
    validation datasets.

    In distributed training mode the datasets are partitioned (sharded)
    according to the communicator rank and the number of processes.
    '''

    # Pipelines and Iterators for training
    train_pipe = TrainPipeline(args.batch_size,
                               args.dali_num_threads,
                               comm.rank,
                               args.train_dir,
                               args.train_list,
                               args.dali_nvjpeg_memory_padding,
                               seed=comm.rank + 1,
                               num_shards=comm.n_procs,
                               channel_last=args.channel_last,
                               dtype=args.type_config)

    data = dali_iterator.DaliIterator(train_pipe)
    data.size = train_pipe.epoch_size("Reader") // comm.n_procs

    # Pipelines and Iterators for validation
    val_pipe = ValPipeline(args.batch_size,
                           args.dali_num_threads,
                           comm.rank,
                           args.val_dir,
                           args.val_list,
                           args.dali_nvjpeg_memory_padding,
                           seed=comm.rank + 1,
                           num_shards=comm.n_procs,
                           channel_last=args.channel_last,
                           dtype=args.type_config)
    vdata = dali_iterator.DaliIterator(val_pipe)
    vdata.size = val_pipe.epoch_size("Reader") // comm.n_procs

    return data, vdata
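A rough sketch of how the returned pair might drive a training loop. The .next() call returning an (images, labels) tuple follows nnabla's data-iterator convention; comm, args, and the update step are placeholders:

data, vdata = get_data_iterators(args, comm, stream_event_handler)
iters_per_epoch = data.size // args.batch_size
for i in range(iters_per_epoch):
    images, labels = data.next()  # one sharded mini-batch per call
    # ... forward / backward / solver update goes here ...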
Example #4
def imagenet_iterator(config, comm, train=True):
    if config['dataset']['dali']:
        # Pick the training or validation image directory
        path = config['dataset']['path'] if train \
            else config['dataset']['val_path']
        pipe = DataPipeline(path,
                            config['train']['batch_size'],
                            config['dataset']['dali_threads'],
                            comm.rank,
                            num_gpus=comm.n_procs,
                            seed=1,
                            train=train)

        data_iterator_ = dali_iterator.DaliIterator(pipe)
        # np.ceil returns a float; cast so size is an integer sample count
        data_iterator_.size = int(np.ceil(pipe.epoch_size("Reader")
                                          / comm.n_procs))
        data_iterator_.batch_size = config['train']['batch_size']

        return data_iterator_
    else:
        # DALI disabled: fall back to the cached data iterator
        return data_iterator_cache(config['dataset']['cache_dir'],
                                   config['train']['batch_size'],
                                   shuffle=True, normalize=True)
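The nested keys read above imply a config layout roughly like the following; all paths and values are placeholders:

config = {
    'dataset': {
        'dali': True,                          # use DALI pipelines
        'path': '/data/imagenet/train',
        'val_path': '/data/imagenet/val',
        'dali_threads': 4,
        'cache_dir': '/data/imagenet/cache',   # only read when 'dali' is False
    },
    'train': {
        'batch_size': 128,
    },
}
train_it = imagenet_iterator(config, comm, train=True)
val_it = imagenet_iterator(config, comm, train=False)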
Example #5
def get_train_data_iterator(args, comm, channels, norm_config='default'):
    # Pipelines and Iterators for training
    mean, std = get_normalize_config(norm_config)
    if std is None:
        std = [1., 1., 1.]  # identity scale when no std is configured
    pad_output = get_pad_output_by_channels(channels)
    train_pipe = TrainPipeline(args.batch_size,
                               args.dali_num_threads,
                               comm.rank,
                               args.train_dir,
                               args.train_list,
                               args.dali_nvjpeg_memory_padding,
                               seed=comm.rank + 1,
                               num_shards=comm.n_procs,
                               channel_last=args.channel_last,
                               dtype=args.type_config,
                               mean=list(mean),
                               std=list(std),
                               pad_output=pad_output)

    data = dali_iterator.DaliIterator(train_pipe)
    data.size = int_div_ceil(train_pipe.epoch_size("Reader"), comm.n_procs)
    return data
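Neither get_normalize_config nor get_pad_output_by_channels is shown in these examples. Below are plausible sketches consistent with how they are called above; the ImageNet statistics, the 'mean_only' key, and the channels == 4 rule are all assumptions:

def get_normalize_config(norm_config):
    # Return per-channel (mean, std); std may be None, in which case the
    # callers above fall back to an identity scale of [1., 1., 1.].
    configs = {
        # Widely used ImageNet statistics in the 0-255 pixel scale (assumed)
        'default': ([123.675, 116.28, 103.53], [58.395, 57.12, 57.375]),
        # Hypothetical mean-only variant that triggers the callers' fallback
        'mean_only': ([123.675, 116.28, 103.53], None),
    }
    return configs[norm_config]


def get_pad_output_by_channels(channels):
    # DALI can pad 3-channel RGB images to 4 channels for GPU-friendly
    # layouts; assume a channel count of 4 requests that padding.
    return channels == 4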