def __init__(self, batch_size, num_threads, device_id, rec_path, idx_path,
             shard_id, num_shards, crop_shape,
             min_random_area, max_random_area,
             min_random_aspect_ratio, max_random_aspect_ratio,
             nvjpeg_padding, prefetch_queue=3, seed=12,
             output_layout=types.NCHW, pad_output=True, dtype='float16',
             mlperf_print=True):
    """Create the operators used by the training input pipeline.

    The pipeline is seeded with ``seed + device_id``. When ``mlperf_print``
    is true, the input-related settings are reported through
    ``mx_resnet_print``.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size, num_threads, device_id,
        seed=seed + device_id,
        prefetch_queue_depth=prefetch_queue)

    if mlperf_print:
        # Shuffling is done inside ops.MXNetReader
        mx_resnet_print(key=mlperf_log.INPUT_ORDER)

    area_range = (min_random_area, max_random_area)
    aspect_ratio_range = (min_random_aspect_ratio, max_random_aspect_ratio)
    if dtype == 'float16':
        out_dtype = types.FLOAT16
    else:
        out_dtype = types.FLOAT

    self.input = ops.MXNetReader(
        path=[rec_path],
        index_path=[idx_path],
        random_shuffle=True,
        shard_id=shard_id,
        num_shards=num_shards)
    self.decode = ops.nvJPEGDecoder(
        device="mixed",
        output_type=types.RGB,
        device_memory_padding=nvjpeg_padding,
        host_memory_padding=nvjpeg_padding)
    self.rrc = ops.RandomResizedCrop(
        device="gpu",
        random_area=list(area_range),
        random_aspect_ratio=list(aspect_ratio_range),
        size=crop_shape)
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=out_dtype,
        output_layout=output_layout,
        crop=crop_shape,
        pad_output=pad_output,
        image_type=types.RGB,
        mean=_mean_pixel,
        std=_std_pixel)
    self.coin = ops.CoinFlip(probability=0.5)

    if mlperf_print:
        mx_resnet_print(key=mlperf_log.INPUT_CROP_USES_BBOXES, val=False)
        mx_resnet_print(key=mlperf_log.INPUT_DISTORTED_CROP_RATIO_RANGE,
                        val=aspect_ratio_range)
        mx_resnet_print(key=mlperf_log.INPUT_DISTORTED_CROP_AREA_RANGE,
                        val=area_range)
        mx_resnet_print(key=mlperf_log.INPUT_MEAN_SUBTRACTION,
                        val=_mean_pixel)
        mx_resnet_print(key=mlperf_log.INPUT_RANDOM_FLIP)
def __init__(self, name, batch_size, num_workers, device_id, num_gpu,
             root=os.path.expanduser('~/.mxnet/datasets/face')):
    """Create the operators for reading the face dataset ``name`` under ``root``.

    ``<root>/<name>/property`` must hold three comma-separated integers.
    The first is stored as ``self.num_classes`` and the other two as
    ``self.image_size``.
    """
    super().__init__(batch_size, num_workers, device_id, seed=12 + device_id)

    dataset_dir = os.path.join(root, name)
    idx_files = [os.path.join(dataset_dir, "train.idx")]
    rec_files = [os.path.join(dataset_dir, "train.rec")]

    # Use a context manager so the property file handle is always closed.
    with open(os.path.join(dataset_dir, "property"), "r") as prop_file:
        prop = prop_file.read().strip().split(',')
    assert len(prop) == 3
    self.num_classes = int(prop[0])
    self.image_size = [int(prop[1]), int(prop[2])]

    # NOTE(review): num_shards is set but shard_id is not passed, so the
    # reader falls back to its default shard for every device_id -- confirm
    # this is intended.
    self._input = ops.MXNetReader(
        path=rec_files,
        index_path=idx_files,
        random_shuffle=True,
        num_shards=num_gpu,
        tensor_init_bytes=self.image_size[0] * self.image_size[1] * 8)
    self._decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)
    self._cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=self.image_size,
        image_type=types.RGB,
        mean=[127.5, 127.5, 127.5],
        std=[127.5, 127.5, 127.5])
    self._contrast = ops.Contrast(device="gpu")
    self._saturation = ops.Saturation(device="gpu")
    self._brightness = ops.Brightness(device="gpu")
    self._uniform = ops.Uniform(range=(0.7, 1.3))
    self._coin = ops.CoinFlip(probability=0.5)
    self.iter = 0
def __init__(self, device_id, shard_id, num_shards, size, crop, options):
    """Create the operators of the validation pipeline.

    Batch size, worker count, the record directory and the fp16 switch are
    read from ``options``.
    """
    super(ValPipeline, self).__init__(
        options.batchsize, options.dataloader_workers, device_id,
        seed=12 + device_id)

    rec_path = os.path.join(options.val_dir, "data.rec")
    idx_path = os.path.join(options.val_dir, "data.idx")
    self.input = ops.MXNetReader(
        path=[rec_path],
        index_path=[idx_path],
        random_shuffle=False,
        shard_id=shard_id,
        num_shards=num_shards,
        pad_last_batch=True)

    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self.res = ops.Resize(
        device="gpu",
        resize_shorter=size,
        interp_type=types.INTERP_TRIANGULAR)

    if options.fp16:
        dtype = types.FLOAT16
    else:
        dtype = types.FLOAT
    layout = types.NCHW
    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=dtype,
        output_layout=layout,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=channel_mean,
        std=channel_std,
        pad_output=False)
def __init__(self, batch_size, num_threads, device_id, data_dir, crop,
             dali_cpu=False):
    """Create the operators of the training pipeline.

    Sharding comes from the module-level ``args`` (``local_rank`` and
    ``world_size``). ``dali_cpu`` moves decoding and resizing to the CPU.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size, num_threads, device_id, seed=12 + device_id)

    # MXNet RecordIO reader
    self.input = ops.MXNetReader(
        path=join(data_dir, "train_label.rec"),
        index_path=join(data_dir, "train_label.idx"),
        random_shuffle=True,
        shard_id=args.local_rank,
        num_shards=args.world_size)

    # Let the user decide which variant works best for the model being run.
    if dali_cpu:
        dali_device = 'cpu'
        decoder_device = 'cpu'
        device_memory_padding = 0
        host_memory_padding = 0
    else:
        dali_device = 'gpu'
        decoder_device = 'mixed'
        # This padding sets the size of the internal nvJPEG buffers to be
        # able to handle all images from full-sized ImageNet without
        # additional reallocations.
        device_memory_padding = 211025920
        host_memory_padding = 140544512

    self.decode = ops.ImageDecoderRandomCrop(
        device=decoder_device,
        output_type=types.RGB,
        device_memory_padding=device_memory_padding,
        host_memory_padding=host_memory_padding,
        random_aspect_ratio=[0.8, 1.25],
        random_area=[0.1, 1.0],
        num_attempts=100)
    self.res = ops.Resize(
        device=dali_device,
        resize_x=crop,
        resize_y=crop,
        interp_type=types.INTERP_TRIANGULAR)

    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=channel_mean,
        std=channel_std)
    self.coin = ops.CoinFlip(probability=0.5)
    print('DALI "{0}" variant'.format(dali_device))
def __init__(self, path_imgrec, batch_size, num_threads, device_id, num_gpus,
             initial_fill):
    '''
    initial_fill: too large a value uses a lot of memory; too small a value
        makes ids repeat often within a batch, so the loss drops too slowly.
        In testing, batch_size*1000 had essentially no effect on training.
    num_threads: in testing on a 3.5GHz CPU (single core), 3~4 worked for
        HDD storage and 5~6 for SSD storage.
    '''
    super(HybridTrainPipe, self).__init__(
        batch_size, num_threads, device_id, seed=12 + device_id)
    logging.info('loading recordio %s...', path_imgrec)

    # The index file sits next to the record file: the last four characters
    # of the record path are replaced with ".idx".
    path_imgidx = path_imgrec[0:-4] + ".idx"
    self.input = ops.MXNetReader(
        path=[path_imgrec],
        index_path=[path_imgidx],
        random_shuffle=True,
        shard_id=device_id,
        num_shards=num_gpus,
        prefetch_queue_depth=5,
        initial_fill=initial_fill)

    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self.res = ops.Resize(device="gpu", resize_x=112, resize_y=112)
    self.rrc = ops.RandomResizedCrop(device="gpu", size=(112, 112))
    # No mean/std is passed here; an earlier variant that passed
    # mean=[0.485 * 255, 0.456 * 255, 0.406 * 255] and
    # std=[0.229 * 255, 0.224 * 255, 0.225 * 255] was disabled.
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        dtype=types.FLOAT,
        output_layout=types.NCHW)
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, args, batch_size, num_threads, device_id, rec_path,
             idx_path, shard_id, num_shards, crop_shape, nvjpeg_padding,
             prefetch_queue=3, resize_shp=None, output_layout=types.NCHW,
             pad_output=True, dtype='float16', dali_cpu=False,
             nvjpeg_width_hint=5980, nvjpeg_height_hint=6430):
    """Create the operators of the validation pipeline.

    The decoder preallocation hints are only passed when the installed DALI
    version is at least 1.2.0. ``self.resize`` is ``None`` when
    ``resize_shp`` is falsy.
    """
    super(HybridValPipe, self).__init__(
        batch_size, num_threads, device_id,
        seed=12 + device_id,
        prefetch_queue_depth=prefetch_queue)

    self.input = ops.MXNetReader(
        path=[rec_path],
        index_path=[idx_path],
        random_shuffle=False,
        shard_id=shard_id,
        num_shards=num_shards,
        dont_use_mmap=args.dali_dont_use_mmap)

    dali_device, decoder_device = ("cpu", "cpu") if dali_cpu else ("gpu", "mixed")

    # Extra decoder arguments that older DALI releases are not given.
    dali_kwargs_fallback = {}
    if Version(dali.__version__) >= Version("1.2.0"):
        dali_kwargs_fallback = {
            "preallocate_width_hint": nvjpeg_width_hint,
            "preallocate_height_hint": nvjpeg_height_hint,
        }

    self.decode = ops.ImageDecoder(
        device=decoder_device,
        output_type=types.RGB,
        device_memory_padding=nvjpeg_padding,
        host_memory_padding=nvjpeg_padding,
        **dali_kwargs_fallback)

    if resize_shp:
        self.resize = ops.Resize(device=dali_device, resize_shorter=resize_shp)
    else:
        self.resize = None

    out_dtype = types.FLOAT16 if dtype == 'float16' else types.FLOAT
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=out_dtype,
        output_layout=output_layout,
        crop=crop_shape,
        pad_output=pad_output,
        image_type=types.RGB,
        mean=args.rgb_mean,
        std=args.rgb_std)
def __init__(self, batch_size, device_id, num_gpus, num_threads=2):
    """Create the reader, decoder and normalization operators.

    The record/index paths (``rec_files``, ``idx_files``) and the dataset
    object ``plmxds`` are taken from the enclosing module scope.
    """
    queue_depth = {"cpu_size": 6, "gpu_size": 2}
    super(RecordIOPipeline, self).__init__(
        batch_size, num_threads, device_id,
        prefetch_queue_depth=queue_depth)

    # Each reader is pre-filled with an equal share of the indexed records.
    records_per_gpu = len(plmxds.imgidx) // num_gpus
    self.input = ops.MXNetReader(
        path=rec_files,
        index_path=idx_files,
        random_shuffle=True,
        shard_id=device_id,
        num_shards=num_gpus,
        initial_fill=records_per_gpu)

    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)

    half_range = [0.5 * 255., 0.5 * 255., 0.5 * 255.]
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        image_type=types.RGB,
        mean=list(half_range),
        std=list(half_range))
    self.coin = ops.CoinFlip(probability=0.5)
    self.iter = 0
def __init__(self, batch_size, num_threads, device_id, record_dir, crop, size,
             num_gpus, dali_cpu=False):
    """Create the operators of the validation pipeline.

    Reads ``val.rec``/``val.idx`` from ``record_dir``. ``dali_cpu`` is
    accepted but not used by this constructor.
    """
    super(HybridValPipe, self).__init__(
        batch_size, num_threads, device_id, seed=12 + device_id)

    rec_file = join(record_dir, "val.rec")
    idx_file = join(record_dir, "val.idx")
    self.input = ops.MXNetReader(
        path=[rec_file],
        index_path=[idx_file],
        random_shuffle=False,
        shard_id=device_id,
        num_shards=num_gpus)

    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self.res = ops.Resize(
        device="gpu",
        resize_shorter=size,
        interp_type=types.INTERP_CUBIC)

    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=channel_mean,
        std=channel_std)
def __init__(self, rec_path, index_path, batch_size, input_size, num_gpus,
             num_threads, device_id):
    """Create the operators of the training pipeline.

    Decoding with random crop is configured on the CPU; resize and
    normalization are configured on the GPU.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size, num_threads, device_id, seed=12 + device_id)

    self.input = ops.MXNetReader(
        path=[rec_path],
        index_path=[index_path],
        random_shuffle=True,
        shard_id=device_id,
        num_shards=num_gpus)

    # Alternative that was disabled: ops.nvJPEGDecoderRandomCrop(device="mixed", ...)
    self.decode = ops.HostDecoderRandomCrop(
        device="cpu",
        output_type=types.RGB,
        random_aspect_ratio=[0.75, 1.25],
        random_area=[0.08, 1.0],
        num_attempts=100)
    self.resize = ops.Resize(
        device="gpu",
        resize_x=input_size,
        resize_y=input_size)

    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(input_size, input_size),
        image_type=types.RGB,
        mean=channel_mean,
        std=channel_std)
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, args, batch_size, num_threads, device_id, rec_path,
             idx_path, shard_id, num_shards, crop_shape, nvjpeg_padding,
             prefetch_queue=3, output_layout=types.NCHW, pad_output=True,
             dtype='float16', dali_cpu=False):
    """Create the operators of the training pipeline.

    With ``args.dali_fuse_decoder`` set, a random-crop decoder is paired with
    a plain resize to ``crop_shape``. Otherwise a plain decoder is paired
    with ``RandomResizedCrop``.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size, num_threads, device_id,
        seed=12 + device_id,
        prefetch_queue_depth=prefetch_queue)

    self.input = ops.MXNetReader(
        path=[rec_path],
        index_path=[idx_path],
        random_shuffle=True,
        shard_id=shard_id,
        num_shards=num_shards)

    dali_device, decoder_device = get_device_names(dali_cpu)

    # Both decoder flavours take exactly the same arguments.
    decoder_kwargs = dict(
        device=decoder_device,
        output_type=types.RGB,
        device_memory_padding=nvjpeg_padding,
        host_memory_padding=nvjpeg_padding)
    if args.dali_fuse_decoder:
        self.decode = ops.ImageDecoderRandomCrop(**decoder_kwargs)
        self.resize = ops.Resize(
            device=dali_device,
            resize_x=crop_shape[1],
            resize_y=crop_shape[0])
    else:
        self.decode = ops.ImageDecoder(**decoder_kwargs)
        self.resize = ops.RandomResizedCrop(
            device=dali_device,
            size=crop_shape)

    out_dtype = types.FLOAT16 if dtype == 'float16' else types.FLOAT
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        dtype=out_dtype,
        output_layout=output_layout,
        crop=crop_shape,
        pad_output=pad_output,
        mean=args.rgb_mean,
        std=args.rgb_std)
    self.coin = ops.random.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, device_id, num_gpus, data_paths):
    """Create the RecordIO reader.

    ``data_paths[0]`` is passed as the record path and ``data_paths[1]`` as
    the index path; the reader is sharded by ``device_id``.
    """
    super(MXNetReaderPipeline, self).__init__(batch_size, num_threads, device_id)
    rec_path = data_paths[0]
    idx_path = data_paths[1]
    self.input = ops.MXNetReader(
        path=rec_path,
        index_path=idx_path,
        shard_id=device_id,
        num_shards=num_gpus)
def __init__(self, db_folder, batch_size, num_threads, device_id, num_gpus):
    """Create a shuffling RecordIO reader over the training set.

    File names are appended directly to ``db_folder``, so it must already
    end with a path separator.
    """
    super(MXNetReaderPipeline, self).__init__(batch_size, num_threads, device_id)
    rec_file = db_folder + "train.rec"
    idx_file = db_folder + "train.idx"
    self.input = ops.MXNetReader(
        path=[rec_file],
        index_path=[idx_file],
        random_shuffle=True,
        shard_id=device_id,
        num_shards=num_gpus)
def __init__(self, **kwargs):
    """Create the RecordIO reader from keyword arguments.

    All keyword arguments are forwarded to the parent constructor. This
    constructor itself reads ``decoder_cache_params``, ``data_paths``,
    ``device_id``, ``num_gpus`` and ``reader_queue_depth``.
    """
    super(MXNetReaderPipeline, self).__init__(**kwargs)
    cache_enabled = kwargs['decoder_cache_params']['cache_enabled']
    rec_path = kwargs['data_paths'][0]
    idx_path = kwargs['data_paths'][1]
    # `skip_cached_images=cache_enabled` is deliberately left disabled here.
    self.input = ops.MXNetReader(
        path=rec_path,
        index_path=idx_path,
        shard_id=kwargs['device_id'],
        num_shards=kwargs['num_gpus'],
        stick_to_shard=cache_enabled,
        prefetch_queue_depth=kwargs['reader_queue_depth'])
def __init__(self, batch_size, num_threads, device_id, rec_path, idx_path,
             shard_id, num_shards, crop_shape, nvjpeg_padding,
             prefetch_queue=3, seed=12, resize_shp=None,
             output_layout=types.NCHW, pad_output=True, dtype='float16',
             mlperf_print=True):
    """Create the operators of the validation pipeline.

    ``self.resize`` is ``None`` when ``resize_shp`` is falsy. When
    ``mlperf_print`` is true, the input-related settings are reported
    through ``mx_resnet_print``.
    """
    super(HybridValPipe, self).__init__(
        batch_size, num_threads, device_id,
        seed=seed + device_id,
        prefetch_queue_depth=prefetch_queue)

    self.input = ops.MXNetReader(
        path=[rec_path],
        index_path=[idx_path],
        random_shuffle=False,
        shard_id=shard_id,
        num_shards=num_shards)
    self.decode = ops.nvJPEGDecoder(
        device="mixed",
        output_type=types.RGB,
        device_memory_padding=nvjpeg_padding,
        host_memory_padding=nvjpeg_padding)

    if resize_shp:
        self.resize = ops.Resize(device="gpu", resize_shorter=resize_shp)
    else:
        self.resize = None

    out_dtype = types.FLOAT16 if dtype == 'float16' else types.FLOAT
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=out_dtype,
        output_layout=output_layout,
        crop=crop_shape,
        pad_output=pad_output,
        image_type=types.RGB,
        mean=_mean_pixel,
        std=_std_pixel)

    if mlperf_print:
        mx_resnet_print(key=mlperf_log.INPUT_MEAN_SUBTRACTION,
                        val=_mean_pixel)
        mx_resnet_print(key=mlperf_log.INPUT_RESIZE_ASPECT_PRESERVING)
        mx_resnet_print(key=mlperf_log.INPUT_CENTRAL_CROP)
def __init__(self, args, batch_size, num_threads, device_id, rec_path,
             idx_path, shard_id, num_shards, crop_shape, nvjpeg_padding,
             prefetch_queue=3, resize_shp=None, output_layout=types.NCHW,
             pad_output=True, dtype='float16', dali_cpu=False):
    """Create the operators of the validation pipeline.

    ``dali_cpu`` selects CPU decoding and resizing. ``self.resize`` is
    ``None`` when ``resize_shp`` is falsy. Normalization constants come from
    ``args.rgb_mean`` and ``args.rgb_std``.
    """
    super(HybridValPipe, self).__init__(
        batch_size, num_threads, device_id,
        seed=12 + device_id,
        prefetch_queue_depth=prefetch_queue)

    self.input = ops.MXNetReader(
        path=[rec_path],
        index_path=[idx_path],
        random_shuffle=False,
        shard_id=shard_id,
        num_shards=num_shards)

    dali_device, decoder_device = ("cpu", "cpu") if dali_cpu else ("gpu", "mixed")

    self.decode = ops.ImageDecoder(
        device=decoder_device,
        output_type=types.RGB,
        device_memory_padding=nvjpeg_padding,
        host_memory_padding=nvjpeg_padding)

    if resize_shp:
        self.resize = ops.Resize(device=dali_device, resize_shorter=resize_shp)
    else:
        self.resize = None

    out_dtype = types.FLOAT16 if dtype == 'float16' else types.FLOAT
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=out_dtype,
        output_layout=output_layout,
        crop=crop_shape,
        pad_output=pad_output,
        image_type=types.RGB,
        mean=args.rgb_mean,
        std=args.rgb_std)
def __init__(self, batch_size, num_threads, device_id, num_gpus, db_dir):
    """Create the reader, decoder and normalization operators for validation.

    File names are appended directly to ``db_dir``, so it must already end
    with a path separator.
    """
    super(ValPipe, self).__init__(batch_size, num_threads, device_id)

    rec_file = db_dir + "val.rec"
    idx_file = db_dir + "val.idx"
    self.input = ops.MXNetReader(
        path=[rec_file],
        index_path=[idx_file],
        random_shuffle=False,
        shard_id=device_id,
        num_shards=num_gpus)
    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        mean=[123.0, 116.0, 103.0],
        std=[100.0, 100.0, 100.0])
def __init__(self, batch_size, num_threads, device_id, num_shards, shard_id,
             use_fp16=False, train=True, root=os.path.expanduser('./data')):
    """Create the operators of the CIFAR-10 pipeline.

    Reads ``cifar10_train.{rec,idx}`` or ``cifar10_test.{rec,idx}`` from
    ``root`` depending on ``train``. ``self.size`` is set to the number of
    lines in the index file(s). Augmentation operators are only created for
    training.
    """
    super().__init__(batch_size, num_threads, device_id, seed=12)

    part = "train" if train else "test"
    # Format only the file name: formatting the joined path would also
    # interpret any braces that appear in `root`.
    idx_files = [os.path.join(root, "cifar10_{}.idx".format(part))]
    rec_files = [os.path.join(root, "cifar10_{}.rec".format(part))]

    self.num_classes = 10
    self.image_size = (32, 32)
    self.size = 0
    self.train = train

    # Count index lines without reading each file into a list first.
    for idx_file in idx_files:
        with open(idx_file, "r") as f:
            self.size += sum(1 for _ in f)

    self._input = ops.MXNetReader(
        path=rec_files,
        index_path=idx_files,
        random_shuffle=bool(train),
        num_shards=num_shards,
        shard_id=shard_id,
        seed=12,
        tensor_init_bytes=self.image_size[0] * self.image_size[1] * 8)
    self._decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self._cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT16 if use_fp16 else types.FLOAT,
        output_layout=types.NCHW,
        crop=self.image_size,
        image_type=types.RGB,
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375])

    if train:
        self.padding = ops.Paste(device="gpu", fill_value=128, ratio=1.25)
        self.px = ops.Uniform(range=(0, 1))
        self.py = ops.Uniform(range=(0, 1))
        self._uniform = ops.Uniform(range=(0.7, 1.3))
        self._coin = ops.CoinFlip(probability=0.5)
def __init__(self, device_id, crop, size, rank, seed_rank, options):
    """Create the operators of the training pipeline.

    The pipeline seed is ``options.seed + seed_rank``. The decoder,
    normalization and coin-flip operators each get that seed plus a fixed
    offset. ``rank`` and ``size`` are used as shard id and shard count.
    """
    base_seed = options.seed + seed_rank
    super(TrainPipeline, self).__init__(
        options.batchsize, 4, device_id,
        prefetch_queue_depth=3,
        set_affinity=True,
        seed=base_seed)

    rec_path = os.path.join(options.train_dir, "data.rec")
    idx_path = os.path.join(options.train_dir, "data.idx")
    self.input = ops.MXNetReader(
        path=[rec_path],
        index_path=[idx_path],
        random_shuffle=True,
        shard_id=rank,
        num_shards=size)

    random_aspect_ratio = [0.75, 4. / 3.]
    random_area = [0.08, 1.0]
    # The memory padding sets the size of the internal nvJPEG buffers to be
    # able to handle all images from full-sized ImageNet without additional
    # reallocations.
    self.decode = ops.ImageDecoderRandomCrop(
        device="mixed",
        output_type=types.RGB,
        device_memory_padding=211025920,
        host_memory_padding=140544512,
        random_aspect_ratio=random_aspect_ratio,
        random_area=random_area,
        num_attempts=100,
        seed=base_seed + 1641)
    self.res = ops.Resize(
        device="gpu",
        resize_x=crop,
        resize_y=crop,
        interp_type=types.INTERP_TRIANGULAR)

    if options.fp16:
        dtype = types.FLOAT16
    else:
        dtype = types.FLOAT
    layout = types.NCHW
    padding = False
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=dtype,
        output_layout=layout,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        pad_output=padding,
        seed=base_seed + 1223)
    self.coin = ops.CoinFlip(probability=0.5, seed=base_seed + 412)
def __init__(self, db_folder, batch_size, num_threads, device_id, num_gpus):
    """Create the reader, decoder, normalization and coin-flip operators.

    File names are appended directly to ``db_folder``, so it must already
    end with a path separator.
    """
    super(MXNetReaderPipeline, self).__init__(batch_size, num_threads, device_id)
    self.input = ops.MXNetReader(
        path=[db_folder + "train.rec"],
        index_path=[db_folder + "train.idx"],
        random_shuffle=True,
        shard_id=device_id,
        num_shards=num_gpus)
    # The unused local `dali_device` was dropped; every device below is
    # given explicitly.
    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self.cmn = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        image_type=types.RGB,
        mean=[128., 128., 128.],
        std=[1., 1., 1.])
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, device_id, num_gpus, db_dir):
    """Create the operators of the training pipeline.

    Besides reader, decoder and normalization, an ``ExternalSource`` and a
    GPU ``WarpAffine`` operator are created. File names are appended
    directly to ``db_dir``, so it must already end with a path separator.
    """
    super(TrainPipe, self).__init__(batch_size, num_threads, device_id)

    rec_file = db_dir + "train.rec"
    idx_file = db_dir + "train.idx"
    # Note: the training reader is created with shuffling turned off.
    self.input = ops.MXNetReader(
        path=[rec_file],
        index_path=[idx_file],
        random_shuffle=False,
        shard_id=device_id,
        num_shards=num_gpus)
    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    # Disabled experiments:
    #   self.blur = ops.GaussianBlur(device="cpu", sigma=5.0, window_size=5)
    #   self.reshape = ops.Reshape(device="cpu", layout="HWC")
    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        mean=[123.0, 116.0, 103.0],
        std=[100.0, 100.0, 100.0])
    self.transform_source = ops.ExternalSource()
    self.warp_gpu = ops.WarpAffine(
        device="gpu",
        interp_type=types.INTERP_LINEAR)
def __init__(self, batch_size, num_threads, device_id, data_dir, crop,
             num_shards, dali_cpu=False):
    """Create the operators of the training pipeline.

    Reads ``_train.rec``/``_train.idx`` from ``data_dir``. ``dali_cpu``
    selects the host decoder and a CPU random-resized-crop instead of the
    GPU variants.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size, num_threads, device_id, seed=12 + device_id)

    rec_file = os.path.join(data_dir, "_train.rec")
    idx_file = os.path.join(data_dir, "_train.idx")
    # A FileReader-based input (file_root=data_dir, shard_id=0, num_shards=4,
    # random_shuffle=True) was used previously and is disabled.
    self.input = ops.MXNetReader(
        path=[rec_file],
        index_path=[idx_file],
        random_shuffle=True,
        shard_id=device_id,
        num_shards=num_shards)

    # Let the user decide which variant works best for the model being run.
    if dali_cpu:
        dali_device = "cpu"
        self.decode = ops.HostDecoder(device=dali_device, output_type=types.RGB)
    else:
        dali_device = "gpu"
        # No nvJPEG memory padding is passed to the decoder here.
        self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

    self.rrc = ops.RandomResizedCrop(device=dali_device, size=(crop, crop))

    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=channel_mean,
        std=channel_std)
    self.coin = ops.CoinFlip(probability=0.5)
    print('DALI "{0}" variant'.format(dali_device))
def __init__(self, batch_size, num_threads, data_dir, crop, dali_cpu=False):
    """Create the operators of the training pipeline on device 0.

    With ``dali_cpu`` a CPU random-crop decoder is followed by a resize to
    ``crop`` x ``crop``. Otherwise a mixed decoder is followed by a GPU
    ``RandomResizedCrop``.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, 0, seed=12)

    # A FileReader over <data_dir>/train was used previously and is disabled.
    self.input = ops.MXNetReader(
        path=osp.join(data_dir, 'train.rec'),
        index_path=osp.join(data_dir, 'train.idx'),
        random_shuffle=True)

    # Let the user decide which variant works best for the model being run.
    if dali_cpu:
        dali_device = "cpu"
        self.decode = ops.ImageDecoderRandomCrop(
            device=dali_device,
            output_type=types.RGB)
        self.res = ops.Resize(resize_x=crop, resize_y=crop)
    else:
        dali_device = "gpu"
        # This padding sets the size of the internal nvJPEG buffers to be
        # able to handle all images from full-sized ImageNet without
        # additional reallocations.
        self.decode = ops.ImageDecoder(
            device="mixed",
            output_type=types.RGB,
            device_memory_padding=211025920,
            host_memory_padding=140544512)
        self.res = ops.RandomResizedCrop(
            device=dali_device,
            size=(crop, crop))

    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=channel_mean,
        std=channel_std)
    self.coin = ops.CoinFlip(probability=0.5)
    print('DALI "{0}" variant'.format(dali_device))
def __init__(self, db_prefix, input_shape, batch_size, data_params, for_train, num_threads, device_id, num_shards):
    # Create the operators of a RecordIO pipeline used for both training and
    # evaluation. `db_prefix` is the path of the record/index pair without
    # the ".rec"/".idx" suffix. `data_params` is indexed by string keys
    # ('shuffle', 'mean', 'std', and the augmentation settings below).
    # NOTE(review): input_shape is indexed as [0] for the channel count and
    # [1], [2] for the crop size -- presumably (C, H, W); confirm with caller.
    super(HybridRecPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id, prefetch_queue_depth=2)
    self.for_train = for_train
    # Shuffling follows data_params['shuffle'] for training and is always
    # off otherwise.
    self.input = ops.MXNetReader(
        path=[db_prefix + '.rec'],
        index_path=[db_prefix + '.idx'],
        random_shuffle=data_params['shuffle'] if for_train else False,
        shard_id=device_id,
        num_shards=num_shards)
    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    # A scalar mean/std is repeated input_shape[0] times; a list is passed
    # through unchanged.
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(input_shape[1], input_shape[2]),
        mean=data_params['mean'] if isinstance(data_params['mean'], list) else [data_params['mean'] for i in range(input_shape[0])],
        std=data_params['std'] if isinstance(data_params['std'], list) else [data_params['std'] for i in range(input_shape[0])])
    if self.for_train:
        # Augmentation operators and their random-parameter generators are
        # only created for training.
        self.rotate = ops.Rotate(device="gpu", interp_type=types.INTERP_LINEAR)
        self.color = ops.ColorTwist(device='gpu')
        # Rotation angle range is symmetric around zero.
        self.rng_angle = ops.Uniform(
            range=(-float(data_params['max_rotate_angle']), +float(data_params['max_rotate_angle'])))
        # Each colour parameter range is centred on 1.0.
        self.rng_contrast = ops.Uniform(
            range=(1.0 - data_params['contrast'], 1.0 + data_params['contrast']))
        self.rng_brightness = ops.Uniform(
            range=(1.0 - data_params['brightness'], 1.0 + data_params['brightness']))
        self.rng_saturation = ops.Uniform(
            range=(1.0 - data_params['saturation'], 1.0 + data_params['saturation']))
        self.rng_hue = ops.Uniform(range=(1.0 - data_params['hue'], 1.0 + data_params['hue']))
        # self.coin is a CoinFlip operator when mirroring is enabled and the
        # plain integer 0 otherwise.
        # NOTE(review): confirm self.coin is only needed when for_train is set.
        self.coin = ops.CoinFlip(
            probability=0.5) if data_params['rand_mirror'] else 0
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, size,
             num_shards, dali_cpu=False):
    """Create the operators of the validation pipeline.

    Reads ``_val.rec``/``_val.idx`` from ``data_dir``. ``dali_cpu`` selects
    the host decoder and a CPU resize instead of the GPU variants.
    """
    super(HybridValPipe, self).__init__(
        batch_size, num_threads, device_id, seed=12 + device_id)

    rec_file = os.path.join(data_dir, "_val.rec")
    idx_file = os.path.join(data_dir, "_val.idx")
    # A FileReader-based input (file_root=data_dir, shard_id=0, num_shards=4,
    # random_shuffle=False) was used previously and is disabled.
    self.input = ops.MXNetReader(
        path=[rec_file],
        index_path=[idx_file],
        random_shuffle=False,
        shard_id=device_id,
        num_shards=num_shards)

    if dali_cpu:
        dali_device = "cpu"
        self.decode = ops.HostDecoder(device=dali_device, output_type=types.RGB)
    else:
        dali_device = "gpu"
        # No nvJPEG memory padding is passed to the decoder here.
        self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

    self.res = ops.Resize(device=dali_device, resize_shorter=size)

    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=channel_mean,
        std=channel_std)
def __init__(self, batch_size, num_threads, device_id, rec_path, idx_path,
             shard_id, num_shards, crop_shape, nvjpeg_padding,
             prefetch_queue=3, output_layout=types.NCHW, pad_output=True,
             dtype='float16'):
    """Create the operators of the training pipeline.

    ``nvjpeg_padding`` is used for both the device and the host memory
    padding of the decoder. ``dtype == 'float16'`` selects half-precision
    output; any other value selects single precision.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size, num_threads, device_id,
        seed=12 + device_id,
        prefetch_queue_depth=prefetch_queue)

    self.input = ops.MXNetReader(
        path=[rec_path],
        index_path=[idx_path],
        random_shuffle=True,
        shard_id=shard_id,
        num_shards=num_shards)
    self.decode = ops.nvJPEGDecoder(
        device="mixed",
        output_type=types.RGB,
        device_memory_padding=nvjpeg_padding,
        host_memory_padding=nvjpeg_padding)
    self.rrc = ops.RandomResizedCrop(device="gpu", size=crop_shape)

    if dtype == 'float16':
        out_dtype = types.FLOAT16
    else:
        out_dtype = types.FLOAT
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=out_dtype,
        output_layout=output_layout,
        crop=crop_shape,
        pad_output=pad_output,
        image_type=types.RGB,
        mean=_mean_pixel,
        std=_std_pixel)
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, data_dir, crop, size):
    """Create the operators of the validation pipeline on device 0.

    Reads ``val.rec``/``val.idx`` from ``data_dir`` without shuffling.
    """
    super(HybridValPipe, self).__init__(batch_size, num_threads, 0, seed=12)

    # A FileReader over <data_dir>/val was used previously and is disabled.
    rec_file = osp.join(data_dir, 'val.rec')
    idx_file = osp.join(data_dir, 'val.idx')
    self.input = ops.MXNetReader(
        path=rec_file,
        index_path=idx_file,
        random_shuffle=False)

    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self.res = ops.Resize(device="gpu", resize_shorter=size)

    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=channel_mean,
        std=channel_std)
def __init__(self, rec_path, index_path, batch_size, input_size, num_gpus,
             num_threads, device_id):
    """Create the operators of the validation pipeline.

    No resize operator is created; normalization crops to
    ``input_size`` x ``input_size``.
    """
    super(HybridValPipe, self).__init__(
        batch_size, num_threads, device_id, seed=12 + device_id)

    self.input = ops.MXNetReader(
        path=[rec_path],
        index_path=[index_path],
        random_shuffle=False,
        shard_id=device_id,
        num_shards=num_gpus)
    # Alternative that was disabled: ops.HostDecoder(device="cpu", output_type=types.RGB)
    self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(input_size, input_size),
        image_type=types.RGB,
        mean=channel_mean,
        std=channel_std)
def __init__(self, db_prefix, for_train, input_size, batch_size, num_threads,
             device_id, num_gpus):
    """Create the operators of a RecordIO pipeline for training or evaluation.

    ``db_prefix`` is the path of the record/index pair without the
    ``.rec``/``.idx`` suffix. Training gets a random-crop decoder plus
    colour and mirror augmentation operators; evaluation only gets a plain
    decoder.
    """
    super(HybridRecPipe, self).__init__(
        batch_size, num_threads, device_id,
        seed=12 + device_id,
        prefetch_queue_depth=2)
    self.for_train = for_train

    self.input = ops.MXNetReader(
        path=[db_prefix + ".rec"],
        index_path=[db_prefix + ".idx"],
        random_shuffle=for_train,
        shard_id=device_id,
        num_shards=num_gpus)
    self.resize = ops.Resize(
        device="gpu",
        resize_x=input_size,
        resize_y=input_size)

    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(input_size, input_size),
        image_type=types.RGB,
        mean=channel_mean,
        std=channel_std)

    if self.for_train:
        self.decode = ops.ImageDecoderRandomCrop(
            device="mixed",
            output_type=types.RGB,
            random_aspect_ratio=[3 / 4, 4 / 3],
            random_area=[0.08, 1.0],
            num_attempts=100)
        self.color = ops.ColorTwist(device='gpu')
        # Brightness, contrast and saturation all draw from the same range.
        self.rng_brightness = ops.Uniform(range=(0.6, 1.4))
        self.rng_contrast = ops.Uniform(range=(0.6, 1.4))
        self.rng_saturation = ops.Uniform(range=(0.6, 1.4))
        self.mirror_coin = ops.CoinFlip(probability=0.5)
    else:
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
def __init__(self, batch_size, num_threads, device_id, record_dir, crop,
             num_gpus, dali_cpu=False):
    """Create the operators of the training pipeline.

    Reads ``train.rec``/``train.idx`` from ``record_dir``. ``dali_cpu`` is
    accepted but not used by this constructor; every operator is created
    for the GPU.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size, num_threads, device_id, seed=12 + device_id)

    # Build both paths with join() so the record and index files always
    # resolve against the same directory, including an empty `record_dir`.
    self.input = ops.MXNetReader(
        path=[join(record_dir, "train.rec")],
        index_path=[join(record_dir, "train.idx")],
        random_shuffle=True,
        shard_id=device_id,
        num_shards=num_gpus)

    # No nvJPEG memory padding is passed to the decoder here.
    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self.random_resize_crop = ops.RandomResizedCrop(
        device="gpu",
        size=(crop, crop),
        interp_type=types.INTERP_CUBIC,
        random_area=[0.2, 1])
    self.crop_mirror_norm = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.CoinFlip(probability=0.5)
    print('DALI "{0}" variant'.format("gpu"))
def __init__(self, batch_size, num_threads, device_id, num_gpus, db_folder):
    """Create the operators of the training pipeline with a fixed 224x224 crop.

    Reads ``train.rec``/``train.idx`` from ``db_folder``, sharded by
    ``device_id``.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size, num_threads, device_id, seed=12 + device_id)

    rec_file = os.path.join(db_folder, "train.rec")
    idx_file = os.path.join(db_folder, "train.idx")
    self.input = ops.MXNetReader(
        path=[rec_file],
        index_path=[idx_file],
        random_shuffle=True,
        shard_id=device_id,
        num_shards=num_gpus)

    self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)
    self.rrc = ops.RandomResizedCrop(device="gpu", size=(224, 224))

    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(224, 224),
        image_type=types.RGB,
        mean=channel_mean,
        std=channel_std)
    self.coin = ops.CoinFlip(probability=0.5)