def __init__(self, data_paths, num_gpus, batch_size, num_threads, device_id,
             prefetch, fp16, nhwc, decoder_type, decoder_cache_params,
             reader_queue_depth):
    """Create the decode, resize and normalize operators for this pipeline.

    Args:
        data_paths: Not used in this constructor.
        num_gpus: Not used in this constructor.
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor.
        prefetch: Forwarded to the base constructor as
            ``prefetch_queue_depth``.
        fp16: When truthy the normalized output type is ``types.FLOAT16``,
            otherwise ``types.FLOAT``.
        nhwc: When truthy the normalized output layout is ``types.NHWC``,
            otherwise ``types.NCHW``.
        decoder_type: ``'roi'``, ``'roi_split'``, ``'cached'`` or ``'split'``;
            any other value selects the plain nvJPEG decoder.
        decoder_cache_params: Mapping read only when ``decoder_type`` is
            ``'cached'``. Must provide ``cache_enabled`` (equal to ``True``),
            ``cache_size``, ``cache_threshold`` and ``cache_type``.
        reader_queue_depth: Not used in this constructor.

    Raises:
        AssertionError: If ``decoder_type`` is ``'cached'`` but
            ``decoder_cache_params['cache_enabled']`` is not equal to ``True``.
    """
    super(CommonPipeline, self).__init__(
        batch_size, num_threads, device_id, prefetch_queue_depth=prefetch)

    # First decide *what* to build (and announce it); the operators themselves
    # are constructed once, below.
    fused_random_crop = False
    decoder_options = {}
    if decoder_type == 'roi':
        print('Using nvJPEG with ROI decoding')
        fused_random_crop = True
    elif decoder_type == 'roi_split':
        print('Using nvJPEG with ROI decoding and split CPU/GPU stages')
        fused_random_crop = True
        decoder_options['split_stages'] = True
    elif decoder_type == 'cached':
        assert decoder_cache_params['cache_enabled'] == True
        size = decoder_cache_params['cache_size']
        threshold = decoder_cache_params['cache_threshold']
        kind = decoder_cache_params['cache_type']
        message = 'Using nvJPEG with cache (size : {} threshold: {}, type: {})'
        print(message.format(size, threshold, kind))
        decoder_options.update(
            cache_size=size,
            cache_threshold=threshold,
            cache_type=kind,
            cache_debug=False,
        )
    elif decoder_type == 'split':
        print('Using nvJPEG with split CPU/GPU stages')
        decoder_options['split_stages'] = True
    else:
        print('Using nvJPEG')

    if fused_random_crop:
        # The decoder already crops randomly, so only a fixed resize follows.
        self.decode_gpu = ops.nvJPEGDecoderRandomCrop(
            device="mixed", output_type=types.RGB, **decoder_options)
        self.res = ops.Resize(device="gpu", resize_x=224, resize_y=224)
    else:
        # Full-image decode; the random crop happens in the resize operator.
        self.decode_gpu = ops.nvJPEGDecoder(
            device="mixed", output_type=types.RGB, **decoder_options)
        self.res = ops.RandomResizedCrop(device="gpu", size=(224, 224))

    if nhwc:
        layout = types.NHWC
    else:
        layout = types.NCHW
    if fp16:
        out_type = types.FLOAT16
    else:
        out_type = types.FLOAT

    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=out_type,
        output_layout=layout,
        crop=(224, 224),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
    """Build the reader, decoder, resize and normalize operators for training.

    The reader's shard settings come from ``args.local_rank`` and
    ``args.world_size``. ``args`` is not a parameter, so it must be resolvable
    from the enclosing scope when this constructor runs.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; also added to
            12 to form the pipeline seed.
        data_dir: Passed to the file reader as ``file_root``.
        crop: Edge length used for both resize dimensions and for the
            ``(crop, crop)`` crop window of the normalize operator.
        dali_cpu: When truthy, decode with ``HostDecoderRandomCrop`` on the
            CPU; otherwise use the "mixed" ``nvJPEGDecoderRandomCrop``.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)
    self.input = ops.FileReader(file_root=data_dir,
                                shard_id=args.local_rank,
                                num_shards=args.world_size,
                                random_shuffle=True)
    # Let the user decide which decoding backend works best for the ResNet
    # variant being run.
    if dali_cpu:
        dali_device = "cpu"
        self.decode = ops.HostDecoderRandomCrop(device=dali_device,
                                                output_type=types.RGB,
                                                random_aspect_ratio=[0.8, 1.25],
                                                random_area=[0.1, 1.0],
                                                num_attempts=100)
    else:
        dali_device = "gpu"
        # This padding sets the size of the internal nvJPEG buffers to be able
        # to handle all images from full-sized ImageNet without additional
        # reallocations.
        self.decode = ops.nvJPEGDecoderRandomCrop(device="mixed",
                                                  output_type=types.RGB,
                                                  device_memory_padding=211025920,
                                                  host_memory_padding=140544512,
                                                  random_aspect_ratio=[0.8, 1.25],
                                                  random_area=[0.1, 1.0],
                                                  num_attempts=100)
    # `device` is passed by keyword, matching every other operator built in
    # this constructor.
    # NOTE(review): the resize is pinned to "gpu" even when dali_cpu is set;
    # presumably the graph moves decoded images to the GPU first - confirm
    # against define_graph.
    self.res = ops.Resize(device="gpu",
                          resize_x=crop,
                          resize_y=crop,
                          interp_type=types.INTERP_TRIANGULAR)
    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        output_dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        crop=(crop, crop),
                                        image_type=types.RGB,
                                        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.CoinFlip(probability=0.5)
    print('DALI "{0}" variant'.format(dali_device))
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
    """Assemble the training operators, sharding the reader via torch.distributed.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; also added to
            12 to form the pipeline seed.
        data_dir: Passed to the file reader as ``file_root``.
        crop: Edge length used for both resize dimensions and for the
            ``(crop, crop)`` crop window of the normalize operator.
        dali_cpu: When truthy, decode and resize on the CPU; otherwise decode
            with the "mixed" nvJPEG operator and resize on the GPU.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size, num_threads, device_id, seed=12 + device_id)

    # Without an initialized process group the reader runs as a single shard.
    if torch.distributed.is_initialized():
        shard_index = torch.distributed.get_rank()
        shard_count = torch.distributed.get_world_size()
    else:
        shard_index, shard_count = 0, 1
    self.input = ops.FileReader(
        file_root=data_dir,
        shard_id=shard_index,
        num_shards=shard_count,
        random_shuffle=True,
    )

    dali_device = "cpu" if dali_cpu else "gpu"
    if dali_device == "cpu":
        self.decode = ops.HostDecoderRandomCrop(
            device=dali_device,
            output_type=types.RGB,
            random_aspect_ratio=[0.75, 4. / 3.],
            random_area=[0.08, 1.0],
            num_attempts=100,
        )
    else:
        # This padding sets the size of the internal nvJPEG buffers to be able
        # to handle all images from full-sized ImageNet without additional
        # reallocations.
        self.decode = ops.nvJPEGDecoderRandomCrop(
            device="mixed",
            output_type=types.RGB,
            device_memory_padding=211025920,
            host_memory_padding=140544512,
            random_aspect_ratio=[0.75, 4. / 3.],
            random_area=[0.08, 1.0],
            num_attempts=100,
        )

    self.res = ops.Resize(
        device=dali_device,
        resize_x=crop,
        resize_y=crop,
        interp_type=types.INTERP_TRIANGULAR,
    )
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu, file_list):
    """Assemble the training operators with a file-list driven reader.

    Shard settings are read from ``args.local_rank`` / ``args.world_size`` and
    the normalization constants from ``mean`` / ``std``. None of these are
    parameters, so they must be resolvable from the enclosing scope.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; also added to
            12 to form the pipeline seed.
        data_dir: Passed to the file reader as ``file_root``.
        crop: Edge length used for both resize dimensions and for the
            ``(crop, crop)`` crop window of the normalize operator.
        dali_cpu: When truthy, decode and resize on the CPU; otherwise decode
            with the "mixed" nvJPEG operator and resize on the GPU.
        file_list: Passed to the file reader as ``file_list``.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size, num_threads, device_id, seed=12 + device_id)

    self.input = ops.FileReader(
        file_root=data_dir,
        shard_id=args.local_rank,
        num_shards=args.world_size,
        random_shuffle=True,
        file_list=file_list,
    )

    dali_device = "cpu" if dali_cpu else "gpu"
    if dali_device == "cpu":
        self.decode = ops.HostDecoderRandomCrop(
            device=dali_device,
            output_type=types.RGB,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[0.1, 1.0],
            num_attempts=100,
        )
    else:
        self.decode = ops.nvJPEGDecoderRandomCrop(
            device="mixed",
            output_type=types.RGB,
            device_memory_padding=211025920,
            host_memory_padding=140544512,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[0.1, 1.0],
            num_attempts=100,
        )

    self.res = ops.Resize(
        device=dali_device,
        resize_x=crop,
        resize_y=crop,
        interp_type=types.INTERP_TRIANGULAR,
    )
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=mean,
        std=std,
    )
    self.coin = ops.CoinFlip(probability=0.5)

    # Report which decoding backend was selected.
    print('DALI "{0}" variant'.format(dali_device))
def __init__(self, batch_size, num_threads, device_id, rec_path, idx_path,
             shard_id, num_shards, crop_shape, min_random_area,
             max_random_area, min_random_aspect_ratio,
             max_random_aspect_ratio, nvjpeg_padding, prefetch_queue=3,
             seed=12, output_layout=types.NCHW, pad_output=True,
             dtype='float16', mlperf_print=True, use_roi_decode=False,
             cache_size=0):
    """Build the RecordIO reader, decoder, crop/resize and normalize operators.

    The normalization constants are read from ``_mean_pixel`` and
    ``_std_pixel``, which are not parameters and must be resolvable from the
    enclosing scope.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; also added to
            ``seed`` to form the pipeline seed.
        rec_path: Record file path; wrapped in a one-element list for the reader.
        idx_path: Index file path; wrapped in a one-element list for the reader.
        shard_id: Passed to the reader as ``shard_id``.
        num_shards: Passed to the reader as ``num_shards``.
        crop_shape: Two-element sequence passed unchanged as ``size`` to
            ``RandomResizedCrop`` and as ``crop`` to ``CropMirrorNormalize``.
        min_random_area: Lower bound of the ``random_area`` range.
        max_random_area: Upper bound of the ``random_area`` range.
        min_random_aspect_ratio: Lower bound of the ``random_aspect_ratio`` range.
        max_random_aspect_ratio: Upper bound of the ``random_aspect_ratio`` range.
        nvjpeg_padding: Used for both ``device_memory_padding`` and
            ``host_memory_padding`` of the decoder.
        prefetch_queue: Forwarded to the base constructor as
            ``prefetch_queue_depth``.
        seed: Base seed; ``device_id`` is added to it.
        output_layout: Passed to ``CropMirrorNormalize`` as ``output_layout``.
        pad_output: Passed to ``CropMirrorNormalize`` as ``pad_output``.
        dtype: ``'float16'`` selects ``types.FLOAT16`` output; any other value
            selects ``types.FLOAT``.
        mlperf_print: Not used in this constructor.
        use_roi_decode: When truthy and ``cache_size == 0``, decode with the
            fused random-crop decoder followed by a plain resize.
        cache_size: When greater than zero, enables the decoder cache and the
            cache-aware reader options; this takes precedence over
            ``use_roi_decode``.
    """
    super(HybridTrainPipe, self).__init__(batch_size,
                                          num_threads,
                                          device_id,
                                          seed=seed + device_id,
                                          prefetch_queue_depth=prefetch_queue)

    area_range = [min_random_area, max_random_area]
    aspect_range = [min_random_aspect_ratio, max_random_aspect_ratio]
    caching = cache_size > 0

    if caching:
        self.input = ops.MXNetReader(path=[rec_path],
                                     index_path=[idx_path],
                                     random_shuffle=True,
                                     shard_id=shard_id,
                                     num_shards=num_shards,
                                     stick_to_shard=True,
                                     lazy_init=True,
                                     skip_cached_images=True)
    else:
        # stick_to_shard might not exist in this version of DALI.
        self.input = ops.MXNetReader(path=[rec_path],
                                     index_path=[idx_path],
                                     random_shuffle=True,
                                     shard_id=shard_id,
                                     num_shards=num_shards)

    if use_roi_decode and not caching:
        self.decode = ops.nvJPEGDecoderRandomCrop(
            device="mixed",
            output_type=types.RGB,
            device_memory_padding=nvjpeg_padding,
            host_memory_padding=nvjpeg_padding,
            random_area=area_range,
            random_aspect_ratio=aspect_range)
        # crop_shape is also handed to CropMirrorNormalize(crop=...) and
        # RandomResizedCrop(size=...), which the DALI operator docs describe
        # as (height, width). Resize therefore takes x (width) from index 1
        # and y (height) from index 0. For square crops nothing changes.
        self.rrc = ops.Resize(device="gpu",
                              resize_x=crop_shape[1],
                              resize_y=crop_shape[0])
    else:
        if caching:
            self.decode = ops.nvJPEGDecoder(
                device="mixed",
                output_type=types.RGB,
                device_memory_padding=nvjpeg_padding,
                host_memory_padding=nvjpeg_padding,
                cache_type='threshold',
                cache_size=cache_size,
                cache_threshold=0,
                cache_debug=False)
        else:
            self.decode = ops.nvJPEGDecoder(
                device="mixed",
                output_type=types.RGB,
                device_memory_padding=nvjpeg_padding,
                host_memory_padding=nvjpeg_padding)
        # Full-image decode: the random crop is done by the resize operator.
        self.rrc = ops.RandomResizedCrop(device="gpu",
                                         random_area=area_range,
                                         random_aspect_ratio=aspect_range,
                                         size=crop_shape)

    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT16 if dtype == 'float16' else types.FLOAT,
        output_layout=output_layout,
        crop=crop_shape,
        pad_output=pad_output,
        image_type=types.RGB,
        mean=_mean_pixel,
        std=_std_pixel)
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, args, batch_size, num_threads, device_id, rec_path,
             idx_path, shard_id, num_shards, crop_shape, nvjpeg_padding,
             prefetch_queue=3, output_layout=types.NCHW, pad_output=True,
             dtype='float16', dali_cpu=False):
    """Build the RecordIO reader, decoder, resize and normalize operators.

    Args:
        args: Object providing ``dali_fuse_decoder``, ``rgb_mean`` and
            ``rgb_std`` attributes.
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; also added to
            12 to form the pipeline seed.
        rec_path: Record file path; wrapped in a one-element list for the reader.
        idx_path: Index file path; wrapped in a one-element list for the reader.
        shard_id: Passed to the reader as ``shard_id``.
        num_shards: Passed to the reader as ``num_shards``.
        crop_shape: Two-element sequence. Index 1 becomes ``resize_x`` and
            index 0 becomes ``resize_y`` when the decoder is fused; it is also
            passed unchanged as ``size`` / ``crop`` to the other operators.
        nvjpeg_padding: Used for both ``device_memory_padding`` and
            ``host_memory_padding`` of the nvJPEG decoders.
        prefetch_queue: Forwarded to the base constructor as
            ``prefetch_queue_depth``.
        output_layout: Passed to ``CropMirrorNormalize`` as ``output_layout``.
        pad_output: Passed to ``CropMirrorNormalize`` as ``pad_output``.
        dtype: ``'float16'`` selects ``types.FLOAT16`` output; any other value
            selects ``types.FLOAT``.
        dali_cpu: When truthy, decode and resize on the CPU; otherwise decode
            with the "mixed" nvJPEG operators and resize on the GPU.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size,
        num_threads,
        device_id,
        seed=12 + device_id,
        prefetch_queue_depth=prefetch_queue,
    )
    self.input = ops.MXNetReader(
        path=[rec_path],
        index_path=[idx_path],
        random_shuffle=True,
        shard_id=shard_id,
        num_shards=num_shards,
    )

    # A "fused" decoder performs the random crop itself, so it is paired with
    # a plain resize instead of a random-resized-crop.
    fuse_decoder = args.dali_fuse_decoder

    if dali_cpu:
        dali_device = "cpu"
        host_decoder = ops.HostDecoderRandomCrop if fuse_decoder else ops.HostDecoder
        self.decode = host_decoder(device=dali_device, output_type=types.RGB)
    else:
        dali_device = "gpu"
        gpu_decoder = ops.nvJPEGDecoderRandomCrop if fuse_decoder else ops.nvJPEGDecoder
        self.decode = gpu_decoder(
            device="mixed",
            output_type=types.RGB,
            device_memory_padding=nvjpeg_padding,
            host_memory_padding=nvjpeg_padding,
        )

    if fuse_decoder:
        self.resize = ops.Resize(
            device=dali_device,
            resize_x=crop_shape[1],
            resize_y=crop_shape[0],
        )
    else:
        self.resize = ops.RandomResizedCrop(device=dali_device, size=crop_shape)

    if dtype == 'float16':
        normalized_dtype = types.FLOAT16
    else:
        normalized_dtype = types.FLOAT
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=normalized_dtype,
        output_layout=output_layout,
        crop=crop_shape,
        pad_output=pad_output,
        image_type=types.RGB,
        mean=args.rgb_mean,
        std=args.rgb_std,
    )
    self.coin = ops.CoinFlip(probability=0.5)