def __init__(self, data_paths, num_shards, batch_size, num_threads, device_id, prefetch, fp16,
             random_shuffle, nhwc, dont_use_mmap, decoder_type, decoder_cache_params,
             reader_queue_depth, shard_id):
    """Create the decoder, resize, normalization and coin-flip operators.

    ``decoder_type`` picks the decoder / resize pair:

    * ``'roi'``       -- ``ImageDecoderRandomCrop`` followed by a fixed 224x224 ``Resize``.
    * ``'roi_split'`` -- same as ``'roi'`` but with ``split_stages=True``.
    * ``'cached'``    -- ``ImageDecoder`` configured from ``decoder_cache_params``
      (its ``'cache_enabled'`` entry is asserted), followed by ``RandomResizedCrop``.
    * ``'split'``     -- ``ImageDecoder`` with ``split_stages=True`` and ``RandomResizedCrop``.
    * anything else   -- plain ``ImageDecoder`` and ``RandomResizedCrop``.

    ``fp16`` and ``nhwc`` choose the output dtype and layout of the normalization step.
    ``data_paths``, ``num_shards``, ``dont_use_mmap``, ``reader_queue_depth`` and
    ``shard_id`` are accepted but not read by this constructor.
    """
    # NOTE(review): ``random_shuffle`` travels as the fourth positional argument of the
    # base constructor -- confirm that this is the slot it is meant to fill.
    super(CommonPipeline, self).__init__(batch_size, num_threads, device_id, random_shuffle,
                                         prefetch_queue_depth=prefetch)

    # Keyword arguments common to every decoder variant below.
    mixed_rgb = dict(device="mixed", output_type=types.RGB)

    if decoder_type in ('roi', 'roi_split'):
        if decoder_type == 'roi':
            print('Using nvJPEG with ROI decoding')
            self.decode_gpu = ops.ImageDecoderRandomCrop(**mixed_rgb)
        else:
            print('Using nvJPEG with ROI decoding and split CPU/GPU stages')
            self.decode_gpu = ops.ImageDecoderRandomCrop(split_stages=True, **mixed_rgb)
        # The random crop already happened in the decoder, so only a plain resize is needed.
        self.res = ops.Resize(device="gpu", resize_x=224, resize_y=224)
    else:
        if decoder_type == 'cached':
            assert decoder_cache_params['cache_enabled'] == True
            size = decoder_cache_params['cache_size']
            threshold = decoder_cache_params['cache_threshold']
            kind = decoder_cache_params['cache_type']
            print('Using nvJPEG with cache (size : {} threshold: {}, type: {})'.format(
                size, threshold, kind))
            self.decode_gpu = ops.ImageDecoder(cache_size=size,
                                               cache_threshold=threshold,
                                               cache_type=kind,
                                               cache_debug=False,
                                               **mixed_rgb)
        elif decoder_type == 'split':
            print('Using nvJPEG with split CPU/GPU stages')
            self.decode_gpu = ops.ImageDecoder(split_stages=True, **mixed_rgb)
        else:
            print('Using nvJPEG')
            self.decode_gpu = ops.ImageDecoder(**mixed_rgb)
        # Full-image decoders are paired with a random resized crop.
        self.res = ops.RandomResizedCrop(device="gpu", size=(224, 224))

    if nhwc:
        layout = types.NHWC
    else:
        layout = types.NCHW
    if fp16:
        out_type = types.FLOAT16
    else:
        out_type = types.FLOAT

    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        dtype=out_type,
        output_layout=layout,
        crop=(224, 224),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.random.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
    """Set up the file reader, random-crop decoder, resize and normalization operators.

    The reader shard comes from ``torch.distributed`` when a process group is
    initialized; otherwise a single shard (id 0 of 1) is used.  ``dali_cpu`` moves the
    decoder and the resize step to ``"cpu"``; the normalization operator is always
    created on ``"gpu"``.  ``crop`` is the side length used for both resize and crop.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id,
                                          seed=12 + device_id)

    if torch.distributed.is_initialized():
        local_rank = torch.distributed.get_rank()
        world_size = torch.distributed.get_world_size()
    else:
        local_rank, world_size = 0, 1
    self.input = ops.FileReader(file_root=data_dir,
                                shard_id=local_rank,
                                num_shards=world_size,
                                random_shuffle=True)

    # Crop-sampling settings shared by the CPU and the mixed decoder.
    sampling = dict(output_type=types.RGB,
                    random_aspect_ratio=[0.75, 4. / 3.],
                    random_area=[0.08, 1.0],
                    num_attempts=100)
    if not dali_cpu:
        dali_device = "gpu"
        # This padding sets the size of the internal nvJPEG buffers to be able to handle
        # all images from full-sized ImageNet without additional reallocations.
        self.decode = ops.ImageDecoderRandomCrop(device="mixed",
                                                 device_memory_padding=211025920,
                                                 host_memory_padding=140544512,
                                                 **sampling)
    else:
        dali_device = "cpu"
        self.decode = ops.ImageDecoderRandomCrop(device=dali_device, **sampling)

    self.res = ops.Resize(device=dali_device,
                          resize_x=crop,
                          resize_y=crop,
                          interp_type=types.INTERP_TRIANGULAR)
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, split, args=None):
    """Build a single-shard pipeline for the ``split`` sub-directory of ``args.root``.

    For ``split == 'train'`` the batch size is ``args.batch_size``, the reader shuffles,
    and images are decoded with a random crop and resized so the shorter side equals
    ``args.input_size``.  For any other split the batch size is ``args.val_batch_size``,
    the reader does not shuffle, and images are fully decoded and resized to a shorter
    side of 256.  Both variants normalize with a crop of ``args.input_size``.

    ``args`` defaults to ``None`` but its attributes are read unconditionally.
    """
    self.split = split
    training = self.split == 'train'
    self.bs = args.batch_size if training else args.val_batch_size
    self.shuffle = training
    self.data_dir = os.path.join(args.root, split)
    self.crop = args.input_size

    super(HybridPipe, self).__init__(self.bs, args.workers, 0, seed=12)
    self.input = ops.FileReader(file_root=self.data_dir,
                                shard_id=0,
                                num_shards=1,
                                random_shuffle=self.shuffle)

    dali_device = "gpu"
    if split != 'train':
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        shorter_side = 256
    else:
        self.decode = ops.ImageDecoderRandomCrop(device="mixed",
                                                 output_type=types.RGB,
                                                 device_memory_padding=211025920,
                                                 host_memory_padding=140544512,
                                                 random_aspect_ratio=[0.75, 1.333],
                                                 random_area=[0.08, 1.0],
                                                 num_attempts=100)
        shorter_side = args.input_size
    self.res = ops.Resize(device=dali_device,
                          resize_shorter=shorter_side,
                          interp_type=types.INTERP_TRIANGULAR)

    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        output_dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        crop=(self.crop, self.crop),
                                        image_type=types.RGB,
                                        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, cfg):
    """Build the classification training pipeline from the ``cfg`` object.

    Reads ``cfg.dataset.loader`` (``batch_size``, ``num_workers``),
    ``cfg.device.local_rank`` and ``cfg.dataset.transform`` (``image_size``,
    ``max_rotate_angle``).  Samples come from a ``ClsInputIterator`` wrapped in an
    ``ExternalSource`` with two outputs.
    """
    loader_cfg = cfg.dataset.loader
    super(ClsTrainPipe, self).__init__(batch_size=loader_cfg.batch_size,
                                       num_threads=loader_cfg.num_workers,
                                       device_id=cfg.device.local_rank)

    self.eii = ClsInputIterator(cfg=cfg, is_train=True)
    self.source = ops.ExternalSource(source=self.eii, num_outputs=2)
    self.decode = ops.ImageDecoderRandomCrop(device='mixed',
                                             output_type=types.RGB,
                                             random_aspect_ratio=[0.8, 1.25],
                                             random_area=[0.3, 1.0],
                                             num_attempts=100)
    self.rotate = ops.Rotate(device='gpu', fill_value=127.5)

    side = cfg.dataset.transform.image_size
    self.res = ops.Resize(device='gpu',
                          resize_x=side,
                          resize_y=side,
                          interp_type=types.INTERP_TRIANGULAR)
    self.cmnp = ops.CropMirrorNormalize(device='gpu',
                                        dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        crop=(side, side),
                                        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.CoinFlip(probability=0.5)

    # Rotation angles are drawn symmetrically around zero.
    max_angle = cfg.dataset.transform.max_rotate_angle
    self.angle = ops.Uniform(range=(-1 * max_angle, max_angle))
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
    """Create the reader, random-crop decoder, resize and normalization operators.

    Sharding uses the module-level ``args`` object (``args.local_rank`` and
    ``args.world_size``).  ``dali_cpu`` selects ``"cpu"`` for both the decoder and the
    resize step; otherwise they use ``"mixed"`` and ``"gpu"``.  The normalization
    operator is always created on ``"gpu"``.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id,
                                          seed=12 + device_id)
    self.input = ops.FileReader(file_root=data_dir,
                                shard_id=args.local_rank,
                                num_shards=args.world_size,
                                random_shuffle=True)

    # Let the user decide which pipeline variant works best for the model being run.
    if dali_cpu:
        dali_device, decoder_device = 'cpu', 'cpu'
    else:
        dali_device, decoder_device = 'gpu', 'mixed'

    # This padding sets the size of the internal nvJPEG buffers to be able to handle all
    # images from full-sized ImageNet without additional reallocations.
    on_mixed = decoder_device == 'mixed'
    device_memory_padding = 211025920 if on_mixed else 0
    host_memory_padding = 140544512 if on_mixed else 0

    self.decode = ops.ImageDecoderRandomCrop(device=decoder_device,
                                             output_type=types.RGB,
                                             device_memory_padding=device_memory_padding,
                                             host_memory_padding=host_memory_padding,
                                             random_aspect_ratio=[0.8, 1.25],
                                             random_area=[0.1, 1.0],
                                             num_attempts=100)
    self.res = ops.Resize(device=dali_device,
                          resize_x=crop,
                          resize_y=crop,
                          interp_type=types.INTERP_TRIANGULAR)
    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        output_dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        crop=(crop, crop),
                                        image_type=types.RGB,
                                        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.CoinFlip(probability=0.5)
def imagenet_train_graph(data_dir, size, random_aspect_ratio, random_area,
                         interp_type=types.INTERP_TRIANGULAR, stats=imagenet_stats):
    """Create the training operators and return a ``define_graph`` callable.

    The operators are built once, here.  The returned zero-argument function wires them
    together: read -> random-crop decode -> resize to ``size`` x ``size`` ->
    crop/mirror/normalize (mirroring driven by a 0.5-probability coin flip), and returns
    ``[output, labels]``.

    ``stats`` must unpack into exactly two sequences (mean, std); every value is scaled
    by 255 before being handed to the normalization operator.
    """
    reader = ops.FileReader(file_root=data_dir, random_shuffle=True)
    decoder = ops.ImageDecoderRandomCrop(device='mixed',
                                         random_aspect_ratio=random_aspect_ratio,
                                         random_area=random_area)
    resizer = ops.Resize(device='gpu',
                         resize_x=size,
                         resize_y=size,
                         interp_type=interp_type)

    mean, std = [[value * 255 for value in channel_stats] for channel_stats in stats]
    normalizer = ops.CropMirrorNormalize(device='gpu',
                                         output_dtype=types.FLOAT16,
                                         crop=(size, size),
                                         mean=mean,
                                         std=std)
    mirror_coin = ops.CoinFlip(probability=0.5)

    def define_graph():
        jpegs, labels = reader(name='Reader')
        decoded = decoder(jpegs)
        resized = resizer(decoded)
        output = normalizer(resized, mirror=mirror_coin())
        return [output, labels]

    return define_graph
def __init__(self, batch_size, num_threads, device_id, data_dir="./", dali_cpu=False):
    """Create a shuffled file reader, a random-crop decoder and a 224x224 resize.

    ``dali_cpu`` places both the decoder and the resize on ``"cpu"``; otherwise the
    decoder uses ``"mixed"`` (with non-zero memory padding) and the resize ``"gpu"``.
    """
    super(DecodeCropResizePipeline, self).__init__(batch_size, num_threads, device_id,
                                                   seed=12)
    self.input = ops.FileReader(file_root=data_dir, random_shuffle=True)

    decoder_device = "cpu" if dali_cpu else "mixed"
    # Buffer padding is only requested for the mixed decoder.
    if decoder_device == 'mixed':
        device_memory_padding, host_memory_padding = 211025920, 140544512
    else:
        device_memory_padding, host_memory_padding = 0, 0
    dali_device = "cpu" if dali_cpu else "gpu"

    self.decode = ops.ImageDecoderRandomCrop(
        device=decoder_device,
        output_type=types.RGB,
        device_memory_padding=device_memory_padding,
        host_memory_padding=host_memory_padding,
        random_aspect_ratio=[0.8, 1.25],
        random_area=[0.1, 1.0],
        num_attempts=100)
    self.res = ops.Resize(device=dali_device,
                          resize_x=224,
                          resize_y=224,
                          interp_type=types.INTERP_TRIANGULAR)
def __init__(self, args, batch_size, num_threads, device_id, rec_path, idx_path, shard_id,
             num_shards, crop_shape, nvjpeg_padding, prefetch_queue=3,
             output_layout=types.NCHW, pad_output=True, dtype='float16', dali_cpu=False):
    """Create the MXNet-record training operators.

    ``args.dali_fuse_decoder`` selects between a fused random-crop decoder followed by a
    plain ``Resize`` to ``crop_shape`` and a full ``ImageDecoder`` followed by
    ``RandomResizedCrop``.  ``nvjpeg_padding`` is used for both the device and the host
    memory padding of the decoder.  ``dtype == 'float16'`` selects ``types.FLOAT16`` for
    the normalized output, anything else ``types.FLOAT``.  Device names are obtained from
    ``get_device_names(dali_cpu)``; normalization statistics come from ``args.rgb_mean``
    and ``args.rgb_std``.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id,
                                          seed=12 + device_id,
                                          prefetch_queue_depth=prefetch_queue)
    self.input = ops.MXNetReader(path=[rec_path],
                                 index_path=[idx_path],
                                 random_shuffle=True,
                                 shard_id=shard_id,
                                 num_shards=num_shards)

    dali_device, decoder_device = get_device_names(dali_cpu)
    decoder_kwargs = dict(device=decoder_device,
                          output_type=types.RGB,
                          device_memory_padding=nvjpeg_padding,
                          host_memory_padding=nvjpeg_padding)

    if args.dali_fuse_decoder:
        # Cropping is fused into decoding, so only a plain resize follows.
        self.decode = ops.ImageDecoderRandomCrop(**decoder_kwargs)
        self.resize = ops.Resize(device=dali_device,
                                 resize_x=crop_shape[1],
                                 resize_y=crop_shape[0])
    else:
        self.decode = ops.ImageDecoder(**decoder_kwargs)
        self.resize = ops.RandomResizedCrop(device=dali_device, size=crop_shape)

    if dtype == 'float16':
        out_dtype = types.FLOAT16
    else:
        out_dtype = types.FLOAT
    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        dtype=out_dtype,
                                        output_layout=output_layout,
                                        crop=crop_shape,
                                        pad_output=pad_output,
                                        mean=args.rgb_mean,
                                        std=args.rgb_std)
    self.coin = ops.random.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, device_id, root, list_path, crop, shard_id,
             num_shards, coji=False, dali_cpu=False):
    """Create the file-list training operators, optionally with colour jitter.

    ``root`` / ``list_path`` are handed to the reader as ``file_root`` / ``file_list``.
    ``dali_cpu`` puts every processing operator on ``"cpu"``; otherwise the decoder uses
    ``"mixed"`` and the rest ``"gpu"``.  When ``coji`` is true, a ``ColorTwist`` operator
    and three uniform generators over ``[0.6, 1.4]`` (brightness, contrast, saturation)
    are created as well.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id,
                                          seed=12 + device_id)
    self.read = ops.FileReader(file_root=root,
                               file_list=list_path,
                               shard_id=shard_id,
                               num_shards=num_shards,
                               random_shuffle=True,
                               initial_fill=1024)

    # Let the user decide which pipeline variant works.
    if dali_cpu:
        dali_device = decoder_device = 'cpu'
    else:
        dali_device, decoder_device = 'gpu', 'mixed'

    # This padding sets the size of the internal nvJPEG buffers to be able to handle all
    # images from full-sized ImageNet without additional reallocations.
    use_padding = decoder_device == 'mixed'
    device_memory_padding = 211025920 if use_padding else 0
    host_memory_padding = 140544512 if use_padding else 0

    self.decode = ops.ImageDecoderRandomCrop(
        device=decoder_device,
        output_type=types.RGB,
        device_memory_padding=device_memory_padding,
        host_memory_padding=host_memory_padding,
        random_aspect_ratio=[0.75, 1.33333333],
        random_area=[0.08, 1.0],
        num_attempts=100)
    self.resize = ops.Resize(device=dali_device,
                             resize_x=crop,
                             resize_y=crop,
                             interp_type=types.INTERP_TRIANGULAR)
    self.cmnp = ops.CropMirrorNormalize(
        device=dali_device,
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[channel * 255 for channel in IMAGENET_MEAN],
        std=[channel * 255 for channel in IMAGENET_STD])
    self.coin = ops.CoinFlip(probability=0.5)

    self.coji = coji
    if not self.coji:
        return
    # Colour jitter: each factor is drawn uniformly from 1.0 +/- 0.4.
    self.twist = ops.ColorTwist(device=dali_device)
    self.brightness_rng = ops.Uniform(range=[1.0 - 0.4, 1.0 + 0.4])
    self.contrast_rng = ops.Uniform(range=[1.0 - 0.4, 1.0 + 0.4])
    self.saturation_rng = ops.Uniform(range=[1.0 - 0.4, 1.0 + 0.4])
def __init__(self, file_root, file_list, batch_size, resize_shorter, crop, min_area, lower,
             upper, interp, mean, std, device_id, shard_id=0, num_shards=1,
             random_shuffle=True, num_threads=4, seed=42, pad_output=False,
             output_dtype=types.FLOAT):
    """Create a fully parametrized training pipeline.

    ``lower`` / ``upper`` bound the random aspect ratio and ``min_area`` is the lower
    bound of the random crop area (the upper bound is 1.0).  ``interp`` is the resize
    interpolation type, ``crop`` the output side length, and ``mean`` / ``std`` are
    passed unchanged to the normalization operator.  ``resize_shorter`` is accepted but
    not read by this constructor.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=seed)
    self.input = ops.FileReader(file_root=file_root,
                                file_list=file_list,
                                shard_id=shard_id,
                                num_shards=num_shards,
                                random_shuffle=random_shuffle)

    # Set the internal nvJPEG buffer sizes to handle full-sized ImageNet images without
    # additional reallocations.
    nvjpeg_buffers = dict(device_memory_padding=211025920,
                          host_memory_padding=140544512)
    self.decode = ops.ImageDecoderRandomCrop(device='mixed',
                                             output_type=types.RGB,
                                             random_aspect_ratio=[lower, upper],
                                             random_area=[min_area, 1.0],
                                             num_attempts=100,
                                             **nvjpeg_buffers)
    self.res = ops.Resize(device='gpu',
                          resize_x=crop,
                          resize_y=crop,
                          interp_type=interp)
    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        output_dtype=output_dtype,
                                        output_layout=types.NCHW,
                                        crop=(crop, crop),
                                        image_type=types.RGB,
                                        mean=mean,
                                        std=std,
                                        pad_output=pad_output)
    self.coin = ops.CoinFlip(probability=0.5)
    self.to_int64 = ops.Cast(dtype=types.INT64, device="gpu")
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False,
             args_new=None, shuffle_seed=0):
    """Create the training operators with per-epoch reader shuffling.

    The reader is sharded by ``args_new.local_rank_per_job`` out of
    ``args_new.world_size`` and is created with ``shuffle_after_epoch=True`` and the
    given ``shuffle_seed``.  ``dali_cpu`` places every processing operator on ``"cpu"``;
    otherwise the decoder uses ``"mixed"`` and the rest ``"gpu"``.

    ``args_new`` defaults to ``None`` but its attributes are read unconditionally.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id,
                                          seed=12 + device_id,
                                          prefetch_queue_depth=3)
    self.input = ops.FileReader(file_root=data_dir,
                                shard_id=args_new.local_rank_per_job,
                                num_shards=args_new.world_size,
                                shuffle_after_epoch=True,
                                shuffle_seed=shuffle_seed)

    if dali_cpu:
        dali_device, decoder_device = 'cpu', 'cpu'
    else:
        dali_device, decoder_device = 'gpu', 'mixed'

    # This padding sets the size of the internal nvJPEG buffers to be able to handle all
    # images from full-sized ImageNet without additional reallocations.
    if decoder_device == 'mixed':
        device_memory_padding, host_memory_padding = 211025920, 140544512
    else:
        device_memory_padding, host_memory_padding = 0, 0

    self.decode = ops.ImageDecoderRandomCrop(
        device=decoder_device,
        output_type=types.RGB,
        device_memory_padding=device_memory_padding,
        host_memory_padding=host_memory_padding,
        random_aspect_ratio=[0.8, 1.25],
        random_area=[0.1, 1.0],
        num_attempts=100)
    self.res = ops.Resize(device=dali_device,
                          resize_x=crop,
                          resize_y=crop,
                          interp_type=types.INTERP_TRIANGULAR)
    self.cmnp = ops.CropMirrorNormalize(
        device=dali_device,
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.CoinFlip(probability=0.5)
    print('DALI "{0}" variant, shards={1}'.format(dali_device, args_new.world_size))
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, mean, std,
             local_rank=0, world_size=1, dali_cpu=False, shuffle=True, fp16=False,
             min_crop_size=0.08, color_jitter=False):
    """Create the training operators for a pipeline that is rebuilt every epoch.

    ``local_rank`` / ``world_size`` shard the reader and ``shuffle`` toggles its random
    shuffling.  ``dali_cpu`` puts all processing on ``"cpu"`` and additionally creates a
    ``Flip`` operator; otherwise the decoder uses ``"mixed"`` and the rest ``"gpu"``.
    ``fp16`` selects ``types.FLOAT16`` output instead of ``types.FLOAT``.
    ``min_crop_size`` is the lower bound of the random crop area.  ``color_jitter`` is
    accepted but not read by this constructor.
    """
    # As the Pipeline is recreated at every epoch, the seed must be -1 (random seed).
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=-1)

    # Enabling read_ahead slowed down processing ~40%.
    self.input = ops.FileReader(file_root=data_dir,
                                shard_id=local_rank,
                                num_shards=world_size,
                                random_shuffle=shuffle)

    # Let the user decide which pipeline works best with the chosen model.
    if not dali_cpu:
        decode_device = "mixed"
        self.dali_device = "gpu"
    else:
        decode_device = "cpu"
        self.dali_device = "cpu"
        self.flip = ops.Flip(device=self.dali_device)

    output_dtype = types.FLOAT16 if fp16 else types.FLOAT
    self.cmn = ops.CropMirrorNormalize(device=self.dali_device,
                                       output_dtype=output_dtype,
                                       output_layout=types.NCHW,
                                       crop=(crop, crop),
                                       image_type=types.RGB,
                                       mean=mean,
                                       std=std)

    # To be able to handle all images from full-sized ImageNet, this padding sets the
    # size of the internal nvJPEG buffers without additional reallocations.
    mixed_decoder = decode_device == 'mixed'
    device_memory_padding = 211025920 if mixed_decoder else 0
    host_memory_padding = 140544512 if mixed_decoder else 0
    self.decode = ops.ImageDecoderRandomCrop(device=decode_device,
                                             output_type=types.RGB,
                                             device_memory_padding=device_memory_padding,
                                             host_memory_padding=host_memory_padding,
                                             random_aspect_ratio=[0.8, 1.25],
                                             random_area=[min_crop_size, 1.0],
                                             num_attempts=100)

    # Resize as desired.  To match the torchvision data loader, use triangular
    # interpolation.
    self.res = ops.Resize(device=self.dali_device,
                          resize_x=crop,
                          resize_y=crop,
                          interp_type=types.INTERP_TRIANGULAR)
    self.coin = ops.CoinFlip(probability=0.5)
    print('DALI "{0}" variant'.format(self.dali_device))
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False,
             index_dir="/home/guojia/idx_files/train"):
    """Create the TFRecord-based training operators.

    Args:
        batch_size, num_threads, device_id: forwarded to the base pipeline constructor;
            ``device_id`` also offsets the seed (``12 + device_id``).
        data_dir: passed as ``path`` to ``TFRecordReader``.
        crop: side length used for both the resize and the normalization crop.
        dali_cpu: when true the decoder and resize run on ``"cpu"``; otherwise on
            ``"mixed"`` / ``"gpu"``.  Normalization is always created on ``"gpu"``.
        index_dir: directory whose entries are used, sorted by full path, as the
            reader's ``index_path``.  Defaults to the location that used to be
            hard-coded, so existing callers are unaffected.

    Sharding uses the module-level ``args`` object (``args.local_rank`` and
    ``args.world_size``).
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id,
                                          seed=12 + device_id)

    # Every entry of ``index_dir`` is treated as an index file; sorting gives a
    # deterministic order.
    # NOTE(review): the order presumably has to line up with the files in ``data_dir``
    # -- confirm against the caller.
    index_path = sorted(os.path.join(index_dir, entry) for entry in os.listdir(index_dir))

    self.input = ops.TFRecordReader(
        path=data_dir,
        index_path=index_path,
        shard_id=args.local_rank,
        num_shards=args.world_size,
        random_shuffle=True,
        features={
            'image/height': tfrec.FixedLenFeature([1], tfrec.int64, -1),
            'image/width': tfrec.FixedLenFeature([1], tfrec.int64, -1),
            'image/colorspace': tfrec.FixedLenFeature([], tfrec.string, ''),
            'image/channels': tfrec.FixedLenFeature([], tfrec.int64, -1),
            'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
            'image/class/synset': tfrec.FixedLenFeature([], tfrec.string, ''),
            # 'image/class/text' and the 'image/object/bbox/*' features are
            # intentionally not requested.
            'image/format': tfrec.FixedLenFeature((), tfrec.string, ""),
            'image/filename': tfrec.FixedLenFeature((), tfrec.string, ""),
            'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
        })

    # Let the user decide which pipeline variant works best for the model being run.
    dali_device = 'cpu' if dali_cpu else 'gpu'
    decoder_device = 'cpu' if dali_cpu else 'mixed'

    # This padding sets the size of the internal nvJPEG buffers to be able to handle all
    # images from full-sized ImageNet without additional reallocations.
    device_memory_padding = 211025920 if decoder_device == 'mixed' else 0
    host_memory_padding = 140544512 if decoder_device == 'mixed' else 0

    self.decode = ops.ImageDecoderRandomCrop(device=decoder_device,
                                             output_type=types.RGB,
                                             device_memory_padding=device_memory_padding,
                                             host_memory_padding=host_memory_padding,
                                             random_aspect_ratio=[0.8, 1.25],
                                             random_area=[0.1, 1.0],
                                             num_attempts=100)
    self.res = ops.Resize(device=dali_device,
                          resize_x=crop,
                          resize_y=crop,
                          interp_type=types.INTERP_TRIANGULAR)
    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        output_dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        crop=(crop, crop),
                                        image_type=types.RGB,
                                        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.CoinFlip(probability=0.5)
    print('DALI "{0}" variant'.format(dali_device))
def __init__(self, device_id, crop, size, rank, seed_rank, options):
    """Create the MXNet-record training operators with explicit per-operator seeds.

    ``options`` supplies ``batchsize``, ``seed``, ``train_dir`` and ``fp16``.  The
    records are read from ``data.rec`` / ``data.idx`` inside ``options.train_dir`` and
    sharded as ``rank`` of ``size``.  Every seeded operator derives its seed from
    ``options.seed + seed_rank`` plus a fixed per-operator offset.
    """
    super(TrainPipeline, self).__init__(options.batchsize, 4, device_id,
                                        prefetch_queue_depth=3,
                                        set_affinity=True,
                                        seed=options.seed + seed_rank)
    base_seed = options.seed + seed_rank

    record_file = os.path.join(options.train_dir, "data.rec")
    index_file = os.path.join(options.train_dir, "data.idx")
    self.input = ops.MXNetReader(path=[record_file],
                                 index_path=[index_file],
                                 random_shuffle=True,
                                 shard_id=rank,
                                 num_shards=size)

    # The memory padding below sets the size of the internal nvJPEG buffers to be able
    # to handle all images from full-sized ImageNet without additional reallocations.
    self.decode = ops.ImageDecoderRandomCrop(
        device="mixed",
        output_type=types.RGB,
        device_memory_padding=211025920,
        host_memory_padding=140544512,
        random_aspect_ratio=[0.75, 4. / 3.],
        random_area=[0.08, 1.0],
        num_attempts=100,
        seed=base_seed + 1641)
    self.res = ops.Resize(device="gpu",
                          resize_x=crop,
                          resize_y=crop,
                          interp_type=types.INTERP_TRIANGULAR)

    if options.fp16:
        out_dtype = types.FLOAT16
    else:
        out_dtype = types.FLOAT
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=out_dtype,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        pad_output=False,
        seed=base_seed + 1223)
    self.coin = ops.CoinFlip(probability=0.5, seed=base_seed + 412)
def __new__(
    cls,
    output_type='BGR',
    host_memory_padding=8388608,
    device_memory_padding=16777216,
    random_area=(0.08, 1.),
    random_aspect_ratio=(0.75, 1.33),
    num_attempts=10,
    **kwargs
):
    """Create an ``ImageDecoderRandomCrop`` operator.

    Parameters
    ----------
    output_type : {'BGR', 'RGB'}, optional
        The output color space.  A string is resolved to the attribute of the same
        name on ``types``; any other value is passed through unchanged.
    host_memory_padding : int, optional, default=8388608
        The number of bytes for the host buffer.
    device_memory_padding : int, optional, default=16777216
        The number of bytes for the device buffer.
    random_area : Sequence[float], optional, default=(0.08, 1.)
        The range of scale for sampling.
    random_aspect_ratio : Sequence[float], optional, default=(0.75, 1.33)
        The range of aspect ratio for sampling.
    num_attempts : int, optional, default=10
        The maximum number of sampling trials.
    **kwargs
        Extra keyword arguments forwarded to the operator.

    Returns
    -------
    nvidia.dali.ops.ImageDecoderRandomCrop
        The operator.

    """
    color_space = output_type
    if isinstance(color_space, six.string_types):
        # Translate the color-space name into the matching ``types`` constant.
        color_space = getattr(types, color_space)
    operator = ops.ImageDecoderRandomCrop(
        output_type=color_space,
        host_memory_padding=host_memory_padding,
        device_memory_padding=device_memory_padding,
        random_area=random_area,
        random_aspect_ratio=random_aspect_ratio,
        num_attempts=num_attempts,
        device=context.get_device_type(mixed=True),
        **kwargs
    )
    return operator
def __init__(self, args, batch_size, num_threads, device_id, rec_path, idx_path, shard_id,
             num_shards, crop_shape, nvjpeg_padding, prefetch_queue=3,
             output_layout=types.NCHW, pad_output=True, dtype='float16', dali_cpu=False,
             nvjpeg_width_hint=5980, nvjpeg_height_hint=6430,
             ):
    """Create the MXNet-record training operators with optional preallocation hints.

    ``args`` supplies ``dali_dont_use_mmap``, ``dali_fuse_decoder``, ``rgb_mean`` and
    ``rgb_std``.  When the installed DALI version is at least 1.2.0 the decoder also
    receives ``preallocate_width_hint`` / ``preallocate_height_hint`` taken from
    ``nvjpeg_width_hint`` / ``nvjpeg_height_hint``.  ``args.dali_fuse_decoder`` selects
    between a fused random-crop decoder followed by ``Resize`` and a full decoder
    followed by ``RandomResizedCrop``.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id,
                                          seed=12 + device_id,
                                          prefetch_queue_depth=prefetch_queue)
    self.input = ops.MXNetReader(path=[rec_path],
                                 index_path=[idx_path],
                                 random_shuffle=True,
                                 shard_id=shard_id,
                                 num_shards=num_shards,
                                 dont_use_mmap=args.dali_dont_use_mmap)

    dali_device = "cpu" if dali_cpu else "gpu"
    decoder_device = "cpu" if dali_cpu else "mixed"

    # The preallocation hints are only passed to DALI >= 1.2.0.
    if Version(dali.__version__) >= Version("1.2.0"):
        dali_kwargs_fallback = {
            "preallocate_width_hint": nvjpeg_width_hint,
            "preallocate_height_hint": nvjpeg_height_hint,
        }
    else:
        dali_kwargs_fallback = {}

    decoder_kwargs = dict(device=decoder_device,
                          output_type=types.RGB,
                          device_memory_padding=nvjpeg_padding,
                          host_memory_padding=nvjpeg_padding)
    decoder_kwargs.update(dali_kwargs_fallback)

    if args.dali_fuse_decoder:
        # Cropping is fused into decoding, so only a plain resize follows.
        self.decode = ops.ImageDecoderRandomCrop(**decoder_kwargs)
        self.resize = ops.Resize(device=dali_device,
                                 resize_x=crop_shape[1],
                                 resize_y=crop_shape[0])
    else:
        self.decode = ops.ImageDecoder(**decoder_kwargs)
        self.resize = ops.RandomResizedCrop(device=dali_device, size=crop_shape)

    out_dtype = types.FLOAT16 if dtype == 'float16' else types.FLOAT
    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        output_dtype=out_dtype,
                                        output_layout=output_layout,
                                        crop=crop_shape,
                                        pad_output=pad_output,
                                        image_type=types.RGB,
                                        mean=args.rgb_mean,
                                        std=args.rgb_std)
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, data_dir, crop, dali_cpu=False):
    """Create the training operators reading ``train.rec`` / ``train.idx`` in ``data_dir``.

    The pipeline is always created for device id 0 with seed 12.  With ``dali_cpu`` the
    decoder is a CPU ``ImageDecoderRandomCrop`` followed by a ``Resize`` created without
    an explicit device; otherwise a mixed ``ImageDecoder`` is followed by a GPU
    ``RandomResizedCrop``.  Normalization is always created on ``"gpu"``.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, 0, seed=12)

    record_file = osp.join(data_dir, 'train.rec')
    index_file = osp.join(data_dir, 'train.idx')
    self.input = ops.MXNetReader(path=record_file,
                                 index_path=index_file,
                                 random_shuffle=True)

    # Let the user decide which pipeline variant works best for the model being run.
    if not dali_cpu:
        dali_device = "gpu"
        # This padding sets the size of the internal nvJPEG buffers to be able to handle
        # all images from full-sized ImageNet without additional reallocations.
        self.decode = ops.ImageDecoder(device="mixed",
                                       output_type=types.RGB,
                                       device_memory_padding=211025920,
                                       host_memory_padding=140544512)
        self.res = ops.RandomResizedCrop(device=dali_device, size=(crop, crop))
    else:
        dali_device = "cpu"
        self.decode = ops.ImageDecoderRandomCrop(device=dali_device,
                                                 output_type=types.RGB)
        self.res = ops.Resize(resize_x=crop, resize_y=crop)

    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.CoinFlip(probability=0.5)
    print('DALI "{0}" variant'.format(dali_device))
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
    """Create the training operators including flip and colour-jitter operators.

    Sharding uses the module-level ``args`` object (``args.local_rank`` and
    ``args.world_size``).  ``dali_cpu`` selects ``"cpu"`` for the decoder and resize;
    the flip, colour-twist and normalization operators are always created on ``"gpu"``.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id,
                                          seed=12 + device_id)
    self.input = ops.FileReader(file_root=data_dir,
                                shard_id=args.local_rank,
                                num_shards=args.world_size,
                                random_shuffle=True)

    # Let the user decide which pipeline variant works best for the model being run.
    if dali_cpu:
        dali_device, decoder_device = 'cpu', 'cpu'
    else:
        dali_device, decoder_device = 'gpu', 'mixed'

    # This padding sets the size of the internal nvJPEG buffers to be able to handle all
    # images from full-sized ImageNet without additional reallocations.
    pad_buffers = decoder_device == 'mixed'
    device_memory_padding = 211025920 if pad_buffers else 0
    host_memory_padding = 140544512 if pad_buffers else 0

    # Randomly crop (crop sampling) while decoding, then resize.
    self.decode = ops.ImageDecoderRandomCrop(device=decoder_device,
                                             output_type=types.RGB,
                                             device_memory_padding=device_memory_padding,
                                             host_memory_padding=host_memory_padding,
                                             random_aspect_ratio=[0.75, 4/3.0],
                                             random_area=[0.08, 1.0],
                                             num_attempts=10)
    self.res = ops.Resize(device=dali_device,
                          resize_x=crop,
                          resize_y=crop,
                          interp_type=types.INTERP_TRIANGULAR)

    self.vert_flip = ops.Flip(device='gpu', horizontal=0)
    self.vert_coin = ops.CoinFlip(probability=0.075)

    # Color jitter, see https://www.gitmemory.com/ruiyuanlu and
    # https://github.com/NVIDIA/DALI/issues/336
    self.twist = ops.ColorTwist(device="gpu")
    self.rng1 = ops.Uniform(range=[0.6, 1.4])
    self.rng2 = ops.Uniform(range=[-102, 102])  # factor=0.4, 0.4*255, -0.4*255
    self.flip = ops.Flip(device="gpu", vertical=1, horizontal=0)
    self.color_jitter = ops.ColorTwist(device="gpu",
                                       hue=0.2,
                                       brightness=0.4,
                                       contrast=0.4,
                                       saturation=0.4)

    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        output_dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        crop=(crop, crop),
                                        image_type=types.RGB,
                                        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    # This is torch.transform.RandomHorizontalFlip.
    self.mirrorcoin = ops.CoinFlip(probability=0.5)
    self.uniform = ops.Uniform(range=(0.0, 1.0))
    print('DALI "{0}" variant'.format(dali_device))
def __init__(self, db_prefix, for_train, input_size, batch_size, num_threads, device_id,
             num_gpus):
    """Create an MXNet-record pipeline for training or evaluation.

    Records are read from ``db_prefix + ".rec"`` / ``db_prefix + ".idx"`` and sharded
    as ``device_id`` of ``num_gpus``; shuffling is enabled only when ``for_train`` is
    true.  In training mode a random-crop decoder, a ``ColorTwist`` operator with three
    uniform generators over ``(0.6, 1.4)`` and a mirror coin are created; otherwise only
    a plain ``ImageDecoder`` is created.
    """
    super(HybridRecPipe, self).__init__(batch_size, num_threads, device_id,
                                        seed=12 + device_id,
                                        prefetch_queue_depth=2)
    self.for_train = for_train
    self.input = ops.MXNetReader(path=[db_prefix + ".rec"],
                                 index_path=[db_prefix + ".idx"],
                                 random_shuffle=for_train,
                                 shard_id=device_id,
                                 num_shards=num_gpus)
    self.resize = ops.Resize(device="gpu", resize_x=input_size, resize_y=input_size)
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(input_size, input_size),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])

    if not self.for_train:
        # Evaluation: decode the full image, no augmentation operators.
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        return

    self.decode = ops.ImageDecoderRandomCrop(device="mixed",
                                             output_type=types.RGB,
                                             random_aspect_ratio=[3 / 4, 4 / 3],
                                             random_area=[0.08, 1.0],
                                             num_attempts=100)
    self.color = ops.ColorTwist(device='gpu')
    jitter_range = (0.6, 1.4)
    self.rng_brightness = ops.Uniform(range=jitter_range)
    self.rng_contrast = ops.Uniform(range=jitter_range)
    self.rng_saturation = ops.Uniform(range=jitter_range)
    self.mirror_coin = ops.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, device_id, data_dir="./", dali_cpu=False):
    """Create reader, random-crop decoder, 224x224 resize and normalization operators.

    ``dali_cpu`` places every processing operator on ``"cpu"``; otherwise the decoder
    uses ``"mixed"`` (with non-zero memory padding) and the others ``"gpu"``.  The
    chosen device name is printed during construction.
    """
    super(DecodeFullPipeline, self).__init__(batch_size, num_threads, device_id, seed=12)
    self.input = ops.FileReader(file_root=data_dir, random_shuffle=True)

    decoder_device = "cpu" if dali_cpu else "mixed"
    # Buffer padding is only requested for the mixed decoder.
    if decoder_device == 'mixed':
        device_memory_padding, host_memory_padding = 211025920, 140544512
    else:
        device_memory_padding, host_memory_padding = 0, 0
    dali_device = "cpu" if dali_cpu else "gpu"

    self.decode = ops.ImageDecoderRandomCrop(
        device=decoder_device,
        output_type=types.RGB,
        device_memory_padding=device_memory_padding,
        host_memory_padding=host_memory_padding,
        random_aspect_ratio=[0.8, 1.25],
        random_area=[0.1, 1.0],
        num_attempts=100)
    self.res = ops.Resize(device=dali_device,
                          resize_x=224,
                          resize_y=224,
                          interp_type=types.INTERP_TRIANGULAR)
    print("Device = {}".format(dali_device))
    self.cmnp = ops.CropMirrorNormalize(
        device=dali_device,
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(224, 224),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self):
    """Create the training operators, configured entirely from the global ``FLAGS``.

    ``FLAGS`` supplies ``batch_size``, ``num_threads``, ``device_id``, ``data``,
    ``local_rank`` and ``world_size``.  Images are read from the ``train``
    sub-directory of ``FLAGS.data`` and resized / cropped to 224x224.
    """
    super(HybridTrainPipe, self).__init__(FLAGS.batch_size,
                                          FLAGS.num_threads,
                                          FLAGS.device_id,
                                          seed=42 + FLAGS.device_id)
    train_dir = os.path.join(FLAGS.data, 'train')
    side = 224
    self.input = ops.FileReader(file_root=train_dir,
                                shard_id=FLAGS.local_rank,
                                num_shards=FLAGS.world_size,
                                random_shuffle=True,
                                pad_last_batch=True)

    # Set the internal nvJPEG buffer sizes to handle full-sized ImageNet images without
    # additional reallocations.
    nvjpeg_buffers = dict(device_memory_padding=211025920,
                          host_memory_padding=140544512)
    self.decode = ops.ImageDecoderRandomCrop(device='mixed',
                                             output_type=types.RGB,
                                             random_aspect_ratio=[0.8, 1.25],
                                             random_area=[0.1, 1.0],
                                             num_attempts=100,
                                             **nvjpeg_buffers)
    self.res = ops.Resize(device='gpu',
                          resize_x=side,
                          resize_y=side,
                          interp_type=types.INTERP_TRIANGULAR)
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(side, side),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.CoinFlip(probability=0.5)
    self.to_int64 = ops.Cast(dtype=types.INT64, device="gpu")
def __init__(
    self,
    batch_size,
    num_threads,
    device_id,
    data_dir,
    label_dir,
    crop,
    dali_cpu=False,
    training=True,
):
    """Create the operators of a TFRecord-backed pipeline sharded over the MPI world.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor.
        data_dir: Directory whose entries are passed to the reader as ``path``.
        label_dir: Directory whose entries are passed to the reader as
            ``index_path``.
        crop: Edge length used for resizing and for the final crop.
        dali_cpu: Only influences the padding values and the device name
            reported in the completion message; every operator below is
            created on "cpu" regardless.
        training: Selects shuffling and random-crop decoding (true) or plain
            decoding with a shorter-side resize (false).
    """
    shard_id = ht.MPI_WORLD.rank
    num_shards = ht.MPI_WORLD.size
    super(HybridPipe, self).__init__(batch_size, num_threads, device_id, seed=68 + shard_id)

    # os.listdir() returns entries in arbitrary order. Both listings are
    # sorted so that the i-th record file lines up with the i-th index file
    # and every rank sees the same ordering. os.path.join keeps the paths
    # valid whether or not the directory argument ends with a separator.
    # NOTE(review): pairing by sorted name assumes record and index files
    # share a naming scheme -- confirm against the dataset layout.
    data_dir_list = [os.path.join(data_dir, name) for name in sorted(os.listdir(data_dir))]
    label_dir_list = [os.path.join(label_dir, name) for name in sorted(os.listdir(label_dir))]

    self.input = dali.ops.TFRecordReader(
        path=data_dir_list,
        index_path=label_dir_list,
        random_shuffle=True if training else False,
        shard_id=shard_id,
        num_shards=num_shards,
        initial_fill=10000,
        features={
            "image/encoded": dali.tfrecord.FixedLenFeature((), dali.tfrecord.string, ""),
            "image/class/label": dali.tfrecord.FixedLenFeature([1], dali.tfrecord.int64, -1),
            "image/class/text": dali.tfrecord.FixedLenFeature([], dali.tfrecord.string, ""),
            "image/object/bbox/xmin": dali.tfrecord.VarLenFeature(dali.tfrecord.float32, 0.0),
            "image/object/bbox/ymin": dali.tfrecord.VarLenFeature(dali.tfrecord.float32, 0.0),
            "image/object/bbox/xmax": dali.tfrecord.VarLenFeature(dali.tfrecord.float32, 0.0),
            "image/object/bbox/ymax": dali.tfrecord.VarLenFeature(dali.tfrecord.float32, 0.0),
        },
    )

    # Let the user decide which placement works best for their setup.
    dali_device = "cpu" if dali_cpu else "gpu"
    decoder_device = "cpu" if dali_cpu else "mixed"
    # This padding sets the size of the internal nvJPEG buffers so that all
    # images from full-sized ImageNet can be handled without additional
    # reallocations. It is kept to allow for loading to GPUs later.
    # TODO: move the operators to the GPU.
    device_memory_padding = 211025920 if decoder_device == "mixed" else 0
    host_memory_padding = 140544512 if decoder_device == "mixed" else 0

    if training:
        self.decode = ops.ImageDecoderRandomCrop(
            device="cpu",  # intended: decoder_device
            output_type=dali.types.RGB,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            random_aspect_ratio=[0.75, 1.33],
            random_area=[0.05, 1.0],
            num_attempts=100,
        )
        self.resize = ops.Resize(
            device="cpu",  # intended: dali_device
            resize_x=crop,
            resize_y=crop,
            interp_type=dali.types.INTERP_TRIANGULAR,
        )
    else:
        self.decode = dali.ops.ImageDecoder(device="cpu", output_type=dali.types.RGB)
        self.resize = ops.Resize(
            device="cpu",
            resize_shorter=crop,
            interp_type=dali.types.INTERP_TRIANGULAR,
        )

    # Open question from the original author: CPU or GPU? When prefetching,
    # normalisation is done on the CPU before the data is sent on.
    # TODO: passing dtype=dali.types.FLOAT is not implemented on the test
    # system (old version of DALI), so the operator default is used.
    self.normalize = ops.CropMirrorNormalize(
        device="cpu",  # needs to work with the define graph
        output_layout=dali.types.NCHW,
        crop=(crop, crop),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    self.coin = ops.CoinFlip(probability=0.5)
    self.training = training
    print0(
        f"Completed init of DALI Dataset on '{dali_device}', is training set? -> {training}"
    )
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False, resume_index=0, resume_epoch=0):
    """Create the training pipeline operators with an optionally resumable file reader.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; also offsets the seed.
        data_dir: Root directory handed to the file reader.
        crop: Edge length used for resizing and for the final crop.
        dali_cpu: When true, decode and resize run on "cpu"; otherwise decoding
            uses "mixed" and resizing "gpu". Normalisation is always on "gpu".
        resume_index: Passed to the reader when resuming is requested.
        resume_epoch: Passed to the reader when resuming is requested.

    Also reads ``args.node_rank``, ``args.world_size``, ``args.nnodes``,
    ``args.local_rank``, ``args.mint``, ``args.cache_size`` and
    ``args.cf_iterator`` from the module-level ``args``.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)
    shard = int(args.node_rank * args.world_size / args.nnodes + args.local_rank)

    # Arguments shared by every reader variant below.
    reader_kwargs = dict(
        file_root=data_dir,
        shard_id=shard,
        num_shards=args.world_size,
        shuffle_after_epoch=True,
    )
    if args.mint:
        self.input = ops.FileReader(cache_size=args.cache_size, **reader_kwargs)
    else:
        # Deterministic shuffling is switched on as soon as a resume point
        # is supplied or the CF iterator is requested.
        cf_det = bool(resume_index or resume_epoch or args.cf_iterator)
        if cf_det:
            reader_kwargs.update(
                resume_index=resume_index,
                resume_epoch=resume_epoch,
                cf_det=cf_det,
            )
        self.input = ops.FileReader(**reader_kwargs)
        # NOTE(review): the message is emitted only on the non-mint path,
        # the only place where cf_det is defined -- confirm placement.
        print("CF deterministic shuffling is {}".format(cf_det))

    # Let the user decide which placement works best for their setup. The
    # padding sizes the internal nvJPEG buffers so that all images from
    # full-sized ImageNet can be handled without additional reallocations;
    # it is only non-zero for the "mixed" decoder.
    if dali_cpu:
        dali_device = 'cpu'
        decoder_device = 'cpu'
        device_padding = 0
        host_padding = 0
    else:
        dali_device = 'gpu'
        decoder_device = 'mixed'
        device_padding = 211025920
        host_padding = 140544512

    self.decode = ops.ImageDecoderRandomCrop(
        device=decoder_device,
        output_type=types.RGB,
        device_memory_padding=device_padding,
        host_memory_padding=host_padding,
        random_aspect_ratio=[0.8, 1.25],
        random_area=[0.1, 1.0],
        num_attempts=100,
    )
    self.res = ops.Resize(
        device=dali_device,
        resize_x=crop,
        resize_y=crop,
        interp_type=types.INTERP_TRIANGULAR,
    )

    # Channel statistics are given on a 0-1 scale and multiplied by 255.
    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=channel_mean,
        std=channel_std,
    )
    self.coin = ops.CoinFlip(probability=0.5)
    print('DALI "{0}" variant'.format(dali_device))
def __init__(self, batch_size, num_threads, device_id, rec_path, idx_path,
             shard_id, num_shards, crop_shape, min_random_area,
             max_random_area, min_random_aspect_ratio,
             max_random_aspect_ratio, nvjpeg_padding, prefetch_queue=3,
             seed=12, output_layout=types.NCHW, pad_output=True,
             dtype='float16', mlperf_print=True, use_roi_decode=False,
             cache_size=0):
    """Create the operators of an MXNet-RecordIO training pipeline.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; also offsets the seed.
        rec_path: Record file handed to the reader.
        idx_path: Index file handed to the reader.
        shard_id: Shard read by this pipeline.
        num_shards: Total number of shards.
        crop_shape: Two-element output size, used as ``size`` of the random
            resized crop and as ``crop`` of the normalisation step, and
            therefore treated as (height, width) everywhere in this method.
        min_random_area: Lower bound of the random crop area range.
        max_random_area: Upper bound of the random crop area range.
        min_random_aspect_ratio: Lower bound of the random aspect-ratio range.
        max_random_aspect_ratio: Upper bound of the random aspect-ratio range.
        nvjpeg_padding: Used for both device and host decoder memory padding.
        prefetch_queue: Passed as ``prefetch_queue_depth`` to the base class.
        seed: Base seed; ``device_id`` is added to it.
        output_layout: Layout produced by the normalisation step.
        pad_output: Forwarded to the normalisation step.
        dtype: ``'float16'`` selects FLOAT16 output, anything else FLOAT.
        mlperf_print: Accepted for interface compatibility; not used here.
        use_roi_decode: Use the fused random-crop decoder (only honoured
            when ``cache_size`` is 0).
        cache_size: When positive, enables the decoder cache and the
            matching reader options.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size,
        num_threads,
        device_id,
        seed=seed + device_id,
        prefetch_queue_depth=prefetch_queue,
    )

    reader_kwargs = dict(
        path=[rec_path],
        index_path=[idx_path],
        random_shuffle=True,
        shard_id=shard_id,
        num_shards=num_shards,
    )
    if cache_size > 0:
        # stick_to_shard might not exist in every version of DALI, so these
        # options are only passed when caching is actually requested.
        reader_kwargs.update(
            stick_to_shard=True,
            lazy_init=True,
            skip_cached_images=True,
        )
    self.input = ops.MXNetReader(**reader_kwargs)

    area_range = [min_random_area, max_random_area]
    aspect_range = [min_random_aspect_ratio, max_random_aspect_ratio]

    if use_roi_decode and cache_size == 0:
        self.decode = ops.ImageDecoderRandomCrop(
            device="mixed",
            output_type=types.RGB,
            device_memory_padding=nvjpeg_padding,
            host_memory_padding=nvjpeg_padding,
            random_area=area_range,
            random_aspect_ratio=aspect_range,
        )
        # crop_shape is (height, width), matching how it is consumed by
        # RandomResizedCrop(size=...) and CropMirrorNormalize(crop=...)
        # below, so the width is index 1 and the height index 0. The
        # previous x=crop_shape[0], y=crop_shape[1] transposed non-square
        # targets; square targets are unaffected.
        self.rrc = ops.Resize(
            device="gpu",
            resize_x=crop_shape[1],
            resize_y=crop_shape[0],
        )
    else:
        decoder_kwargs = dict(
            device="mixed",
            output_type=types.RGB,
            device_memory_padding=nvjpeg_padding,
            host_memory_padding=nvjpeg_padding,
        )
        if cache_size > 0:
            decoder_kwargs.update(
                cache_type='threshold',
                cache_size=cache_size,
                cache_threshold=0,
                cache_debug=False,
            )
        self.decode = ops.ImageDecoder(**decoder_kwargs)
        self.rrc = ops.RandomResizedCrop(
            device="gpu",
            random_area=area_range,
            random_aspect_ratio=aspect_range,
            size=crop_shape,
        )

    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT16 if dtype == 'float16' else types.FLOAT,
        output_layout=output_layout,
        crop=crop_shape,
        pad_output=pad_output,
        image_type=types.RGB,
        mean=_mean_pixel,
        std=_std_pixel,
    )
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
    """Create the training pipeline operators, sharded via ``torch.distributed``.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; also offsets the seed.
        data_dir: Root directory handed to the file reader.
        crop: Edge length used for resizing and for the final crop.
        dali_cpu: When true, decode and resize run on "cpu"; otherwise decoding
            uses "mixed" and resizing "gpu". Normalisation is always on "gpu".
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)

    # Shard by process rank when a process group exists, otherwise read
    # everything as a single shard.
    if torch.distributed.is_initialized():
        local_rank = torch.distributed.get_rank()
        world_size = torch.distributed.get_world_size()
    else:
        local_rank, world_size = 0, 1
    self.input = ops.FileReader(
        file_root=data_dir,
        shard_id=local_rank,
        num_shards=world_size,
        random_shuffle=True,
    )

    # Let the user decide which placement works best for their setup.
    crop_args = dict(
        output_type=types.RGB,
        random_aspect_ratio=[0.8, 1.25],
        random_area=[0.1, 1.0],
        num_attempts=100,
    )
    if dali_cpu:
        dali_device = "cpu"
        self.decode = ops.ImageDecoderRandomCrop(device=dali_device, **crop_args)
    else:
        dali_device = "gpu"
        # The padding sizes the internal nvJPEG buffers so that all images
        # from full-sized ImageNet can be handled without additional
        # reallocations.
        self.decode = ops.ImageDecoderRandomCrop(
            device="mixed",
            device_memory_padding=211025920,
            host_memory_padding=140544512,
            **crop_args
        )

    self.res = ops.Resize(
        device=dali_device,
        resize_x=crop,
        resize_y=crop,
        interp_type=types.INTERP_TRIANGULAR,
    )

    # Channel statistics taken from
    # https://github.com/Armour/pytorch-nn-practice/blob/master/utils/meanstd.py
    channel_mean = [0.50707516 * 255, 0.48654887 * 255, 0.44091784 * 255]
    channel_std = [0.26733429 * 255, 0.25643846 * 255, 0.27615047 * 255]
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=channel_mean,
        std=channel_std,
    )
    self.coin = ops.CoinFlip(probability=0.5)
    self.to_int64 = ops.Cast(dtype=types.INT64, device="gpu")
    # A random-rotation augmentation (roughly +/-7 degrees, applied with a
    # 7.5% chance) was sketched here previously but is not enabled.
    pipeline_logger.info('DALI "{0}" variant'.format(dali_device))
def __init__(self, train=False, batch_size=16, size=384, num_threads=4, device_id=0):
    """Create the operators of an image/mask pipeline fed from external sources.

    Args:
        train: Selects random-crop decoding (true) or fixed-size crop
            decoding (false); also stored on the instance.
        batch_size: Forwarded to the base constructor and to the input iterator.
        size: Edge length used for resizing and for the evaluation crop.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor.
    """
    super(ExternalSourcePipeline, self).__init__(batch_size, num_threads, device_id, seed=42)

    source = ExternalInputIterator(train, batch_size)
    self.eii = iter(source)
    self.images_input = ops.ExternalSource()
    self.masks_input = ops.ExternalSource()

    if not train:
        self.decode = ops.ImageDecoderCrop(device="mixed", crop=(size, size))
    else:
        # The random crop area is centred on (size / 784) ** 2, +/-30%.
        fixed_area = (size / 784)**2
        self.decode = ops.ImageDecoderRandomCrop(
            device="mixed",
            random_area=[fixed_area*0.7, fixed_area*1.3],
            random_aspect_ratio=[0.7, 1.3],
        )

    # Images use triangular interpolation, masks nearest-neighbour.
    self.resize = ops.Resize(
        device="gpu",
        interp_type=types.INTERP_TRIANGULAR,
        resize_x=size,
        resize_y=size,
    )
    self.mask_resize = ops.Resize(
        device="gpu",
        interp_type=types.INTERP_NN,
        resize_x=size,
        resize_y=size,
    )

    # A single mean/std value is supplied here rather than one per channel.
    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        mean=[0.5 * 255],
        std=[0.5 * 255],
        output_layout=types.NCHW,
    )
    self.mask_normalize = ops.CropMirrorNormalize(
        device="gpu",
        mean=[0],
        std=[255],
        output_layout=types.NCHW,
        image_type=types.GRAY,
    )

    # Extra augmentations.
    self.to_gray = ops.ColorSpaceConversion(
        device="gpu",
        image_type=types.RGB,
        output_type=types.GRAY,
    )
    self.contrast = ops.BrightnessContrast(device="gpu")
    self.hsv = ops.Hsv(device="gpu")
    self.jitter = ops.Jitter(device="gpu")

    # Random parameter sources for the augmentations above.
    self.rng2 = ops.Uniform(range=[0.8, 1.2])
    self.rng3 = ops.Uniform(range=[-30, 30])  # for hue
    self.coin03 = ops.CoinFlip(probability=0.3)
    self.train = train