def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
    """Create the operators used by the training pipeline.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; the pipeline
            seed is ``12 + device_id``.
        data_dir: Passed to ``ops.FileReader`` as ``file_root``.
        crop: Edge length used for the resize target and the final crop.
        dali_cpu: When true, the CPU random-crop decoder and a CPU resize are
            used; otherwise the "mixed" decoder and a GPU resize.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)

    # Shard by the torch.distributed rank when a process group is
    # initialised; otherwise read the whole dataset as a single shard.
    distributed = torch.distributed.is_initialized()
    local_rank = torch.distributed.get_rank() if distributed else 0
    world_size = torch.distributed.get_world_size() if distributed else 1

    self.input = ops.FileReader(
        file_root=data_dir,
        shard_id=local_rank,
        num_shards=world_size,
        random_shuffle=True,
    )

    if dali_cpu:
        backend = "cpu"
        self.decode = ops.HostDecoderRandomCrop(
            device=backend,
            output_type=types.RGB,
            random_aspect_ratio=[0.75, 4. / 3.],
            random_area=[0.08, 1.0],
            num_attempts=100,
        )
    else:
        backend = "gpu"
        # The padding sizes the internal nvJPEG buffers so that full-sized
        # ImageNet images can be handled without additional reallocations.
        self.decode = ops.nvJPEGDecoderRandomCrop(
            device="mixed",
            output_type=types.RGB,
            device_memory_padding=211025920,
            host_memory_padding=140544512,
            random_aspect_ratio=[0.75, 4. / 3.],
            random_area=[0.08, 1.0],
            num_attempts=100,
        )

    self.res = ops.Resize(
        device=backend,
        resize_x=crop,
        resize_y=crop,
        interp_type=types.INTERP_TRIANGULAR,
    )
    # Normalisation is always created on the GPU, regardless of `dali_cpu`.
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, device_id, root, list_path, crop, shard_id, num_shards, coji=False, dali_cpu=False):
    """Create the reader, decoder, resize and normalisation operators.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; the pipeline
            seed is ``12 + device_id``.
        root: Passed to ``ops.FileReader`` as ``file_root``.
        list_path: Passed to ``ops.FileReader`` as ``file_list``.
        crop: Edge length used for the resize target and the final crop.
        shard_id: Shard index given to the reader.
        num_shards: Shard count given to the reader.
        coji: When truthy, colour-twist operators and their three uniform
            random generators are created as well.
        dali_cpu: When true, every operator runs on the CPU; otherwise the
            decoder is "mixed" and the remaining operators are on the GPU.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)

    self.read = ops.FileReader(
        file_root=root,
        file_list=list_path,
        shard_id=shard_id,
        num_shards=num_shards,
        random_shuffle=True,
        initial_fill=1024,
    )

    # The caller picks the backend through `dali_cpu`.
    if dali_cpu:
        op_device, decoder_device = 'cpu', 'cpu'
    else:
        op_device, decoder_device = 'gpu', 'mixed'

    # The padding sizes the internal nvJPEG buffers so that full-sized
    # ImageNet images are handled without additional reallocations; it is
    # set to zero when the decoder is not "mixed".
    mixed_decoder = decoder_device == 'mixed'
    self.decode = ops.ImageDecoderRandomCrop(
        device=decoder_device,
        output_type=types.RGB,
        device_memory_padding=211025920 if mixed_decoder else 0,
        host_memory_padding=140544512 if mixed_decoder else 0,
        random_aspect_ratio=[0.75, 1.33333333],
        random_area=[0.08, 1.0],
        num_attempts=100,
    )
    self.resize = ops.Resize(
        device=op_device,
        resize_x=crop,
        resize_y=crop,
        interp_type=types.INTERP_TRIANGULAR,
    )
    self.cmnp = ops.CropMirrorNormalize(
        device=op_device,
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[x * 255 for x in IMAGENET_MEAN],
        std=[x * 255 for x in IMAGENET_STD],
    )
    self.coin = ops.CoinFlip(probability=0.5)

    self.coji = coji
    if coji:
        self.twist = ops.ColorTwist(device=op_device)
        self.brightness_rng = ops.Uniform(range=[1.0 - 0.4, 1.0 + 0.4])
        self.contrast_rng = ops.Uniform(range=[1.0 - 0.4, 1.0 + 0.4])
        self.saturation_rng = ops.Uniform(range=[1.0 - 0.4, 1.0 + 0.4])
def __init__(self, alpha=[0.5, 1.5], delta=[0.875, 1.125], gamma=[-0.5, 0.5]):
    """Create the random generators and the colour-twist operator.

    Args:
        alpha: Accepted but not referenced by this constructor.
        delta: Accepted but not referenced by this constructor.
        gamma: Range handed to the contrast, saturation and hue uniform
            generators.

    NOTE(review): `alpha` and `delta` look like they were meant to drive some
    of the ranges below (brightness uses a fixed [-0.125, 0.125]) -- confirm
    the intended mapping before relying on them.
    """
    # (attribute name, range) pairs, created in this exact order.
    uniform_specs = (
        ("contrast", gamma),
        ("brightness", [-0.125, 0.125]),
        ("saturation", gamma),
        ("hue", gamma),
    )
    for attr_name, value_range in uniform_specs:
        setattr(self, attr_name, ops.Uniform(range=value_range))

    self.ct = ops.ColorTwist(device="gpu")
    self.toss_a_coin = ops.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
    """Create the reader, decoder and augmentation operators.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; the pipeline
            seed is ``12 + device_id``.
        data_dir: Passed to ``ops.FileReader`` as ``file_root``.
        crop: Edge length used for the resize target and the final crop.
        dali_cpu: When true, decode and resize on the CPU; otherwise use the
            "mixed" decoder and a GPU resize.

    Sharding is read from the module-level ``args`` object
    (``args.local_rank`` / ``args.world_size``).
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)

    self.input = ops.FileReader(
        file_root=data_dir,
        shard_id=args.local_rank,
        num_shards=args.world_size,
        random_shuffle=True,
    )

    # Let the user decide which backend works best for the model being run.
    if dali_cpu:
        op_device, decoder_device = 'cpu', 'cpu'
    else:
        op_device, decoder_device = 'gpu', 'mixed'

    # The padding sizes the internal nvJPEG buffers so that full-sized
    # ImageNet images are handled without additional reallocations.
    mixed_decoder = decoder_device == 'mixed'
    device_padding = 211025920 if mixed_decoder else 0
    host_padding = 140544512 if mixed_decoder else 0

    # Random crop sampling fused with decoding, followed by a resize.
    self.decode = ops.ImageDecoderRandomCrop(
        device=decoder_device,
        output_type=types.RGB,
        device_memory_padding=device_padding,
        host_memory_padding=host_padding,
        random_aspect_ratio=[0.75, 4/3.0],
        random_area=[0.08, 1.0],
        num_attempts=10,
    )
    self.res = ops.Resize(
        device=op_device,
        resize_x=crop,
        resize_y=crop,
        interp_type=types.INTERP_TRIANGULAR,
    )

    self.vert_flip = ops.Flip(device='gpu', horizontal=0)
    self.vert_coin = ops.CoinFlip(probability=0.075)

    # Colour jitter; see https://www.gitmemory.com/ruiyuanlu and
    # https://github.com/NVIDIA/DALI/issues/336
    self.twist = ops.ColorTwist(device="gpu")
    self.rng1 = ops.Uniform(range=[0.6, 1.4])
    # factor=0.4 -> +/- 0.4*255 (a factor of 0.2 would be +/- 51).
    self.rng2 = ops.Uniform(range=[-102, 102])

    self.flip = ops.Flip(device="gpu", vertical=1, horizontal=0)
    self.color_jitter = ops.ColorTwist(
        device="gpu",
        hue=0.2,
        brightness=0.4,
        contrast=0.4,
        saturation=0.4,
    )
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    # Counterpart of the torchvision RandomHorizontalFlip transform.
    self.mirrorcoin = ops.CoinFlip(probability=0.5)
    self.uniform = ops.Uniform(range=(0.0, 1.0))
    print('DALI "{0}" variant'.format(op_device))
def __init__(self, p: float = 0.5):
    """Set up the coin flip and the flip operators for a vertical flip.

    Args:
        p (float, optional): Probability handed to the coin-flip operator
            that gates this transformation. Defaults to 0.5.
    """
    # One shared random decision ...
    self.flip_coin_vflip = ops.CoinFlip(probability=p)
    # ... and one flip operator per kind of data (image, boxes, landmarks),
    # each created with its horizontal / x flip switched off.
    self.image_vflip = ops.Flip(
        device='gpu',
        horizontal=0,
    )
    self.bbox_vflip = ops.BbFlip(
        device='cpu',
        horizontal=0,
    )
    self.ldmrks_vflip = ops.CoordFlip(
        layout='xy',
        device='cpu',
        flip_x=0,
    )
def __init__(self, device, batch_size, layout, iterator, num_threads=1, device_id=0, num_gpus=1, dtype=types.FLOAT, output_layout="FHWC", mirror_probability=0.0, mean=[0., 0., 0.], std=[1., 1., 1.], scale=None, shift=None, pad_output=False, crop_seq_as_depth=False, crop_d=8, crop_h=16, crop_w=32, crop_pos_x=0.3, crop_pos_y=0.2, crop_pos_z=0.1, out_of_bounds_policy=None, fill_values=None, extra_outputs=False):
    """Create an external-source pipeline around CropMirrorNormalize.

    Args:
        device: Device string for the CropMirrorNormalize operator.
        batch_size, num_threads, device_id: Forwarded to the base pipeline.
        layout: Input layout string; stored and used to decide whether a
            depth crop applies.
        iterator: Data source, stored on the instance.
        num_gpus: Not referenced by this constructor.
        mirror_probability: Probability of the coin flip (fixed seed 7865).
        crop_seq_as_depth: When true and the layout contains 'F', the depth
            crop is kept even though the layout has no 'D'.
        extra_outputs: Stored on the instance.

    The remaining arguments are forwarded unchanged to
    ``ops.CropMirrorNormalize``.
    """
    super(CMNRandomDataPipeline, self).__init__(batch_size, num_threads, device_id)
    self.device = device
    self.layout = layout
    self.iterator = iterator
    self.inputs = ops.ExternalSource()
    self.extra_outputs = extra_outputs

    # A depth crop only applies when the layout has a 'D' dimension, or when
    # frames ('F') are explicitly treated as depth.
    depth_crop_applies = layout.count('D') > 0 or (crop_seq_as_depth and layout.count('F') > 0)
    if not depth_crop_applies:
        crop_d = None

    self.cmn = ops.CropMirrorNormalize(
        device=self.device,
        dtype=dtype,
        output_layout=output_layout,
        crop_d=crop_d,
        crop_h=crop_h,
        crop_w=crop_w,
        crop_pos_x=crop_pos_x,
        crop_pos_y=crop_pos_y,
        crop_pos_z=crop_pos_z,
        mean=mean,
        std=std,
        pad_output=pad_output,
        scale=scale,
        shift=shift,
        out_of_bounds_policy=out_of_bounds_policy,
        fill_values=fill_values,
    )
    self.coin = ops.CoinFlip(probability=mirror_probability, seed=7865)
def __init__(self, file_root, file_list, batch_size, resize_shorter, crop, min_area, lower, upper, interp, mean, std, device_id, shard_id=0, num_shards=1, random_shuffle=True, num_threads=4, seed=42, pad_output=False, output_dtype=types.FLOAT):
    """Create a GPU training pipeline with configurable crop statistics.

    Args:
        file_root, file_list: Passed to ``ops.FileReader``.
        batch_size, num_threads, device_id, seed: Forwarded to the base
            pipeline constructor.
        resize_shorter: Not referenced by this constructor.
        crop: Edge length used for the resize target and the final crop.
        min_area: Lower bound of the random crop area (upper bound is 1.0).
        lower, upper: Bounds of the random crop aspect ratio.
        interp: Interpolation type handed to ``ops.Resize``.
        mean, std: Normalisation constants for ``ops.CropMirrorNormalize``.
        shard_id, num_shards, random_shuffle: Reader sharding and shuffling.
        pad_output, output_dtype: Forwarded to ``ops.CropMirrorNormalize``.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=seed)

    self.input = ops.FileReader(
        file_root=file_root,
        file_list=file_list,
        shard_id=shard_id,
        num_shards=num_shards,
        random_shuffle=random_shuffle,
    )

    # The padding sizes the internal nvJPEG buffers so that full-sized
    # ImageNet images are handled without additional reallocations.
    self.decode = ops.ImageDecoderRandomCrop(
        device='mixed',
        output_type=types.RGB,
        device_memory_padding=211025920,
        host_memory_padding=140544512,
        random_aspect_ratio=[lower, upper],
        random_area=[min_area, 1.0],
        num_attempts=100,
    )
    self.res = ops.Resize(
        device='gpu',
        resize_x=crop,
        resize_y=crop,
        interp_type=interp,
    )
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=output_dtype,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=mean,
        std=std,
        pad_output=pad_output,
    )
    self.coin = ops.CoinFlip(probability=0.5)
    self.to_int64 = ops.Cast(dtype=types.INT64, device="gpu")
# Constructor of the COCO caption pipeline.
#
# It initialises the base pipeline with a fixed seed of 15, wraps a
# COCOCaptionInputIterator in `iter(...)`, and creates three ExternalSource
# operators (input, caption, img_id) plus a "mixed" ImageDecoder.
#
# `resize` handling: a tuple is unpacked into (resize_x, resize_y), an int is
# used for both, any other non-None value falls back to 0. for both; when
# `resize` is None, `self.res` is set to None instead of a Resize operator.
#
# NOTE(review): the line breaks of this block have been lost, so it cannot be
# determined from the text here which of the trailing statements (cmnp, cf,
# rotate, rng) belong under `if augment:` -- confirm against the original
# layout before reformatting.
def __init__(self, batch_size, img_dir, json_dir, num_threads=2, device_id=0, num_gpus=1, resize=None, augment=False, shuffle=True): """ Args: batch_size: batch size for output at the first dim. num_threads: int, number of cpu working threads. device_id: int, the slice number of gpu. num_gpus: int, number of multiple gpu. img_dir: str, dir path where the images are stored. json_dir: str, json path for coco dataset. resize(optional): default int, if other format please modify function params in ops.Resize. Output: (images, captions) pair stacked by batch_size. The output shape of images will be NCHW with type of float. Note that the output type of captions will be a list of numpy which is encoded from the original string caption. To use it in the custom model, one needs to decode the numpy into string by .tostring() function or .tobytes().decode() function. .tostring will get a bytes type result while .tobytes.decode will directly get the string. Notes: param 'device' in ops functions instruct which device will process the data. optional in 'mixed'/'cpu'/'gpu', for detail please see DALI documentation online. """ super(COCOCaptionPipeline, self).__init__(batch_size, num_threads, device_id, seed=15) self.coco_itr = COCOCaptionInputIterator(batch_size, device_id, num_gpus, img_dir, json_dir, shuffle=shuffle) self.iterator = iter(self.coco_itr) self.input = ops.ExternalSource() self.caption = ops.ExternalSource() self.img_id = ops.ExternalSource() self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB) self.augment = augment if resize is not None: if isinstance(resize, tuple): resx, resy = resize elif isinstance(resize, int): resx = resy = resize else: resx = resy = 0. 
self.res = ops.Resize(device="gpu", resize_x=resx, resize_y=resy) else: self.res = None if augment: self.cmnp = ops.CropMirrorNormalize(device="gpu", output_dtype=types.FLOAT, output_layout=types.NHWC, image_type=types.RGB, mean=MEAN, std=STD) self.cf = ops.CoinFlip() self.rotate = ops.Rotate(device="gpu") self.rng = ops.Uniform(range=(-10.0, 10.0))
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, shard_id, num_shards):
    """Add the augmentation operators on top of the parent pipeline.

    Every argument is forwarded positionally to the parent constructor;
    `crop` is additionally used as the canvas size of the paste operator and
    as the output size of the random resized crop.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, data_dir, crop, shard_id, num_shards)

    # Geometry: pad onto a larger canvas, then take a random resized crop.
    self.pad = ops.Paste(
        device="gpu",
        fill_value=0,
        ratio=1.1,
        min_canvas_size=crop,
    )
    self.res = ops.RandomResizedCrop(
        device="gpu",
        size=crop,
        random_area=[0.1, 1.0],
        random_aspect_ratio=[0.8, 1.25],
        num_attempts=100,
    )
    # Python callback operator with two outputs.
    self.cutmix = ops.PythonFunction(
        function=cut_mixe_image,
        num_outputs=2,
        device='gpu',
    )
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    self.coin = ops.CoinFlip(probability=0.5)

    # Rotation and brightness, each paired with its own uniform generator.
    self.rotated = ops.Rotate(device="gpu", keep_size=True)
    self.rotated_rng = ops.Uniform(range=(-5.0, 5.0))
    self.brightness = ops.Brightness(device="gpu")
    self.brightness_rng = ops.Uniform(range=(0.8, 1.2))

    self.reshape = ops.Reshape(device="gpu", layout="HWC")
    self.one_hot = ops.OneHot(
        num_classes=3,
        dtype=types.INT32,
        device="cpu",
    )
    self.jitter_rng = ops.CoinFlip(probability=0.3)
    self.jittered = ops.Jitter(device="gpu")
def __init__(self, batch_size, layout, data_iterator):
    """Create a pipeline that flips data through a Python callback.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        layout: Stored on the instance, used to look up the flip dimensions
            via ``find_dims`` and passed as the callback's output layout.
        data_iterator: Stored on the instance as ``self.iterator``.

    The base pipeline is created with seed 1234, 4 threads, device 0 and
    with asynchronous and pipelined execution both disabled.
    """
    super(SynthPythonFlipPipeline, self).__init__(
        batch_size,
        seed=1234,
        num_threads=4,
        device_id=0,
        exec_async=False,
        exec_pipelined=False,
    )
    self.iterator = data_iterator
    self.layout = layout
    self.input = ops.ExternalSource()
    self.coin = ops.CoinFlip(seed=1234)

    h_dim, v_dim, d_dim = find_dims(layout)

    def flip_sample(d, hor, ver, depth):
        # Bind the layout-derived dimension indices for numpy_flip.
        return numpy_flip(d, h_dim, v_dim, d_dim, hor, ver, depth)

    self.python_flip = ops.PythonFunction(function=flip_sample, output_layouts=layout)
def __init__(self, batch_size, num_threads, device_id, data_dir, file_list, crop, local_rank, world_size, dali_cpu=False):
    """Create the operators used by the training pipeline.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; the pipeline
            seed is ``12 + device_id``.
        data_dir: Passed to ``ops.FileReader`` as ``file_root``.
        file_list: Passed to ``ops.FileReader`` as ``file_list``.
        crop: Edge length used for the resize target and the final crop.
        local_rank: Shard index given to the reader.
        world_size: Shard count given to the reader.
        dali_cpu: When true, the CPU random-crop decoder and a CPU resize are
            used; otherwise the "mixed" decoder and a GPU resize.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)

    self.input = ops.FileReader(
        file_root=data_dir,
        file_list=file_list,
        shard_id=local_rank,
        num_shards=world_size,
        random_shuffle=True,
    )

    # Let the user decide which backend works best for the model being run.
    if dali_cpu:
        backend = "cpu"
        self.decode = ops.HostDecoderRandomCrop(
            device=backend,
            output_type=types.RGB,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[0.1, 1.0],
            num_attempts=100,
        )
    else:
        backend = "gpu"
        # The padding sizes the internal nvJPEG buffers so that full-sized
        # ImageNet images are handled without additional reallocations.
        self.decode = ops.nvJPEGDecoderRandomCrop(
            device="mixed",
            output_type=types.RGB,
            device_memory_padding=211025920,
            host_memory_padding=140544512,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[0.1, 1.0],
            num_attempts=100,
        )

    self.res = ops.Resize(
        device=backend,
        resize_x=crop,
        resize_y=crop,
        interp_type=types.INTERP_TRIANGULAR,
    )
    # Normalisation is always created on the GPU, regardless of `dali_cpu`.
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    self.coin = ops.CoinFlip(probability=0.5)
    print('DALI "{0}" variant'.format(backend))
def __init__(self, batch_size, layout, data_iterator, device):
    """Create a pipeline that flips externally supplied data.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        layout: Stored on the instance.
        data_iterator: Stored on the instance as ``self.iterator``.
        device: Stored on the instance and used as the flip operator's
            device.

    The base pipeline is created with seed 1234, 4 threads and device 0.
    """
    super(SynthFlipPipeline, self).__init__(
        batch_size,
        seed=1234,
        num_threads=4,
        device_id=0,
    )
    # Plain configuration kept on the instance.
    self.device = device
    self.iterator = data_iterator
    self.layout = layout

    # Operators: external input, a seeded coin flip, and the flip itself.
    self.input = ops.ExternalSource()
    self.coin = ops.CoinFlip(seed=1234)
    self.flip = ops.Flip(device=device)
def __init__(self, batch_size, num_threads, path, training, annotations, world, device_id, mean, std, resize, max_size, stride, rotate_augment=False, augment_brightness=0.0, augment_contrast=0.0, augment_hue=0.0, augment_saturation=0.0):
    """Create the COCO reader, decoders and augmentation operators.

    Args:
        batch_size, num_threads, device_id: Forwarded to the base pipeline;
            ``num_threads`` is also used as the prefetch queue depth and the
            seed is fixed at 42.
        path: Image root, stored and passed to the COCO reader.
        training: Stored on the instance.
        annotations: Annotation file for the COCO reader.
        world: Shard count for the COCO reader; the shard index is taken
            from ``torch.cuda.current_device()``.
        mean, std: Normalisation constants.
        resize: Lower bound of the random resize range; when a list is
            given, its maximum is used.
        max_size: Upper bound of the random resize range and the longer-side
            target of the inference resize.
        stride: The padded size is ``max_size`` rounded up to a multiple of
            this value.
        rotate_augment: Stored; a truthy value makes the constructor raise.
        augment_brightness, augment_contrast, augment_hue,
        augment_saturation: Stored, and used as the standard deviations of
            the corresponding normal-distribution operators.

    Raises:
        RuntimeWarning: If ``rotate_augment`` is truthy.
    """
    super().__init__(
        batch_size=batch_size,
        num_threads=num_threads,
        device_id=device_id,
        prefetch_queue_depth=num_threads,
        seed=42,
    )

    self.path = path
    self.training = training
    self.stride = stride
    self.iter = 0

    self.rotate_augment = rotate_augment
    self.augment_brightness = augment_brightness
    self.augment_contrast = augment_contrast
    self.augment_hue = augment_hue
    self.augment_saturation = augment_saturation

    self.reader = ops.COCOReader(
        annotations_file=annotations,
        file_root=path,
        num_shards=world,
        shard_id=torch.cuda.current_device(),
        ltrb=True,
        ratio=True,
        shuffle_after_epoch=True,
        save_img_ids=True,
    )

    self.decode_train = ops.ImageDecoderSlice(device="mixed", output_type=types.RGB)
    self.decode_infer = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self.bbox_crop = ops.RandomBBoxCrop(
        device='cpu',
        ltrb=True,
        scaling=[0.3, 1.0],
        thresholds=[0.1, 0.3, 0.5, 0.7, 0.9],
    )

    self.bbox_flip = ops.BbFlip(device='cpu', ltrb=True)
    self.img_flip = ops.Flip(device='gpu')
    self.coin_flip = ops.CoinFlip(probability=0.5)
    self.bc = ops.BrightnessContrast(device='gpu')
    self.hsv = ops.Hsv(device='gpu')

    # Random number generation for the colour augmentations.
    self.brightness_dist = ops.NormalDistribution(mean=1.0, stddev=augment_brightness)
    self.contrast_dist = ops.NormalDistribution(mean=1.0, stddev=augment_contrast)
    self.hue_dist = ops.NormalDistribution(mean=0.0, stddev=augment_hue)
    self.saturation_dist = ops.NormalDistribution(mean=1.0, stddev=augment_saturation)

    if rotate_augment:
        raise RuntimeWarning("--augment-rotate current has no effect when using the DALI data loader.")

    shortest = max(resize) if isinstance(resize, list) else resize
    self.rand_resize = ops.Uniform(range=[shortest, float(max_size)])

    cubic = types.DALIInterpType.INTERP_CUBIC
    self.resize_train = ops.Resize(device='gpu', interp_type=cubic, save_attrs=True)
    self.resize_infer = ops.Resize(
        device='gpu',
        interp_type=cubic,
        resize_longer=max_size,
        save_attrs=True,
    )

    # Round max_size up to the next multiple of the stride.
    padded_size = max_size + ((self.stride - max_size % self.stride) % self.stride)

    self.pad = ops.Paste(
        device='gpu',
        fill_value=0,
        ratio=1.1,
        min_canvas_size=padded_size,
        paste_x=0,
        paste_y=0,
    )
    self.normalize = ops.CropMirrorNormalize(
        device='gpu',
        mean=mean,
        std=std,
        crop=(padded_size, padded_size),
        crop_pos_x=0,
        crop_pos_y=0,
    )
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False, local_rank=0, world_size=1):
    """Create a GPU-only training pipeline.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; the pipeline
            seed is ``12 + device_id``.
        data_dir: Passed to ``ops.FileReader`` as ``file_root``.
        crop: Output size of the random resized crop.
        dali_cpu: Not referenced by this constructor; the "gpu" variant is
            always built.
        local_rank: Shard index given to the reader.
        world_size: Shard count given to the reader.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)
    variant = "gpu"

    self.input = ops.FileReader(
        file_root=data_dir,
        shard_id=local_rank,
        num_shards=world_size,
        random_shuffle=True,
    )
    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    # NOTE(review): the upper bound of random_area is 1.25, i.e. above 1.0 --
    # confirm this is intended and not a mix-up with an aspect-ratio bound.
    self.res = ops.RandomResizedCrop(
        device="gpu",
        size=crop,
        random_area=[0.08, 1.25],
    )
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    self.coin = ops.CoinFlip(probability=0.5)
    print('DALI "{0}" variant'.format(variant))
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False, args_new=None, shuffle_seed=0):
    """Create the operators used by the training pipeline.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; the pipeline
            seed is ``12 + device_id`` and the prefetch queue depth is 3.
        data_dir: Passed to ``ops.FileReader`` as ``file_root``.
        crop: Edge length used for the resize target and the final crop.
        dali_cpu: When true, every operator runs on the CPU; otherwise the
            decoder is "mixed" and the remaining operators are on the GPU.
        args_new: Object whose ``local_rank_per_job`` and ``world_size``
            attributes configure sharding. It is dereferenced
            unconditionally, so the ``None`` default cannot actually be used.
        shuffle_seed: Seed handed to the reader.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size,
        num_threads,
        device_id,
        seed=12 + device_id,
        prefetch_queue_depth=3,
    )

    # The reader reshuffles after every epoch with an explicit seed; earlier
    # variants of this line used random_shuffle=True instead.
    self.input = ops.FileReader(
        file_root=data_dir,
        shard_id=args_new.local_rank_per_job,
        num_shards=args_new.world_size,
        shuffle_after_epoch=True,
        shuffle_seed=shuffle_seed,
    )

    if dali_cpu:
        op_device, decoder_device = 'cpu', 'cpu'
    else:
        op_device, decoder_device = 'gpu', 'mixed'

    # The padding sizes the internal nvJPEG buffers so that full-sized
    # ImageNet images are handled without additional reallocations.
    mixed_decoder = decoder_device == 'mixed'
    self.decode = ops.ImageDecoderRandomCrop(
        device=decoder_device,
        output_type=types.RGB,
        device_memory_padding=211025920 if mixed_decoder else 0,
        host_memory_padding=140544512 if mixed_decoder else 0,
        random_aspect_ratio=[0.8, 1.25],
        random_area=[0.1, 1.0],
        num_attempts=100,
    )
    self.res = ops.Resize(
        device=op_device,
        resize_x=crop,
        resize_y=crop,
        interp_type=types.INTERP_TRIANGULAR,
    )
    self.cmnp = ops.CropMirrorNormalize(
        device=op_device,
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    self.coin = ops.CoinFlip(probability=0.5)
    print('DALI "{0}" variant, shards={1}'.format(op_device, args_new.world_size))
def __init__(self, batch_size, num_threads, device_id, crop, colorjitter=None, dali_cpu=False):
    """Create an external-source ImageNet training pipeline.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; the pipeline
            seed is ``12 + device_id``.
        crop: Edge length used for resizing and for the final crop.
        colorjitter: Optional indexable of four values (indices 0..3 feed the
            brightness, contrast, saturation and hue ranges). ``None``
            disables the colour-twist operators.
        dali_cpu: When true, the CPU random-crop decoder plus a plain resize
            are used; otherwise the "mixed" decoder plus a GPU random
            resized crop.
    """
    super(ImageNetTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)

    self.data_input = ops.ExternalSource()
    self.label_input = ops.ExternalSource()
    self.colorjitter = colorjitter

    # Let the user decide which backend works best for the model being run.
    if dali_cpu:
        backend = "cpu"
        self.decode = ops.HostDecoderRandomCrop(device=backend, output_type=types.RGB)
        self.res = ops.Resize(resize_x=crop, resize_y=crop)
    else:
        backend = "gpu"
        # The padding sizes the internal nvJPEG buffers so that full-sized
        # ImageNet images are handled without additional reallocations.
        self.decode = ops.nvJPEGDecoder(
            device="mixed",
            output_type=types.RGB,
            device_memory_padding=211025920,
            host_memory_padding=140544512,
        )
        self.res = ops.RandomResizedCrop(device=backend, size=(crop, crop))

    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    self.coin = ops.CoinFlip(probability=0.5)

    if self.colorjitter is None:
        return

    jitter = self.colorjitter
    self.colorjit = ops.ColorTwist(device="gpu")
    # Brightness, contrast and saturation are centred on 1.0, hue on 0.
    self.rng_brightness = ops.Uniform(range=(1.0 - jitter[0], 1.0 + jitter[0]))
    self.rng_contrast = ops.Uniform(range=(1.0 - jitter[1], 1.0 + jitter[1]))
    self.rng_saturation = ops.Uniform(range=(1.0 - jitter[2], 1.0 + jitter[2]))
    self.rng_hue = ops.Uniform(range=(-jitter[3], jitter[3]))
def __init__(self, batch_size, num_threads, device_id):
    """Create the operators shared by pipelines derived from this class.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor.
    """
    super(CommonPipeline, self).__init__(batch_size, num_threads, device_id)

    # The previously assigned local `dali_device = "gpu"` was never read and
    # has been dropped; every device below is spelled out explicitly.
    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self.cmn = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        image_type=types.RGB,
        mean=[128., 128., 128.],
        std=[1., 1., 1.],
    )
    self.coin = ops.CoinFlip(device="gpu", probability=0.5)
    self.uniform = ops.Uniform(range=(0.0, 1.0))
# Constructor of the training pipeline.
#
# The base pipeline is created with seed=-1 (see the inline comment: the
# pipeline is recreated every epoch). A FileReader is sharded with
# `local_rank` / `world_size` and shuffles according to `shuffle`.
# `dali_cpu` selects between a "cpu" decoder with a Flip operator and a
# "mixed" decoder with GPU operators; `fp16` switches the normalisation
# output dtype from FLOAT to FLOAT16; `min_crop_size` is the lower bound of
# the random crop area. `color_jitter` is not referenced in this constructor.
#
# NOTE(review): the line breaks of this block have been lost, so it cannot be
# determined from the text here whether `output_dtype` and `self.cmn` are
# created only in the `else:` (GPU) branch or for both backends -- confirm
# against the original layout before reformatting.
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, mean, std, local_rank=0, world_size=1, dali_cpu=False, shuffle=True, fp16=False, min_crop_size=0.08, color_jitter=False): # As we're recreating the Pipeline at every epoch, the seed must be -1 (random seed) super(HybridTrainPipe, self).__init__( batch_size, num_threads, device_id, seed=-1) # Enabling read_ahead slowed down processing ~40% self.input = ops.FileReader(file_root=data_dir, shard_id=local_rank, num_shards=world_size, random_shuffle=shuffle) # Let user decide which pipeline works best with the chosen model if dali_cpu: decode_device = "cpu" self.dali_device = "cpu" self.flip = ops.Flip(device=self.dali_device) else: decode_device = "mixed" self.dali_device = "gpu" output_dtype = types.FLOAT if fp16: output_dtype = types.FLOAT16 self.cmn = ops.CropMirrorNormalize(device=self.dali_device, output_dtype=output_dtype, output_layout=types.NCHW, crop=(crop, crop), image_type=types.RGB, mean=mean, std=std,) # To be able to handle all images from full-sized ImageNet, this padding sets the size of the internal # nvJPEG buffers without additional reallocations device_memory_padding = 211025920 if decode_device == 'mixed' else 0 host_memory_padding = 140544512 if decode_device == 'mixed' else 0 self.decode = ops.ImageDecoderRandomCrop(device=decode_device, output_type=types.RGB, device_memory_padding=device_memory_padding, host_memory_padding=host_memory_padding, random_aspect_ratio=[ 0.8, 1.25], random_area=[ min_crop_size, 1.0], num_attempts=100) # Resize as desired. To match torchvision data loader, use triangular interpolation. self.res = ops.Resize(device=self.dali_device, resize_x=crop, resize_y=crop, interp_type=types.INTERP_TRIANGULAR) self.coin = ops.CoinFlip(probability=0.5) print('DALI "{0}" variant'.format(self.dali_device))
def __init__(self, session, dataset, batch_size, num_threads, is_random_flip=True, num_samples=1000000, device_id=0, preprocess=None, fill_last_batch=True):
    """Create a pipeline fed by an external input iterator.

    Args:
        session, dataset, num_samples: Stored on the instance and handed to
            ``ExternalInputIterator3``.
        batch_size, num_threads, device_id: Forwarded to the base pipeline
            (seed 12); ``batch_size`` is also handed to the iterator.
        is_random_flip: Stored on the instance.
        preprocess: Optional mapping with ``'width'`` and ``'height'`` keys;
            when given, a resize operator with those dimensions is created.
        fill_last_batch: Handed to ``ExternalInputIterator3``.

    NOTE(review): the nesting was reconstructed from whitespace-collapsed
    text; only the resize is assumed to sit under the ``preprocess`` check --
    confirm against the original layout.
    """
    super(ExternalSourcePipeline, self).__init__(batch_size, num_threads, device_id, seed=12)

    self.session = session
    self.num_samples = num_samples
    self.dataset = dataset
    self.is_random_flip = is_random_flip
    self.preprocess = preprocess

    if self.preprocess is not None:
        width, height = preprocess['width'], preprocess['height']
        self.res = ops.Resize(resize_x=width, resize_y=height)

    self.flip = ops.Flip()
    self.coin = ops.CoinFlip(probability=0.5)
    self.coin2 = ops.CoinFlip(probability=0.5)

    source = ExternalInputIterator3(
        self.session,
        self.dataset,
        self.num_samples,
        batch_size,
        fill_last_batch,
    )
    self.iterator = iter(source)
    self.iterator.reset()

    self.input = ops.ExternalSource()
    self.input_label = ops.ExternalSource()
def __init__(self, batch_size, num_threads, device_id, data_dir, crop):
    """Create a GPU training pipeline with a random resized crop.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; the pipeline
            seed is ``12 + device_id``.
        data_dir: Passed to ``ops.FileReader`` as ``file_root``.
        crop: Edge length of the random resized crop and of the final crop.

    Sharding is read from the module-level ``args`` object
    (``args.local_rank`` / ``args.world_size``).
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)

    self.input = ops.FileReader(
        file_root=data_dir,
        shard_id=args.local_rank,
        num_shards=args.world_size,
        random_shuffle=True,
    )
    self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

    target = (crop, crop)
    self.rrc = ops.RandomResizedCrop(device="gpu", size=target)
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, device_id):
    """Create a pipeline that reads images from a TFRecord file.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor.

    The record path and its index come from the module-level names
    ``tfrecord`` and ``tfrecord_idx``.
    """
    super(TFRecordPipeline, self).__init__(batch_size, num_threads, device_id)

    # Feature schema requested from the reader, in declaration order.
    feature_spec = {
        "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
        'image/filename': tfrec.FixedLenFeature([], tfrec.string, ''),
        'image/height': tfrec.FixedLenFeature([1], tfrec.int64, -1),
        'image/width': tfrec.FixedLenFeature([1], tfrec.int64, -1),
        'image/colorspace': tfrec.FixedLenFeature([], tfrec.string, ''),
        'image/channels': tfrec.FixedLenFeature([1], tfrec.int64, -1),
        'image/format': tfrec.FixedLenFeature([], tfrec.string, ''),
        'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
        'image/class/synset': tfrec.FixedLenFeature([], tfrec.string, ''),
        'image/class/text': tfrec.FixedLenFeature([], tfrec.string, ''),
        'image/object/bbox/xmin': tfrec.VarLenFeature(tfrec.float32, 0.0),
        'image/object/bbox/ymin': tfrec.VarLenFeature(tfrec.float32, 0.0),
        'image/object/bbox/xmax': tfrec.VarLenFeature(tfrec.float32, 0.0),
        'image/object/bbox/ymax': tfrec.VarLenFeature(tfrec.float32, 0.0),
        'image/object/bbox/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
    }
    self.input = ops.TFRecordReader(
        path=tfrecord,
        index_path=tfrecord_idx,
        features=feature_spec,
    )

    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self.resize = ops.Resize(device="gpu", resize_x=5000., resize_y=5000.)
    self.vert_flip = ops.Flip(device="gpu", horizontal=0)
    self.vert_coin = ops.CoinFlip(probability=0.5)
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        crop=(1920, 1920),
        image_type=types.RGB,
        mean=[0., 0., 0.],
        std=[1., 1., 1.],
    )
    self.uniform = ops.Uniform(range=(0.0, 1.0))
    self.iter = 0
def __init__(self, batch_size, num_threads, device_id, num_gpus, data_paths):
    """Create a CPU-only ResNet-50 style input pipeline.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor and used as
            the reader's shard index.
        num_gpus: Shard count given to the reader.
        data_paths: Passed to ``ops.FileReader`` as ``file_root``.
    """
    super(RN50Pipeline, self).__init__(batch_size, num_threads, device_id,)

    self.input = ops.FileReader(
        file_root=data_paths,
        shard_id=device_id,
        num_shards=num_gpus,
    )
    # Despite the attribute name, this decoder is created on the CPU.
    self.decode_gpu = ops.ImageDecoder(device="cpu", output_type=types.RGB)
    self.res = ops.RandomResizedCrop(device="cpu", size=(224, 224))
    self.cmnp = ops.CropMirrorNormalize(
        device="cpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(224, 224),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, device, batch_size, iterator, layout, center_x=None, center_y=None, center_z=None, num_threads=1, device_id=0):
    """Create a pipeline around the coordinate-flip operator.

    Args:
        device: Stored on the instance and used as the operator's device.
        batch_size, num_threads, device_id: Forwarded to the base pipeline.
        iterator: Data source, stored on the instance.
        layout: Coordinate layout for the operator; a z-axis coin flip is
            only created when it has exactly three entries.
        center_x, center_y, center_z: Forwarded to ``ops.CoordFlip``.
    """
    super(CoordFlipPipeline, self).__init__(batch_size, num_threads, device_id)
    self.device = device
    self.iterator = iterator
    self.coord_flip = ops.CoordFlip(
        device=self.device,
        layout=layout,
        center_x=center_x,
        center_y=center_y,
        center_z=center_z,
    )
    self.flip_x = ops.CoinFlip(probability=0.5)
    self.flip_y = ops.CoinFlip(probability=0.5)
    if len(layout) == 3:
        self.flip_z = ops.CoinFlip(probability=0.5)
    else:
        self.flip_z = None
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
    """Create a training pipeline that reads from TFRecord files.

    Args:
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor; the pipeline
            seed is ``12 + device_id``.
        data_dir: Passed to ``ops.TFRecordReader`` as ``path``.
        crop: Edge length used for the resize target and the final crop.
        dali_cpu: When true, decode and resize on the CPU; otherwise use the
            "mixed" decoder and a GPU resize.

    Index files are listed from a fixed directory and sorted by path;
    sharding is read from the module-level ``args`` object.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)

    index_dir = "/home/guojia/idx_files/train"
    index_path = sorted(os.path.join(index_dir, entry) for entry in os.listdir(index_dir))

    # Text and bounding-box features are intentionally not requested.
    feature_spec = {
        'image/height': tfrec.FixedLenFeature([1], tfrec.int64, -1),
        'image/width': tfrec.FixedLenFeature([1], tfrec.int64, -1),
        'image/colorspace': tfrec.FixedLenFeature([ ], tfrec.string, ''),
        'image/channels': tfrec.FixedLenFeature([], tfrec.int64, -1),
        'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
        'image/class/synset': tfrec.FixedLenFeature([ ], tfrec.string, ''),
        'image/format': tfrec.FixedLenFeature((), tfrec.string, ""),
        'image/filename': tfrec.FixedLenFeature((), tfrec.string, ""),
        'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
    }
    self.input = ops.TFRecordReader(
        path=data_dir,
        index_path=index_path,
        shard_id=args.local_rank,
        num_shards=args.world_size,
        random_shuffle=True,
        features=feature_spec,
    )

    # Let the user decide which backend works best for the model being run.
    if dali_cpu:
        op_device, decoder_device = 'cpu', 'cpu'
    else:
        op_device, decoder_device = 'gpu', 'mixed'

    # The padding sizes the internal nvJPEG buffers so that full-sized
    # ImageNet images are handled without additional reallocations.
    mixed_decoder = decoder_device == 'mixed'
    self.decode = ops.ImageDecoderRandomCrop(
        device=decoder_device,
        output_type=types.RGB,
        device_memory_padding=211025920 if mixed_decoder else 0,
        host_memory_padding=140544512 if mixed_decoder else 0,
        random_aspect_ratio=[0.8, 1.25],
        random_area=[0.1, 1.0],
        num_attempts=100,
    )
    self.res = ops.Resize(
        device=op_device,
        resize_x=crop,
        resize_y=crop,
        interp_type=types.INTERP_TRIANGULAR,
    )
    # Normalisation is always created on the GPU, regardless of `dali_cpu`.
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    self.coin = ops.CoinFlip(probability=0.5)
    print('DALI "{0}" variant'.format(op_device))
def __init__(self, file_root, annotations_file, batch_size=1, device_id=0, num_threads=4, local_rank=0, world_size=1):
    """Create and build a COCO detection training pipeline.

    Args:
        file_root: Image root for the COCO reader.
        annotations_file: Annotation file for the COCO reader.
        batch_size, num_threads, device_id: Forwarded to the base pipeline;
            the pipeline seed is ``42 + device_id``.
        local_rank: Shard index given to the reader.
        world_size: Shard count given to the reader.

    ``self.build()`` is called at the end of construction.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=42 + device_id)

    self.reader = ops.COCOReader(
        file_root=file_root,
        annotations_file=annotations_file,
        skip_empty=True,
        shard_id=local_rank,
        num_shards=world_size,
        ratio=True,
        ltrb=True,
        shuffle_after_epoch=True,
    )

    # Box-aware cropping and flipping run on the CPU.
    self.crop = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        ltrb=True,
        allow_no_crop=True,
        num_attempts=50,
    )
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)

    # Image decoding and processing.
    self.roi_decode = ops.ImageDecoderSlice(device="mixed")
    self.resize = ops.Resize(
        device="gpu",
        resize_x=300,
        resize_y=300,
        min_filter=types.DALIInterpType.INTERP_TRIANGULAR,
    )
    self.bc = ops.BrightnessContrast(device="gpu")
    self.hsv = ops.Hsv(device="gpu")
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        mean=[104., 117., 123.],
        std=[1., 1., 1.],
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        pad_output=False,
    )

    # Random generators consumed by the graph definition.
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])
    self.coin = ops.CoinFlip(probability=0.5)

    self.build()
def __init__(self, batch_size, num_threads, device_id, num_shards, shard_id, use_fp16=False, train=True, root=os.path.expanduser('./data')):
    """Create a CIFAR-10 pipeline that reads from MXNet RecordIO files.

    Args:
        batch_size, num_threads, device_id: Forwarded to the base pipeline
            (seed 12).
        num_shards, shard_id: Sharding passed to the MXNet reader.
        use_fp16: When true, the normalisation outputs FLOAT16 instead of
            FLOAT.
        train: Selects the "train" or "test" files, enables shuffling and
            creates the training-only augmentation operators.
        root: Directory holding ``cifar10_<part>.idx`` / ``.rec``.

    ``self.size`` is set to the number of lines in the index file(s).

    NOTE(review): the nesting was reconstructed from whitespace-collapsed
    text; all trailing augmentation operators are assumed to sit under
    ``if train:`` -- confirm against the original layout.
    """
    super().__init__(batch_size, num_threads, device_id, seed=12)

    part = "train" if train else "test"
    # Format only the file name. Formatting the joined path (as before) also
    # interpreted any '{' / '}' in `root`, which raised or corrupted the path.
    idx_files = [os.path.join(root, "cifar10_{}.idx".format(part))]
    rec_files = [os.path.join(root, "cifar10_{}.rec".format(part))]

    self.num_classes = 10
    self.image_size = (32, 32)
    self.size = 0
    self.train = train

    # One index line per record: count lines without materialising them.
    for idx_file in idx_files:
        with open(idx_file, "r") as f:
            self.size += sum(1 for _ in f)

    self._input = ops.MXNetReader(
        path=rec_files,
        index_path=idx_files,
        random_shuffle=True if train else False,
        num_shards=num_shards,
        shard_id=shard_id,
        seed=12,
        tensor_init_bytes=self.image_size[0] * self.image_size[1] * 8,
    )
    self._decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self._cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT16 if use_fp16 else types.FLOAT,
        output_layout=types.NCHW,
        crop=self.image_size,
        image_type=types.RGB,
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
    )

    if train:
        self.padding = ops.Paste(device="gpu", fill_value=128, ratio=1.25)
        self.px = ops.Uniform(range=(0, 1))
        self.py = ops.Uniform(range=(0, 1))
        self._uniform = ops.Uniform(range=(0.7, 1.3))
        self._coin = ops.CoinFlip(probability=0.5)
def __init__(self, file_list, file_root, crop_size, batch_size, num_threads,
             device_id, random_shuffle=True, seed=-1, mean=None, std=None,
             num_samples=None):
    """Create the reader, decoder, resize, crop and normalisation operators.

    Args:
        file_list: Passed to ``ops.FileReader`` as ``file_list``.
        file_root: Passed to ``ops.FileReader`` as ``file_root``.
        crop_size: Crop size; normalised with ``_pair`` before use.
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor.
        random_shuffle: Passed to ``ops.FileReader``.
        seed: Forwarded to the base pipeline constructor.
        mean: Per-channel mean for normalisation; defaults to
            ``[0.485 * 255, 0.456 * 255, 0.406 * 255]`` when ``None``.
        std: Per-channel std for normalisation; defaults to
            ``[0.229 * 255, 0.224 * 255, 0.225 * 255]`` when ``None``.
        num_samples: When given, caps the reader's ``initial_fill`` (which is
            never larger than 4096).
    """
    super(DaliPipelineTrain, self).__init__(
        batch_size, num_threads, device_id, seed=seed)

    crop_size = _pair(crop_size)
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] if mean is None else mean
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255] if std is None else std

    # Never pre-fill the reader with more than 4096 samples, nor with more
    # than ``num_samples`` when that is known.
    fill = 4096 if num_samples is None else min(4096, num_samples)

    self.loader = ops.FileReader(
        file_root=file_root,
        file_list=file_list,
        random_shuffle=random_shuffle,
        initial_fill=fill,
    )
    self.decode = ops.HostDecoder()
    self.resize = ops.Resize(device='gpu', resize_x=256, resize_y=256)
    # Further GPU augmentations (Hue, Brightness, Contrast, Rotate, Jitter)
    # are currently disabled.

    # Area of the crop relative to the 256x256 resized image; lower and
    # upper bounds of both ranges are identical after ``_pair``.
    frac_first = crop_size[0] / 256.0
    frac_second = crop_size[1] / 256.0
    area_range = _pair(frac_first * frac_second)
    aspect_range = _pair(1.0)
    self.rrcrop = ops.RandomResizedCrop(
        device='gpu',
        size=crop_size,
        random_area=area_range,
        random_aspect_ratio=aspect_range,
    )
    self.cmnorm = ops.CropMirrorNormalize(
        device='gpu',
        crop=list(crop_size),
        mean=mean,
        std=std,
    )
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, p: float = .5, nDegree: int = 2,
             fill_value: Union[float, int] = 0.):
    """Create the jitter operator and the coin flip associated with it.

    Args:
        p (float, optional): Probability to apply this transformation.
            Defaults to .5.
        nDegree (int, optional): Each pixel is moved by a random amount in
            range [-nDegree/2, nDegree/2]. Defaults to 2.
        fill_value (Union[float,int], optional): Color value used for
            padding pixels. Defaults to 0..
    """
    # Forward the caller's fill_value. It used to be hard-coded to 0, which
    # made the argument a silent no-op.
    self.jitter = ops.Jitter(
        device='gpu', fill_value=fill_value, nDegree=nDegree)
    self.flip_coin = ops.CoinFlip(probability=p)
def __new__(cls, probability=0.5, **kwargs):
    """Build a ``CoinFlip`` operator and return it in place of a ``cls`` instance.

    Parameters
    ----------
    probability : float, optional, default=0.5
        The probability to return 1.
    **kwargs
        Any further keyword arguments, forwarded unchanged to
        ``ops.CoinFlip``.

    Returns
    -------
    nvidia.dali.ops.CoinFlip
        The operator.
    """
    coin_flip_op = ops.CoinFlip(probability=probability, **kwargs)
    return coin_flip_op
def __init__(self, device_id, crop, size, rank, seed_rank, options):
    """Create the reader, decoder, resize and normalisation operators.

    Args:
        device_id: Forwarded to the base pipeline constructor.
        crop: Side length used for both the resize target and the crop.
        size: Number of shards passed to the reader.
        rank: Shard index passed to the reader.
        seed_rank: Added to ``options.seed`` to derive every seed used here.
        options: Object providing ``batchsize``, ``seed``, ``train_dir`` and
            ``fp16``.
    """
    batch = options.batchsize
    # All seeds below are fixed offsets from this common base.
    base_seed = options.seed + seed_rank
    super(TrainPipeline, self).__init__(
        batch, 4, device_id,
        prefetch_queue_depth=3, set_affinity=True, seed=base_seed)

    record_file = os.path.join(options.train_dir, "data.rec")
    index_file = os.path.join(options.train_dir, "data.idx")
    self.input = ops.MXNetReader(
        path=[record_file],
        index_path=[index_file],
        random_shuffle=True,
        shard_id=rank,
        num_shards=size,
    )

    # This padding sets the size of the internal nvJPEG buffers to be able
    # to handle all images from full-sized ImageNet without additional
    # reallocations.
    self.decode = ops.ImageDecoderRandomCrop(
        device="mixed",
        output_type=types.RGB,
        device_memory_padding=211025920,
        host_memory_padding=140544512,
        random_aspect_ratio=[0.75, 4. / 3.],
        random_area=[0.08, 1.0],
        num_attempts=100,
        seed=base_seed + 1641,
    )
    self.res = ops.Resize(
        device="gpu",
        resize_x=crop,
        resize_y=crop,
        interp_type=types.INTERP_TRIANGULAR,
    )

    if options.fp16:
        out_dtype = types.FLOAT16
    else:
        out_dtype = types.FLOAT
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=out_dtype,
        output_layout=types.NCHW,
        crop=(crop, crop),
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        pad_output=False,
        seed=base_seed + 1223,
    )
    self.coin = ops.CoinFlip(probability=0.5, seed=base_seed + 412)