def __init__(self, batch_size, num_threads, device_id, data_dir, crop,
             size=256, dali_cpu=False, local_rank=0, world_size=1):
    """Instantiate the operators used by the hybrid training pipeline.

    Args:
        batch_size: Forwarded positionally to the parent constructor.
        num_threads: Forwarded positionally to the parent constructor.
        device_id: Forwarded positionally to the parent constructor; the
            pipeline seed is ``12 + device_id``.
        data_dir: ``file_root`` of the file reader.
        crop: ``size`` of the random resized crop.
        size: ``resize_shorter`` of the resize operator.
        dali_cpu: Accepted but never read in this constructor; the
            augmentation device is always ``"gpu"``.
        local_rank: ``shard_id`` of the file reader.
        world_size: ``num_shards`` of the file reader.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size, num_threads, device_id, seed=12 + device_id
    )
    dali_device = "gpu"

    # Sharded, shuffled reader followed by a mixed-device decoder.
    self.input = ops.FileReader(
        file_root=data_dir,
        shard_id=local_rank,
        num_shards=world_size,
        random_shuffle=True,
    )
    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)

    # Resize is driven by the shorter side only (no fixed resize_x/resize_y).
    self.res = ops.Resize(
        device=dali_device,
        resize_shorter=size,
        interp_type=types.INTERP_LINEAR,
    )
    self.rescrop = ops.RandomResizedCrop(
        device=dali_device, size=crop, random_area=[0.08, 1.25]
    )
    self.bc = ops.BrightnessContrast(
        device=dali_device, brightness=0.5, contrast=0.6
    )

    # Flip operator created with horizontal=0, paired with a coin flip of
    # probability 0.4.
    self.vert_flip = ops.Flip(device=dali_device, horizontal=0)
    self.vert_coin = ops.CoinFlip(probability=0.4)

    # Original author's note: workaround for DALI not supporting random
    # affine transforms -- a "synthetic random" warp affine transform.
    # No explicit `size` is passed to WarpAffine (noted originally as
    # "keep original canvas size").
    self.transform_source = ops.ExternalSource()
    self.warp_keep_size = ops.WarpAffine(
        device=dali_device,
        interp_type=types.INTERP_LINEAR,
    )

    # Rotation operator, a uniform generator over (-20, 20) and a coin flip
    # with probability 0.075.
    self.rotate = ops.Rotate(device=dali_device)
    self.rotate_range = ops.Uniform(range=(-20.0, 20.0))
    self.rotate_coin = ops.CoinFlip(probability=0.075)

    # Per-channel mean/std expressed on a 0..255 scale.
    channel_mean = [m * 255 for m in (0.485, 0.456, 0.406)]
    channel_std = [s * 255 for s in (0.229, 0.224, 0.225)]
    self.cmnp = ops.CropMirrorNormalize(
        device=dali_device,
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        image_type=types.RGB,
        mean=channel_mean,
        std=channel_std,
    )
    self.coin = ops.CoinFlip(probability=0.5)

    print('DALI "{0}" variant'.format(dali_device))
def __init__(self, target_size, preproc_param, training=False):
    """Create the decode, crop, color, flip, paste and resize operators.

    Args:
        target_size: Indexable pair; element 0 is used as ``resize_x`` and
            element 1 as ``resize_y``.
        preproc_param: Object exposing ``MEAN``, ``STD``, ``BRI_DELTA``,
            ``HUE_DELTA``, ``MAX_EXPAND_RATIO``, ``CONTRAST_RANGE``,
            ``SATURATION_RANGE``, ``CROP_ASPECT_RATIO``, ``CROP_SCALE`` and
            ``CROP_ATTEMPTS``.
        training: Stored on ``self.training``.
    """
    self.training = training

    # Pull every setting off the parameter object up front.
    mean = preproc_param.MEAN
    std = preproc_param.STD
    brightness_delta = preproc_param.BRI_DELTA
    hue_delta = preproc_param.HUE_DELTA
    max_expand_ratio = preproc_param.MAX_EXPAND_RATIO
    contrast_range = preproc_param.CONTRAST_RANGE
    saturation_range = preproc_param.SATURATION_RANGE
    crop_aspect_ratio = preproc_param.CROP_ASPECT_RATIO
    crop_scale = preproc_param.CROP_SCALE
    crop_attempts = preproc_param.CROP_ATTEMPTS

    # Decoders: a slicing decoder and a plain decoder.
    self.decode_train = ops.ImageDecoderSlice(
        device="mixed", output_type=types.RGB
    )
    self.decode_infer = ops.ImageDecoder(
        device="mixed", output_type=types.RGB
    )

    # SSD-style random bounding-box crop.
    self.bbox_crop = ops.RandomBBoxCrop(
        device="cpu",
        bbox_layout="xyXY",
        scaling=crop_scale,
        aspect_ratio=crop_aspect_ratio,
        allow_no_crop=True,
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        num_attempts=crop_attempts,
    )

    # Color twist: random generators plus the operators they feed.
    # The brightness range is centered on 1.0, with the delta scaled by 256.
    brightness_shift = brightness_delta / 256.0
    self.uniform_con = ops.Uniform(range=contrast_range)
    self.uniform_bri = ops.Uniform(
        range=(1.0 - brightness_shift, 1.0 + brightness_shift)
    )
    self.uniform_sat = ops.Uniform(range=saturation_range)
    self.uniform_hue = ops.Uniform(range=(-hue_delta, hue_delta))
    self.hsv = ops.Hsv(device="gpu")
    self.contrast = ops.BrightnessContrast(device="gpu")

    # Horizontal flip for boxes (CPU) and images (GPU).
    self.bbox_flip = ops.BbFlip(device="cpu", ltrb=True)
    self.img_flip = ops.Flip(device="gpu")
    self.coin_flip = ops.CoinFlip(probability=0.5)

    # Paste (expansion), filled with the configured mean.
    self.paste_pos = ops.Uniform(range=(0, 1))
    self.paste_ratio = ops.Uniform(range=(1, max_expand_ratio))
    self.paste = ops.Paste(device="gpu", fill_value=mean)
    self.bbox_paste = ops.BBoxPaste(device="cpu", ltrb=True)

    # Resize to the target size, then normalize.
    self.resize = ops.Resize(
        device="gpu",
        interp_type=types.DALIInterpType.INTERP_CUBIC,
        resize_x=target_size[0],
        resize_y=target_size[1],
        save_attrs=True,
    )
    self.normalize = ops.CropMirrorNormalize(
        device="gpu", mean=mean, std=std
    )
def __init__(self, file_root, annotations_file, batch_size=1, device_id=0,
             num_threads=4, local_rank=0, world_size=1):
    """Create the COCO training operators and build the pipeline.

    Args:
        file_root: ``file_root`` of the COCO reader.
        annotations_file: ``annotations_file`` of the COCO reader.
        batch_size: Forwarded positionally to the parent constructor.
        device_id: Forwarded positionally to the parent constructor; the
            pipeline seed is ``42 + device_id``.
        num_threads: Forwarded positionally to the parent constructor.
        local_rank: ``shard_id`` of the COCO reader.
        world_size: ``num_shards`` of the COCO reader.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size, num_threads, device_id, seed=42 + device_id
    )

    self.reader = ops.COCOReader(
        file_root=file_root,
        annotations_file=annotations_file,
        skip_empty=True,
        shard_id=local_rank,
        num_shards=world_size,
        ratio=True,
        ltrb=True,
        shuffle_after_epoch=True,
        pad_last_batch=True,
    )

    # Box-aware random crop and box flip run on the CPU.
    self.crop = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        ltrb=True,
        allow_no_crop=True,
        num_attempts=50,
    )
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)

    self.roi_decode = ops.ImageDecoderSlice(device="mixed")
    self.resize = ops.Resize(
        device="gpu",
        resize_x=300,
        resize_y=300,
        min_filter=types.DALIInterpType.INTERP_TRIANGULAR,
    )

    # HSV outputs float to avoid clipping and quantizing the intermediate
    # result.
    self.hsv = ops.Hsv(device="gpu", dtype=types.FLOAT)
    # The input to this operator is float but still in the 0..255 range,
    # hence contrast_center=128; the output is cast back to uint8.
    self.bc = ops.BrightnessContrast(
        device="gpu",
        contrast_center=128,
        dtype=types.UINT8,
    )

    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        mean=[104., 117., 123.],
        std=[1., 1., 1.],
        dtype=types.FLOAT,
        output_layout=types.NCHW,
        pad_output=False,
    )

    # Random generators consumed by the augmentations.
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])
    self.coin = ops.CoinFlip(probability=0.5)

    # The pipeline is built as part of construction.
    self.build()
def __new__(cls, **kwargs):
    """Build a ``BrightnessContrast`` operator on the context's device.

    The device is taken from ``context.get_device_type()``; all keyword
    arguments are forwarded unchanged to the operator constructor.

    Returns
    -------
    nvidia.dali.ops.BrightnessContrast
        The operator.
    """
    device = context.get_device_type()
    return ops.BrightnessContrast(device=device, **kwargs)
def __init__(self, batch_size, num_threads, path, training, annotations,
             world, device_id, mean, std, resize, max_size, stride,
             rotate_augment=False, augment_brightness=0.0,
             augment_contrast=0.0, augment_hue=0.0, augment_saturation=0.0):
    """Create the COCO reading, augmentation, resize and padding operators.

    Args:
        batch_size: Pipeline batch size.
        num_threads: Pipeline thread count; also used as
            ``prefetch_queue_depth``.
        path: ``file_root`` of the COCO reader; stored on ``self.path``.
        training: Stored on ``self.training``.
        annotations: ``annotations_file`` of the COCO reader.
        world: ``num_shards`` of the COCO reader.
        device_id: Pipeline device id.
        mean: ``mean`` of the normalize operator.
        std: ``std`` of the normalize operator.
        resize: Lower bound of the random resize range; if a list, its
            maximum is used.
        max_size: Upper bound of the random resize range and
            ``resize_longer`` of the inference resize.
        stride: The padded canvas size is ``max_size`` rounded up to a
            multiple of this value.
        rotate_augment: If truthy, construction is aborted (see Raises).
        augment_brightness: Stddev of the brightness distribution.
        augment_contrast: Stddev of the contrast distribution.
        augment_hue: Stddev of the hue distribution.
        augment_saturation: Stddev of the saturation distribution.

    Raises:
        RuntimeWarning: If ``rotate_augment`` is truthy.
    """
    super().__init__(
        batch_size=batch_size,
        num_threads=num_threads,
        device_id=device_id,
        prefetch_queue_depth=num_threads,
        seed=42,
    )

    # Plain configuration kept on the instance.
    self.path = path
    self.training = training
    self.stride = stride
    self.iter = 0

    self.rotate_augment = rotate_augment
    self.augment_brightness = augment_brightness
    self.augment_contrast = augment_contrast
    self.augment_hue = augment_hue
    self.augment_saturation = augment_saturation

    # The shard id is the current CUDA device index.
    self.reader = ops.COCOReader(
        annotations_file=annotations,
        file_root=path,
        num_shards=world,
        shard_id=torch.cuda.current_device(),
        ltrb=True,
        ratio=True,
        shuffle_after_epoch=True,
        save_img_ids=True,
    )

    self.decode_train = ops.ImageDecoderSlice(
        device="mixed", output_type=types.RGB
    )
    self.decode_infer = ops.ImageDecoder(
        device="mixed", output_type=types.RGB
    )
    self.bbox_crop = ops.RandomBBoxCrop(
        device='cpu',
        ltrb=True,
        scaling=[0.3, 1.0],
        thresholds=[0.1, 0.3, 0.5, 0.7, 0.9],
    )

    self.bbox_flip = ops.BbFlip(device='cpu', ltrb=True)
    self.img_flip = ops.Flip(device='gpu')
    self.coin_flip = ops.CoinFlip(probability=0.5)
    self.bc = ops.BrightnessContrast(device='gpu')
    self.hsv = ops.Hsv(device='gpu')

    # Random number generation for augmentation.
    self.brightness_dist = ops.NormalDistribution(
        mean=1.0, stddev=augment_brightness
    )
    self.contrast_dist = ops.NormalDistribution(
        mean=1.0, stddev=augment_contrast
    )
    self.hue_dist = ops.NormalDistribution(mean=0.0, stddev=augment_hue)
    self.saturation_dist = ops.NormalDistribution(
        mean=1.0, stddev=augment_saturation
    )

    # Rotation is rejected outright: the warning class is raised as an
    # exception, so construction stops here.
    if rotate_augment:
        raise RuntimeWarning("--augment-rotate current has no effect when using the DALI data loader.")

    # A list of candidate sizes collapses to its largest entry.
    if isinstance(resize, list):
        resize = max(resize)

    self.rand_resize = ops.Uniform(range=[resize, float(max_size)])

    cubic = types.DALIInterpType.INTERP_CUBIC
    self.resize_train = ops.Resize(
        device='gpu', interp_type=cubic, save_attrs=True
    )
    self.resize_infer = ops.Resize(
        device='gpu',
        interp_type=cubic,
        resize_longer=max_size,
        save_attrs=True,
    )

    # Round max_size up to the next multiple of the stride.
    round_up = (self.stride - max_size % self.stride) % self.stride
    padded_size = max_size + round_up

    self.pad = ops.Paste(
        device='gpu',
        fill_value=0,
        ratio=1.1,
        min_canvas_size=padded_size,
        paste_x=0,
        paste_y=0,
    )
    self.normalize = ops.CropMirrorNormalize(
        device='gpu',
        mean=mean,
        std=std,
        crop=(padded_size, padded_size),
        crop_pos_x=0,
        crop_pos_y=0,
    )
def __init__(self, p: float = .5,
             brightness_limit: Union[List, float] = 0.5,
             contrast_limit: Union[List, float] = 0.5):
    """Set up the random brightness/contrast operators.

    Args:
        p (float, optional): Probability given to the coin-flip generator
            that gates this transformation. Defaults to .5.
        brightness_limit (Union[List, float], optional): Brightness factor
            range as ``[min, max]``, or a single float. The value is
            normalized by ``_check_and_convert_limit_value``; a single
            float is documented as meaning ``1 + (-limit, limit)``.
            Defaults to 0.5.
        contrast_limit (Union[List, float], optional): Contrast factor
            range, in the same format as ``brightness_limit``.
            Defaults to 0.5.
    """
    # Normalize both limits before handing them to the generators.
    brightness_range = _check_and_convert_limit_value(brightness_limit)
    contrast_range = _check_and_convert_limit_value(contrast_limit)

    self.brightness_uniform = ops.Uniform(range=brightness_range)
    self.contrast_uniform = ops.Uniform(range=contrast_range)
    self.random_brightness_contrast = ops.BrightnessContrast(device='gpu')

    # Coin flip with probability p, plus a cast-to-BOOL operator.
    self.rng = ops.CoinFlip(probability=p)
    self.bool = ops.Cast(dtype=types.DALIDataType.BOOL)
def __init__(self, default_boxes, args, seed):
    """Create the operators of the COCO SSD training pipeline.

    Args:
        default_boxes: Object whose ``as_ltrb_list()`` result is used as the
            box encoder's anchors.
        args: Settings object; reads ``batch_size``, ``local_rank``,
            ``num_workers``, ``train_coco_root``, ``train_annotate`` and
            ``fp16``.
        seed: Seed forwarded to the parent pipeline constructor.
    """
    super(COCOPipeline, self).__init__(
        batch_size=args.batch_size,
        device_id=args.local_rank,
        num_threads=args.num_workers,
        seed=seed,
    )

    # Fall back to a single shard when torch.distributed has no default
    # process group. Older PyTorch releases signal that with RuntimeError,
    # newer ones with ValueError, so both are treated the same way.
    try:
        shard_id = torch.distributed.get_rank()
        num_shards = torch.distributed.get_world_size()
    except (RuntimeError, ValueError):
        shard_id = 0
        num_shards = 1

    self.input = ops.COCOReader(
        file_root=args.train_coco_root,
        annotations_file=args.train_annotate,
        skip_empty=True,
        shard_id=shard_id,
        num_shards=num_shards,
        ratio=True,
        ltrb=True,
        random_shuffle=False,
        shuffle_after_epoch=True,
    )
    self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

    # Augmentation techniques
    self.crop = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        ltrb=True,
        allow_no_crop=True,
        num_attempts=1,
    )
    self.slice = ops.Slice(device="cpu")

    # Use float to avoid clipping and quantizing the intermediate result.
    self.hsv = ops.Hsv(device="gpu", dtype=types.FLOAT)
    # Input is in float, but in the 0..255 range, hence contrast_center=128.
    self.bc = ops.BrightnessContrast(
        device="gpu",
        contrast_center=128,
        dtype=types.UINT8,
    )

    self.resize = ops.Resize(
        device="cpu",
        resize_x=300,
        resize_y=300,
        min_filter=types.DALIInterpType.INTERP_TRIANGULAR,
    )

    # Output precision follows the fp16 flag.
    dtype = types.FLOAT16 if args.fp16 else types.FLOAT

    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        crop=(300, 300),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=0,
        dtype=dtype,
        output_layout=types.NCHW,
        pad_output=False,
    )

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])

    self.flip = ops.Flip(device="cpu")
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_coin = ops.CoinFlip(probability=0.5)

    self.box_encoder = ops.BoxEncoder(
        device="cpu",
        criteria=0.5,
        anchors=default_boxes.as_ltrb_list(),
    )
def __init__(self, args, device_id, file_root, annotations_file):
    """Create CPU and GPU variants of every detection pipeline operator.

    Args:
        args: Settings object; reads ``batch_size``, ``num_workers``,
            ``prefetch``, ``seed`` and ``num_gpus``.
        device_id: Pipeline device id; also the reader's ``shard_id``.
        file_root: ``file_root`` of the COCO reader.
        annotations_file: ``annotations_file`` of the COCO reader.
    """
    super(DetectionPipeline, self).__init__(
        batch_size=args.batch_size,
        num_threads=args.num_workers,
        device_id=device_id,
        prefetch_queue_depth=args.prefetch,
        seed=args.seed,
    )

    # Reading COCO dataset
    self.input = ops.readers.COCO(
        file_root=file_root,
        annotations_file=annotations_file,
        shard_id=device_id,
        num_shards=args.num_gpus,
        ratio=True,
        ltrb=True,
        random_shuffle=True,
    )

    # Decoders: plain and slicing, on "cpu" and on "mixed".
    self.decode_cpu = ops.decoders.Image(
        device="cpu", output_type=types.RGB
    )
    self.decode_crop = ops.decoders.ImageSlice(
        device="cpu", output_type=types.RGB
    )
    self.decode_gpu = ops.decoders.Image(
        device="mixed", output_type=types.RGB, hw_decoder_load=0
    )
    self.decode_gpu_crop = ops.decoders.ImageSlice(
        device="mixed", output_type=types.RGB, hw_decoder_load=0
    )

    # Two cropping strategies, both seeded from args.seed.
    self.ssd_crop = ops.SSDRandomCrop(
        device="cpu", num_attempts=1, seed=args.seed
    )
    self.random_bbox_crop = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        bbox_layout="xyXY",
        seed=args.seed,
    )

    self.slice_cpu = ops.Slice(device="cpu")
    self.slice_gpu = ops.Slice(device="gpu")

    # The CPU and GPU resize operators share the same settings.
    resize_settings = dict(
        resize_x=300,
        resize_y=300,
        min_filter=types.DALIInterpType.INTERP_TRIANGULAR,
    )
    self.resize_cpu = ops.Resize(device="cpu", **resize_settings)
    self.resize_gpu = ops.Resize(device="gpu", **resize_settings)

    # Normalization settings shared by both devices.
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    crop_size = (300, 300)
    normalize_settings = dict(
        crop=crop_size, mean=mean, std=std, mirror=0, dtype=types.FLOAT
    )
    self.normalize_cpu = ops.CropMirrorNormalize(
        device="cpu", **normalize_settings
    )
    self.normalize_gpu = ops.CropMirrorNormalize(
        device="gpu", **normalize_settings
    )

    # Color operators.
    self.twist_cpu = ops.ColorTwist(device="cpu")
    self.twist_gpu = ops.ColorTwist(device="gpu")

    self.hsv_cpu = ops.Hsv(device="cpu", dtype=types.FLOAT)
    self.hsv_gpu = ops.Hsv(device="gpu", dtype=types.FLOAT)

    bc_settings = dict(dtype=types.UINT8, contrast_center=128)
    self.bc_cpu = ops.BrightnessContrast(device="cpu", **bc_settings)
    self.bc_gpu = ops.BrightnessContrast(device="gpu", **bc_settings)

    # Image and bounding-box flips.
    self.flip_cpu = ops.Flip(device="cpu")
    self.bbox_flip_cpu = ops.BbFlip(device="cpu", ltrb=True)

    self.flip_gpu = ops.Flip(device="gpu")
    self.bbox_flip_gpu = ops.BbFlip(device="gpu", ltrb=True)

    # Box encoders: plain, and with offsets enabled.
    default_boxes = coco_anchors()
    self.box_encoder_cpu = ops.BoxEncoder(
        device="cpu", criteria=0.5, anchors=default_boxes
    )
    self.box_encoder_gpu = ops.BoxEncoder(
        device="gpu", criteria=0.5, anchors=default_boxes
    )

    offset_settings = dict(
        criteria=0.5,
        offset=True,
        scale=2,
        stds=[0.1, 0.1, 0.2, 0.2],
        anchors=default_boxes,
    )
    self.box_encoder_cpu_offsets = ops.BoxEncoder(
        device="cpu", **offset_settings
    )
    self.box_encoder_gpu_offsets = ops.BoxEncoder(
        device="gpu", **offset_settings
    )

    # Random variables
    self.saturation_rng = ops.random.Uniform(range=[0.8, 1.2])
    self.contrast_rng = ops.random.Uniform(range=[0.5, 1.5])
    # NOTE: the attribute name is spelled "brighness" in the original and is
    # kept as-is because other code may reference it.
    self.brighness_rng = ops.random.Uniform(range=[0.875, 1.125])
    self.hue_rng = ops.random.Uniform(range=[-45, 45])
def __init__(self, batch_size, num_threads, device_id, seed, image_dir):
    """Initialize the parent pipeline and add a fixed brightness operator.

    All five arguments are forwarded positionally, in order, to the parent
    constructor. The brightness operator is created on the GPU with
    ``brightness=0.5``.
    """
    super(BrightnessPipeline, self).__init__(
        batch_size, num_threads, device_id, seed, image_dir
    )
    self.brightness = ops.BrightnessContrast(
        device="gpu",
        brightness=0.5,
    )
def __init__(self, train=False, batch_size=16, workers=4, size=512):
    """Create the operators of the COCO detection pipeline.

    Args:
        train: Selects the "train" or "val" 2017 split and the matching
            decoder (slicing decoder when True); stored on ``self.train``.
        batch_size: Forwarded positionally to the parent constructor.
        workers: Forwarded positionally to the parent constructor as the
            second argument.
        size: ``resize_longer`` of the resize operator and both dimensions
            of the pad operator's ``shape``.
    """
    # TODO: support size as tuple
    local_rank, world_size = env_rank(), env_world_size()
    super().__init__(batch_size, workers, local_rank, seed=42)

    split_str = "train" if train else "val"
    self.input = ops.COCOReader(
        file_root=f"{DATA_DIR}/{split_str}2017",
        annotations_file=f"{DATA_DIR}/annotations/instances_{split_str}2017.json",
        shard_id=local_rank,
        num_shards=world_size,
        ratio=True,  # want bbox in [0, 1]
        ltrb=True,
        # random_shuffle=train,
        save_img_ids=True,  # Need ids for evaluation
        skip_empty=True,  # skips images without objects. not sure if we want to do so
    )

    self.bbox_crop = ops.RandomBBoxCrop(
        device="cpu",  # gpu is not supported (and not needed actually)
        bbox_layout="xyXY",  # same as 'ltrb'
        scaling=[0.3, 1.0],
        # adding 0.0 to thr instead of `allow_no_crop`
        thresholds=[0.0, 0.1, 0.3, 0.5, 0.7, 0.9],
    )

    if train:
        self.decode = ops.ImageDecoderSlice(
            device="mixed", output_type=types.RGB
        )
    else:
        self.decode = ops.ImageDecoder(
            device="mixed", output_type=types.RGB
        )

    # The original built a second Resize with save_attrs=True immediately
    # before this one and overwrote it at once; only this operator was ever
    # reachable, so the dead construction has been removed.
    self.resize = ops.Resize(
        device="gpu",
        interp_type=types.INTERP_CUBIC,
        resize_longer=size,
    )

    self.bbox_flip = ops.BbFlip(device="cpu", ltrb=True)
    self.img_flip = ops.Flip(device="gpu")

    # color augmentations
    self.bc = ops.BrightnessContrast(device="gpu")
    self.hsv = ops.Hsv(device="gpu")

    # pad to match output stride
    self.pad = ops.Pad(
        device="gpu", fill_value=0, axes=(1, 2), shape=(size, size)
    )
    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        # Imagenet mean and std
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
    )
    # TODO: add Jitter aug

    # Random number generation for augmentation
    self.coin_flip = ops.CoinFlip(probability=0.5)
    self.rng1 = ops.Uniform(range=[0, 1])
    self.rng2 = ops.Uniform(range=[0.85, 1.15])
    self.rng3 = ops.Uniform(range=[-15, 15])
    self.train = train
def __init__(self, train=False, batch_size=16, size=384, num_threads=4,
             device_id=0):
    """Create the external-source image/mask pipeline operators.

    Args:
        train: Selects the random-crop decoder (True) or the fixed-crop
            decoder (False); also passed to ``ExternalInputIterator`` and
            stored on ``self.train``.
        batch_size: Pipeline batch size; also passed to
            ``ExternalInputIterator``.
        size: Target side length for both resize operators and for the
            fixed-crop decoder.
        num_threads: Forwarded positionally to the parent constructor.
        device_id: Forwarded positionally to the parent constructor.
    """
    super(ExternalSourcePipeline, self).__init__(
        batch_size, num_threads, device_id, seed=42
    )

    # Iterator over the external data, plus one source each for images
    # and masks.
    self.eii = iter(ExternalInputIterator(train, batch_size))
    self.images_input = ops.ExternalSource()
    self.masks_input = ops.ExternalSource()

    if train:
        # The random crop area is +/-30% around (size / 784) squared.
        fixed_area = (size / 784)**2
        self.decode = ops.ImageDecoderRandomCrop(
            device="mixed",
            random_area=[fixed_area*0.7, fixed_area*1.3],
            random_aspect_ratio=[0.7, 1.3],
        )
    else:
        self.decode = ops.ImageDecoderCrop(
            device="mixed",
            crop=(size, size),
        )

    # Images use triangular interpolation, masks use nearest-neighbour.
    self.resize = ops.Resize(
        device="gpu",
        interp_type=types.INTERP_TRIANGULAR,
        resize_x=size,
        resize_y=size,
    )
    self.mask_resize = ops.Resize(
        device="gpu",
        interp_type=types.INTERP_NN,
        resize_x=size,
        resize_y=size,
    )

    # Single-element mean/std lists for the image normalizer.
    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        mean=[0.5 * 255],
        std=[0.5 * 255],
        output_layout=types.NCHW,
    )
    self.mask_normalize = ops.CropMirrorNormalize(
        device="gpu",
        mean=[0],
        std=[255],
        output_layout=types.NCHW,
        image_type=types.GRAY,
    )

    # extra augmentations
    self.to_gray = ops.ColorSpaceConversion(
        device="gpu",
        image_type=types.RGB,
        output_type=types.GRAY,
    )
    self.contrast = ops.BrightnessContrast(device="gpu")
    self.hsv = ops.Hsv(device="gpu")
    self.jitter = ops.Jitter(device="gpu")

    # Random generators (no rng1 is created here).
    self.rng2 = ops.Uniform(range=[0.8, 1.2])
    self.rng3 = ops.Uniform(range=[-30, 30])  # for hue
    self.coin03 = ops.CoinFlip(probability=0.3)

    self.train = train