def create_video_pipe(video_files, sequence_length=8, target_size=224, stride=30):
    """Create a DALI pipeline that reads video sequences on the GPU and normalizes them.

    Args:
        video_files: Forwarded to the video reader as ``filenames``.
        sequence_length: Forwarded to the video reader as ``sequence_length``.
        target_size: Side length of the square crop taken during normalization.
        stride: Forwarded to the video reader as ``stride``.

    Returns:
        The unbuilt ``Pipeline``; its single output is the normalized sequence
        in "FCHW" layout.
    """
    # Normalization constants expressed on the 0..255 pixel scale.
    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # Positional arguments: batch_size=1, num_threads=4, device_id=0.
    pipeline = Pipeline(1, 4, 0, seed=42)
    with pipeline:
        frames = fn.readers.video(
            device="gpu",
            filenames=video_files,
            sequence_length=sequence_length,
            stride=stride,
            shard_id=0,
            num_shards=1,
            random_shuffle=False,
            pad_last_batch=True,
            name="Reader",
        )
        normalized = fn.crop_mirror_normalize(
            frames,
            dtype=types.FLOAT,
            output_layout="FCHW",
            crop=(target_size, target_size),
            mean=channel_mean,
            std=channel_std,
        )
        pipeline.set_outputs(normalized)
    return pipeline
def val_pipeline(cfg: ValLoaderConfig):
    """Define the validation graph: read, decode, resize, center-crop and normalize.

    Args:
        cfg: Loader configuration; ``image_size``, ``full_crop`` and
            ``num_classes`` are read from it.

    Returns:
        A ``(image, label)`` pair of DALI nodes; the label is one-hot encoded
        and moved to the GPU.
    """
    encoded, target = fn.readers.file(
        file_root=ROOT_DATA_DIR + "/val/",
        shard_id=env_rank(),
        num_shards=env_world_size(),
        name="Reader",
    )
    decoded = fn.decoders.image(encoded, device="mixed", output_type=types.RGB)

    if cfg.full_crop:
        shorter_side = cfg.image_size
    else:
        # Enlarge by 1.14, add 8 and floor to a multiple of 16 (i.e. round to
        # the nearest multiple of 16) before the final crop.
        shorter_side = math.ceil((cfg.image_size * 1.14 + 8) // 16 * 16)

    resized = fn.resize(
        decoded,
        device="gpu",
        interp_type=types.INTERP_TRIANGULAR,
        resize_shorter=shorter_side,
    )
    normalized = fn.crop_mirror_normalize(
        resized,
        device="gpu",
        crop=(cfg.image_size, cfg.image_size),
        mean=DATA_MEAN,
        std=DATA_STD,
        dtype=types.FLOAT,
        output_layout=types.NCHW,
    )
    one_hot_target = fn.one_hot(target, num_classes=cfg.num_classes).gpu()
    return normalized, one_hot_target
def get_pipeline(folder="train", custom_reader=None):
    """Build an augmenting grayscale image pipeline and wrap it in a PyTorch iterator.

    Args:
        folder: Root directory handed to the file reader when no custom reader
            is supplied.
        custom_reader: Optional ``(raw_files, labels)`` pair used instead of the
            built-in file reader.

    Returns:
        A ``DALIClassificationIterator`` over the built pipeline.
    """
    pipe = Pipeline(batch_size=64, num_threads=1, device_id=1)

    raw_files, labels = (
        custom_reader
        if custom_reader
        else fn.file_reader(file_root="%s" % folder, random_shuffle=True)
    )

    decoded = fn.image_decoder(raw_files, device="mixed", output_type=types.GRAY)
    resized = fn.resize(
        decoded,
        device="gpu",
        image_type=types.RGB,
        interp_type=types.INTERP_LINEAR,
        resize_x=WIDTH,
        resize_y=HEIGHT,
    )

    # Random colour jitter; the random nodes are created in the same order as
    # they are consumed (hue, saturation, value).
    hue_shift = fn.uniform(range=(-10, 10))
    saturation_scale = fn.uniform(range=(-.5, .5))
    value_scale = fn.uniform(range=(0.9, 1.2))
    jittered = fn.hsv(
        resized,
        hue=hue_shift,
        saturation=saturation_scale,
        value=value_scale,
        device="gpu",
        dtype=types.UINT8,
    )

    brightness_scale = fn.uniform(range=(.9, 1.1))
    brightened = fn.brightness_contrast(jittered, device="gpu", brightness=brightness_scale)

    normalized = fn.crop_mirror_normalize(
        brightened,
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NHWC,
        image_type=types.GRAY,
        mean=[255 // 2],
        std=[255 // 2],
    )

    angle = fn.uniform(range=(-40, 40))
    rotated = fn.rotate(normalized, angle=angle, device="gpu", keep_size=True)

    # Reorder axes into the layout PyTorch expects.
    channels_first = fn.transpose(rotated, perm=(2, 0, 1), device="gpu")

    pipe.set_outputs(channels_first, labels)
    pipe.build()
    return DALIClassificationIterator([pipe], -1)
def check_gaussian_blur_output(batch_size, sigma, window_size, op_type="cpu"):
    """Run a decode -> gaussian blur -> normalize pipeline for three iterations.

    Args:
        batch_size: Pipeline batch size.
        sigma: Forwarded to ``fn.gaussian_blur``.
        window_size: Forwarded to ``fn.gaussian_blur``.
        op_type: Device for the blur and normalize operators; ``"cpu"`` selects
            the CPU decoder, anything else selects the "mixed" decoder.
    """
    if op_type == "cpu":
        decoder_device = "cpu"
    else:
        decoder_device = "mixed"

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipe:
        encoded, _ = fn.file_reader(file_root=images_dir, shard_id=0, num_shards=1)
        decoded = fn.image_decoder(encoded, device=decoder_device, output_type=types.RGB)
        blurred = fn.gaussian_blur(
            decoded, device=op_type, sigma=sigma, window_size=window_size
        )
        normalized = fn.crop_mirror_normalize(
            blurred,
            device=op_type,
            dtype=types.FLOAT,
            output_layout="HWC",
            mean=[128.0, 128.0, 128.0],
            std=[100.0, 100.0, 100.0],
        )
        pipe.set_outputs(normalized)
    pipe.build()

    # Only successful execution is exercised; the outputs are not inspected.
    for _ in range(3):
        pipe.run()
def mnist_pipeline(num_threads, path, device, device_id=0, shard_id=0, num_shards=1, seed=0):
    """Build a pipeline reading grayscale images from a Caffe2 database.

    Args:
        num_threads: Number of pipeline worker threads.
        path: Database path given to the Caffe2 reader.
        device: ``'gpu'`` to decode with the "mixed" decoder and move labels to
            the GPU; any other value decodes on the CPU.
        device_id: Device index for the pipeline.
        shard_id: Index of the shard to read.
        num_shards: Total number of shards.
        seed: Pipeline seed.

    Returns:
        The unbuilt ``Pipeline`` with outputs ``(images, labels)``.
    """
    on_gpu = device == 'gpu'
    pipeline = Pipeline(BATCH_SIZE, num_threads, device_id, seed)
    with pipeline:
        encoded, labels = fn.readers.caffe2(
            path=path,
            random_shuffle=True,
            shard_id=shard_id,
            num_shards=num_shards,
        )
        decoded = fn.image_decoder(
            encoded,
            device='mixed' if on_gpu else 'cpu',
            output_type=types.GRAY,
        )
        if on_gpu:
            labels = labels.gpu()
        # Scale pixel values from 0..255 down to 0..1.
        scaled = fn.crop_mirror_normalize(
            decoded, dtype=types.FLOAT, mean=[0.], std=[255.], output_layout="CHW"
        )
        pipeline.set_outputs(scaled, labels)
    return pipeline
def order_change_pipeline():
    """Pipeline definition whose ``mirror`` input changes after the first call.

    The first call (while ``order_change_pipeline.change`` is falsy) sets the
    flag and uses a seeded coin flip as the mirror argument; every later call
    uses the constant ``0`` instead.

    Returns:
        ``(rng, jpegs, labels, images, resized_images, output)``.
    """
    if not order_change_pipeline.change:
        order_change_pipeline.change = True
        rng = fn.random.coin_flip(probability=0.5, seed=47)
    else:
        rng = 0

    jpegs, labels = fn.readers.file(file_root=file_root, shard_id=0, num_shards=2)
    images = fn.decoders.image(jpegs, device='mixed', output_type=types.RGB)
    resized_images = fn.random_resized_crop(images, device="gpu", size=(224, 224), seed=27)

    output = fn.crop_mirror_normalize(
        resized_images.gpu(),
        mirror=rng,
        device="gpu",
        dtype=types.FLOAT16,
        crop=(224, 224),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    return rng, jpegs, labels, images, resized_images, output
def create_coco_pipeline(file_root, annotations_file, batch_size=1, device_id=0,
                         num_threads=4, local_rank=0, world_size=1):
    """Build a COCO detection pipeline with random bbox crop, colour jitter and flip.

    Args:
        file_root: Directory with the COCO images.
        annotations_file: Path to the COCO annotations JSON.
        batch_size: Pipeline batch size.
        device_id: Added to 42 to form the pipeline seed.
        num_threads: Number of pipeline worker threads.
        local_rank: Used both as the pipeline's device index and as the reader
            shard id.
        world_size: Number of reader shards.

    Returns:
        The unbuilt ``Pipeline`` with outputs ``(images, bboxes, labels)``.
    """
    pipeline = Pipeline(batch_size, num_threads, local_rank, seed=42 + device_id)
    with pipeline:
        encoded, bboxes, labels = fn.coco_reader(
            file_root=file_root,
            annotations_file=annotations_file,
            skip_empty=True,
            shard_id=local_rank,
            num_shards=world_size,
            ratio=True,
            ltrb=True,
            random_shuffle=False,
            shuffle_after_epoch=True,
            name="Reader",
        )

        crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(
            bboxes,
            labels,
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            bbox_layout="xyXY",
            allow_no_crop=True,
            num_attempts=50,
        )
        images = fn.image_decoder_slice(
            encoded, crop_begin, crop_size, device="mixed", output_type=types.RGB
        )
        # One coin drives both the image mirror and the bbox flip so they agree.
        flip_coin = fn.coin_flip(probability=0.5)
        images = fn.resize(
            images,
            resize_x=300,
            resize_y=300,
            min_filter=types.DALIInterpType.INTERP_TRIANGULAR,
        )

        # Use float to avoid clipping and quantizing the intermediate result.
        hue_shift = fn.uniform(range=[-0.5, 0.5])
        saturation_scale = fn.uniform(range=[0.5, 1.5])
        images = fn.hsv(
            images, dtype=types.FLOAT, hue=hue_shift, saturation=saturation_scale
        )

        brightness_scale = fn.uniform(range=[0.875, 1.125])
        contrast_scale = fn.uniform(range=[0.5, 1.5])
        images = fn.brightness_contrast(
            images,
            contrast_center=128,  # input is in float, but in 0..255 range
            dtype=types.UINT8,
            brightness=brightness_scale,
            contrast=contrast_scale,
        )

        bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)
        images = fn.crop_mirror_normalize(
            images,
            mean=[104., 117., 123.],
            std=[1., 1., 1.],
            mirror=flip_coin,
            dtype=types.FLOAT,
            output_layout="CHW",
            pad_output=False,
        )
        pipeline.set_outputs(images, bboxes, labels)
    return pipeline
def dali_data_iter(batch_size: int, rec_file: str, idx_file: str, num_threads: int,
                   initial_fill=32768, random_shuffle=True, prefetch_queue_depth=1,
                   local_rank=0, name="reader",
                   mean=(127.5, 127.5, 127.5), std=(127.5, 127.5, 127.5)):
    """Create a sharded DALI iterator over an MXNet RecordIO dataset.

    Args:
        batch_size: Pipeline batch size.
        rec_file: Path to the RecordIO file.
        idx_file: Path to the matching index file.
        num_threads: Number of pipeline worker threads.
        initial_fill: Size of the buffer that is used for shuffling. Ignored
            when ``random_shuffle`` is False.
        random_shuffle: Whether the reader shuffles samples.
        prefetch_queue_depth: Pipeline prefetch queue depth.
        local_rank: Device index for the pipeline.
        name: Reader name, also used by the iterator to size an epoch.
        mean: Per-channel mean subtracted during normalization.
        std: Per-channel divisor used during normalization.

    Returns:
        A ``DALIWarper`` around a ``DALIClassificationIterator``.
    """
    # DALI is imported lazily so the module can be imported without it.
    import nvidia.dali.fn as fn
    import nvidia.dali.types as types
    from nvidia.dali.pipeline import Pipeline
    from nvidia.dali.plugin.pytorch import DALIClassificationIterator

    # Each process reads its own shard of the dataset.
    rank: int = distributed.get_rank()
    world_size: int = distributed.get_world_size()

    pipe = Pipeline(
        batch_size=batch_size,
        num_threads=num_threads,
        device_id=local_rank,
        prefetch_queue_depth=prefetch_queue_depth,
    )
    condition_flip = fn.random.coin_flip(probability=0.5)
    with pipe:
        encoded, labels = fn.readers.mxnet(
            path=rec_file,
            index_path=idx_file,
            initial_fill=initial_fill,
            num_shards=world_size,
            shard_id=rank,
            random_shuffle=random_shuffle,
            pad_last_batch=False,
            name=name,
        )
        decoded = fn.decoders.image(encoded, device="mixed", output_type=types.RGB)
        normalized = fn.crop_mirror_normalize(
            decoded, dtype=types.FLOAT, mean=mean, std=std, mirror=condition_flip
        )
        pipe.set_outputs(normalized, labels)
    pipe.build()

    iterator = DALIClassificationIterator(pipelines=[pipe], reader_name=name)
    return DALIWarper(iterator)
def get_pipe():
    """Feed all-zero uint8 batches through ``crop_mirror_normalize`` with a 10x20 crop.

    ``input_shape``, ``batch_size`` and ``device`` are taken from the enclosing
    scope.
    """
    def zero_batch():
        # One zero-filled sample per batch entry.
        batch = []
        for _ in range(batch_size):
            batch.append(np.zeros(input_shape, dtype=np.uint8))
        return batch

    source_node = fn.external_source(source=zero_batch, device=device)
    return fn.crop_mirror_normalize(source_node, crop_h=10, crop_w=20)
def create_dali_pipeline(data_dir, crop, size, shard_id, num_shards, dali_cpu=False,
                         is_training=True):
    """Define an image-classification graph for training or evaluation.

    Args:
        data_dir: Root directory for the file reader.
        crop: Side length of the final square crop (and of the training resize).
        size: Target size for the evaluation resize (``mode="not_smaller"``).
        shard_id: Index of the shard to read.
        num_shards: Total number of shards.
        dali_cpu: When true, decode and resize on the CPU instead of the GPU.
        is_training: Selects the random-crop/flip branch and enables shuffling.

    Returns:
        ``(images, labels)`` DALI nodes, both on the GPU.
    """
    encoded, labels = fn.readers.file(
        file_root=data_dir,
        shard_id=shard_id,
        num_shards=num_shards,
        random_shuffle=is_training,
        pad_last_batch=True,
        name="Reader",
    )

    if dali_cpu:
        dali_device = decoder_device = 'cpu'
        device_memory_padding = host_memory_padding = 0
    else:
        dali_device, decoder_device = 'gpu', 'mixed'
        # Memory padding hints are only supplied for the mixed decoder.
        device_memory_padding = 211025920
        host_memory_padding = 140544512

    if not is_training:
        images = fn.decoders.image(encoded, device=decoder_device, output_type=types.RGB)
        images = fn.resize(
            images,
            device=dali_device,
            size=size,
            mode="not_smaller",
            interp_type=types.INTERP_TRIANGULAR,
        )
        mirror = False
    else:
        images = fn.decoders.image_random_crop(
            encoded,
            device=decoder_device,
            output_type=types.RGB,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[0.1, 1.0],
            num_attempts=100,
        )
        images = fn.resize(
            images,
            device=dali_device,
            resize_x=crop,
            resize_y=crop,
            interp_type=types.INTERP_TRIANGULAR,
        )
        mirror = fn.random.coin_flip(probability=0.5)

    images = fn.crop_mirror_normalize(
        images.gpu(),
        dtype=types.FLOAT,
        output_layout="CHW",
        crop=(crop, crop),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=mirror,
    )
    return images, labels.gpu()
def create_dali_pipeline(batch_size, num_threads, device_id, data_dir):
    """Build a pipeline yielding aligned (image, segmentation mask) 512x512 crops.

    Sample prefixes are read from ``<data_dir>/file_list.txt`` (one per line),
    shuffled once on the host, and expanded into parallel image / label file
    lists.

    Args:
        batch_size: Pipeline batch size.
        num_threads: Number of pipeline worker threads.
        device_id: Device index for the pipeline; also added to 12 to form the
            pipeline seed.
        data_dir: Dataset root containing ``file_list.txt``,
            ``leftImg8bit/train`` and ``gtFine/train``.

    Returns:
        The unbuilt ``Pipeline`` with outputs ``(imgs, segs)``.
    """
    with open(join(data_dir, "file_list.txt"), "r") as f:
        # Skip blank lines: an empty prefix would expand to a file path that
        # does not exist. (An identity test against '' never filters anything.)
        stripped = (line.rstrip() for line in f)
        files = [prefix for prefix in stripped if prefix]
    shuffle(files)

    img_files = [
        join(data_dir, "leftImg8bit/train", prefix + "_leftImg8bit.png")
        for prefix in files
    ]
    seg_files = [
        join(data_dir, "gtFine/train", prefix + "_gtFine_labelIds.png")
        for prefix in files
    ]

    pipeline = Pipeline(batch_size, num_threads, device_id, seed=12 + device_id)
    with pipeline:
        # Both readers keep the (already shuffled) list order so that every
        # image stays paired with its segmentation mask.
        imgs, _ = fn.file_reader(files=img_files, shard_id=0, num_shards=1,
                                 random_shuffle=False, pad_last_batch=True)
        segs, _ = fn.file_reader(files=seg_files, shard_id=0, num_shards=1,
                                 random_shuffle=False, pad_last_batch=True)

        dali_device = 'gpu'
        decoder_device = 'mixed'
        # Decoder memory pre-allocation is deliberately disabled here.
        device_memory_padding = 0
        host_memory_padding = 0

        imgs = fn.image_decoder(
            imgs,
            device=decoder_device,
            output_type=types.RGB,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            hybrid_huffman_threshold=250000,
        )
        segs = fn.image_decoder(
            segs,
            device=decoder_device,
            output_type=types.GRAY,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            hybrid_huffman_threshold=250000,
        )

        imgs = fn.crop_mirror_normalize(
            imgs,
            device=dali_device,
            crop=(512, 512),
            dtype=types.FLOAT,
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
            output_layout="CHW",
        )
        # Masks are only cropped, never normalized, so label ids stay intact.
        segs = fn.crop(segs, device=dali_device, dtype=types.UINT8, crop=(512, 512))
        pipeline.set_outputs(imgs, segs)
    return pipeline
def rn50_pipeline_2(data_path):
    """Define a graph with a random shorter-side resize and a randomly placed 224x224 crop.

    Args:
        data_path: Root directory for the file reader.

    Returns:
        The normalized FLOAT16 image node.
    """
    # Two uniform values in [0, 1) pick the crop anchor along x and y.
    crop_anchor = fn.random.uniform(range=(0., 1.), shape=2)
    shorter_side = fn.random.uniform(range=(256., 480.))
    flip = fn.random.coin_flip(probability=0.5)

    encoded, _ = fn.readers.file(file_root=data_path)
    decoded = fn.decoders.image(encoded, device='mixed', output_type=types.RGB)
    resized = fn.resize(
        decoded,
        device='gpu',
        interp_type=types.INTERP_LINEAR,
        resize_shorter=shorter_side,
    )
    return fn.crop_mirror_normalize(
        resized,
        device='gpu',
        dtype=types.FLOAT16,
        crop=(224, 224),
        mean=[128., 128., 128.],
        std=[1., 1., 1.],
        mirror=flip,
        crop_pos_x=crop_anchor[0],
        crop_pos_y=crop_anchor[1],
    )
def create_dali_pipeline(data_dir, crop, size, shard_id, num_shards, dali_cpu=False,
                         is_training=True):
    """Define an image-classification graph with decoder preallocation hints.

    Args:
        data_dir: Root directory for the file reader.
        crop: Side length of the final square crop (and of the training resize).
        size: Target size for the evaluation resize (``mode="not_smaller"``).
        shard_id: Index of the shard to read.
        num_shards: Total number of shards.
        dali_cpu: When true, decode and resize on the CPU instead of the GPU.
        is_training: Selects the random-crop/flip branch and enables shuffling.

    Returns:
        ``(images, labels)`` DALI nodes, both on the GPU.
    """
    encoded, labels = fn.readers.file(
        file_root=data_dir,
        shard_id=shard_id,
        num_shards=num_shards,
        random_shuffle=is_training,
        pad_last_batch=True,
        name="Reader",
    )

    if dali_cpu:
        dali_device = decoder_device = 'cpu'
        device_memory_padding = host_memory_padding = 0
        preallocate_width_hint = preallocate_height_hint = 0
    else:
        dali_device, decoder_device = 'gpu', 'mixed'
        # Ask nvJPEG to preallocate memory for the biggest sample in ImageNet
        # for CPU and GPU to avoid reallocations at runtime.
        device_memory_padding = 211025920
        host_memory_padding = 140544512
        # Ask HW NVJPEG to allocate memory ahead for the biggest image in the
        # data set to avoid reallocations at runtime.
        preallocate_width_hint = 5980
        preallocate_height_hint = 6430

    if not is_training:
        images = fn.decoders.image(encoded, device=decoder_device, output_type=types.RGB)
        images = fn.resize(
            images,
            device=dali_device,
            size=size,
            mode="not_smaller",
            interp_type=types.INTERP_TRIANGULAR,
        )
        mirror = False
    else:
        images = fn.decoders.image_random_crop(
            encoded,
            device=decoder_device,
            output_type=types.RGB,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            preallocate_width_hint=preallocate_width_hint,
            preallocate_height_hint=preallocate_height_hint,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[0.1, 1.0],
            num_attempts=100,
        )
        images = fn.resize(
            images,
            device=dali_device,
            resize_x=crop,
            resize_y=crop,
            interp_type=types.INTERP_TRIANGULAR,
        )
        mirror = fn.random.coin_flip(probability=0.5)

    images = fn.crop_mirror_normalize(
        images.gpu(),
        dtype=types.FLOAT,
        output_layout="CHW",
        crop=(crop, crop),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=mirror,
    )
    return images, labels.gpu()
def es_pipeline_debug():
    """Define a graph fed by two external sources named 'input' and 'labels'.

    Returns:
        ``(rng, images, output, labels)`` where ``images`` is the random resized
        crop and ``output`` its normalized FLOAT16 version.
    """
    source_images = fn.external_source(name='input')
    labels = fn.external_source(name='labels')
    rng = fn.random.coin_flip(probability=0.5, seed=47)
    images = fn.random_resized_crop(source_images, size=(224, 224), seed=27)

    output = fn.crop_mirror_normalize(
        images.gpu(),
        mirror=rng,
        device="gpu",
        dtype=types.FLOAT16,
        crop=(224, 224),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    return rng, images, output, labels
def get_image_pipeline(batch_size, num_threads, device, device_id=0, shard_id=0,
                       num_shards=1, def_for_dataset=False):
    """Build a COCO-dummy image pipeline plus the shapes/dtypes describing its outputs.

    Args:
        batch_size: Pipeline batch size.
        num_threads: Number of pipeline worker threads.
        device: ``'gpu'`` selects the "mixed" decoder and moves the image ids to
            the GPU; anything else keeps everything on the CPU.
        device_id: Device index for the pipeline.
        shard_id: Index of the shard to read.
        num_shards: Total number of shards.
        def_for_dataset: Accepted but not used by this function.

    Returns:
        ``(pipe, shapes, dtypes)``: the unbuilt pipeline and the per-output
        batch shapes and TensorFlow dtypes.
    """
    coco_dir = os.path.join(get_dali_extra_path(), 'db', 'coco_dummy')
    file_root = os.path.join(coco_dir, 'images')
    annotations_file = os.path.join(coco_dir, 'instances.json')
    use_gpu = device == 'gpu'

    pipe = Pipeline(batch_size, num_threads, device_id)
    with pipe:
        encoded, _, _, image_ids = fn.readers.coco(
            file_root=file_root,
            annotations_file=annotations_file,
            shard_id=shard_id,
            num_shards=num_shards,
            ratio=False,
            image_ids=True,
        )
        images = fn.decoders.image(
            encoded,
            device=('mixed' if use_gpu else 'cpu'),
            output_type=types.RGB,
        )
        images = fn.resize(
            images, resize_x=224, resize_y=224, interp_type=types.INTERP_LINEAR
        )
        images = fn.crop_mirror_normalize(
            images, dtype=types.FLOAT, mean=[128., 128., 128.], std=[1., 1., 1.]
        )
        if use_gpu:
            image_ids = image_ids.gpu()
        # The same ids are exported twice: reshaped to [1, 1] and cast to int16.
        ids_reshaped = fn.reshape(image_ids, shape=[1, 1])
        ids_int16 = fn.cast(image_ids, dtype=types.INT16)
        pipe.set_outputs(images, ids_reshaped, ids_int16)

    shapes = (
        (batch_size, 3, 224, 224),
        (batch_size, 1, 1),
        (batch_size, 1),
    )
    dtypes = (tf.float32, tf.int32, tf.int16)
    return pipe, shapes, dtypes
def injection_pipeline(callback, device='cpu'):
    """Define a graph whose input comes from calling ``callback``.

    Args:
        callback: Zero-argument callable producing the input for the random
            resized crop.
        device: Device for the random resized crop.

    Returns:
        ``(rng, images, output)``: the mirror coin, the cropped images and the
        normalized FLOAT16 output.
    """
    rng = fn.random.coin_flip(probability=0.5, seed=47)
    injected = callback()
    images = fn.random_resized_crop(injected, device=device, size=(224, 224), seed=27)

    output = fn.crop_mirror_normalize(
        images.gpu(),
        mirror=rng,
        device="gpu",
        dtype=types.FLOAT16,
        crop=(224, 224),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    return rng, images, output
def es_pipeline_standard():
    """Define a file-reader graph: decode, random resized crop, normalize.

    Returns:
        ``(rng, images, output, labels)`` where ``images`` is the random resized
        crop and ``output`` its normalized FLOAT16 version.
    """
    encoded, labels = fn.readers.file(file_root=file_root, shard_id=0, num_shards=2)
    decoded = fn.decoders.image(encoded, output_type=types.RGB)
    rng = fn.random.coin_flip(probability=0.5, seed=47)
    images = fn.random_resized_crop(decoded, size=(224, 224), seed=27)

    output = fn.crop_mirror_normalize(
        images.gpu(),
        mirror=rng,
        device="gpu",
        dtype=types.FLOAT16,
        crop=(224, 224),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    return rng, images, output, labels
def pipe():
    """Normalize a random 80x120x3 "image" with constant or random mean/std.

    ``device``, ``rand_mean``, ``rand_stdev``, ``scale`` and ``shift`` are taken
    from the enclosing scope.

    Returns:
        ``(out, image_like, mean, std)`` so the caller can see the input and the
        normalization parameters next to the result.
    """
    image_like = fn.reshape(
        fn.random.uniform(device=device, range=(0, 255), shape=(80, 120, 3)),
        layout="HWC",
    )

    # Constant defaults, optionally replaced by per-sample random values.
    if rand_mean:
        mean = fn.random.uniform(range=(100, 125), shape=(3, ))
    else:
        mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    if rand_stdev:
        std = fn.random.uniform(range=(55, 60), shape=(3, ))
    else:
        std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    out = fn.crop_mirror_normalize(
        image_like,
        dtype=types.FLOAT,
        output_layout="HWC",
        mean=mean,
        std=std,
        scale=scale,
        shift=shift,
        pad_output=False,
    )
    return out, image_like, mean, std
def rn50_pipeline():
    """Define an RN50-style graph while printing intermediate values.

    Every node is inspected through ``.get()`` right after it is created, so
    this definition is presumably meant to run in DALI's debug mode
    (TODO confirm against the decorator at the definition site).

    Returns:
        ``(output, labels.gpu())``.
    """
    rng = fn.random.coin_flip(probability=0.5, seed=47)
    print(f'rng: {rng.get().as_array()}')

    flipped = rng ^ 1
    print(f'rng xor: {flipped.get().as_array()}')

    jpegs, labels = fn.readers.file(file_root=file_root, shard_id=0, num_shards=2)
    if not jpegs.get().is_dense_tensor():
        # Encoded files differ in size, so only per-sample shapes are printed.
        print('jpegs shapes:')
        for sample in jpegs.get():
            print(sample.shape())
    else:
        print(f'jpegs: {jpegs.get().as_array()}')
    print(f'labels: {labels.get().as_array()}')

    images = fn.decoders.image(jpegs, device='mixed', output_type=types.RGB)
    for sample in images.get().as_cpu():
        print(sample)
    for sample in images.get():
        print(sample.shape())

    images = fn.random_resized_crop(images, device="gpu", size=(224, 224), seed=27)
    for sample in images.get():
        print(sample.shape())

    # Show the first sample before and after the arithmetic operation.
    print(np.array(images.get().as_cpu()[0]))
    images += 1
    print(np.array(images.get().as_cpu()[0]))

    output = fn.crop_mirror_normalize(
        images.gpu(),
        mirror=rng,
        device="gpu",
        dtype=types.FLOAT16,
        crop=(224, 224),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )
    return (output, labels.gpu())
def get_pipeline(batch_size, num_threads, device, device_id=0, shard_id=0, num_shards=1):
    """Build a COCO-dummy image pipeline rooted at ``$DALI_EXTRA_PATH``.

    Args:
        batch_size: Pipeline batch size.
        num_threads: Number of pipeline worker threads.
        device: ``'gpu'`` selects the "mixed" decoder and moves the image ids to
            the GPU; anything else keeps everything on the CPU.
        device_id: Device index for the pipeline.
        shard_id: Index of the shard to read.
        num_shards: Total number of shards.

    Returns:
        The unbuilt ``Pipeline`` with outputs
        ``(images, ids_reshaped, ids_int16)``.

    Raises:
        KeyError: If the ``DALI_EXTRA_PATH`` environment variable is not set.
    """
    coco_dir = os.path.join(os.environ['DALI_EXTRA_PATH'], 'db', 'coco_dummy')
    file_root = os.path.join(coco_dir, 'images')
    annotations_file = os.path.join(coco_dir, 'instances.json')
    use_gpu = device == 'gpu'

    pipe = Pipeline(batch_size, num_threads, device_id)
    with pipe:
        encoded, _, _, image_ids = fn.coco_reader(
            file_root=file_root,
            annotations_file=annotations_file,
            shard_id=shard_id,
            num_shards=num_shards,
            ratio=False,
            image_ids=True,
        )
        images = fn.image_decoder(
            encoded,
            device=('mixed' if use_gpu else 'cpu'),
            output_type=types.RGB,
        )
        images = fn.resize(
            images, resize_x=224, resize_y=224, interp_type=types.INTERP_LINEAR
        )
        images = fn.crop_mirror_normalize(
            images, dtype=types.FLOAT, mean=[128., 128., 128.], std=[1., 1., 1.]
        )
        if use_gpu:
            image_ids = image_ids.gpu()
        # The same ids are exported twice: reshaped to [1, 1] and cast to int16.
        ids_reshaped = fn.reshape(image_ids, shape=[1, 1])
        ids_int16 = fn.cast(image_ids, dtype=types.INT16)
        pipe.set_outputs(images, ids_reshaped, ids_int16)
    return pipe
def RN50Pipeline():
    """Define an RN50-style graph configured from the module-level ``args``.

    ``args.device``, ``args.images_dir``, ``args.hw_load``, ``args.width_hint``
    and ``args.height_hint`` are read from the enclosing scope.

    Returns:
        The normalized FLOAT16 image node in NCHW layout.
    """
    if args.device == 'gpu':
        decoder_device = 'mixed'
    else:
        decoder_device = 'cpu'

    encoded, _ = fn.readers.file(file_root=args.images_dir)
    cropped = fn.decoders.image_random_crop(
        encoded,
        device=decoder_device,
        output_type=types.RGB,
        hw_decoder_load=args.hw_load,
        preallocate_width_hint=args.width_hint,
        preallocate_height_hint=args.height_hint,
    )
    resized = fn.resize(cropped, resize_x=224, resize_y=224)
    flip = fn.random.coin_flip(probability=0.5)
    return fn.crop_mirror_normalize(
        resized,
        dtype=types.FLOAT16,
        output_layout=types.NCHW,
        crop=(224, 224),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=flip,
    )
def create_image_pipeline(
    batch_size,
    num_threads,
    device_id,
    image0_list,
    image1_list,
    flow_list,
    valBool,
):
    """Build a pipeline producing a stacked image pair and its flow field.

    Two image lists and one numpy flow list are read with identically seeded
    readers, the two decoded images are concatenated along axis 2, and both
    the stacked images and the flow are normalized.

    Args:
        batch_size: Pipeline batch size.
        num_threads: Number of pipeline worker threads.
        device_id: Device index for the pipeline.
        image0_list: File list for the first image of each pair.
        image1_list: File list for the second image of each pair.
        flow_list: File list of numpy flow files.
        valBool: Truthy for validation (no shuffling, plain resize); falsy for
            training (shuffling, random resized crop).

    Returns:
        The unbuilt ``Pipeline`` with outputs ``(images, flo)``.
    """
    pipeline = Pipeline(batch_size, num_threads, device_id, seed=2)
    with pipeline:
        if valBool:
            shuffleBool = False
        else:
            shuffleBool = True

        """ READ FILES """
        # All three readers share seed=1 — presumably so that shuffling keeps
        # image0 / image1 / flow samples aligned; verify against DALI reader
        # seeding semantics.
        image0, _ = fn.readers.file(
            file_root=args.data,
            files=image0_list,
            random_shuffle=shuffleBool,
            name="Reader",
            seed=1,
        )
        image1, _ = fn.readers.file(
            file_root=args.data,
            files=image1_list,
            random_shuffle=shuffleBool,
            seed=1,
        )
        flo = fn.readers.numpy(
            file_root=args.data,
            files=flow_list,
            random_shuffle=shuffleBool,
            seed=1,
        )

        """ DECODE AND RESHAPE """
        image0 = fn.decoders.image(image0, device="cpu")
        image0 = fn.reshape(image0, layout="HWC")
        image1 = fn.decoders.image(image1, device="cpu")
        image1 = fn.reshape(image1, layout="HWC")
        # Stack the pair along axis 2 (the "C" axis of the HWC layout set above).
        images = fn.cat(image0, image1, axis=2)
        flo = fn.reshape(flo, layout="HWC")

        if valBool:
            # NOTE(review): only the images are resized here; the flow keeps
            # its original size — confirm this is intended.
            images = fn.resize(images, resize_x=162, resize_y=122)
        else:
            """ CO-TRANSFORM """
            # random translate
            # angle_rng = fn.random.uniform(range=(-90, 90))
            # images = fn.rotate(images, angle=angle_rng, fill_value=0)
            # flo = fn.rotate(flo, angle=angle_rng, fill_value=0)

            # Both crops use the same parameters and seed=1 — presumably so the
            # images and the flow receive the same crop window; TODO confirm.
            images = fn.random_resized_crop(
                images,
                size=[122, 162],  # 122, 162
                random_aspect_ratio=[1.3, 1.4],
                random_area=[0.8, 0.9],
                seed=1,
            )
            flo = fn.random_resized_crop(
                flo,
                size=[122, 162],
                random_aspect_ratio=[1.3, 1.4],
                random_area=[0.8, 0.9],
                seed=1,
            )

            # Disabled random flip augmentation (kept for reference):
            # coin1 = fn.random.coin_flip(dtype=types.DALIDataType.BOOL, seed=10)
            # coin1_n = coin1 ^ True
            # coin2 = fn.random.coin_flip(dtype=types.DALIDataType.BOOL, seed=20)
            # coin2_n = coin2 ^ True
            # images = (
            #     fn.flip(images, horizontal=1, vertical=1) * coin1 * coin2
            #     + fn.flip(images, horizontal=1) * coin1 * coin2_n
            #     + fn.flip(images, vertical=1) * coin1_n * coin2
            #     + images * coin1_n * coin2_n
            # )
            # flo = (
            #     fn.flip(flo, horizontal=1, vertical=1) * coin1 * coin2
            #     + fn.flip(flo, horizontal=1) * coin1 * coin2_n
            #     + fn.flip(flo, vertical=1) * coin1_n * coin2
            #     + flo * coin1_n * coin2_n
            # )
            # _flo = flo
            # flo_0 = fn.slice(_flo, axis_names="C", start=0, shape=1)
            # flo_1 = fn.slice(_flo, axis_names="C", start=1, shape=1)
            # flo_0 = flo_0 * coin1 * -1 + flo_0 * coin1_n
            # flo_1 = flo_1 * coin2 * -1 + flo_1 * coin2_n
            # # flo = noflip + vertical flip + horizontal flip + both_flip
            # # A horizontal flip is around the vertical axis (switch left and right)
            # # So for a vertical flip coin1 is activated and needs to give +1, coin2 is activated needs to give -1
            # # for a horizontal flip coin1 is activated and needs to be -1, coin2_n needs +1
            # # no flip coin coin1_n +1, coin2_n +1
            # flo = fn.cat(flo_0, flo_1, axis_name="C")

        """ NORMALIZE """
        # First pass divides all six channels by 255; the second pass subtracts
        # the per-channel means from the rescaled values.
        images = fn.crop_mirror_normalize(
            images, mean=[0, 0, 0, 0, 0, 0], std=[255, 255, 255, 255, 255, 255])
        images = fn.crop_mirror_normalize(
            images,
            mean=[0.45, 0.432, 0.411, 0.45, 0.432, 0.411],
            std=[1, 1, 1, 1, 1, 1],
        )
        # The flow is only divided by args.div_flow (mean is zero).
        flo = fn.crop_mirror_normalize(
            flo, mean=[0, 0], std=[args.div_flow, args.div_flow])
        pipeline.set_outputs(images, flo)
    return pipeline
import nvidia.dali.types as types
import matplotlib.pylab as plt
import nvidia.dali.fn as fn
from nvidia.dali.plugin.pytorch import DALIClassificationIterator

# Grayscale classification pipeline over the images in data/resized.
# NOTE(review): Pipeline, WIDTH and HEIGHT are not defined in this snippet and
# are presumably provided earlier in the session — confirm.
pipe = Pipeline(batch_size=64, num_threads=1, device_id=0)

raw_files, labels = fn.file_reader(file_root="data/resized", random_shuffle=True)
decoded = fn.image_decoder(raw_files, device="mixed", output_type=types.GRAY)
resized = fn.resize(
    decoded,
    device="gpu",
    image_type=types.GRAY,
    interp_type=types.INTERP_LINEAR,
    resize_x=WIDTH,
    resize_y=HEIGHT,
)
# Centre pixel values around zero: (x - 127) / 127.
half_range = 255 // 2
cmn = fn.crop_mirror_normalize(
    resized,
    device="gpu",
    output_dtype=types.FLOAT,
    output_layout=types.NCHW,
    image_type=types.GRAY,
    mean=[half_range],
    std=[half_range],
)
pipe.set_outputs(cmn, labels)
pipe.build()

# Pull one batch through the PyTorch iterator and look at its shapes.
dali_iter = DALIClassificationIterator([pipe], -1)
output = next(dali_iter)[0]
output["data"].shape, output["label"].shape
def get_dali_pipeline(tfrec_filenames, tfrec_idx_filenames, height, width, shard_id,
                      num_gpus, dali_cpu=True, training=True):
    """Define a TFRecord image-classification graph.

    Args:
        tfrec_filenames: TFRecord file paths.
        tfrec_idx_filenames: Matching DALI index file paths.
        height: Output crop height (and training resize height).
        width: Output crop width (and training resize width).
        shard_id: Index of the shard to read.
        num_gpus: Total number of shards.
        dali_cpu: When true, decode and resize on the CPU.
        training: Enables shuffling and the random-crop decode branch.

    Returns:
        ``(images, labels)`` DALI nodes on the GPU; labels are shifted to be
        0-based.
    """
    feature_spec = {
        'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
        'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
        'image/class/text': tfrec.FixedLenFeature([], tfrec.string, ''),
        'image/object/bbox/xmin': tfrec.VarLenFeature(tfrec.float32, 0.0),
        'image/object/bbox/ymin': tfrec.VarLenFeature(tfrec.float32, 0.0),
        'image/object/bbox/xmax': tfrec.VarLenFeature(tfrec.float32, 0.0),
        'image/object/bbox/ymax': tfrec.VarLenFeature(tfrec.float32, 0.0),
    }
    inputs = fn.readers.tfrecord(
        path=tfrec_filenames,
        index_path=tfrec_idx_filenames,
        random_shuffle=training,
        shard_id=shard_id,
        num_shards=num_gpus,
        initial_fill=10000,
        features=feature_spec,
    )

    if dali_cpu:
        decode_device = resize_device = "cpu"
    else:
        decode_device, resize_device = "mixed", "gpu"

    encoded = inputs["image/encoded"]
    if not training:
        images = fn.decoders.image(encoded, device=decode_device, output_type=types.RGB)
        # Make sure that every image > 224 for CropMirrorNormalize
        images = fn.resize(images, device=resize_device, resize_shorter=256)
    else:
        images = fn.decoders.image_random_crop(
            encoded,
            device=decode_device,
            output_type=types.RGB,
            random_aspect_ratio=[0.75, 1.25],
            random_area=[0.05, 1.0],
            num_attempts=100,
        )
        images = fn.resize(images, device=resize_device, resize_x=width, resize_y=height)

    # The random mirror is applied in both branches.
    flip = fn.random.coin_flip()
    images = fn.crop_mirror_normalize(
        images.gpu(),
        dtype=types.FLOAT,
        crop=(height, width),
        mean=[123.68, 116.78, 103.94],
        std=[58.4, 57.12, 57.3],
        output_layout="HWC",
        mirror=flip,
    )

    labels = inputs["image/class/label"].gpu()
    labels -= 1  # Change to 0-based (don't use background class)
    return images, labels
def dali_dataloader(
        tfrec_filenames,
        tfrec_idx_filenames,
        shard_id=0,
        num_shards=1,
        batch_size=128,
        num_threads=os.cpu_count(),
        image_size=224,
        num_workers=1,
        training=True):
    """Create a PyTorch-compatible DALI loader over TFRecord files.

    Args:
        tfrec_filenames: TFRecord file paths.
        tfrec_idx_filenames: Matching DALI index file paths.
        shard_id: Index of the shard to read.
        num_shards: Total number of shards.
        batch_size: Pipeline batch size.
        num_threads: Number of pipeline worker threads.
        image_size: Side length of the square output crop.
        num_workers: Forwarded to the reader as ``prefetch_queue_depth``.
        training: Enables shuffling and random crop/flip; also selects the
            last-batch policy (DROP for training, PARTIAL otherwise).

    Returns:
        A ``DALIClassificationIterator`` yielding GPU images and 0-based labels.
    """
    feature_spec = {
        'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
        'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
    }

    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=0)
    with pipe:
        inputs = fn.readers.tfrecord(
            path=tfrec_filenames,
            index_path=tfrec_idx_filenames,
            random_shuffle=training,
            shard_id=shard_id,
            num_shards=num_shards,
            initial_fill=10000,
            read_ahead=True,
            pad_last_batch=True,
            prefetch_queue_depth=num_workers,
            name='Reader',
            features=feature_spec,
        )
        encoded = inputs["image/encoded"]

        if not training:
            images = fn.decoders.image(encoded, device='mixed', output_type=types.RGB)
            # Resize so that a centre crop of image_size covers 87.5% of the side.
            images = fn.resize(
                images,
                device='gpu',
                size=int(image_size / 0.875),
                mode="not_smaller",
                interp_type=types.INTERP_TRIANGULAR,
            )
            mirror = False
        else:
            images = fn.decoders.image_random_crop(
                encoded,
                device="mixed",
                output_type=types.RGB,
                random_aspect_ratio=[0.8, 1.25],
                random_area=[0.1, 1.0],
                num_attempts=100,
            )
            images = fn.resize(
                images,
                device='gpu',
                resize_x=image_size,
                resize_y=image_size,
                interp_type=types.INTERP_TRIANGULAR,
            )
            mirror = fn.random.coin_flip(probability=0.5)

        images = fn.crop_mirror_normalize(
            images.gpu(),
            dtype=types.FLOAT,
            crop=(image_size, image_size),
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
            mirror=mirror,
        )

        label = inputs["image/class/label"] - 1  # 0-999
        label = fn.element_extract(label, element_map=0)  # Flatten
        label = label.gpu()
        pipe.set_outputs(images, label)
    pipe.build()

    if training:
        last_batch_policy = LastBatchPolicy.DROP
    else:
        last_batch_policy = LastBatchPolicy.PARTIAL
    return DALIClassificationIterator(
        pipe,
        reader_name="Reader",
        auto_reset=True,
        last_batch_policy=last_batch_policy,
    )
def train_pipeline(cfg: TrainLoaderConfig):
    """Define the training graph: random-crop decode, optional augmentations, normalize.

    Each augmentation is enabled by its ``cfg`` field and blended in through
    the project helper ``mix`` with a per-sample coin flip.

    Args:
        cfg: Loader configuration. Reads ``min_area``, ``image_size``,
            ``random_interpolation``, ``blur_prob``, ``color_twist_prob``,
            ``gray_prob``, ``re_prob``, ``re_count`` and ``num_classes``.

    Returns:
        ``(image, label)`` DALI nodes; the image is FLOAT in NCHW layout and the
        label is one-hot encoded on the GPU.
    """
    jpeg, label = fn.readers.file(
        file_root=ROOT_DATA_DIR + "/train/",
        random_shuffle=True,
        shard_id=env_rank(),
        num_shards=env_world_size(),
        name="Reader",
    )

    image = fn.decoders.image_random_crop(
        jpeg,
        device="mixed",
        random_aspect_ratio=[0.75, 1.25],
        random_area=[cfg.min_area, 1.0],
        num_attempts=100,
        output_type=types.RGB,
    )

    image_tr = fn.resize(
        image, device="gpu", size=cfg.image_size, interp_type=types.INTERP_TRIANGULAR
    )
    if cfg.random_interpolation:
        # Per sample, pick between the cubic and the triangular resize.
        image_cub = fn.resize(
            image, device="gpu", size=cfg.image_size, interp_type=types.INTERP_CUBIC
        )
        image = mix(fn.random.coin_flip(probability=0.5), image_cub, image_tr)
    else:
        image = image_tr

    if cfg.blur_prob > 0:
        blur_image = fn.gaussian_blur(
            image,
            device="gpu",
            window_size=11,
            sigma=fn.random.uniform(range=[0.5, 1.1]),
        )
        image = mix(
            fn.random.coin_flip(probability=cfg.blur_prob, dtype=types.BOOL),
            blur_image,
            image,
        )

    if cfg.color_twist_prob > 0:
        image_ct = fn.color_twist(
            image,
            device="gpu",
            contrast=fn.random.uniform(range=[0.7, 1.3]),
            brightness=fn.random.uniform(range=[0.7, 1.3]),
            hue=fn.random.uniform(range=[-20, 20]),  # in degrees
            saturation=fn.random.uniform(range=[0.7, 1.3]),
        )
        image = mix(
            fn.random.coin_flip(probability=cfg.color_twist_prob, dtype=types.BOOL),
            image_ct,
            image,
        )

    if cfg.gray_prob > 0:
        # The coin is used directly as the saturation multiplier: 1 keeps the
        # colours, 0 removes them. It must therefore come up 1 with probability
        # (1 - gray_prob) so that grayscale is applied with probability
        # gray_prob and not the other way round.
        keep_color_coin = fn.cast(
            fn.random.coin_flip(probability=1 - cfg.gray_prob), dtype=types.FLOAT
        )
        image = fn.hsv(image, device="gpu", saturation=keep_color_coin)

    if cfg.re_prob:
        # Random erasing: re_count regions with normalized anchors and shapes
        # (two values per region, for the H and W axes).
        image_re = fn.erase(
            image,
            device="gpu",
            anchor=fn.random.uniform(range=(0.0, 1), shape=cfg.re_count * 2),
            shape=fn.random.uniform(range=(0.05, 0.25), shape=cfg.re_count * 2),
            axis_names="HW",
            fill_value=DATA_MEAN,
            normalized_anchor=True,
            normalized_shape=True,
        )
        image = mix(
            fn.random.coin_flip(probability=cfg.re_prob, dtype=types.BOOL),
            image_re,
            image,
        )

    image = fn.crop_mirror_normalize(
        image,
        device="gpu",
        crop=(cfg.image_size, cfg.image_size),
        mirror=fn.random.coin_flip(probability=0.5),
        mean=DATA_MEAN,
        std=DATA_STD,
        dtype=types.FLOAT,
        output_layout=types.NCHW,
    )
    label = fn.one_hot(label, num_classes=cfg.num_classes).gpu()
    return image, label
def create_coco_pipeline(default_boxes, args):
    """Define the SSD training graph over COCO with box encoding.

    Args:
        default_boxes: Anchor provider; ``as_ltrb_list()`` is passed to the box
            encoder.
        args: Options object; ``train_coco_root``, ``train_annotate`` and
            ``fp16`` are read from it.

    Returns:
        ``(images, bboxes, labels)`` DALI nodes; boxes and labels are moved to
        the GPU.
    """
    # Fall back to a single shard when the distributed query raises RuntimeError.
    try:
        shard_id = torch.distributed.get_rank()
        num_shards = torch.distributed.get_world_size()
    except RuntimeError:
        shard_id, num_shards = 0, 1

    encoded, bboxes, labels = fn.readers.coco(
        file_root=args.train_coco_root,
        annotations_file=args.train_annotate,
        skip_empty=True,
        shard_id=shard_id,
        num_shards=num_shards,
        ratio=True,
        ltrb=True,
        random_shuffle=False,
        shuffle_after_epoch=True,
        name="Reader",
    )

    crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(
        bboxes,
        labels,
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        bbox_layout="xyXY",
        allow_no_crop=True,
        num_attempts=50,
    )
    images = fn.image_decoder_slice(
        encoded, crop_begin, crop_size, device="mixed", output_type=types.RGB
    )
    # One coin drives both the image mirror and the bbox flip so they agree.
    flip_coin = fn.random.coin_flip(probability=0.5)
    images = fn.resize(
        images,
        resize_x=300,
        resize_y=300,
        min_filter=types.DALIInterpType.INTERP_TRIANGULAR,
    )

    saturation = fn.uniform(range=[0.5, 1.5])
    contrast = fn.uniform(range=[0.5, 1.5])
    brightness = fn.uniform(range=[0.875, 1.125])
    hue = fn.uniform(range=[-0.5, 0.5])

    # Use float to avoid clipping and quantizing the intermediate result.
    images = fn.hsv(images, dtype=types.FLOAT, hue=hue, saturation=saturation)
    images = fn.brightness_contrast(
        images,
        contrast_center=128,  # input is in float, but in 0..255 range
        dtype=types.UINT8,
        brightness=brightness,
        contrast=contrast,
    )

    if args.fp16:
        out_dtype = types.FLOAT16
    else:
        out_dtype = types.FLOAT

    bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)
    images = fn.crop_mirror_normalize(
        images,
        crop=(300, 300),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=flip_coin,
        dtype=out_dtype,
        output_layout="CHW",
        pad_output=False,
    )

    bboxes, labels = fn.box_encoder(
        bboxes, labels, criteria=0.5, anchors=default_boxes.as_ltrb_list()
    )
    labels = labels.gpu()
    bboxes = bboxes.gpu()
    return images, bboxes, labels
def load_tfrecord(directory, batch_size, training):
    """Create a ``DALIDataset`` of stacked frame pairs, pose and speed from TFRecords.

    All ``*.tfrecord`` / ``*.idx`` files directly inside ``directory`` are
    collected and sorted by name, which pairs each record file with its index
    when both share a base name.

    Args:
        directory: Folder containing the TFRecord and index files.
        batch_size: Batch size for both the pipeline and the dataset.
        training: Accepted for interface compatibility; not used here.

    Returns:
        A ``dali_tf.DALIDataset`` with three float32 outputs:
        ``image`` (two RGB frames concatenated on the channel axis),
        ``pose`` (position followed by orientation) and ``speed``.
    """
    tfrecord = []
    tfrecord_idx = []
    for entry in os.listdir(directory):
        fullpath = os.path.join(directory, entry)
        if not os.path.isfile(fullpath):
            continue
        if entry.endswith(".tfrecord"):
            tfrecord.append(fullpath)
        if entry.endswith(".idx"):
            tfrecord_idx.append(fullpath)
    tfrecord.sort()
    tfrecord_idx.sort()

    def decode_frame(encoded):
        # Decode to RGB, convert to float, and return in HWC layout
        # (crop_mirror_normalize emits CHW by default, hence the transpose).
        frame = fn.image_decoder(encoded, device="mixed", output_type=types.RGB)
        frame = fn.crop_mirror_normalize(
            frame,
            device="gpu",
            dtype=types.FLOAT,
            mean=[0., 0., 0.],
            std=[1., 1., 1.],
        )
        return fn.transpose(frame, device="gpu", perm=[1, 2, 0])

    pipe = Pipeline(batch_size=batch_size, num_threads=32, device_id=0)
    with pipe:
        inputs = fn.tfrecord_reader(
            path=tfrecord,
            index_path=tfrecord_idx,
            features={
                "frame_one": tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_two": tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_three": tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_four": tfrec.FixedLenFeature((), tfrec.string, ""),
                "plus_one_position": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_one_orientation": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_two_position": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_two_orientation": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_three_position": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_three_orientation": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "speed": tfrec.FixedLenFeature([], tfrec.float32, 0.0),
            },
        )

        frame1 = decode_frame(inputs["frame_one"])
        frame2 = decode_frame(inputs["frame_two"])

        position = inputs["plus_one_position"].gpu()
        orientation = inputs["plus_one_orientation"].gpu()
        speed = inputs["speed"].gpu()

        # Two HWC RGB frames stacked on the channel axis -> 6 channels.
        image = fn.cat(frame1, frame2, device="gpu", axis=2)
        pose = fn.cat(position, orientation, device="gpu", axis=0)
        pipe.set_outputs(image, pose, speed)

    # One shape and one dtype per pipeline output (image, pose, speed).
    # Assumes 480x640 frames, as in the previous shape spec — TODO confirm.
    shapes = (
        (batch_size, 480, 640, 6),
        (batch_size, 6),
        (batch_size,),
    )
    dtypes = (tf.float32, tf.float32, tf.float32)

    return dali_tf.DALIDataset(
        pipeline=pipe,
        batch_size=batch_size,
        output_shapes=shapes,
        output_dtypes=dtypes,
        device_id=0,
    )