def define_graph(self):
    """Build the COCO graph: read, random square crop, decode, resize.

    Returns:
        Tuple ``(images, targets, labels)``. ``targets`` is the cropped
        boxes concatenated along axis 1 with the polygon vertices
        reshaped to rows of 10 values; ``targets`` and ``labels`` are
        moved to the GPU before being returned.
    """
    encoded, boxes, classes, polys, verts = fn.readers.coco(
        file_root=self.file_root,
        annotations_file=self.annotation_file,
        skip_empty=True,
        shard_id=self.share_id,
        num_shards=self.num_gpus,
        ratio=True,
        ltrb=True,
        polygon_masks=True,
        random_shuffle=self.random_shuffle,
        shuffle_after_epoch=self.shuffle_after_epoch,
        name="Reader",
    )

    # First two entries of the peeked shape; used below as (height, width).
    peeked = fn.peek_image_shape(encoded)
    hw = fn.slice(fn.cast(peeked, dtype=types.INT32), 0, 2, axes=[0])
    height = fn.slice(hw, 0, 1, axes=[0], dtype=types.FLOAT)
    width = fn.slice(hw, 1, 1, axes=[0], dtype=types.FLOAT)

    # Square crop whose side is a random 30%-100% of the shorter side.
    shorter = math.min(width, height)
    fraction = fn.random.uniform(range=[0.3, 1.])
    side = fn.cast(math.ceil(fraction * shorter), dtype=types.INT32)
    square = fn.cat(side, side)

    crop_anchor, crop_extent, boxes, classes, kept = fn.random_bbox_crop(
        boxes,
        classes,
        input_shape=hw,
        crop_shape=square,
        shape_layout="HW",
        thresholds=[0.],  # no minimum intersection-over-union is required
        allow_no_crop=False,  # a crop is always applied
        # NOTE(review): DALI documents seed=-1 as "choose a seed
        # automatically", i.e. NOT a fixed seed - confirm if determinism
        # is expected here.
        seed=-1,
        bbox_layout="xyXY",  # left, top, right, bottom
        output_bbox_indices=True,  # also return indices of the kept boxes
        total_num_attempts=1024,
    )
    # Keep only the masks that belong to the boxes that survived the crop.
    polys, verts = fn.segmentation.select_masks(kept, polys, verts)

    pixels = fn.decoders.image_slice(
        encoded,
        crop_anchor,
        crop_extent,
        normalized_anchor=False,
        normalized_shape=False,
        device='mixed',
    )
    pixels = fn.color_space_conversion(
        pixels, image_type=types.RGB, output_type=types.BGR)

    # Two chained vertex transforms: the first targets the (w, h) extent,
    # the second maps the crop window onto the (0, 0)-(1, 1) range.
    to_image_extent = fn.transforms.crop(
        to_start=(0.0, 0.0), to_end=fn.cat(width, height))
    crop_end = crop_anchor + crop_extent
    to_crop_window = fn.transforms.crop(
        from_start=crop_anchor,
        from_end=crop_end,
        to_start=(0.0, 0.0),
        to_end=(1., 1.),
    )
    verts = fn.coord_transform(verts, MT=to_image_extent)
    verts = fn.coord_transform(verts, MT=to_crop_window)

    targets = fn.cat(boxes, fn.reshape(verts, shape=[-1, 10]), axis=1)

    # Pick one interpolation method at random for the final resize.
    candidates = [
        types.INTERP_LINEAR,
        types.INTERP_CUBIC,
        types.INTERP_LANCZOS3,
        types.INTERP_GAUSSIAN,
        types.INTERP_NN,
        types.INTERP_TRIANGULAR,
    ]
    choice = fn.random.uniform(
        values=list(map(int, candidates)), dtype=types.INT32)
    choice = fn.reinterpret(choice, dtype=types.INTERP_TYPE)
    pixels = fn.resize(
        pixels, dtype=types.FLOAT, size=self.input_dim, interp_type=choice)

    classes = classes.gpu()
    targets = targets.gpu()
    return (pixels, targets, classes)
def check_random_mask_pixel(ndim=2, batch_size=3, min_extent=20, max_extent=50):
    """Run ``random_mask_pixel`` on random masks and verify the picked pixels.

    Builds a pipeline producing a random integer mask per sample, selects
    pixels with several ``random_mask_pixel`` configurations, derives a
    clamped slice anchor from one of them, and asserts on three batches.
    """
    pipe = dali.pipeline.Pipeline(
        batch_size=batch_size, num_threads=4, device_id=0, seed=1234)
    with pipe:
        # Random mask shape: one independently drawn extent per dimension.
        extents = [
            fn.cast(
                fn.random.uniform(range=(min_extent, max_extent + 1)),
                dtype=types.INT32)
            for _ in range(ndim)
        ]
        mask_shape = fn.stack(*extents)
        mask_node = fn.cast(
            fn.random.uniform(range=(0, 2), shape=mask_shape),
            dtype=types.INT32)

        # Foreground defined as: > 0
        pick_default = fn.segmentation.random_mask_pixel(mask_node, foreground=1)
        # Foreground defined as: >= 0.99
        pick_threshold = fn.segmentation.random_mask_pixel(
            mask_node, foreground=1, threshold=0.99)
        # Foreground defined as: == 2
        pick_value = fn.segmentation.random_mask_pixel(
            mask_node, foreground=1, value=2)

        pick_any = fn.segmentation.random_mask_pixel(mask_node, foreground=0)

        flip_node = fn.random.coin_flip(probability=0.7)
        pick_biased = fn.segmentation.random_mask_pixel(
            mask_node, foreground=flip_node)

        # Demo: turn a random pixel into a valid anchor for slice. The crop
        # is deliberately large so the centered anchor has to be clamped.
        crop_node = mask_shape - 2
        centered = fn.cast(pick_default, dtype=types.INT32)
        half_crop = crop_node // 2
        anchor_node = centered - half_crop
        lower_clamped = math.max(0, anchor_node)
        upper_bound = mask_shape - crop_node
        anchor_node = math.min(lower_clamped, upper_bound)
        sliced_node = fn.slice(
            mask_node, anchor_node, crop_node, axes=tuple(range(ndim)))

        pipe.set_outputs(
            mask_node, pick_default, pick_threshold, pick_value, pick_any,
            flip_node, pick_biased, anchor_node, crop_node, sliced_node)
    pipe.build()

    for _ in range(3):
        outputs = pipe.run()
        for idx in range(batch_size):
            in_mask = outputs[0].at(idx)
            # Outputs 1..8 are small coordinate/scalar tensors; convert
            # them to plain Python values.
            (fg_pixel1, fg_pixel2, fg_pixel3, rnd_pixel, coin_flip,
             fg_biased, anchor, crop_shape) = (
                outputs[k].at(idx).tolist() for k in range(1, 9))
            out_mask = outputs[9].at(idx)

            assert in_mask[tuple(fg_pixel1)] > 0
            assert in_mask[tuple(fg_pixel2)] > 0.99
            assert in_mask[tuple(fg_pixel3)] == 2
            # The biased pick only has to be foreground when the coin
            # flip asked for foreground.
            assert in_mask[tuple(fg_biased)] > 0 or not coin_flip

            # The clamped anchor must keep the crop window inside the mask.
            for d in range(ndim):
                assert 0 <= anchor[d] and anchor[d] + crop_shape[d] <= in_mask.shape[d]
            assert out_mask.shape == tuple(crop_shape)
def check_pad_per_sample_shapes_and_alignment(device='cpu', batch_size=3, ndim=2, num_iter=3):
    """Check ``fn.pad`` with per-sample ``shape`` and/or ``align`` arguments.

    Random input data, requested shapes and alignments are generated in
    the pipeline; padding is applied with shape only, alignment only, and
    both, and the output shapes are verified for ``num_iter`` batches.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=3, device_id=0, seed=1234)
    axes = (0, 1)

    def _random_int_vector(low, high):
        # One INT32 vector of length ``ndim`` drawn from [low, high).
        drawn = fn.random.uniform(range=(low, high), shape=(ndim, ))
        return fn.cast(drawn, dtype=types.INT32)

    with pipe:
        shape_node = _random_int_vector(10, 20)
        data_node = fn.random.uniform(range=(0., 1.), shape=shape_node)
        if device == 'gpu':
            data_node = data_node.gpu()
        # Requested shapes (21..29) always exceed the input extents (10..19).
        want_shape = _random_int_vector(21, 30)
        want_align = _random_int_vector(3, 5)
        padded_shape = fn.pad(data_node, axes=axes, align=None, shape=want_shape)
        padded_align = fn.pad(data_node, axes=axes, align=want_align, shape=None)
        padded_both = fn.pad(data_node, axes=axes, align=want_align, shape=want_shape)
        pipe.set_outputs(
            shape_node, data_node, want_shape, want_align,
            padded_shape, padded_align, padded_both)
    pipe.build()

    for _ in range(num_iter):
        # Bring GPU outputs back to the host so samples can be inspected.
        outs = [
            out.as_cpu() if isinstance(out, TensorListGPU) else out
            for out in pipe.run()
        ]
        for i in range(batch_size):
            (in_shape, in_data, req_shape, req_align,
             out_pad_shape, out_pad_align, out_pad_both) = [out.at(i) for out in outs]
            assert (in_shape == in_data.shape).all()

            # Pad to explicit shape
            assert (out_pad_shape.shape >= in_shape).all()
            assert (req_shape == out_pad_shape.shape).all()

            # Alignment only
            assert (out_pad_align.shape >= in_shape).all()
            assert is_aligned(out_pad_align.shape, req_align, axes)

            # Explicit shape + alignment
            assert (out_pad_both.shape >= in_shape).all()
            assert (req_shape <= out_pad_both.shape).all()
            assert is_aligned(out_pad_both.shape, req_align, axes)
def setup_dali(
    image_file='/mnt/data/DATASETS/samples/images/image_110.jpg',
    image_dim=[800, 1600],
    batch_size=1,
    num_threads=4,
    device='mixed',
    device_id=0,
    output_dir='./out/',
):
    """Run the preprocessing pipeline once and save the result as a JPEG.

    The pipeline reads ``image_file``, decodes it, resizes it to fit
    within ``image_dim`` ("not_larger"), zero-pads to ``image_dim``,
    transposes to channel-first, and scales to [0, 1]. The first sample of
    the first batch is written to ``<output_dir>/dali_image.jpg``.

    Parameters
    ----------
    image_file : str
        Path of the image fed to the file reader.
    image_dim : sequence of two ints
        Target (height, width) used for resize and padding. Not mutated.
    batch_size, num_threads, device_id
        Forwarded to the DALI ``Pipeline`` constructor.
    device : str
        Device passed to the image decoder; any value other than 'cpu'
        causes the output to be copied to the host before saving.
    output_dir : str
        Directory that receives ``dali_image.jpg``; created if missing.
    """
    # Create the directory that is actually written to. Using
    # os.path.dirname(output_dir) only worked when the path ended with a
    # separator and raised for a bare name such as 'out'.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    pipeline = dali.pipeline.Pipeline(
        batch_size=batch_size, num_threads=num_threads, device_id=device_id)
    with pipeline:
        data, _ = fn.file_reader(files=[image_file])

        # image preprocess
        images = fn.image_decoder(data, device=device)
        images = fn.resize(
            images, size=image_dim, mode="not_larger", max_size=image_dim)
        images = fn.pad(
            images, fill_value=0, shape=[image_dim[0], image_dim[1], 1])
        images = fn.transpose(images, perm=[2, 0, 1])
        images = fn.cast(images, dtype=dali.types.FLOAT)
        images = images / 255.

        # input shape (constant output)
        input_shape = np.float32((image_dim[0], image_dim[1], 1))

        # original shape, read from the encoded data
        shapes = fn.peek_image_shape(data)
        shapes = fn.cast(shapes, dtype=dali.types.FLOAT)

        pipeline.set_outputs(images, input_shape, shapes)

    pipeline.build()
    output = pipeline.run()

    img = output[0].at(0) if device == 'cpu' else output[0].as_cpu().at(0)
    img = img.transpose(1, 2, 0)  # CHW -> HWC
    img = img[:, :, ::-1]  # reverse the channel order (RGB -> BGR)

    # The pipeline output is float in [0, 1]; JPEG needs 8-bit data, so
    # scale back to [0, 255] before writing. The copy also makes the
    # reversed-channel view contiguous.
    img = np.ascontiguousarray(
        np.clip(np.rint(img * 255.0), 0, 255).astype(np.uint8))

    # A leftover debug `print(img); quit()` used to terminate the process
    # here, which made the write below unreachable.
    cv2.imwrite(os.path.join(output_dir, 'dali_image.jpg'), img)
def many_input_pipeline(def_for_dataset, device, sources, input_names, batches):
    """Pipeline accepting multiple inputs via external source.

    Every input is also shifted by 10 and cast to INT32; the originals
    and the processed copies are padded together and returned as a tuple.

    Parameters
    ----------
    def_for_dataset : bool
        True if this pipeline will be converted to TF Dataset
    device : str
        device that the Dataset will be placed ("cpu" or "gpu")
    sources : list of callables
        callbacks for the external sources in baseline pipeline otherwise None
    input_names : list of str
        Names of inputs placeholder for TF
    batches : list
        Per-input ``batch`` argument for the external source; the special
        value "dataset" is replaced with None (the default).
    """
    def _on_device(node):
        # Anything other than 'cpu' moves the node to the GPU.
        return node if device == 'cpu' else node.gpu()

    if def_for_dataset:
        inputs = []
        for input_name, batch in zip(input_names, batches):
            # "dataset" is a special value used in tests; reroute it to
            # the default.
            batch_arg = None if batch == "dataset" else batch
            placeholder = fn.external_source(name=input_name, batch=batch_arg)
            inputs.append(_on_device(placeholder))
    else:
        inputs = [
            _on_device(fn.external_source(source=source, batch=False))
            for source in sources
        ]

    processed = [
        fn.cast(node + 10, dtype=dali.types.INT32) for node in inputs
    ]
    results = fn.pad(inputs + processed)
    return tuple(results)
def check_per_sample_gaussian_blur(batch_size, sigma_dim, window_size_dim, shape, layout, axes,
                                   op_type="cpu"):
    """Compare ``fn.gaussian_blur`` with per-sample arguments to a baseline.

    ``sigma_dim`` / ``window_size_dim`` select the length of the random
    per-sample sigma / window-size tensors; ``None`` means the argument is
    not passed to the operator. Results are checked against
    ``gaussian_baseline`` for ``test_iters`` batches.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    data = RandomlyShapedDataIterator(batch_size, max_shape=shape)
    with pipe:
        if sigma_dim is None:
            # Placeholder output so the pipeline always returns four values.
            sigma_node = fn.random.coin_flip(probability=0)
            sigma_kwarg = None
        else:
            sigma_node = fn.random.uniform(range=[0.5, 3], shape=[sigma_dim])
            sigma_kwarg = sigma_node

        if window_size_dim is None:
            window_node = fn.random.coin_flip(probability=0)
            window_kwarg = None
        else:
            radius = fn.random.uniform(range=[5, 10], shape=[window_size_dim])
            # Odd window size derived from the (truncated) radius.
            window_node = fn.cast(radius, dtype=types.INT32) * 2 + 1
            window_kwarg = window_node

        source_node = fn.external_source(data, layout=layout)
        if op_type == "gpu":
            source_node = source_node.gpu()
        blurred = fn.gaussian_blur(
            source_node, device=op_type, sigma=sigma_kwarg, window_size=window_kwarg)
        pipe.set_outputs(blurred, source_node, sigma_node, window_node)
    pipe.build()

    for _ in range(test_iters):
        result, in_batch, sigma_batch, window_batch = pipe.run()
        if op_type == "gpu":
            result = result.as_cpu()
            in_batch = in_batch.as_cpu()
        in_batch = to_batch(in_batch, batch_size)
        sigma_batch = to_batch(sigma_batch, batch_size)
        window_batch = to_batch(window_batch, batch_size)

        baseline = []
        for i in range(batch_size):
            # NOTE(review): this tests the batch object rather than
            # ``sigma_dim`` (unlike the window check below), so the
            # placeholder value is presumably forwarded when sigma_dim is
            # None - confirm that this is intended.
            sample_sigma = sigma_batch[i] if sigma_batch is not None else None
            sample_window = window_batch[i] if window_size_dim is not None else None
            skip_axes = count_skip_axes(layout)
            baseline.append(
                gaussian_baseline(in_batch[i], sample_sigma, sample_window, axes, skip_axes))
        check_batch(result, baseline, batch_size, max_allowed_error=1, expected_layout=layout)
def define_graph(self):
    """Read, decode, optionally resize to 101x43, cast and apply ``self.water``."""
    encoded, _ = self.input(name="Reader")
    decoded = self.decode(encoded)
    if self.prime_size:
        # Fixed output size of 101 x 43 when ``prime_size`` is set.
        decoded = fn.resize(decoded, resize_x=101, resize_y=43)
    converted = fn.cast(decoded, dtype=self.dtype)
    return self.water(converted)
def make_pipe():
    """Decode Caffe-DB images and return each image with two slices of it.

    Uses ``caffe_db_folder``, ``device`` and ``dtype`` from the enclosing
    scope.
    """
    encoded, _ = fn.readers.caffe(path=caffe_db_folder, random_shuffle=False)
    decoded = fn.decoders.image(encoded, device="cpu", output_type=types.RGB)
    placed = decoded.gpu() if device == 'gpu' else decoded
    image = fn.cast(placed, dtype=dtype)
    # Absolute slice along axis 2: start 0, length 3.
    sliced1 = fn.slice(image, 0, 3, axes=(2,))
    # Relative slice from 0 to 1 on each of the named axes.
    sliced2 = fn.slice(
        image, rel_start=(0, 0, 0), rel_end=(1, 1, 1), axis_names="HWC")
    return image, sliced1, sliced2
def get_random_pipeline(batch_size):
    """Return a CPU pipeline applying ``grid_mask`` with random parameters.

    Outputs, in order: masked image, decoded image, tile, ratio, angle.
    """
    def _uniform(low, high):
        # One-element tensor drawn uniformly from the given range.
        return fn.uniform(range=(low, high), shape=[1])

    pipe = Pipeline(batch_size, 4, None)
    with pipe:
        encoded, _ = fn.readers.file(file_root=img_dir)
        decoded = fn.image_decoder(encoded, device='cpu', output_type=types.RGB)
        tile = fn.cast(_uniform(50, 200), dtype=types.INT32)
        ratio = _uniform(0.3, 0.7)
        angle = _uniform(-math.pi, math.pi)
        masked = fn.grid_mask(
            decoded, device='cpu', tile=tile, ratio=ratio, angle=angle)
        pipe.set_outputs(masked, decoded, tile, ratio, angle)
    return pipe
def define_graph(self):
    """Read, decode, optionally resize, cast and apply ``self.water``.

    When ``self.do_mask`` is set, a seeded coin flip is passed to
    ``self.water`` as ``mask``; otherwise ``mask`` is None.
    """
    encoded, _ = self.input(name="Reader")
    frames = self.decode(encoded)
    if self.device == 'gpu':
        frames = frames.gpu()
    if self.prime_size:
        # Fixed output size of 101 x 43 when ``prime_size`` is set.
        frames = fn.resize(frames, resize_x=101, resize_y=43)
    if self.do_mask:
        mask = fn.random.coin_flip(seed=42)
    else:
        mask = None
    frames = fn.cast(frames, dtype=self.dtype)
    return self.water(frames, mask=mask)
def create_ref_pipe(channel_first, seq_len, interp, dtype, w, h, batch_size=2):
    """Return the reference pipeline that resizes sequences via ``resize_PIL``.

    The pipeline runs synchronously (``exec_async=False``,
    ``exec_pipelined=False``) and applies the Python function per sample.
    """
    pipe = dali.pipeline.Pipeline(
        batch_size, 1, 0, 0, exec_async=False, exec_pipelined=False)
    with pipe:
        if channel_first:
            layout = "FCHW"
        else:
            layout = "FHWC"
        sequences = GetSequences(channel_first, seq_len, batch_size)
        ext = fn.external_source(sequences, layout=layout)
        resize_fn = resize_PIL(channel_first, interp, w, h)
        resized = fn.python_function(
            ext, function=resize_fn, batch_processing=False)
        if dtype is not None:
            # Unfortunately, PIL can't quite handle that, so the
            # conversion is done afterwards.
            resized = fn.cast(resized, dtype=dtype)
        # Reshape with only ``layout`` given, to attach the layout again.
        resized = fn.reshape(resized, layout=layout)
        pipe.set_outputs(resized)
    return pipe
def setup_dali(
    input_name='DALI_INPUT_0',
    image_dim=[896, 1536],
    batch_size=1,
    num_threads=4,
    device='cpu',
    device_id=0,
    output_dir='./out/',
):
    """Define the preprocessing pipeline and serialize it to ``model.dali``.

    The pipeline takes encoded images from an external source named
    ``input_name``, decodes them, resizes to fit within ``image_dim``
    ("not_larger"), zero-pads to ``image_dim``, transposes to
    channel-first and scales to [0, 1]. Outputs are the images, a
    constant input-shape vector, and the original image shape as float.

    Parameters
    ----------
    input_name : str
        Name of the external source.
    image_dim : sequence of two ints
        Target (height, width) used for resize and padding. Not mutated.
    batch_size, num_threads, device_id
        Forwarded to the DALI ``Pipeline`` constructor.
    device : str
        Device passed to the image decoder.
    output_dir : str
        Directory that receives ``model.dali``; created if missing.
    """
    pipeline = dali.pipeline.Pipeline(
        batch_size=batch_size, num_threads=num_threads, device_id=device_id)
    with pipeline:
        data = fn.external_source(name=input_name, device="cpu")

        # image preprocess
        images = fn.image_decoder(data, device=device)
        images = fn.resize(
            images, size=image_dim, mode="not_larger", max_size=image_dim)
        images = fn.pad(
            images, fill_value=0, shape=[image_dim[0], image_dim[1], 1])
        images = fn.transpose(images, perm=[2, 0, 1])
        images = fn.cast(images, dtype=dali.types.FLOAT)
        images = images / 255.

        # input shape (constant output)
        input_shape = np.float32((image_dim[0], image_dim[1], 1))

        # original shape, read from the encoded data
        shapes = fn.peek_image_shape(data)
        shapes = fn.cast(shapes, dtype=dali.types.FLOAT)

        pipeline.set_outputs(images, input_shape, shapes)

    # Create the directory the model is written into. Using
    # os.path.dirname(output_dir) only worked when the path ended with a
    # separator: for 'out' it raised, and for 'a/b' it created only 'a'.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    pipeline.serialize(filename=os.path.join(output_dir, 'model.dali'))
def create_dali_pipeline(batch_size, num_threads, device_id, data_dir, crop, size, shard_id,
                         num_shards, dali_cpu=False, is_training=True):
    """Return an image-classification pipeline for training or validation.

    Training: random-crop decode, resize to ``crop`` x ``crop``, random
    mirror. Validation: plain decode, "not_smaller" resize to ``size``,
    no mirror. Both paths end with crop/mirror/normalize to CHW float and
    INT64 labels on the GPU.
    """
    pipeline = Pipeline(batch_size, num_threads, device_id, seed=12 + device_id)
    with pipeline:
        encoded, labels = fn.readers.file(
            file_root=data_dir,
            shard_id=shard_id,
            num_shards=num_shards,
            random_shuffle=is_training,
            pad_last_batch=True,
            name="Reader",
        )
        if dali_cpu:
            dali_device, decoder_device = 'cpu', 'cpu'
        else:
            dali_device, decoder_device = 'gpu', 'mixed'
        # Non-zero padding hints are passed only for the 'mixed' decoder.
        uses_mixed = decoder_device == 'mixed'
        device_memory_padding = 211025920 if uses_mixed else 0
        host_memory_padding = 140544512 if uses_mixed else 0

        if is_training:
            images = fn.image_decoder_random_crop(
                encoded,
                device=decoder_device,
                output_type=types.RGB,
                device_memory_padding=device_memory_padding,
                host_memory_padding=host_memory_padding,
                random_aspect_ratio=[0.8, 1.25],
                random_area=[0.1, 1.0],
                num_attempts=100,
            )
            images = fn.resize(
                images,
                device=dali_device,
                resize_x=crop,
                resize_y=crop,
                interp_type=types.INTERP_TRIANGULAR,
            )
            mirror = fn.random.coin_flip(probability=0.5)
        else:
            images = fn.image_decoder(
                encoded, device=decoder_device, output_type=types.RGB)
            images = fn.resize(
                images,
                device=dali_device,
                size=size,
                mode="not_smaller",
                interp_type=types.INTERP_TRIANGULAR,
            )
            mirror = False

        # Mean / stddev are given on the 0..255 scale of the decoded image.
        mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
        std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
        images = fn.crop_mirror_normalize(
            images.gpu(),
            dtype=types.FLOAT,
            output_layout="CHW",
            crop=(crop, crop),
            mean=mean,
            std=std,
            mirror=mirror,
        )
        labels = fn.cast(labels.gpu(), dtype=types.INT64)
        pipeline.set_outputs(images, labels)
    return pipeline
def create_dali_pipe(channel_first, seq_len, interp, dtype, w, h, batch_size=2):
    """Return a pipeline resizing the same sequences on CPU and on GPU.

    Outputs, in order: CPU result, GPU result, the H/W part of the input
    shape, and the size attributes saved by the CPU and GPU resize.
    """
    pipe = dali.pipeline.Pipeline(batch_size, 1, 0, 0)
    with pipe:
        layout = "FCHW" if channel_first else "FHWC"
        ext = fn.external_source(
            GetSequences(channel_first, seq_len, batch_size), layout=layout)

        # Arguments shared by both resize variants.
        common = dict(
            resize_x=w, resize_y=h, interp_type=interp, dtype=dtype, save_attrs=True)
        dali_resized_cpu, size_cpu = fn.resize(ext, **common)
        dali_resized_gpu, size_gpu = fn.resize(ext.gpu(), minibatch_size=4, **common)

        # Extract just the HW part of the input shape: two entries
        # starting at index 2 (FCHW) or 1 (FHWC).
        hw_start = 2 if channel_first else 1
        full_shape = fn.cast(fn.shapes(ext), dtype=types.INT32)
        ext_size = fn.slice(full_shape, hw_start, 2, axes=[0])

        pipe.set_outputs(dali_resized_cpu, dali_resized_gpu, ext_size, size_cpu, size_gpu)
    return pipe
def get_random_pipeline(device, batch_size):
    """Return a pipeline applying ``grid_mask`` on ``device`` with random args.

    Outputs, in order: masked image, decoded image, tile, ratio, angle.
    """
    pipe = Pipeline(batch_size, 4, 0)
    with pipe:
        encoded, _ = fn.readers.file(file_root=img_dir)
        decoded = fn.decoders.image(encoded, device='cpu', output_type=types.RGB)
        if device == 'gpu':
            decoded = decoded.gpu()
        tile_float = fn.uniform(range=(50, 200))
        tile = fn.cast(tile_float, dtype=types.INT32)
        ratio = fn.uniform(range=(0.3, 0.7))
        angle = fn.uniform(range=(-math.pi, math.pi))
        masked = fn.grid_mask(
            decoded, device=device, tile=tile, ratio=ratio, angle=angle)
        pipe.set_outputs(masked, decoded, tile, ratio, angle)
    return pipe
def get_image_pipeline(batch_size, num_threads, device, device_id=0, shard_id=0, num_shards=1,
                       def_for_dataset=False):
    """Return a COCO-dummy image pipeline plus its output shapes and dtypes.

    Pipeline outputs: normalized 224x224 images, image ids reshaped to
    [1, 1], and image ids cast to INT16. ``def_for_dataset`` is accepted
    but not used here.

    Returns:
        Tuple ``(pipe, shapes, dtypes)``.
    """
    dataset_dir = os.path.join(get_dali_extra_path(), 'db', 'coco_dummy')
    file_root = os.path.join(dataset_dir, 'images')
    annotations_file = os.path.join(dataset_dir, 'instances.json')

    # 'gpu' selects the mixed decoder; anything else decodes on the CPU.
    decoder_device = 'mixed' if device == 'gpu' else 'cpu'

    pipe = Pipeline(batch_size, num_threads, device_id)
    with pipe:
        jpegs, _, _, image_ids = fn.readers.coco(
            file_root=file_root,
            annotations_file=annotations_file,
            shard_id=shard_id,
            num_shards=num_shards,
            ratio=False,
            image_ids=True,
        )
        images = fn.decoders.image(
            jpegs, device=decoder_device, output_type=types.RGB)
        images = fn.resize(
            images, resize_x=224, resize_y=224, interp_type=types.INTERP_LINEAR)
        images = fn.crop_mirror_normalize(
            images, dtype=types.FLOAT, mean=[128., 128., 128.], std=[1., 1., 1.])
        if device == 'gpu':
            image_ids = image_ids.gpu()
        ids_reshaped = fn.reshape(image_ids, shape=[1, 1])
        ids_int16 = fn.cast(image_ids, dtype=types.INT16)

        pipe.set_outputs(images, ids_reshaped, ids_int16)

    shapes = (
        (batch_size, 3, 224, 224),
        (batch_size, 1, 1),
        (batch_size, 1),
    )
    dtypes = (tf.float32, tf.int32, tf.int16)
    return pipe, shapes, dtypes
def laplacian_pipe(window_size, in_type, out_type, normalize, grayscale):
    """Decode images and apply ``fn.laplacian``; return ``(edges, images)``."""
    # Use the OpenCV convention: window size 1 implies a derivative
    # kernel of size 3 and no smoothing.
    smoothing_size = None
    if window_size == 1:
        window_size = 3
        smoothing_size = 1

    encoded, _ = fn.readers.file(file_root=images_dir, shard_id=0, num_shards=1)
    color = types.GRAY if grayscale else types.RGB
    imgs = fn.decoders.image(encoded, device="cpu", output_type=color)
    # A cast is only added when a non-UINT8 input type was requested.
    if in_type != types.UINT8:
        imgs = fn.cast(imgs, dtype=in_type)
    edges = fn.laplacian(
        imgs,
        window_size=window_size,
        smoothing_size=smoothing_size,
        normalized_kernel=normalize,
        dtype=out_type,
    )
    return edges, imgs
def ExternalSourcePipeline(params, num_threads, device_id, external_date, seed):
    """Return a pipeline fed by ``external_date`` yielding (images, labels).

    Images are decoded with the mixed decoder, resized to 224x224,
    divided by 255, normalized with ``params.mean`` / ``params.std`` and
    transposed with ``perm=[2, 0, 1]``.
    """
    pipe = Pipeline(params.batch_size, num_threads, device_id, seed=seed)
    with pipe:
        jpegs, labels = fn.external_source(source=external_date, num_outputs=2)
        decoded = fn.image_decoder(jpegs, device="mixed", output_type=types.RGB)
        resized = fn.resize(decoded, resize_x=224, resize_y=224)
        as_uint8 = fn.cast(resized, dtype=types.UINT8)
        scaled = as_uint8 / 255
        normalized = fn.normalize(
            scaled,
            axes=[0, 1],
            mean=params.mean,
            stddev=params.std,
            device='gpu',
            batch=False,
        )
        output = fn.transpose(normalized, perm=[2, 0, 1], device='gpu')
        pipe.set_outputs(output, labels)
    return pipe
def pipe_gaussian_noise(mean, stddev, variable_dist_params, device=None):
    """Return Gaussian-noised images computed in two ways with one seed.

    The first output uses ``fn.noise.gaussian``; the second adds
    ``fn.random.normal`` (shaped like the input) to the input. With
    ``variable_dist_params`` set, ``mean`` / ``stddev`` are replaced by
    random per-sample values.
    """
    encoded, _ = fn.readers.file(file_root=images_dir)
    decoded = fn.decoders.image(encoded, device="cpu", output_type=types.RGB)
    in_data = fn.cast(decoded, dtype=types.FLOAT)
    if device == 'gpu':
        in_data = in_data.gpu()

    if variable_dist_params:
        mean_arg = fn.random.uniform(range=(-50.0, 50.0))
        stddev_arg = fn.random.uniform(range=(1.0, 10.0))
    else:
        mean_arg, stddev_arg = mean, stddev

    # Both operators receive the same seed and distribution arguments.
    dist_args = dict(mean=mean_arg, stddev=stddev_arg, seed=12345)
    out_data1 = fn.noise.gaussian(in_data, **dist_args)
    out_data2 = in_data + fn.random.normal(in_data, **dist_args)
    return out_data1, out_data2
def create_dali_pipe(channel_first, seq_len, interp, dtype, w, h, batch_size=2):
    """Return a pipeline resizing the same sequences on CPU and on GPU.

    Outputs, in order: CPU result, GPU result, the H/W part of the input
    shape, and the size attributes saved by the CPU and GPU resize. The
    slice anchor and shape are passed as constant nodes.
    """
    pipe = dali.pipeline.Pipeline(batch_size, 1, 0, 0)
    with pipe:
        layout = "FCHW" if channel_first else "FHWC"
        ext = fn.external_source(
            GetSequences(channel_first, seq_len, batch_size), layout=layout)

        # Arguments shared by both resize variants.
        common = dict(
            resize_x=w, resize_y=h, interp_type=interp, dtype=dtype, save_attrs=True)
        dali_resized_cpu, size_cpu = fn.resize(ext, **common)
        dali_resized_gpu, size_gpu = fn.resize(ext.gpu(), minibatch_size=4, **common)

        # Extract just the HW part of the input shape: two entries
        # starting at index 2 (FCHW) or 1 (FHWC), given in absolute
        # (non-normalized) coordinates.
        hw_start = np.array([2 if channel_first else 1], dtype=np.float32)
        hw_count = np.array([2], dtype=np.float32)
        full_shape = fn.cast(fn.shapes(ext), dtype=types.INT32)
        ext_size = fn.slice(
            full_shape,
            types.Constant(hw_start, device="cpu"),
            types.Constant(hw_count, device="cpu"),
            normalized_anchor=False,
            normalized_shape=False,
            axes=[0],
        )

        pipe.set_outputs(dali_resized_cpu, dali_resized_gpu, ext_size, size_cpu, size_gpu)
    return pipe
def get_pipeline(batch_size, num_threads, device, device_id=0, shard_id=0, num_shards=1):
    """Return a COCO-dummy image pipeline rooted at ``$DALI_EXTRA_PATH``.

    Pipeline outputs: normalized 224x224 images, image ids reshaped to
    [1, 1], and image ids cast to INT16.
    """
    dataset_dir = os.path.join(os.environ['DALI_EXTRA_PATH'], 'db', 'coco_dummy')
    file_root = os.path.join(dataset_dir, 'images')
    annotations_file = os.path.join(dataset_dir, 'instances.json')

    # 'gpu' selects the mixed decoder; anything else decodes on the CPU.
    decoder_device = 'mixed' if device == 'gpu' else 'cpu'

    pipe = Pipeline(batch_size, num_threads, device_id)
    with pipe:
        jpegs, _, _, image_ids = fn.coco_reader(
            file_root=file_root,
            annotations_file=annotations_file,
            shard_id=shard_id,
            num_shards=num_shards,
            ratio=False,
            image_ids=True,
        )
        images = fn.image_decoder(
            jpegs, device=decoder_device, output_type=types.RGB)
        images = fn.resize(
            images, resize_x=224, resize_y=224, interp_type=types.INTERP_LINEAR)
        images = fn.crop_mirror_normalize(
            images, dtype=types.FLOAT, mean=[128., 128., 128.], std=[1., 1., 1.])
        if device == 'gpu':
            image_ids = image_ids.gpu()
        ids_reshaped = fn.reshape(image_ids, shape=[1, 1])
        ids_int16 = fn.cast(image_ids, dtype=types.INT16)

        pipe.set_outputs(images, ids_reshaped, ids_int16)
    return pipe
def one_input_pipeline(def_for_dataset, device, source, external_source_device, no_copy, batch):
    """Pipeline accepting single input via external source.

    The input is also shifted by 10 and cast to INT32; the input and the
    processed copy are padded together and both are returned.

    Parameters
    ----------
    def_for_dataset : bool
        True if this pipeline will be converted to TF Dataset
    device : str
        device that the Dataset will be placed ("cpu" or "gpu")
    source : callable
        callback for the external source in baseline pipeline otherwise None
    external_source_device : str
        Device that we want the external source in TF dataset to be placed
    no_copy : bool or None
        ``no_copy`` argument of the placeholder external source; None
        means "infer it" (see below). Only used when ``def_for_dataset``.
    batch
        ``batch`` argument of the placeholder external source; the
        special value "dataset" is replaced with None (the default).
    """
    if not def_for_dataset:
        node = fn.external_source(
            name="actual_input", source=source, batch=False, device=external_source_device)
    else:
        if no_copy is None:
            # Inferred automatically: no_copy=True when the Dataset's
            # placement matches the external source's device; for the
            # cross-backend case it is False.
            no_copy = (device == external_source_device)
        if batch == "dataset":
            # Special value used in tests, reroute it to the default.
            batch = None
        node = fn.external_source(
            name="input_placeholder",
            no_copy=no_copy,
            device=external_source_device,
            batch=batch,
        )

    if device != 'cpu':
        node = node.gpu()
    processed = fn.cast(node + 10, dtype=dali.types.INT32)
    input_padded, processed_padded = fn.pad([node, processed])
    return input_padded, processed_padded
def check_pad_to_square(device='cpu', batch_size=3, ndim=2, num_iter=3):
    """Pad random "HW" samples to squares and verify size and zero fill.

    The pad target is ``max(h, w)`` on both axes; the test asserts that
    the output is square, that the original data is preserved, and that
    the added region is zero.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=3, device_id=0, seed=1234)
    with pipe:
        rand_shape = fn.cast(
            fn.random.uniform(range=(10, 20), shape=(ndim, )), dtype=types.INT32)
        values = fn.random.uniform(range=(0., 1.), shape=rand_shape)
        data_node = fn.reshape(values, layout="HW")
        shape_node = fn.shapes(data_node, dtype=types.INT32)
        h = fn.slice(shape_node, 0, 1, axes=[0])
        w = fn.slice(shape_node, 1, 1, axes=[0])
        side = math.max(h, w)
        if device == 'gpu':
            data_node = data_node.gpu()
        square = fn.cat(side, side, axis=0)
        padded_node = fn.pad(data_node, axis_names="HW", shape=square)
        pipe.set_outputs(data_node, padded_node)
    pipe.build()

    for _ in range(num_iter):
        # Bring GPU outputs back to the host so samples can be inspected.
        outs = [
            out.as_cpu() if isinstance(out, TensorListGPU) else out
            for out in pipe.run()
        ]
        for i in range(batch_size):
            in_data, out_data = [out.at(i) for out in outs]
            rows, cols = in_data.shape[0], in_data.shape[1]
            max_side = max(in_data.shape)
            assert all(s == max_side for s in out_data.shape)
            # Original content sits in the top-left corner ...
            np.testing.assert_equal(out_data[:rows, :cols], in_data)
            # ... and everything below / to the right of it is zero.
            np.testing.assert_equal(out_data[rows:, :], 0)
            np.testing.assert_equal(out_data[:, cols:], 0)
def cast_pipe():
    """Return the external-source input and its cast to ``out_dtype``.

    Uses ``src``, ``device`` and ``out_dtype`` from the enclosing scope;
    the cast runs on the GPU copy when ``device`` is 'gpu'.
    """
    inp = fn.external_source(src)
    if device == 'gpu':
        cast_input = inp.gpu()
    else:
        cast_input = inp
    converted = fn.cast(cast_input, dtype=np_type_to_dali(out_dtype))
    return inp, converted
def build_pipes(device, dim, batch_size, channel_first, mode, interp, dtype, w_input, h_input,
                d_input, use_size_arg, use_size_input, use_roi):
    """Build the DALI resize pipeline and the matching PIL reference pipeline.

    The DALI pipeline outputs the input images, the resized images, the
    RoI corners (when ``use_roi``) and every size argument that was used
    (``d``, ``h``, ``w``, ``size`` in that order, constants wrapped as
    float32 nodes). The PIL pipeline takes images, sizes and RoI via
    named external sources. Both pipelines are built before returning.

    Returns:
        Tuple ``(dali_pipe, pil_pipe)``.
    """
    dali_pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0, seed=1234)
    with dali_pipe:
        if dim == 2:
            files, _ = dali.fn.readers.caffe(path=db_2d_folder, random_shuffle=True)
            images_cpu = dali.fn.decoders.image(files, device="cpu")
        else:
            images_cpu = dali.fn.external_source(
                source=random_3d_loader(batch_size), layout="DHWC")

        images_hwc = images_cpu if device == "cpu" else images_cpu.gpu()

        if channel_first:
            # Move the channel axis to the front and permute the layout.
            perm = [3, 0, 1, 2] if dim == 3 else [2, 0, 1]
            images = dali.fn.transpose(images_hwc, perm=perm, transpose_layout=True)
        else:
            images = images_hwc

        roi_start = roi_end = None
        w = h = d = size = None

        minibatch_size = 2 if dim == 3 else 8

        if use_roi:
            # Calculate absolute RoI: random relative corners scaled by
            # the first ``dim`` entries of the input shape.
            in_size = fn.slice(
                fn.shapes(images_cpu),
                types.Constant(0, dtype=types.FLOAT, device="cpu"),
                types.Constant(dim, dtype=types.FLOAT, device="cpu"),
                axes=[0],
                normalized_shape=False,
            )
            roi_start = fn.random.uniform(range=(0, 0.4), shape=[dim]) * in_size
            roi_end = fn.random.uniform(range=(0.6, 1.0), shape=[dim]) * in_size

        size_range = (10, 200) if dim == 3 else (10, 1000)

        def _extent(randomized, fixed_value):
            # Either a fixed extent, or a random one that the coin flip
            # zeroes out with probability 0.2.
            if not randomized:
                return fixed_value
            present = fn.random.coin_flip(probability=0.8)
            return fn.random.uniform(range=size_range) * present

        if use_size_arg:
            if use_size_input:
                # Per-dimension 0/1 mask that zeroes some size entries.
                mask = fn.cast(
                    fn.random.uniform(range=(0.8, 1.9), shape=[dim]), dtype=types.INT32)
                size = fn.random.uniform(range=size_range, shape=[dim]) * mask
            else:
                size = [300, 400] if dim == 2 else [80, 100, 120]
        else:
            w = _extent(w_input, 320)  # some fixed value
            h = _extent(h_input, 240)  # some other fixed value
            if dim >= 3:
                d = _extent(d_input, 31)  # some other fixed value

        # Exactly one of ``size`` or the (w, h, d) group is set above;
        # the other stays None.
        resized = resize_dali(
            images, channel_first, dtype, interp, mode, size, w, h, d, roi_start, roi_end,
            minibatch_size=minibatch_size, max_size=max_size(dim))

        outputs = [images, resized]
        if roi_start is not None and roi_end is not None:
            outputs += [roi_start, roi_end]

        # Append every size argument in use; plain Python values are
        # wrapped as float32 constants so they can be pipeline outputs.
        outputs += [
            x if isinstance(x, _DataNode)
            else types.Constant(np.array(x, dtype=np.float32))
            for x in (d, h, w, size)
            if x is not None
        ]

        dali_pipe.set_outputs(*outputs)

    pil_pipe = Pipeline(
        batch_size=batch_size, num_threads=8, device_id=0,
        exec_async=False, exec_pipelined=False)
    with pil_pipe:
        ref_images = fn.external_source(name="images", layout=layout_str(dim, channel_first))
        ref_sizes = fn.external_source(name="size")
        ref_roi_start = fn.external_source(name="roi_start")
        ref_roi_end = fn.external_source(name="roi_end")
        ref_resized = resize_PIL(
            dim, channel_first, dtype, interp, ref_images, ref_sizes,
            ref_roi_start, ref_roi_end)
        ref_resized = fn.reshape(ref_resized, layout=layout_str(dim, channel_first))
        pil_pipe.set_outputs(ref_resized)

    dali_pipe.build()
    pil_pipe.build()

    return dali_pipe, pil_pipe
def random_augmentation(self, probability, augmented, original):
    """Blend ``augmented`` and ``original`` using a per-sample coin flip.

    The coin flip (with the given ``probability``) is cast to BOOL; the
    result is ``augmented`` weighted by the flip plus ``original``
    weighted by its negation.
    """
    flip = fn.coin_flip(probability=probability)
    use_augmented = fn.cast(flip, dtype=types.DALIDataType.BOOL)
    # XOR with True negates the boolean condition.
    use_original = use_augmented ^ True
    return use_augmented * augmented + use_original * original
def dali_asr_pipeline(
    train_pipeline,  # True if training, False if validation
    file_root,
    file_list,
    sample_rate,
    silence_threshold,
    resample_range,
    discrete_resample_range,
    window_size,
    window_stride,
    nfeatures,
    nfft,
    frame_splicing_factor,
    dither_coeff,
    pad_align,
    preemph_coeff,
    do_spectrogram_masking=False,
    cutouts_generator=None,
    shard_id=0,
    n_shards=1,
    preprocessing_device="gpu",
    is_triton_pipeline=False,
):
    """Define the audio feature-extraction graph (decode to log-mel features).

    Steps: read or receive encoded audio, optional speed perturbation via
    the decoder sample rate, optional silence trimming, optional dither,
    pre-emphasis, spectrogram, mel filter bank, decibel conversion,
    normalization, padding, and optional cutout masking (training only).

    Returns:
        For the Triton variant, the features on GPU cast to FLOAT16.
        Otherwise a tuple ``(features, label, features_len)``, all on GPU.
    """
    if is_triton_pipeline:
        assert not train_pipeline, "Pipeline for Triton shall be a validation pipeline"
        encoded = fn.external_source(device="cpu", name="DALI_INPUT_0", no_copy=True)
    else:
        encoded, label = fn.readers.file(
            device="cpu",
            name="file_reader",
            file_root=file_root,
            file_list=file_list,
            shard_id=shard_id,
            num_shards=n_shards,
            shuffle_after_epoch=train_pipeline,
        )

    # Random speed coefficient: one of three discrete values, or drawn
    # from the continuous range.
    speed_perturbation_coeffs = None
    if resample_range is not None:
        if discrete_resample_range:
            choices = [resample_range[0], 1.0, resample_range[1]]
            speed_perturbation_coeffs = fn.random.uniform(device="cpu", values=choices)
        else:
            speed_perturbation_coeffs = fn.random.uniform(device="cpu", range=resample_range)

    # Decoder sample rate: perturbed when training with a resample range,
    # the plain rate when no range was given, otherwise left unset.
    dec_sample_rate_arg = None
    if train_pipeline and speed_perturbation_coeffs is not None:
        dec_sample_rate_arg = speed_perturbation_coeffs * sample_rate
    elif resample_range is None:
        dec_sample_rate_arg = sample_rate

    audio, _ = fn.decoders.audio(
        encoded, sample_rate=dec_sample_rate_arg, dtype=types.FLOAT, downmix=True)

    if silence_threshold is not None:
        # Keep only the non-silent region of the signal.
        begin, length = fn.nonsilent_region(audio, cutoff_db=silence_threshold)
        audio = fn.slice(audio, begin, length, axes=[0])

    # Max duration drop is performed at DataLayer stage

    if preprocessing_device == "gpu":
        audio = audio.gpu()

    if dither_coeff != 0.0:
        noise = fn.random.normal(device=preprocessing_device)
        audio = audio + noise * dither_coeff

    audio = fn.preemphasis_filter(audio, preemph_coeff=preemph_coeff)

    # Window length / step are given to this function in seconds and
    # converted to samples here.
    spec = fn.spectrogram(
        audio,
        nfft=nfft,
        window_length=window_size * sample_rate,
        window_step=window_stride * sample_rate,
    )

    mel_spec = fn.mel_filter_bank(
        spec, sample_rate=sample_rate, nfilter=nfeatures, normalize=True)

    log_features = fn.to_decibels(
        mel_spec,
        multiplier=np.log(10),
        reference=1.0,
        cutoff_db=math.log(1e-20),
    )

    log_features_len = fn.shapes(log_features)
    if frame_splicing_factor != 1:
        # Ceiling division of the feature shape by the splicing factor.
        log_features_len = (
            log_features_len + (frame_splicing_factor - 1)) // frame_splicing_factor

    log_features = fn.normalize(log_features, axes=[1])
    log_features = fn.pad(
        log_features, axes=[1], fill_value=0, align=pad_align, shape=(-1, ))

    if train_pipeline and do_spectrogram_masking:
        anchors, shapes = fn.external_source(
            source=cutouts_generator, num_outputs=2, cycle=True)
        log_features = fn.erase(
            log_features,
            anchor=anchors,
            shape=shapes,
            axes=[0, 1],
            fill_value=0,
            normalized_anchor=True,
        )

    # When modifying DALI pipeline returns, make sure you update `output_map`
    # in DALIGenericIterator invocation
    if is_triton_pipeline:
        return fn.cast(log_features.gpu(), dtype=types.FLOAT16)
    return log_features.gpu(), label.gpu(), log_features_len.gpu()
device='gpu', bytes_per_sample_hint=ImageBytes, size=(H, W)) images = fn.gaussian_blur(images, device='gpu', bytes_per_sample_hint=ImageBytes, sigma=fn.uniform(range=(0.1, 2)), window_size=11) images = fn.color_twist(images, device='gpu', bytes_per_sample_hint=ImageBytes, brightness=fn.uniform(range=(0.5, 1.5)), contrast=fn.uniform(range=(0.5, 2.5)), saturation=fn.uniform(range=(0.1, 2))) images = fn.cast(images, device='gpu', bytes_per_sample_hint=ImageBytes, dtype=DALIDataType.FLOAT) images = fn.normalize( images, device='gpu', bytes_per_sample_hint=ImageBytes, mean=Constant(numpy.array([[[190.6380, 207.2640, 202.5720]]])), stddev=Constant(numpy.array([[[85.2720, 68.6970, 81.4215]]]))) images = fn.transpose(images, device='gpu', bytes_per_sample_hint=ImageBytes, perm=[2, 0, 1]) TestingPipe.set_outputs(images, labels) TestingLoader = DALIClassificationIterator(TestingPipe, size=1000 * args.bs)
def pipeline():
    """Return two scaled inputs: a CPU input on GPU / 3 and a GPU input / 2.

    The second input is cast to FLOAT before the division.
    """
    cpu_input = fn.external_source(device='cpu', name='DALI_INPUT_0')
    gpu_input = fn.external_source(device='gpu', name='DALI_INPUT_1')
    first = cpu_input.gpu() / 3
    second = fn.cast(gpu_input, dtype=dali.types.FLOAT) / 2
    return first, second
def _test_random_object_bbox_with_class(max_batch_size, ndim, dtype, format=None, fg_prob=None,
                                        classes=None, weights=None, background=None,
                                        threshold=None, k_largest=None, cache=None):
    """Run ``RandomObjectBBox`` with class output and validate the boxes.

    One preconfigured operator is invoked twice on the same input (once
    with ``cache_objects``, once with ``output_class=True``); the box
    outputs of both invocations must match, and each returned box must
    be consistent with the returned class label, ``threshold`` and
    ``k_largest``. 50 batches are checked.
    """
    pipe = dali.Pipeline(max_batch_size, 4, device_id=None, seed=4321)

    # Values returned as pipeline outputs so they can be read per sample;
    # arguments that were not given are replaced with placeholders.
    background_arg = 0 if background is None else background
    classes_arg = np.int32([]) if classes is None else classes
    weights_arg = np.int32([]) if weights is None else weights
    threshold_arg = np.int32([]) if threshold is None else threshold

    if cache:
        source = sampled_dataset(2 * max_batch_size, max_batch_size, ndim, dtype)
    else:
        source = batch_generator(max_batch_size, ndim, dtype)

    with pipe:
        in_node = fn.external_source(source)
        background_is_node = isinstance(background, dali.pipeline.DataNode)
        if background_is_node or (background is not None and background >= 0):
            # Shift the input by (background + 1) when a non-negative or
            # per-sample background label is used.
            in_node = fn.cast(in_node + (background_arg + 1), dtype=np_type_to_dali(dtype))
        # preconfigure
        op = ops.segmentation.RandomObjectBBox(
            format=format,
            foreground_prob=fg_prob,
            classes=classes,
            class_weights=weights,
            background=background,
            threshold=threshold,
            k_largest=k_largest,
            seed=1234,
        )
        plain_nodes = op(in_node, cache_objects=cache)
        with_class_nodes = op(in_node, output_class=True)
        if not isinstance(plain_nodes, list):
            plain_nodes = [plain_nodes]
        # the second instance should have always at least 2 outputs
        assert isinstance(with_class_nodes, (list, tuple))
        pipe.set_outputs(
            in_node, classes_arg, weights_arg, background_arg, threshold_arg,
            *plain_nodes, *with_class_nodes)
    pipe.build()

    box_format = format or "anchor_shape"

    for _ in range(50):
        (in_batch, classes_batch, _weights_batch, background_batch,
         threshold_batch, *rest) = pipe.run()
        # ``rest`` holds the first invocation's outputs followed by the
        # second's, which has one extra output (the class label).
        n_plain = (len(rest) - 1) // 2
        plain, with_class = rest[:n_plain], rest[n_plain:]
        for lhs, rhs in zip(plain, with_class):
            check_batch(lhs, rhs)

        # Iterate over indices instead of elements, because normal iteration
        # causes an exception to be thrown in native code, making debugging
        # near impossible.
        per_output = tuple(
            [np.array(out[i]) for i in range(len(out))] for out in plain)
        label_batch = with_class[-1]
        box_class_labels = [
            np.int32(label_batch[i]) for i in range(len(label_batch))
        ]

        boxes = convert_boxes(per_output, box_format)

        for i in range(len(in_batch)):
            in_tensor = in_batch.at(i)
            class_labels = classes_batch.at(i)

            if background is not None or classes is None:
                background_label = background_batch.at(i)
            elif 0 not in class_labels:
                background_label = 0
            else:
                # 0 is a foreground class here, so the background label
                # is taken to be one below the smallest class.
                background_label = np.min(class_labels) - 1

            label = box_class_labels[i]
            if classes is not None:
                assert label == background_label or label in list(class_labels)

            is_foreground = label != background_label
            cls_boxes = class_boxes(in_tensor, label if is_foreground else None)

            if is_foreground:
                ref_boxes = cls_boxes
                if threshold is not None:
                    extent = box_extent(boxes[i])
                    thr = threshold_batch.at(i)
                    assert np.all(extent >= thr)
                    # Only boxes meeting the threshold are candidates
                    # for the k-largest check.
                    ref_boxes = [
                        box for box in cls_boxes if np.all(box_extent(box) >= thr)
                    ]

                if k_largest is not None:
                    assert box_in_k_largest(ref_boxes, boxes[i], k_largest)
            assert contains_box(cls_boxes, boxes[i])