def pipeline():
    """Return an external-source node named 'input' fed with an 8x8 array of zeros."""
    zeros = np.zeros((8, 8))
    return fn.external_source(source=zeros, name='input')
def create_pipline():
    """Return an external-source node driven by ``callback`` with ``batch=False``.

    ``callback`` and ``parallel`` are not parameters of this function; both are
    looked up in the surrounding scope when the function is called.
    """
    return fn.external_source(source=callback, batch=False, parallel=parallel)
def pipe_no_input(max_batch_size, input_data, device):
    """Create a pipeline that adds ``fn.random.normal()`` output to external data.

    :param max_batch_size: Value passed as the pipeline's ``batch_size``.
    :param input_data: Source handed to ``fn.external_source`` (``cycle=False``).
    :param device: Device argument of the external source.
    :return: The pipeline with its output set; it is not built here.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    source_node = fn.external_source(source=input_data, cycle=False, device=device)
    noise = fn.random.normal()
    pipeline.set_outputs(source_node + noise)
    return pipeline
def test_ellipsis_not_implemented():
    """Indexing an external-source node with an Ellipsis must raise NotImplementedError."""
    batch = [np.uint8([1, 2, 3]), np.uint8([1, 2])]
    node = fn.external_source(lambda: batch)
    with assert_raises(NotImplementedError):
        node[..., :1]
def build_pipes(device, dim, batch_size, channel_first, mode, interp, dtype,
                w_input, h_input, d_input, use_size_arg, use_size_input, use_roi):
    """Build a DALI resize pipeline and a PIL-based reference pipeline.

    The DALI pipeline produces its own input images, resizes them through
    ``resize_dali`` and outputs ``[images, resized]`` followed by the optional
    RoI bounds and every size argument that was actually used (in the order
    ``d, h, w, size``).  The reference pipeline takes everything from external
    sources named ``"images"``, ``"size"``, ``"roi_start"`` and ``"roi_end"``
    and resizes through ``resize_PIL``.

    :param device: ``"cpu"`` keeps the images on the CPU; any other value moves
                   them with ``.gpu()`` before resizing.
    :param dim: 2 reads and decodes images from ``db_2d_folder``; any other
                value feeds volumes from ``random_3d_loader`` with layout "DHWC".
    :param batch_size: Batch size of both pipelines.
    :param channel_first: If true, the images are transposed so that the
                          channel dimension comes first.
    :param mode: Forwarded to ``resize_dali``.
    :param interp: Forwarded to ``resize_dali`` and ``resize_PIL``.
    :param dtype: Forwarded to ``resize_dali`` and ``resize_PIL``.
    :param w_input: If true, the width is a random per-sample input; otherwise 320.
    :param h_input: If true, the height is a random per-sample input; otherwise 240.
    :param d_input: If true, the depth is a random per-sample input; otherwise 31
                    (only used when ``dim >= 3``).
    :param use_size_arg: If true, a single ``size`` argument is used instead of
                         separate ``w``/``h``/``d``.
    :param use_size_input: With ``use_size_arg``, selects a random per-sample
                           ``size`` instead of a fixed list.
    :param use_roi: If true, random RoI bounds are generated and passed to the resize.
    :return: ``(dali_pipe, pil_pipe)``, both already built.
    """
    dali_pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0, seed=1234)
    with dali_pipe:
        if dim == 2:
            files, _labels = dali.fn.caffe_reader(path=db_2d_folder, random_shuffle=True)
            images_cpu = dali.fn.image_decoder(files, device="cpu")
        else:
            images_cpu = dali.fn.external_source(source=random_3d_loader(batch_size),
                                                 layout="DHWC")

        images_hwc = images_cpu if device == "cpu" else images_cpu.gpu()

        if channel_first:
            images = dali.fn.transpose(images_hwc,
                                       perm=[3, 0, 1, 2] if dim == 3 else [2, 0, 1],
                                       transpose_layout=True)
        else:
            images = images_hwc

        roi_start = None
        roi_end = None
        w = None
        h = None
        d = None
        size = None

        minibatch_size = 2 if dim == 3 else 8

        if use_roi:
            # Calculate absolute RoI
            in_size = fn.slice(fn.shapes(images_cpu),
                               types.Constant(0, dtype=types.FLOAT, device="cpu"),
                               types.Constant(dim, dtype=types.FLOAT, device="cpu"),
                               axes=[0],
                               normalized_shape=False)
            roi_start = fn.uniform(range=(0, 0.4), shape=[dim]) * in_size
            roi_end = fn.uniform(range=(0.6, 1.0), shape=[dim]) * in_size

        size_range = (10, 200) if dim == 3 else (10, 1000)

        if use_size_arg:
            if use_size_input:
                # The cast to INT32 turns the (0.8, 1.9) samples into a per-extent
                # multiplier for the randomly drawn size.
                mask = fn.cast(fn.uniform(range=(0.8, 1.9), shape=[dim]), dtype=types.INT32)
                size = fn.uniform(range=size_range, shape=[dim]) * mask
            else:
                size = [300, 400] if dim == 2 else [80, 100, 120]

            resized = resize_dali(images, channel_first, dtype, interp, mode,
                                  size, None, None, None, roi_start, roi_end,
                                  minibatch_size=minibatch_size, max_size=max_size(dim))
        else:
            if w_input:
                has_w = fn.coin_flip(probability=0.8)
                w = fn.uniform(range=size_range) * has_w
            else:
                w = 320  # some fixed value

            if h_input:
                has_h = fn.coin_flip(probability=0.8)
                h = fn.uniform(range=size_range) * has_h
            else:
                h = 240  # some other fixed value

            if dim >= 3:
                if d_input:
                    has_d = fn.coin_flip(probability=0.8)
                    d = fn.uniform(range=size_range) * has_d
                else:
                    d = 31  # some other fixed value

            resized = resize_dali(images, channel_first, dtype, interp, mode,
                                  None, w, h, d, roi_start, roi_end,
                                  minibatch_size=minibatch_size, max_size=max_size(dim))

        outputs = [images, resized]
        # Compare against None explicitly: the RoI bounds are graph nodes, whose
        # truth value says nothing about whether an RoI was requested.
        if roi_start is not None and roi_end is not None:
            outputs += [roi_start, roi_end]

        for x in (d, h, w, size):
            if x is not None:
                if isinstance(x, _DataNode):
                    outputs.append(x)
                else:
                    # Fixed Python values are wrapped so they can be pipeline outputs.
                    outputs.append(types.Constant(np.array(x, dtype=np.float32)))

        dali_pipe.set_outputs(*outputs)

    pil_pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0,
                        exec_async=False, exec_pipelined=False)
    with pil_pipe:
        images = fn.external_source(name="images", layout=layout_str(dim, channel_first))
        sizes = fn.external_source(name="size")
        roi_start = fn.external_source(name="roi_start")
        roi_end = fn.external_source(name="roi_end")
        resized = resize_PIL(dim, channel_first, dtype, interp, images, sizes,
                             roi_start, roi_end)
        resized = fn.reshape(resized, layout=layout_str(dim, channel_first))
        pil_pipe.set_outputs(resized)

    dali_pipe.build()
    pil_pipe.build()

    return dali_pipe, pil_pipe
def reduce_pipeline(op):
    """Apply ``op`` to external data and the ``fn.reductions.mean`` of that data.

    :param op: Callable invoked as ``op(data, mean)``.
    :return: Whatever ``op`` returns.
    """
    samples = fn.external_source(source=get_data)
    samples_mean = fn.reductions.mean(samples)
    return op(samples, samples_mean)
def multi_input_pipeline(op, n):
    """Feed ``op`` with ``n`` external-source nodes (layout 'HWC') as positional inputs.

    :param op: Callable invoked with the ``n`` nodes unpacked.
    :param n: Number of external-source nodes to create.
    :return: Whatever ``op`` returns.
    """
    inputs = []
    for _ in range(n):
        inputs.append(fn.external_source(source=get_data, layout='HWC'))
    return op(*inputs)
def image_decoder_rcrop_pipe(max_batch_size, input_data, device):
    """Create a pipeline running ``fn.decoders.image_random_crop`` on external data.

    :param max_batch_size: Value passed as the pipeline's ``batch_size``.
    :param input_data: Source for the CPU external source (``cycle=False``).
    :param device: Device argument of the decoder.
    :return: The pipeline with its output set; it is not built here.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    encoded_input = fn.external_source(source=input_data, cycle=False, device='cpu')
    pipeline.set_outputs(fn.decoders.image_random_crop(encoded_input, device=device))
    return pipeline
def peek_image_shape_pipe(max_batch_size, input_data, device):
    """Create a pipeline running ``fn.peek_image_shape`` on external data.

    :param max_batch_size: Value passed as the pipeline's ``batch_size``.
    :param input_data: Source for the CPU external source (``cycle=False``).
    :param device: Device argument of ``fn.peek_image_shape``.
    :return: The pipeline with its output set; it is not built here.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    encoded_input = fn.external_source(source=input_data, cycle=False, device='cpu')
    pipeline.set_outputs(fn.peek_image_shape(encoded_input, device=device))
    return pipeline
def pipe(max_batch_size, input_data, device):
    """Create a pipeline running ``fn.lookup_table`` (keys 1, 3 -> values 10, 50).

    :param max_batch_size: Value passed as the pipeline's ``batch_size``.
    :param input_data: Source for the external source (``cycle=False``).
    :param device: Device argument of the external source.
    :return: The pipeline with its output set; it is not built here.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    source_node = fn.external_source(source=input_data, cycle=False, device=device)
    mapped = fn.lookup_table(source_node, keys=[1, 3], values=[10, 50])
    pipeline.set_outputs(mapped)
    return pipeline
def audio_decoder_pipe(max_batch_size, input_data, device):
    """Create a pipeline running ``fn.decoders.audio`` on external data.

    The decoder is called with ``downmix=True`` and ``sample_rate=12345``; only
    the first of its two outputs becomes the pipeline output.

    :param max_batch_size: Value passed as the pipeline's ``batch_size``.
    :param input_data: Source for the CPU external source (``cycle=False``).
    :param device: Device argument of the decoder.
    :return: The pipeline with its output set; it is not built here.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    encoded_input = fn.external_source(source=input_data, cycle=False, device='cpu')
    samples, _unused = fn.decoders.audio(
        encoded_input,
        downmix=True,
        sample_rate=12345,
        device=device,
    )
    pipeline.set_outputs(samples)
    return pipeline
def pipe(max_batch_size, input_data, device):
    """Create a pipeline running ``fn.constant`` with an externally supplied shape.

    :param max_batch_size: Value passed as the pipeline's ``batch_size``.
    :param input_data: Source for the CPU external source (``cycle=False``) whose
                       output is used as the ``shape`` argument.
    :param device: Device argument of ``fn.constant``.
    :return: The pipeline with its output set; it is not built here.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    shape_node = fn.external_source(source=input_data, cycle=False, device='cpu')
    pipeline.set_outputs(fn.constant(fdata=3.1415, shape=shape_node, device=device))
    return pipeline
return [np.random.randint(lo, hi, size=(bs,) + size_fn(), dtype=dtype) for bs in batch_sizes] elif np.issubdtype(dtype, np.float): ret = (np.random.random_sample(size=(bs,) + size_fn()) for bs in batch_sizes) ret = map(lambda batch: (hi - lo) * batch + lo, ret) ret = map(lambda batch: batch.astype(dtype), ret) return list(ret) else: raise RuntimeError("Invalid type argument") def single_op_pipeline(max_batch_size, input_data, device, /, *, input_layout=None, operator_fn=None, **opfn_args): pipe = Pipeline(batch_size=max_batch_size, num_threads=1, device_id=0) with pipe: input = fn.external_source(source=input_data, cycle=False, device=device, layout=input_layout) output = input if operator_fn is None else operator_fn(input, device=device, **opfn_args) pipe.set_outputs(output) return pipe def run_pipeline(input_epoch, pipeline_fn, *, devices: list = ['cpu', 'gpu'], **pipeline_fn_args): """ Verifies, if given pipeline supports iter-to-iter variable batch size This function verifies only if given pipeline runs without crashing. There is no qualitative verification. Use this for checking pipelines based on random operators (as they can't be verifies against one another). :param input_epoch: List of numpy arrays, where every item is a single batch :param pipeline_fn: Function, that returns created (but not built) pipeline.
def serialized_pipe():
    """Return an external-source node named "es"."""
    node = fn.external_source(name="es")
    return node
def box_encoder_pipeline(get_boxes, get_labels):
    """Run ``fn.box_encoder`` on two external sources with ``coco_anchors()`` anchors.

    :param get_boxes: Source for the boxes external source.
    :param get_labels: Source for the labels external source.
    :return: The ``fn.box_encoder`` outputs converted to a tuple.
    """
    box_node = fn.external_source(source=get_boxes)
    label_node = fn.external_source(source=get_labels)
    encoded = fn.box_encoder(box_node, label_node, anchors=coco_anchors())
    return tuple(encoded)
def random_weights():
    """Return an external-source node (``batch=False``) producing random weight vectors.

    Each call of the underlying callback draws a length from
    ``np.random.randint(1, 10)`` and returns that many ``np.random.random``
    values as ``float32``.
    """
    def get_weights():
        length = np.random.randint(1, 10)
        return np.random.random(length).astype(np.float32)

    return fn.external_source(get_weights, batch=False)
def segmentation_select_masks_input_pipeline(source):
    """Expose the three outputs of ``source`` as external-source nodes.

    The nodes are placed on 'cpu' when the current pipeline's ``device_id`` is
    None and on 'gpu' otherwise.

    :param source: Source for a three-output external source.
    :return: ``(selected_masks, polygons, vertices)`` - note that the order
             differs from the order in which the source yields them.
    """
    if Pipeline.current().device_id is None:
        device = 'cpu'
    else:
        device = 'gpu'
    polygons, vertices, selected_masks = fn.external_source(
        source=source,
        num_outputs=3,
        device=device,
    )
    return selected_masks, polygons, vertices
def _test_random_object_bbox_with_class(max_batch_size, ndim, dtype, format=None, fg_prob=None,
                                        classes=None, weights=None, background=None,
                                        threshold=None, k_largest=None, cache=None):
    """Check ``RandomObjectBBox`` outputs, with and without ``output_class=True``.

    One operator instance is called twice on the same input: once with
    ``cache_objects=cache`` and once with ``output_class=True``.  For 50
    iterations the box outputs of both calls are compared with ``check_batch``
    and every returned box is validated against the input with the helpers
    ``class_boxes``, ``box_extent``, ``box_in_k_largest`` and ``contains_box``.

    ``format``, ``fg_prob``, ``classes``, ``weights``, ``background``,
    ``threshold`` and ``k_largest`` are forwarded to the operator constructor
    (``fg_prob`` as ``foreground_prob``, ``weights`` as ``class_weights``).
    ``cache`` additionally selects the data source: ``sampled_dataset`` when
    truthy, ``batch_generator`` otherwise.
    """
    pipe = dali.Pipeline(max_batch_size, 4, device_id=None, seed=4321)
    # The operator arguments are also returned as pipeline outputs so that their
    # per-sample values can be read back below; missing ones get placeholders.
    background_out = 0 if background is None else background
    classes_out = np.int32([]) if classes is None else classes
    weights_out = np.int32([]) if weights is None else weights
    threshold_out = np.int32([]) if threshold is None else threshold

    if cache:
        source = sampled_dataset(2 * max_batch_size, max_batch_size, ndim, dtype)
    else:
        source = batch_generator(max_batch_size, ndim, dtype)

    with pipe:
        inp = fn.external_source(source)
        # For a DataNode or non-negative background, shift the input labels by
        # (background + 1) and cast back to the input type.
        if isinstance(background, dali.pipeline.DataNode) or (background is not None and background >= 0):
            inp = fn.cast(inp + (background_out + 1), dtype=np_type_to_dali(dtype))
        # preconfigure
        op = ops.segmentation.RandomObjectBBox(format=format,
                                               foreground_prob=fg_prob,
                                               classes=classes, class_weights=weights, background=background,
                                               threshold=threshold, k_largest=k_largest,
                                               seed=1234)
        outs1 = op(inp, cache_objects=cache)
        outs2 = op(inp, output_class=True)
        if not isinstance(outs1, list):
            outs1 = [outs1]
        # the second instance should have always at least 2 outputs
        assert isinstance(outs2, (list, tuple))
        outputs = [
            inp, classes_out, weights_out, background_out, threshold_out,
            *outs1, *outs2
        ]
        pipe.set_outputs(*outputs)
    pipe.build()

    # Default used only for interpreting the boxes on the Python side.
    format = format or "anchor_shape"

    for _ in range(50):
        inp, classes_out, weights_out, background_out, threshold_out, *outs = pipe.run(
        )
        # ``outs`` holds outs1 followed by outs2; outs2 has one more output
        # (the class label), hence the ``- 1``.
        nout = (len(outs) - 1) // 2
        outs1 = outs[:nout]
        outs2 = outs[nout:]
        for i in range(len(outs1)):
            check_batch(outs1[i], outs2[i])

        # Iterate over indices instead of elements, because normal iteration
        # causes an exception to be thrown in native code, making debugging near impossible.
        outs = tuple([np.array(out[i]) for i in range(len(out))] for out in outs1)
        box_class_labels = [
            np.int32(outs2[-1][i]) for i in range(len(outs2[-1]))
        ]

        boxes = convert_boxes(outs, format)

        for i in range(len(inp)):
            in_tensor = inp.at(i)
            class_labels = classes_out.at(i)
            if background is not None or classes is None:
                background_label = background_out.at(i)
            else:
                # No explicit background: 0 unless 0 is one of the classes, in
                # which case one below the smallest class label.
                background_label = 0 if 0 not in class_labels else np.min(
                    class_labels) - 1

            label = box_class_labels[i]
            if classes is not None:
                assert label == background_label or label in list(class_labels)

            is_foreground = label != background_label
            cls_boxes = class_boxes(in_tensor, label if is_foreground else None)

            if is_foreground:
                ref_boxes = cls_boxes
                if threshold is not None:
                    extent = box_extent(boxes[i])
                    thr = threshold_out.at(i)
                    assert np.all(extent >= thr)
                    # Only boxes meeting the threshold take part in the k-largest check.
                    ref_boxes = list(
                        filter(lambda box: np.all(box_extent(box) >= thr), cls_boxes))

                if k_largest is not None:
                    assert box_in_k_largest(ref_boxes, boxes[i], k_largest)
            assert contains_box(cls_boxes, boxes[i])
def reduce_input_pipeline():
    """Return external data together with its ``fn.reductions.mean``.

    :return: ``(data, mean)`` graph nodes.
    """
    samples = fn.external_source(source=get_data)
    return samples, fn.reductions.mean(samples)
def test_reduce_variance_cpu():
    """Define a pipeline (``device_id=None``) computing ``fn.reductions.variance``.

    The variance is given the externally sourced data and its
    ``fn.reductions.mean``.
    """
    pipeline = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    samples = fn.external_source(source=get_data)
    samples_mean = fn.reductions.mean(samples)
    pipeline.set_outputs(fn.reductions.variance(samples, samples_mean))
def test_stride_not_implemented():
    """Slicing a node with a step of 2 must raise NotImplementedError.

    A step of 1 is indexed first, outside the ``assert_raises`` block, so it is
    expected to succeed.
    """
    batch = [np.uint8([1, 2, 3]), np.uint8([1, 2])]
    node = fn.external_source(lambda: batch)
    node[::1]
    with assert_raises(NotImplementedError):
        node[::2]
def test_pytorch_plugin_cpu():
    """Wrap a pipeline (``device_id=None``) in ``pytorch.DALIGenericIterator``.

    The pipeline outputs external data with layout "HWC"; the iterator maps the
    single output to the name "data".
    """
    pipeline = Pipeline(batch_size=batch_size, num_threads=3, device_id=None)
    source_node = fn.external_source(source=get_data, layout="HWC")
    pipeline.set_outputs(source_node)
    output_names = ["data"]
    pii = pytorch.DALIGenericIterator([pipeline], output_names)
def test_box_encoder_cpu():
    """Build and run a pipeline (``device_id=None``) around ``fn.box_encoder``.

    Random boxes and labels come from two external sources, the anchors from
    the local ``coco_anchors`` helper.  The pipeline is built and run three
    times; only the first encoder output is a pipeline output.
    """
    def coco_anchors():
        """Return a flat list of anchor corner coordinates, four per anchor.

        Anchors are generated per feature-map level, per box size, per grid
        cell; centers and extents are clamped to [0, 1] before the corners
        are computed.
        """
        fig_size = 300
        feat_sizes = [38, 19, 10, 5, 3, 1]
        steps = [8., 16., 32., 64., 100., 300.]
        scales = [21., 45., 99., 153., 207., 261., 315.]
        aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]

        fks = [fig_size / step for step in steps]

        anchors = []
        for idx, feat_size in enumerate(feat_sizes):
            sk1 = scales[idx] / fig_size
            sk2 = scales[idx + 1] / fig_size
            sk3 = sqrt(sk1 * sk2)
            all_sizes = [[sk1, sk1], [sk3, sk3]]
            for alpha in aspect_ratios[idx]:
                w = sk1 * sqrt(alpha)
                h = sk1 / sqrt(alpha)
                all_sizes.append([w, h])
                all_sizes.append([h, w])
            for w, h in all_sizes:
                # The extents do not depend on the grid cell, so clamp them
                # once per box size rather than once per cell.
                w = max(min(w, 1.), 0.)
                h = max(min(h, 1.), 0.)
                for i in range(feat_size):
                    for j in range(feat_size):
                        cx = max(min((j + 0.5) / fks[idx], 1.), 0.)
                        cy = max(min((i + 0.5) / fks[idx], 1.), 0.)
                        anchors.extend([
                            cx - 0.5 * w,
                            cy - 0.5 * h,
                            cx + 0.5 * w,
                            cy + 0.5 * h,
                        ])
        return anchors

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    test_box_shape = [20, 4]

    def get_boxes():
        """Return one batch of random float32 boxes with values in [0, 1)."""
        return [
            (np.random.randint(0, 255, size=test_box_shape, dtype=np.uint8) / 255)
            .astype(dtype=np.float32)
            for _ in range(batch_size)
        ]

    test_labels_shape = [20, 1]

    def get_labels():
        """Return one batch of random int32 labels in [0, 255)."""
        return [
            np.random.randint(0, 255, size=test_labels_shape, dtype=np.int32)
            for _ in range(batch_size)
        ]

    boxes = fn.external_source(source=get_boxes)
    labels = fn.external_source(source=get_labels)
    processed, _ = fn.box_encoder(boxes, labels, anchors=coco_anchors())
    pipe.set_outputs(processed)
    pipe.build()
    for _ in range(3):
        pipe.run()
def dali_asr_pipeline(train_pipeline,  # True if training, False if validation
                      file_root,
                      file_list,
                      sample_rate,
                      silence_threshold,
                      resample_range,
                      discrete_resample_range,
                      window_size,
                      window_stride,
                      nfeatures,
                      nfft,
                      frame_splicing_factor,
                      dither_coeff,
                      pad_align,
                      preemph_coeff,
                      do_spectrogram_masking=False,
                      cutouts_generator=None,
                      shard_id=0,
                      n_shards=1,
                      preprocessing_device="gpu"):
    """Define the audio feature-extraction graph and return its three outputs.

    Stages, in order: file reader -> audio decoder (optionally with a randomly
    scaled sample rate) -> optional silence removal -> optional move to GPU ->
    optional dither -> preemphasis -> spectrogram -> mel filter bank ->
    to-decibels -> normalize -> pad -> optional erase of regions provided by
    ``cutouts_generator``.

    Notable argument handling visible in the code:

    * ``silence_threshold``: ``None`` disables silence removal; otherwise it is
      the ``cutoff_db`` of ``fn.nonsilent_region``.
    * ``resample_range`` / ``discrete_resample_range``: when a range is given, a
      random coefficient is drawn either from the three values
      ``[range[0], 1.0, range[1]]`` or from the continuous range.  It scales
      the decoder sample rate only when ``train_pipeline`` is true.
    * ``window_size`` / ``window_stride``: multiplied by ``sample_rate`` to get
      the spectrogram window length and step.
    * ``frame_splicing_factor``: when not 1, the reported feature length is
      ceil-divided by it.
    * ``do_spectrogram_masking``: applied only when ``train_pipeline`` is true.

    Returns ``(log_features, label, log_features_len)``, each moved with ``.gpu()``.
    """
    do_remove_silence = silence_threshold is not None

    def _div_ceil(dividend, divisor):
        # Integer ceiling division.
        return (dividend + (divisor - 1)) // divisor

    encoded, label = fn.readers.file(
        device="cpu", name="file_reader", file_root=file_root,
        file_list=file_list, shard_id=shard_id, num_shards=n_shards,
        shuffle_after_epoch=train_pipeline)

    speed_perturbation_coeffs = None
    if resample_range is not None:
        if discrete_resample_range:
            values = [resample_range[0], 1.0, resample_range[1]]
            speed_perturbation_coeffs = fn.random.uniform(device="cpu",
                                                          values=values)
        else:
            speed_perturbation_coeffs = fn.random.uniform(device="cpu",
                                                          range=resample_range)

    # Decoder sample rate: randomly scaled when training with a resample range,
    # the plain target rate when no range is configured, and None when a range
    # is configured but this is not a training pipeline.
    if train_pipeline and speed_perturbation_coeffs is not None:
        dec_sample_rate_arg = speed_perturbation_coeffs * sample_rate
    elif resample_range is None:
        dec_sample_rate_arg = sample_rate
    else:
        dec_sample_rate_arg = None

    audio, _ = fn.decoders.audio(encoded, sample_rate=dec_sample_rate_arg,
                                 dtype=types.FLOAT, downmix=True)
    if do_remove_silence:
        begin, length = fn.nonsilent_region(audio, cutoff_db=silence_threshold)
        audio = fn.slice(audio, begin, length, axes=[0])

    # Max duration drop is performed at DataLayer stage

    if preprocessing_device == "gpu":
        audio = audio.gpu()

    if dither_coeff != 0.:
        audio = audio + fn.random.normal(audio) * dither_coeff

    audio = fn.preemphasis_filter(audio, preemph_coeff=preemph_coeff)

    spec = fn.spectrogram(audio, nfft=nfft,
                          window_length=window_size * sample_rate,
                          window_step=window_stride * sample_rate)

    mel_spec = fn.mel_filter_bank(spec, sample_rate=sample_rate,
                                  nfilter=nfeatures, normalize=True)

    log_features = fn.to_decibels(mel_spec,
                                  multiplier=np.log(10), reference=1.0,
                                  cutoff_db=math.log(1e-20))

    # The length is taken before padding, so it reflects the unpadded features.
    log_features_len = fn.shapes(log_features)
    if frame_splicing_factor != 1:
        log_features_len = _div_ceil(log_features_len, frame_splicing_factor)

    log_features = fn.normalize(log_features, axes=[1])
    log_features = fn.pad(log_features, axes=[1], fill_value=0, align=pad_align)

    if train_pipeline and do_spectrogram_masking:
        anchors, shapes = fn.external_source(source=cutouts_generator,
                                             num_outputs=2, cycle=True)
        log_features = fn.erase(log_features, anchor=anchors, shape=shapes,
                                axes=[0, 1], fill_value=0,
                                normalized_anchor=True)

    # When modifying DALI pipeline returns, make sure you update `output_map`
    # in DALIGenericIterator invocation
    return log_features.gpu(), label.gpu(), log_features_len.gpu()
def reshape_pipe(shapes, src_dims=None, rel_shape=None):
    """Reshape externally sourced data with ``fn.reshape``.

    :param shapes: Passed to ``get_data`` each time the source is polled.
    :param src_dims: Forwarded to ``fn.reshape``.
    :param rel_shape: Forwarded to ``fn.reshape``.
    :return: The reshaped node.
    """
    source_node = fn.external_source(lambda: get_data(shapes), batch=True, device="cpu")
    reshaped = fn.reshape(source_node, src_dims=src_dims, rel_shape=rel_shape)
    return reshaped
def mel_filter_pipeline(source):
    """Return ``fn.mel_filter_bank`` applied to a spectrogram of external data.

    The spectrogram uses ``nfft=60``, ``window_length=50`` and ``window_step=25``.

    :param source: Source for the external source.
    """
    samples = fn.external_source(source=source)
    spectrogram = fn.spectrogram(samples, nfft=60, window_length=50, window_step=25)
    return fn.mel_filter_bank(spectrogram)
def _run_test(device, batch_size, out_dim, in_dim, in_dtype, out_dtype, M_kind, T_kind):
    """Compare ``fn.coord_transform`` against a NumPy reference for 3 iterations.

    The pipeline outputs the input ``X``, the operator result ``Y`` and the
    parameters ``M``, ``T``, ``MT`` so that the reference ``X * M + T`` (scalar
    ``M``) or ``X @ M.T + T`` (matrix ``M``) can be recomputed per sample.

    ``M_kind`` / ``T_kind`` are passed to ``make_param``; ``T_kind == "fused"``
    selects a single ``MT`` parameter of shape ``[out_dim, in_dim + 1]``
    instead of separate ``M`` and ``T``.
    """
    pipe = dali.pipeline.Pipeline(batch_size=batch_size, num_threads=4, device_id=0, seed=1234)
    with pipe:
        X = fn.external_source(source=get_data_source(batch_size, in_dim, in_dtype),
                               device=device, layout="NX")
        M = None
        T = None
        MT = None
        if T_kind == "fused":
            MT = make_param(M_kind, [out_dim, in_dim + 1])
        else:
            M = make_param(M_kind, [out_dim, in_dim])
            T = make_param(T_kind, [out_dim])

        # NumPy array parameters are passed to the operator as flat lists;
        # anything else is passed through unchanged.
        Y = fn.coord_transform(
            X,
            MT=MT.flatten().tolist() if isinstance(MT, np.ndarray) else MT,
            M=M.flatten().tolist() if isinstance(M, np.ndarray) else M,
            T=T.flatten().tolist() if isinstance(T, np.ndarray) else T,
            dtype=dali_type(out_dtype))
        # Unused parameters get neutral placeholders so that the pipeline always
        # has five outputs.
        if M is None:
            M = 1
        if T is None:
            T = 0
        if MT is None:
            MT = 0

        M, T, MT = (x if isinstance(x, dali.data_node.DataNode)
                    else dali.types.Constant(x, dtype=dali.types.FLOAT) for x in (M, T, MT))

        pipe.set_outputs(X, Y, M, T, MT)

    pipe.build()
    # NOTE(review): the loop variable shadows the builtin ``iter`` and is unused.
    for iter in range(3):
        outputs = pipe.run()
        outputs = [x.as_cpu() if hasattr(x, "as_cpu") else x for x in outputs]
        ref = []
        scale = 1
        for idx in range(batch_size):
            X = outputs[0].at(idx)
            if T_kind == "fused":
                MT = outputs[4].at(idx)
                if MT.size == 1:
                    M = MT
                    T = 0
                else:
                    # Last column of the fused matrix is the translation.
                    M = MT[:, :-1]
                    T = MT[:, -1]
            else:
                M = outputs[2].at(idx)
                T = outputs[3].at(idx)

            if M.size == 1:
                Y = X.astype(np.float32) * M + T
            else:
                Y = np.matmul(X.astype(np.float32), M.transpose()) + T

            if np.issubdtype(out_dtype, np.integer):
                info = np.iinfo(out_dtype)
                Y = Y.clip(info.min, info.max)

            ref.append(Y)
            # NOTE(review): the conditional applies to the whole ``max(...)``
            # expression, so an empty sample resets ``scale`` to 1 - confirm
            # this is intended.
            scale = max(scale, np.max(np.abs(Y)) - np.min(np.abs(Y))) if Y.size > 0 else 1
        # NOTE(review): ``avg`` is computed but never used; ``eps`` is passed to
        # ``check_batch`` twice - confirm whether ``avg`` was meant to be the
        # first of the two tolerances.
        avg = 1e-6 * scale
        eps = 1e-6 * scale
        if out_dtype != np.float32:  # headroom for rounding
            avg += 0.33
            eps += 0.5
        check_batch(outputs[1], ref, batch_size, eps, eps, expected_layout="NX",
                    compare_layouts=True)
def mel_filter_input_pipeline(source):
    """Return the ``fn.spectrogram`` of external data.

    The spectrogram uses ``nfft=60``, ``window_length=50`` and ``window_step=25``.

    :param source: Source for the external source.
    """
    samples = fn.external_source(source=source)
    return fn.spectrogram(samples, nfft=60, window_length=50, window_step=25)
def pipe(max_batch_size, input_data, device):
    """Create a pipeline running ``fn.coord_flip`` on external data.

    :param max_batch_size: Value passed as the pipeline's ``batch_size``.
    :param input_data: Source for the external source (``cycle=False``).
    :param device: Device argument of the external source.
    :return: The pipeline with its output set; it is not built here.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    source_node = fn.external_source(source=input_data, cycle=False, device=device)
    pipeline.set_outputs(fn.coord_flip(source_node))
    return pipeline
def define_graph(self):
    """Feed ``self.bbox_crop`` from ``self.bbox_source`` and return the results.

    :return: A list holding the first external-source output followed by all
             outputs of ``self.bbox_crop``.
    """
    inputs = fn.external_source(
        source=self.bbox_source,
        num_outputs=self.bbox_source.num_outputs,
    )
    cropped = self.bbox_crop(*inputs)
    return [inputs[0]] + list(cropped)