def __init__(self, batch_size, num_threads, device_id, **kwargs):
    super().__init__(batch_size, num_threads, device_id, **kwargs)
    self.oversampling = kwargs["oversampling"]
    # patch_size must be set before the crop-shape constants are built;
    # the sibling pipelines below take it from kwargs as well.
    self.patch_size = kwargs["patch_size"]
    self.crop_shape = types.Constant(np.array(self.patch_size), dtype=types.INT64)
    self.crop_shape_float = types.Constant(np.array(self.patch_size), dtype=types.FLOAT)

def __init__(self, batch_size, num_threads, device_id, **kwargs):
    super(TrainPipeline, self).__init__(batch_size, num_threads, device_id)
    self.dim = kwargs["dim"]
    self.oversampling = kwargs["oversampling"]
    self.input_x = get_numpy_reader(
        num_shards=kwargs["num_device"],
        files=kwargs["imgs"],
        seed=kwargs["seed"],
        shard_id=device_id,
        shuffle=True,
    )
    self.input_y = get_numpy_reader(
        num_shards=kwargs["num_device"],
        files=kwargs["lbls"],
        seed=kwargs["seed"],
        shard_id=device_id,
        shuffle=True,
    )
    self.patch_size = kwargs["patch_size"]
    if self.dim == 2:
        self.patch_size = [kwargs["batch_size_2d"]] + self.patch_size
    self.crop_shape = types.Constant(np.array(self.patch_size), dtype=types.INT64)
    self.crop_shape_float = types.Constant(np.array(self.patch_size), dtype=types.FLOAT)
    shard_id = int(os.getenv("LOCAL_RANK", "0"))
    if kwargs["set_aug_seed"]:
        aug_seed = kwargs["seed"] + shard_id
        self.aug_seed_kwargs = {"seed": aug_seed}
        print("TrainPipeline augmentation seed:", aug_seed)
    else:
        self.aug_seed_kwargs = {}
        print("TrainPipeline without augmentation seed")
    self.augment = kwargs["augment"]

def __init__(self, batch_size, num_threads, device_id, **kwargs):
    super(TFRecordTrain, self).__init__(batch_size, num_threads, device_id)
    self.dim = kwargs["dim"]
    self.seed = kwargs["seed"]
    self.oversampling = kwargs["oversampling"]
    self.input = ops.TFRecordReader(
        path=kwargs["tfrecords"],
        index_path=kwargs["tfrecords_idx"],
        features={
            "X_shape": tfrec.FixedLenFeature([self.dim + 1], tfrec.int64, 0),
            "Y_shape": tfrec.FixedLenFeature([self.dim + 1], tfrec.int64, 0),
            "X": tfrec.VarLenFeature([], tfrec.float32, 0.0),
            "Y": tfrec.FixedLenFeature([], tfrec.string, ""),
            "fname": tfrec.FixedLenFeature([], tfrec.string, ""),
        },
        num_shards=kwargs["gpus"],
        shard_id=device_id,
        random_shuffle=True,
        pad_last_batch=True,
        read_ahead=True,
        seed=self.seed,
    )
    self.patch_size = kwargs["patch_size"]
    self.crop_shape = types.Constant(np.array(self.patch_size), dtype=types.INT64)
    self.crop_shape_float = types.Constant(np.array(self.patch_size), dtype=types.FLOAT)
    self.layout = "CDHW" if self.dim == 3 else "CHW"
    self.axis_name = "DHW" if self.dim == 3 else "HW"

def __init__(self, batch_size, num_threads, device_id, **kwargs):
    super(TrainPipeline, self).__init__(batch_size, num_threads, device_id)
    self.dim = kwargs["dim"]
    self.oversampling = kwargs["oversampling"]
    self.input_x = get_numpy_reader(
        num_shards=kwargs["gpus"],
        files=kwargs["imgs"],
        seed=kwargs["seed"],
        shard_id=device_id,
        shuffle=True,
    )
    self.input_y = get_numpy_reader(
        num_shards=kwargs["gpus"],
        files=kwargs["lbls"],
        seed=kwargs["seed"],
        shard_id=device_id,
        shuffle=True,
    )
    self.patch_size = kwargs["patch_size"]
    if self.dim == 2:
        self.patch_size = [kwargs["batch_size_2d"]] + self.patch_size
    self.crop_shape = types.Constant(np.array(self.patch_size), dtype=types.INT64)
    self.crop_shape_float = types.Constant(np.array(self.patch_size), dtype=types.FLOAT)

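# A minimal, self-contained sketch (assumes DALI is installed; the names here
# are illustrative, not taken from the pipelines above) showing what the two
# crop-shape constants evaluate to at run time: an INT64 tensor and its FLOAT
# counterpart, replicated for every sample in the batch.
import numpy as np
from nvidia.dali import pipeline_def, types

patch_size = [128, 128, 128]  # hypothetical 3D patch

@pipeline_def(batch_size=2, num_threads=1, device_id=0)
def shape_pipe():
    crop_shape = types.Constant(np.array(patch_size), dtype=types.INT64)
    crop_shape_float = types.Constant(np.array(patch_size), dtype=types.FLOAT)
    return crop_shape, crop_shape_float

pipe = shape_pipe()
pipe.build()
ints, floats = pipe.run()
print(np.array(ints[0]))    # [128 128 128] (int64)
print(np.array(floats[0]))  # [128. 128. 128.] (float32)
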
def decoder_slice_pipe(decoder_op, file_root, device, use_fast_idct):
    encoded, _ = fn.readers.file(file_root=file_root)
    start = types.Constant(np.array([0.0, 0.0]))
    end = types.Constant(np.array([0.5, 0.5]))
    decoded = decoder_op(encoded, start, end, device=device,
                         output_type=types.RGB, use_fast_idct=use_fast_idct)
    return decoded

def define_graph(self):
    device = self.device
    return [
        # no-op
        ops.Reshape(device=device, shape=[1])(types.Constant(1.25)),
        # flatten with reshape op
        ops.Reshape(device=device)(
            types.Constant(np.array([[1, 2], [3, 4]], dtype=np.uint16), device=device),
            shape=types.Constant([4]),
        ),
    ]

def __init__(self, params, num_threads, device_id):
    super(DaliPipeline, self).__init__(params.batch_size, num_threads, device_id, seed=12)

    with h5py.File(params.data_path, 'r') as f:
        # load hydro and clean up
        Hydro = f['Hydro'][...]
        self.Hydro = types.Constant(Hydro, shape=Hydro.shape, layout="DHWC", device="cpu")
        del Hydro

        # load nbody and clean up
        Nbody = f['Nbody'][...]
        self.Nbody = types.Constant(Nbody, shape=Nbody.shape, layout="DHWC", device="cpu")
        del Nbody

    # self.ndummy = np.zeros((20, 20, 20, 4), dtype=np.float32)
    # self.hdummy = np.zeros((20, 20, 20, 5), dtype=np.float32)
    # self.Nbody = types.Constant(self.ndummy, shape=self.ndummy.shape, layout="DHWC", device="cpu")
    # self.Hydro = types.Constant(self.hdummy, shape=self.hdummy.shape, layout="DHWC", device="cpu")
    # self.Nbody = ops.Constant(fdata=self.ndummy.flatten().tolist(), shape=self.ndummy.shape, layout="DHWC", device="cpu")
    # self.Hydro = ops.Constant(fdata=self.hdummy.flatten().tolist(), shape=self.hdummy.shape, layout="DHWC", device="cpu")

    self.do_rotate = params.rotate_input == 1
    print("Enable Rotation" if self.do_rotate else "Disable Rotation")
    self.rng_angle = ops.Uniform(device="cpu", range=[-1.5, 2.5])
    self.rng_pos = ops.Uniform(device="cpu", range=[0., 1.])
    self.icast = ops.Cast(device="cpu", dtype=types.INT32)
    self.fcast = ops.Cast(device="cpu", dtype=types.FLOAT)
    self.crop = ops.Crop(device="cpu",
                         crop_d=params.data_size,
                         crop_h=params.data_size,
                         crop_w=params.data_size)
    self.rotate1 = ops.Rotate(device="gpu", axis=(1, 0, 0), interp_type=types.INTERP_LINEAR)
    self.rotate2 = ops.Rotate(device="gpu", axis=(0, 1, 0), interp_type=types.INTERP_LINEAR)
    self.rotate3 = ops.Rotate(device="gpu", axis=(0, 0, 1), interp_type=types.INTERP_LINEAR)
    self.transpose = ops.Transpose(device="gpu", perm=[3, 0, 1, 2])

def get_operand(self, operand, kind, operand_type):
    # `magic_number` and `np_types_to_dali` are assumed to be defined at
    # module scope alongside this helper.
    if kind == "const":
        return types.Constant(magic_number, np_types_to_dali[operand_type])
    elif kind == "cpu":
        return operand
    elif kind == "gpu":
        return operand.gpu()

def make_pipe():
    image = fn.external_source(source=image_gen)
    if device == "gpu":
        image = image.gpu()
    if get_dynamic_axes:
        axes, rel_start, rel_shape = fn.external_source(source=get_dynamic_axes, num_outputs=3)
    else:
        axes = types.Constant(np.array([0, 1], dtype=np.int32), device="cpu")
        rel_start = fn.random.uniform(range=(0.1, 0.2), shape=(2,), dtype=types.FLOAT,
                                      device=args_device)
        rel_shape = fn.random.uniform(range=(0.4, 0.6), shape=(2,), dtype=types.FLOAT,
                                      device=args_device)
    if args_device == "gpu":
        sliced = fn.slice(image, rel_start, rel_shape, axes=axes)
        return image, axes, rel_start, rel_shape, sliced
    else:
        sliced1 = fn.slice(image, rel_start=rel_start, rel_shape=rel_shape, axes=axes)
        sliced2 = fn.slice(image, rel_start, rel_shape, axes=axes)
        return image, axes, rel_start, rel_shape, sliced1, sliced2

def pipe(device, interp_type, test_data=checkerboard, out_size=out_size):
    data = types.Constant(test_data, device=device)
    data = fn.expand_dims(data, axes=[2])
    resized = fn.resize(data, dtype=types.FLOAT, min_filter=interp_type,
                        mag_filter=interp_type, size=out_size)
    resized = fn.squeeze(resized, axes=[2])
    return resized

def test_variable_batch():
    pipe = Pipeline(6, 1, 0)
    batches = [
        [np.array(1), np.array(2)],
        [np.array(1)],
        [np.array(1), np.array(2), np.array(3), np.array(4), np.array(5), np.array(5)],
    ]
    dummy = fn.external_source(batches, cycle=True)
    val = np.float32([[1, 2], [3, 4]])
    pipe.set_outputs(types.Constant(val, device="cpu"), types.Constant(val, device="gpu"), dummy)
    pipe.build()
    for batch in batches:
        cpu, gpu, _ = pipe.run()
        # Constant outputs are replicated to match the batch size dictated
        # by the external source in each iteration.
        assert len(cpu) == len(batch)
        assert len(gpu) == len(batch)
        gpu = gpu.as_cpu()
        for i in range(len(batch)):
            assert np.array_equal(cpu.at(i), val)
            assert np.array_equal(gpu.at(i), val)

def create_dali_pipe(channel_first, seq_len, interp, dtype, w, h, batch_size=2):
    pipe = dali.pipeline.Pipeline(batch_size, 1, 0, 0)
    with pipe:
        layout = "FCHW" if channel_first else "FHWC"
        ext = fn.external_source(GetSequences(channel_first, seq_len, batch_size), layout=layout)
        resize_cpu_out = fn.resize(ext, resize_x=w, resize_y=h, interp_type=interp,
                                   dtype=dtype, save_attrs=True)
        resize_gpu_out = fn.resize(ext.gpu(), resize_x=w, resize_y=h, interp_type=interp,
                                   minibatch_size=4, dtype=dtype, save_attrs=True)
        dali_resized_cpu, size_cpu = resize_cpu_out
        dali_resized_gpu, size_gpu = resize_gpu_out
        # extract just the HW part from the input shape
        shape_anchor = np.array([2 if channel_first else 1], dtype=np.float32)
        shape_shape = np.array([2], dtype=np.float32)
        ext_size = fn.slice(fn.cast(fn.shapes(ext), dtype=types.INT32),
                            types.Constant(shape_anchor, device="cpu"),
                            types.Constant(shape_shape, device="cpu"),
                            normalized_anchor=False, normalized_shape=False, axes=[0])
        pipe.set_outputs(dali_resized_cpu, dali_resized_gpu, ext_size, size_cpu, size_gpu)
    return pipe

def define_graph(self):
    device = self.device
    return [
        types.Constant(device=device, value=(1.25, 2.5, 3)),
        types.Constant(device=device, value=self.array([[[1, 2]], [[3, 4]]],
                                                       dtype=self.dtype('int32'))),
        types.Constant(device=device, value=self.array([0, 1, 2, 3, 4],
                                                       dtype=self.dtype('uint8'))),
        types.Constant(device=device, value=self.array([0.25, 1.25, 2.25, 3.25, 4.25],
                                                       dtype=self.dtype('float16'))),
        types.Constant(device=device, value=5.5, shape=(100, 100), name="large"),
        types.Constant(device=device, value=-4, shape=(10, 20)),
        types.Constant(device=device, value=[False, True, False]),
    ]

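# Hedged aside (illustrative, not part of the graph above): a bare Python
# scalar passed with no device/shape/layout yields a types.ScalarConstant,
# which joins arithmetic expressions instead of becoming a standalone node;
# `images` here is a hypothetical data node.
halved = images * types.Constant(0.5)             # element-wise multiply
shifted = images + types.Constant(1, dtype=types.INT32)
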
def define_graph(self):
    self.jpegs, self.labels = self.input(name="Reader")
    images = self.decode(self.jpegs)
    if self.warp.device == "gpu":
        images = images.gpu()
    if self.cast:
        images = self.cast(images)
    dynamic_size = (types.Constant(np.array([240, 320], dtype=np.float32))
                    if self.use_dynamic_size else None)
    if self.use_input:
        transform = self.transform_source()
        outputs = self.warp(images, transform, size=dynamic_size)
    else:
        outputs = self.warp(images, size=dynamic_size)
    return outputs

def nonsilent_pipe(data_arr=None, window_size=256, cutoff_value=-10, reference_power=None):
    if data_arr is None:
        raw, _ = fn.readers.file(files=audio_files)
        audio, _ = fn.decoders.audio(raw, dtype=types.INT16, downmix=True)
    else:
        audio = types.Constant(device='cpu', value=data_arr)
    begin_cpu, len_cpu = fn.nonsilent_region(
        audio,
        cutoff_db=cutoff_value,
        window_length=window_size,
        reference_power=reference_power,
    )
    begin_gpu, len_gpu = fn.nonsilent_region(
        audio.gpu(),
        cutoff_db=cutoff_value,
        window_length=window_size,
        reference_power=reference_power,
    )
    return begin_cpu, len_cpu, begin_gpu, len_gpu

def dali_pipe(data, label):
    fdata = types.Constant(data)
    flabel = types.Constant(label)
    return fdata, flabel

def check_normal_distribution(device, dtype, shape=None, use_shape_like_input=False,
                              variable_shape=False, mean=0.0, stddev=1.0,
                              variable_dist_params=False, shape_gen_f=None, niter=3,
                              batch_size=3, device_id=0, num_threads=3):
    pipe = Pipeline(batch_size=batch_size, device_id=device_id, num_threads=num_threads,
                    seed=123456)
    with pipe:
        shape_like_in = None
        shape_arg = None
        assert shape is None or shape_gen_f is None
        if variable_shape:
            if shape_gen_f is None:
                def shape_gen_f():
                    return random_shape(shape)
            if use_shape_like_input:
                shape_like_in = fn.external_source(
                    lambda: np.zeros(shape_gen_f()), device=device, batch=False)
                shape_out = fn.shapes(shape_like_in)
            else:
                shape_arg = fn.external_source(shape_gen_f, batch=False)
                shape_out = shape_arg
        else:
            if use_shape_like_input:
                shape_like_in = np.zeros(shape)
            else:
                shape_arg = shape
            # Can't make an empty list constant
            shape_out = types.Constant(shape if shape is not None and shape != () else (1,),
                                       dtype=types.INT32)
        mean_arg = None
        stddev_arg = None
        if variable_dist_params:
            mean_arg = fn.external_source(
                lambda: np.array(np.random.uniform(low=-100.0, high=100.0), dtype=np.float32),
                device='cpu', batch=False)
            stddev_arg = fn.external_source(
                lambda: np.array(np.random.uniform(low=1.0, high=100.0), dtype=np.float32),
                device='cpu', batch=False)
        else:
            mean_arg = mean
            stddev_arg = stddev
        inputs = [shape_like_in] if shape_like_in is not None else []
        out = fn.random.normal(*inputs, device=device, shape=shape_arg, mean=mean_arg,
                               stddev=stddev_arg, dtype=dtype)
        pipe.set_outputs(out, shape_out, mean_arg, stddev_arg)
    pipe.build()
    for i in range(niter):
        outputs = pipe.run()
        out, shapes, means, stddevs = tuple(
            outputs[idx].as_cpu() if isinstance(outputs[idx], TensorListGPU) else outputs[idx]
            for idx in range(len(outputs)))
        for sample_idx in range(batch_size):
            sample = np.array(out[sample_idx])
            if sample.shape == ():
                continue
            sample_shape = np.array(shapes[sample_idx])
            mean = np.array(means[sample_idx])
            stddev = np.array(stddevs[sample_idx])
            assert (sample.shape == sample_shape).all(), f"{sample.shape} != {sample_shape}"
            data = sample.flatten()
            data_len = len(data)
            # Sanity-check the data
            if data_len >= 100 and dtype in [types.FLOAT, types.FLOAT64]:
                # Empirical rule:
                # ~68% of the observations within one standard deviation
                # ~95% of the observations within two standard deviations
                # ~99.7% of the observations within three standard deviations
                within_1stddevs = np.where((data > (mean - 1 * stddev))
                                           & (data < (mean + 1 * stddev)))
                p1 = len(within_1stddevs[0]) / data_len
                within_2stddevs = np.where((data > (mean - 2 * stddev))
                                           & (data < (mean + 2 * stddev)))
                p2 = len(within_2stddevs[0]) / data_len
                within_3stddevs = np.where((data > (mean - 3 * stddev))
                                           & (data < (mean + 3 * stddev)))
                p3 = len(within_3stddevs[0]) / data_len
                assert p3 > 0.9, f"{p3}"  # leave some room
                assert p2 > 0.8, f"{p2}"  # leave some room
                assert p1 > 0.5, f"{p1}"  # leave some room
                # Not 100% mathematically rigorous, but good enough for this test
                _, pvalues_anderson, _ = st.anderson(data, dist='norm')
                assert pvalues_anderson[2] > 0.5

def pipe():
    data = np.array([np.random.rand(1, 2) for i in range(10)])
    label = np.array([np.random.rand(1, 3) for i in range(10)])
    fdata = types.Constant(data)
    flabel = types.Constant(label)
    return fdata, flabel

def build_pipes(device, dim, batch_size, channel_first, mode, interp, dtype, w_input,
                h_input, d_input, use_size_arg, use_size_input, use_roi):
    dali_pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0, seed=1234)
    with dali_pipe:
        if dim == 2:
            files, labels = dali.fn.readers.caffe(path=db_2d_folder, random_shuffle=True)
            images_cpu = dali.fn.decoders.image(files, device="cpu")
        else:
            images_cpu = dali.fn.external_source(source=random_3d_loader(batch_size),
                                                 layout="DHWC")
        images_hwc = images_cpu if device == "cpu" else images_cpu.gpu()
        if channel_first:
            images = dali.fn.transpose(images_hwc,
                                       perm=[3, 0, 1, 2] if dim == 3 else [2, 0, 1],
                                       transpose_layout=True)
        else:
            images = images_hwc
        roi_start = None
        roi_end = None
        w = None
        h = None
        d = None
        size = None
        minibatch_size = 2 if dim == 3 else 8
        if use_roi:
            # Calculate absolute RoI
            in_size = fn.slice(fn.shapes(images_cpu),
                               types.Constant(0, dtype=types.FLOAT, device="cpu"),
                               types.Constant(dim, dtype=types.FLOAT, device="cpu"),
                               axes=[0], normalized_shape=False)
            roi_start = fn.random.uniform(range=(0, 0.4), shape=[dim]) * in_size
            roi_end = fn.random.uniform(range=(0.6, 1.0), shape=[dim]) * in_size
        size_range = (10, 200) if dim == 3 else (10, 1000)
        if use_size_arg:
            if use_size_input:
                mask = fn.cast(fn.random.uniform(range=(0.8, 1.9), shape=[dim]),
                               dtype=types.INT32)
                size = fn.random.uniform(range=size_range, shape=[dim]) * mask
            else:
                size = [300, 400] if dim == 2 else [80, 100, 120]
            resized = resize_dali(images, channel_first, dtype, interp, mode, size,
                                  None, None, None, roi_start, roi_end,
                                  minibatch_size=minibatch_size, max_size=max_size(dim))
        else:
            if w_input:
                has_w = fn.random.coin_flip(probability=0.8)
                w = fn.random.uniform(range=size_range) * has_w
            else:
                w = 320  # some fixed value
            if h_input:
                has_h = fn.random.coin_flip(probability=0.8)
                h = fn.random.uniform(range=size_range) * has_h
            else:
                h = 240  # some other fixed value
            if dim >= 3:
                if d_input:
                    has_d = fn.random.coin_flip(probability=0.8)
                    d = fn.random.uniform(range=size_range) * has_d
                else:
                    d = 31  # some other fixed value
            resized = resize_dali(images, channel_first, dtype, interp, mode, None,
                                  w, h, d, roi_start, roi_end,
                                  minibatch_size=minibatch_size, max_size=max_size(dim))
        outputs = [images, resized]
        if roi_start is not None and roi_end is not None:
            outputs += [roi_start, roi_end]
        for x in (d, h, w, size):
            if x is not None:
                if isinstance(x, _DataNode):
                    outputs.append(x)
                else:
                    outputs.append(types.Constant(np.array(x, dtype=np.float32)))
        dali_pipe.set_outputs(*outputs)
    pil_pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0,
                        exec_async=False, exec_pipelined=False)
    with pil_pipe:
        images = fn.external_source(name="images", layout=layout_str(dim, channel_first))
        sizes = fn.external_source(name="size")
        roi_start = fn.external_source(name="roi_start")
        roi_end = fn.external_source(name="roi_end")
        resized = resize_PIL(dim, channel_first, dtype, interp, images, sizes,
                             roi_start, roi_end)
        resized = fn.reshape(resized, layout=layout_str(dim, channel_first))
        pil_pipe.set_outputs(resized)
    dali_pipe.build()
    pil_pipe.build()
    return dali_pipe, pil_pipe

def get_dali_pipe():
    data = types.Constant(1)
    return data

def get_dali_pipe(value):
    data = types.Constant(value)
    return data

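# A minimal sketch (names are illustrative) of how such a one-node constant
# graph can be materialized and run. A bare scalar yields a ScalarConstant,
# which the pipeline wraps into a CPU constant node when used as an output
# (see _prepare_graph below).
from nvidia.dali import pipeline_def

@pipeline_def(batch_size=4, num_threads=1, device_id=0)
def const_pipe(value):
    return get_dali_pipe(value)

pipe = const_pipe(42)
pipe.build()
(out,) = pipe.run()  # every sample in the batch equals `value`
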
def _prepare_graph(self, define_graph=None):
    self._pipe = b.Pipeline(self._max_batch_size,
                            self._num_threads,
                            self._device_id,
                            self._seed,
                            self._exec_pipelined,
                            self._prefetch_queue_depth,
                            self._exec_async,
                            self._bytes_per_sample,
                            self._set_affinity,
                            self._max_streams,
                            self._default_cuda_stream_priority)
    self._pipe.SetExecutionTypes(self._exec_pipelined, self._exec_separated, self._exec_async)
    self._pipe.SetQueueSizes(self._cpu_queue_size, self._gpu_queue_size)
    self._pipe.EnableExecutorMemoryStats(self._enable_memory_stats)

    if define_graph is not None:
        if self._graph_out is not None:
            raise RuntimeError("Duplicate graph definition - `define_graph` argument "
                               "should not be specified when graph was defined with a call "
                               "to `set_outputs`.")
    else:
        define_graph = self.define_graph

    if self._graph_out:
        outputs = self._graph_out
    else:
        with self:
            outputs = define_graph()
    if isinstance(outputs, tuple):
        outputs = list(outputs)
    elif not isinstance(outputs, list):
        outputs = [outputs]

    for i in range(len(outputs)):
        if isinstance(outputs[i], types.ScalarConstant):
            import nvidia.dali.ops
            outputs[i] = nvidia.dali.ops._instantiate_constant_node("cpu", outputs[i])
        elif not isinstance(outputs[i], DataNode):
            outputs[i] = types.Constant(outputs[i], device="cpu")
        _data_node._check(outputs[i])

    # Backtrack to construct the graph
    op_ids = set()
    edges = deque(list(outputs) + self._sinks)
    ops = []
    while edges:
        current_edge = edges.popleft()
        source_op = current_edge.source
        if source_op is None:
            raise RuntimeError("Pipeline encountered Edge with no source op.")
        # To make sure we don't double count ops in the case that they produce
        # more than one output, we keep track of the unique op ids for each op
        # we encounter and only add the op if we have not already.
        if source_op.id not in op_ids:
            op_ids.add(source_op.id)
            source_op.check_args()
            ops.append(source_op)
        else:
            # If the op was already added, we need to change its position to
            # the top of the list. This ensures topological ordering of ops
            # when adding to the backend pipeline.
            ops.remove(source_op)
            ops.append(source_op)
        for edge in source_op.inputs:
            if isinstance(edge, list):
                for e in edge:
                    edges.append(e)
            else:
                edges.append(edge)

    # Add the ops to the graph and build the backend
    related_logical_id = {}
    self._ops = []
    while ops:
        op = ops.pop()
        self._ops.append(op)
        if op.relation_id not in related_logical_id:
            related_logical_id[op.relation_id] = self._pipe.AddOperator(op.spec, op.name)
        else:
            self._pipe.AddOperator(op.spec, op.name, related_logical_id[op.relation_id])
    self._prepared = True
    self._setup_input_callbacks()
    self._names_and_devices = [(e.name, e.device) for e in outputs]
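
# Hedged illustration (not part of the method above) of the output coercion
# performed by _prepare_graph: set_outputs accepts DataNodes, ScalarConstants
# and raw values alike; the latter two are wrapped into CPU constant nodes.
import numpy as np
from nvidia.dali import types
from nvidia.dali.pipeline import Pipeline

pipe = Pipeline(batch_size=2, num_threads=1, device_id=0)
pipe.set_outputs(types.Constant(3.14),            # ScalarConstant -> constant node
                 np.ones((2, 2), dtype=np.int8))  # raw array -> types.Constant(..., device="cpu")
pipe.build()
scalars, arrays = pipe.run()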