def __init__(self, **kwargs):
    """Build the backend operator spec from the constructor keyword arguments."""
    op_name = type(self).__name__
    self._spec = b.OpSpec(op_name)
    self._schema = b.GetSchema(op_name)

    # "device" determines where this operator's outputs will be stored;
    # it is consumed here rather than forwarded like a regular argument.
    self._device = kwargs.pop("device", op_device)
    self._spec.AddArg("device", self._device)

    # "preserve" intentionally stays in kwargs, so the generic loop below
    # also adds it (with schema-driven type conversion), as before.
    self._preserve = kwargs.get("preserve", False)
    self._spec.AddArg("preserve", self._preserve)
    # Operators the schema marks as non-prunable are always preserved.
    self._preserve = self._preserve or self._schema.IsNoPrune()

    # Forward the remaining arguments, converted to their schema types.
    for arg_name, raw_value in kwargs.items():
        if isinstance(raw_value, list) and not raw_value:
            raise RuntimeError(
                "List arguments need to have at least 1 element.")
        arg_dtype = self._schema.GetArgumentType(arg_name)
        self._spec.AddArg(arg_name, _type_convert_value(arg_dtype, raw_value))
def __init__(self, **kwargs):
    """Build the backend operator spec from the constructor keyword arguments."""
    op_name = type(self).__name__
    self._spec = b.OpSpec(op_name)
    self._schema = b.GetSchema(op_name)

    # "device" determines where this operator's outputs will be stored;
    # it is consumed here rather than forwarded like a regular argument.
    self._device = kwargs.pop("device", op_device)
    self._spec.AddArg("device", self._device)

    # "preserve" intentionally stays in kwargs, so the generic loop below
    # also adds it (with schema-driven type conversion), as before.
    self._preserve = kwargs.get("preserve", False)
    self._spec.AddArg("preserve", self._preserve)
    # Operators the schema marks as non-prunable are always preserved.
    self._preserve = self._preserve or self._schema.IsNoPrune()

    # Forward the remaining arguments, converted to their schema types.
    for arg_name, raw_value in kwargs.items():
        if raw_value is None:
            # None is not a valid value for any argument type, so treat it
            # as if the argument was not supplied at all.
            continue
        arg_dtype = self._schema.GetArgumentType(arg_name)
        if isinstance(raw_value, (list, tuple)) and len(raw_value) == 0:
            # Empty sequences carry no element to infer a type from; add
            # them through the dedicated empty-list path.
            self._spec.AddArgEmptyList(arg_name, _vector_element_type(arg_dtype))
            continue
        self._spec.AddArg(arg_name, _type_convert_value(arg_dtype, raw_value))
def __init__(self, source=None, num_outputs=None, *, cycle=None, layout=None,
             name=None, device="cpu", cuda_stream=None, use_copy_kernel=None,
             **kwargs):
    """Configure the _ExternalSource operator."""
    self._schema = _b.GetSchema("_ExternalSource")
    self._spec = _b.OpSpec("_ExternalSource")
    self._device = device
    self._layout = layout
    self._cuda_stream = cuda_stream
    self._use_copy_kernel = use_copy_kernel

    callback = _get_callback_from_source(source, cycle)

    # A multi-output external source cannot also be addressed by name.
    if name is not None and num_outputs is not None:
        raise ValueError(
            "`num_outputs` is not compatible with named `ExternalSource`")

    self._name = name
    self._num_outputs = num_outputs
    self._callback = callback

    self._spec.AddArg("device", device)
    for arg_name, arg_value in kwargs.items():
        self._spec.AddArg(arg_name, arg_value)
def __init__(
        self, source=None, num_outputs=None, *, cycle=None, layout=None,
        name=None, device="cpu", cuda_stream=None, use_copy_kernel=None,
        batch=None, parallel=None, no_copy=None, prefetch_queue_depth=None,
        **kwargs):
    """Configure the _ExternalSource operator."""
    self._schema = _b.GetSchema("_ExternalSource")
    self._spec = _b.OpSpec("_ExternalSource")
    self._device = device
    self._layout = layout
    self._cuda_stream = cuda_stream
    self._use_copy_kernel = use_copy_kernel

    # Imported locally (not at module top) — presumably to avoid a
    # circular import with nvidia.dali.ops; confirm before moving it.
    import nvidia.dali.ops
    kwargs, self._call_args = nvidia.dali.ops._separate_kwargs(kwargs)

    callback = _get_callback_from_source(source, cycle)

    # A multi-output external source cannot also be addressed by name.
    if name is not None and num_outputs is not None:
        raise ValueError("`num_outputs` is not compatible with named `ExternalSource`")

    self._name = name
    self._num_outputs = num_outputs
    self._batch = batch
    self._callback = callback
    self._parallel = parallel
    self._no_copy = no_copy
    self._prefetch_queue_depth = prefetch_queue_depth

    self._spec.AddArg("device", device)
    for arg_name, arg_value in kwargs.items():
        self._spec.AddArg(arg_name, arg_value)
def __init__(self, **kwargs):
    """Build the backend operator spec from the constructor keyword arguments."""
    schema_name = _schema_name(type(self))
    self._spec = _b.OpSpec(schema_name)
    self._schema = _b.GetSchema(schema_name)

    # "device" determines where this operator's outputs will be stored;
    # it is consumed here rather than forwarded like a regular argument.
    self._device = kwargs.pop("device", op_device)
    self._spec.AddArg("device", self._device)

    # "preserve" intentionally stays in kwargs, so the generic loop below
    # also adds it (with schema-driven type conversion), as before.
    self._preserve = kwargs.get("preserve", False)
    self._spec.AddArg("preserve", self._preserve)
    # Operators the schema marks as non-prunable are always preserved.
    self._preserve = self._preserve or self._schema.IsNoPrune()

    # Rename or drop any deprecated arguments, emitting a DeprecationWarning
    # for each one (iterate over a snapshot since kwargs is mutated).
    for arg_name in list(kwargs.keys()):
        if not self._schema.IsDeprecatedArg(arg_name):
            continue
        meta = self._schema.DeprecatedArgMeta(arg_name)
        new_name = meta['renamed_to']
        if new_name:
            if new_name in kwargs:
                raise TypeError(
                    "Operator {} got an unexpected '{}' deprecated argument when '{}' was already provided"
                    .format(type(self).__name__, arg_name, new_name))
            kwargs[new_name] = kwargs.pop(arg_name)
        elif meta['removed']:
            del kwargs[arg_name]
        # Force the warning to be shown even if DeprecationWarnings are
        # filtered out globally.
        with warnings.catch_warnings():
            warnings.simplefilter("default")
            warnings.warn(meta['msg'], DeprecationWarning, stacklevel=2)

    # Forward the remaining arguments, converted to their schema types.
    for arg_name, raw_value in kwargs.items():
        if raw_value is None:
            # None is not a valid value for any argument type, so treat it
            # as if the argument was not supplied at all.
            continue
        arg_dtype = self._schema.GetArgumentType(arg_name)
        if isinstance(raw_value, (list, tuple)) and len(raw_value) == 0:
            # Empty sequences go through the dedicated empty-list path.
            self._spec.AddArgEmptyList(arg_name, _vector_element_type(arg_dtype))
            continue
        self._spec.AddArg(arg_name, _type_convert_value(arg_dtype, raw_value))
def __init__(self, function, **kwargs):
    """Wrap a Python callable as a PythonFunctionImpl operator."""
    self._schema = b.GetSchema("PythonFunctionImpl")
    self._spec = b.OpSpec("PythonFunctionImpl")
    # Python functions are executed on the CPU.
    self._device = "cpu"
    for arg_name, arg_value in kwargs.items():
        self._spec.AddArg(arg_name, arg_value)
    self.function = function
def __init__(self, function, num_outputs=1, **kwargs):
    """Wrap a Python callable as a PythonFunctionImpl operator."""
    self._schema = b.GetSchema("PythonFunctionImpl")
    self._spec = b.OpSpec("PythonFunctionImpl")
    # Python functions are executed on the CPU.
    self._device = "cpu"
    for arg_name, arg_value in kwargs.items():
        self._spec.AddArg(arg_name, arg_value)
    self.function = function
    self.num_outputs = num_outputs
    # Keep this operator even when its outputs are unused.
    self._preserve = True
def __init__(self, **kwargs):
    """Build the backend operator spec from the constructor keyword arguments."""
    schema_name = _schema_name(type(self))
    self._spec = _b.OpSpec(schema_name)
    self._schema = _b.GetSchema(schema_name)

    # "device" determines where this operator's outputs will be stored;
    # it is consumed here rather than forwarded like a regular argument.
    self._device = kwargs.pop("device", op_device)
    self._spec.AddArg("device", self._device)

    # Split scalar construction arguments from per-call argument inputs
    # and validate that each of the latter may be an argument input.
    kwargs, self._call_args = _separate_kwargs(kwargs)
    for call_arg_name in self._call_args.keys():
        _check_arg_input(self._schema, type(self).__name__, call_arg_name)

    # Popped from kwargs so "preserve" is not added to the spec twice.
    self._preserve = kwargs.pop("preserve", False)
    self._spec.AddArg("preserve", self._preserve)
    # Operators the schema marks as non-prunable are always preserved.
    self._preserve = self._preserve or self._schema.IsNoPrune()

    # Rename or drop any deprecated arguments, emitting a DeprecationWarning
    # for each one (iterate over a snapshot since kwargs is mutated).
    for arg_name in list(kwargs.keys()):
        if not self._schema.IsDeprecatedArg(arg_name):
            continue
        meta = self._schema.DeprecatedArgMeta(arg_name)
        new_name = meta['renamed_to']
        if new_name:
            if new_name in kwargs:
                raise TypeError(
                    "Operator {} got an unexpected '{}' deprecated argument when '{}' was already provided"
                    .format(type(self).__name__, arg_name, new_name))
            kwargs[new_name] = kwargs.pop(arg_name)
        elif meta['removed']:
            del kwargs[arg_name]
        # Force the warning to be shown even if DeprecationWarnings are
        # filtered out globally.
        with warnings.catch_warnings():
            warnings.simplefilter("default")
            warnings.warn(meta['msg'], DeprecationWarning, stacklevel=2)

    # Forward the remaining arguments to the spec.
    _add_spec_args(self._schema, self._spec, kwargs)
def __init__(self, impl_name, function, num_outputs=1, device='cpu', **kwargs):
    """Wrap a Python callable as the operator implemented by ``impl_name``."""
    self._schema = _b.GetSchema(impl_name)
    self._spec = _b.OpSpec(impl_name)
    self._device = device
    self._impl_name = impl_name
    for arg_name, arg_value in kwargs.items():
        self._spec.AddArg(arg_name, arg_value)
    self.function = function
    self.num_outputs = num_outputs
    # Keep this operator even when its outputs are unused.
    self._preserve = True
def __init__(self, **kwargs):
    """Build the backend operator spec from the constructor keyword arguments."""
    op_name = type(self).__name__
    self._spec = b.OpSpec(op_name)
    self._schema = b.GetSchema(op_name)

    # "device" determines where this operator's outputs will be stored.
    # A user-supplied device is left in kwargs and added to the spec by
    # the generic loop below; only the default is added explicitly here.
    if "device" in kwargs:
        self._device = kwargs["device"]
    else:
        self._spec.AddArg("device", op_device)
        self._device = op_device

    # Forward the supplied arguments to the spec.
    for arg_name, arg_value in kwargs.items():
        if isinstance(arg_value, list) and not arg_value:
            raise RuntimeError(
                "List arguments need to have at least 1 element.")
        self._spec.AddArg(arg_name, arg_value)
def __init__(self, path, index_path, features, **kwargs):
    """Configure the _TFRecordReader operator."""
    # Both paths may be given as a single value or a list; normalize to lists.
    self._path = path if isinstance(path, list) else [path]
    self._index_path = index_path if isinstance(index_path, list) else [index_path]
    self._schema = b.GetSchema("_TFRecordReader")
    self._spec = b.OpSpec("_TFRecordReader")
    # The TFRecord reader runs on the CPU.
    self._device = "cpu"
    self._spec.AddArg("path", self._path)
    self._spec.AddArg("index_path", self._index_path)
    for arg_name, arg_value in kwargs.items():
        self._spec.AddArg(arg_name, arg_value)
    self._features = features
def __init__(self, path, index_path, features, **kwargs):
    """Configure the TFRecord reader operator."""
    # Both paths may be given as a single value or a list; normalize to lists.
    self._path = path if isinstance(path, list) else [path]
    self._index_path = index_path if isinstance(index_path, list) else [index_path]
    self._schema = _b.GetSchema(self._internal_schema_name)
    self._spec = _b.OpSpec(self._internal_schema_name)
    # The TFRecord reader runs on the CPU.
    self._device = "cpu"
    self._spec.AddArg("path", self._path)
    self._spec.AddArg("index_path", self._index_path)
    # Split scalar construction arguments from per-call argument inputs.
    kwargs, self._call_args = _separate_kwargs(kwargs)
    for arg_name, arg_value in kwargs.items():
        self._spec.AddArg(arg_name, arg_value)
    self._features = features
def __init__(self, run_fn, out_types, in_types, outs_ndim, ins_ndim,
             setup_fn=None, device='cpu', batch_processing=False, **kwargs):
    """Compile ``run_fn`` (and optionally ``setup_fn``) with numba and build
    the NumbaFuncImpl operator spec.

    Args:
        run_fn: callable executed per sample, or per batch when
            ``batch_processing`` is True; compiled with ``njit``.
        out_types / in_types: type of each output / input (scalar or list).
        outs_ndim / ins_ndim: dimensionality of each output / input
            (scalar or list).
        setup_fn: optional callable that fills output shapes from input
            shapes; compiled with ``njit``.
        device: device to run on.
        batch_processing: whether ``run_fn`` receives whole batches.
    """
    # Normalize scalar arguments to one-element lists BEFORE the length
    # checks (previously scalars crashed in len() before reaching the
    # normalization written to support them).
    if not isinstance(outs_ndim, list):
        outs_ndim = [outs_ndim]
    if not isinstance(ins_ndim, list):
        ins_ndim = [ins_ndim]
    if not isinstance(out_types, list):
        out_types = [out_types]
    if not isinstance(in_types, list):
        in_types = [in_types]
    assert len(in_types) == len(
        ins_ndim), "Number of input types and input dimensions should match."
    assert len(out_types) == len(
        outs_ndim), "Number of output types and output dimensions should match."

    setup_fn_address = None
    if setup_fn is not None:  # identity check instead of `!= None`
        setup_fn = njit(setup_fn)

        @cfunc(self._setup_fn_sig(), nopython=True)
        def setup_cfunc(out_shapes_ptr, out_ndims_ptr, num_outs, in_shapes_ptr,
                        in_ndims_ptr, num_ins, num_samples):
            out_shapes_np = _get_shape_view(out_shapes_ptr, out_ndims_ptr,
                                            num_outs, num_samples)
            # BUGFIX: the input-shape view must be sized by num_ins; the
            # original passed num_outs, which is only accidentally correct
            # when the operator has as many inputs as outputs.
            in_shapes_np = _get_shape_view(in_shapes_ptr, in_ndims_ptr,
                                           num_ins, num_samples)
            setup_fn(out_shapes_np, in_shapes_np)

        setup_fn_address = setup_cfunc.address

    # Per-slot evaluators turning raw pointers + shapes into typed carrays.
    out0_lambda, out1_lambda, out2_lambda, out3_lambda, out4_lambda, out5_lambda = \
        self._get_carrays_eval_lambda(out_types, outs_ndim)
    in0_lambda, in1_lambda, in2_lambda, in3_lambda, in4_lambda, in5_lambda = \
        self._get_carrays_eval_lambda(in_types, ins_ndim)
    run_fn = njit(run_fn)
    run_fn_lambda = self._get_run_fn_lambda(len(out_types), len(in_types))

    if batch_processing:
        @cfunc(self._run_fn_sig(batch_processing=True), nopython=True)
        def run_cfunc(out_ptr, out_shapes_ptr, out_ndims_ptr, num_outs, in_ptr,
                      in_shapes_ptr, in_ndims_ptr, num_ins, num_samples):
            # Batch mode: each slot becomes a list of per-sample carrays.
            out0 = out1 = out2 = out3 = out4 = out5 = None
            out_shapes_np = _get_shape_view(out_shapes_ptr, out_ndims_ptr,
                                            num_outs, num_samples)
            out_arr = carray(address_as_void_pointer(out_ptr),
                             (num_outs, num_samples), dtype=np.int64)
            if num_outs >= 1:
                out0 = [out0_lambda(address_as_void_pointer(ptr), shape)
                        for ptr, shape in zip(out_arr[0], out_shapes_np[0])]
            if num_outs >= 2:
                out1 = [out1_lambda(address_as_void_pointer(ptr), shape)
                        for ptr, shape in zip(out_arr[1], out_shapes_np[1])]
            if num_outs >= 3:
                out2 = [out2_lambda(address_as_void_pointer(ptr), shape)
                        for ptr, shape in zip(out_arr[2], out_shapes_np[2])]
            if num_outs >= 4:
                out3 = [out3_lambda(address_as_void_pointer(ptr), shape)
                        for ptr, shape in zip(out_arr[3], out_shapes_np[3])]
            if num_outs >= 5:
                out4 = [out4_lambda(address_as_void_pointer(ptr), shape)
                        for ptr, shape in zip(out_arr[4], out_shapes_np[4])]
            if num_outs >= 6:
                out5 = [out5_lambda(address_as_void_pointer(ptr), shape)
                        for ptr, shape in zip(out_arr[5], out_shapes_np[5])]

            in0 = in1 = in2 = in3 = in4 = in5 = None
            in_shapes_np = _get_shape_view(in_shapes_ptr, in_ndims_ptr,
                                           num_ins, num_samples)
            in_arr = carray(address_as_void_pointer(in_ptr),
                            (num_ins, num_samples), dtype=np.int64)
            if num_ins >= 1:
                in0 = [in0_lambda(address_as_void_pointer(ptr), shape)
                       for ptr, shape in zip(in_arr[0], in_shapes_np[0])]
            if num_ins >= 2:
                in1 = [in1_lambda(address_as_void_pointer(ptr), shape)
                       for ptr, shape in zip(in_arr[1], in_shapes_np[1])]
            if num_ins >= 3:
                in2 = [in2_lambda(address_as_void_pointer(ptr), shape)
                       for ptr, shape in zip(in_arr[2], in_shapes_np[2])]
            if num_ins >= 4:
                in3 = [in3_lambda(address_as_void_pointer(ptr), shape)
                       for ptr, shape in zip(in_arr[3], in_shapes_np[3])]
            if num_ins >= 5:
                in4 = [in4_lambda(address_as_void_pointer(ptr), shape)
                       for ptr, shape in zip(in_arr[4], in_shapes_np[4])]
            if num_ins >= 6:
                in5 = [in5_lambda(address_as_void_pointer(ptr), shape)
                       for ptr, shape in zip(in_arr[5], in_shapes_np[5])]
            run_fn_lambda(run_fn, out0, out1, out2, out3, out4, out5,
                          in0, in1, in2, in3, in4, in5)
    else:
        @cfunc(self._run_fn_sig(batch_processing=False), nopython=True)
        def run_cfunc(out_ptr, out_shapes_ptr, out_ndims_ptr, num_outs, in_ptr,
                      in_shapes_ptr, in_ndims_ptr, num_ins):
            # Sample mode: each slot is a single carray for one sample.
            out0 = out1 = out2 = out3 = out4 = out5 = None
            out_shapes_np = _get_shape_view(out_shapes_ptr, out_ndims_ptr,
                                            num_outs, 1)
            out_arr = carray(address_as_void_pointer(out_ptr), num_outs,
                             dtype=np.int64)
            if num_outs >= 1:
                out0 = out0_lambda(address_as_void_pointer(out_arr[0]),
                                   out_shapes_np[0][0])
            if num_outs >= 2:
                out1 = out1_lambda(address_as_void_pointer(out_arr[1]),
                                   out_shapes_np[1][0])
            if num_outs >= 3:
                out2 = out2_lambda(address_as_void_pointer(out_arr[2]),
                                   out_shapes_np[2][0])
            if num_outs >= 4:
                out3 = out3_lambda(address_as_void_pointer(out_arr[3]),
                                   out_shapes_np[3][0])
            if num_outs >= 5:
                out4 = out4_lambda(address_as_void_pointer(out_arr[4]),
                                   out_shapes_np[4][0])
            if num_outs >= 6:
                out5 = out5_lambda(address_as_void_pointer(out_arr[5]),
                                   out_shapes_np[5][0])

            in0 = in1 = in2 = in3 = in4 = in5 = None
            in_shapes_np = _get_shape_view(in_shapes_ptr, in_ndims_ptr,
                                           num_ins, 1)
            in_arr = carray(address_as_void_pointer(in_ptr), num_ins,
                            dtype=np.int64)
            if num_ins >= 1:
                in0 = in0_lambda(address_as_void_pointer(in_arr[0]),
                                 in_shapes_np[0][0])
            if num_ins >= 2:
                in1 = in1_lambda(address_as_void_pointer(in_arr[1]),
                                 in_shapes_np[1][0])
            if num_ins >= 3:
                in2 = in2_lambda(address_as_void_pointer(in_arr[2]),
                                 in_shapes_np[2][0])
            if num_ins >= 4:
                in3 = in3_lambda(address_as_void_pointer(in_arr[3]),
                                 in_shapes_np[3][0])
            if num_ins >= 5:
                in4 = in4_lambda(address_as_void_pointer(in_arr[4]),
                                 in_shapes_np[4][0])
            if num_ins >= 6:
                in5 = in5_lambda(address_as_void_pointer(in_arr[5]),
                                 in_shapes_np[5][0])
            run_fn_lambda(run_fn, out0, out1, out2, out3, out4, out5,
                          in0, in1, in2, in3, in4, in5)

    self._impl_name = "NumbaFuncImpl"
    self._schema = _b.GetSchema(self._impl_name)
    self._spec = _b.OpSpec(self._impl_name)
    self._device = device
    # Split scalar construction arguments from per-call argument inputs.
    kwargs, self._call_args = ops._separate_kwargs(kwargs)
    for key, value in kwargs.items():
        self._spec.AddArg(key, value)
    self.run_fn = run_cfunc.address
    self.setup_fn = setup_fn_address
    self.out_types = out_types
    self.in_types = in_types
    self.outs_ndim = outs_ndim
    self.ins_ndim = ins_ndim
    self.num_outputs = len(out_types)
    self.batch_processing = batch_processing
    # Keep this operator even when its outputs are unused.
    self._preserve = True
def __init__(self, run_fn, out_types, in_types, outs_ndim, ins_ndim,
             setup_fn=None, device='cpu', batch_processing=False,
             blocks=None, threads_per_block=None, **kwargs):
    """Build a NumbaFuncImpl operator around a numba-compiled function.

    ``run_fn`` is compiled for the CPU or the GPU depending on ``device``.
    On the GPU a launch configuration must be supplied via ``blocks`` and
    ``threads_per_block`` (each a 3-element list of positive integers) and
    batch processing is not supported.
    """
    if device == 'gpu':
        # Fail early if the numba/CUDA environment cannot run GPU kernels.
        self._check_minimal_numba_version()
        self._check_cuda_compatibility()
    assert len(in_types) == len(ins_ndim), ("Number of input types "
                                            "and input dimensions should match.")
    assert len(out_types) == len(outs_ndim), ("Number of output types "
                                              "and output dimensions should match.")
    # BUGFIX: reject float16 when numba does NOT provide it. The original
    # condition was inverted (`'float16' in dir(numba_types)`), which raised
    # exactly when numba *does* support float16 and silently accepted it
    # when the support was missing — contradicting the error message below.
    if 'float16' not in dir(numba_types):
        for t in [*in_types, *out_types]:
            if t == dali_types.FLOAT16:
                raise RuntimeError("Numba does not support float16 for "
                                   "current Python version. "
                                   "Python 3.7 or newer is required")
    if device == 'gpu':
        # Validate the CUDA launch configuration.
        assert batch_processing is False, ("Currently batch processing for GPU "
                                           "is not supported.")
        assert len(blocks) == 3, ("`blocks` array should contain 3 numbers, "
                                  f"while received: {len(blocks)}")
        for i, block_dim in enumerate(blocks):
            assert block_dim > 0, ("All dimensions should be positive. "
                                   "Value specified in `blocks` at index "
                                   f"{i} is nonpositive: {block_dim}")
        assert len(threads_per_block) == 3, ("`threads_per_block` array "
                                             "should contain 3 numbers, "
                                             f"while received: {len(threads_per_block)}")
        for i, threads in enumerate(threads_per_block):
            assert threads > 0, ("All dimensions should be positive. "
                                 "Value specified in `threads_per_block` at index "
                                 f"{i} is nonpositive: {threads}")
    # Scalars are accepted for convenience; normalize them to lists.
    if not isinstance(outs_ndim, list):
        outs_ndim = [outs_ndim]
    if not isinstance(ins_ndim, list):
        ins_ndim = [ins_ndim]
    if not isinstance(out_types, list):
        out_types = [out_types]
    if not isinstance(in_types, list):
        in_types = [in_types]
    self._impl_name = "NumbaFuncImpl"
    self._schema = _b.GetSchema(self._impl_name)
    self._spec = _b.OpSpec(self._impl_name)
    self._device = device
    # Split scalar construction arguments from per-call argument inputs.
    kwargs, self._call_args = ops._separate_kwargs(kwargs)
    for key, value in kwargs.items():
        self._spec.AddArg(key, value)
    if device == 'gpu':
        # The GPU path compiles a CUDA kernel; there is no setup step.
        self.run_fn = self._get_run_fn_gpu(run_fn, in_types + out_types,
                                           ins_ndim + outs_ndim)
        self.setup_fn = None
    else:
        self.run_fn = self._get_run_fn_cpu(run_fn, out_types, in_types,
                                           outs_ndim, ins_ndim, batch_processing)
        self.setup_fn = self._get_setup_fn_cpu(setup_fn)
    self.out_types = out_types
    self.in_types = in_types
    self.outs_ndim = outs_ndim
    self.ins_ndim = ins_ndim
    self.num_outputs = len(out_types)
    self.batch_processing = batch_processing
    # Keep this operator even when its outputs are unused.
    self._preserve = True
    self.blocks = blocks
    self.threads_per_block = threads_per_block