def __init__(self, topology):
    """Set up the element containers of a mesh with the given topology.

    Args:
        topology: Topology of the mesh. A ``cells`` element is created only
            when it equals ``MeshTopology.Tetrahedron``.

    Raises:
        Exception: If the current backend does not support the mesh
            extension.
    """
    arch = impl.current_cfg().arch
    if not lang.misc.is_extension_supported(arch, lang.extension.mesh):
        raise Exception(
            f"Backend {arch!s} doesn't support MeshTaichi extension")

    self.topology = topology

    # Vertices, edges and faces exist for every topology.
    common_elements = (
        ('verts', MeshElementType.Vertex),
        ('edges', MeshElementType.Edge),
        ('faces', MeshElementType.Face),
    )
    for attr_name, element_type in common_elements:
        setattr(self, attr_name, MeshElement(element_type, self))

    if topology == MeshTopology.Tetrahedron:
        self.cells = MeshElement(MeshElementType.Cell, self)

    self.elements = set()
    self.relations = set()

    # Flag the global config so that later kernel compilation takes the
    # mesh-specific path.
    impl.current_cfg().use_mesh = True
def fill(self, val):
    """Fills ndarray with a specific scalar value.

    On the cuda and x64 backends, f32 / i32 / u32 arrays are filled through
    the dedicated ``fill_float`` / ``fill_int`` / ``fill_uint`` methods of
    the underlying array; every other combination goes through
    ``_fill_by_kernel``.

    Args:
        val (Union[int, float]): Value to fill.
    """
    arch = impl.current_cfg().arch
    has_native_fill = (arch == _ti_core.Arch.cuda
                       or arch == _ti_core.Arch.x64)
    if not has_native_fill:
        self._fill_by_kernel(val)
        return

    dtype = self.dtype
    if dtype == primitive_types.f32:
        self.arr.fill_float(val)
        return
    if dtype == primitive_types.i32:
        self.arr.fill_int(val)
        return
    if dtype == primitive_types.u32:
        self.arr.fill_uint(val)
        return

    # No dedicated fill routine for this dtype.
    self._fill_by_kernel(val)
def bit_struct(self, num_bits: int):
    """Adds a bit_struct SNode as a child component of `self`.

    Args:
        num_bits: Number of bits to use.

    Returns:
        The added :class:`~taichi.lang.SNode` instance.
    """
    make_child = self.ptr.bit_struct
    child_ptr = make_child(num_bits, impl.current_cfg().packed)
    return SNode(child_ptr)
def ndarray_fill(self, val):
    """Fills ndarray with a specific scalar value.

    Args:
        val (Union[int, float]): Value to fill.
    """
    if not impl.current_cfg().ndarray_use_torch:
        # Taichi-managed storage: fill through the meta kernel.
        taichi.lang.meta.fill_ndarray(self, val)
        return

    # Torch-backed storage: use the tensor's in-place fill.
    self.arr.fill_(val)
def fill(self, val):
    """Fills ndarray with a specific scalar value.

    Args:
        val (Union[int, float]): Value to fill.
    """
    if not impl.current_cfg().ndarray_use_torch:
        # Imported lazily, only on the non-torch path.
        from taichi.lang.meta import fill_ndarray  # pylint: disable=C0415
        fill_ndarray(self, val)
        return

    # Torch-backed storage: use the tensor's in-place fill.
    self.arr.fill_(val)
def from_source(cls, source_code, compile_fn=None):
    """Build a ``SourceBuilder`` from in-memory C++ source code.

    The source is written to ``_temp_source.cpp`` inside a freshly created
    temporary directory and then handed to ``SourceBuilder.from_file``.

    Args:
        source_code (str): Source text to write out.
        compile_fn: Optional compile callback, forwarded to ``from_file``.

    Returns:
        Whatever ``SourceBuilder.from_file`` returns.

    Raises:
        TaichiSyntaxError: If the current arch is neither x64 nor cuda.
    """
    supported_archs = (_ti_core.Arch.x64, _ti_core.Arch.cuda)
    if impl.current_cfg().arch not in supported_archs:
        raise TaichiSyntaxError("Unsupported arch for external function call")

    work_dir = tempfile.mkdtemp()
    source_path = os.path.join(work_dir, '_temp_source.cpp')
    with open(source_path, 'w') as source_file:
        source_file.write(source_code)

    return SourceBuilder.from_file(source_path, compile_fn, work_dir)
def materialize(self, key=None, args=None, arg_features=None):
    """Compile this kernel for ``key`` and cache the resulting callable.

    Does nothing beyond materializing the runtime when ``key`` is already
    present in ``self.compiled_functions``.

    Args:
        key: Cache key; defaults to ``(self.func, 0)``. ``key[1]`` is
            embedded in the generated kernel name.
        args: Forwarded to ``_get_tree_and_ctx``.
        arg_features: Forwarded to ``_get_tree_and_ctx``.

    Raises:
        TaichiSyntaxError: Raised from the AST generator callback when it is
            invoked while another kernel is being generated, or when the
            kernel declares a return type but never returns a value.
    """
    if key is None:
        key = (self.func, 0)
    self.runtime.materialize()
    if key in self.compiled_functions:
        # Already compiled for this key.
        return
    grad_suffix = ""
    if self.is_grad:
        grad_suffix = "_grad"
    kernel_name = f"{self.func.__name__}_c{self.kernel_counter}_{key[1]}{grad_suffix}"
    _logging.trace(f"Compiling kernel {kernel_name}...")

    tree, ctx = _get_tree_and_ctx(
        self,
        args=args,
        excluded_parameters=self.template_slot_locations,
        arg_features=arg_features)

    if self.is_grad:
        KernelSimplicityASTChecker(self.func).visit(tree)

    if impl.current_cfg().use_mesh:
        taichi.lang.Mesh.update_relation(tree, ctx)

    # Do not change the name of 'taichi_ast_generator'
    # The warning system needs this identifier to remove unnecessary messages
    def taichi_ast_generator(kernel_cxx):
        # Callback handed to create_kernel below; it transforms the Python
        # AST using the AST builder obtained from kernel_cxx.
        if self.runtime.inside_kernel:
            raise TaichiSyntaxError(
                "Kernels cannot call other kernels. I.e., nested kernels are not allowed. "
                "Please check if you have direct/indirect invocation of kernels within kernels. "
                "Note that some methods provided by the Taichi standard library may invoke kernels, "
                "and please move their invocations to Python-scope.")
        self.runtime.inside_kernel = True
        self.runtime.current_kernel = self
        try:
            ctx.ast_builder = kernel_cxx.ast_builder()
            transform_tree(tree, ctx)
            if not ctx.is_real_function:
                if self.return_type and ctx.returned != ReturnStatus.ReturnedValue:
                    raise TaichiSyntaxError(
                        "Kernel has a return type but does not have a return statement"
                    )
        finally:
            # Always reset the runtime state, even when the transform fails.
            self.runtime.inside_kernel = False
            self.runtime.current_kernel = None

    taichi_kernel = impl.get_runtime().prog.create_kernel(
        taichi_ast_generator, kernel_name, self.is_grad)

    self.kernel_cpp = taichi_kernel

    # Guards against the key having been inserted while compiling.
    assert key not in self.compiled_functions
    self.compiled_functions[key] = self.get_function_body(taichi_kernel)
def __init__(self, dtype, shape):
    """Allocate the underlying storage and bind the matching fill routine.

    Depending on the ``ndarray_use_torch`` config flag, storage is either a
    zero-initialised torch tensor (moved to CUDA when the arch is cuda) or a
    ``_ti_core.Ndarray``. ``self.ndarray_fill`` is set to a closure taking
    ``(val, fill_func)`` that fills that storage.

    Args:
        dtype: Element data type; passed through ``cook_dtype``.
        shape: Shape of the array.
    """
    self.host_accessor = None

    if not impl.current_cfg().ndarray_use_torch:
        self.arr = _ti_core.Ndarray(impl.get_runtime().prog,
                                    cook_dtype(dtype), shape)

        def ndarray_fill(val, fill_func):
            # Delegate to the caller-supplied fill function.
            fill_func(self, val)

        self.ndarray_fill = ndarray_fill
        return

    assert has_pytorch(
    ), "PyTorch must be available if you want to create a Taichi ndarray with PyTorch as its underlying storage."
    torch_dtype = to_pytorch_type(cook_dtype(dtype))
    # pylint: disable=E1101
    storage = torch.zeros(shape, dtype=torch_dtype)
    if impl.current_cfg().arch == _ti_core.Arch.cuda:
        storage = storage.cuda()
    self.arr = storage

    def ndarray_fill(val, fill_func):
        # fill_func is ignored here: the tensor fills itself in place.
        self.arr.fill_(val)

    self.ndarray_fill = ndarray_fill
def __call__(self, *args, **kwargs):
    """Launch the kernel with the given arguments.

    Arguments are normalised through ``_process_args``, the kernel is
    compiled on demand via ``ensure_compiled``, and the cached compiled
    function is invoked.

    Returns:
        The return value of the compiled function.
    """
    args = _process_args(self, args, kwargs)

    # Transform the primal kernel to forward mode grad kernel
    # then recover to primal when exiting the forward mode manager
    fwd_mode_manager = self.runtime.fwd_mode_manager
    if fwd_mode_manager:
        # TODO: if we would like to compute 2nd-order derivatives by forward-on-reverse in a nested context manager fashion,
        # i.e., a `Tape` nested in the `FwdMode`, we can transform the kernels with `mode_original == AutodiffMode.REVERSE` only,
        # to avoid duplicate computation for 1st-order derivatives
        previous_mode = self.autodiff_mode
        self.autodiff_mode = AutodiffMode.FORWARD
        fwd_mode_manager.insert(self, previous_mode)

    if self.autodiff_mode != AutodiffMode.NONE:
        if impl.current_cfg().opt_level == 0:
            _logging.warn(
                "opt_level = 1 is enforced to enable gradient computation.")
            impl.current_cfg().opt_level = 1

    key = self.ensure_compiled(*args)
    compiled = self.runtime.compiled_functions[key]
    return compiled(*args)
def compile_fn_impl(filename):
    """Compile ``filename`` to ``source.bc`` inside ``self.td`` with clang++.

    When the current arch is not x64, ``-target nvptx64-nvidia-cuda`` is
    appended to the command line.

    Args:
        filename (str): Path of the source file to compile.

    Returns:
        str: Path of the bitcode file. The compiler's exit status is not
        checked.
    """
    targets_x64 = impl.current_cfg().arch == _ti_core.Arch.x64

    command = get_clangpp() + ' -flto -c ' + filename + ' -o ' + os.path.join(
        self.td, 'source.bc')
    if not targets_x64:
        command = command + ' -target nvptx64-nvidia-cuda'

    subprocess.call(command, shell=True)
    return os.path.join(self.td, 'source.bc')
def ndarray_to_numpy(self):
    """Converts ndarray to a numpy array.

    Returns:
        numpy.ndarray: The result numpy array.
    """
    if not impl.current_cfg().ndarray_use_torch:
        # Copy into a freshly allocated host buffer via the meta kernel,
        # then sync the runtime before handing the buffer back.
        host_buffer = np.zeros(shape=self.arr.shape,
                               dtype=to_numpy_type(self.dtype))
        taichi.lang.meta.ndarray_to_ext_arr(self, host_buffer)
        impl.get_runtime().sync()
        return host_buffer

    return self.arr.cpu().numpy()
def ndarray_matrix_from_numpy(self, arr, as_vector):
    """Loads all values from a numpy array.

    Args:
        arr (numpy.ndarray): The source numpy array. Its shape must equal
            the shape of the underlying storage.
        as_vector: Forwarded unchanged to ``ext_arr_to_ndarray_matrix``
            (presumably selects vector vs. matrix element layout - confirm
            against that kernel).

    Raises:
        TypeError: If ``arr`` is not a ``numpy.ndarray``.
        ValueError: If the shape of ``arr`` differs from the storage shape.
    """
    if not isinstance(arr, np.ndarray):
        raise TypeError(f"{np.ndarray} expected, but {type(arr)} provided")
    if tuple(self.arr.shape) != tuple(arr.shape):
        raise ValueError(
            f"Mismatch shape: {tuple(self.arr.shape)} expected, but {tuple(arr.shape)} provided"
        )
    if impl.current_cfg().ndarray_use_torch:
        self.arr = torch.from_numpy(arr).to(
            self.arr.dtype)  # pylint: disable=E1101
        if impl.current_cfg().arch == _ti_core.Arch.cuda:
            self.arr = self.arr.cuda()
    else:
        # numpy arrays expose contiguity through `flags`, not through a
        # torch-style `.contiguous()` method, so the previous
        # `hasattr(arr, 'contiguous')` check never fired for an ndarray.
        if not arr.flags.c_contiguous:
            arr = np.ascontiguousarray(arr)
        taichi.lang.meta.ext_arr_to_ndarray_matrix(arr, self, as_vector)
        impl.get_runtime().sync()
def __init__(self, dtype, shape):
    """Allocate an uninitialised torch tensor as the ndarray storage.

    Args:
        dtype: Element data type; converted with ``to_pytorch_type``.
        shape (Union[numbers.Number, Sequence]): Array shape. A bare number
            is wrapped into a one-element tuple.
    """
    if isinstance(shape, numbers.Number):
        shape = (shape, )

    assert has_pytorch(
    ), "PyTorch must be available if you want to create a Taichi ndarray."
    import torch

    # Place the tensor on the first CUDA device when running on cuda.
    on_cuda = impl.current_cfg().arch == _ti_core.Arch.cuda
    target_device = 'cuda:0' if on_cuda else 'cpu'

    self.arr = torch.empty(shape,
                           dtype=to_pytorch_type(dtype),
                           device=target_device)
def bitmasked(self, axes, dimensions):
    """Adds a bitmasked SNode as a child component of `self`.

    Args:
        axes (List[Axis]): Axes to activate.
        dimensions (Union[List[int], int]): Shape of each axis. A single
            int is repeated once per axis.

    Returns:
        The added :class:`~taichi.lang.SNode` instance.
    """
    per_axis = ([dimensions] * len(axes)
                if isinstance(dimensions, int) else dimensions)
    make_child = self.ptr.bitmasked
    child_ptr = make_child(axes, per_axis, impl.current_cfg().packed)
    return SNode(child_ptr)
def quant_array(self, axes, dimensions, num_bits):
    """Adds a quant_array SNode as a child component of `self`.

    Args:
        axes (List[Axis]): Axes to activate.
        dimensions (Union[List[int], int]): Shape of each axis. A single
            int is repeated once per axis.
        num_bits (int): Number of bits to use.

    Returns:
        The added :class:`~taichi.lang.SNode` instance.
    """
    per_axis = ([dimensions] * len(axes)
                if isinstance(dimensions, int) else dimensions)
    make_child = self.ptr.quant_array
    child_ptr = make_child(axes, per_axis, num_bits,
                           impl.current_cfg().packed)
    return SNode(child_ptr)
def dynamic(self, axis, dimension, chunk_size=None):
    """Adds a dynamic SNode as a child component of `self`.

    Args:
        axis (List[Axis]): Axis to activate, must be 1.
        dimension (int): Shape of the axis.
        chunk_size (int): Chunk size. Defaults to `dimension` when omitted.

    Returns:
        The added :class:`~taichi.lang.SNode` instance.
    """
    assert len(axis) == 1
    effective_chunk = dimension if chunk_size is None else chunk_size
    make_child = self.ptr.dynamic
    child_ptr = make_child(axis[0], dimension, effective_chunk,
                           impl.current_cfg().packed)
    return SNode(child_ptr)
def to_numpy(self):
    """Converts ndarray to a numpy array.

    Returns:
        numpy.ndarray: The result numpy array.
    """
    if impl.current_cfg().ndarray_use_torch:
        return self.arr.cpu().numpy()

    import numpy as np  # pylint: disable=C0415
    host_buffer = np.zeros(shape=self.arr.shape,
                           dtype=to_numpy_type(self.dtype))

    from taichi.lang.meta import \
        ndarray_to_ext_arr  # pylint: disable=C0415

    # Copy into the host buffer, then sync the runtime before returning it.
    ndarray_to_ext_arr(self, host_buffer)
    impl.get_runtime().sync()
    return host_buffer
def func__(*args):
    """Marshal ``args`` into a launch context, run ``t_kernel``, return its result.

    Each positional argument is matched against the annotation of the
    corresponding entry in ``self.arguments`` and written into consecutive
    argument slots of the launch context. Template arguments occupy no slot;
    a matrix argument occupies one slot per element.

    Returns:
        ``None`` when the kernel has no return type; otherwise an int, a
        float, or a ``Matrix`` built from the returned tensor.

    Raises:
        AssertionError: If the number of arguments does not match.
        TaichiRuntimeTypeError: If a scalar or matrix element has the wrong
            Python type for its annotation.
        ValueError: If an argument matches no supported annotation, or a
            matrix dtype is neither integer nor real.
        TaichiRuntimeError: If more than 8 slots are used on the opengl/cc
            backends, or more than 64 on any other backend.
    """
    assert len(args) == len(
        self.arguments
    ), f'{len(self.arguments)} arguments needed but {len(args)} provided'

    tmps = []
    callbacks = []
    # NOTE(review): has_external_arrays is assigned below but never read in
    # this function.
    has_external_arrays = False
    has_torch = has_pytorch()
    has_pp = has_paddle()

    actual_argument_slot = 0
    launch_ctx = t_kernel.make_launch_context()
    for i, v in enumerate(args):
        needed = self.arguments[i].annotation
        if isinstance(needed, template):
            # Template arguments do not consume an argument slot.
            continue
        provided = type(v)
        # Note: do not use sth like "needed == f32". That would be slow.
        if id(needed) in primitive_types.real_type_ids:
            if not isinstance(v, (float, int)):
                raise TaichiRuntimeTypeError.get(
                    i, needed.to_string(), provided)
            launch_ctx.set_arg_float(actual_argument_slot, float(v))
        elif id(needed) in primitive_types.integer_type_ids:
            if not isinstance(v, int):
                raise TaichiRuntimeTypeError.get(
                    i, needed.to_string(), provided)
            launch_ctx.set_arg_int(actual_argument_slot, int(v))
        elif isinstance(needed, sparse_matrix_builder):
            # Pass only the base pointer of the ti.types.sparse_matrix_builder() argument
            launch_ctx.set_arg_int(actual_argument_slot, v._get_addr())
        elif isinstance(needed, ndarray_type.NdarrayType) and isinstance(
                v, taichi.lang._ndarray.Ndarray):
            has_external_arrays = True
            v = v.arr
            launch_ctx.set_arg_ndarray(actual_argument_slot, v)
        elif isinstance(needed, texture_type.TextureType) and isinstance(
                v, taichi.lang._texture.Texture):
            has_external_arrays = True
            v = v.tex
            launch_ctx.set_arg_texture(actual_argument_slot, v)
        elif isinstance(needed, texture_type.RWTextureType) and isinstance(
                v, taichi.lang._texture.Texture):
            has_external_arrays = True
            v = v.tex
            launch_ctx.set_arg_rw_texture(actual_argument_slot, v)
        elif isinstance(
                needed,
                ndarray_type.NdarrayType) and (self.match_ext_arr(v)):
            has_external_arrays = True
            is_numpy = isinstance(v, np.ndarray)
            is_torch = isinstance(v, torch.Tensor) if has_torch else False

            # Element shapes are already specialized in Taichi codegen.
            # The shape information for element dims are no longer needed.
            # Therefore we strip the element shapes from the shape vector,
            # so that it only holds "real" array shapes.
            is_soa = needed.layout == Layout.SOA
            array_shape = v.shape
            element_dim = needed.element_dim
            if element_dim:
                # SOA drops the leading element dims, otherwise the
                # trailing ones are dropped.
                array_shape = v.shape[
                    element_dim:] if is_soa else v.shape[:-element_dim]
            if is_numpy:
                tmp = np.ascontiguousarray(v)
                # Purpose: DO NOT GC |tmp|!
                tmps.append(tmp)
                launch_ctx.set_arg_external_array_with_shape(
                    actual_argument_slot, int(tmp.ctypes.data),
                    tmp.nbytes, array_shape)
            elif is_torch:
                is_ndarray = False
                tmp, torch_callbacks = self.get_torch_callbacks(
                    v, has_torch, is_ndarray)
                callbacks += torch_callbacks
                launch_ctx.set_arg_external_array_with_shape(
                    actual_argument_slot, int(tmp.data_ptr()),
                    tmp.element_size() * tmp.nelement(), array_shape)
            else:
                # For now, paddle.fluid.core.Tensor._ptr() is only available on develop branch
                tmp, paddle_callbacks = self.get_paddle_callbacks(
                    v, has_pp)
                callbacks += paddle_callbacks
                launch_ctx.set_arg_external_array_with_shape(
                    actual_argument_slot, int(tmp._ptr()),
                    v.element_size() * v.size, array_shape)

        elif isinstance(needed, MatrixType):
            # Matrices are flattened: one argument slot per element, in
            # row-by-row order.
            if id(needed.dtype) in primitive_types.real_type_ids:
                for a in range(needed.n):
                    for b in range(needed.m):
                        if not isinstance(v[a, b], (int, float)):
                            raise TaichiRuntimeTypeError.get(
                                i, needed.dtype.to_string(),
                                type(v[a, b]))
                        launch_ctx.set_arg_float(
                            actual_argument_slot, float(v[a, b]))
                        actual_argument_slot += 1
            elif id(needed.dtype) in primitive_types.integer_type_ids:
                for a in range(needed.n):
                    for b in range(needed.m):
                        if not isinstance(v[a, b], int):
                            raise TaichiRuntimeTypeError.get(
                                i, needed.dtype.to_string(),
                                type(v[a, b]))
                        launch_ctx.set_arg_int(actual_argument_slot,
                                               int(v[a, b]))
                        actual_argument_slot += 1
            else:
                raise ValueError(
                    f'Matrix dtype {needed.dtype} is not integer type or real type.'
                )
            # Slots were already advanced per element above.
            continue
        else:
            raise ValueError(
                f'Argument type mismatch. Expecting {needed}, got {type(v)}.'
            )
        actual_argument_slot += 1
    # Both the class kernels and the plain-function kernels are unified now.
    # In both cases, |self.grad| is another Kernel instance that computes the
    # gradient. For class kernels, args[0] is always the kernel owner.
    if self.autodiff_mode == AutodiffMode.NONE and self.runtime.target_tape and not self.runtime.grad_replaced:
        self.runtime.target_tape.insert(self, args)

    # Slot limits: 8 on opengl/cc, 64 on every other backend.
    if actual_argument_slot > 8 and (
            impl.current_cfg().arch == _ti_core.opengl
            or impl.current_cfg().arch == _ti_core.cc):
        raise TaichiRuntimeError(
            f"The number of elements in kernel arguments is too big! Do not exceed 8 on {_ti_core.arch_name(impl.current_cfg().arch)} backend."
        )

    if actual_argument_slot > 64 and (
        (impl.current_cfg().arch != _ti_core.opengl
         and impl.current_cfg().arch != _ti_core.cc)):
        raise TaichiRuntimeError(
            f"The number of elements in kernel arguments is too big! Do not exceed 64 on {_ti_core.arch_name(impl.current_cfg().arch)} backend."
        )

    try:
        t_kernel(launch_ctx)
    except Exception as e:
        # Convert the exception via handle_exception_from_cpp and drop the
        # original exception context.
        e = handle_exception_from_cpp(e)
        raise e from None

    ret = None
    ret_dt = self.return_type
    has_ret = ret_dt is not None

    if has_ret:
        # Sync before reading the return value back.
        runtime_ops.sync()

    if has_ret:
        if id(ret_dt) in primitive_types.integer_type_ids:
            ret = t_kernel.get_ret_int(0)
        elif id(ret_dt) in primitive_types.real_type_ids:
            ret = t_kernel.get_ret_float(0)
        elif id(ret_dt.dtype) in primitive_types.integer_type_ids:
            # Rebuild an n x m matrix from the flat returned tensor.
            it = iter(t_kernel.get_ret_int_tensor(0))
            ret = Matrix([[next(it) for _ in range(ret_dt.m)]
                          for _ in range(ret_dt.n)])
        else:
            it = iter(t_kernel.get_ret_float_tensor(0))
            ret = Matrix([[next(it) for _ in range(ret_dt.m)]
                          for _ in range(ret_dt.n)])
    if callbacks:
        # Run the callbacks collected from the torch/paddle helpers after
        # the kernel has been launched.
        for c in callbacks:
            c()

    return ret
def func__(*args):
    """Marshal ``args`` into a launch context, run ``t_kernel``, return its result.

    Each positional argument is matched against the corresponding entry of
    ``self.argument_annotations`` and written into consecutive argument
    slots of the launch context. Template arguments occupy no slot; a
    matrix argument occupies one slot per element.

    Returns:
        ``None`` when the kernel has no return type; otherwise the int or
        float read back from the kernel.

    Raises:
        AssertionError: If the number of arguments does not match.
        TaichiRuntimeTypeError: If a scalar or matrix element has the wrong
            Python type for its annotation.
        ValueError: If an argument matches no supported annotation, or a
            matrix dtype is neither integer nor real.
    """
    assert len(args) == len(
        self.argument_annotations
    ), f'{len(self.argument_annotations)} arguments needed but {len(args)} provided'

    tmps = []
    callbacks = []
    has_external_arrays = False
    has_torch = has_pytorch()
    ndarray_use_torch = impl.get_runtime().ndarray_use_torch

    actual_argument_slot = 0
    launch_ctx = t_kernel.make_launch_context()
    for i, v in enumerate(args):
        needed = self.argument_annotations[i]
        if isinstance(needed, template):
            # Template arguments do not consume an argument slot.
            continue
        provided = type(v)
        # Note: do not use sth like "needed == f32". That would be slow.
        if id(needed) in primitive_types.real_type_ids:
            if not isinstance(v, (float, int)):
                raise TaichiRuntimeTypeError(i, needed.to_string(),
                                             provided)
            launch_ctx.set_arg_float(actual_argument_slot, float(v))
        elif id(needed) in primitive_types.integer_type_ids:
            if not isinstance(v, int):
                raise TaichiRuntimeTypeError(i, needed.to_string(),
                                             provided)
            launch_ctx.set_arg_int(actual_argument_slot, int(v))
        elif isinstance(needed, sparse_matrix_builder):
            # Pass only the base pointer of the ti.linalg.sparse_matrix_builder() argument
            launch_ctx.set_arg_int(actual_argument_slot, v._get_addr())
        elif isinstance(needed, any_arr) and isinstance(
                v, taichi.lang._ndarray.Ndarray):
            has_external_arrays = True
            v = v.arr
            if ndarray_use_torch:
                # Torch-backed ndarray: passed as an external array using
                # the tensor's data pointer and byte size.
                is_ndarray = True
                tmp, torch_callbacks = self.get_torch_callbacks(
                    v, has_torch, is_ndarray)
                callbacks += torch_callbacks
                launch_ctx.set_arg_external_array_with_shape(
                    actual_argument_slot, int(tmp.data_ptr()),
                    tmp.element_size() * tmp.nelement(), v.shape)
            else:
                launch_ctx.set_arg_ndarray(actual_argument_slot, v)
        elif isinstance(needed, any_arr) and (self.match_ext_arr(v)):
            has_external_arrays = True
            is_numpy = isinstance(v, np.ndarray)
            if is_numpy:
                tmp = np.ascontiguousarray(v)
                # Purpose: DO NOT GC |tmp|!
                tmps.append(tmp)
                launch_ctx.set_arg_external_array_with_shape(
                    actual_argument_slot, int(tmp.ctypes.data),
                    tmp.nbytes, v.shape)
            else:
                # Anything else accepted by match_ext_arr is handled
                # through the torch helper.
                is_ndarray = False
                tmp, torch_callbacks = self.get_torch_callbacks(
                    v, has_torch, is_ndarray)
                callbacks += torch_callbacks
                launch_ctx.set_arg_external_array_with_shape(
                    actual_argument_slot, int(tmp.data_ptr()),
                    tmp.element_size() * tmp.nelement(), v.shape)
        elif isinstance(needed, MatrixType):
            # Matrices are flattened: one argument slot per element, in
            # row-by-row order.
            if id(needed.dtype) in primitive_types.real_type_ids:
                for a in range(needed.n):
                    for b in range(needed.m):
                        if not isinstance(v[a, b], (int, float)):
                            raise TaichiRuntimeTypeError(
                                i, needed.dtype.to_string(),
                                type(v[a, b]))
                        launch_ctx.set_arg_float(
                            actual_argument_slot, float(v[a, b]))
                        actual_argument_slot += 1
            elif id(needed.dtype) in primitive_types.integer_type_ids:
                for a in range(needed.n):
                    for b in range(needed.m):
                        if not isinstance(v[a, b], int):
                            raise TaichiRuntimeTypeError(
                                i, needed.dtype.to_string(),
                                type(v[a, b]))
                        launch_ctx.set_arg_int(actual_argument_slot,
                                               int(v[a, b]))
                        actual_argument_slot += 1
            else:
                raise ValueError(
                    f'Matrix dtype {needed.dtype} is not integer type or real type.'
                )
            # Slots were already advanced per element above.
            continue
        else:
            raise ValueError(
                f'Argument type mismatch. Expecting {needed}, got {type(v)}.'
            )
        actual_argument_slot += 1
    # Both the class kernels and the plain-function kernels are unified now.
    # In both cases, |self.grad| is another Kernel instance that computes the
    # gradient. For class kernels, args[0] is always the kernel owner.
    if not self.is_grad and self.runtime.target_tape and not self.runtime.grad_replaced:
        self.runtime.target_tape.insert(self, args)

    t_kernel(launch_ctx)

    ret = None
    ret_dt = self.return_type
    has_ret = ret_dt is not None

    # Sync when a value must be read back, or when async mode is on and
    # array arguments were passed.
    if has_ret or (impl.current_cfg().async_mode and has_external_arrays):
        runtime_ops.sync()

    if has_ret:
        if id(ret_dt) in primitive_types.integer_type_ids:
            ret = t_kernel.get_ret_int(0)
        else:
            ret = t_kernel.get_ret_float(0)

    if callbacks:
        # Run the callbacks collected from the torch helper after the
        # kernel has been launched.
        for c in callbacks:
            c()

    return ret
def __getitem__(self, key):
    """Read the element(s) at ``key``.

    Torch-backed storage is indexed directly; otherwise the host accessor is
    initialised and its getter is called with the padded key.
    """
    if not impl.current_cfg().ndarray_use_torch:
        self.initialize_host_accessor()
        read = self.host_accessor.getter
        return read(*self.pad_key(key))

    return self.arr.__getitem__(key)
def __setitem__(self, key, value):
    """Write ``value`` at ``key``.

    Torch-backed storage is assigned directly; otherwise the host accessor
    is initialised and its setter is called with the value and padded key.
    """
    if not impl.current_cfg().ndarray_use_torch:
        self.initialize_host_accessor()
        write = self.host_accessor.setter
        write(value, *self.pad_key(key))
        return

    self.arr.__setitem__(key, value)
def from_file(cls, filename, compile_fn=None, _temp_dir=None):
    """Create a builder from a source, bitcode or shared-library file.

    The file extension selects how the file is handled:

    * ``.cpp`` / ``.c`` / ``.cc``: compiled to bitcode (x64 or cuda).
    * ``.cu``: compiled to bitcode (cuda only).
    * ``.so`` / ``.dylib`` / ``.dll``: loaded with ``ctypes.CDLL`` (x64 only).
    * ``.ll``: assembled to bitcode with ``llvm-as`` (x64 or cuda).
    * ``.bc``: used as-is (x64 or cuda).

    Args:
        filename (str): Path of the input file.
        compile_fn: Optional callable ``compile_fn(filename) -> bitcode path``
            replacing the default clang++ invocation for C/C++/CUDA sources.
        _temp_dir: Working directory; a new temporary directory is created
            when it is ``None``.

    Returns:
        The new builder, with ``mode`` set to ``'bc'`` or ``'so'`` and ``bc``
        or ``so`` populated accordingly.

    Raises:
        TaichiSyntaxError: If the current arch does not support the file
            type, or the file type itself is unsupported.
    """
    builder = cls()
    builder.td = _temp_dir
    if builder.td is None:
        builder.td = tempfile.mkdtemp()

    def require_arch(*supported):
        # Reject the call when the current arch is not one of `supported`.
        if impl.current_cfg().arch not in supported:
            raise TaichiSyntaxError(
                "Unsupported arch for external function call")

    def compile_cpp(source_path):
        # Default compiler for C/C++ sources: clang++ to LLVM bitcode,
        # targeting nvptx64 whenever the arch is not x64.
        is_x64 = impl.current_cfg().arch == _ti_core.Arch.x64
        command = get_clangpp(
        ) + ' -flto -c ' + source_path + ' -o ' + os.path.join(
            builder.td, 'source.bc')
        if not is_x64:
            command = command + ' -target nvptx64-nvidia-cuda'
        subprocess.call(command, shell=True)
        return os.path.join(builder.td, 'source.bc')

    def compile_cuda(source_path):
        # Default compiler for CUDA sources. It compiles the copy placed in
        # the working directory rather than `source_path` itself.
        # Cannot use -o to specify multiple output files
        subprocess.call(
            get_clangpp() + ' ' + os.path.join(builder.td, 'source.cu') +
            ' -c -emit-llvm -std=c++17 --cuda-gpu-arch=sm_50 -nocudalib',
            cwd=builder.td,
            shell=True)
        return os.path.join(builder.td,
                            'source-cuda-nvptx64-nvidia-cuda-sm_50.bc')

    if filename.endswith((".cpp", ".c", ".cc")):
        require_arch(_ti_core.Arch.x64, _ti_core.Arch.cuda)
        if compile_fn is None:
            compile_fn = compile_cpp
        builder.bc = compile_fn(filename)
        builder.mode = 'bc'
    elif filename.endswith(".cu"):
        require_arch(_ti_core.Arch.cuda)
        if compile_fn is None:
            shutil.copy(filename, os.path.join(builder.td, 'source.cu'))
            compile_fn = compile_cuda
        builder.bc = compile_fn(filename)
        builder.mode = 'bc'
    elif filename.endswith((".so", ".dylib", ".dll")):
        require_arch(_ti_core.Arch.x64)
        builder.so = ctypes.CDLL(filename)
        builder.mode = 'so'
    elif filename.endswith(".ll"):
        require_arch(_ti_core.Arch.x64, _ti_core.Arch.cuda)
        subprocess.call('llvm-as ' + filename + ' -o ' +
                        os.path.join(builder.td, 'source.bc'),
                        shell=True)
        builder.bc = os.path.join(builder.td, 'source.bc')
        builder.mode = 'bc'
    elif filename.endswith(".bc"):
        require_arch(_ti_core.Arch.x64, _ti_core.Arch.cuda)
        builder.bc = filename
        builder.mode = 'bc'
    else:
        raise TaichiSyntaxError(
            'Unsupported file type for external function call.')

    return builder