Esempio n. 1
0
    def __init__(self, topology):
        """Creates the per-element containers for a mesh of the given topology.

        Args:
            topology: Mesh topology. A ``cells`` container is only created
                when it equals ``MeshTopology.Tetrahedron``.

        Raises:
            Exception: If the current arch does not support the mesh
                extension.
        """
        arch = impl.current_cfg().arch
        if not lang.misc.is_extension_supported(arch, lang.extension.mesh):
            raise Exception(
                f"Backend {arch!s} doesn't support MeshTaichi extension")

        self.topology = topology

        element_kinds = [
            ("verts", MeshElementType.Vertex),
            ("edges", MeshElementType.Edge),
            ("faces", MeshElementType.Face),
        ]
        if topology == MeshTopology.Tetrahedron:
            element_kinds.append(("cells", MeshElementType.Cell))
        for attr_name, element_type in element_kinds:
            setattr(self, attr_name, MeshElement(element_type, self))

        self.elements = set()
        self.relations = set()

        # Flag the configuration so kernel compilation knows a mesh exists.
        impl.current_cfg().use_mesh = True
Esempio n. 2
0
    def fill(self, val):
        """Sets every entry of the ndarray to one scalar value.

        The native ``fill_float`` / ``fill_int`` / ``fill_uint`` routines are
        used on the cuda and x64 arches for f32, i32 and u32 respectively;
        every other combination goes through ``_fill_by_kernel``.

        Args:
            val (Union[int, float]): Value to fill.
        """
        arch = impl.current_cfg().arch
        has_native_fill = (arch == _ti_core.Arch.cuda
                           or arch == _ti_core.Arch.x64)
        if not has_native_fill:
            self._fill_by_kernel(val)
            return

        if self.dtype == primitive_types.f32:
            self.arr.fill_float(val)
            return
        if self.dtype == primitive_types.i32:
            self.arr.fill_int(val)
            return
        if self.dtype == primitive_types.u32:
            self.arr.fill_uint(val)
            return

        # No native routine for this dtype.
        self._fill_by_kernel(val)
Esempio n. 3
0
    def bit_struct(self, num_bits: int):
        """Appends a bit_struct SNode underneath this node.

        Args:
            num_bits: Number of bits to use.

        Returns:
            The added :class:`~taichi.lang.SNode` instance.
        """
        add_child = self.ptr.bit_struct
        use_packed = impl.current_cfg().packed
        return SNode(add_child(num_bits, use_packed))
Esempio n. 4
0
    def ndarray_fill(self, val):
        """Sets every entry of the ndarray to one scalar value.

        Args:
            val (Union[int, float]): Value to fill.
        """
        if not impl.current_cfg().ndarray_use_torch:
            taichi.lang.meta.fill_ndarray(self, val)
            return
        # Torch-backed storage: use the tensor's in-place fill.
        self.arr.fill_(val)
Esempio n. 5
0
    def fill(self, val):
        """Sets every entry of the ndarray to one scalar value.

        Args:
            val (Union[int, float]): Value to fill.
        """
        if not impl.current_cfg().ndarray_use_torch:
            # Imported here rather than at module level, as in the original.
            from taichi.lang.meta import fill_ndarray  # pylint: disable=C0415
            fill_ndarray(self, val)
            return
        # Torch-backed storage: use the tensor's in-place fill.
        self.arr.fill_(val)
Esempio n. 6
0
 def from_source(cls, source_code, compile_fn=None):
     """Builds a SourceBuilder from C++ source text.

     The text is written to ``_temp_source.cpp`` inside a freshly created
     temporary directory and then handed to ``SourceBuilder.from_file``,
     which also receives that directory.

     Args:
         source_code (str): The source text to write out.
         compile_fn: Optional compile callback forwarded unchanged to
             ``SourceBuilder.from_file``.

     Returns:
         Whatever ``SourceBuilder.from_file`` returns.

     Raises:
         TaichiSyntaxError: If the current arch is neither x64 nor cuda.
     """
     supported_archs = (_ti_core.Arch.x64, _ti_core.Arch.cuda)
     if impl.current_cfg().arch not in supported_archs:
         raise TaichiSyntaxError(
             "Unsupported arch for external function call")
     _temp_dir = tempfile.mkdtemp()
     _temp_source = os.path.join(_temp_dir, '_temp_source.cpp')
     # Write UTF-8 explicitly: the default is the platform locale encoding,
     # which can fail on (or mis-encode) non-ASCII source text.
     with open(_temp_source, 'w', encoding='utf-8') as f:
         f.write(source_code)
     return SourceBuilder.from_file(_temp_source, compile_fn, _temp_dir)
Esempio n. 7
0
    def materialize(self, key=None, args=None, arg_features=None):
        """Compiles the kernel for ``key`` unless it is already cached.

        The compiled callable is stored in ``self.compiled_functions[key]``
        and the created kernel object in ``self.kernel_cpp``.

        Args:
            key: Cache key; defaults to ``(self.func, 0)``.
            args: Forwarded to ``_get_tree_and_ctx``.
            arg_features: Forwarded to ``_get_tree_and_ctx``.
        """
        if key is None:
            key = (self.func, 0)
        self.runtime.materialize()
        if key in self.compiled_functions:
            return

        grad_suffix = "_grad" if self.is_grad else ""
        kernel_name = f"{self.func.__name__}_c{self.kernel_counter}_{key[1]}{grad_suffix}"
        _logging.trace(f"Compiling kernel {kernel_name}...")

        tree, ctx = _get_tree_and_ctx(
            self,
            args=args,
            excluded_parameters=self.template_slot_locations,
            arg_features=arg_features)

        if self.is_grad:
            KernelSimplicityASTChecker(self.func).visit(tree)

        if impl.current_cfg().use_mesh:
            taichi.lang.Mesh.update_relation(tree, ctx)

        # Do not change the name of 'taichi_ast_generator'
        # The warning system needs this identifier to remove unnecessary messages
        def taichi_ast_generator(kernel_cxx):
            runtime = self.runtime
            if runtime.inside_kernel:
                raise TaichiSyntaxError(
                    "Kernels cannot call other kernels. I.e., nested kernels are not allowed. "
                    "Please check if you have direct/indirect invocation of kernels within kernels. "
                    "Note that some methods provided by the Taichi standard library may invoke kernels, "
                    "and please move their invocations to Python-scope.")
            runtime.inside_kernel = True
            runtime.current_kernel = self
            try:
                ctx.ast_builder = kernel_cxx.ast_builder()
                transform_tree(tree, ctx)
                missing_return = (not ctx.is_real_function
                                  and self.return_type
                                  and ctx.returned != ReturnStatus.ReturnedValue)
                if missing_return:
                    raise TaichiSyntaxError(
                        "Kernel has a return type but does not have a return statement"
                    )
            finally:
                # Always leave kernel scope, even if the transform failed.
                self.runtime.inside_kernel = False
                self.runtime.current_kernel = None

        created_kernel = impl.get_runtime().prog.create_kernel(
            taichi_ast_generator, kernel_name, self.is_grad)
        self.kernel_cpp = created_kernel

        assert key not in self.compiled_functions
        self.compiled_functions[key] = self.get_function_body(created_kernel)
Esempio n. 8
0
    def __init__(self, dtype, shape):
        """Allocates the ndarray storage and binds the matching fill routine.

        With ``ndarray_use_torch`` enabled the storage is a zero-initialised
        torch tensor (moved to CUDA on the cuda arch); otherwise it is a
        ``_ti_core.Ndarray``.

        Args:
            dtype: Element data type, normalised through ``cook_dtype``.
            shape: Shape of the array.
        """
        self.host_accessor = None

        if not impl.current_cfg().ndarray_use_torch:
            self.arr = _ti_core.Ndarray(impl.get_runtime().prog,
                                        cook_dtype(dtype), shape)

            def ndarray_fill(val, fill_func):
                # Delegate to the fill function supplied by the caller.
                fill_func(self, val)

            self.ndarray_fill = ndarray_fill
            return

        assert has_pytorch(
        ), "PyTorch must be available if you want to create a Taichi ndarray with PyTorch as its underlying storage."
        # pylint: disable=E1101
        torch_dtype = to_pytorch_type(cook_dtype(dtype))
        self.arr = torch.zeros(shape, dtype=torch_dtype)
        if impl.current_cfg().arch == _ti_core.Arch.cuda:
            self.arr = self.arr.cuda()

        def ndarray_fill(val, fill_func):
            # The supplied fill function is ignored for torch storage.
            self.arr.fill_(val)

        self.ndarray_fill = ndarray_fill
Esempio n. 9
0
    def __call__(self, *args, **kwargs):
        """Runs the kernel, compiling it first if necessary.

        Positional and keyword arguments are normalised by ``_process_args``.

        Returns:
            The result of the compiled function for these arguments.
        """
        args = _process_args(self, args, kwargs)

        # Transform the primal kernel to forward mode grad kernel
        # then recover to primal when exiting the forward mode manager
        fwd_manager = self.runtime.fwd_mode_manager
        if fwd_manager:
            # TODO: if we would like to compute 2nd-order derivatives by forward-on-reverse in a nested context manager fashion,
            # i.e., a `Tape` nested in the `FwdMode`, we can transform the kernels with `mode_original == AutodiffMode.REVERSE` only,
            # to avoid duplicate computation for 1st-order derivatives
            previous_mode = self.autodiff_mode
            self.autodiff_mode = AutodiffMode.FORWARD
            fwd_manager.insert(self, previous_mode)

        if self.autodiff_mode != AutodiffMode.NONE:
            if impl.current_cfg().opt_level == 0:
                _logging.warn(
                    "opt_level = 1 is enforced to enable gradient computation."
                )
                impl.current_cfg().opt_level = 1

        compiled_key = self.ensure_compiled(*args)
        compiled_fn = self.runtime.compiled_functions[compiled_key]
        return compiled_fn(*args)
Esempio n. 10
0
 def compile_fn_impl(filename):
     """Compiles ``filename`` to ``source.bc`` inside ``self.td``.

     On any arch other than x64, ``-target nvptx64-nvidia-cuda`` is appended
     to the compiler command line.

     Args:
         filename (str): Path of the source file to compile.

     Returns:
         str: Path of the ``source.bc`` output file.
     """
     targets_x64 = impl.current_cfg().arch == _ti_core.Arch.x64
     # NOTE(review): the command is a concatenated string run with
     # shell=True, so paths containing spaces or shell metacharacters are
     # not escaped, and the compiler's exit status is ignored.
     command = (get_clangpp() + ' -flto -c ' + filename + ' -o ' +
                os.path.join(self.td, 'source.bc'))
     if not targets_x64:
         command = command + ' -target nvptx64-nvidia-cuda'
     subprocess.call(command, shell=True)
     return os.path.join(self.td, 'source.bc')
Esempio n. 11
0
    def ndarray_to_numpy(self):
        """Returns the ndarray contents as a numpy array.

        Returns:
            numpy.ndarray: The result numpy array.
        """
        if not impl.current_cfg().ndarray_use_torch:
            host_copy = np.zeros(shape=self.arr.shape,
                                 dtype=to_numpy_type(self.dtype))
            taichi.lang.meta.ndarray_to_ext_arr(self, host_copy)
            # Sync the runtime after the copy before handing the buffer back.
            impl.get_runtime().sync()
            return host_copy

        # Torch-backed storage: convert via a CPU tensor.
        return self.arr.cpu().numpy()
Esempio n. 12
0
    def ndarray_matrix_from_numpy(self, arr, as_vector):
        """Copies every value of a numpy array into this ndarray.

        Args:
            arr (numpy.ndarray): The source numpy array; its shape must match
                the shape of the underlying storage.
            as_vector: Forwarded unchanged to ``ext_arr_to_ndarray_matrix``
                on the non-torch path.

        Raises:
            TypeError: If ``arr`` is not a ``numpy.ndarray``.
            ValueError: If the shapes differ.
        """
        if not isinstance(arr, np.ndarray):
            raise TypeError(f"{np.ndarray} expected, but {type(arr)} provided")
        shapes_match = tuple(self.arr.shape) == tuple(arr.shape)
        if not shapes_match:
            raise ValueError(
                f"Mismatch shape: {tuple(self.arr.shape)} expected, but {tuple(arr.shape)} provided"
            )

        if not impl.current_cfg().ndarray_use_torch:
            if hasattr(arr, 'contiguous'):
                arr = arr.contiguous()

            taichi.lang.meta.ext_arr_to_ndarray_matrix(arr, self, as_vector)
            impl.get_runtime().sync()
            return

        # Torch-backed storage: replace the tensor, keeping the old dtype.
        self.arr = torch.from_numpy(arr).to(self.arr.dtype)  # pylint: disable=E1101
        if impl.current_cfg().arch == _ti_core.Arch.cuda:
            self.arr = self.arr.cuda()
Esempio n. 13
0
 def __init__(self, dtype, shape):
     """Allocates an uninitialised torch tensor as the ndarray storage.

     Args:
         dtype: Element data type, converted with ``to_pytorch_type``.
         shape: Shape of the array; a single number is treated as a
             one-element tuple.
     """
     if isinstance(shape, numbers.Number):
         shape = (shape, )
     assert has_pytorch(
     ), "PyTorch must be available if you want to create a Taichi ndarray."
     import torch
     on_cuda = impl.current_cfg().arch == _ti_core.Arch.cuda
     device = 'cuda:0' if on_cuda else 'cpu'
     self.arr = torch.empty(shape,
                            dtype=to_pytorch_type(dtype),
                            device=device)
Esempio n. 14
0
    def bitmasked(self, axes, dimensions):
        """Appends a bitmasked SNode underneath this node.

        Args:
            axes (List[Axis]): Axes to activate.
            dimensions (Union[List[int], int]): Shape of each axis. A single
                int is repeated once per entry of ``axes``.

        Returns:
            The added :class:`~taichi.lang.SNode` instance.
        """
        per_axis = ([dimensions] * len(axes)
                    if isinstance(dimensions, int) else dimensions)
        add_child = self.ptr.bitmasked
        use_packed = impl.current_cfg().packed
        return SNode(add_child(axes, per_axis, use_packed))
Esempio n. 15
0
    def quant_array(self, axes, dimensions, num_bits):
        """Appends a quant_array SNode underneath this node.

        Args:
            axes (List[Axis]): Axes to activate.
            dimensions (Union[List[int], int]): Shape of each axis. A single
                int is repeated once per entry of ``axes``.
            num_bits (int): Number of bits to use.

        Returns:
            The added :class:`~taichi.lang.SNode` instance.
        """
        per_axis = ([dimensions] * len(axes)
                    if isinstance(dimensions, int) else dimensions)
        add_child = self.ptr.quant_array
        use_packed = impl.current_cfg().packed
        return SNode(add_child(axes, per_axis, num_bits, use_packed))
Esempio n. 16
0
    def dynamic(self, axis, dimension, chunk_size=None):
        """Appends a dynamic SNode underneath this node.

        Args:
            axis (List[Axis]): Axis to activate; must hold exactly one entry.
            dimension (int): Shape of the axis.
            chunk_size (int): Chunk size; defaults to ``dimension``.

        Returns:
            The added :class:`~taichi.lang.SNode` instance.
        """
        assert len(axis) == 1
        effective_chunk = dimension if chunk_size is None else chunk_size
        add_child = self.ptr.dynamic
        only_axis = axis[0]
        use_packed = impl.current_cfg().packed
        return SNode(
            add_child(only_axis, dimension, effective_chunk, use_packed))
Esempio n. 17
0
    def to_numpy(self):
        """Returns the ndarray contents as a numpy array.

        Returns:
            numpy.ndarray: The result numpy array.
        """
        if impl.current_cfg().ndarray_use_torch:
            # Torch-backed storage: convert via a CPU tensor.
            return self.arr.cpu().numpy()

        import numpy as np  # pylint: disable=C0415
        host_copy = np.zeros(shape=self.arr.shape,
                             dtype=to_numpy_type(self.dtype))
        from taichi.lang.meta import \
            ndarray_to_ext_arr  # pylint: disable=C0415
        ndarray_to_ext_arr(self, host_copy)
        # Sync the runtime after the copy before handing the buffer back.
        impl.get_runtime().sync()
        return host_copy
Esempio n. 18
0
        def func__(*args):
            """Binds ``args`` to a launch context, runs ``t_kernel`` and returns its result.

            Each argument is checked against the annotation of the matching
            entry in ``self.arguments`` and written into the launch context.
            Arguments annotated as ``template`` are skipped and consume no slot.

            Returns:
                ``None`` when ``self.return_type`` is ``None``; otherwise the
                scalar or ``Matrix`` read back from the kernel.

            Raises:
                TaichiRuntimeTypeError: If a scalar or matrix element has the
                    wrong Python type.
                ValueError: If an argument matches no supported annotation, or
                    a matrix dtype is neither integer nor real.
                TaichiRuntimeError: If more argument slots are used than the
                    limit checked below for the current arch.
            """
            assert len(args) == len(
                self.arguments
            ), f'{len(self.arguments)} arguments needed but {len(args)} provided'

            tmps = []
            callbacks = []
            # NOTE(review): has_external_arrays is assigned below but never
            # read within this function.
            has_external_arrays = False
            has_torch = has_pytorch()
            has_pp = has_paddle()

            # Index of the next launch-context slot to fill. It can differ from
            # the Python argument index `i`: templates use no slot and matrices
            # use one slot per element.
            actual_argument_slot = 0
            launch_ctx = t_kernel.make_launch_context()
            for i, v in enumerate(args):
                needed = self.arguments[i].annotation
                if isinstance(needed, template):
                    continue
                provided = type(v)
                # Note: do not use something like "needed == f32". That would be slow.
                if id(needed) in primitive_types.real_type_ids:
                    if not isinstance(v, (float, int)):
                        raise TaichiRuntimeTypeError.get(
                            i, needed.to_string(), provided)
                    launch_ctx.set_arg_float(actual_argument_slot, float(v))
                elif id(needed) in primitive_types.integer_type_ids:
                    if not isinstance(v, int):
                        raise TaichiRuntimeTypeError.get(
                            i, needed.to_string(), provided)
                    launch_ctx.set_arg_int(actual_argument_slot, int(v))
                elif isinstance(needed, sparse_matrix_builder):
                    # Pass only the base pointer of the ti.types.sparse_matrix_builder() argument
                    launch_ctx.set_arg_int(actual_argument_slot, v._get_addr())
                elif isinstance(needed,
                                ndarray_type.NdarrayType) and isinstance(
                                    v, taichi.lang._ndarray.Ndarray):
                    # Taichi ndarray: pass its underlying `arr` object.
                    has_external_arrays = True
                    v = v.arr
                    launch_ctx.set_arg_ndarray(actual_argument_slot, v)
                elif isinstance(needed,
                                texture_type.TextureType) and isinstance(
                                    v, taichi.lang._texture.Texture):
                    # Texture argument: pass its underlying `tex` object.
                    has_external_arrays = True
                    v = v.tex
                    launch_ctx.set_arg_texture(actual_argument_slot, v)
                elif isinstance(needed,
                                texture_type.RWTextureType) and isinstance(
                                    v, taichi.lang._texture.Texture):
                    # RW texture argument: same object, different setter.
                    has_external_arrays = True
                    v = v.tex
                    launch_ctx.set_arg_rw_texture(actual_argument_slot, v)
                elif isinstance(
                        needed,
                        ndarray_type.NdarrayType) and (self.match_ext_arr(v)):
                    # External array accepted by match_ext_arr: numpy, torch,
                    # or (in the final branch) handled as a paddle tensor.
                    has_external_arrays = True
                    is_numpy = isinstance(v, np.ndarray)
                    is_torch = isinstance(v,
                                          torch.Tensor) if has_torch else False

                    # Element shapes are already specialized in Taichi codegen.
                    # The shape information for element dims are no longer needed.
                    # Therefore we strip the element shapes from the shape vector,
                    # so that it only holds "real" array shapes.
                    is_soa = needed.layout == Layout.SOA
                    array_shape = v.shape
                    element_dim = needed.element_dim
                    if element_dim:
                        # SOA drops the leading dims, otherwise the trailing ones.
                        array_shape = v.shape[
                            element_dim:] if is_soa else v.shape[:-element_dim]
                    if is_numpy:
                        tmp = np.ascontiguousarray(v)
                        # Purpose: DO NOT GC |tmp|!
                        tmps.append(tmp)
                        launch_ctx.set_arg_external_array_with_shape(
                            actual_argument_slot, int(tmp.ctypes.data),
                            tmp.nbytes, array_shape)
                    elif is_torch:
                        is_ndarray = False
                        tmp, torch_callbacks = self.get_torch_callbacks(
                            v, has_torch, is_ndarray)
                        # Callbacks are run after the kernel has been launched.
                        callbacks += torch_callbacks
                        launch_ctx.set_arg_external_array_with_shape(
                            actual_argument_slot, int(tmp.data_ptr()),
                            tmp.element_size() * tmp.nelement(), array_shape)
                    else:
                        # For now, paddle.fluid.core.Tensor._ptr() is only available on develop branch
                        tmp, paddle_callbacks = self.get_paddle_callbacks(
                            v, has_pp)
                        callbacks += paddle_callbacks
                        launch_ctx.set_arg_external_array_with_shape(
                            actual_argument_slot, int(tmp._ptr()),
                            v.element_size() * v.size, array_shape)

                elif isinstance(needed, MatrixType):
                    # Matrices are passed element by element in row-major
                    # order, one slot per element.
                    if id(needed.dtype) in primitive_types.real_type_ids:
                        for a in range(needed.n):
                            for b in range(needed.m):
                                if not isinstance(v[a, b], (int, float)):
                                    raise TaichiRuntimeTypeError.get(
                                        i, needed.dtype.to_string(),
                                        type(v[a, b]))
                                launch_ctx.set_arg_float(
                                    actual_argument_slot, float(v[a, b]))
                                actual_argument_slot += 1
                    elif id(needed.dtype) in primitive_types.integer_type_ids:
                        for a in range(needed.n):
                            for b in range(needed.m):
                                if not isinstance(v[a, b], int):
                                    raise TaichiRuntimeTypeError.get(
                                        i, needed.dtype.to_string(),
                                        type(v[a, b]))
                                launch_ctx.set_arg_int(actual_argument_slot,
                                                       int(v[a, b]))
                                actual_argument_slot += 1
                    else:
                        raise ValueError(
                            f'Matrix dtype {needed.dtype} is not integer type or real type.'
                        )
                    # Slots were already advanced per element; skip the
                    # shared increment at the bottom of the loop.
                    continue
                else:
                    raise ValueError(
                        f'Argument type mismatch. Expecting {needed}, got {type(v)}.'
                    )
                actual_argument_slot += 1
            # Both the class kernels and the plain-function kernels are unified now.
            # In both cases, |self.grad| is another Kernel instance that computes the
            # gradient. For class kernels, args[0] is always the kernel owner.
            if self.autodiff_mode == AutodiffMode.NONE and self.runtime.target_tape and not self.runtime.grad_replaced:
                self.runtime.target_tape.insert(self, args)

            # Slot limit: 8 on the opengl and cc arches, 64 on every other arch.
            if actual_argument_slot > 8 and (
                    impl.current_cfg().arch == _ti_core.opengl
                    or impl.current_cfg().arch == _ti_core.cc):
                raise TaichiRuntimeError(
                    f"The number of elements in kernel arguments is too big! Do not exceed 8 on {_ti_core.arch_name(impl.current_cfg().arch)} backend."
                )

            if actual_argument_slot > 64 and (
                (impl.current_cfg().arch != _ti_core.opengl
                 and impl.current_cfg().arch != _ti_core.cc)):
                raise TaichiRuntimeError(
                    f"The number of elements in kernel arguments is too big! Do not exceed 64 on {_ti_core.arch_name(impl.current_cfg().arch)} backend."
                )

            try:
                t_kernel(launch_ctx)
            except Exception as e:
                # Convert the exception and re-raise it without the original
                # context attached.
                e = handle_exception_from_cpp(e)
                raise e from None

            ret = None
            ret_dt = self.return_type
            has_ret = ret_dt is not None

            # Sync before reading a return value back.
            if has_ret:
                runtime_ops.sync()

            if has_ret:
                if id(ret_dt) in primitive_types.integer_type_ids:
                    ret = t_kernel.get_ret_int(0)
                elif id(ret_dt) in primitive_types.real_type_ids:
                    ret = t_kernel.get_ret_float(0)
                elif id(ret_dt.dtype) in primitive_types.integer_type_ids:
                    # Matrix return: rebuild n rows of m values from the flat tensor.
                    it = iter(t_kernel.get_ret_int_tensor(0))
                    ret = Matrix([[next(it) for _ in range(ret_dt.m)]
                                  for _ in range(ret_dt.n)])
                else:
                    it = iter(t_kernel.get_ret_float_tensor(0))
                    ret = Matrix([[next(it) for _ in range(ret_dt.m)]
                                  for _ in range(ret_dt.n)])
            # Run the callbacks collected for torch/paddle arguments.
            if callbacks:
                for c in callbacks:
                    c()

            return ret
Esempio n. 19
0
        def func__(*args):
            """Binds ``args`` to a launch context, runs ``t_kernel`` and returns its result.

            Each argument is checked against the matching entry of
            ``self.argument_annotations`` and written into the launch context.
            Arguments annotated as ``template`` are skipped and consume no slot.

            Returns:
                ``None`` when ``self.return_type`` is ``None``; otherwise the
                int or float read back from the kernel.

            Raises:
                TaichiRuntimeTypeError: If a scalar or matrix element has the
                    wrong Python type.
                ValueError: If an argument matches no supported annotation, or
                    a matrix dtype is neither integer nor real.
            """
            assert len(args) == len(
                self.argument_annotations
            ), f'{len(self.argument_annotations)} arguments needed but {len(args)} provided'

            tmps = []
            callbacks = []
            # Read after the launch to decide whether to sync in async mode.
            has_external_arrays = False
            has_torch = has_pytorch()
            ndarray_use_torch = impl.get_runtime().ndarray_use_torch

            # Index of the next launch-context slot to fill. It can differ from
            # the Python argument index `i`: templates use no slot and matrices
            # use one slot per element.
            actual_argument_slot = 0
            launch_ctx = t_kernel.make_launch_context()
            for i, v in enumerate(args):
                needed = self.argument_annotations[i]
                if isinstance(needed, template):
                    continue
                provided = type(v)
                # Note: do not use something like "needed == f32". That would be slow.
                if id(needed) in primitive_types.real_type_ids:
                    if not isinstance(v, (float, int)):
                        raise TaichiRuntimeTypeError(i, needed.to_string(),
                                                     provided)
                    launch_ctx.set_arg_float(actual_argument_slot, float(v))
                elif id(needed) in primitive_types.integer_type_ids:
                    if not isinstance(v, int):
                        raise TaichiRuntimeTypeError(i, needed.to_string(),
                                                     provided)
                    launch_ctx.set_arg_int(actual_argument_slot, int(v))
                elif isinstance(needed, sparse_matrix_builder):
                    # Pass only the base pointer of the ti.linalg.sparse_matrix_builder() argument
                    launch_ctx.set_arg_int(actual_argument_slot, v._get_addr())
                elif isinstance(needed, any_arr) and isinstance(
                        v, taichi.lang._ndarray.Ndarray):
                    # Taichi ndarray: pass its underlying `arr`, either as a
                    # torch buffer or through set_arg_ndarray.
                    has_external_arrays = True
                    v = v.arr
                    if ndarray_use_torch:
                        is_ndarray = True
                        tmp, torch_callbacks = self.get_torch_callbacks(
                            v, has_torch, is_ndarray)
                        # Callbacks are run after the kernel has been launched.
                        callbacks += torch_callbacks
                        launch_ctx.set_arg_external_array_with_shape(
                            actual_argument_slot, int(tmp.data_ptr()),
                            tmp.element_size() * tmp.nelement(), v.shape)
                    else:
                        launch_ctx.set_arg_ndarray(actual_argument_slot, v)
                elif isinstance(needed, any_arr) and (self.match_ext_arr(v)):
                    # External array accepted by match_ext_arr: numpy, or
                    # otherwise handled through the torch callbacks.
                    has_external_arrays = True
                    is_numpy = isinstance(v, np.ndarray)
                    if is_numpy:
                        tmp = np.ascontiguousarray(v)
                        # Purpose: DO NOT GC |tmp|!
                        tmps.append(tmp)
                        launch_ctx.set_arg_external_array_with_shape(
                            actual_argument_slot, int(tmp.ctypes.data),
                            tmp.nbytes, v.shape)
                    else:
                        is_ndarray = False
                        tmp, torch_callbacks = self.get_torch_callbacks(
                            v, has_torch, is_ndarray)
                        callbacks += torch_callbacks
                        launch_ctx.set_arg_external_array_with_shape(
                            actual_argument_slot, int(tmp.data_ptr()),
                            tmp.element_size() * tmp.nelement(), v.shape)

                elif isinstance(needed, MatrixType):
                    # Matrices are passed element by element in row-major
                    # order, one slot per element.
                    if id(needed.dtype) in primitive_types.real_type_ids:
                        for a in range(needed.n):
                            for b in range(needed.m):
                                if not isinstance(v[a, b], (int, float)):
                                    raise TaichiRuntimeTypeError(
                                        i, needed.dtype.to_string(),
                                        type(v[a, b]))
                                launch_ctx.set_arg_float(
                                    actual_argument_slot, float(v[a, b]))
                                actual_argument_slot += 1
                    elif id(needed.dtype) in primitive_types.integer_type_ids:
                        for a in range(needed.n):
                            for b in range(needed.m):
                                if not isinstance(v[a, b], int):
                                    raise TaichiRuntimeTypeError(
                                        i, needed.dtype.to_string(),
                                        type(v[a, b]))
                                launch_ctx.set_arg_int(actual_argument_slot,
                                                       int(v[a, b]))
                                actual_argument_slot += 1
                    else:
                        raise ValueError(
                            f'Matrix dtype {needed.dtype} is not integer type or real type.'
                        )
                    # Slots were already advanced per element; skip the
                    # shared increment at the bottom of the loop.
                    continue
                else:
                    raise ValueError(
                        f'Argument type mismatch. Expecting {needed}, got {type(v)}.'
                    )
                actual_argument_slot += 1
            # Both the class kernels and the plain-function kernels are unified now.
            # In both cases, |self.grad| is another Kernel instance that computes the
            # gradient. For class kernels, args[0] is always the kernel owner.
            if not self.is_grad and self.runtime.target_tape and not self.runtime.grad_replaced:
                self.runtime.target_tape.insert(self, args)

            t_kernel(launch_ctx)

            ret = None
            ret_dt = self.return_type
            has_ret = ret_dt is not None

            # Sync when a return value must be read, or when async mode is on
            # and an array argument was passed.
            if has_ret or (impl.current_cfg().async_mode
                           and has_external_arrays):
                runtime_ops.sync()

            if has_ret:
                # Any non-integer return type is read back as a float.
                if id(ret_dt) in primitive_types.integer_type_ids:
                    ret = t_kernel.get_ret_int(0)
                else:
                    ret = t_kernel.get_ret_float(0)

            # Run the callbacks collected for torch arguments.
            if callbacks:
                for c in callbacks:
                    c()

            return ret
Esempio n. 20
0
 def __getitem__(self, key):
     """Reads the entry at ``key``.

     Torch-backed storage is indexed directly; otherwise the read goes
     through the host accessor with the key processed by ``pad_key``.
     """
     if not impl.current_cfg().ndarray_use_torch:
         self.initialize_host_accessor()
         read = self.host_accessor.getter
         return read(*self.pad_key(key))
     return self.arr.__getitem__(key)
Esempio n. 21
0
 def __setitem__(self, key, value):
     """Writes ``value`` at ``key``.

     Torch-backed storage is assigned directly; otherwise the write goes
     through the host accessor with the key processed by ``pad_key``.
     """
     if not impl.current_cfg().ndarray_use_torch:
         self.initialize_host_accessor()
         write = self.host_accessor.setter
         write(value, *self.pad_key(key))
         return
     self.arr.__setitem__(key, value)
Esempio n. 22
0
    def from_file(cls, filename, compile_fn=None, _temp_dir=None):
        """Builds an instance from an external source, bitcode or library file.

        The file suffix selects the handling:

        * ``.cpp`` / ``.c`` / ``.cc``: compiled to bitcode (x64 or cuda).
        * ``.cu``: compiled to bitcode (cuda only).
        * ``.so`` / ``.dylib`` / ``.dll``: loaded with ``ctypes.CDLL`` (x64 only).
        * ``.ll``: assembled to bitcode with ``llvm-as`` (x64 or cuda).
        * ``.bc``: used as-is (x64 or cuda).

        Args:
            filename (str): Path of the file to load.
            compile_fn: Optional callable taking ``filename`` and returning
                the bitcode path; only used for the compiled source suffixes.
            _temp_dir: Working directory to use; a fresh temporary directory
                is created when it is ``None``.

        Returns:
            The new instance, with ``mode`` set to ``'bc'`` or ``'so'``.

        Raises:
            TaichiSyntaxError: If the current arch is not supported for the
                file type, or the suffix is not recognised.
        """
        self = cls()
        self.td = tempfile.mkdtemp() if _temp_dir is None else _temp_dir

        def require_arch(*allowed):
            # Reject arches that cannot handle this kind of external file.
            if impl.current_cfg().arch not in allowed:
                raise TaichiSyntaxError(
                    "Unsupported arch for external function call")

        # NOTE(review): every subprocess call below runs a concatenated
        # string with shell=True and ignores the exit status; paths with
        # spaces or shell metacharacters are not escaped.
        if filename.endswith((".cpp", ".c", ".cc")):
            require_arch(_ti_core.Arch.x64, _ti_core.Arch.cuda)
            if compile_fn is None:

                def compile_fn_impl(filename):
                    targets_x64 = impl.current_cfg(
                    ).arch == _ti_core.Arch.x64
                    command = (get_clangpp() + ' -flto -c ' + filename +
                               ' -o ' + os.path.join(self.td, 'source.bc'))
                    if not targets_x64:
                        command = command + ' -target nvptx64-nvidia-cuda'
                    subprocess.call(command, shell=True)
                    return os.path.join(self.td, 'source.bc')

                compile_fn = compile_fn_impl
            self.bc = compile_fn(filename)
            self.mode = 'bc'
            return self

        if filename.endswith(".cu"):
            require_arch(_ti_core.Arch.cuda)
            if compile_fn is None:
                shutil.copy(filename, os.path.join(self.td, 'source.cu'))

                def compile_fn_impl(filename):
                    # Cannot use -o to specify multiple output files
                    command = (
                        get_clangpp() + ' ' +
                        os.path.join(self.td, 'source.cu') +
                        ' -c -emit-llvm -std=c++17 --cuda-gpu-arch=sm_50 -nocudalib'
                    )
                    subprocess.call(command, cwd=self.td, shell=True)
                    return os.path.join(
                        self.td, 'source-cuda-nvptx64-nvidia-cuda-sm_50.bc')

                compile_fn = compile_fn_impl
            self.bc = compile_fn(filename)
            self.mode = 'bc'
            return self

        if filename.endswith((".so", ".dylib", ".dll")):
            require_arch(_ti_core.Arch.x64)
            self.so = ctypes.CDLL(filename)
            self.mode = 'so'
            return self

        if filename.endswith(".ll"):
            require_arch(_ti_core.Arch.x64, _ti_core.Arch.cuda)
            subprocess.call('llvm-as ' + filename + ' -o ' +
                            os.path.join(self.td, 'source.bc'),
                            shell=True)
            self.bc = os.path.join(self.td, 'source.bc')
            self.mode = 'bc'
            return self

        if filename.endswith(".bc"):
            require_arch(_ti_core.Arch.x64, _ti_core.Arch.cuda)
            self.bc = filename
            self.mode = 'bc'
            return self

        raise TaichiSyntaxError(
            'Unsupported file type for external function call.')