def augassign(self, value, op):
    """Accumulate `value` into this sparse-matrix entry via a triplet insert.

    Args:
        value: Scalar to add to (op == 'Add') or subtract from (op == 'Sub')
            the entry at (self.i, self.j).
        op: AST operator name; only 'Add' ('+=') and 'Sub' ('-=') are
            supported.

    Raises:
        AssertionError: If `op` is any other operator.
    """
    from taichi.lang.impl import call_internal, ti_float
    if op == 'Add':
        call_internal("insert_triplet", self.ptr, self.i, self.j,
                      ti_float(value))
    elif op == 'Sub':
        # Subtraction is implemented as inserting the negated value.
        call_internal("insert_triplet", self.ptr, self.i, self.j,
                      -ti_float(value))
    else:
        # Explicit raise instead of `assert False` so the validation is not
        # stripped under `python -O`; AssertionError is kept so existing
        # callers that catch it still work. (Also dropped a spurious `f`
        # prefix on a placeholder-free string — the message is unchanged.)
        raise AssertionError(
            "Only operations '+=' and '-=' are supported on sparse matrices.")
def shfl_xor_i32(mask, val, offset):
    """Wrap the CUDA `cuda_shfl_xor_sync_i32` internal for a 32-lane warp."""
    lane_mask = 31  # lane-mask operand for warp size 32
    return impl.call_internal("cuda_shfl_xor_sync_i32", mask, val, offset,
                              lane_mask, with_runtime_context=False)
def shfl_down_f32(mask, val, offset):
    """Wrap the CUDA `cuda_shfl_down_sync_f32` internal for a 32-lane warp."""
    lane_mask = 31  # lane offset is 31 for warp size 32
    return impl.call_internal("cuda_shfl_down_sync_f32", mask, val, offset,
                              lane_mask, with_runtime_context=False)
def shfl_up_i32(mask, val, offset):
    """Wrap the CUDA `cuda_shfl_up_sync_i32` internal for a 32-lane warp."""
    lane_mask = 0  # lane offset is 0 for warp size 32
    return impl.call_internal("cuda_shfl_up_sync_i32", mask, val, offset,
                              lane_mask, with_runtime_context=False)
def match_any(mask, value):
    """Wrap the CUDA `cuda_match_any_sync_i32` internal.

    The match intrinsics are only available on compute_70 or higher; see
    https://docs.nvidia.com/cuda/pdf/NVVM_IR_Specification.pdf
    """
    capability = impl.get_cuda_compute_capability()
    if capability < 70:
        raise AssertionError(
            "match_any intrinsic only available on compute_70 or higher")
    return impl.call_internal("cuda_match_any_sync_i32", mask, value,
                              with_runtime_context=False)
def fetch(self, index, lod):
    """Fetch the texel at integer coordinates `index` and mip level `lod`.

    Builds an expr group from the 1/2/3-D coordinates plus `lod`, issues a
    `fetch_texel` texture op, then extracts the four channels.

    Returns:
        ti.Vector of the four extracted components (r, g, b, a).
    """
    # Unsupported dimensionalities fall through with an empty args group,
    # matching the original behavior.
    args_group = ()
    if self.num_dims == 1:
        args_group = impl.make_expr_group(index.x, lod)
    elif self.num_dims == 2:
        args_group = impl.make_expr_group(index.x, index.y, lod)
    elif self.num_dims == 3:
        args_group = impl.make_expr_group(index.x, index.y, index.z, lod)
    texel = _ti_core.make_texture_op_expr(_ti_core.TextureOpType.fetch_texel,
                                          self.ptr_expr, args_group)
    red = impl.call_internal("composite_extract_0", texel,
                             with_runtime_context=False)
    green = impl.call_internal("composite_extract_1", texel,
                               with_runtime_context=False)
    blue = impl.call_internal("composite_extract_2", texel,
                              with_runtime_context=False)
    alpha = impl.call_internal("composite_extract_3", texel,
                               with_runtime_context=False)
    return ti.Vector([red, green, blue, alpha])
def sample_lod(self, uv, lod):
    """Sample the texture at normalized coordinates `uv` with mip level `lod`.

    Builds an expr group from the 1/2/3-D coordinates plus `lod`, issues a
    `sample_lod` texture op, then extracts the four channels.

    Returns:
        ti.Vector of the four extracted components (r, g, b, a).
    """
    args_group = ()
    if self.num_dims == 1:
        # Bug fix: the 1-D branch previously built a plain Python tuple
        # `(uv.x, lod)` instead of an expr group, unlike the 2-D/3-D
        # branches here and the sibling `fetch` method.
        args_group = impl.make_expr_group(uv.x, lod)
    elif self.num_dims == 2:
        args_group = impl.make_expr_group(uv.x, uv.y, lod)
    elif self.num_dims == 3:
        args_group = impl.make_expr_group(uv.x, uv.y, uv.z, lod)
    v = _ti_core.make_texture_op_expr(_ti_core.TextureOpType.sample_lod,
                                      self.ptr_expr, args_group)
    r = impl.call_internal("composite_extract_0", v,
                           with_runtime_context=False)
    g = impl.call_internal("composite_extract_1", v,
                           with_runtime_context=False)
    b = impl.call_internal("composite_extract_2", v,
                           with_runtime_context=False)
    a = impl.call_internal("composite_extract_3", v,
                           with_runtime_context=False)
    return ti.Vector([r, g, b, a])
def test_cpu():
    """Verify internal-func argument passing produces the expected result."""
    result = impl.call_internal("test_internal_func_args", 1.0, 2.0, 3)
    assert result == 9
def test():
    """Exercise the node allocator through its internal test hook."""
    impl.call_internal("test_node_allocator")
def test():
    """Exercise the counter-refresh internal hook."""
    impl.call_internal("refresh_counter")
def sync(mask):
    """Wrap the `warp_barrier` internal for the lanes selected by `mask`."""
    return impl.call_internal("warp_barrier", mask,
                              with_runtime_context=False)
def active_mask():
    """Wrap the `cuda_active_mask` internal (no arguments)."""
    return impl.call_internal("cuda_active_mask",
                              with_runtime_context=False)
def memfence():
    """Wrap the `grid_memfence` internal (no arguments)."""
    return impl.call_internal("grid_memfence",
                              with_runtime_context=False)
def test():
    """Invoke the shuffle test hook once per lane of a 32-wide warp."""
    for _ in range(32):
        impl.call_internal("test_shfl")
def test():
    """Invoke the active-mask test hook for even indices in [0, 48)."""
    # Stepping by 2 is equivalent to the original `if i % 2 == 0` filter.
    for _ in range(0, 48, 2):
        impl.call_internal("test_active_mask")
def sync():
    """Wrap the `block_barrier` internal (no arguments)."""
    return impl.call_internal("block_barrier",
                              with_runtime_context=False)
def test():
    """Call the no-op internal hook a fixed number of times."""
    repetitions = 10
    for _ in range(repetitions):
        impl.call_internal("do_nothing")
def unique(mask, predicate):
    """Wrap the `cuda_uni_sync_i32` internal for `mask` and `predicate`."""
    return impl.call_internal("cuda_uni_sync_i32", mask, predicate,
                              with_runtime_context=False)
def test():
    """Exercise the list manager through its internal test hook."""
    impl.call_internal("test_list_manager")
def ballot(predicate):
    """Wrap the `cuda_ballot_i32` internal for `predicate`."""
    return impl.call_internal("cuda_ballot_i32", predicate,
                              with_runtime_context=False)
def test_cpu():
    """Exercise node-allocator garbage collection on the CPU backend."""
    impl.call_internal("test_node_allocator_gc_cpu")
def all_nonzero(mask, predicate):
    """Wrap the `cuda_all_sync_i32` internal for `mask` and `predicate`."""
    return impl.call_internal("cuda_all_sync_i32", mask, predicate,
                              with_runtime_context=False)
def func():
    """Exercise the stack implementation via its internal test hook."""
    impl.call_internal("test_stack")
def match_any(mask, value):
    """Wrap the `cuda_match_any_sync_i32` internal for `mask` and `value`.

    NOTE(review): unlike the guarded variant elsewhere in this file, this
    wrapper performs no compute-capability check — presumably callers ensure
    sm_70+; verify before relying on it.
    """
    return impl.call_internal("cuda_match_any_sync_i32", mask, value,
                              with_runtime_context=False)