def resolve_local(self, mod):
    """Return a ``types.Module`` wrapping ``cuda.local``.

    ``mod`` is accepted but not used.
    """
    return types.Module(cuda.local)
return types.Function(Hsa_get_group_id) def resolve_barrier(self, mod): return types.Function(Hsa_barrier) def resolve_mem_fence(self, mod): return types.Function(Hsa_mem_fence) def resolve_wavebarrier(self, mod): return types.Function(Hsa_wavebarrier) def resolve_activelanepermute_wavewidth(self, mod): return types.Function(Hsa_activelanepermute_wavewidth) def resolve_ds_permute(self, mod): return types.Function(Hsa_ds_permute) def resolve_ds_bpermute(self, mod): return types.Function(Hsa_ds_bpermute) def resolve_shared(self, mod): return types.Module(roc.shared) def resolve_atomic(self, mod): return types.Module(roc.atomic) # intrinsic intrinsic_global(roc, types.Module(roc))
def resolve_fp16(self, mod):
    """Return a ``types.Module`` wrapping ``cuda.fp16``.

    ``mod`` is accepted but not used.
    """
    return types.Module(cuda.fp16)
def resolve_const(self, mod):
    """Return a ``types.Module`` wrapping ``cuda.const``.

    ``mod`` is accepted but not used.
    """
    return types.Module(cuda.const)
def resolve_syncwarp(self, mod):
    """Return the ``types.Function`` built from ``Cuda_syncwarp``."""
    return types.Function(Cuda_syncwarp)


def resolve_shfl_sync_intrinsic(self, mod):
    """Return the ``types.Function`` built from ``Cuda_shfl_sync_intrinsic``."""
    return types.Function(Cuda_shfl_sync_intrinsic)


def resolve_vote_sync_intrinsic(self, mod):
    """Return the ``types.Function`` built from ``Cuda_vote_sync_intrinsic``."""
    return types.Function(Cuda_vote_sync_intrinsic)


def resolve_match_any_sync(self, mod):
    """Return the ``types.Function`` built from ``Cuda_match_any_sync``."""
    return types.Function(Cuda_match_any_sync)


def resolve_match_all_sync(self, mod):
    """Return the ``types.Function`` built from ``Cuda_match_all_sync``."""
    return types.Function(Cuda_match_all_sync)


def resolve_selp(self, mod):
    """Return the ``types.Function`` built from ``Cuda_selp``."""
    return types.Function(Cuda_selp)


def resolve_atomic(self, mod):
    """Return a ``types.Module`` wrapping ``cuda.atomic``."""
    return types.Module(cuda.atomic)


def resolve_const(self, mod):
    """Return a ``types.Module`` wrapping ``cuda.const``."""
    return types.Module(cuda.const)


def resolve_local(self, mod):
    """Return a ``types.Module`` wrapping ``cuda.local``."""
    return types.Module(cuda.local)


# Hand the ``cuda`` object and its ``types.Module`` wrapper to
# ``register_global``.
register_global(cuda, types.Module(cuda))
def resolve_threadIdx(self, mod):
    """Return a ``types.Module`` wrapping ``cuda.threadIdx``.

    ``mod`` is accepted but not used.
    """
    return types.Module(cuda.threadIdx)
class CudaConstModuleTemplate(AttributeTemplate):
    """Attribute template keyed on ``types.Module(cuda.const)``."""

    key = types.Module(cuda.const)

    def resolve_array_like(self, mod):
        """Return the ``types.Function`` built from ``Cuda_const_array_like``."""
        return types.Function(Cuda_const_array_like)
def resolve_shared(self, mod):
    """Return a ``types.Module`` wrapping ``roc.shared``.

    ``mod`` is accepted but not used.
    """
    return types.Module(roc.shared)
from numba.cuda import nvvmutils, stubs from numba.cuda.types import dim3 registry = Registry() lower = registry.lower lower_attr = registry.lower_getattr def initialize_dim3(builder, prefix): x = nvvmutils.call_sreg(builder, "%s.x" % prefix) y = nvvmutils.call_sreg(builder, "%s.y" % prefix) z = nvvmutils.call_sreg(builder, "%s.z" % prefix) return cgutils.pack_struct(builder, (x, y, z)) @lower_attr(types.Module(cuda), 'threadIdx') def cuda_threadIdx(context, builder, sig, args): return initialize_dim3(builder, 'tid') @lower_attr(types.Module(cuda), 'blockDim') def cuda_blockDim(context, builder, sig, args): return initialize_dim3(builder, 'ntid') @lower_attr(types.Module(cuda), 'blockIdx') def cuda_blockIdx(context, builder, sig, args): return initialize_dim3(builder, 'ctaid') @lower_attr(types.Module(cuda), 'gridDim')
class CudaCgModuleTemplate(AttributeTemplate):
    """Attribute template keyed on ``types.Module(cuda.cg)``."""

    key = types.Module(cuda.cg)

    def resolve_this_grid(self, mod):
        """Return the ``types.Function`` built from ``Cuda_cg_this_grid``."""
        return types.Function(Cuda_cg_this_grid)
def _get_global_type(self, gv):
    """Return the type associated with the global value ``gv``.

    The result of ``self._lookup_global(gv)`` wins whenever it is not
    ``None``.  Otherwise, if ``gv`` is a Python module object, a
    ``types.Module`` wrapping it is returned.  In every other case the
    result is ``None``.
    """
    known = self._lookup_global(gv)
    if known is None:
        # Nothing registered for this value: fall back to wrapping modules.
        if isinstance(gv, pytypes.ModuleType):
            return types.Module(gv)
        return None
    return known
def resolve_gridDim(self, mod):
    """Return a ``types.Module`` wrapping ``cuda.gridDim``.

    ``mod`` is accepted but not used.
    """
    return types.Module(cuda.gridDim)
def resolve_blockDim(self, mod):
    """Return a ``types.Module`` wrapping ``cuda.blockDim``.

    ``mod`` is accepted but not used.
    """
    return types.Module(cuda.blockDim)
class HsaSharedTemplate(AttributeTemplate):
    """Attribute template keyed on ``types.Module(roc.shared)``."""

    key = types.Module(roc.shared)

    def resolve_array(self, mod):
        """Return the ``types.Macro`` built from ``Hsa_shared_array``."""
        return types.Macro(Hsa_shared_array)
class CudaLocalModuleTemplate(AttributeTemplate):
    """Attribute template keyed on ``types.Module(cuda.local)``."""

    key = types.Module(cuda.local)

    def resolve_array(self, mod):
        """Return the ``types.Function`` built from ``Cuda_local_array``."""
        return types.Function(Cuda_local_array)
class HsaAtomicTemplate(AttributeTemplate):
    """Attribute template keyed on ``types.Module(roc.atomic)``."""

    key = types.Module(roc.atomic)

    def resolve_add(self, mod):
        """Return the ``types.Function`` built from ``Hsa_atomic_add``."""
        return types.Function(Hsa_atomic_add)
class CudaModuleTemplate(AttributeTemplate):
    """Attribute template keyed on ``types.Module(cuda)``.

    Each ``resolve_<name>`` method returns the type used for ``<name>``.
    The methods are grouped below by the kind of type they return:
    ``types.Function`` wrappers, ``dim3`` values, ``types.int32`` values
    and nested ``types.Module`` wrappers.  Every method ignores ``mod``.
    """

    key = types.Module(cuda)

    # ------------------------------------------------------------------
    # Names typed as ``types.Function``
    # ------------------------------------------------------------------
    def resolve_grid(self, mod):
        return types.Function(Cuda_grid)

    def resolve_gridsize(self, mod):
        return types.Function(Cuda_gridsize)

    def resolve_popc(self, mod):
        return types.Function(Cuda_popc)

    def resolve_brev(self, mod):
        return types.Function(Cuda_brev)

    def resolve_clz(self, mod):
        return types.Function(Cuda_clz)

    def resolve_ffs(self, mod):
        return types.Function(Cuda_ffs)

    def resolve_fma(self, mod):
        return types.Function(Cuda_fma)

    def resolve_cbrt(self, mod):
        return types.Function(Cuda_cbrt)

    def resolve_syncthreads(self, mod):
        return types.Function(Cuda_syncthreads)

    def resolve_syncthreads_count(self, mod):
        return types.Function(Cuda_syncthreads_count)

    def resolve_syncthreads_and(self, mod):
        return types.Function(Cuda_syncthreads_and)

    def resolve_syncthreads_or(self, mod):
        return types.Function(Cuda_syncthreads_or)

    def resolve_threadfence(self, mod):
        # The template used here is named ``Cuda_threadfence_device``,
        # not ``Cuda_threadfence``.
        return types.Function(Cuda_threadfence_device)

    def resolve_threadfence_block(self, mod):
        return types.Function(Cuda_threadfence_block)

    def resolve_threadfence_system(self, mod):
        return types.Function(Cuda_threadfence_system)

    def resolve_syncwarp(self, mod):
        return types.Function(Cuda_syncwarp)

    def resolve_shfl_sync_intrinsic(self, mod):
        return types.Function(Cuda_shfl_sync_intrinsic)

    def resolve_vote_sync_intrinsic(self, mod):
        return types.Function(Cuda_vote_sync_intrinsic)

    def resolve_match_any_sync(self, mod):
        return types.Function(Cuda_match_any_sync)

    def resolve_match_all_sync(self, mod):
        return types.Function(Cuda_match_all_sync)

    def resolve_activemask(self, mod):
        return types.Function(Cuda_activemask)

    def resolve_lanemask_lt(self, mod):
        return types.Function(Cuda_lanemask_lt)

    def resolve_selp(self, mod):
        return types.Function(Cuda_selp)

    def resolve_nanosleep(self, mod):
        return types.Function(Cuda_nanosleep)

    # ------------------------------------------------------------------
    # Names typed as ``dim3``
    # ------------------------------------------------------------------
    def resolve_threadIdx(self, mod):
        return dim3

    def resolve_blockIdx(self, mod):
        return dim3

    def resolve_blockDim(self, mod):
        return dim3

    def resolve_gridDim(self, mod):
        return dim3

    # ------------------------------------------------------------------
    # Names typed as ``types.int32``
    # ------------------------------------------------------------------
    def resolve_warpsize(self, mod):
        return types.int32

    def resolve_laneid(self, mod):
        return types.int32

    # ------------------------------------------------------------------
    # Names typed as nested ``types.Module`` wrappers
    # ------------------------------------------------------------------
    def resolve_cg(self, mod):
        return types.Module(cuda.cg)

    def resolve_shared(self, mod):
        return types.Module(cuda.shared)

    def resolve_atomic(self, mod):
        return types.Module(cuda.atomic)

    def resolve_fp16(self, mod):
        return types.Module(cuda.fp16)

    def resolve_const(self, mod):
        return types.Module(cuda.const)

    def resolve_local(self, mod):
        return types.Module(cuda.local)
def resolve_atomic(self, mod):
    """Return a ``types.Module`` wrapping ``roc.atomic``.

    ``mod`` is accepted but not used.
    """
    return types.Module(roc.atomic)
def resolve_cg(self, mod):
    """Return a ``types.Module`` wrapping ``cuda.cg``.

    ``mod`` is accepted but not used.
    """
    return types.Module(cuda.cg)
class CudaSharedModuleTemplate(AttributeTemplate):
    """Attribute template keyed on ``types.Module(cuda.shared)``."""

    key = types.Module(cuda.shared)

    def resolve_array(self, mod):
        """Return the ``types.Function`` built from ``Cuda_shared_array``."""
        return types.Function(Cuda_shared_array)
def resolve_match_any_sync(self, mod):
    """Return the ``types.Function`` built from ``Cuda_match_any_sync``."""
    return types.Function(Cuda_match_any_sync)


def resolve_match_all_sync(self, mod):
    """Return the ``types.Function`` built from ``Cuda_match_all_sync``."""
    return types.Function(Cuda_match_all_sync)


def resolve_selp(self, mod):
    """Return the ``types.Function`` built from ``Cuda_selp``."""
    return types.Function(Cuda_selp)


def resolve_atomic(self, mod):
    """Return a ``types.Module`` wrapping ``cuda.atomic``."""
    return types.Module(cuda.atomic)


def resolve_const(self, mod):
    """Return a ``types.Module`` wrapping ``cuda.const``."""
    return types.Module(cuda.const)


def resolve_local(self, mod):
    """Return a ``types.Module`` wrapping ``cuda.local``."""
    return types.Module(cuda.local)


# Hand the ``cuda`` object and its ``types.Module`` wrapper to
# ``intrinsic_global``.
intrinsic_global(cuda, types.Module(cuda))

## Force use of the ``cuda`` namespace by not recognizing the following
## individually as globals.
# intrinsic_global(cuda.grid, types.Function(Cuda_grid))
# intrinsic_global(cuda.gridsize, types.Function(Cuda_gridsize))
# intrinsic_global(cuda.threadIdx, types.Module(cuda.threadIdx))
# intrinsic_global(cuda.shared, types.Module(cuda.shared))
# intrinsic_global(cuda.shared.array, types.Function(Cuda_shared_array))
# intrinsic_global(cuda.syncthreads, types.Function(Cuda_syncthreads))
# intrinsic_global(cuda.atomic, types.Module(cuda.atomic))