def hsail_atomic_add_tuple(context, builder, sig, args):
    """Lower an atomic add on an indexed array element to LLVM atomicrmw.

    ``sig.args`` is (array type, index type, value type) and ``args`` holds
    the matching LLVM values.  Returns the value previously stored at the
    addressed element (the atomicrmw result).
    """
    aryty, indty, valty = sig.args
    ary, inds, val = args
    dtype = aryty.dtype

    # Normalize the index to a list of intp values.
    if indty == types.intp:
        # A lone integer index.
        index_values = [inds]
        indty = [indty]
    else:
        raw = cgutils.unpack_tuple(builder, inds, count=len(indty))
        index_values = [
            context.cast(builder, v, t, types.intp)
            for t, v in zip(indty, raw)
        ]

    # The stored value's type must match the array's element type exactly.
    if dtype != valty:
        raise TypeError("expecting %s but got %s" % (dtype, valty))
    # The index arity must match the array's dimensionality.
    if aryty.ndim != len(indty):
        raise TypeError("indexing %d-D array with %d-D index"
                        % (aryty.ndim, len(indty)))

    lary = context.make_array(aryty)(context, builder, ary)
    ptr = cgutils.get_item_pointer(context, builder, aryty, lary, index_values)
    # 'monotonic' is LLVM's relaxed atomic ordering.
    return builder.atomic_rmw("add", ptr, val, ordering='monotonic')
def atomic_add(context, builder, sig, args, name):
    """Lower an atomic add by calling the named device atomic function.

    Raises ImportError when the runtime atomic support library is absent.
    Dispatches to ``insert_and_call_atomic_fn`` with the LOCAL address
    space for local-memory DPPYArrays and GLOBAL otherwise.
    """
    from .atomics import atomic_support_present

    # Guard clause: bail out early when the atomic library is unavailable.
    if not atomic_support_present():
        raise ImportError(
            "Atomic support is not present, can not perform atomic_add")

    # Request linking of the atomic support module into the kernel.
    context.extra_compile_options[target.LINK_ATOMIC] = True

    aryty, indty, valty = sig.args
    ary, inds, val = args
    dtype = aryty.dtype

    # Normalize the index to a list of intp values.
    if indty == types.intp:
        index_values = [inds]  # just a single integer
        indty = [indty]
    else:
        raw = cgutils.unpack_tuple(builder, inds, count=len(indty))
        index_values = [
            context.cast(builder, v, t, types.intp)
            for t, v in zip(indty, raw)
        ]

    if dtype != valty:
        raise TypeError("expecting %s but got %s" % (dtype, valty))
    if aryty.ndim != len(indty):
        raise TypeError("indexing %d-D array with %d-D index"
                        % (aryty.ndim, len(indty)))

    lary = context.make_array(aryty)(context, builder, ary)
    ptr = cgutils.get_item_pointer(context, builder, aryty, lary, index_values)

    # Pick the address space once; the call is otherwise identical.
    if (isinstance(aryty, DPPYArray)
            and aryty.addrspace == address_space.LOCAL):
        addrspace = address_space.LOCAL
    else:
        addrspace = address_space.GLOBAL

    return insert_and_call_atomic_fn(
        context, builder, sig, name, dtype, ptr, val, addrspace,
    )
def ptx_atomic_cas_tuple(context, builder, sig, args):
    """Lower atomic compare-and-swap on element 0 of an int32 array.

    Only ``int32`` element types are supported; anything else raises
    TypeError.  Returns the result of the NVVM atomic CAS call.
    """
    aryty, oldty, valty = sig.args
    ary, old, val = args
    dtype = aryty.dtype

    lary = context.make_array(aryty)(context, builder, ary)
    zero = context.get_constant(types.intp, 0)
    # CAS always operates on the first element of the array.
    ptr = cgutils.get_item_pointer(context, builder, aryty, lary, (zero, ))

    # Guard clause: only int32 has a declared NVVM CAS intrinsic here.
    if aryty.dtype != types.int32:
        raise TypeError('Unimplemented atomic compare_and_swap '
                        'with %s array' % dtype)

    cas_fn = nvvmutils.declare_atomic_cas_int32(builder.module)
    return builder.call(cas_fn, (ptr, old, val))
def ptx_atomic_cas_tuple(context, builder, sig, args):
    """Lower atomic compare-and-swap on element 0 of an integer array.

    Supports every integer Numba type listed in
    ``cuda.cudadecl.integer_numba_types``, selecting the cmpxchg width
    from the element type's bitwidth.  Non-integer arrays raise TypeError.
    """
    aryty, oldty, valty = sig.args
    ary, old, val = args
    dtype = aryty.dtype

    lary = context.make_array(aryty)(context, builder, ary)
    zero = context.get_constant(types.intp, 0)
    # CAS always operates on the first element of the array.
    ptr = cgutils.get_item_pointer(context, builder, aryty, lary, (zero, ))

    # Guard clause: cmpxchg is only emitted for integer element types.
    if aryty.dtype not in cuda.cudadecl.integer_numba_types:
        raise TypeError('Unimplemented atomic compare_and_swap '
                        'with %s array' % dtype)

    return nvvmutils.atomic_cmpxchg(
        builder, builder.module, aryty.dtype.bitwidth, ptr, old, val)
def imp(context, builder, sig, args):
    # Shared lowering body for an atomic array operation: validates the
    # (array, index, value) triple, computes the element pointer, then
    # defers to ``dispatch_fn`` — a closure variable from the enclosing
    # factory (not visible in this chunk) — for the dtype-specific op.
    # The common argument handling code
    aryty, indty, valty = sig.args
    ary, inds, val = args
    dtype = aryty.dtype
    # Helper presumably turns a scalar or tuple index into a list of intp
    # values plus the matching type list — TODO confirm against its def.
    indty, indices = _normalize_indices(context, builder, indty, inds)
    # The stored value must have exactly the array's element type.
    if dtype != valty:
        raise TypeError("expect %s but got %s" % (dtype, valty))
    # Index arity must match the array's dimensionality.
    if aryty.ndim != len(indty):
        raise TypeError("indexing %d-D array with %d-D index" %
                        (aryty.ndim, len(indty)))
    lary = context.make_array(aryty)(context, builder, ary)
    ptr = cgutils.get_item_pointer(context, builder, aryty, lary, indices)
    # dispatcher to implementation base on dtype
    return dispatch_fn(context, builder, dtype, ptr, val)
def native_atomic_add(context, builder, sig, args):
    # Lower an atomic add on an indexed array element to a call to the
    # appropriate SPIR-V atomic builtin (__spirv_AtomicFAddEXT for floats,
    # __spirv_AtomicIAdd for ints), with relaxed memory order and device
    # scope.  Returns the builtin's result.
    aryty, indty, valty = sig.args
    ary, inds, val = args
    dtype = aryty.dtype

    # Normalize the index to a list of intp values.
    if indty == types.intp:
        indices = [inds]  # just a single integer
        indty = [indty]
    else:
        indices = cgutils.unpack_tuple(builder, inds, count=len(indty))
        indices = [
            context.cast(builder, i, t, types.intp)
            for t, i in zip(indty, indices)
        ]

    # The stored value must have exactly the array's element type.
    if dtype != valty:
        raise TypeError("expecting %s but got %s" % (dtype, valty))
    # Index arity must match the array's dimensionality.
    if aryty.ndim != len(indty):
        raise TypeError("indexing %d-D array with %d-D index" %
                        (aryty.ndim, len(indty)))

    lary = context.make_array(aryty)(context, builder, ary)
    ptr = cgutils.get_item_pointer(context, builder, aryty, lary, indices)

    # Pick the SPIR-V builtin by dtype.  Float atomic add needs the
    # SPV_EXT_shader_atomic_float_add extension enabled in llvm-spirv.
    if dtype == types.float32 or dtype == types.float64:
        context.extra_compile_options[target.LLVM_SPIRV_ARGS] = [
            "--spirv-ext=+SPV_EXT_shader_atomic_float_add"
        ]
        name = "__spirv_AtomicFAddEXT"
    elif dtype == types.int32 or dtype == types.int64:
        name = "__spirv_AtomicIAdd"
    else:
        raise TypeError("Unsupported type")
    assert name != ""

    # Build the LLVM pointer type carrying the array's address space.
    ptr_type = context.get_value_type(dtype).as_pointer()
    ptr_type.addrspace = aryty.addrspace

    retty = context.get_value_type(sig.return_type)
    # Builtin signature: (element ptr, scope i32, semantics i32, value).
    spirv_fn_arg_types = [
        ptr_type,
        ir.IntType(32),
        ir.IntType(32),
        context.get_value_type(sig.args[2]),
    ]

    from numba_dppy import extended_numba_itanium_mangler as ext_itanium_mangler
    numba_ptr_ty = types.CPointer(dtype, addrspace=ptr_type.addrspace)
    # Itanium-mangle the builtin name using the SPIR-V enum type spellings
    # for the scope and memory-semantics arguments.
    mangled_fn_name = ext_itanium_mangler.mangle(
        name,
        [
            numba_ptr_ty,
            "__spv.Scope.Flag",
            "__spv.MemorySemanticsMask.Flag",
            valty,
        ],
    )

    fnty = ir.FunctionType(retty, spirv_fn_arg_types)
    fn = cgutils.get_or_insert_function(builder.module, fnty, mangled_fn_name)
    fn.calling_convention = target.CC_SPIR_FUNC

    # Relaxed ordering at device scope, translated to SPIR-V enum values.
    sycl_memory_order = atomic_helper.sycl_memory_order.relaxed
    sycl_memory_scope = atomic_helper.sycl_memory_scope.device
    spirv_scope = atomic_helper.get_scope(sycl_memory_scope)
    spirv_memory_semantics_mask = atomic_helper.get_memory_semantics_mask(
        sycl_memory_order)

    fn_args = [
        ptr,
        context.get_constant(types.int32, spirv_scope),
        context.get_constant(types.int32, spirv_memory_semantics_mask),
        val,
    ]
    return builder.call(fn, fn_args)
def codegen(context, builder, signature, args):
    # Lowering body for a 1-D "slice along an axis" operation: builds a
    # view of ``array`` taken at tuple index ``idx`` with the runtime
    # ``axis`` dimension left free (optionally clipped to ``extent``).
    # Relies on closure variables from the enclosing scope (not visible
    # in this chunk): ``have_extent``, ``return_type``, ``make_view``.
    array_type, idx_type, axis_type, extent_type = signature.args
    array, idx, axis, extent = args
    array = context.make_array(array_type)(context, builder, array)
    zero = context.get_constant(types.intp, 0)
    llvm_intp_t = context.get_value_type(types.intp)
    ndim = array_type.ndim
    # Stack slots for the single surviving dimension's shape and stride.
    view_shape = cgutils.alloca_once(builder, llvm_intp_t)
    view_stride = cgutils.alloca_once(builder, llvm_intp_t)
    # Final array indexes. We only know the slicing index at runtime
    # so we need to recreate idx but with zero at the slicing axis
    indices = cgutils.alloca_once(builder, llvm_intp_t, size=array_type.ndim)
    for ax in range(array_type.ndim):
        llvm_ax = context.get_constant(types.intp, ax)
        predicate = builder.icmp_unsigned("!=", llvm_ax, axis)
        with builder.if_else(predicate) as (not_equal, equal):
            with not_equal:
                # If this is not the slicing axis,
                # use the appropriate tuple index
                value = builder.extract_value(idx, ax)
                builder.store(value, builder.gep(indices, [llvm_ax]))
            with equal:
                # If this is the slicing axis,
                # store zero as the index.
                # Also record the stride and shape
                builder.store(zero, builder.gep(indices, [llvm_ax]))
                size = builder.extract_value(array.shape, ax)
                stride = builder.extract_value(array.strides, ax)
                if have_extent:
                    # Clip the view length to ``extent`` when it is
                    # smaller than the axis size.
                    ext_predicate = builder.icmp_signed(">=", extent, size)
                    size = builder.select(ext_predicate, size, extent)
                builder.store(size, view_shape)
                builder.store(stride, view_stride)
    # Build a python list from indices
    tmp_indices = []
    for i in range(ndim):
        i = context.get_constant(types.intp, i)
        tmp_indices.append(builder.load(builder.gep(indices, [i])))
    # Get the data pointer obtained from indexing the array
    dataptr = cgutils.get_item_pointer(context, builder, array_type, array,
                                       tmp_indices, wraparound=True,
                                       boundscheck=True)
    # Set up the shape and stride. There'll only be one
    # dimension, corresponding to the axis along which we slice
    view_shapes = [builder.load(view_shape)]
    view_strides = [builder.load(view_stride)]
    # Make a view with the data pointer, shapes and strides
    retary = make_view(context, builder, array_type, array, return_type,
                       dataptr, view_shapes, view_strides)
    result = retary._getvalue()
    return impl_ret_borrowed(context, builder, return_type, result)