def get_array_at_offset(self, ind):
    """Return a new array value whose data pointer is offset by *ind*
    core steps into this argument's data, reusing this argument's
    shape and strides.
    """
    context = self.context
    builder = self.builder
    # Layout "A" (any): no contiguity can be assumed on gufunc arguments.
    arytyp = types.Array(dtype=self.dtype, ndim=self.ndim, layout="A")
    arycls = context.make_array(arytyp)
    array = arycls(context, builder)
    offseted_data = cgutils.pointer_add(self.builder, self.data,
                                        self.builder.mul(self.core_step,
                                                         ind))
    if not self.as_scalar:
        shape = cgutils.pack_array(builder, self.shape)
        strides = cgutils.pack_array(builder, self.strides)
    else:
        # Scalar argument: present it as a 1-element 1D array with zero
        # stride so any index reads the same datum.
        one = context.get_constant(types.intp, 1)
        zero = context.get_constant(types.intp, 0)
        shape = cgutils.pack_array(builder, [one])
        strides = cgutils.pack_array(builder, [zero])
    itemsize = context.get_abi_sizeof(context.get_data_type(self.dtype))
    context.populate_array(array,
                           data=builder.bitcast(offseted_data,
                                                array.data.type),
                           shape=shape,
                           strides=strides,
                           itemsize=context.get_constant(types.intp,
                                                         itemsize),
                           # Data is borrowed from the caller's buffers:
                           # no NRT ownership is attached.
                           meminfo=None)
    return array._getvalue()
def _make_array(context, builder, dataptr, dtype, shape, layout="C"):
    """Wrap raw *dataptr* in an array structure of the given compile-time
    *shape*, computing C-contiguous strides from the element size.

    Fixes: the *layout* argument was previously ignored and "C" was
    always used for the array type.  C-order strides are computed below,
    which is also valid for layout "A".
    """
    ndim = len(shape)
    # Create array object
    aryty = types.Array(dtype=dtype, ndim=ndim, layout=layout)
    ary = context.make_array(aryty)(context, builder)

    targetdata = _get_target_data(context)
    lldtype = context.get_data_type(dtype)
    itemsize = lldtype.get_abi_size(targetdata)

    # Compute C-order strides: innermost (last) dimension is contiguous.
    rstrides = [itemsize]
    for lastsize in reversed(shape[1:]):
        rstrides.append(lastsize * rstrides[-1])
    strides = list(reversed(rstrides))

    kshape = [context.get_constant(types.intp, s) for s in shape]
    kstrides = [context.get_constant(types.intp, s) for s in strides]

    context.populate_array(
        ary,
        data=builder.bitcast(dataptr, ary.data.type),
        shape=cgutils.pack_array(builder, kshape),
        strides=cgutils.pack_array(builder, kstrides),
        itemsize=context.get_constant(types.intp, itemsize),
        meminfo=None,  # caller owns the data; no NRT ownership
    )
    return ary._getvalue()
def getitem_arraynd_intp(context, builder, sig, args):
    """Lower integer indexing on the leading dimension of an array.

    A 1D array yields a scalar element; an N-D array (N > 1) yields an
    (N-1)-D view sharing the parent's data.  Raises NotImplementedError
    for ndim < 1 (the old message wrongly claimed "1D indexing").
    """
    aryty, idxty = sig.args
    ary, idx = args
    arystty = make_array(aryty)
    adapted_ary = arystty(context, builder, ary)
    ndim = aryty.ndim
    if ndim == 1:
        # Load a single element, honoring negative-index wraparound for
        # signed index types.
        result = _getitem_array1d(context, builder, aryty, adapted_ary,
                                  idx, wraparound=idxty.signed)
    elif ndim > 1:
        # Build an (N-1)-D view dropping the first axis.
        out_ary_ty = make_array(aryty.copy(ndim=ndim - 1))
        out_ary = out_ary_ty(context, builder)
        in_shapes = cgutils.unpack_tuple(builder, adapted_ary.shape,
                                         count=ndim)
        in_strides = cgutils.unpack_tuple(builder, adapted_ary.strides,
                                          count=ndim)
        data_p = cgutils.get_item_pointer2(builder, adapted_ary.data,
                                           in_shapes, in_strides,
                                           aryty.layout, [idx],
                                           wraparound=idxty.signed)
        populate_array(out_ary,
                       data=data_p,
                       shape=cgutils.pack_array(builder, in_shapes[1:]),
                       strides=cgutils.pack_array(builder, in_strides[1:]),
                       itemsize=adapted_ary.itemsize,
                       parent=adapted_ary.parent)
        result = out_ary._getvalue()
    else:
        raise NotImplementedError("integer indexing into %dD array"
                                  % aryty.ndim)
    return result
def _shape_and_strides(self, context, builder):
    """Shape/strides presenting a scalar as a 1-element 1D array."""
    intp_one = context.get_constant(types.intp, 1)
    intp_zero = context.get_constant(types.intp, 0)
    # One element along a single axis; the zero stride makes every index
    # alias the same datum.
    return (cgutils.pack_array(builder, [intp_one]),
            cgutils.pack_array(builder, [intp_zero]))
def get_array_at_offset(self, ind):
    """Build an array value whose data pointer lies *ind* core steps
    into this argument's data; shape and strides are reused.
    """
    ctx = self.context
    bld = self.builder
    # "A" layout: gufunc inputs carry no contiguity guarantee.
    ary_type = types.Array(dtype=self.dtype, ndim=self.ndim, layout="A")
    ary = ctx.make_array(ary_type)(ctx, bld)
    byte_offset = bld.mul(self.core_step, ind)
    data_ptr = cgutils.pointer_add(bld, self.data, byte_offset)
    if self.as_scalar:
        # Scalars become 1-element arrays with zero stride.
        shape = cgutils.pack_array(bld, [ctx.get_constant(types.intp, 1)])
        strides = cgutils.pack_array(bld,
                                     [ctx.get_constant(types.intp, 0)])
    else:
        shape = cgutils.pack_array(bld, self.shape)
        strides = cgutils.pack_array(bld, self.strides)
    elem_size = ctx.get_abi_sizeof(ctx.get_data_type(self.dtype))
    ctx.populate_array(ary,
                       data=bld.bitcast(data_ptr, ary.data.type),
                       shape=shape,
                       strides=strides,
                       itemsize=ctx.get_constant(types.intp, elem_size),
                       meminfo=None)  # borrowed data, not NRT-owned
    return ary._getvalue()
def getitem_array1d_slice(context, builder, sig, args):
    """Lower slicing of a 1D array, returning a view on the same data.

    Fixes: the returned array struct previously had only data, shape and
    strides assigned, leaving itemsize, meminfo and parent
    uninitialized; it is now filled via populate_array (consistent with
    the other slicing lowerings) so the view is a fully-formed array
    value that keeps the parent's memory alive.
    """
    aryty, _ = sig.args
    if aryty.ndim != 1:
        # TODO
        raise NotImplementedError("1D indexing into %dD array" % aryty.ndim)
    ary, idx = args

    arystty = make_array(aryty)
    ary = arystty(context, builder, value=ary)

    shapes = cgutils.unpack_tuple(builder, ary.shape, aryty.ndim)
    slicestruct = Slice(context, builder, value=idx)
    # Clamp start/stop into [0, len) and resolve negative indices.
    cgutils.normalize_slice(builder, slicestruct, shapes[0])

    dataptr = cgutils.get_item_pointer(builder, aryty, ary,
                                       [slicestruct.start],
                                       wraparound=True)

    retstty = make_array(sig.return_type)
    retary = retstty(context, builder)

    shape = cgutils.get_range_from_slice(builder, slicestruct)
    stride = cgutils.get_strides_from_slice(builder, aryty.ndim,
                                            ary.strides, slicestruct, 0)
    populate_array(retary,
                   data=dataptr,
                   shape=cgutils.pack_array(builder, [shape]),
                   strides=cgutils.pack_array(builder, [stride]),
                   itemsize=ary.itemsize,
                   meminfo=ary.meminfo,
                   parent=ary.parent)
    return retary._getvalue()
def _make_array(context, builder, dataptr, dtype, shape, layout='C'):
    """Wrap raw *dataptr* in an array structure of the given compile-time
    *shape*, computing C-contiguous strides from the element size.

    Fixes: the *layout* argument was previously ignored ('C' was always
    used for the array type).  The strides computed below are C-order,
    which is also valid for layout 'A'.
    """
    ndim = len(shape)
    # Create array object
    aryty = types.Array(dtype=dtype, ndim=ndim, layout=layout)
    ary = context.make_array(aryty)(context, builder)

    targetdata = _get_target_data(context)
    lldtype = context.get_data_type(dtype)
    itemsize = lldtype.get_abi_size(targetdata)

    # Compute C-order strides: innermost (last) dimension is contiguous.
    rstrides = [itemsize]
    for lastsize in reversed(shape[1:]):
        rstrides.append(lastsize * rstrides[-1])
    strides = list(reversed(rstrides))

    kshape = [context.get_constant(types.intp, s) for s in shape]
    kstrides = [context.get_constant(types.intp, s) for s in strides]

    context.populate_array(ary,
                           data=builder.bitcast(dataptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=context.get_constant(types.intp,
                                                         itemsize),
                           meminfo=None)  # caller owns the data
    return ary._getvalue()
def __init__(self, context, builder, args, dims, steps, i, step_offset,
             typ, syms, sym_dim):
    # Wraps the i-th argument of a generalized-ufunc kernel: extracts
    # its data pointer, per-dimension strides and symbolic shape from
    # the NumPy gufunc ABI arrays, and builds an array structure over
    # them.
    self.context = context
    self.builder = builder
    if isinstance(typ, types.Array):
        self.dtype = typ.dtype
    else:
        self.dtype = typ
    self.syms = syms
    # No dimension symbols => the argument is a scalar; it is still
    # presented as a 1D array below.
    self.as_scalar = not syms
    if self.as_scalar:
        self.ndim = 1
    else:
        self.ndim = len(syms)
    # Outer-loop step for this argument lives at index i in *steps*.
    core_step_ptr = builder.gep(steps,
                                [context.get_constant(types.intp, i)],
                                name="core.step.ptr")
    self.core_step = builder.load(core_step_ptr)
    # Inner-dimension strides start at *step_offset* in *steps*.
    self.strides = []
    for j in range(self.ndim):
        step = builder.gep(steps,
                           [context.get_constant(types.intp,
                                                 step_offset + j)],
                           name="step.ptr")
        self.strides.append(builder.load(step))
    # Resolve each dimension symbol to its concrete extent.
    self.shape = []
    for s in syms:
        self.shape.append(sym_dim[s])
    data = builder.load(builder.gep(args,
                                    [context.get_constant(types.intp, i)],
                                    name="data.ptr"),
                        name="data")
    self.data = data
    # "A" layout: no contiguity assumption on gufunc arguments.
    arytyp = types.Array(dtype=self.dtype, ndim=self.ndim, layout="A")
    arycls = context.make_array(arytyp)
    self.array = arycls(context, builder)
    self.array.data = builder.bitcast(self.data, self.array.data.type)
    if not self.as_scalar:
        self.array.shape = cgutils.pack_array(builder, self.shape)
        self.array.strides = cgutils.pack_array(builder, self.strides)
    else:
        # Scalar: 1-element array with zero stride.
        one = context.get_constant(types.intp, 1)
        zero = context.get_constant(types.intp, 0)
        self.array.shape = cgutils.pack_array(builder, [one])
        self.array.strides = cgutils.pack_array(builder, [zero])
    self.array_value = self.array._getpointer()
def __init__(self, context, builder, args, dims, steps, i, step_offset,
             typ, syms, sym_dim):
    # Wraps the i-th argument of a generalized-ufunc kernel: pulls its
    # data pointer, strides and symbolic shape out of the gufunc ABI
    # arrays and builds an array structure over them.
    self.context = context
    self.builder = builder
    if isinstance(typ, types.Array):
        self.dtype = typ.dtype
    else:
        self.dtype = typ
    self.syms = syms
    # An empty symbol list marks a scalar argument; it is still exposed
    # as a 1D array below.
    self.as_scalar = not syms
    if self.as_scalar:
        self.ndim = 1
    else:
        self.ndim = len(syms)
    # Outer-loop step for this argument is at index i in *steps*.
    core_step_ptr = builder.gep(steps,
                                [context.get_constant(types.intp, i)],
                                name="core.step.ptr")
    self.core_step = builder.load(core_step_ptr)
    # Inner-dimension strides start at *step_offset* in *steps*.
    self.strides = []
    for j in range(self.ndim):
        step = builder.gep(
            steps,
            [context.get_constant(types.intp, step_offset + j)],
            name="step.ptr")
        self.strides.append(builder.load(step))
    # Resolve each dimension symbol to its concrete extent.
    self.shape = []
    for s in syms:
        self.shape.append(sym_dim[s])
    data = builder.load(builder.gep(args,
                                    [context.get_constant(types.intp, i)],
                                    name="data.ptr"),
                        name="data")
    self.data = data
    # "A" layout: no contiguity assumption on gufunc arguments.
    arytyp = types.Array(dtype=self.dtype, ndim=self.ndim, layout="A")
    arycls = context.make_array(arytyp)
    self.array = arycls(context, builder)
    self.array.data = builder.bitcast(self.data, self.array.data.type)
    if not self.as_scalar:
        self.array.shape = cgutils.pack_array(builder, self.shape)
        self.array.strides = cgutils.pack_array(builder, self.strides)
    else:
        # Scalar: 1-element array with zero stride.
        one = context.get_constant(types.intp, 1)
        zero = context.get_constant(types.intp, 0)
        self.array.shape = cgutils.pack_array(builder, [one])
        self.array.strides = cgutils.pack_array(builder, [zero])
    self.array_value = self.array._getpointer()
def getitem_array_tuple(context, builder, sig, args):
    """Lower tuple indexing (mixed integers and slices) on an array.

    If the result type is an array, a view over the selected region is
    built; otherwise a single element is loaded.  Fixes: the slicing
    loop previously shadowed both the *idxty* argument and the builtin
    *slice*; the loop variables were renamed.
    """
    aryty, idxty = sig.args
    ary, idx = args
    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)
    ndim = aryty.ndim
    if isinstance(sig.return_type, types.Array):
        # Slicing: at least one tuple member is a slice.
        raw_indices = cgutils.unpack_tuple(builder, idx, aryty.ndim)
        start = []
        shapes = []
        strides = []
        oshapes = cgutils.unpack_tuple(builder, ary.shape, ndim)
        for ax, (indexval, axty) in enumerate(zip(raw_indices, idxty)):
            if axty == types.slice3_type:
                slicestruct = Slice(context, builder, value=indexval)
                cgutils.normalize_slice(builder, slicestruct, oshapes[ax])
                start.append(slicestruct.start)
                shapes.append(cgutils.get_range_from_slice(builder,
                                                           slicestruct))
                strides.append(cgutils.get_strides_from_slice(
                    builder, ndim, ary.strides, slicestruct, ax))
            else:
                # Integer along this axis: it only contributes a start
                # offset, not a result dimension.
                ind = context.cast(builder, indexval, axty, types.intp)
                start.append(ind)
        dataptr = cgutils.get_item_pointer(
            builder, aryty, ary, start,
            wraparound=context.metadata['wraparound'])
        # Build array
        retstty = make_array(sig.return_type)
        retary = retstty(context, builder)
        retary.data = dataptr
        retary.shape = cgutils.pack_array(builder, shapes)
        retary.strides = cgutils.pack_array(builder, strides)
        return retary._getvalue()
    else:
        # Indexing: all-integer tuple loads a single element.
        indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
        indices = [context.cast(builder, i, t, types.intp)
                   for t, i in zip(idxty, indices)]
        ptr = cgutils.get_item_pointer(
            builder, aryty, ary, indices,
            wraparound=context.metadata['wraparound'])
        return context.unpack_value(builder, aryty.dtype, ptr)
def ptx_cmem_arylike(context, builder, sig, args):
    """Materialize a constant host array into CUDA constant memory and
    return an array value viewing it via the generic address space.

    Complex dtypes are lowered as interleaved (real, imag) float pairs;
    other numeric dtypes element-wise.  Raises TypeError otherwise
    (error-message typo "unsupport type" fixed).
    """
    lmod = builder.module
    [arr] = args
    flat = arr.flatten(order='A')
    aryty = sig.return_type
    dtype = aryty.dtype

    if isinstance(dtype, types.Complex):
        # Lower each complex element as a (real, imag) pair of floats.
        elemtype = (types.float32
                    if dtype == types.complex64
                    else types.float64)
        constvals = []
        for i in range(flat.size):
            elem = flat[i]
            real = context.get_constant(elemtype, elem.real)
            imag = context.get_constant(elemtype, elem.imag)
            constvals.extend([real, imag])
    elif dtype in types.number_domain:
        constvals = [context.get_constant(dtype, flat[i])
                     for i in range(flat.size)]
    else:
        raise TypeError("unsupported type: %s" % dtype)

    constary = lc.Constant.array(constvals[0].type, constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [context.get_constant(types.intp, s) for s in arr.strides]
    context.populate_array(ary,
                           data=builder.bitcast(genptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=ary.itemsize,
                           parent=ary.parent,
                           meminfo=None)  # constant memory: not NRT-owned
    return ary._getvalue()
def ptx_cmem_arylike(context, builder, sig, args):
    """Materialize a constant host array into CUDA constant memory and
    return an array value viewing it via the generic address space.

    Complex dtypes are lowered as interleaved (real, imag) float pairs;
    other numeric dtypes element-wise.  Raises TypeError otherwise
    (error-message typo "unsupport type" fixed).
    """
    lmod = builder.module
    [arr] = args
    flat = arr.flatten(order='A')
    aryty = sig.return_type
    dtype = aryty.dtype

    if isinstance(dtype, types.Complex):
        # Lower each complex element as a (real, imag) pair of floats.
        elemtype = (types.float32
                    if dtype == types.complex64
                    else types.float64)
        constvals = []
        for i in range(flat.size):
            elem = flat[i]
            real = context.get_constant(elemtype, elem.real)
            imag = context.get_constant(elemtype, elem.imag)
            constvals.extend([real, imag])
    elif dtype in types.number_domain:
        constvals = [context.get_constant(dtype, flat[i])
                     for i in range(flat.size)]
    else:
        raise TypeError("unsupported type: %s" % dtype)

    constary = lc.Constant.array(constvals[0].type, constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [context.get_constant(types.intp, s) for s in arr.strides]
    context.populate_array(ary,
                           data=builder.bitcast(genptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=ary.itemsize,
                           parent=ary.parent,
                           meminfo=None)  # constant memory: not NRT-owned
    return ary._getvalue()
def getitem_array_unituple(context, builder, sig, args):
    """Lower indexing of an array by a homogeneous tuple: all slices
    (producing a view) or all integers (producing a single element).
    """
    aryty, idxty = sig.args
    ary, idx = args

    ndim = aryty.ndim
    ary = make_array(aryty)(context, builder, ary)

    if idxty.dtype == types.slice3_type:
        # All-slice tuple => build a strided view.
        slice_vals = cgutils.unpack_tuple(builder, idx, aryty.ndim)
        slices = [Slice(context, builder, value=v) for v in slice_vals]
        extents = cgutils.unpack_tuple(builder, ary.shape, ndim)
        for slc, extent in zip(slices, extents):
            cgutils.normalize_slice(builder, slc, extent)
        starts = [slc.start for slc in slices]
        dataptr = cgutils.get_item_pointer(
            builder, aryty, ary, starts,
            wraparound=context.metadata['wraparound'])
        # Assemble the resulting view.
        retary = make_array(sig.return_type)(context, builder)
        retary.data = dataptr
        retary.shape = cgutils.pack_array(
            builder,
            [cgutils.get_range_from_slice(builder, slc)
             for slc in slices])
        retary.strides = cgutils.pack_array(
            builder,
            [cgutils.get_strides_from_slice(builder, ndim, ary.strides,
                                            slc, axis)
             for axis, slc in enumerate(slices)])
        return retary._getvalue()
    else:
        # All-integer tuple => load one element.
        assert idxty.dtype == types.intp
        raw = cgutils.unpack_tuple(builder, idx, count=len(idxty))
        cast_indices = [context.cast(builder, v, t, types.intp)
                        for t, v in zip(idxty, raw)]
        ptr = cgutils.get_item_pointer(
            builder, aryty, ary, cast_indices,
            wraparound=context.metadata['wraparound'])
        return context.unpack_value(builder, aryty.dtype, ptr)
def getitem_array_tuple(context, builder, sig, args):
    """Lower tuple indexing (mixed integers and slices) on an array.

    If the result type is an array, a view sharing the parent's memory
    is built; otherwise a single element is loaded.  Fixes: the slicing
    loop previously shadowed both the *idxty* argument and the builtin
    *slice*; the loop variables were renamed.
    """
    aryty, idxty = sig.args
    ary, idx = args
    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)

    ndim = aryty.ndim
    if isinstance(sig.return_type, types.Array):
        # Slicing: at least one tuple member is a slice.
        raw_indices = cgutils.unpack_tuple(builder, idx, aryty.ndim)
        start = []
        shapes = []
        strides = []

        oshapes = cgutils.unpack_tuple(builder, ary.shape, ndim)
        for ax, (indexval, axty) in enumerate(zip(raw_indices, idxty)):
            if axty == types.slice3_type:
                slicestruct = Slice(context, builder, value=indexval)
                cgutils.normalize_slice(builder, slicestruct, oshapes[ax])
                start.append(slicestruct.start)
                shapes.append(cgutils.get_range_from_slice(builder,
                                                           slicestruct))
                strides.append(cgutils.get_strides_from_slice(
                    builder, ndim, ary.strides, slicestruct, ax))
            else:
                # Integer along this axis: contributes only a start
                # offset, not a result dimension.
                ind = context.cast(builder, indexval, axty, types.intp)
                start.append(ind)

        dataptr = cgutils.get_item_pointer(builder, aryty, ary, start,
                                           wraparound=True)
        # Build array view keeping the parent's memory alive.
        retstty = make_array(sig.return_type)
        retary = retstty(context, builder)
        populate_array(retary,
                       data=dataptr,
                       shape=cgutils.pack_array(builder, shapes),
                       strides=cgutils.pack_array(builder, strides),
                       itemsize=ary.itemsize,
                       meminfo=ary.meminfo,
                       parent=ary.parent)
        return retary._getvalue()
    else:
        # Indexing: all-integer tuple loads a single element.
        indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
        indices = [context.cast(builder, i, t, types.intp)
                   for t, i in zip(idxty, indices)]
        ptr = cgutils.get_item_pointer(builder, aryty, ary, indices,
                                       wraparound=True)
        return context.unpack_value(builder, aryty.dtype, ptr)
def iternext_specific(self, context, builder, arrty, arr, result):
    # Emit one iteration step for a flat/ndenumerate iterator over a
    # contiguous array: yield the current value (plus indices for
    # ndenumerate), then advance counter and data pointer.
    # NOTE(review): *kind* is not defined here — presumably a closure
    # variable of the enclosing factory ('flat' vs ndenumerate); confirm
    # in the full file.  *zero* is currently unused.
    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)

    ndim = arrty.ndim
    nitems = arr.nitems

    index = builder.load(self.index)
    is_valid = builder.icmp(lc.ICMP_SLT, index, nitems)
    result.set_valid(is_valid)

    with cgutils.if_likely(builder, is_valid):
        ptr = builder.load(self.pointer)
        value = context.unpack_value(builder, arrty.dtype, ptr)
        if kind == 'flat':
            result.yield_(value)
        else:
            # ndenumerate(): fetch and increment indices
            indices = self.indices
            idxvals = [builder.load(cgutils.gep(builder, indices, dim))
                       for dim in range(ndim)]
            idxtuple = cgutils.pack_array(builder, idxvals)
            result.yield_(
                cgutils.make_anonymous_struct(builder, [idxtuple, value]))
            _increment_indices_array(context, builder, arrty, arr, indices)

        index = builder.add(index, one)
        builder.store(index, self.index)
        # Advance the data pointer by one element stride.
        ptr = cgutils.pointer_add(builder, ptr, self.stride)
        builder.store(ptr, self.pointer)
def iternext_specific(self, context, builder, arrty, arr, result):
    # One iteration step of a flat/ndenumerate iterator: yield the
    # current element (with indices for ndenumerate), then bump the
    # counter and the data pointer by one element stride.
    # NOTE(review): *kind* is presumably a closure variable of the
    # enclosing factory ('flat' vs ndenumerate) — confirm in the full
    # file.  *zero* is currently unused.
    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)

    ndim = arrty.ndim
    nitems = arr.nitems

    index = builder.load(self.index)
    is_valid = builder.icmp(lc.ICMP_SLT, index, nitems)
    result.set_valid(is_valid)

    with cgutils.if_likely(builder, is_valid):
        ptr = builder.load(self.pointer)
        value = context.unpack_value(builder, arrty.dtype, ptr)
        if kind == 'flat':
            result.yield_(value)
        else:
            # ndenumerate(): fetch and increment indices
            indices = self.indices
            idxvals = [
                builder.load(cgutils.gep(builder, indices, dim))
                for dim in range(ndim)
            ]
            idxtuple = cgutils.pack_array(builder, idxvals)
            result.yield_(
                cgutils.make_anonymous_struct(
                    builder, [idxtuple, value]))
            _increment_indices_array(context, builder, arrty, arr, indices)

        index = builder.add(index, one)
        builder.store(index, self.index)
        # Advance the data pointer by one element stride.
        ptr = cgutils.pointer_add(builder, ptr, self.stride)
        builder.store(ptr, self.pointer)
def iternext_specific(self, context, builder, result):
    """Yield the next index tuple of an ndindex-style iterator, then
    increment the stored indices.

    NOTE(review): *ndim* is presumably a closure variable of the
    enclosing factory — confirm in the full file.  The unused ``zero``
    and ``one`` constants of the original were removed.
    """
    bbend = cgutils.append_basic_block(builder, 'end')

    # Bail out early if a previous step already exhausted the iterator.
    exhausted = cgutils.as_bool_bit(builder, builder.load(self.exhausted))
    with cgutils.if_unlikely(builder, exhausted):
        result.set_valid(False)
        builder.branch(bbend)

    indices = [builder.load(cgutils.gep(builder, self.indices, dim))
               for dim in range(ndim)]
    result.yield_(cgutils.pack_array(builder, indices))
    result.set_valid(True)

    shape = cgutils.unpack_tuple(builder, self.shape, ndim)
    _increment_indices(context, builder, ndim, shape,
                       self.indices, self.exhausted)

    builder.branch(bbend)
    builder.position_at_end(bbend)
def to_native_tuple(self, obj, typ):
    """
    Convert tuple *obj* to a native array (if homogeneous) or structure.

    Returns a NativeValue whose is_error flag is the OR of all element
    conversion errors and whose cleanup runs the element cleanups in
    reverse order.  (Removed the unused ``n = len(typ)`` local and
    fixed the "homogenous" typo.)
    """
    values = []
    cleanups = []
    is_error = cgutils.false_bit
    for i, eltype in enumerate(typ):
        elem = self.tuple_getitem(obj, i)
        native = self.to_native_value(elem, eltype)
        values.append(native.value)
        # The whole conversion fails if any element conversion fails.
        is_error = self.builder.or_(is_error, native.is_error)
        if native.cleanup is not None:
            cleanups.append(native.cleanup)

    if cleanups:
        # Run element cleanups in reverse construction order.
        def cleanup():
            for func in reversed(cleanups):
                func()
    else:
        cleanup = None

    if isinstance(typ, types.UniTuple):
        # Homogeneous tuple: an LLVM array value.
        value = cgutils.pack_array(self.builder, values)
    else:
        # Heterogeneous tuple: an anonymous struct value.
        value = cgutils.make_anonymous_struct(self.builder, values)
    return NativeValue(value, is_error=is_error, cleanup=cleanup)
def lower_assign(self, ty, inst):
    """Lower an IR assignment: constants, expressions, variable copies
    (with cast) and globals.

    Fixes: the Global branch contained a second, unreachable
    ``is_struct_type`` test after the first identical one; the dead
    branch was removed.
    """
    value = inst.value
    if isinstance(value, ir.Const):
        if self.context.is_struct_type(ty):
            const = self.context.get_constant_struct(self.builder, ty,
                                                     value.value)
        elif ty == types.string:
            const = self.context.get_constant_string(self.builder, ty,
                                                     value.value)
        else:
            const = self.context.get_constant(ty, value.value)
        return const

    elif isinstance(value, ir.Expr):
        return self.lower_expr(ty, value)

    elif isinstance(value, ir.Var):
        val = self.loadvar(value.name)
        oty = self.typeof(value.name)
        # Insert an implicit cast from the variable's type to the
        # assignment's target type.
        return self.context.cast(self.builder, val, oty, ty)

    elif isinstance(value, ir.Global):
        if (isinstance(ty, types.Dummy) or
                isinstance(ty, types.Module) or
                isinstance(ty, types.Function) or
                isinstance(ty, types.Dispatcher)):
            # Pure compile-time entities have no runtime representation.
            return self.context.get_dummy_value()
        elif ty == types.boolean:
            return self.context.get_constant(ty, value.value)
        elif isinstance(ty, types.Array):
            return self.context.make_constant_array(self.builder, ty,
                                                    value.value)
        elif self.context.is_struct_type(ty):
            return self.context.get_constant_struct(self.builder, ty,
                                                    value.value)
        elif ty in types.number_domain:
            return self.context.get_constant(ty, value.value)
        elif isinstance(ty, types.UniTuple):
            consts = [self.context.get_constant(t, v)
                      for t, v in zip(ty, value.value)]
            return cgutils.pack_array(self.builder, consts)
        else:
            raise NotImplementedError('global', ty)

    else:
        raise NotImplementedError(type(value), value)
def _empty_nd_impl(context, builder, arrtype, shapes):
    """Utility function used for allocating a new array during LLVM code
    generation (lowering). Given a target context, builder, array
    type, and a tuple or list of lowered dimension sizes, returns a
    LLVM value pointing at a Numba runtime allocated array.
    """
    arycls = make_array(arrtype)
    ary = arycls(context, builder)

    datatype = context.get_data_type(arrtype.dtype)
    itemsize = context.get_constant(types.intp,
                                    context.get_abi_sizeof(datatype))

    # compute array length
    arrlen = context.get_constant(types.intp, 1)
    for s in shapes:
        arrlen = builder.mul(arrlen, s)

    if arrtype.layout == 'C':
        # Row-major: innermost (last) dimension is contiguous.
        strides = [itemsize]
        for dimension_size in reversed(shapes[1:]):
            strides.append(builder.mul(strides[-1], dimension_size))
        strides = tuple(reversed(strides))
    elif arrtype.layout == 'F':
        # Column-major: outermost (first) dimension is contiguous.
        strides = [itemsize]
        for dimension_size in shapes[:-1]:
            strides.append(builder.mul(strides[-1], dimension_size))
        strides = tuple(strides)
    else:
        raise NotImplementedError(
            "Don't know how to allocate array with layout '{0}'.".format(
                arrtype.layout))

    # Allocate NRT-managed memory (itemsize * number of elements) and
    # attach it as meminfo so the array participates in refcounting.
    meminfo = context.nrt_meminfo_alloc(builder,
                                        size=builder.mul(itemsize, arrlen))
    data = context.nrt_meminfo_data(builder, meminfo)

    populate_array(ary,
                   data=builder.bitcast(data, datatype.as_pointer()),
                   shape=cgutils.pack_array(builder, shapes),
                   strides=cgutils.pack_array(builder, strides),
                   itemsize=itemsize,
                   meminfo=meminfo)

    return ary._getvalue()
def getitem_array_unituple(context, builder, sig, args):
    # Lower indexing of an array by a homogeneous tuple: all slices
    # (producing a view) or all integers (producing a single element).
    aryty, idxty = sig.args
    ary, idx = args

    ndim = aryty.ndim
    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)

    if idxty.dtype == types.slice3_type:
        # Slicing
        raw_slices = cgutils.unpack_tuple(builder, idx, aryty.ndim)
        slices = [Slice(context, builder, value=sl) for sl in raw_slices]
        # Resolve negative/None bounds against each dimension's extent.
        for sl, sh in zip(slices,
                          cgutils.unpack_tuple(builder, ary.shape, ndim)):
            cgutils.normalize_slice(builder, sl, sh)
        indices = [sl.start for sl in slices]
        dataptr = cgutils.get_item_pointer(builder, aryty, ary, indices,
                                           wraparound=True)
        # Build array
        retstty = make_array(sig.return_type)
        retary = retstty(context, builder)
        shapes = [cgutils.get_range_from_slice(builder, sl)
                  for sl in slices]
        strides = [cgutils.get_strides_from_slice(builder, ndim,
                                                  ary.strides, sl, i)
                   for i, sl in enumerate(slices)]
        # Share the parent's meminfo/parent so the view keeps the
        # underlying buffer alive.
        populate_array(retary,
                       data=dataptr,
                       shape=cgutils.pack_array(builder, shapes),
                       strides=cgutils.pack_array(builder, strides),
                       itemsize=ary.itemsize,
                       meminfo=ary.meminfo,
                       parent=ary.parent)
        return retary._getvalue()
    else:
        # Indexing
        assert isinstance(idxty.dtype, types.Integer)
        indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
        indices = [context.cast(builder, i, t, types.intp)
                   for t, i in zip(idxty, indices)]
        ptr = cgutils.get_item_pointer(builder, aryty, ary, indices,
                                       wraparound=idxty.dtype.signed)
        return context.unpack_value(builder, aryty.dtype, ptr)
def lower_assign(self, ty, inst):
    """Lower an IR assignment: constants, expressions, variable copies
    (with cast) and globals.

    Fixes: the Global branch contained a second, unreachable
    ``is_struct_type`` test after the first identical one; the dead
    branch was removed.
    """
    value = inst.value
    if isinstance(value, ir.Const):
        if self.context.is_struct_type(ty):
            const = self.context.get_constant_struct(self.builder, ty,
                                                     value.value)
        elif ty == types.string:
            const = self.context.get_constant_string(self.builder, ty,
                                                     value.value)
        else:
            const = self.context.get_constant(ty, value.value)
        return const

    elif isinstance(value, ir.Expr):
        return self.lower_expr(ty, value)

    elif isinstance(value, ir.Var):
        val = self.loadvar(value.name)
        oty = self.typeof(value.name)
        # Insert an implicit cast from the variable's type to the
        # assignment's target type.
        return self.context.cast(self.builder, val, oty, ty)

    elif isinstance(value, ir.Global):
        if (isinstance(ty, types.Dummy) or
                isinstance(ty, types.Module) or
                isinstance(ty, types.Function) or
                isinstance(ty, types.Dispatcher)):
            # Pure compile-time entities have no runtime representation.
            return self.context.get_dummy_value()
        elif ty == types.boolean:
            return self.context.get_constant(ty, value.value)
        elif isinstance(ty, types.Array):
            return self.context.make_constant_array(self.builder, ty,
                                                    value.value)
        elif self.context.is_struct_type(ty):
            return self.context.get_constant_struct(self.builder, ty,
                                                    value.value)
        elif ty in types.number_domain:
            return self.context.get_constant(ty, value.value)
        elif isinstance(ty, types.UniTuple):
            consts = [self.context.get_constant(t, v)
                      for t, v in zip(ty, value.value)]
            return cgutils.pack_array(self.builder, consts)
        else:
            raise NotImplementedError('global', ty)

    else:
        raise NotImplementedError(type(value), value)
def imp(context, builder, typ, val):
    # Build an array view over a nested-array member of a record value.
    # NOTE(review): aryty, elemty, offset and self are not defined here —
    # presumably closure variables of the enclosing registration code;
    # confirm in the full file.
    ary = aryty(context, builder)
    dtype = elemty.dtype
    # Shape and strides are compile-time constants of the nested type.
    newshape = [self.get_constant(types.intp, s) for s in elemty.shape]
    newstrides = [self.get_constant(types.intp, s) for s in elemty.strides]
    # Pointer to the member's storage inside the record.
    newdata = cgutils.get_record_member(builder, val, offset,
                                        self.get_data_type(dtype))
    arrayobj.populate_array(
        ary,
        data=newdata,
        shape=cgutils.pack_array(builder, newshape),
        strides=cgutils.pack_array(builder, newstrides),
        itemsize=context.get_constant(types.intp, elemty.size),
        # ary was just constructed, so this passes the struct's default
        # parent value through.
        parent=ary.parent
    )
    return ary._getvalue()
def ptx_cmem_arylike(context, builder, sig, args):
    """Copy a constant host array byte-wise into CUDA constant memory
    and return an array value viewing it through the generic address
    space.

    (Removed the redundant ``iter()`` wrapper: iterating a bytes object
    directly yields ints on Python 3.)
    """
    lmod = builder.module
    [arr] = args
    aryty = sig.return_type

    # Emit the raw bytes of the array, preserving its memory order.
    constvals = [context.get_constant(types.byte, i)
                 for i in arr.tobytes(order='A')]
    constary = lc.Constant.array(Type.int(8), constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Preserve the underlying alignment: round the element size up to
    # the next power of two.
    lldtype = context.get_data_type(aryty.dtype)
    align = context.get_abi_sizeof(lldtype)
    gv.align = 2 ** (align - 1).bit_length()

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [context.get_constant(types.intp, s) for s in arr.strides]
    context.populate_array(ary,
                           data=builder.bitcast(genptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=ary.itemsize,
                           parent=ary.parent,
                           meminfo=None)  # constant memory: not NRT-owned
    return ary._getvalue()
def ptx_gridsize2d(context, builder, sig, args):
    """Return the total 2D grid size: (ntid * nctaid) for x and y."""
    assert len(args) == 1
    threads_x = nvvmutils.call_sreg(builder, "ntid.x")
    blocks_x = nvvmutils.call_sreg(builder, "nctaid.x")
    threads_y = nvvmutils.call_sreg(builder, "ntid.y")
    blocks_y = nvvmutils.call_sreg(builder, "nctaid.y")

    size_x = builder.mul(threads_x, blocks_x)
    size_y = builder.mul(threads_y, blocks_y)
    return cgutils.pack_array(builder, [size_x, size_y])
def imp(context, builder, typ, val):
    # Build an array view over a nested-array member of a record value.
    # NOTE(review): aryty, elemty, offset and self are not defined here —
    # presumably closure variables of the enclosing registration code;
    # confirm in the full file.
    ary = aryty(context, builder)
    dtype = elemty.dtype
    # Shape and strides are compile-time constants of the nested type.
    newshape = [self.get_constant(types.intp, s) for s in elemty.shape]
    newstrides = [self.get_constant(types.intp, s) for s in elemty.strides]
    # Pointer to the member's storage inside the record.
    newdata = cgutils.get_record_member(builder, val, offset,
                                        self.get_data_type(dtype))
    arrayobj.populate_array(
        ary,
        data=newdata,
        shape=cgutils.pack_array(builder, newshape),
        strides=cgutils.pack_array(builder, newstrides),
        itemsize=context.get_constant(types.intp, elemty.size),
        # ary was just constructed, so these pass the struct's default
        # meminfo/parent values through.
        meminfo=ary.meminfo,
        parent=ary.parent
    )
    return ary._getvalue()
def ptx_cmem_arylike(context, builder, sig, args):
    # Copy a constant host array byte-wise into CUDA constant memory and
    # return an array value viewing it through the generic address space.
    lmod = builder.module
    [arr] = args
    aryty = sig.return_type

    # six.iterbytes yields ints on both Python 2 and 3.
    constvals = [
        context.get_constant(types.byte, i)
        for i in six.iterbytes(arr.tobytes(order='A'))
    ]
    constary = lc.Constant.array(Type.int(8), constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Preserve the underlying alignment
    lldtype = context.get_data_type(aryty.dtype)
    align = context.get_abi_sizeof(lldtype)
    # Round the element size up to the next power of two.
    gv.align = 2 ** (align - 1).bit_length()

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [context.get_constant(types.intp, s) for s in arr.strides]
    context.populate_array(ary,
                           data=builder.bitcast(genptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=ary.itemsize,
                           parent=ary.parent,
                           # Constant memory is not NRT-owned.
                           meminfo=None)
    return ary._getvalue()
def ptx_grid2d(context, builder, sig, args):
    """Return the global 2D thread index: ctaid * ntid + tid per axis."""
    assert len(args) == 1
    # Read the special registers in the same order as before.  Note the
    # originals named the "ctaid" values "nctaid*"; renamed for clarity.
    tid_x = _call_sreg(builder, "tid.x")
    ntid_x = _call_sreg(builder, "ntid.x")
    ctaid_x = _call_sreg(builder, "ctaid.x")
    tid_y = _call_sreg(builder, "tid.y")
    ntid_y = _call_sreg(builder, "ntid.y")
    ctaid_y = _call_sreg(builder, "ctaid.y")

    global_x = builder.add(builder.mul(ntid_x, ctaid_x), tid_x)
    global_y = builder.add(builder.mul(ntid_y, ctaid_y), tid_y)
    return cgutils.pack_array(builder, [global_x, global_y])
def to_native_tuple(self, obj, typ):
    """
    Convert tuple *obj* to a native array (if homogeneous) or structure.
    """
    n = len(typ)  # NOTE(review): currently unused
    values = []
    for i, eltype in enumerate(typ):
        elem = self.tuple_getitem(obj, i)
        values.append(self.to_native_value(elem, eltype))
    if isinstance(typ, types.UniTuple):
        # Homogeneous tuple: represent as an LLVM array value.
        return cgutils.pack_array(self.builder, values)
    else:
        # Heterogeneous tuple: represent as an anonymous struct.
        return cgutils.make_anonymous_struct(self.builder, values)
def build_set(context, builder, set_type, items):
    """
    Build a set of the given type, containing the given items.
    """
    item_count = len(items)
    inst = SetInstance.allocate(context, builder, set_type, item_count)

    # Inlining one insertion per item would bloat the generated code, so
    # the items are packed into a stack-allocated LLVM array that a
    # runtime loop walks over.
    packed = cgutils.pack_array(builder, items)
    packed_ptr = cgutils.alloca_once_value(builder, packed)
    n = context.get_constant(types.intp, item_count)
    with cgutils.for_range(builder, n) as loop:
        elem = builder.load(cgutils.gep(builder, packed_ptr, 0,
                                        loop.index))
        inst.add(elem)

    return impl_ret_new_ref(context, builder, set_type, inst.value)
def init_specific(self, context, builder, shapes):
    # Allocate and zero-initialize the per-dimension index counters and
    # the exhaustion flag for an N-dimensional iterator.
    # NOTE(review): *ndim* is not defined here — presumably a closure
    # variable of the enclosing factory; confirm in the full file.
    zero = context.get_constant(types.intp, 0)
    indices = cgutils.alloca_once(builder, zero.type,
                                  size=context.get_constant(types.intp,
                                                            ndim))
    exhausted = cgutils.alloca_once_value(builder, cgutils.false_byte)

    for dim in range(ndim):
        idxptr = cgutils.gep(builder, indices, dim)
        builder.store(zero, idxptr)
        # 0-sized dimensions really indicate an empty array,
        # but we have to catch that condition early to avoid
        # a bug inside the iteration logic.
        dim_size = shapes[dim]
        dim_is_empty = builder.icmp(lc.ICMP_EQ, dim_size, zero)
        with cgutils.if_unlikely(builder, dim_is_empty):
            builder.store(cgutils.true_byte, exhausted)

    self.indices = indices
    self.exhausted = exhausted
    self.shape = cgutils.pack_array(builder, shapes)
def init_specific(self, context, builder, shapes):
    # Allocate and zero-initialize the per-dimension index counters and
    # the exhaustion flag for an N-dimensional iterator.
    # NOTE(review): *ndim* is not defined here — presumably a closure
    # variable of the enclosing factory; confirm in the full file.
    zero = context.get_constant(types.intp, 0)
    indices = cgutils.alloca_once(builder, zero.type,
                                  size=context.get_constant(
                                      types.intp, ndim))
    exhausted = cgutils.alloca_once_value(builder, cgutils.false_byte)

    for dim in range(ndim):
        idxptr = cgutils.gep(builder, indices, dim)
        builder.store(zero, idxptr)
        # 0-sized dimensions really indicate an empty array,
        # but we have to catch that condition early to avoid
        # a bug inside the iteration logic.
        dim_size = shapes[dim]
        dim_is_empty = builder.icmp(lc.ICMP_EQ, dim_size, zero)
        with cgutils.if_unlikely(builder, dim_is_empty):
            builder.store(cgutils.true_byte, exhausted)

    self.indices = indices
    self.exhausted = exhausted
    self.shape = cgutils.pack_array(builder, shapes)
def iternext_specific(self, context, builder, result):
    """
    Emit one iteration step: yield the current multi-index (packed as an
    LLVM array) and advance the stored indices, or mark *result* invalid
    when the iterator is exhausted.

    NOTE(review): `ndim` is a free variable captured from an enclosing
    scope not visible in this chunk, and `_increment_indices` is a
    sibling helper defined elsewhere in this file.
    """
    # Fix: the original body created `zero` and `one` intp constants that
    # were never used; `context.get_constant` takes no builder and emits
    # no IR, so dropping the dead locals is behavior-preserving.
    bbend = cgutils.append_basic_block(builder, 'end')

    # Fast exit when a previous step already flagged exhaustion.
    exhausted = cgutils.as_bool_bit(builder, builder.load(self.exhausted))
    with cgutils.if_unlikely(builder, exhausted):
        result.set_valid(False)
        builder.branch(bbend)

    # Load the current index of every dimension and yield them packed.
    indices = [builder.load(cgutils.gep(builder, self.indices, dim))
               for dim in range(ndim)]
    result.yield_(cgutils.pack_array(builder, indices))
    result.set_valid(True)

    # Advance to the next multi-index; sets self.exhausted at the end.
    shape = cgutils.unpack_tuple(builder, self.shape, ndim)
    _increment_indices(context, builder, ndim, shape,
                       self.indices, self.exhausted)
    builder.branch(bbend)

    builder.position_at_end(bbend)
def ptx_grid2d(context, builder, sig, args):
    """Lower a 2D grid query: pack both global thread indices into an array."""
    assert len(args) == 1
    gx, gy = nvvmutils.get_global_id(builder, dim=2)
    return cgutils.pack_array(builder, [gx, gy])
def as_data(self, builder, value):
    """Load every member of *value* and pack them into one LLVM array."""
    count = self._fe_type.count
    members = [builder.load(cgutils.gep_inbounds(builder, value, idx))
               for idx in range(count)]
    return cgutils.pack_array(builder, members)
def _shape_and_strides(self, context, builder):
    """Return ``self.shape`` and ``self.strides`` packed as LLVM arrays."""
    packed_shape = cgutils.pack_array(builder, self.shape)
    packed_strides = cgutils.pack_array(builder, self.strides)
    return (packed_shape, packed_strides)
def iternext_specific(self, context, builder, arrty, arr, result):
    """
    Emit one step of a flat / ndenumerate array iterator: yield the
    current element (and, for the non-flat variant, its index tuple),
    then advance the per-dimension indices and data pointers, or mark
    *result* invalid when the iterator is exhausted.

    NOTE(review): `kind` is a free variable from an enclosing scope not
    visible in this chunk ('flat' or, presumably, an ndenumerate
    variant); confirm against the code that builds this class.
    """
    ndim = arrty.ndim
    data = arr.data  # NOTE(review): appears unused below -- confirm
    shapes = cgutils.unpack_tuple(builder, arr.shape, ndim)
    strides = cgutils.unpack_tuple(builder, arr.strides, ndim)
    indices = self.indices
    pointers = self.pointers
    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)
    bbend = cgutils.append_basic_block(builder, 'end')

    # Catch already computed iterator exhaustion
    is_exhausted = cgutils.as_bool_bit(
        builder, builder.load(self.exhausted))
    with cgutils.if_unlikely(builder, is_exhausted):
        result.set_valid(False)
        builder.branch(bbend)
    result.set_valid(True)

    # Current pointer inside last dimension
    last_ptr = cgutils.gep(builder, pointers, ndim - 1)
    ptr = builder.load(last_ptr)
    value = context.unpack_value(builder, arrty.dtype, ptr)
    if kind == 'flat':
        result.yield_(value)
    else:
        # ndenumerate() => yield (indices, value)
        idxvals = [builder.load(cgutils.gep(builder, indices, dim))
                   for dim in range(ndim)]
        idxtuple = cgutils.pack_array(builder, idxvals)
        result.yield_(
            cgutils.make_anonymous_struct(builder, [idxtuple, value]))

    # Update indices and pointers by walking from inner
    # dimension to outer.
    for dim in reversed(range(ndim)):
        idxptr = cgutils.gep(builder, indices, dim)
        idx = builder.add(builder.load(idxptr), one)
        count = shapes[dim]
        stride = strides[dim]
        in_bounds = builder.icmp(lc.ICMP_SLT, idx, count)
        with cgutils.if_likely(builder, in_bounds):
            # Index is valid => pointer can simply be incremented.
            builder.store(idx, idxptr)
            ptrptr = cgutils.gep(builder, pointers, dim)
            ptr = builder.load(ptrptr)
            ptr = cgutils.pointer_add(builder, ptr, stride)
            builder.store(ptr, ptrptr)
            # Reset pointers in inner dimensions
            for inner_dim in range(dim + 1, ndim):
                ptrptr = cgutils.gep(builder, pointers, inner_dim)
                builder.store(ptr, ptrptr)
            builder.branch(bbend)
        # Reset index and continue with next dimension
        builder.store(zero, idxptr)

    # End of array
    builder.store(cgutils.true_byte, self.exhausted)
    builder.branch(bbend)

    builder.position_at_end(bbend)
def as_data(self, builder, value):
    """Load every member of *value* and pack them into one LLVM array."""
    loaded = []
    for pos in range(self._fe_type.count):
        member_ptr = cgutils.gep_inbounds(builder, value, pos)
        loaded.append(builder.load(member_ptr))
    return cgutils.pack_array(builder, loaded)
def iternext_specific(self, context, builder, arrty, arr, result):
    """
    Emit one step of a flat / ndenumerate array iterator: yield the
    current element (and, for the non-flat variant, its index tuple),
    then advance the per-dimension indices and data pointers, or mark
    *result* invalid when the iterator is exhausted.

    NOTE(review): `kind` is a free variable from an enclosing scope not
    visible in this chunk ('flat' or, presumably, an ndenumerate
    variant); confirm against the code that builds this class.
    """
    ndim = arrty.ndim
    data = arr.data  # NOTE(review): appears unused below -- confirm
    shapes = cgutils.unpack_tuple(builder, arr.shape, ndim)
    strides = cgutils.unpack_tuple(builder, arr.strides, ndim)
    indices = self.indices
    pointers = self.pointers
    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)
    bbend = cgutils.append_basic_block(builder, 'end')

    # Catch already computed iterator exhaustion
    is_exhausted = cgutils.as_bool_bit(
        builder, builder.load(self.exhausted))
    with cgutils.if_unlikely(builder, is_exhausted):
        result.set_valid(False)
        builder.branch(bbend)
    result.set_valid(True)

    # Current pointer inside last dimension
    last_ptr = cgutils.gep(builder, pointers, ndim - 1)
    ptr = builder.load(last_ptr)
    value = context.unpack_value(builder, arrty.dtype, ptr)
    if kind == 'flat':
        result.yield_(value)
    else:
        # ndenumerate() => yield (indices, value)
        idxvals = [builder.load(cgutils.gep(builder, indices, dim))
                   for dim in range(ndim)]
        idxtuple = cgutils.pack_array(builder, idxvals)
        result.yield_(
            cgutils.make_anonymous_struct(builder, [idxtuple, value]))

    # Update indices and pointers by walking from inner
    # dimension to outer.
    for dim in reversed(range(ndim)):
        idxptr = cgutils.gep(builder, indices, dim)
        idx = builder.add(builder.load(idxptr), one)
        count = shapes[dim]
        stride = strides[dim]
        in_bounds = builder.icmp(lc.ICMP_SLT, idx, count)
        with cgutils.if_likely(builder, in_bounds):
            # Index is valid => pointer can simply be incremented.
            builder.store(idx, idxptr)
            ptrptr = cgutils.gep(builder, pointers, dim)
            ptr = builder.load(ptrptr)
            ptr = cgutils.pointer_add(builder, ptr, stride)
            builder.store(ptr, ptrptr)
            # Reset pointers in inner dimensions
            for inner_dim in range(dim + 1, ndim):
                ptrptr = cgutils.gep(builder, pointers, inner_dim)
                builder.store(ptr, ptrptr)
            builder.branch(bbend)
        # Reset index and continue with next dimension
        builder.store(zero, idxptr)

    # End of array
    builder.store(cgutils.true_byte, self.exhausted)
    builder.branch(bbend)

    builder.position_at_end(bbend)
def ptx_grid3d(context, builder, sig, args):
    """Lower a 3D grid query: pack all three global thread indices into an array."""
    assert len(args) == 1
    gx, gy, gz = nvvmutils.get_global_id(builder, dim=3)
    return cgutils.pack_array(builder, [gx, gy, gz])