def init_specific(self, context, builder, arrty, arr):
    zero = context.get_constant(types.intp, 0)
    data = arr.data
    ndim = arrty.ndim
    shapes = cgutils.unpack_tuple(builder, arr.shape, ndim)

    indices = cgutils.alloca_once(builder, zero.type,
                                  size=context.get_constant(types.intp,
                                                            arrty.ndim))
    pointers = cgutils.alloca_once(builder, data.type,
                                   size=context.get_constant(types.intp,
                                                             arrty.ndim))
    strides = cgutils.unpack_tuple(builder, arr.strides, ndim)
    exhausted = cgutils.alloca_once_value(builder, cgutils.false_byte)

    # Initialize indices and pointers with their start values.
    for dim in range(ndim):
        idxptr = cgutils.gep(builder, indices, dim)
        ptrptr = cgutils.gep(builder, pointers, dim)
        builder.store(data, ptrptr)
        builder.store(zero, idxptr)
        # 0-sized dimensions really indicate an empty array,
        # but we have to catch that condition early to avoid
        # a bug inside the iteration logic (see issue #846).
        dim_size = shapes[dim]
        dim_is_empty = builder.icmp(lc.ICMP_EQ, dim_size, zero)
        with cgutils.if_unlikely(builder, dim_is_empty):
            builder.store(cgutils.true_byte, exhausted)

    self.indices = indices
    self.pointers = pointers
    self.exhausted = exhausted
def dot_3_vm(context, builder, sig, args):
    """
    np.dot(vector, matrix, out)
    np.dot(matrix, vector, out)
    """
    xty, yty, outty = sig.args
    assert outty == sig.return_type
    dtype = xty.dtype

    x = make_array(xty)(context, builder, args[0])
    y = make_array(yty)(context, builder, args[1])
    out = make_array(outty)(context, builder, args[2])
    x_shapes = cgutils.unpack_tuple(builder, x.shape)
    y_shapes = cgutils.unpack_tuple(builder, y.shape)
    out_shapes = cgutils.unpack_tuple(builder, out.shape)
    if xty.ndim < yty.ndim:
        # Vector * matrix
        # Asked for x * y, we will compute y.T * x
        mty = yty
        m_shapes = y_shapes
        do_trans = yty.layout == 'F'
        m_data, v_data = y.data, x.data

        def check_args(a, b, out):
            m, = a.shape
            _m, n = b.shape
            if m != _m:
                raise ValueError("incompatible array sizes for "
                                 "np.dot(a, b) (vector * matrix)")
            if out.shape != (n,):
                raise ValueError("incompatible output array size for "
                                 "np.dot(a, b, out) (vector * matrix)")
    else:
        # Matrix * vector
        # We will compute x * y
        mty = xty
        m_shapes = x_shapes
        do_trans = xty.layout == 'C'
        m_data, v_data = x.data, y.data

        def check_args(a, b, out):
            m, _n = a.shape
            n, = b.shape
            if n != _n:
                raise ValueError("incompatible array sizes for np.dot(a, b) "
                                 "(matrix * vector)")
            if out.shape != (m,):
                raise ValueError("incompatible output array size for "
                                 "np.dot(a, b, out) (matrix * vector)")

    context.compile_internal(builder, check_args,
                             signature(types.none, *sig.args), args)
    for val in m_shapes:
        check_c_int(context, builder, val)
    call_xxgemv(context, builder, do_trans, mty, m_shapes, m_data,
                v_data, out.data)
    return impl_ret_borrowed(context, builder, sig.return_type,
                             out._getvalue())
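For orientation, here is a minimal user-level sketch of the call that dot_3_vm lowers; it assumes Numba's nopython-mode support for the three-argument np.dot, which needs a BLAS (e.g. via SciPy) at runtime.

import numpy as np
from numba import njit

@njit
def vm_dot(v, m, out):
    # vector @ matrix with an explicit output array; lowered by dot_3_vm
    return np.dot(v, m, out)

# vm_dot(np.ones(3), np.ones((3, 4)), np.empty(4))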
def getitem_arraynd_intp(context, builder, sig, args):
    aryty, idxty = sig.args
    ary, idx = args
    arystty = make_array(aryty)
    adapted_ary = arystty(context, builder, ary)
    ndim = aryty.ndim
    if ndim == 1:
        result = _getitem_array1d(context, builder, aryty, adapted_ary,
                                  idx, wraparound=idxty.signed)
    elif ndim > 1:
        out_ary_ty = make_array(aryty.copy(ndim=ndim - 1))
        out_ary = out_ary_ty(context, builder)
        in_shapes = cgutils.unpack_tuple(builder, adapted_ary.shape,
                                         count=ndim)
        in_strides = cgutils.unpack_tuple(builder, adapted_ary.strides,
                                          count=ndim)
        data_p = cgutils.get_item_pointer2(builder, adapted_ary.data,
                                           in_shapes, in_strides,
                                           aryty.layout, [idx],
                                           wraparound=idxty.signed)
        populate_array(out_ary,
                       data=data_p,
                       shape=cgutils.pack_array(builder, in_shapes[1:]),
                       strides=cgutils.pack_array(builder, in_strides[1:]),
                       itemsize=adapted_ary.itemsize,
                       parent=adapted_ary.parent)
        result = out_ary._getvalue()
    else:
        raise NotImplementedError("1D indexing into %dD array" % aryty.ndim)
    return result
def getitem_array_tuple(context, builder, sig, args):
    aryty, idxty = sig.args
    ary, idx = args

    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)

    ndim = aryty.ndim
    if isinstance(sig.return_type, types.Array):
        # Slicing
        raw_indices = cgutils.unpack_tuple(builder, idx, aryty.ndim)
        start = []
        shapes = []
        strides = []

        oshapes = cgutils.unpack_tuple(builder, ary.shape, ndim)
        for ax, (indexval, idxty) in enumerate(zip(raw_indices, idxty)):
            if idxty == types.slice3_type:
                slice = Slice(context, builder, value=indexval)
                cgutils.normalize_slice(builder, slice, oshapes[ax])
                start.append(slice.start)
                shapes.append(cgutils.get_range_from_slice(builder, slice))
                strides.append(cgutils.get_strides_from_slice(builder, ndim,
                                                              ary.strides,
                                                              slice, ax))
            else:
                ind = context.cast(builder, indexval, idxty, types.intp)
                start.append(ind)

        dataptr = cgutils.get_item_pointer(
            builder, aryty, ary, start,
            wraparound=context.metadata['wraparound'])
        # Build array
        retstty = make_array(sig.return_type)
        retary = retstty(context, builder)
        retary.data = dataptr
        retary.shape = cgutils.pack_array(builder, shapes)
        retary.strides = cgutils.pack_array(builder, strides)
        return retary._getvalue()
    else:
        # Indexing
        indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
        indices = [context.cast(builder, i, t, types.intp)
                   for t, i in zip(idxty, indices)]
        ptr = cgutils.get_item_pointer(
            builder, aryty, ary, indices,
            wraparound=context.metadata['wraparound'])
        return context.unpack_value(builder, aryty.dtype, ptr)
def array_nonzero(context, builder, sig, args):
    aryty = sig.args[0]
    # Return type is a N-tuple of 1D C-contiguous arrays
    retty = sig.return_type
    outaryty = retty.dtype
    ndim = aryty.ndim
    nouts = retty.count

    ary = make_array(aryty)(context, builder, args[0])
    shape = cgutils.unpack_tuple(builder, ary.shape)
    strides = cgutils.unpack_tuple(builder, ary.strides)
    data = ary.data
    layout = aryty.layout

    # First count the number of non-zero elements
    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)
    count = cgutils.alloca_once_value(builder, zero)
    with cgutils.loop_nest(builder, shape, zero.type) as indices:
        ptr = cgutils.get_item_pointer2(builder, data, shape, strides,
                                        layout, indices)
        val = load_item(context, builder, aryty, ptr)
        nz = context.is_true(builder, aryty.dtype, val)
        with builder.if_then(nz):
            builder.store(builder.add(builder.load(count), one), count)

    # Then allocate output arrays of the right size
    out_shape = (builder.load(count),)
    outs = [_empty_nd_impl(context, builder, outaryty, out_shape)._getvalue()
            for i in range(nouts)]
    outarys = [make_array(outaryty)(context, builder, out) for out in outs]
    out_datas = [out.data for out in outarys]

    # And fill them up
    index = cgutils.alloca_once_value(builder, zero)
    with cgutils.loop_nest(builder, shape, zero.type) as indices:
        ptr = cgutils.get_item_pointer2(builder, data, shape, strides,
                                        layout, indices)
        val = load_item(context, builder, aryty, ptr)
        nz = context.is_true(builder, aryty.dtype, val)
        with builder.if_then(nz):
            # Store element indices in output arrays
            if not indices:
                # For a 0-d array, store 0 in the unique output array
                indices = (zero,)
            cur = builder.load(index)
            for i in range(nouts):
                ptr = cgutils.get_item_pointer2(builder, out_datas[i],
                                                out_shape, (), 'C', [cur])
                store_item(context, builder, outaryty, indices[i], ptr)
            builder.store(builder.add(cur, one), index)

    tup = context.make_tuple(builder, sig.return_type, outs)
    return impl_ret_new_ref(context, builder, sig.return_type, tup)
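The two-pass strategy spelled out in the comments (count, allocate, fill) mirrors what np.nonzero does; a rough, non-compiled equivalent for a 1-D array might look like this (illustrative only).

import numpy as np

def nonzero_1d(a):
    # pass 1: count the non-zero elements
    count = sum(1 for x in a if x)
    # pass 2: allocate the output and fill it with indices
    out = np.empty(count, dtype=np.intp)
    pos = 0
    for i, x in enumerate(a):
        if x:
            out[pos] = i
            pos += 1
    return (out,)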
def impl(context, builder, sig, args):
    [tyinp, tyout] = sig.args
    [inp, out] = args
    if scalar_input:
        ndim = 1
    else:
        ndim = tyinp.ndim

    if not scalar_input:
        iary = context.make_array(tyinp)(context, builder, inp)
    oary = context.make_array(tyout)(context, builder, out)

    if asfloat:
        sig = typing.signature(types.float64, types.float64)
    else:
        if scalar_input:
            sig = typing.signature(tyout.dtype, tyinp)
        else:
            sig = typing.signature(tyout.dtype, tyinp.dtype)

    fnwork = context.get_function(funckey, sig)
    intpty = context.get_value_type(types.intp)

    # TODO handle differing shape by mimicking broadcasting
    if scalar_input:
        shape = cgutils.unpack_tuple(builder, oary.shape, ndim)
    else:
        shape = cgutils.unpack_tuple(builder, iary.shape, ndim)
    with cgutils.loop_nest(builder, shape, intp=intpty) as indices:
        if not scalar_input:
            pi = cgutils.get_item_pointer(builder, tyinp, iary, indices)
        po = cgutils.get_item_pointer(builder, tyout, oary, indices)

        if scalar_input:
            ival = inp
        else:
            ival = builder.load(pi)
        if asfloat:
            if scalar_input:
                dval = context.cast(builder, ival, tyinp, types.float64)
            else:
                dval = context.cast(builder, ival, tyinp.dtype, types.float64)
            dres = fnwork(builder, [dval])
            res = context.cast(builder, dres, types.float64, tyout.dtype)
        elif scalar_input and tyinp != tyout:
            tempres = fnwork(builder, [ival])
            res = context.cast(builder, tempres, tyinp, tyout.dtype)
        elif tyinp.dtype != tyout.dtype:
            tempres = fnwork(builder, [ival])
            res = context.cast(builder, tempres, tyinp.dtype, tyout.dtype)
        else:
            res = fnwork(builder, [ival])
        builder.store(res, po)

    return out
def getitem_array_unituple(context, builder, sig, args):
    aryty, idxty = sig.args
    ary, idx = args

    ndim = aryty.ndim
    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)

    if idxty.dtype == types.slice3_type:
        # Slicing
        raw_slices = cgutils.unpack_tuple(builder, idx, aryty.ndim)
        slices = [Slice(context, builder, value=sl) for sl in raw_slices]
        for sl, sh in zip(slices,
                          cgutils.unpack_tuple(builder, ary.shape, ndim)):
            cgutils.normalize_slice(builder, sl, sh)
        indices = [sl.start for sl in slices]
        dataptr = cgutils.get_item_pointer(
            builder, aryty, ary, indices,
            wraparound=context.metadata['wraparound'])
        # Build array
        retstty = make_array(sig.return_type)
        retary = retstty(context, builder)
        retary.data = dataptr
        shapes = [cgutils.get_range_from_slice(builder, sl)
                  for sl in slices]
        retary.shape = cgutils.pack_array(builder, shapes)
        strides = [cgutils.get_strides_from_slice(builder, ndim, ary.strides,
                                                  sl, i)
                   for i, sl in enumerate(slices)]
        retary.strides = cgutils.pack_array(builder, strides)
        return retary._getvalue()
    else:
        # Indexing
        assert idxty.dtype == types.intp
        indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
        indices = [context.cast(builder, i, t, types.intp)
                   for t, i in zip(idxty, indices)]
        ptr = cgutils.get_item_pointer(
            builder, aryty, ary, indices,
            wraparound=context.metadata['wraparound'])
        return context.unpack_value(builder, aryty.dtype, ptr)
def getitem_array_tuple(context, builder, sig, args):
    aryty, idxty = sig.args
    ary, idx = args

    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)

    ndim = aryty.ndim
    if isinstance(sig.return_type, types.Array):
        # Slicing
        raw_indices = cgutils.unpack_tuple(builder, idx, aryty.ndim)
        start = []
        shapes = []
        strides = []

        oshapes = cgutils.unpack_tuple(builder, ary.shape, ndim)
        for ax, (indexval, idxty) in enumerate(zip(raw_indices, idxty)):
            if idxty == types.slice3_type:
                slice = Slice(context, builder, value=indexval)
                cgutils.normalize_slice(builder, slice, oshapes[ax])
                start.append(slice.start)
                shapes.append(cgutils.get_range_from_slice(builder, slice))
                strides.append(cgutils.get_strides_from_slice(builder, ndim,
                                                              ary.strides,
                                                              slice, ax))
            else:
                ind = context.cast(builder, indexval, idxty, types.intp)
                start.append(ind)

        dataptr = cgutils.get_item_pointer(builder, aryty, ary, start,
                                           wraparound=True)
        # Build array
        retstty = make_array(sig.return_type)
        retary = retstty(context, builder)
        populate_array(retary,
                       data=dataptr,
                       shape=cgutils.pack_array(builder, shapes),
                       strides=cgutils.pack_array(builder, strides),
                       itemsize=ary.itemsize,
                       meminfo=ary.meminfo,
                       parent=ary.parent)
        return retary._getvalue()
    else:
        # Indexing
        indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
        indices = [context.cast(builder, i, t, types.intp)
                   for t, i in zip(idxty, indices)]
        ptr = cgutils.get_item_pointer(builder, aryty, ary, indices,
                                       wraparound=True)
        return context.unpack_value(builder, aryty.dtype, ptr)
def _define_atomic_cas(module, ordering):
    """Define an LLVM function for atomic compare-and-swap.

    The generated function is a direct wrapper of the LLVM cmpxchg
    instruction, with the difference that an int indicating success (1)
    or failure (0) is returned, and the last argument is an output
    pointer for storing the old value.

    Note
    ----
    On failure, the generated function behaves like an atomic load.
    The loaded value is stored to the last argument.
    """
    ftype = ir.FunctionType(ir.IntType(32), [_word_type.as_pointer(),
                                             _word_type, _word_type,
                                             _word_type.as_pointer()])
    fn_cas = ir.Function(module, ftype, name="nrt_atomic_cas")

    [ptr, cmp, repl, oldptr] = fn_cas.args
    bb = fn_cas.append_basic_block()
    builder = ir.IRBuilder(bb)
    outtup = builder.cmpxchg(ptr, cmp, repl, ordering=ordering)
    old, ok = cgutils.unpack_tuple(builder, outtup, 2)
    builder.store(old, oldptr)
    builder.ret(builder.zext(ok, ftype.return_type))
    return fn_cas
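The wrapper's semantics, as described in the docstring, amount to the following non-atomic Python sketch (illustrative only; the generated function is a single LLVM cmpxchg, modelled here with one-element lists standing in for pointers).

def nrt_atomic_cas(ptr, cmp, repl, oldptr):
    # illustrative: compare-and-swap on a one-element list "pointer"
    old = ptr[0]
    oldptr[0] = old          # the old value is always written out
    if old == cmp:
        ptr[0] = repl        # swap only when the comparison succeeds
        return 1
    return 0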
def gen_call(context, builder, sig, args, c_func):
    '''
    This generates the LLVM code for calling a d4p C function.
    May also convert to ndarray.
    Used by our dynamically generated/exec'ed @lower_builtin/@lower_getattr
    functions below.
    '''
    # the first arg is always our algo object (shrd_ptr)
    lir_types = [lir.IntType(8).as_pointer()]
    c_args = [args[0]]
    # prepare our args (usually none for most get_* attributes/properties)
    for i in range(1, len(args)):
        lirt = get_lir_type(context, sig.args[i])
        if isinstance(lirt, list):  # Array!
            # generate lir code to extract actual arguments
            # collect args/types in list
            lir_types += lirt
            in_arrtype = sig.args[i]
            in_array = context.make_array(in_arrtype)(context, builder,
                                                      args[i])
            in_shape = cgutils.unpack_tuple(builder, in_array.shape)
            c_args += [in_array.data, in_shape[0], in_shape[1]]
        else:
            lir_types.append(lirt)
            c_args.append(args[i])
    #ret_typ = sig if c_func.startswith('get_') else sig.return_type
    # Our getter might return an array, which needs special handling
    ret_is_array = isinstance(sig.return_type, types.Array)
    # define our llvm return type
    c_func_ret_type = (lir.IntType(8).as_pointer() if ret_is_array
                       else context.get_data_type(sig.return_type))
    # Now we can define the signature
    fnty = lir.FunctionType(c_func_ret_type, lir_types)
    # Get function
    fn = builder.module.get_or_insert_function(fnty, name=c_func)
    # and finally generate the call
    ptr = builder.call(fn, c_args)
    return nt2nd(context, builder, ptr, sig.return_type) if ret_is_array else ptr
def iternext_array(context, builder, sig, args, result):
    [iterty] = sig.args
    [iter] = args
    arrayty = iterty.array_type

    if arrayty.ndim != 1:
        # TODO
        raise NotImplementedError("iterating over %dD array" % arrayty.ndim)

    iterobj = make_arrayiter_cls(iterty)(context, builder, value=iter)
    ary = make_array(arrayty)(context, builder, value=iterobj.array)

    nitems, = cgutils.unpack_tuple(builder, ary.shape, count=1)

    index = builder.load(iterobj.index)
    is_valid = builder.icmp(lc.ICMP_SLT, index, nitems)
    result.set_valid(is_valid)

    with cgutils.ifthen(builder, is_valid):
        value = _getitem_array1d(context, builder, arrayty, ary, index,
                                 wraparound=False)
        result.yield_(value)
        nindex = builder.add(index, context.get_constant(types.intp, 1))
        builder.store(nindex, iterobj.index)
def hsail_atomic_add_tuple(context, builder, sig, args):
    aryty, indty, valty = sig.args
    ary, inds, val = args
    dtype = aryty.dtype

    if indty == types.intp:
        indices = [inds]  # just a single integer
        indty = [indty]
    else:
        indices = cgutils.unpack_tuple(builder, inds, count=len(indty))
        indices = [context.cast(builder, i, t, types.intp)
                   for t, i in zip(indty, indices)]

    if dtype != valty:
        raise TypeError("expecting %s but got %s" % (dtype, valty))

    if aryty.ndim != len(indty):
        raise TypeError("indexing %d-D array with %d-D index" %
                        (aryty.ndim, len(indty)))

    lary = context.make_array(aryty)(context, builder, ary)
    ptr = cgutils.get_item_pointer(builder, aryty, lary, indices)

    return builder.atomic_rmw("add", ptr, val, ordering='monotonic')
def ptx_atomic_add_tuple(context, builder, sig, args):
    aryty, indty, valty = sig.args
    ary, inds, val = args
    dtype = aryty.dtype

    indices = cgutils.unpack_tuple(builder, inds, count=len(indty))
    indices = [context.cast(builder, i, t, types.intp)
               for t, i in zip(indty, indices)]

    if dtype != valty:
        raise TypeError("expect %s but got %s" % (dtype, valty))
    if aryty.ndim != len(indty):
        raise TypeError("indexing %d-D array with %d-D index" %
                        (aryty.ndim, len(indty)))

    lary = context.make_array(aryty)(context, builder, ary)
    ptr = cgutils.get_item_pointer(builder, aryty, lary, indices)

    if aryty.dtype == types.float32:
        lmod = cgutils.get_module(builder)
        return builder.call(nvvmutils.declare_atomic_add_float32(lmod),
                            (ptr, val))
    else:
        return builder.atomic_rmw('add', ptr, val, 'monotonic')
def array_record_getattr(context, builder, typ, value, attr):
    arrayty = make_array(typ)
    array = arrayty(context, builder, value)
    rectype = typ.dtype
    assert isinstance(rectype, types.Record)
    dtype = rectype.typeof(attr)
    offset = rectype.offset(attr)

    resty = types.Array(dtype, ndim=typ.ndim, layout='A')
    raryty = make_array(resty)

    rary = raryty(context, builder)
    rary.shape = array.shape

    constoffset = context.get_constant(types.intp, offset)
    unpackedstrides = cgutils.unpack_tuple(builder, array.strides, typ.ndim)
    newstrides = [builder.add(s, constoffset) for s in unpackedstrides]

    rary.strides = array.strides

    llintp = context.get_value_type(types.intp)
    newdata = builder.add(builder.ptrtoint(array.data, llintp), constoffset)
    newdataptr = builder.inttoptr(newdata, rary.data.type)
    rary.data = newdataptr

    return rary._getvalue()
def dot_2_vv(context, builder, sig, args, conjugate=False):
    """
    np.dot(vector, vector)
    np.vdot(vector, vector)
    """
    aty, bty = sig.args
    dtype = sig.return_type
    a = make_array(aty)(context, builder, args[0])
    b = make_array(bty)(context, builder, args[1])
    n, = cgutils.unpack_tuple(builder, a.shape)

    def check_args(a, b):
        m, = a.shape
        n, = b.shape
        if m != n:
            raise ValueError("incompatible array sizes for np.dot(a, b) "
                             "(vector * vector)")
    context.compile_internal(builder, check_args,
                             signature(types.none, *sig.args), args)
    check_c_int(context, builder, n)

    out = cgutils.alloca_once(builder, context.get_value_type(dtype))
    call_xxdot(context, builder, conjugate, dtype, n, a.data, b.data, out)
    return builder.load(out)
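A user-level sketch of the operation this lowers, assuming nopython-mode np.dot support (BLAS-backed) is available:

import numpy as np
from numba import njit

@njit
def vdot2(a, b):
    # vector * vector dot product; lowered by dot_2_vv
    return np.dot(a, b)

# vdot2(np.ones(3), np.arange(3.0)) -> 3.0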
def iternext_series_array(context, builder, sig, args, result):
    """
    Implementation of iternext() for the ArrayIterator type

    :param context: context descriptor
    :param builder: llvmlite IR Builder
    :param sig: iterator signature
    :param args: tuple with iterator arguments, such as instruction,
                 operands and types
    :param result: iternext result
    """
    [iterty] = sig.args
    [iter] = args
    arrayty = iterty.array_type

    if arrayty.ndim != 1:
        raise NotImplementedError("iterating over %dD array" % arrayty.ndim)

    iterobj = context.make_helper(builder, iterty, value=iter)
    ary = make_array(arrayty)(context, builder, value=iterobj.array)

    nitems, = cgutils.unpack_tuple(builder, ary.shape, count=1)

    index = builder.load(iterobj.index)
    is_valid = builder.icmp(lc.ICMP_SLT, index, nitems)
    result.set_valid(is_valid)

    with builder.if_then(is_valid):
        value = _getitem_array1d(context, builder, arrayty, ary, index,
                                 wraparound=False)
        result.yield_(value)
        nindex = cgutils.increment_index(builder, index)
        builder.store(nindex, iterobj.index)
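At the user level, an iternext implementation like this is what drives a plain for-loop over a 1-D array; a small sketch, assuming standard nopython-mode array iteration:

import numpy as np
from numba import njit

@njit
def total(a):
    # each loop step goes through an iternext implementation like the one above
    s = 0.0
    for x in a:
        s += x
    return s

# total(np.arange(5.0)) -> 10.0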
def getitem_array1d_slice(context, builder, sig, args):
    aryty, _ = sig.args
    if aryty.ndim != 1:
        # TODO
        raise NotImplementedError("1D indexing into %dD array" % aryty.ndim)

    ary, idx = args

    arystty = make_array(aryty)
    ary = arystty(context, builder, value=ary)

    shapes = cgutils.unpack_tuple(builder, ary.shape, aryty.ndim)

    slicestruct = Slice(context, builder, value=idx)
    cgutils.normalize_slice(builder, slicestruct, shapes[0])

    dataptr = cgutils.get_item_pointer(builder, aryty, ary,
                                       [slicestruct.start], wraparound=True)

    retstty = make_array(sig.return_type)
    retary = retstty(context, builder)

    shape = cgutils.get_range_from_slice(builder, slicestruct)
    retary.shape = cgutils.pack_array(builder, [shape])

    stride = cgutils.get_strides_from_slice(builder, aryty.ndim, ary.strides,
                                            slicestruct, 0)
    retary.strides = cgutils.pack_array(builder, [stride])
    retary.data = dataptr

    return retary._getvalue()
def _box_class_instance(typ, val, c):
    meminfo, dataptr = cgutils.unpack_tuple(c.builder, val)

    # Create Box instance
    box_subclassed = _specialize_box(typ)
    # Note: the ``box_subclassed`` is kept alive by the cache
    int_addr_boxcls = c.context.get_constant(types.uintp, id(box_subclassed))

    box_cls = c.builder.inttoptr(int_addr_boxcls, c.pyapi.pyobj)
    box = c.pyapi.call_function_objargs(box_cls, ())

    # Initialize Box instance
    llvoidptr = ir.IntType(8).as_pointer()
    addr_meminfo = c.builder.bitcast(meminfo, llvoidptr)
    addr_data = c.builder.bitcast(dataptr, llvoidptr)

    def set_member(member_offset, value):
        # Access member by byte offset
        offset = c.context.get_constant(types.uintp, member_offset)
        ptr = cgutils.pointer_add(c.builder, box, offset)
        casted = c.builder.bitcast(ptr, llvoidptr.as_pointer())
        c.builder.store(value, casted)

    set_member(_box.box_meminfoptr_offset, addr_meminfo)
    set_member(_box.box_dataptr_offset, addr_data)
    return box
def impl(context, builder, sig, args):
    [tyinp, tyout] = sig.args
    [inp, out] = args
    ndim = tyinp.ndim

    iary = context.make_array(tyinp)(context, builder, inp)
    oary = context.make_array(tyout)(context, builder, out)

    if asfloat:
        sig = typing.signature(types.float64, types.float64)
    else:
        sig = typing.signature(tyout.dtype, tyinp.dtype)

    fnwork = context.get_function(funckey, sig)
    intpty = context.get_value_type(types.intp)

    # TODO handle differing shape by mimicking broadcasting
    shape = cgutils.unpack_tuple(builder, iary.shape, ndim)
    with cgutils.loop_nest(builder, shape, intp=intpty) as indices:
        pi = cgutils.get_item_pointer(builder, tyinp, iary, indices)
        po = cgutils.get_item_pointer(builder, tyout, oary, indices)

        ival = builder.load(pi)
        if asfloat:
            dval = context.cast(builder, ival, tyinp.dtype, types.float64)
            dres = fnwork(builder, [dval])
            res = context.cast(builder, dres, types.float64, tyout.dtype)
        elif tyinp.dtype != tyout.dtype:
            tempres = fnwork(builder, [ival])
            res = context.cast(builder, tempres, tyinp.dtype, tyout.dtype)
        else:
            res = fnwork(builder, [ival])
        builder.store(res, po)

    return out
def ptx_atomic_add_tuple(context, builder, sig, args):
    aryty, indty, valty = sig.args
    ary, inds, val = args
    dtype = aryty.dtype

    indices = cgutils.unpack_tuple(builder, inds, count=len(indty))
    indices = [context.cast(builder, i, t, types.intp)
               for t, i in zip(indty, indices)]

    if dtype != valty:
        raise TypeError("expect %s but got %s" % (dtype, valty))
    if aryty.ndim != len(indty):
        raise TypeError("indexing %d-D array with %d-D index" %
                        (aryty.ndim, len(indty)))

    lary = context.make_array(aryty)(context, builder, ary)
    ptr = cgutils.get_item_pointer(builder, aryty, lary, indices)

    if aryty.dtype == types.float32:
        lmod = builder.module
        return builder.call(nvvmutils.declare_atomic_add_float32(lmod),
                            (ptr, val))
    elif aryty.dtype == types.float64:
        lmod = builder.module
        return builder.call(nvvmutils.declare_atomic_add_float64(lmod),
                            (ptr, val))
    else:
        return builder.atomic_rmw("add", ptr, val, "monotonic")
def _gauss_impl(context, builder, sig, args, state):
    # The type for all computations (either float or double)
    ty = sig.return_type
    llty = context.get_data_type(ty)

    state_ptr = get_state_ptr(context, builder, state)
    _random = {"py": random.random,
               "np": np.random.random}[state]

    ret = cgutils.alloca_once(builder, llty, name="result")

    gauss_ptr = get_gauss_ptr(builder, state_ptr)
    has_gauss_ptr = get_has_gauss_ptr(builder, state_ptr)
    has_gauss = cgutils.is_true(builder, builder.load(has_gauss_ptr))
    with cgutils.ifelse(builder, has_gauss) as (then, otherwise):
        with then:
            # if has_gauss: return it
            builder.store(builder.load(gauss_ptr), ret)
            builder.store(const_int(0), has_gauss_ptr)
        with otherwise:
            # if not has_gauss: compute a pair of numbers using the
            # Box-Muller transform; keep one and return the other
            pair = context.compile_internal(builder,
                                            _gauss_pair_impl(_random),
                                            signature(types.UniTuple(ty, 2)),
                                            ())
            first, second = cgutils.unpack_tuple(builder, pair, 2)
            builder.store(first, gauss_ptr)
            builder.store(second, ret)
            builder.store(const_int(1), has_gauss_ptr)

    mu, sigma = args
    return builder.fadd(mu, builder.fmul(sigma, builder.load(ret)))
def _box_class_instance(typ, val, c):
    meminfo, dataptr = cgutils.unpack_tuple(c.builder, val)

    lluintp = c.context.get_data_type(types.uintp)

    addr_meminfo = c.pyapi.from_native_value(types.uintp,
                                             c.builder.ptrtoint(meminfo,
                                                                lluintp))
    addr_dataptr = c.pyapi.from_native_value(types.uintp,
                                             c.builder.ptrtoint(dataptr,
                                                                lluintp))

    box_subclassed = _specialize_box(typ)
    # Note: the ``box_subclassed`` is kept alive by the cache
    int_addr_boxcls = c.context.get_constant(types.uintp, id(box_subclassed))

    box_cls = c.builder.inttoptr(int_addr_boxcls, c.pyapi.pyobj)
    args = [addr_meminfo, addr_dataptr]
    res = c.pyapi.call_function_objargs(box_cls, args)

    # Clean up
    c.pyapi.decref(addr_meminfo)
    c.pyapi.decref(addr_dataptr)

    return res
def ptx_atomic_max_tuple(context, builder, sig, args):
    aryty, indty, valty = sig.args
    ary, inds, val = args
    dtype = aryty.dtype

    indices = cgutils.unpack_tuple(builder, inds, count=len(indty))
    indices = [context.cast(builder, i, t, types.intp)
               for t, i in zip(indty, indices)]

    if dtype != valty:
        raise TypeError("expect %s but got %s" % (dtype, valty))
    if aryty.ndim != len(indty):
        raise TypeError("indexing %d-D array with %d-D index" %
                        (aryty.ndim, len(indty)))

    lary = context.make_array(aryty)(context, builder, ary)
    ptr = cgutils.get_item_pointer(builder, aryty, lary, indices)

    if aryty.dtype == types.float64:
        lmod = builder.module
        return builder.call(nvvmutils.declare_atomic_max_float64(lmod),
                            (ptr, val))
    else:
        raise TypeError('Unimplemented atomic max with %s array' % dtype)
def _gauss_impl(context, builder, sig, args, state):
    # The type for all computations (either float or double)
    ty = sig.return_type
    llty = context.get_data_type(ty)

    state_ptr = get_state_ptr(context, builder, state)
    _random = {"py": random.random,
               "np": np.random.random}[state]

    ret = cgutils.alloca_once(builder, llty, name="result")

    gauss_ptr = get_gauss_ptr(builder, state_ptr)
    has_gauss_ptr = get_has_gauss_ptr(builder, state_ptr)
    has_gauss = cgutils.is_true(builder, builder.load(has_gauss_ptr))
    with builder.if_else(has_gauss) as (then, otherwise):
        with then:
            # if has_gauss: return it
            builder.store(builder.load(gauss_ptr), ret)
            builder.store(const_int(0), has_gauss_ptr)
        with otherwise:
            # if not has_gauss: compute a pair of numbers using the
            # Box-Muller transform; keep one and return the other
            pair = context.compile_internal(builder,
                                            _gauss_pair_impl(_random),
                                            signature(types.UniTuple(ty, 2)),
                                            ())
            first, second = cgutils.unpack_tuple(builder, pair, 2)
            builder.store(first, gauss_ptr)
            builder.store(second, ret)
            builder.store(const_int(1), has_gauss_ptr)

    mu, sigma = args
    return builder.fadd(mu, builder.fmul(sigma, builder.load(ret)))
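The cached-pair logic follows the Box-Muller scheme mentioned in the comment: each draw produces two independent normals, one is returned and one is stashed for the next call. One common form of the transform, in plain Python for illustration:

import math
import random

def gauss_pair(rand=random.random):
    # Box-Muller: two uniforms in, two independent standard normals out
    u1 = 1.0 - rand()   # avoid log(0)
    u2 = rand()
    r = math.sqrt(-2.0 * math.log(u1))
    return r * math.cos(2.0 * math.pi * u2), r * math.sin(2.0 * math.pi * u2)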
def getitem_array1d(context, builder, sig, args):
    aryty, _ = sig.args
    if aryty.ndim != 1:
        # TODO
        raise NotImplementedError("1D indexing into %dD array" % aryty.ndim)

    ary, idx = args

    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)

    dataptr = ary.data

    if True or WARPAROUND:  # TODO target flag
        ZERO = context.get_constant(types.intp, 0)
        negative = builder.icmp(lc.ICMP_SLT, idx, ZERO)
        bbnormal = builder.basic_block
        with cgutils.if_unlikely(builder, negative):
            # Index is negative, wraparound
            [nelem] = cgutils.unpack_tuple(builder, ary.shape, 1)
            wrapped = builder.add(nelem, idx)
            bbwrapped = builder.basic_block

        where = builder.phi(idx.type)
        where.add_incoming(idx, bbnormal)
        where.add_incoming(wrapped, bbwrapped)

        ptr = builder.gep(dataptr, [where])
    else:
        # No wraparound
        ptr = builder.gep(dataptr, [idx])

    if context.is_struct_type(aryty.dtype):
        return ptr
    else:
        return builder.load(ptr)
def iternext_specific(self, context, builder, result):
    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)

    bbend = cgutils.append_basic_block(builder, 'end')

    exhausted = cgutils.as_bool_bit(builder, builder.load(self.exhausted))
    with cgutils.if_unlikely(builder, exhausted):
        result.set_valid(False)
        builder.branch(bbend)

    indices = [builder.load(cgutils.gep(builder, self.indices, dim))
               for dim in range(ndim)]
    result.yield_(cgutils.pack_array(builder, indices))
    result.set_valid(True)

    shape = cgutils.unpack_tuple(builder, self.shape, ndim)
    _increment_indices(context, builder, ndim, shape,
                       self.indices, self.exhausted)

    builder.branch(bbend)
    builder.position_at_end(bbend)
def codegen(context, builder, signature, args):
    # check that the return type is now defined
    arrty = signature.return_type
    assert arrty.is_precise()
    shapes = unpack_tuple(builder, args[0])
    # redirect implementation to np.empty
    res = _empty_nd_impl(context, builder, arrty, shapes)
    return impl_ret_new_ref(context, builder, arrty, res._getvalue())
def _increment_indices_array(context, builder, arrty, arr, indices,
                             end_flag=None):
    shape = cgutils.unpack_tuple(builder, arr.shape, arrty.ndim)
    _increment_indices(context, builder, arrty.ndim, shape, indices, end_flag)
def array_nbytes(context, builder, typ, value):
    """
    nbytes = size * itemsize
    """
    arrayty = make_array(typ)
    array = arrayty(context, builder, value)
    dims = cgutils.unpack_tuple(builder, array.shape, typ.ndim)
    return builder.mul(array.nitems, array.itemsize)
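The docstring's formula (nbytes = size * itemsize) at the user level; a sketch assuming the .nbytes attribute is supported for the array type in nopython mode:

import numpy as np
from numba import njit

@njit
def nbytes(a):
    # attribute lowered by array_nbytes: nitems * itemsize
    return a.nbytes

# nbytes(np.ones((2, 3))) -> 48 for float64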
def getitem_array_unituple(context, builder, sig, args):
    aryty, idxty = sig.args
    ary, idx = args

    ndim = aryty.ndim
    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)

    if idxty.dtype == types.slice3_type:
        # Slicing
        raw_slices = cgutils.unpack_tuple(builder, idx, aryty.ndim)
        slices = [Slice(context, builder, value=sl) for sl in raw_slices]
        for sl, sh in zip(slices,
                          cgutils.unpack_tuple(builder, ary.shape, ndim)):
            cgutils.normalize_slice(builder, sl, sh)
        indices = [sl.start for sl in slices]
        dataptr = cgutils.get_item_pointer(builder, aryty, ary, indices,
                                           wraparound=True)
        # Build array
        retstty = make_array(sig.return_type)
        retary = retstty(context, builder)
        shapes = [cgutils.get_range_from_slice(builder, sl)
                  for sl in slices]
        strides = [cgutils.get_strides_from_slice(builder, ndim, ary.strides,
                                                  sl, i)
                   for i, sl in enumerate(slices)]
        populate_array(retary,
                       data=dataptr,
                       shape=cgutils.pack_array(builder, shapes),
                       strides=cgutils.pack_array(builder, strides),
                       itemsize=ary.itemsize,
                       meminfo=ary.meminfo,
                       parent=ary.parent)
        return retary._getvalue()
    else:
        # Indexing
        assert isinstance(idxty.dtype, types.Integer)
        indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
        indices = [context.cast(builder, i, t, types.intp)
                   for t, i in zip(idxty, indices)]
        ptr = cgutils.get_item_pointer(builder, aryty, ary, indices,
                                       wraparound=idxty.dtype.signed)
        return context.unpack_value(builder, aryty.dtype, ptr)
def iternext_numpy_flatiter(context, builder, sig, args, result):
    [flatiterty] = sig.args
    [flatiter] = args

    flatitercls = make_array_flat_cls(flatiterty)
    flatiter = flatitercls(context, builder, value=flatiter)

    arrty = flatiterty.array_type
    arrcls = context.make_array(arrty)
    arr = arrcls(context, builder, value=builder.load(flatiter.array))

    ndim = arrty.ndim
    shapes = cgutils.unpack_tuple(builder, arr.shape, ndim)
    indptr = flatiter.iters

    # Load indices and check if they are valid
    indices = []
    is_valid = cgutils.true_bit
    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)
    for ax in range(ndim):
        axsize = shapes[ax]
        idxptr = builder.gep(indptr, [context.get_constant(types.intp, ax)])
        idx = builder.load(idxptr)

        ax_valid = builder.icmp(lc.ICMP_SLT, idx, axsize)

        indices.append(idx)
        is_valid = builder.and_(is_valid, ax_valid)

    result.set_valid(is_valid)

    with cgutils.if_likely(builder, is_valid):
        # Get yielded value
        valptr = cgutils.get_item_pointer(builder, arrty, arr, indices)
        yield_value = builder.load(valptr)
        result.yield_(yield_value)

        # Increment iterator indices
        carry_flags = [cgutils.true_bit]
        for ax, (idx, axsize) in reversed(list(enumerate(zip(indices,
                                                             shapes)))):
            idxptr = builder.gep(indptr,
                                 [context.get_constant(types.intp, ax)])
            lastcarry = carry_flags[-1]
            idxp1 = builder.add(idx, one)
            carry = builder.icmp(lc.ICMP_SGE, idxp1, axsize)
            idxfinal = builder.select(lastcarry,
                                      builder.select(carry, zero, idxp1),
                                      idx)
            builder.store(idxfinal, idxptr)
            carry_flags.append(builder.and_(carry, lastcarry))

        with cgutils.if_unlikely(builder, carry_flags[-1]):
            # If we have iterated all elements,
            # set the first index to out-of-bound
            idxptr = builder.gep(indptr,
                                 [context.get_constant(types.intp, 0)])
            builder.store(shapes[0], idxptr)
def codegen(context, builder, sig, args):
    (iterty,) = sig.args
    (value,) = args
    intp_t = context.get_value_type(types.intp)
    iterobj = context.make_helper(builder, iterty, value=value)
    arrayty = iterty.array_type
    ary = make_array(arrayty)(context, builder, value=iterobj.array)
    shape = cgutils.unpack_tuple(builder, ary.shape)
    # array iterates along the outer dimension
    return impl_ret_untracked(context, builder, intp_t, shape[0])
def init_specific(self, context, builder, arrty, arr):
    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)
    data = arr.data
    ndim = arrty.ndim
    shapes = cgutils.unpack_tuple(builder, arr.shape, ndim)

    indices = cgutils.alloca_once(builder, zero.type,
                                  size=context.get_constant(types.intp,
                                                            arrty.ndim))
    pointers = cgutils.alloca_once(builder, data.type,
                                   size=context.get_constant(types.intp,
                                                             arrty.ndim))
    strides = cgutils.unpack_tuple(builder, arr.strides, ndim)
    empty = cgutils.alloca_once_value(builder, cgutils.false_byte)

    # Initialize each dimension with the next index and pointer
    # values.  For the last (inner) dimension, this is 0 and the
    # start pointer, for the other dimensions, this is 1 and the
    # pointer to the next subarray after start.
    for dim in range(ndim):
        idxptr = cgutils.gep(builder, indices, dim)
        ptrptr = cgutils.gep(builder, pointers, dim)
        if dim == ndim - 1:
            builder.store(zero, idxptr)
            builder.store(data, ptrptr)
        else:
            p = cgutils.pointer_add(builder, data, strides[dim])
            builder.store(p, ptrptr)
            builder.store(one, idxptr)
        # 0-sized dimensions really indicate an empty array,
        # but we have to catch that condition early to avoid
        # a bug inside the iteration logic (see issue #846).
        dim_size = shapes[dim]
        dim_is_empty = builder.icmp(lc.ICMP_EQ, dim_size, zero)
        with cgutils.if_unlikely(builder, dim_is_empty):
            builder.store(cgutils.true_byte, empty)

    self.indices = indices
    self.pointers = pointers
    self.empty = empty
def _normalize_indices(context, builder, indty, inds):
    """
    Convert integer indices into tuple of intp
    """
    if indty in types.integer_domain:
        indty = types.UniTuple(dtype=indty, count=1)
        indices = [inds]
    else:
        indices = cgutils.unpack_tuple(builder, inds, count=len(indty))
    indices = [context.cast(builder, i, t, types.intp)
               for t, i in zip(indty, indices)]
    return indty, indices
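The Python-level intent of the docstring (a bare integer index becomes a 1-tuple, and everything is widened to intp), as an illustrative sketch:

def normalize_indices_py(ind):
    # illustrative only: a scalar index is treated as a 1-tuple of ints
    return (int(ind),) if isinstance(ind, int) else tuple(int(i) for i in ind)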
def make_array_ndindex(context, builder, sig, args):
    """ndindex(shape)"""
    ndim = sig.return_type.ndim
    idxty = sig.args[0].dtype
    tup = args[0]

    shape = cgutils.unpack_tuple(builder, tup, ndim)
    shape = [context.cast(builder, idx, idxty, types.intp)
             for idx in shape]

    nditercls = make_ndindex_cls(types.NumpyNdIndexType(len(shape)))
    nditer = nditercls(context, builder)
    nditer.init_specific(context, builder, shape)

    return nditer._getvalue()
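A user-level example of the np.ndindex call this constructs, assuming nopython-mode support for np.ndindex:

import numpy as np
from numba import njit

@njit
def count_cells(shape):
    # np.ndindex(shape) is what make_array_ndindex lowers
    n = 0
    for _ in np.ndindex(shape):
        n += 1
    return n

# count_cells((2, 3)) -> 6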
def numpy_empty_nd(context, builder, sig, args):
    arrshapetype = sig.args[0]
    arrshape = args[0]
    arrtype = sig.return_type

    if isinstance(arrshapetype, types.Integer):
        shapes = [context.cast(builder, arrshape, arrshapetype, types.intp)]
    else:
        arrshape = context.cast(builder, arrshape, arrshapetype,
                                types.UniTuple(types.intp,
                                               len(arrshapetype)))
        shapes = cgutils.unpack_tuple(builder, arrshape,
                                      count=len(arrshapetype))

    return _empty_nd_impl(context, builder, arrtype, shapes)
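Both shape forms handled above (a single integer or a tuple) correspond to ordinary np.empty calls; a small sketch, assuming nopython-mode np.empty:

import numpy as np
from numba import njit

@njit
def make(n):
    # integer and tuple shapes both route through numpy_empty_nd
    return np.empty(n), np.empty((n, 2))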
def setitem_array_tuple(context, builder, sig, args):
    aryty, idxty, valty = sig.args
    ary, idx, val = args

    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)

    # TODO: other than layout
    indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
    indices = [context.cast(builder, i, t, types.intp)
               for t, i in zip(idxty, indices)]
    ptr = cgutils.get_item_pointer(builder, aryty, ary, indices,
                                   wraparound=True)
    context.pack_value(builder, aryty.dtype, val, ptr)
def populate_array(array, data, shape, strides, itemsize, meminfo,
                   parent=None):
    """
    Helper function for populating array structures.
    This avoids forgetting to set fields.
    """
    context = array._context
    builder = array._builder
    datamodel = array._datamodel
    required_fields = set(datamodel._fields)

    if meminfo is None:
        meminfo = Constant.null(context.get_value_type(
            datamodel.get_type('meminfo')))

    attrs = dict(shape=shape,
                 strides=strides,
                 data=data,
                 itemsize=itemsize,
                 meminfo=meminfo)

    # Set `parent` attribute
    if parent is None:
        attrs['parent'] = Constant.null(context.get_value_type(
            datamodel.get_type('parent')))
    else:
        attrs['parent'] = parent

    # Calc num of items from shape
    nitems = context.get_constant(types.intp, 1)
    unpacked_shape = cgutils.unpack_tuple(builder, shape, shape.type.count)
    if unpacked_shape:
        # Shape is not empty
        for axlen in unpacked_shape:
            nitems = builder.mul(nitems, axlen)
    else:
        # Shape is empty
        nitems = context.get_constant(types.intp, 0)
    attrs['nitems'] = nitems

    # Make sure that we have all the fields
    got_fields = set(attrs.keys())
    if got_fields != required_fields:
        raise ValueError("missing {0}".format(required_fields - got_fields))

    # Set field value
    for k, v in attrs.items():
        setattr(array, k, v)

    return array
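A typical call site, mirroring the slicing path in getitem_array_unituple above (illustrative; dataptr, shapes, and strides are produced by the surrounding lowering code):

# populate every field of the freshly created array struct in one call
populate_array(retary,
               data=dataptr,
               shape=cgutils.pack_array(builder, shapes),
               strides=cgutils.pack_array(builder, strides),
               itemsize=ary.itemsize,
               meminfo=ary.meminfo,
               parent=ary.parent)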
def setitem_array_unituple(context, builder, sig, args):
    aryty, idxty, valty = sig.args
    ary, idx, val = args

    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)

    # TODO: other than layout
    indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
    ptr = cgutils.get_item_pointer(builder, aryty, ary, indices,
                                   wraparound=True)
    if context.is_struct_type(aryty.dtype):
        stval = builder.load(val)
    else:
        stval = val
    builder.store(stval, ptr)
def getitem_array_unituple(context, builder, sig, args):
    aryty, idxty = sig.args
    ary, idx = args

    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)

    # TODO: other layout
    indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
    # TODO wraparound flag
    ptr = cgutils.get_item_pointer(builder, aryty, ary, indices,
                                   wraparound=True)
    if context.is_struct_type(aryty.dtype):
        return ptr
    else:
        return builder.load(ptr)
def getitem_tuple_lower(context, builder, sig, args):
    tupty, idx = sig.args
    idx = idx.literal_value
    tup, _ = args
    if isinstance(idx, int):
        if idx < 0:
            idx += len(tupty)
        if not 0 <= idx < len(tupty):
            raise IndexError("cannot index at %d in %s" % (idx, tupty))
        res = builder.extract_value(tup, idx)
    elif isinstance(idx, slice):
        items = cgutils.unpack_tuple(builder, tup)[idx]
        res = context.make_tuple(builder, sig.return_type, items)
    else:
        raise NotImplementedError("unexpected index %r for %s"
                                  % (idx, sig.args[0]))
    return impl_ret_borrowed(context, builder, sig.return_type, res)
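A user-level sketch of the static tuple indexing this handles (constant integer and constant slice indices), assuming ordinary nopython-mode tuple support:

from numba import njit

@njit
def pick(t):
    # a constant integer index and a constant slice, both resolved statically
    return t[1], t[1:]

# pick((10, 20, 30)) -> (20, (20, 30))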
def _image_to_array(context, builder, shapes_array, arrtype, data, img):
    # allocate array
    shapes = cgutils.unpack_tuple(builder, builder.load(shapes_array))
    ary = _empty_nd_impl(context, builder, arrtype, shapes)
    cgutils.raw_memcpy(builder, ary.data, builder.load(data), ary.nitems,
                       ary.itemsize, align=1)

    # clean up cv::Mat image
    fnty = lir.FunctionType(lir.VoidType(), [lir.IntType(8).as_pointer()])
    fn_release = builder.module.get_or_insert_function(fnty,
                                                       name="cv_mat_release")
    builder.call(fn_release, [img])

    return impl_ret_new_ref(context, builder, arrtype, ary._getvalue())
def ptx_atomic_add_tuple(context, builder, sig, args):
    aryty, indty, valty = sig.args
    ary, inds, val = args
    dtype = aryty.dtype

    indices = cgutils.unpack_tuple(builder, inds, count=len(indty))
    indices = [context.cast(builder, i, t, types.intp)
               for t, i in zip(indty, indices)]

    if dtype != valty:
        raise TypeError("expect %s but got %s" % (dtype, valty))
    if aryty.ndim != len(indty):
        raise TypeError("indexing %d-D array with %d-D index" %
                        (aryty.ndim, len(indty)))

    lary = context.make_array(aryty)(context, builder, ary)
    ptr = cgutils.get_item_pointer(builder, aryty, lary, indices)

    return builder.atomic_rmw('add', ptr, val, 'monotonic')
def dot_2_vv(context, builder, sig, args, conjugate=False):
    """
    np.dot(vector, vector)
    np.vdot(vector, vector)
    """
    aty, bty = sig.args
    dtype = sig.return_type
    a = make_array(aty)(context, builder, args[0])
    b = make_array(bty)(context, builder, args[1])
    n, = cgutils.unpack_tuple(builder, a.shape)

    def check_args(a, b):
        m, = a.shape
        n, = b.shape
        if m != n:
            raise ValueError("incompatible array sizes for np.dot(a, b) "
                             "(vector * vector)")
    context.compile_internal(builder, check_args,
                             signature(types.none, *sig.args), args)
    check_c_int(context, builder, n)

    out = cgutils.alloca_once(builder, context.get_value_type(dtype))

    fnty = ir.FunctionType(ir.IntType(32),
                           [ll_char, ll_char, intp_t,        # kind, conjugate, n
                            ll_void_p, ll_void_p, ll_void_p,  # a, b, out
                            ])
    fn = builder.module.get_or_insert_function(fnty, name="numba_xxdot")

    kind = get_blas_kind(dtype)
    kind_val = ir.Constant(ll_char, ord(kind))
    conjugate = ir.Constant(ll_char, int(conjugate))

    res = builder.call(fn, (kind_val, conjugate, n,
                            builder.bitcast(a.data, ll_void_p),
                            builder.bitcast(b.data, ll_void_p),
                            builder.bitcast(out, ll_void_p)))
    check_blas_return(context, builder, res)

    return builder.load(out)