def array_min(context, builder, sig, args):
    """
    Lower ``min()`` over an array by compiling a pure-Python kernel.

    For datetime/timedelta element types the kernel skips NaT values and
    returns NaT only when no other value is found; for every other element
    type it performs a plain linear scan.
    """
    elem_ty = sig.args[0].dtype
    is_temporal = isinstance(elem_ty, (types.NPDatetime, types.NPTimedelta))

    if is_temporal:
        # NaT is smaller than every other value, but it is
        # ignored as far as min() is concerned.
        nat = elem_ty("NaT")

        def array_min_impl(arry):
            smallest = nat
            flat_iter = arry.flat
            # Seed with the first non-NaT element ...
            for item in flat_iter:
                if item != nat:
                    smallest = item
                    break
            # ... then keep consuming the same iterator object.
            for item in flat_iter:
                if item != nat and item < smallest:
                    smallest = item
            return smallest
    else:
        def array_min_impl(arry):
            # Seed with the first element, then scan the whole array.
            for item in arry.flat:
                smallest = item
                break
            for item in arry.flat:
                if item < smallest:
                    smallest = item
            return smallest

    lowered = context.compile_internal(builder, array_min_impl, sig, args)
    return impl_ret_borrowed(context, builder, sig.return_type, lowered)
def dot_3_vm(context, builder, sig, args):
    """
    np.dot(vector, matrix, out)
    np.dot(matrix, vector, out)

    The operand with the larger ``ndim`` is treated as the matrix.  Shapes
    are validated by a compiled ``check_args`` helper (raising ValueError
    on mismatch), each matrix dimension is passed through ``check_c_int``
    and the product itself is delegated to ``call_xxgemv`` writing into
    ``out``.
    """
    xty, yty, outty = sig.args
    assert outty == sig.return_type
    dtype = xty.dtype

    lhs = make_array(xty)(context, builder, args[0])
    rhs = make_array(yty)(context, builder, args[1])
    out = make_array(outty)(context, builder, args[2])
    lhs_dims = cgutils.unpack_tuple(builder, lhs.shape)
    rhs_dims = cgutils.unpack_tuple(builder, rhs.shape)
    out_shapes = cgutils.unpack_tuple(builder, out.shape)

    vector_first = xty.ndim < yty.ndim
    if vector_first:
        # Vector * matrix
        # Asked for x * y, we will compute y.T * x
        mty = yty
        m_shapes = rhs_dims
        do_trans = yty.layout == 'F'
        m_data = rhs.data
        v_data = lhs.data

        def check_args(a, b, out):
            (rows,) = a.shape
            b_rows, b_cols = b.shape
            if rows != b_rows:
                raise ValueError("incompatible array sizes for "
                                 "np.dot(a, b) (vector * matrix)")
            if out.shape != (b_cols,):
                raise ValueError("incompatible output array size for "
                                 "np.dot(a, b, out) (vector * matrix)")
    else:
        # Matrix * vector
        # We will compute x * y
        mty = xty
        m_shapes = lhs_dims
        do_trans = xty.layout == 'C'
        m_data = lhs.data
        v_data = rhs.data

        def check_args(a, b, out):
            a_rows, a_cols = a.shape
            (length,) = b.shape
            if length != a_cols:
                raise ValueError("incompatible array sizes for np.dot(a, b) "
                                 "(matrix * vector)")
            if out.shape != (a_rows,):
                raise ValueError("incompatible output array size for "
                                 "np.dot(a, b, out) (matrix * vector)")

    # Runtime shape validation; the helper returns nothing.
    check_sig = signature(types.none, *sig.args)
    context.compile_internal(builder, check_args, check_sig, args)

    for extent in m_shapes:
        check_c_int(context, builder, extent)

    call_xxgemv(context, builder, do_trans, mty, m_shapes, m_data,
                v_data, out.data)

    return impl_ret_borrowed(context, builder, sig.return_type,
                             out._getvalue())
def lower_rolling_variable(context, builder, sig, args):
    """
    Lower a variable-window rolling operation.

    The last argument type carries the operation name as a literal.  The
    linear kernels (sum/mean/var/std/count) go through
    ``roll_var_linear_generic``; median/min/max use a generated NaN-dropping
    kernel applied via ``roll_variable_apply``.  Any other name raises
    ValueError at lowering time.  The literal argument itself is dropped
    before the implementation is compiled.
    """
    func_name = sig.args[-1].literal_value

    if func_name == 'sum':
        def func(a, o, w, c, p):
            return roll_var_linear_generic(
                a, o, w, c, p,
                init_data_sum, add_sum, remove_sum, calc_sum)
    elif func_name == 'mean':
        def func(a, o, w, c, p):
            return roll_var_linear_generic(
                a, o, w, c, p,
                init_data_mean, add_mean, remove_mean, calc_mean)
    elif func_name == 'var':
        def func(a, o, w, c, p):
            return roll_var_linear_generic(
                a, o, w, c, p,
                init_data_var, add_var, remove_var, calc_var)
    elif func_name == 'std':
        # Same accumulators as 'var'; only the final calculation differs.
        def func(a, o, w, c, p):
            return roll_var_linear_generic(
                a, o, w, c, p,
                init_data_var, add_var, remove_var, calc_std)
    elif func_name == 'count':
        def func(a, o, w, c, p):
            return roll_var_linear_generic(
                a, o, w, c, p,
                init_data_count, add_count, remove_count, calc_count_var)
    elif func_name in ['median', 'min', 'max']:
        # TODO: linear support
        # func_name is restricted to the three names above before it is
        # spliced into the generated source.
        func_text = (
            "def kernel_func(A):\n"
            " arr = dropna(A)\n"
            " if len(arr) == 0: return np.nan\n"
            " return np.{}(arr)\n"
        ).format(func_name)
        namespace = {}
        exec(func_text, {'np': np, 'dropna': _dropna}, namespace)
        kernel = numba.njit(namespace['kernel_func'])

        def func(a, o, w, c, p):
            return roll_variable_apply(a, o, w, c, p, kernel)
    else:
        raise ValueError(
            "invalid rolling (variable) function {}".format(func_name))

    inner_sig = signature(sig.return_type, *sig.args[:-1])
    res = context.compile_internal(builder, func, inner_sig, args[:-1])
    return impl_ret_borrowed(context, builder, sig.return_type, res)
def attr_impl(context, builder, typ, value, attr):
    """
    Generic getattr() for @jitclass instances.

    Struct fields are read from the instance's data helper (using the
    mangled field name) and returned as a borrowed reference.  Jitted
    properties are evaluated by calling the property's getter dispatcher
    on the instance; the result is returned as a new reference.

    Raises NotImplementedError when *attr* is neither a struct field nor a
    jitted property.
    """
    if attr in typ.struct:
        # It's a struct field
        inst = context.make_helper(builder, typ, value=value)
        data_pointer = inst.data
        data = context.make_data_helper(builder, typ.get_data_type(),
                                        ref=data_pointer)
        return imputils.impl_ret_borrowed(context, builder,
                                          typ.struct[attr],
                                          getattr(data, _mangle_attr(attr)))
    elif attr in typ.jitprops:
        # It's a jitted property
        getter = typ.jitprops[attr]['get']
        dispatcher = types.Dispatcher(getter)
        # The call signature comes from typing the getter on the instance
        # type; no placeholder signature is needed beforehand.
        sig = dispatcher.get_call_type(context.typing_context, [typ], {})
        call = context.get_function(dispatcher, sig)
        out = call(builder, [value])
        return imputils.impl_ret_new_ref(context, builder,
                                         sig.return_type, out)

    raise NotImplementedError('attribute {0!r} not implemented'.format(attr))
def lower_rolling_fixed(context, builder, sig, args):
    """
    Lower a fixed-window rolling operation.

    The last argument type carries the operation name as a literal.  The
    linear kernels (sum/mean/var/std/count) go through
    ``roll_fixed_linear_generic``; median/min/max use a generated kernel
    (NaN if the window contains any NaN) applied via ``roll_fixed_apply``.
    Any other name raises ValueError at lowering time.  The literal
    argument itself is dropped before the implementation is compiled.
    """
    func_name = sig.args[-1].literal_value

    if func_name == 'sum':
        def func(a, w, c, p):
            return roll_fixed_linear_generic(
                a, w, c, p,
                init_data_sum, add_sum, remove_sum, calc_sum)
    elif func_name == 'mean':
        def func(a, w, c, p):
            return roll_fixed_linear_generic(
                a, w, c, p,
                init_data_mean, add_mean, remove_mean, calc_mean)
    elif func_name == 'var':
        def func(a, w, c, p):
            return roll_fixed_linear_generic(
                a, w, c, p,
                init_data_var, add_var, remove_var, calc_var)
    elif func_name == 'std':
        # Same accumulators as 'var'; only the final calculation differs.
        def func(a, w, c, p):
            return roll_fixed_linear_generic(
                a, w, c, p,
                init_data_var, add_var, remove_var, calc_std)
    elif func_name == 'count':
        def func(a, w, c, p):
            return roll_fixed_linear_generic(
                a, w, c, p,
                init_data_count, add_count, remove_count, calc_count)
    elif func_name in ['median', 'min', 'max']:
        # just using 'apply' since we don't have streaming/linear support
        # TODO: implement linear support similar to others
        func_text = (
            "def kernel_func(A):\n"
            " if np.isnan(A).sum() != 0: return np.nan\n"
            " return np.{}(A)\n"
        ).format(func_name)
        namespace = {}
        exec(func_text, {'np': np}, namespace)
        kernel = numba.njit(namespace['kernel_func'])

        def func(a, w, c, p):
            return roll_fixed_apply(a, w, c, p, kernel)
    else:
        raise ValueError(
            "invalid rolling (fixed) function {}".format(func_name))

    inner_sig = signature(sig.return_type, *sig.args[:-1])
    res = context.compile_internal(builder, func, inner_sig, args[:-1])
    return impl_ret_borrowed(context, builder, sig.return_type, res)
def impl_list_getiter(context, builder, sig, args):
    """Implement iter(List).

    Allocates a zero-filled byte buffer for the iterator state (its size is
    obtained from the ``list_iter_sizeof`` C helper), records the list as
    the iterator's parent and calls ``numba_list_iter`` to initialise the
    state.
    """
    (list_ty,) = sig.args
    (list_val,) = args
    iterablety = types.ListTypeIterableType(list_ty)
    it = context.make_helper(builder, iterablety.iterator_type)

    init_fnty = ir.FunctionType(
        ir.VoidType(),
        [ll_listiter_type, ll_list_type],
    )
    init_fn = builder.module.get_or_insert_function(
        init_fnty, name='numba_list_iter')

    # Ask the C helper library how many bytes the iterator state needs.
    sizeof_proto = ctypes.CFUNCTYPE(ctypes.c_size_t)
    listiter_sizeof = sizeof_proto(_helperlib.c_helpers['list_iter_sizeof'])
    nbytes = listiter_sizeof()

    state_type = ir.ArrayType(ir.IntType(8), nbytes)
    pstate = cgutils.alloca_once(builder, state_type, zfill=True)
    it.state = _as_bytes(builder, pstate)
    it.parent = list_val

    dp = _container_get_data(context, builder, iterablety.parent, args[0])
    builder.call(init_fn, [it.state, dp])
    return impl_ret_borrowed(
        context,
        builder,
        sig.return_type,
        it._getvalue(),
    )
def impl_dict_getiter(context, builder, sig, args):
    """Implement iter(Dict).  Semantically equivalent to dict.keys()

    Allocates a zero-filled byte buffer for the iterator state (its size is
    obtained from the ``dict_iter_sizeof`` C helper), records the dict as
    the iterator's parent and calls ``numba_dict_iter`` to initialise the
    state.
    """
    (dict_ty,) = sig.args
    (dict_val,) = args
    iterablety = types.DictKeysIterableType(dict_ty)
    it = context.make_helper(builder, iterablety.iterator_type)

    init_fnty = ir.FunctionType(
        ir.VoidType(),
        [ll_dictiter_type, ll_dict_type],
    )
    init_fn = builder.module.get_or_insert_function(
        init_fnty, name='numba_dict_iter')

    # Ask the C helper library how many bytes the iterator state needs.
    sizeof_proto = ctypes.CFUNCTYPE(ctypes.c_size_t)
    dictiter_sizeof = sizeof_proto(_helperlib.c_helpers['dict_iter_sizeof'])
    nbytes = dictiter_sizeof()

    state_type = ir.ArrayType(ir.IntType(8), nbytes)
    pstate = cgutils.alloca_once(builder, state_type, zfill=True)
    it.state = _as_bytes(builder, pstate)
    it.parent = dict_val

    dp = _dict_get_data(context, builder, iterablety.parent, args[0])
    builder.call(init_fn, [it.state, dp])
    return impl_ret_borrowed(
        context,
        builder,
        sig.return_type,
        it._getvalue(),
    )
def lower_dist_arr_reduce(context, builder, sig, args):
    """
    Lower a distributed array reduction to a call to the external
    ``hpat_dist_arr_reduce`` function.

    The callee receives the array data pointer (as i8*), a pointer to the
    shape viewed as i64*, the number of dimensions, the reduction operator
    value (``args[1]``) and an integer type code looked up in
    ``_h5_typ_table``.  The input array value is returned unchanged; the
    callee's i32 return value is ignored.
    """
    arr_ty = sig.args[0]
    op_val = args[1]
    op_typ = op_val.type
    i8_ptr = lir.IntType(8).as_pointer()
    i64_ptr = lir.IntType(64).as_pointer()
    i32 = lir.IntType(32)

    # store an int to specify data type
    typ_enum = _h5_typ_table[arr_ty.dtype]
    typ_arg = cgutils.alloca_once_value(builder, lir.Constant(i32, typ_enum))

    out = make_array(arr_ty)(context, builder, args[0])

    # store size vars array struct to pointer
    size_ptr = cgutils.alloca_once(builder, out.shape.type)
    builder.store(out.shape, size_ptr)
    size_arg = builder.bitcast(size_ptr, i64_ptr)

    ndim_arg = cgutils.alloca_once_value(
        builder, lir.Constant(i32, arr_ty.ndim))

    call_args = [
        builder.bitcast(out.data, i8_ptr),
        size_arg,
        builder.load(ndim_arg),
        op_val,
        builder.load(typ_arg),
    ]

    # array, shape, ndim, extra last arg type for type enum
    arg_typs = [i8_ptr, i64_ptr, i32, op_typ, i32]
    fnty = lir.FunctionType(i32, arg_typs)
    fn = builder.module.get_or_insert_function(
        fnty, name="hpat_dist_arr_reduce")
    builder.call(fn, call_args)

    return impl_ret_borrowed(context, builder, sig.return_type,
                             out._getvalue())
def impl_iterable_getiter(context, builder, sig, args):
    """Implement iter() for .keys(), .values(), .items()

    The incoming iterable value is re-interpreted through the iterator
    helper, a zero-filled state buffer is attached, and
    ``numba_dict_iter`` is called on the parent container's data to
    initialise that state.
    """
    iterablety = sig.args[0]
    it = context.make_helper(builder, iterablety.iterator_type, args[0])

    init_fnty = ir.FunctionType(
        ir.VoidType(),
        [ll_dictiter_type, ll_dict_type],
    )
    init_fn = builder.module.get_or_insert_function(
        init_fnty, name='numba_dict_iter')

    # Ask the C helper library how many bytes the iterator state needs.
    sizeof_proto = ctypes.CFUNCTYPE(ctypes.c_size_t)
    dictiter_sizeof = sizeof_proto(_helperlib.c_helpers['dict_iter_sizeof'])
    nbytes = dictiter_sizeof()

    state_type = ir.ArrayType(ir.IntType(8), nbytes)
    pstate = cgutils.alloca_once(builder, state_type, zfill=True)
    it.state = _as_bytes(builder, pstate)

    dp = _container_get_data(context, builder, iterablety.parent, it.parent)
    builder.call(init_fn, [it.state, dp])
    return impl_ret_borrowed(
        context,
        builder,
        sig.return_type,
        it._getvalue(),
    )
def series_wrap_array(context, builder, sig, args):
    """
    Build a new series whose values are ``args[1]`` and whose index is
    taken from the source series ``args[0]``.
    """
    source_val, new_values = args[0], args[1]
    src = make_series(context, builder, sig.args[0], value=source_val)
    dest = make_series(context, builder, sig.return_type)
    dest.values = new_values
    dest.index = src.index
    wrapped = dest._getvalue()
    return impl_ret_borrowed(context, builder, sig.return_type, wrapped)
def array_min(context, builder, sig, args):
    """
    Lower ``min()`` over an array via a compiled pure-Python kernel.

    Datetime and timedelta arrays get a NaT-aware kernel (NaT entries are
    skipped, and NaT is returned only if nothing else is found); all other
    element types use a straightforward scan.
    """
    dtype = sig.args[0].dtype

    if not isinstance(dtype, (types.NPDatetime, types.NPTimedelta)):
        def array_min_impl(arry):
            # Take the first element as the starting candidate.
            for elem in arry.flat:
                current_min = elem
                break
            for elem in arry.flat:
                if elem < current_min:
                    current_min = elem
            return current_min
    else:
        # NaT is smaller than every other value, but it is
        # ignored as far as min() is concerned.
        nat = dtype('NaT')

        def array_min_impl(arry):
            current_min = nat
            elems = arry.flat
            # First pass stops at the first non-NaT entry.
            for elem in elems:
                if elem != nat:
                    current_min = elem
                    break
            # Second loop keeps using the same iterator object.
            for elem in elems:
                if elem != nat and elem < current_min:
                    current_min = elem
            return current_min

    res = context.compile_internal(builder, array_min_impl, sig, args)
    return impl_ret_borrowed(context, builder, sig.return_type, res)
def pdseries_constructor(context, builder, sig, args):
    """
    Assemble a series value from a ``(data, index)`` argument pair.
    """
    values, labels = args
    series = make_series(context, builder, sig.return_type)
    series.index = labels
    series.values = values
    packed = series._getvalue()
    return impl_ret_borrowed(context, builder, sig.return_type, packed)
def get_attr_impl(context, builder, typ, value, attr):
    """
    Generic getattr() for @jitclass instances.

    Struct fields are read from the instance's data helper (using the
    mangled field name) and returned as a borrowed reference.  Jitted
    properties are evaluated by calling the property's getter dispatcher
    on the instance, after which ``_add_linking_libs`` is invoked for the
    callee; the result is returned as a new reference.

    Raises NotImplementedError when *attr* is neither a struct field nor a
    jitted property.
    """
    if attr in typ.struct:
        # It's a struct field
        inst = context.make_helper(builder, typ, value=value)
        data_pointer = inst.data
        data = context.make_data_helper(builder, typ.get_data_type(),
                                        ref=data_pointer)
        return imputils.impl_ret_borrowed(context, builder,
                                          typ.struct[attr],
                                          getattr(data, _mangle_attr(attr)))
    elif attr in typ.jitprops:
        # It's a jitted property
        getter = typ.jitprops[attr]['get']
        dispatcher = types.Dispatcher(getter)
        # The call signature comes from typing the getter on the instance
        # type; no placeholder signature is needed beforehand.
        sig = dispatcher.get_call_type(context.typing_context, [typ], {})
        call = context.get_function(dispatcher, sig)
        out = call(builder, [value])
        _add_linking_libs(context, call)
        return imputils.impl_ret_new_ref(context, builder,
                                         sig.return_type, out)

    raise NotImplementedError('attribute {0!r} not implemented'.format(attr))
def getitem_list(context, builder, sig, args):
    """
    Lower ``lst[index]``: the index is passed through
    ``ListInstance.fix_index`` before the element is fetched.
    """
    list_val, raw_index = args[0], args[1]
    inst = ListInstance(context, builder, sig.args[0], list_val)
    fixed_index = inst.fix_index(raw_index)
    item = inst.getitem(fixed_index)
    return impl_ret_borrowed(context, builder, sig.return_type, item)
def array_prod(context, builder, sig, args):
    """
    Lower an array product by compiling a loop over ``arr.flat``.

    The accumulator ``c`` is forced to the signature's return type through
    the ``locals`` mapping, so its name inside the kernel must stay ``c``.
    """
    def array_prod_impl(arr):
        c = 1
        for elem in arr.flat:
            c *= elem
        return c

    accumulator_types = {'c': sig.return_type}
    res = context.compile_internal(builder, array_prod_impl, sig, args,
                                   locals=accumulator_types)
    return impl_ret_borrowed(context, builder, sig.return_type, res)
def codegen(context, builder, sig, args):
    """
    Produce a new ctypes-array struct whose data pointer is the input's
    data pointer advanced by ``ind`` elements (via GEP); the meminfo is
    shared with the input.
    """
    source_val, offset = args
    source = context.make_helper(builder, arr_ctypes_t, source_val)
    shifted = context.make_helper(builder, arr_ctypes_t)
    shifted.data = builder.gep(source.data, [offset])
    shifted.meminfo = source.meminfo
    return impl_ret_borrowed(context, builder, arr_ctypes_t,
                             shifted._getvalue())
def impl_COO(context, builder, sig, args):
    """
    Construct a COO struct from ``(coords, data, shape)``.

    The fill value is a compile-time constant: the zero of the data dtype
    as produced by ``_zero_of_dtype``.
    """
    coo_ty = sig.return_type
    coords, data, shape = args

    proxy_cls = cgutils.create_struct_proxy(coo_ty)
    coo = proxy_cls(context, builder)
    coo.coords = coords
    coo.data = data
    coo.shape = shape

    zero = _zero_of_dtype(coo_ty.data_dtype)
    coo.fill_value = context.get_constant_generic(
        builder, coo_ty.fill_value_type, zero)

    return impl_ret_borrowed(context, builder, sig.return_type,
                             coo._getvalue())
def array_sum(context, builder, sig, args):
    """
    Lower an array sum by compiling a loop over ``arr.flat``.

    The accumulator ``c`` starts from a zero of the return type and is
    pinned to that type via the ``locals`` mapping, so its name inside the
    kernel must stay ``c``.
    """
    zero = sig.return_type(0)

    def array_sum_impl(arr):
        c = zero
        for elem in arr.flat:
            c += elem
        return c

    accumulator_types = {'c': sig.return_type}
    res = context.compile_internal(builder, array_sum_impl, sig, args,
                                   locals=accumulator_types)
    return impl_ret_borrowed(context, builder, sig.return_type, res)
def codegen(context, builder, sig, args):
    """
    Create an iterator helper of type ``resty`` whose parent is the single
    argument and whose state field is set to a null constant of the
    state's own LLVM type.
    """
    (parent,) = args
    (td,) = sig.args  # unpacked only to enforce a single-argument signature
    helper = context.make_helper(builder, resty)
    helper.parent = parent
    state_llty = helper.state.type
    helper.state = state_llty(None)
    return impl_ret_borrowed(
        context,
        builder,
        resty,
        helper._getvalue(),
    )
def array_max(context, builder, sig, args):
    """
    Lower ``max()`` over an array by compiling a linear scan that seeds the
    result with the first element of ``arry.flat``.
    """
    def array_max_impl(arry):
        # Take the first element as the starting candidate.
        for elem in arry.flat:
            largest = elem
            break
        for elem in arry.flat:
            if elem > largest:
                largest = elem
        return largest

    lowered = context.compile_internal(builder, array_max_impl, sig, args)
    return impl_ret_borrowed(context, builder, sig.return_type, lowered)
def lower_constant_COO(context, builder, typ, pyval):
    """
    Lower a Python-level COO object to a constant struct.

    Each member is lowered with ``get_constant_generic`` and the results
    are packed in the order ``(data, coords, shape, fill_value)``.
    """
    def as_constant(member_ty, member_val):
        return context.get_constant_generic(builder, member_ty, member_val)

    coords = as_constant(typ.coords_type, pyval.coords)
    data = as_constant(typ.data_type, pyval.data)
    shape = as_constant(typ.shape_type, pyval.shape)
    fill_value = as_constant(typ.fill_value_type, pyval.fill_value)

    members = (data, coords, shape, fill_value)
    packed = cgutils.pack_struct(builder, members)
    return impl_ret_borrowed(context, builder, typ, packed)
def codegen(context, builder, sig, args):
    """
    Expose a string array's offsets buffer as an ``offset_ctypes_type``
    struct: the offsets pointer is bitcast to ``i32*`` and the meminfo is
    shared with the string array.
    """
    (str_arr_val,) = args
    string_array = context.make_helper(builder, string_array_type,
                                       str_arr_val)

    # Create new ArrayCType structure
    i32_ptr = lir.IntType(32).as_pointer()
    ctinfo = context.make_helper(builder, offset_ctypes_type)
    ctinfo.data = builder.bitcast(string_array.offsets, i32_ptr)
    ctinfo.meminfo = string_array.meminfo
    return impl_ret_borrowed(context, builder, offset_ctypes_type,
                             ctinfo._getvalue())
def codegen(context, builder, sig, args):
    """
    Expose a string array's data buffer as a ``data_ctypes_type`` struct
    that shares the string array's data pointer and meminfo.
    """
    (str_arr_val,) = args
    string_array = context.make_helper(builder, string_array_type,
                                       str_arr_val)

    # Create new ArrayCType structure
    # TODO: put offset/data in main structure since immutable
    ctinfo = context.make_helper(builder, data_ctypes_type)
    ctinfo.data = string_array.data
    ctinfo.meminfo = string_array.meminfo
    return impl_ret_borrowed(context, builder, data_ctypes_type,
                             ctinfo._getvalue())
def str_arr_size_impl(context, builder, typ, val):
    """
    Read the ``size`` field of a string array's payload.

    The payload is reached through the instance's meminfo data pointer,
    which is cast to the payload's data type and loaded.  The result is
    returned as ``types.intp``.
    """
    payload_ty = StringArrayPayloadType()
    inst_struct = context.make_helper(builder, typ, val)

    raw_ptr = context.nrt.meminfo_data(builder, inst_struct.meminfo)
    payload_llty = context.get_data_type(payload_ty)
    payload_ptr = builder.bitcast(raw_ptr, payload_llty.as_pointer())

    proxy_cls = cgutils.create_struct_proxy(payload_ty)
    string_array = proxy_cls(context, builder, builder.load(payload_ptr))

    return impl_ret_borrowed(context, builder, types.intp,
                             string_array.size)
def array_prod(context, builder, sig, args):
    """
    Lower an array product by compiling a loop over ``np.nditer(arr)``.

    The accumulator ``c`` is forced to the signature's return type through
    the ``locals`` mapping, so its name inside the kernel must stay ``c``.
    """
    def array_prod_impl(arr):
        c = 1
        for view in np.nditer(arr):
            c *= view.item()
        return c

    accumulator_types = {'c': sig.return_type}
    res = context.compile_internal(builder, array_prod_impl, sig, args,
                                   locals=accumulator_types)
    return impl_ret_borrowed(context, builder, sig.return_type, res)
def array_max(context, builder, sig, args):
    """
    Lower ``max()`` over an array by compiling a scan over a single
    ``np.nditer`` object: the first loop seeds the result, the second loop
    continues on the same iterator object.
    """
    def array_max_impl(arry):
        views = np.nditer(arry)
        # Seed the result with the first element.
        for view in views:
            largest = view.item()
            break
        for view in views:
            candidate = view.item()
            if candidate > largest:
                largest = candidate
        return largest

    lowered = context.compile_internal(builder, array_max_impl, sig, args)
    return impl_ret_borrowed(context, builder, sig.return_type, lowered)
def array_sum(context, builder, sig, args):
    """
    Lower an array sum by compiling a loop over ``np.nditer(arr)``.

    The accumulator ``c`` starts from a zero of the return type and is
    pinned to that type via the ``locals`` mapping, so its name inside the
    kernel must stay ``c``.
    """
    zero = sig.return_type(0)

    def array_sum_impl(arr):
        c = zero
        for view in np.nditer(arr):
            c += view.item()
        return c

    accumulator_types = {'c': sig.return_type}
    res = context.compile_internal(builder, array_sum_impl, sig, args,
                                   locals=accumulator_types)
    return impl_ret_borrowed(context, builder, sig.return_type, res)
def list_mul_inplace(context, builder, sig, args):
    """
    Lower ``lst *= n``.

    A negative multiplier is clamped to zero.  The list is resized to
    ``n * len(lst)`` and the original items are then copied into each
    subsequent chunk of the resized list.
    """
    inst = ListInstance(context, builder, sig.args[0], args[0])
    src_size = inst.size

    # Clamp negative multipliers to zero.
    mult = args[1]
    zero = ir.Constant(mult.type, 0)
    is_negative = cgutils.is_neg_int(builder, mult)
    mult = builder.select(is_negative, zero, mult)

    nitems = builder.mul(mult, src_size)
    inst.resize(nitems)

    # Outer loop walks chunk start offsets src_size, 2*src_size, ...;
    # inner loop copies the original items into that chunk.
    chunks = cgutils.for_range_slice(builder, src_size, nitems, src_size,
                                     inc=True)
    with chunks as (dest_offset, _):
        with cgutils.for_range(builder, src_size) as loop:
            item = inst.getitem(loop.index)
            target = builder.add(loop.index, dest_offset)
            inst.setitem(target, item)

    return impl_ret_borrowed(context, builder, sig.return_type, inst.value)
def getitem_tuple_lower(context, builder, sig, args):
    """
    Lower ``tup[idx]`` where ``idx`` is a compile-time literal.

    Integer indices (negative ones are wrapped once) extract a single
    element; slices build a new tuple from the selected items.

    Raises IndexError for an out-of-range integer and NotImplementedError
    for any other kind of literal.
    """
    tupty, idxty = sig.args
    index = idxty.literal_value
    tup, _ = args

    if isinstance(index, int):
        size = len(tupty)
        if index < 0:
            index += size
        if index < 0 or index >= size:
            raise IndexError("cannot index at %d in %s" % (index, tupty))
        res = builder.extract_value(tup, index)
    elif isinstance(index, slice):
        selected = cgutils.unpack_tuple(builder, tup)[index]
        res = context.make_tuple(builder, sig.return_type, selected)
    else:
        raise NotImplementedError("unexpected index %r for %s"
                                  % (index, sig.args[0]))

    return impl_ret_borrowed(context, builder, sig.return_type, res)
def codegen(context, builder, sig, args):
    """
    Rebuild a dict struct from a meminfo: the meminfo's data area is read
    as a pointer to ``ll_dict_type`` and loaded into the struct's ``data``
    field, and the meminfo itself is stored alongside it.
    """
    tmi, tdref = sig.args
    td = tdref.instance_type
    mi, _ = args

    dstruct = cgutils.create_struct_proxy(td)(context, builder)

    raw_ptr = context.nrt.meminfo_data(builder, mi)
    dict_ptr = builder.bitcast(raw_ptr, ll_dict_type.as_pointer())

    dstruct.data = builder.load(dict_ptr)
    dstruct.meminfo = mi

    return impl_ret_borrowed(
        context,
        builder,
        dicttype,
        dstruct._getvalue(),
    )
def codegen(context, builder, sig, args):
    """
    Rebuild a list struct from a meminfo: the meminfo's data area is read
    as a pointer to ``ll_list_type`` and loaded into the struct's ``data``
    field, and the meminfo itself is stored alongside it.
    """
    tmi, tdref = sig.args
    td = tdref.instance_type
    mi, _ = args

    dstruct = cgutils.create_struct_proxy(td)(context, builder)

    raw_ptr = context.nrt.meminfo_data(builder, mi)
    list_ptr = builder.bitcast(raw_ptr, ll_list_type.as_pointer())

    dstruct.data = builder.load(list_ptr)
    dstruct.meminfo = mi

    return impl_ret_borrowed(
        context,
        builder,
        listtype,
        dstruct._getvalue(),
    )
def imp(context, builder, typ, val):
    """
    Build an array view over a nested-array member of a record.

    The array's shape and strides are compile-time constants taken from
    ``elemty``; its data pointer addresses the member at ``offset`` inside
    the record.  No meminfo or parent is attached to the resulting array.
    """
    ary = aryty(context, builder)
    dtype = elemty.dtype

    shape_consts = [self.get_constant(types.intp, dim)
                    for dim in elemty.shape]
    stride_consts = [self.get_constant(types.intp, step)
                     for step in elemty.strides]

    member_ptr = cgutils.get_record_member(builder, val, offset,
                                           self.get_data_type(dtype))

    arrayobj.populate_array(
        ary,
        data=member_ptr,
        shape=cgutils.pack_array(builder, shape_consts),
        strides=cgutils.pack_array(builder, stride_consts),
        itemsize=context.get_constant(types.intp, elemty.size),
        meminfo=None,
        parent=None,
    )
    return impl_ret_borrowed(context, builder, typ, ary._getvalue())
def attr_impl(context, builder, typ, value, attr):
    """
    Generic getattr() for @jitclass instances.

    Struct fields are read through a 'data'-kind struct proxy on the
    instance's data pointer and returned as a borrowed reference.  Jitted
    properties are evaluated by calling the property's getter dispatcher
    on the instance; the result is returned as a new reference.

    Raises NotImplementedError when *attr* is neither a struct field nor a
    jitted property.
    """
    if attr in typ.struct:
        # It's a struct field
        inst_struct = cgutils.create_struct_proxy(typ)
        inst = inst_struct(context, builder, value=value)
        data_pointer = inst.data
        data_struct = cgutils.create_struct_proxy(typ.get_data_type(),
                                                  kind='data')
        data = data_struct(context, builder, ref=data_pointer)
        return imputils.impl_ret_borrowed(context, builder,
                                          typ.struct[attr],
                                          getattr(data, attr))
    elif attr in typ.jitprops:
        # It's a jitted property
        getter = typ.jitprops[attr]['get']
        dispatcher = types.Dispatcher(getter)
        # The call signature comes from typing the getter on the instance
        # type; no placeholder signature is needed beforehand.
        sig = dispatcher.get_call_type(context.typing_context, [typ], {})
        call = context.get_function(dispatcher, sig)
        out = call(builder, [value])
        return imputils.impl_ret_new_ref(context, builder,
                                         sig.return_type, out)

    raise NotImplementedError('attribute {0!r} not implemented'.format(attr))
def dot_3_vm(context, builder, sig, args):
    """
    np.dot(vector, matrix, out)
    np.dot(matrix, vector, out)

    The operand with the larger ``ndim`` is treated as the matrix.  Shapes
    are validated by a compiled ``check_args`` helper (ValueError on
    mismatch), both matrix dimensions go through ``check_c_int``, and the
    product is computed by a direct call to the external ``numba_xxgemv``
    function, whose return code is passed to ``check_blas_return``.  The
    ``out`` array value is returned as a borrowed reference.
    """
    xty, yty, outty = sig.args
    assert outty == sig.return_type
    dtype = xty.dtype

    x = make_array(xty)(context, builder, args[0])
    y = make_array(yty)(context, builder, args[1])
    out = make_array(outty)(context, builder, args[2])
    x_shapes = cgutils.unpack_tuple(builder, x.shape)
    y_shapes = cgutils.unpack_tuple(builder, y.shape)
    out_shapes = cgutils.unpack_tuple(builder, out.shape)
    if xty.ndim < yty.ndim:
        # Vector * matrix
        # Asked for x * y, we will compute y.T * x
        mty = yty
        m, n = m_shapes = y_shapes
        do_trans = yty.layout == 'F'
        m_data, v_data = y.data, x.data

        def check_args(a, b, out):
            m, = a.shape
            _m, n = b.shape
            if m != _m:
                raise ValueError("incompatible array sizes for np.dot(a, b) "
                                 "(vector * matrix)")
            if out.shape != (n,):
                raise ValueError("incompatible output array size for np.dot(a, b, out) "
                                 "(vector * matrix)")
    else:
        # Matrix * vector
        # We will compute x * y
        mty = xty
        m, n = m_shapes = x_shapes
        do_trans = xty.layout == 'C'
        m_data, v_data = x.data, y.data

        def check_args(a, b, out):
            m, _n = a.shape
            n, = b.shape
            if n != _n:
                raise ValueError("incompatible array sizes for np.dot(a, b) "
                                 "(matrix * vector)")
            if out.shape != (m,):
                raise ValueError("incompatible output array size for np.dot(a, b, out) "
                                 "(matrix * vector)")

    # Emit the runtime shape checks (the helper returns nothing).
    context.compile_internal(builder, check_args,
                             signature(types.none, *sig.args), args)
    check_c_int(context, builder, m)
    check_c_int(context, builder, n)

    fnty = ir.FunctionType(ir.IntType(32),
                           [ll_char, ll_char_p,               # kind, trans
                            intp_t, intp_t,                   # m, n
                            ll_void_p, ll_void_p, intp_t,     # alpha, a, lda
                            ll_void_p, ll_void_p, ll_void_p,  # x, beta, y
                            ])
    fn = builder.module.get_or_insert_function(fnty, name="numba_xxgemv")

    # alpha = 1, beta = 0 are passed by pointer (see the void* slots above).
    alpha = make_constant_slot(context, builder, dtype, 1.0)
    beta = make_constant_slot(context, builder, dtype, 0.0)

    if mty.layout == 'F':
        lda = m_shapes[0]
    else:
        # Non-'F' layout: the two dimensions are swapped before the call
        # and lda is taken from the second extent.
        m, n = n, m
        lda = m_shapes[1]

    kind = get_blas_kind(dtype)
    kind_val = ir.Constant(ll_char, ord(kind))
    trans = context.insert_const_string(builder.module,
                                        "t" if do_trans else "n")

    res = builder.call(fn, (kind_val, trans, m, n,
                            builder.bitcast(alpha, ll_void_p),
                            builder.bitcast(m_data, ll_void_p), lda,
                            builder.bitcast(v_data, ll_void_p),
                            builder.bitcast(beta, ll_void_p),
                            builder.bitcast(out.data, ll_void_p)))
    check_blas_return(context, builder, res)

    return impl_ret_borrowed(context, builder, sig.return_type,
                             out._getvalue())
def dot_3_mm(context, builder, sig, args):
    """
    np.dot(matrix, matrix, out)

    Shapes are validated by a compiled ``check_args`` helper (ValueError on
    mismatch), the three extents go through ``check_c_int``, and the
    product is computed by a direct call to the external ``numba_xxgemm``
    function, whose return code is passed to ``check_blas_return``.  The
    ``out`` array value is returned as a borrowed reference.
    """
    xty, yty, outty = sig.args
    assert outty == sig.return_type
    dtype = xty.dtype

    x = make_array(xty)(context, builder, args[0])
    y = make_array(yty)(context, builder, args[1])
    out = make_array(outty)(context, builder, args[2])
    x_shapes = cgutils.unpack_tuple(builder, x.shape)
    y_shapes = cgutils.unpack_tuple(builder, y.shape)
    out_shapes = cgutils.unpack_tuple(builder, out.shape)
    m, k = x_shapes
    _k, n = y_shapes

    def check_args(a, b, out):
        m, k = a.shape
        _k, n = b.shape
        if k != _k:
            raise ValueError("incompatible array sizes for np.dot(a, b) "
                             "(matrix * matrix)")
        if out.shape != (m, n):
            raise ValueError("incompatible output array size for np.dot(a, b, out) "
                             "(matrix * matrix)")

    # Emit the runtime shape checks (the helper returns nothing).
    context.compile_internal(builder, check_args,
                             signature(types.none, *sig.args), args)
    check_c_int(context, builder, m)
    check_c_int(context, builder, k)
    check_c_int(context, builder, n)

    fnty = ir.FunctionType(ir.IntType(32),
                           [ll_char,                       # kind
                            ll_char_p, ll_char_p,          # transa, transb
                            intp_t, intp_t, intp_t,        # m, n, k
                            ll_void_p, ll_void_p, intp_t,  # alpha, a, lda
                            ll_void_p, intp_t, ll_void_p,  # b, ldb, beta
                            ll_void_p, intp_t,             # c, ldc
                            ])
    fn = builder.module.get_or_insert_function(fnty, name="numba_xxgemm")

    # alpha = 1, beta = 0 are passed by pointer (see the void* slots above).
    alpha = make_constant_slot(context, builder, dtype, 1.0)
    beta = make_constant_slot(context, builder, dtype, 0.0)

    trans = context.insert_const_string(builder.module, "t")
    notrans = context.insert_const_string(builder.module, "n")

    # Since out is C-contiguous, compute a * b = y.T * x.T
    assert outty.layout == 'C'

    def get_array_param(ty, shapes, data):
        # Returns (trans flag, leading dimension, void* data) for one
        # operand of the call below.
        return (
            # Transpose if layout different from result's
            notrans if ty.layout == outty.layout else trans,
            # Size of the inner dimension in physical array order
            shapes[1] if ty.layout == 'C' else shapes[0],
            # The data pointer, unit-less
            builder.bitcast(data, ll_void_p),
        )

    # Operands are swapped (y first, then x) to match the comment above.
    transa, lda, data_a = get_array_param(yty, y_shapes, y.data)
    transb, ldb, data_b = get_array_param(xty, x_shapes, x.data)
    _, ldc, data_c = get_array_param(outty, out_shapes, out.data)

    kind = get_blas_kind(dtype)
    kind_val = ir.Constant(ll_char, ord(kind))

    res = builder.call(fn, (kind_val, transa, transb, n, m, k,
                            builder.bitcast(alpha, ll_void_p), data_a, lda,
                            data_b, ldb, builder.bitcast(beta, ll_void_p),
                            data_c, ldc))
    check_blas_return(context, builder, res)

    return impl_ret_borrowed(context, builder, sig.return_type,
                             out._getvalue())
def index_wrap_array(context, builder, sig, args):
    """
    Build a new index value whose ``data`` field is ``args[1]``; the first
    argument is not read here.
    """
    new_data = args[1]
    dest = make_index(context, builder, sig.return_type)
    dest.data = new_data
    wrapped = dest._getvalue()
    return impl_ret_borrowed(context, builder, sig.return_type, wrapped)
def mat_inv(context, builder, sig, args):
    """
    Invert a matrix through the use of its LU decomposition.

    Steps emitted, in order:

    1. copy the input (ValueError at runtime if it is not square);
    2. ``call_xxgetrf`` on the copy to compute the LU decomposition;
    3. ``call_xxgetri`` with ``lwork = -1`` to query the work size;
    4. allocate a work array from that answer;
    5. ``call_xxgetri`` again to compute the inverse in the copy.

    After each LAPACK call the ``info`` slot is inspected and a ValueError
    is returned through the calling convention when it is non-zero.
    The copy made in step 1 is the returned value.
    """
    xty = sig.args[0]
    dtype = xty.dtype

    x = make_array(xty)(context, builder, args[0])
    x_shapes = cgutils.unpack_tuple(builder, x.shape)
    m, n = x_shapes
    check_c_int(context, builder, m)
    check_c_int(context, builder, n)

    # Allocate the return array (Numpy never works in place contrary to
    # Scipy for which one can specify to whether or not to overwrite the
    # input).
    def create_out(a):
        m, n = a.shape
        if m != n:
            raise ValueError("np.linalg.inv can only work on square "
                             "arrays.")
        return a.copy()

    out = context.compile_internal(builder, create_out,
                                   signature(sig.return_type, *sig.args), args)
    o = make_array(xty)(context, builder, out)

    # Allocate the array in which the pivot indices are stored.
    ipiv_t = types.Array(types.intc, 1, 'C')
    i = _empty_nd_impl(context, builder, ipiv_t, (m,))

    info = cgutils.alloca_once(builder, ll_intc)

    # Compute the LU decomposition of the matrix.
    call_xxgetrf(context, builder, xty, x_shapes, o.data, i.data, info)

    # info < 0 is reported as an invalid argument, info > 0 as a singular
    # matrix.
    zero = ir.Constant(ll_intc, 0)
    info_val = builder.load(info)
    lapack_error = builder.icmp_signed('!=', info_val, zero)
    invalid_arg = builder.icmp_signed('<', info_val, zero)

    with builder.if_then(lapack_error, False):
        with builder.if_else(invalid_arg) as (then, otherwise):
            raise_err = context.call_conv.return_user_exc
            with then:
                raise_err(builder, ValueError,
                          ('One argument passed to getrf is invalid',)
                          )
            with otherwise:
                raise_err(builder, ValueError,
                          ('Matrix is singular and cannot be inverted',)
                          )

    # Compute the optimal lwork.
    # lwork = -1 here; the single-element `work` slot is read back below.
    lwork = make_constant_slot(context, builder, types.intc, -1)
    work = cgutils.alloca_once(builder, context.get_value_type(xty.dtype))
    call_xxgetri(context, builder, xty, x_shapes, o.data, i.data, work,
                 lwork, info)

    # Any non-zero info from the query is reported as an invalid argument.
    info_val = builder.load(info)
    lapack_error = builder.icmp_signed('!=', info_val, zero)

    with builder.if_then(lapack_error, False):
        raise_err = context.call_conv.return_user_exc
        raise_err(builder, ValueError,
                  ('One argument passed to getri is invalid',)
                  )

    # Allocate a work array of the optimal size as computed by getri.
    def allocate_work(x, size):
        """Allocate the work array.

        The requested size is the real part of *size* scaled by 1.01 and
        truncated to an int.
        """
        size = int(1.01 * size.real)
        return numpy.empty((size,), dtype=x.dtype)

    wty = types.Array(dtype, 1, 'C')
    work = context.compile_internal(builder, allocate_work,
                                    signature(wty, xty, dtype),
                                    (args[0], builder.load(work)))

    w = make_array(wty)(context, builder, work)
    w_shapes = cgutils.unpack_tuple(builder, w.shape)
    lw, = w_shapes

    # Overwrite the lwork slot with the real work array length (as intc).
    builder.store(context.cast(builder, lw, types.intp, types.intc),
                  lwork)

    # Compute the matrix inverse.
    call_xxgetri(context, builder, xty, x_shapes, o.data, i.data, w.data,
                 lwork, info)

    info_val = builder.load(info)
    lapack_error = builder.icmp_signed('!=', info_val, zero)
    invalid_arg = builder.icmp_signed('<', info_val, zero)

    with builder.if_then(lapack_error, False):
        with builder.if_else(invalid_arg) as (then, otherwise):
            raise_err = context.call_conv.return_user_exc
            with then:
                raise_err(builder, ValueError,
                          ('One argument passed to getri is invalid',)
                          )
            with otherwise:
                raise_err(builder, ValueError,
                          ('Matrix is singular and cannot be inverted',)
                          )

    return impl_ret_borrowed(context, builder, sig.return_type, out)
def dot_3_mm(context, builder, sig, args):
    """
    np.dot(matrix, matrix, out)

    Shapes are validated by a compiled ``check_args`` helper (ValueError on
    mismatch) and the three extents go through ``check_c_int``.  At runtime
    the code then branches on whether ``m == 1`` and/or ``n == 1`` and
    dispatches to ``call_xxdot``, ``call_xxgemv`` or ``call_xxgemm``
    accordingly.  The ``out`` array value is returned as a borrowed
    reference.
    """
    xty, yty, outty = sig.args
    assert outty == sig.return_type
    dtype = xty.dtype

    x = make_array(xty)(context, builder, args[0])
    y = make_array(yty)(context, builder, args[1])
    out = make_array(outty)(context, builder, args[2])
    x_shapes = cgutils.unpack_tuple(builder, x.shape)
    y_shapes = cgutils.unpack_tuple(builder, y.shape)
    out_shapes = cgutils.unpack_tuple(builder, out.shape)
    m, k = x_shapes
    _k, n = y_shapes

    # The only case Numpy supports
    assert outty.layout == 'C'

    def check_args(a, b, out):
        m, k = a.shape
        _k, n = b.shape
        if k != _k:
            raise ValueError("incompatible array sizes for np.dot(a, b) "
                             "(matrix * matrix)")
        if out.shape != (m, n):
            raise ValueError("incompatible output array size for "
                             "np.dot(a, b, out) (matrix * matrix)")

    # Emit the runtime shape checks (the helper returns nothing).
    context.compile_internal(builder, check_args,
                             signature(types.none, *sig.args), args)

    check_c_int(context, builder, m)
    check_c_int(context, builder, k)
    check_c_int(context, builder, n)

    x_data = x.data
    y_data = y.data
    out_data = out.data

    # Check whether any of the operands is really a 1-d vector represented
    # as a (1, k) or (k, 1) 2-d array.  In those cases, it is pessimal
    # to call the generic matrix * matrix product BLAS function.
    one = ir.Constant(intp_t, 1)
    is_left_vec = builder.icmp_signed('==', m, one)
    is_right_vec = builder.icmp_signed('==', n, one)

    with builder.if_else(is_right_vec) as (r_vec, r_mat):
        with r_vec:
            with builder.if_else(is_left_vec) as (v_v, m_v):
                with v_v:
                    # V * V
                    call_xxdot(context, builder, False, dtype,
                               k, x_data, y_data, out_data)
                with m_v:
                    # M * V
                    do_trans = xty.layout == outty.layout
                    call_xxgemv(context, builder, do_trans,
                                xty, x_shapes, x_data, y_data, out_data)
        with r_mat:
            with builder.if_else(is_left_vec) as (v_m, m_m):
                with v_m:
                    # V * M
                    do_trans = yty.layout != outty.layout
                    call_xxgemv(context, builder, do_trans,
                                yty, y_shapes, y_data, x_data, out_data)
                with m_m:
                    # M * M
                    call_xxgemm(context, builder,
                                xty, x_shapes, x_data,
                                yty, y_shapes, y_data,
                                outty, out_shapes, out_data)

    return impl_ret_borrowed(context, builder, sig.return_type,
                             out._getvalue())
def getiter_list(context, builder, sig, args):
    """
    Lower ``iter(lst)`` by building a ``ListIterInstance`` from the list
    value.
    """
    list_val = args[0]
    iter_inst = ListIterInstance.from_list(context, builder,
                                           sig.return_type, list_val)
    return impl_ret_borrowed(context, builder, sig.return_type,
                             iter_inst.value)
def imp(context, builder, typ, val):
    """
    Load the record member located at ``offset`` and unpack it as an
    ``elemty`` value.  For non-aligned record types an explicit alignment
    of 1 is passed to ``unpack_value``; aligned types pass ``None``.
    """
    member_llty = context.get_data_type(elemty)
    dptr = cgutils.get_record_member(builder, val, offset, member_llty)
    if typ.aligned:
        align = None
    else:
        align = 1
    unpacked = self.unpack_value(builder, elemty, dptr, align)
    return impl_ret_borrowed(context, builder, typ, unpacked)
def imp(context, builder, typ, val):
    """
    Load the record member located at ``offset`` and unpack it as an
    ``elemty`` value.
    """
    member_llty = self.get_data_type(elemty)
    dptr = cgutils.get_record_member(builder, val, offset, member_llty)
    unpacked = self.unpack_value(builder, elemty, dptr)
    return impl_ret_borrowed(context, builder, typ, unpacked)
def imp(context, builder, typ, val):
    """
    Return the captured value ``llval`` as a borrowed reference of type
    ``attrty``; ``typ`` and ``val`` are not consulted.
    """
    result = llval
    return impl_ret_borrowed(context, builder, attrty, result)
def iterator_getiter(context, builder, sig, args):
    """
    Lower ``iter(it)`` for a value that is already an iterator: the single
    argument is returned unchanged as a borrowed reference.
    """
    (iterator,) = args
    return impl_ret_borrowed(context, builder, sig.return_type, iterator)
def list_add_inplace(context, builder, sig, args):
    """
    Lower ``lst += other`` by delegating to ``_list_extend_list`` and
    returning the extended list's value.  The element types of the first
    argument and of the return type are asserted to match.
    """
    source_dtype = sig.args[0].dtype
    result_dtype = sig.return_type.dtype
    assert source_dtype == result_dtype

    extended = _list_extend_list(context, builder, sig, args)
    return impl_ret_borrowed(context, builder, sig.return_type,
                             extended.value)
def lower_dist_rebalance_array_parallel(context, builder, sig, args):
    """
    Lower the parallel array rebalance operation.

    A kernel ``f(in_arr, count)`` is generated as source text, exec'd and
    compiled with ``compile_internal``.  The kernel allocates an output
    array with ``count`` rows (trailing dimensions copied from the input),
    copies the locally available prefix, gathers every rank's surplus or
    deficit with ``allgather`` and then pairs senders with receivers using
    ``isend``/``irecv`` before waiting on all requests.

    The only part of the source that depends on the signature is the
    allocation expression, which is built from the array's ``ndim``.

    NOTE(review): the template's line breaks and indentation below were
    re-established from the comments and control flow embedded in it --
    confirm the nesting against the intended algorithm.
    """
    arr_typ = sig.args[0]
    ndim = arr_typ.ndim
    # TODO: support string type

    # First extent is the requested count; the rest come from the input.
    shape_tup = ",".join(
        ["count"] + ["in_arr.shape[{}]".format(i) for i in range(1, ndim)])
    alloc_text = "np.empty(({}), in_arr.dtype)".format(shape_tup)

    func_text = """def f(in_arr, count):
    n_pes = hpat.distributed_api.get_size()
    my_rank = hpat.distributed_api.get_rank()
    out_arr = {}
    # copy old data
    old_len = len(in_arr)
    out_ind = 0
    for i in range(min(old_len, count)):
        out_arr[i] = in_arr[i]
        out_ind += 1
    # get diff data for all procs
    my_diff = old_len - count
    all_diffs = np.empty(n_pes, np.int64)
    hpat.distributed_api.allgather(all_diffs, my_diff)
    # alloc comm requests
    comm_req_ind = 0
    comm_reqs = hpat.distributed_api.comm_req_alloc(n_pes)
    req_ind = 0
    # for each potential receiver
    for i in range(n_pes):
        # if receiver
        if all_diffs[i] < 0:
            # for each potential sender
            for j in range(n_pes):
                # if sender
                if all_diffs[j] > 0:
                    send_size = min(all_diffs[j], -all_diffs[i])
                    # if I'm receiver
                    if my_rank == i:
                        buff = out_arr[out_ind:(out_ind+send_size)]
                        comm_reqs[comm_req_ind] = hpat.distributed_api.irecv(
                            buff, np.int32(buff.size), np.int32(j), np.int32(9))
                        comm_req_ind += 1
                        out_ind += send_size
                    # if I'm sender
                    if my_rank == j:
                        buff = np.ascontiguousarray(in_arr[out_ind:(out_ind+send_size)])
                        comm_reqs[comm_req_ind] = hpat.distributed_api.isend(
                            buff, np.int32(buff.size), np.int32(i), np.int32(9))
                        comm_req_ind += 1
                        out_ind += send_size
                    # update sender and receivers remaining counts
                    all_diffs[i] += send_size
                    all_diffs[j] -= send_size
                    # if receiver is done, stop sender search
                    if all_diffs[i] == 0: break
    hpat.distributed_api.waitall(np.int32(comm_req_ind), comm_reqs)
    hpat.distributed_api.comm_req_dealloc(comm_reqs)
    return out_arr
""".format(alloc_text)

    loc = {}
    exec(func_text, {'hpat': hpat, 'np': np}, loc)
    rebalance_impl = loc['f']

    res = context.compile_internal(builder, rebalance_impl, sig, args)
    return impl_ret_borrowed(context, builder, sig.return_type, res)
def set_inplace(context, builder, sig, args, op_impl=op_impl):
    """
    Lower an in-place set operation: run ``op_impl`` for its effect and
    return the first argument as a borrowed reference.  The return type is
    asserted to equal the first argument's type.
    """
    target_ty = sig.args[0]
    assert sig.return_type == target_ty

    op_impl(context, builder, sig, args)
    target_val = args[0]
    return impl_ret_borrowed(context, builder, target_ty, target_val)