def test_copy_arrays(self):
    """Run the compiled ``copy_arrays`` usecase on 1-d int32 arrays.

    For each tested length the destination must equal the source after the
    call.
    """
    array_1d = types.Array(types.int32, 1, 'A')
    compiled = compile_isolated(usecases.copy_arrays, (array_1d, array_1d))
    cfunc = compiled.entry_point
    for length in (0, 1, 10, 100):
        src = np.arange(length, dtype='int32')
        dst = np.empty_like(src)
        args = src, dst
        cfunc(*args)
        self.assertPreciseEqual(src, dst, msg=str(args))
def test_copy_complex64(self):
    """Compare the compiled and pure-Python ``copy`` on a complex64 array."""
    array_type = types.Array(types.complex64, 1, "C")
    cfunc = compile_isolated(copy, (array_type, array_type)).entry_point

    source = np.arange(10, dtype="complex64") + 1j
    expected = np.zeros_like(source)
    actual = np.zeros_like(source)

    # Fill one output with the interpreter and the other with compiled code.
    copy(source, expected)
    cfunc(source, actual)
    self.assertTrue(np.all(expected == actual))
def test_array_1d_record_mutate_npm(self, flags=no_pyobj_flags):
    """Check the compiled record-mutating usecase against the interpreter.

    Both versions are run on separate copies of the same 3-element record
    array and the resulting arrays are compared.
    """
    pyfunc = record_iter_mutate_usecase
    record_type = numpy_support.from_dtype(record_dtype)
    array_type = types.Array(record_type, 1, 'A')
    cfunc = compile_isolated(pyfunc, (array_type,), flags=flags).entry_point

    records = np.recarray(3, dtype=record_dtype)
    for idx in range(3):
        records[idx].a = float(idx * 2)
        records[idx].b = idx + 2

    # Each implementation mutates its own copy of the input.
    expected = records.copy()
    got = records.copy()
    pyfunc(expected)
    cfunc(got)
    self.assertPreciseEqual(expected, got)
def _get_df_col_type(dtype):
    """Map a typed ``dtype`` argument to the column type used to store it.

    Handled forms of ``dtype``:

    * ``types.Function`` whose ``typing_key`` is ``int``, ``float`` or ``str``
    * ``types.StringLiteral`` (``'str'`` or any name accepted by ``np.dtype``)
    * ``types.NumberClass``
    * ``CategoricalDtypeType``

    Returns the matching 1-d C-contiguous ``types.Array``,
    ``string_array_type`` or ``Categorical`` type, or ``None`` when ``dtype``
    is none of the forms above.

    Raises ``AssertionError`` for a ``types.Function`` with an unsupported
    ``typing_key``.
    """
    if isinstance(dtype, types.Function):
        key = dtype.typing_key
        if key == int:
            return types.Array(types.int_, 1, 'C')
        if key == float:
            return types.Array(types.float64, 1, 'C')
        if key == str:
            return string_array_type
        # Raise explicitly: an ``assert`` statement is stripped under
        # ``python -O`` and the function would then silently return None.
        raise AssertionError(
            f"map_dtype_to_col_type: failing to infer column type for dtype={dtype}")

    if isinstance(dtype, types.StringLiteral):
        if dtype.literal_value == 'str':
            return string_array_type
        return types.Array(
            numba.from_dtype(np.dtype(dtype.literal_value)), 1, 'C')

    if isinstance(dtype, types.NumberClass):
        return types.Array(dtype.dtype, 1, 'C')

    if isinstance(dtype, CategoricalDtypeType):
        return Categorical(dtype)

    return None
def test_array_slicing(self):
    """Compare compiled and interpreted ``slicing`` on 1-d and 2-d arrays."""
    pyfunc = usecases.slicing
    slice_params = [
        (0, 10, 1),
        (0, 10, 2),
        (0, 10, -1),
        (2, 3, 1),
        (10, 0, 1),
    ]
    inputs = [
        (1, np.arange(10, dtype='i4')),
        (2, np.arange(100, dtype='i4').reshape(10, 10)),
    ]
    for ndim, arr in inputs:
        # One compilation per dimensionality, reused for every slice.
        arraytype = types.Array(types.int32, ndim, 'C')
        argtys = (arraytype, types.intp, types.intp, types.intp)
        cfunc = compile_isolated(pyfunc, argtys,
                                 flags=enable_pyobj_flags).entry_point
        for start, stop, step in slice_params:
            args = (arr, start, stop, step)
            self.assertTrue(np.all(pyfunc(*args) == cfunc(*args)))
def test_unituple_index_error(self):
    """Out-of-range indexing of ``a.shape`` must raise ``IndexError``."""
    def pyfunc(a, i):
        return a.shape[i]

    arg_types = (types.Array(types.int32, 1, 'A'), types.int32)
    cfunc = compile_isolated(pyfunc, arg_types).entry_point

    arr = np.empty(2, dtype=np.int32)

    # A valid index agrees with the interpreter.
    self.assertEqual(cfunc(arr, 0), pyfunc(arr, 0))

    # An invalid index raises with the usual tuple error message.
    with self.assertRaises(IndexError) as raised:
        cfunc(arr, 2)
    self.assertEqual(str(raised.exception), "tuple index out of range")
def test_comparison_operators(self):
    # see issue #4163, checks that a variable that is an argument and has
    # value None survives TypeError from invalid comparison which should be
    # dead

    def impl(array, a=None):
        x = 0
        if a is None:
            return 10  # dynamic exec would return here
        # static analysis requires that this is executed with a=None,
        # hence TypeError
        if a < 0:
            return 20
        return x

    array_type = types.Array(types.float64, 2, 'C')

    # ``a`` typed as None
    none_signature = (array_type, types.NoneType('none'))
    self.assert_prune(impl, none_signature, [False, 'both'],
                      np.zeros((2, 3)), None)

    # ``a`` typed as float64
    float_signature = (array_type, types.float64)
    self.assert_prune(impl, float_signature, [None, None],
                      np.zeros((2, 3)), 12.)
def test_redefined_variables_are_not_considered_in_prune(self):
    # see issue #4163, checks that if a variable that is an argument is
    # redefined in the user code it is not considered const

    def impl(array, a=None):
        if a is None:
            a = 0
        if a < 0:
            return 10
        return 30

    arg_types = (types.Array(types.float64, 2, 'C'), types.NoneType('none'))
    expected_prune = [None, None]
    self.assert_prune(impl, arg_types, expected_prune,
                      np.zeros((2, 3)), None)
def test_tuple_of_arrays_iter(self):
    # We used to leak a reference to each element of the tuple
    def bar(arrs):
        total = 0
        for arr in arrs:
            total += arr[0]
        return total

    array_type = types.Array(types.int32, 1, 'C')
    tuple_type = types.containers.UniTuple(array_type, 2)
    compiled = compile_isolated(bar, (tuple_type,))

    # The same array object is used for both tuple elements.
    x = y = np.arange(3, dtype=np.int32)
    pair = (x, y)
    self.assertEqual(bar(pair), compiled.entry_point(pair))
def test_incref_after_cast(self):
    # Issue #1427: when casting a value before returning it, the
    # cast result should be incref'ed, not the original value.
    def f():
        return 0.0, np.zeros(1, dtype=np.int32)

    # Note the return type isn't the same as the tuple type above:
    # the first element is a complex rather than a float.
    return_type = types.Tuple((
        types.complex128,
        types.Array(types.int32, 1, 'C'),
    ))
    cres = compile_isolated(f, (), return_type)

    scalar, array = cres.entry_point()
    self.assertPreciseEqual(scalar, 0j)
    self.assertPreciseEqual(array, np.zeros(1, dtype=np.int32))
def box_series(typ, val, c):
    """Box a native series value into a Python ``pandas.Series`` object.

    The data is boxed with the helper matching ``typ.dtype`` (string array,
    datetime.date array, or a plain 1-d C-contiguous array) and passed as the
    only argument to ``Series``, looked up on the imported ``pandas`` module.
    """
    elem_type = typ.dtype
    if elem_type == string_type:
        boxed_data = box_str_arr(typ, val, c)
    elif elem_type == datetime_date_type:
        boxed_data = box_datetime_date_array(typ, val, c)
    else:
        boxed_data = box_array(types.Array(elem_type, 1, 'C'), val, c)

    pyapi = c.pyapi
    pandas_name = c.context.insert_const_string(c.builder.module, "pandas")
    pandas_module = pyapi.import_module_noblock(pandas_name)
    series_obj = pyapi.call_method(pandas_module, "Series", (boxed_data,))
    # The module reference is no longer needed once the call has been made.
    pyapi.decref(pandas_module)
    return series_obj
def resolve_argsort(self, ary, args, kws):
    """Type ``ary.argsort(kind=...)`` for one-dimensional arrays.

    Only the ``kind`` keyword is accepted; any other keyword raises
    ``TypingError``. Returns a signature yielding a 1-d C-contiguous intp
    array, or ``None`` when ``ary`` is not one-dimensional.
    """
    assert not args
    options = dict(kws)
    kind = options.pop('kind', types.StringLiteral('quicksort'))
    if options:
        raise TypingError("Unsupported keywords: {!r}".format(list(options)))

    if ary.ndim != 1:
        return None

    # The stub only supplies the Python-level signature.
    def argsort_stub(kind='quicksort'):
        pass

    pysig = utils.pysignature(argsort_stub)
    result_type = types.Array(types.intp, 1, 'C')
    return signature(result_type, kind).replace(pysig=pysig)
def test_2d_slicing(self, flags=enable_pyobj_flags):
    """
    arr_2d[a:b:c]
    """
    pyfunc = slicing_1d_usecase
    arraytype = types.Array(types.int32, 2, 'C')
    argtys = (arraytype,) + (types.int32,) * 3
    cfunc = compile_isolated(pyfunc, argtys, flags=flags).entry_point

    matrix = np.arange(100, dtype='i4').reshape(10, 10)
    slice_params = [
        (0, 10, 1),
        (2, 3, 1),
        (10, 0, 1),
        (0, 10, -1),
        (0, 10, 2),
    ]
    for start, stop, step in slice_params:
        matches = pyfunc(matrix, start, stop, step) == cfunc(matrix, start, stop, step)
        self.assertTrue(matches.all())
def unbox_datetime_date_array(typ, val, c):
    """Unbox a Python sequence of dates into a native 1-d intp array.

    Each element is fetched with ``sequence_getitem``, unboxed with
    ``unbox_datetime_date`` and stored at the same index of a freshly
    allocated array. The returned ``NativeValue`` carries an error flag set
    when a Python error is pending after the loop.
    """
    n = object_length(c, val)
    int_array_type = types.Array(types.intp, 1, 'C')
    result = _empty_nd_impl(c.context, c.builder, int_array_type, [n])

    with cgutils.for_range(c.builder, n) as loop:
        py_date = sequence_getitem(c, val, loop.index)
        date_as_int = unbox_datetime_date(datetime_date_type, py_date, c).value
        # Only the data pointer is needed; shapes and strides are ignored.
        item_ptr, _shapes, _strides = basic_indexing(
            c.context, c.builder, int_array_type, result,
            (types.intp,), (loop.index,))
        store_item(c.context, c.builder, int_array_type, date_as_int, item_ptr)

    error_flag = cgutils.is_not_null(c.builder, c.pyapi.err_occurred())
    return NativeValue(result._getvalue(), is_error=error_flag)
def test_2d_slicing(self, flags=enable_pyobj_flags):
    """
    arr_2d[a:b:c]
    """
    pyfunc = slicing_1d_usecase
    arraytype = types.Array(types.int32, 2, 'C')
    argtys = (arraytype,) + (types.int32,) * 3
    cfunc = compile_isolated(pyfunc, argtys, flags=flags).entry_point

    matrix = np.arange(100, dtype='i4').reshape(10, 10)
    slice_params = (
        (0, 10, 1),
        (2, 3, 1),
        (10, 0, 1),
        (0, 10, -1),
        (0, 10, 2),
    )
    for args in slice_params:
        expected = pyfunc(matrix, *args)
        got = cfunc(matrix, *args)
        self.assertPreciseEqual(expected, got,
                                msg="for args %s" % (args, ))
def test_copy_arrays2d(self):
    """Run the compiled ``copy_arrays2d`` usecase on 2-d int32 arrays.

    For each tested shape the destination must equal the source after the
    call.
    """
    array_2d = types.Array(types.int32, 2, 'A')
    compiled = compile_isolated(usecases.copy_arrays2d, (array_2d, array_2d))
    cfunc = compiled.entry_point

    shapes = ((0, 0), (1, 1), (2, 5), (4, 25))
    for rows, cols in shapes:
        src = np.arange(rows * cols, dtype='int32').reshape(rows, cols)
        dst = np.empty_like(src)
        args = src, dst
        cfunc(*args)
        self.assertPreciseEqual(src, dst, msg=str(args))
def _get_dset_type(self, lhs, file_varname, dset_varname):
    """get data set type from user-specified locals types or actual file

    Lookup order:

    1. ``self.local_vars[lhs]``, when ``lhs`` has a user-specified type;
    2. the type of the variable ``lhs`` maps to in ``self.reverse_copies``;
    3. when both the file and data set variable names are keys of
       ``self.str_const_table``, the HDF5 file is opened read-only and the
       data set's rank and dtype are read from it.

    Returns a C-contiguous ``types.Array`` in case 3, otherwise the stored
    type. Raises ``RuntimeError`` when none of the sources applies.
    """
    if lhs in self.local_vars:
        return self.local_vars[lhs]
    if self.reverse_copies[lhs] in self.local_vars:
        return self.local_vars[self.reverse_copies[lhs]]

    if file_varname in self.str_const_table and dset_varname in self.str_const_table:
        file_name = self.str_const_table[file_varname]
        dset_str = self.str_const_table[dset_varname]
        # Close the file even if the lookup or dtype conversion raises;
        # previously the handle was never closed.
        with h5py.File(file_name, "r") as f:
            dset = f[dset_str]
            ndims = len(dset.shape)
            numba_dtype = numpy_support.from_dtype(dset.dtype)
        return types.Array(numba_dtype, ndims, 'C')

    raise RuntimeError("data set type not found")
def test_tuple_iter_issue1504(self):
    # The issue is due to `row` being typed as heterogeneous tuple.
    def bar(x, y):
        total = 0
        for row in zip(x, y):
            total += row[0] + row[1]
        return total

    array_type = types.Array(types.int32, 1, 'C')
    compiled = compile_isolated(bar, (array_type, array_type))

    # Both arguments refer to the same array object.
    x = y = np.arange(3, dtype=np.int32)
    self.assertEqual(bar(x, y), compiled.entry_point(x, y))
def test_copy_arrays(self):
    """Run the compiled ``copy_arrays`` usecase on 1-d int32 arrays.

    For each tested length the destination must equal the source after the
    call. The arguments of a failing case are reported through the assertion
    message rather than printed on every run.
    """
    pyfunc = usecases.copy_arrays
    arraytype = types.Array(types.int32, 1, 'A')
    cr = compile_isolated(pyfunc, (arraytype, arraytype))
    cfunc = cr.entry_point

    for nd in (0, 1, 10, 100):
        a = np.arange(nd, dtype='int32')
        b = np.empty_like(a)
        args = a, b
        cfunc(*args)
        # The message is built after the call so it shows the final state.
        self.assertTrue(np.all(a == b), msg=str(args))
def test_print(self):
    """Check what the compiled ``print_value`` writes to stdout.

    Covers int32, int64, uint64, float32, float64 and, in object mode, a
    1-d int32 array.
    """
    pyfunc = print_value

    def check_values(typ, values):
        # Compile for one scalar type and compare the output to str(val).
        cfunc = compile_isolated(pyfunc, (typ, )).entry_point
        for val in values:
            with captured_stdout():
                cfunc(val)
                self.assertEqual(sys.stdout.getvalue(), str(val) + '\n')

    check_values(types.int32, (1, -234))
    check_values(types.int64,
                 (1, -234, 123456789876543210, -123456789876543210))
    check_values(types.uint64, (1, 234, 123456789876543210, 2**63 + 123))

    cr = compile_isolated(pyfunc, (types.float32, ))
    cfunc = cr.entry_point
    with captured_stdout():
        cfunc(1.1)
        # Float32 will lose precision
        got = sys.stdout.getvalue()
        expect = '1.10000002384'
        self.assertTrue(got.startswith(expect))
        self.assertTrue(got.endswith('\n'))

    cr = compile_isolated(pyfunc, (types.float64, ))
    cfunc = cr.entry_point
    with captured_stdout():
        cfunc(100.0**10.0)
        self.assertEqual(sys.stdout.getvalue(), '1e+20\n')

    # Array will have to use object mode
    arraytype = types.Array(types.int32, 1, 'C')
    cr = compile_isolated(pyfunc, (arraytype, ), flags=enable_pyobj_flags)
    cfunc = cr.entry_point
    with captured_stdout():
        # Pass an int32 array so the argument matches the declared type;
        # np.arange(10) alone uses the platform default integer dtype.
        cfunc(np.arange(10, dtype=np.int32))
        self.assertEqual(sys.stdout.getvalue(),
                         '[0 1 2 3 4 5 6 7 8 9]\n')
def test_print_values(self):
    """
    Test printing a single argument value.
    """
    pyfunc = print_value

    def check_values(typ, values):
        # Compile for one argument type and compare the output to str(val).
        cfunc = compile_isolated(pyfunc, (typ, )).entry_point
        for val in values:
            with captured_stdout():
                cfunc(val)
                self.assertEqual(sys.stdout.getvalue(), str(val) + '\n')

    # Various scalars
    scalar_cases = [
        (types.int32, (1, -234)),
        (types.int64, (1, -234, 123456789876543210, -123456789876543210)),
        (types.uint64, (1, 234, 123456789876543210, 2**63 + 123)),
        (types.boolean, (True, False)),
        (types.float64, (1.5, 100.0**10.0, float('nan'))),
        (types.complex64, (1 + 1j, )),
        (types.NPTimedelta('ms'), (np.timedelta64(100, 'ms'), )),
    ]
    for typ, values in scalar_cases:
        check_values(typ, values)

    float32_func = compile_isolated(pyfunc, (types.float32, )).entry_point
    with captured_stdout():
        float32_func(1.1)
        # Float32 will lose precision
        printed = sys.stdout.getvalue()
        self.assertTrue(printed.startswith('1.10000002384'))
        self.assertTrue(printed.endswith('\n'))

    # NRT-enabled type
    with self.assertNoNRTLeak():
        x = [1, 3, 5, 7]
        with self.assertRefCount(x):
            check_values(types.List(types.int32), (x, ))

    # Array will have to use object mode
    arraytype = types.Array(types.int32, 1, 'C')
    array_func = compile_isolated(pyfunc, (arraytype, ),
                                  flags=enable_pyobj_flags).entry_point
    with captured_stdout():
        array_func(np.arange(10, dtype=np.int32))
        self.assertEqual(sys.stdout.getvalue(),
                         '[0 1 2 3 4 5 6 7 8 9]\n')
def get_numba_array_types_for_csv(df):
    """Return one Numba array type per column dtype of ``df``.

    A dtype that ``numpy_support.from_dtype`` converts becomes a 1-d
    C-contiguous ``types.Array``. A dtype it rejects with
    ``NotImplementedError`` (or converts to a falsy value) falls back to
    ``string_array_type``, the default type for CSV.
    """
    def to_array_type(numpy_type):
        try:
            element_type = numpy_support.from_dtype(numpy_type)
        except NotImplementedError:
            return string_array_type
        if not element_type:
            return string_array_type
        return types.Array(element_type, 1, 'C')

    return [to_array_type(numpy_type) for numpy_type in df.dtypes.values]
def part_dict_init(train):
    """Build a typed dict of per-user, per-part answer statistics.

    ``train`` is indexed by the columns ``user_id``, ``part`` and
    ``answered_correctly``. Every distinct ``user_id`` is mapped to a
    ``(7, 2)`` uint16 matrix initialised to zero. For each
    ``(user_id, part)`` group, row ``part - 1`` of that user's matrix is set
    to the ``[sum, count]`` of ``answered_correctly``.

    NOTE(review): the ``(7, 2)`` shape and the ``part - 1`` row index suggest
    ``part`` ranges over 1..7; confirm against the data.

    Returns the populated ``numba.typed.Dict``.
    """
    @jit(nopython=True)
    def wrap_cal(part_dict, vals):
        for idx in prange(vals.shape[0]):
            val = vals[idx]
            # np.int was deprecated in NumPy 1.20 and removed in 1.24; use
            # the explicit fixed-width type, as for the key.
            part_dict[np.int64(val[0])][np.int64(val[1]) - 1] = val[2:]
        return part_dict

    uint16_matrix = types.Array(types.uint16, 2, 'A')
    part_dict = Dict.empty(key_type=types.int64, value_type=uint16_matrix)
    for idx in train['user_id'].unique():
        part_dict[idx] = np.zeros((7, 2), dtype=np.uint16)

    # One row per (user_id, part): user_id, part, sum, count.
    tmp = train.groupby(['user_id', 'part'])['answered_correctly'].agg(
        ['sum', 'count']).reset_index()
    part_dict = wrap_cal(part_dict, tmp.values)
    return part_dict
def _get_h5_type_file(self, val_def, obj_name_list):
    """Infer the array type of an HDF5 data set by opening the file.

    ``val_def`` must be an ``h5py.File(...)`` call whose first argument is a
    constant string. ``obj_name_list`` holds the names leading to the data
    set, innermost first; it is reversed in place before the file is walked.

    Returns a C-contiguous ``types.Array`` with the data set's rank and
    dtype. Any failed ``require`` check raises whatever ``require`` raises.
    """
    require(len(obj_name_list) > 0)
    require(find_callname(self.func_ir, val_def) == ('File', 'h5py'))
    require(len(val_def.args) > 0)
    f_name = find_str_const(self.func_ir, val_def.args[0])
    # NOTE: mutates the caller's list, as before.
    obj_name_list.reverse()

    import h5py
    # The context manager closes the file even when a lookup or the Dataset
    # check below raises; previously the handle leaked on those paths.
    with h5py.File(f_name, 'r') as f:
        obj = f
        for obj_name in obj_name_list:
            obj = obj[obj_name]
        require(isinstance(obj, h5py.Dataset))
        ndims = len(obj.shape)
        numba_dtype = numba.numpy_support.from_dtype(obj.dtype)
    return types.Array(numba_dtype, ndims, 'C')
def __init__(self, context, builder, args, dims, steps, i, step_offset,
             typ, syms, sym_dim):
    """Build an array structure for argument ``i`` of the kernel.

    The element type comes from ``typ`` (its ``dtype`` when it is an array
    type). The data pointer is loaded from ``args[i]``, the core step from
    ``steps[i]``, one stride per symbol from ``steps`` starting at
    ``step_offset``, and the shape from ``sym_dim`` for each symbol in
    ``syms``.
    """
    def intp_const(value):
        return context.get_constant(types.intp, value)

    self.dtype = typ.dtype if isinstance(typ, types.Array) else typ
    self.syms = syms
    self.ndim = len(syms)

    core_step_ptr = builder.gep(steps, [intp_const(i)],
                                name="core.step.ptr")
    self.core_step = builder.load(core_step_ptr)

    # One stride per symbolic dimension.
    self.strides = [
        builder.load(builder.gep(steps, [intp_const(step_offset + j)],
                                 name="step.ptr"))
        for j in range(self.ndim)
    ]
    self.shape = [sym_dim[s] for s in syms]

    data_ptr = builder.gep(args, [intp_const(i)], name="data.ptr")
    self.data = builder.load(data_ptr, name="data")

    array_type = types.Array(dtype=self.dtype, ndim=self.ndim, layout="A")
    array_struct = context.make_array(array_type)(context, builder)
    array_struct.data = builder.bitcast(self.data, array_struct.data.type)
    array_struct.shape = cgutils.pack_array(builder, self.shape)
    array_struct.strides = cgutils.pack_array(builder, self.strides)
    self.array = array_struct

    self.array_value = self.array._getpointer()
    self.builder = builder
def generic(self, args, kws):
    """Type a one-argument call, mapping the argument to its array form.

    * list of numbers or booleans -> 1-d C-contiguous array of that dtype
    * list of strings (or optional strings) -> ``string_array_type``
    * ``SeriesType`` -> its ``data`` type
    * anything else -> returned unchanged
    """
    assert not kws
    assert len(args) == 1
    column = types.unliteral(args[0])
    ret_typ = column

    if isinstance(column, types.List):
        elem = column.dtype
        if isinstance(elem, types.Number) or elem == types.boolean:
            ret_typ = types.Array(elem, 1, 'C')
        is_optional_str = (isinstance(elem, types.Optional)
                           and elem.type == string_type)
        if elem == string_type or is_optional_str:
            ret_typ = string_array_type

    if isinstance(column, SeriesType):
        ret_typ = column.data

    # TODO: add other types
    return signature(ret_typ, column)
def run_logical_array(self, pyfunc, flags=force_pyobj_flags):
    """Run ``pyfunc`` through ``run_test_ints`` on shifted int32 arrays.

    The operands are two views of one array holding the int32 extremes
    followed by -10..9: all but the last element, and all but the first.
    """
    int32 = np.int32
    extremes = np.array([-(2**31), 2**31 - 1], dtype=int32)
    small_range = np.arange(-10, 10, dtype=int32)
    values = np.concatenate([extremes, small_range])

    int32_1d = types.Array(types.int32, 1, 'C')
    self.run_test_ints(
        pyfunc,
        [values[:-1]],
        [values[1:]],
        [(int32_1d, int32_1d)],
        flags=flags,
    )
def _get_dset_type(self, lhs, file_var, dset_var):
    """get data set type from user-specified locals types or actual file

    Lookup order:

    1. ``self.local_vars[lhs]``, when ``lhs`` has a user-specified type;
    2. the type of the variable ``lhs`` maps to in ``self.reverse_copies``;
    3. when both ``file_var`` and ``dset_var`` resolve to constants, the
       HDF5 file is opened read-only and the data set's rank and dtype are
       read from it.

    Returns a C-contiguous ``types.Array`` in case 3, otherwise the stored
    type. Raises ``RuntimeError`` when none of the sources applies.
    """
    if lhs in self.local_vars:
        return self.local_vars[lhs]
    if self.reverse_copies[lhs] in self.local_vars:
        return self.local_vars[self.reverse_copies[lhs]]

    # read type from file if file name and dset name are constant values
    # TODO: check for file availability
    file_name = get_constant(self.func_ir, file_var)
    dset_str = get_constant(self.func_ir, dset_var)
    if file_name is not NOT_CONSTANT and dset_str is not NOT_CONSTANT:
        # Close the file even if the lookup or dtype conversion raises;
        # previously the handle was never closed.
        with h5py.File(file_name, "r") as f:
            dset = f[dset_str]
            ndims = len(dset.shape)
            numba_dtype = numpy_support.from_dtype(dset.dtype)
        return types.Array(numba_dtype, ndims, 'C')

    raise RuntimeError("data set type not found")
def test_array_expr(self):
    """Time the array-expression Black-Scholes in Python and compiled form.

    Prints both timings, the speedup and error norms, and requires the call
    results of the two versions to be identical.
    """
    global cnd_array_jitted

    flags = Flags()
    flags.set("enable_pyobject")

    scalty = types.float64
    arrty = types.Array(scalty, 1, 'C')
    # The compiled Black-Scholes function looks this global up at run time.
    cnd_array_jitted = compile_isolated(cnd_array, args=(arrty, ),
                                        flags=flags).entry_point
    jitted_bs = compile_isolated(
        blackscholes_arrayexpr_jitted,
        args=(arrty, arrty, arrty, scalty, scalty),
        flags=flags).entry_point

    OPT_N = 400
    iterations = 10

    stockPrice = randfloat(self.random.random_sample(OPT_N), 5.0, 30.0)
    optionStrike = randfloat(self.random.random_sample(OPT_N), 1.0, 100.0)
    optionYears = randfloat(self.random.random_sample(OPT_N), 0.25, 10.0)
    args = stockPrice, optionStrike, optionYears, RISKFREE, VOLATILITY

    def run_timed(func):
        # Returns the elapsed time and the call result of the last run.
        started = timer()
        for _ in range(iterations):
            call_result, _put_result = func(*args)
        return timer() - started, call_result

    pytime, callResultGold = run_timed(blackscholes_arrayexpr)
    jittime, callResultNumba = run_timed(jitted_bs)

    print("Python", pytime)
    print("Numba", jittime)
    print("Speedup: %s" % (pytime / jittime))

    delta = np.abs(callResultGold - callResultNumba)
    L1norm = delta.sum() / np.abs(callResultGold).sum()
    print("L1 norm: %E" % L1norm)
    print("Max absolute error: %E" % delta.max())
    self.assertEqual(delta.max(), 0)
def sum_expand(self, args, kws):
    """
    sum can be called with or without an axis parameter.
    """
    assert len(args) <= 1
    recvr = self.this
    element_type = _expand_integer(recvr.dtype)

    if not args:
        # No axis parameter, so the summation returns a scalar of the
        # (expanded) element type of the array.
        return signature(element_type, *args, recvr=recvr)

    # There is an axis parameter, so the summation returns an array with
    # one dimension fewer than the input array.
    reduced_type = types.Array(dtype=element_type, ndim=recvr.ndim - 1,
                               layout='C')
    return signature(reduced_type, *args, recvr=recvr)