def jit_filter_function(filter_function):
    """Wrap ``filter_function`` as a ``LowLevelCallable`` filter callback.

    ``filter_function`` is compiled in nopython mode and is called with a
    1-D ``float64`` view over the buffer handed to the C callback.  Its
    return value is written to the callback's single output slot.

    Returns a ``LowLevelCallable`` built from the compiled C callback.
    """
    compiled_filter = numba.jit(filter_function, nopython=True)
    callback_signature = intc(
        CPointer(float64), intp, CPointer(float64), voidptr)

    @cfunc(callback_signature)
    def wrapped(buffer_ptr, buffer_len, out_ptr, user_data):
        # View the raw C buffer as a NumPy-like array without copying.
        window = carray(buffer_ptr, (buffer_len,), dtype=float64)
        out_ptr[0] = compiled_filter(window)
        # The callback always reports status 1 to its caller.
        return 1

    return LowLevelCallable(wrapped.ctypes)
def jit_filter_function(filter_function):
    """Decorator for use with scipy.ndimage.generic_filter.

    Compiles ``filter_function`` in nopython mode and exposes it through a
    C callback that passes it a 1-D ``float64`` view of the input buffer
    and stores the returned value in ``result[0]``.
    """
    nopython_filter = numba.jit(filter_function, nopython=True)

    def wrapped(values_ptr, len_values, result, data):
        # Zero-copy array view over the C buffer.
        neighbourhood = carray(values_ptr, (len_values,), dtype=float64)
        result[0] = nopython_filter(neighbourhood)
        return 1

    c_signature = intc(CPointer(float64), intp, CPointer(float64), voidptr)
    c_callback = cfunc(c_signature)(wrapped)
    return LowLevelCallable(c_callback.ctypes)
def jit_geometric_function(geometric_function):
    """Wrap ``geometric_function`` as a ``LowLevelCallable`` mapping callback.

    ``geometric_function`` is compiled in nopython mode and is called as
    ``geometric_function(output_coords, input_coords)`` where

    * ``output_coords`` is an ``intp`` view of length ``output_rank`` and
    * ``input_coords`` is a ``float64`` view of length ``input_rank``.

    Its return value is ignored, so it has to store its results in
    ``input_coords``.

    Returns a ``LowLevelCallable`` built from the compiled C callback.
    """
    jitted_function = numba.jit(geometric_function, nopython=True)

    @cfunc(intc(CPointer(intp), CPointer(float64), intc, intc, voidptr))
    def wrapped(output_ptr, input_ptr, output_rank, input_rank, user_data):
        output_coords = carray(output_ptr, (output_rank,), dtype=intp)
        # The input coordinate buffer has ``input_rank`` entries; sizing it
        # with ``output_rank`` gives a wrong-length view whenever the input
        # and output arrays have different ranks.
        input_coords = carray(input_ptr, (input_rank,), dtype=float64)
        jitted_function(output_coords, input_coords)
        # The callback always reports status 1 to its caller.
        return 1

    return LowLevelCallable(wrapped.ctypes)
def cho_impl(a):
    """Return a Cholesky implementation specialised for the array type ``a``.

    The outer function works on the array *type*: it validates it, builds
    the ``numba_xxpotrf`` external-function signature from ``a.dtype`` and
    selects the BLAS kind.  The returned inner function works on the actual
    array and returns a copy holding the factor with everything above the
    diagonal zeroed.

    The inner function raises ``np.linalg.LinAlgError`` if the last two
    dimensions are not square or if the matrix is not positive definite.
    """
    ensure_lapack()

    _check_linalg_matrix(a, "cholesky")

    xxpotrf_sig = types.intc(types.int8, types.int8, types.intp,
                             types.CPointer(a.dtype), types.intp)
    xxpotrf = types.ExternalFunction("numba_xxpotrf", xxpotrf_sig)

    kind = ord(get_blas_kind(a.dtype, "cholesky"))
    UP = ord('U')

    def cho_impl(a):
        n = a.shape[-1]
        if a.shape[-2] != n:
            msg = "Last 2 dimensions of the array must be square."
            raise np.linalg.LinAlgError(msg)

        # The output is allocated in C order
        out = a.copy()

        # Nothing to factorise for a 0x0 matrix.  Calling LAPACK here would
        # pass a leading dimension of 0, which it rejects as an invalid
        # argument and which would end up in the fatal-error path below.
        if n == 0:
            return out

        # Pass UP since xxpotrf() operates in F order
        # The semantics ensure this works fine
        # (out is really its Hermitian in F order, but UP instructs
        #  xxpotrf to compute the Hermitian of the upper triangle
        #  => they cancel each other)
        r = xxpotrf(kind, UP, n, out.ctypes, n)
        if r < 0:
            fatal_error_func()
            assert 0  # unreachable
        if r > 0:
            raise np.linalg.LinAlgError(
                "Matrix is not positive definite.")

        # Zero out upper triangle, in F order
        for col in range(n):
            out[:col, col] = 0
        return out

    return cho_impl
def svd_impl(a, full_matrices=1):
    """Return an SVD implementation specialised for the array type ``a``.

    The outer function works on the array *type*: it validates it, records
    whether the layout is Fortran-ordered, derives the real dtype used for
    the singular values and builds the ``numba_ez_gesdd`` external-function
    signature.  The returned inner function works on the actual array and
    returns ``(u, s, vt)``.

    The inner function raises ``np.linalg.LinAlgError`` when the input is
    empty, contains infs/NaNs, or when LAPACK reports that the
    decomposition did not converge.
    """
    ensure_lapack()

    _check_linalg_matrix(a, "svd")

    F_layout = a.layout == 'F'

    # convert typing floats to numpy floats for use in the impl
    s_type = getattr(a.dtype, "underlying_float", a.dtype)
    if s_type.bitwidth == 32:
        s_dtype = np.float32
    else:
        s_dtype = np.float64

    numba_ez_gesdd_sig = types.intc(
        types.char,  # kind
        types.char,  # jobz
        types.intp,  # m
        types.intp,  # n
        types.CPointer(a.dtype),  # a
        types.intp,  # lda
        types.CPointer(s_type),  # s
        types.CPointer(a.dtype),  # u
        types.intp,  # ldu
        types.CPointer(a.dtype),  # vt
        types.intp  # ldvt
    )

    numba_ez_gesdd = types.ExternalFunction("numba_ez_gesdd",
                                            numba_ez_gesdd_sig)

    kind = ord(get_blas_kind(a.dtype, "svd"))

    JOBZ_A = ord('A')
    JOBZ_S = ord('S')

    def svd_impl(a, full_matrices=1):
        n = a.shape[-1]
        m = a.shape[-2]

        # An empty input would hand LAPACK zero leading dimensions, which
        # it rejects as invalid arguments (ending in the fatal-error path
        # below); report it as a regular exception instead.
        if n == 0 or m == 0:
            raise np.linalg.LinAlgError("Arrays cannot be empty")

        _check_finite_matrix(a)

        # LAPACK overwrites its input, so always work on an F-ordered copy.
        if F_layout:
            acpy = np.copy(a)
        else:
            acpy = np.asfortranarray(a)

        ldu = m
        minmn = min(m, n)

        if full_matrices:
            JOBZ = JOBZ_A
            ucol = m
            ldvt = n
        else:
            JOBZ = JOBZ_S
            ucol = minmn
            ldvt = minmn

        # u and vt are allocated transposed (C order) so that LAPACK can
        # fill them in F order; they are transposed back on return.
        u = np.empty((ucol, ldu), dtype=a.dtype)
        s = np.empty(minmn, dtype=s_dtype)
        vt = np.empty((n, ldvt), dtype=a.dtype)

        r = numba_ez_gesdd(
            kind,  # kind
            JOBZ,  # jobz
            m,  # m
            n,  # n
            acpy.ctypes,  # a
            m,  # lda
            s.ctypes,  # s
            u.ctypes,  # u
            ldu,  # ldu
            vt.ctypes,  # vt
            ldvt  # ldvt
        )
        if r < 0:
            fatal_error_func()
            assert 0  # unreachable
        if r > 0:
            # A positive status means the routine did not converge; the
            # output buffers do not hold a valid decomposition.
            raise np.linalg.LinAlgError("SVD did not converge.")

        # help liveness analysis
        acpy.size
        vt.size
        u.size
        s.size

        return (u.T, s, vt.T)

    return svd_impl
def eig_impl(a):
    """Return an ``eig`` implementation specialised for the array type ``a``.

    The outer function works on the array *type*: it validates it, builds
    the ``numba_ez_rgeev`` / ``numba_ez_cgeev`` external-function
    signatures and returns the complex implementation when ``a.dtype`` is a
    complex type, the real one otherwise.

    Both implementations return ``(eigenvalues, right_eigenvectors)`` and
    raise ``np.linalg.LinAlgError`` when the input is not square, contains
    infs/NaNs, or when LAPACK reports that the eigenvalue computation did
    not converge.  The real implementation additionally raises
    ``ValueError`` if any eigenvalue has a non-zero imaginary part.
    """
    ensure_lapack()

    _check_linalg_matrix(a, "eig")

    numba_ez_rgeev_sig = types.intc(types.char,  # kind
                                    types.char,  # jobvl
                                    types.char,  # jobvr
                                    types.intp,  # n
                                    types.CPointer(a.dtype),  # a
                                    types.intp,  # lda
                                    types.CPointer(a.dtype),  # wr
                                    types.CPointer(a.dtype),  # wi
                                    types.CPointer(a.dtype),  # vl
                                    types.intp,  # ldvl
                                    types.CPointer(a.dtype),  # vr
                                    types.intp  # ldvr
                                    )

    numba_ez_rgeev = types.ExternalFunction("numba_ez_rgeev",
                                            numba_ez_rgeev_sig)

    numba_ez_cgeev_sig = types.intc(types.char,  # kind
                                    types.char,  # jobvl
                                    types.char,  # jobvr
                                    types.intp,  # n
                                    types.CPointer(a.dtype),  # a
                                    types.intp,  # lda
                                    types.CPointer(a.dtype),  # w
                                    types.CPointer(a.dtype),  # vl
                                    types.intp,  # ldvl
                                    types.CPointer(a.dtype),  # vr
                                    types.intp  # ldvr
                                    )

    numba_ez_cgeev = types.ExternalFunction("numba_ez_cgeev",
                                            numba_ez_cgeev_sig)

    kind = ord(get_blas_kind(a.dtype, "eig"))

    # Only the right eigenvectors are requested.
    JOBVL = ord('N')
    JOBVR = ord('V')

    F_layout = a.layout == 'F'

    def real_eig_impl(a):
        """
        eig() implementation for real arrays.
        """
        n = a.shape[-1]
        if a.shape[-2] != n:
            msg = "Last 2 dimensions of the array must be square."
            raise np.linalg.LinAlgError(msg)

        _check_finite_matrix(a)

        # LAPACK overwrites its input, so always work on an F-ordered copy.
        if F_layout:
            acpy = np.copy(a)
        else:
            acpy = np.asfortranarray(a)

        ldvl = 1
        ldvr = n
        wr = np.empty(n, dtype=a.dtype)
        wi = np.empty(n, dtype=a.dtype)
        vl = np.empty((n, ldvl), dtype=a.dtype)
        vr = np.empty((n, ldvr), dtype=a.dtype)

        # Nothing to compute for a 0x0 matrix.  Calling LAPACK would pass a
        # leading dimension of 0, which it rejects as an invalid argument
        # and which would end up in the fatal-error path below.
        if n == 0:
            return (wr, vr.T)

        r = numba_ez_rgeev(kind,
                           JOBVL,
                           JOBVR,
                           n,
                           acpy.ctypes,
                           n,
                           wr.ctypes,
                           wi.ctypes,
                           vl.ctypes,
                           ldvl,
                           vr.ctypes,
                           ldvr)
        if r < 0:
            fatal_error_func()
            assert 0  # unreachable
        if r > 0:
            # A positive status means not all eigenvalues were computed.
            raise np.linalg.LinAlgError("Eigenvalues did not converge")

        # By design numba does not support dynamic return types, however,
        # Numpy does. Numpy uses this ability in the case of returning
        # eigenvalues/vectors of a real matrix. The return type of
        # np.linalg.eig(), when operating on a matrix in real space
        # depends on the values present in the matrix itself (recalling
        # that eigenvalues are the roots of the characteristic polynomial
        # of the system matrix, which will by construction depend on the
        # values present in the system matrix). As numba cannot handle
        # the case of a runtime decision based domain change relative to
        # the input type, if it is required numba raises as below.
        if np.any(wi):
            raise ValueError(
                "eig() argument must not cause a domain change.")

        # put these in to help with liveness analysis,
        # `.ctypes` doesn't keep the vars alive
        acpy.size
        vl.size
        vr.size
        wr.size
        wi.size
        return (wr, vr.T)

    def cmplx_eig_impl(a):
        """
        eig() implementation for complex arrays.
        """
        n = a.shape[-1]
        if a.shape[-2] != n:
            msg = "Last 2 dimensions of the array must be square."
            raise np.linalg.LinAlgError(msg)

        _check_finite_matrix(a)

        # LAPACK overwrites its input, so always work on an F-ordered copy.
        if F_layout:
            acpy = np.copy(a)
        else:
            acpy = np.asfortranarray(a)

        ldvl = 1
        ldvr = n
        w = np.empty(n, dtype=a.dtype)
        vl = np.empty((n, ldvl), dtype=a.dtype)
        vr = np.empty((n, ldvr), dtype=a.dtype)

        # See real_eig_impl: skip LAPACK entirely for a 0x0 matrix.
        if n == 0:
            return (w, vr.T)

        r = numba_ez_cgeev(kind,
                           JOBVL,
                           JOBVR,
                           n,
                           acpy.ctypes,
                           n,
                           w.ctypes,
                           vl.ctypes,
                           ldvl,
                           vr.ctypes,
                           ldvr)
        if r < 0:
            fatal_error_func()
            assert 0  # unreachable
        if r > 0:
            # A positive status means not all eigenvalues were computed.
            raise np.linalg.LinAlgError("Eigenvalues did not converge")

        # put these in to help with liveness analysis,
        # `.ctypes` doesn't keep the vars alive
        acpy.size
        vl.size
        vr.size
        w.size
        return (w, vr.T)

    if isinstance(a.dtype, types.scalars.Complex):
        return cmplx_eig_impl
    else:
        return real_eig_impl
def qr_impl(a):
    """Return a QR implementation specialised for the array type ``a``.

    The outer function works on the array *type*: it validates it and
    builds the ``numba_ez_geqrf`` / ``numba_ez_xxgqr`` external-function
    signatures.  The returned inner function works on the actual array and
    returns ``(q, r)`` with ``q`` of shape ``(m, min(m, n))`` and ``r`` of
    shape ``(min(m, n), n)``.

    The inner function raises ``np.linalg.LinAlgError`` when the input is
    empty or contains infs/NaNs.
    """
    ensure_lapack()

    _check_linalg_matrix(a, "qr")

    # Need two functions, the first computes R, storing it in the upper
    # triangle of A with the below diagonal part of A containing elementary
    # reflectors needed to construct Q. The second turns the below diagonal
    # entries of A into Q, storing Q in A (creates orthonormal columns from
    # the elementary reflectors).

    numba_ez_geqrf_sig = types.intc(
        types.char,  # kind
        types.intp,  # m
        types.intp,  # n
        types.CPointer(a.dtype),  # a
        types.intp,  # lda
        types.CPointer(a.dtype),  # tau
    )

    numba_ez_geqrf = types.ExternalFunction("numba_ez_geqrf",
                                            numba_ez_geqrf_sig)

    numba_ez_xxgqr_sig = types.intc(
        types.char,  # kind
        types.intp,  # m
        types.intp,  # n
        types.intp,  # k
        types.CPointer(a.dtype),  # a
        types.intp,  # lda
        types.CPointer(a.dtype),  # tau
    )

    numba_ez_xxgqr = types.ExternalFunction("numba_ez_xxgqr",
                                            numba_ez_xxgqr_sig)

    kind = ord(get_blas_kind(a.dtype, "qr"))

    F_layout = a.layout == 'F'

    def qr_impl(a):
        n = a.shape[-1]
        m = a.shape[-2]

        # An empty input would hand LAPACK a leading dimension of 0, which
        # it rejects as an invalid argument (ending in the fatal-error path
        # below); report it as a regular exception instead.
        if n == 0 or m == 0:
            raise np.linalg.LinAlgError("Arrays cannot be empty")

        _check_finite_matrix(a)

        # copy A as it will be destroyed
        if F_layout:
            q = np.copy(a)
        else:
            q = np.asfortranarray(a)

        minmn = min(m, n)
        tau = np.empty((minmn), dtype=a.dtype)

        ret = numba_ez_geqrf(
            kind,  # kind
            m,  # m
            n,  # n
            q.ctypes,  # a
            m,  # lda
            tau.ctypes  # tau
        )
        if ret < 0:
            fatal_error_func()
            assert 0  # unreachable

        # pull out R, this is transposed because of Fortran
        r = np.zeros((n, minmn), dtype=a.dtype).T

        # the triangle in R
        for i in range(minmn):
            for j in range(i + 1):
                r[j, i] = q[j, i]

        # and the possible square in R
        for i in range(minmn, n):
            for j in range(minmn):
                r[j, i] = q[j, i]

        ret = numba_ez_xxgqr(
            kind,  # kind
            m,  # m
            minmn,  # n
            minmn,  # k
            q.ctypes,  # a
            m,  # lda
            tau.ctypes  # tau
        )
        if ret < 0:
            fatal_error_func()
            assert 0  # unreachable

        # help liveness analysis
        tau.size
        q.size

        return (q[:, :minmn], r)

    return qr_impl
@lower_builtin(np.linalg.inv, types.Array)
def inv(context, builder, sig, args):
    """
    Lowering for np.linalg.inv(a).

    Dispatches to ``mat_inv`` when the first argument type is 2-D; any
    other dimensionality hits ``assert 0``.
    """
    ensure_lapack()

    # Dimensionality comes from the argument *type* in the signature.
    ndims = sig.args[0].ndim
    if ndims == 2:
        return mat_inv(context, builder, sig, args)
    else:
        assert 0


# External C function taking no arguments and returning a C int; the linalg
# implementations call it when a LAPACK wrapper returns a negative status.
fatal_error_sig = types.intc()
fatal_error_func = types.ExternalFunction("numba_fatal_error", fatal_error_sig)


@jit(nopython=True)
def _check_finite_matrix(a):
    """Raise ``np.linalg.LinAlgError`` if any element of ``a`` is inf or NaN."""
    for v in np.nditer(a):
        if not np.isfinite(v.item()):
            raise np.linalg.LinAlgError(
                "Array must not contain infs or NaNs.")


def _check_linalg_matrix(a, func_name):
    """Raise ``TypingError`` unless ``a`` is a ``types.Array``.

    ``func_name`` is interpolated into the error message.
    NOTE(review): this definition may continue beyond what is visible here.
    """
    if not isinstance(a, types.Array):
        raise TypingError("np.linalg.%s() only supported for array types"
                          % func_name)
def lstsq_impl(a, b, rcond=-1.0):
    """Return an ``lstsq`` implementation specialised for types ``a``, ``b``.

    The outer function works on the array *types*: it validates them,
    promotes the two dtypes to a shared dtype, builds the
    ``numba_ez_gelsd`` external-function signature and selects helper
    specialisations that depend on ``b`` (which may be 1-D or 2-D).

    The returned inner function works on the actual arrays and returns
    ``(x, residuals, rank, singular_values)``.  It raises
    ``np.linalg.LinAlgError`` when an input contains infs/NaNs or when
    LAPACK reports that the computation did not converge; the
    dimension-validity helper may raise as well.
    """
    ensure_lapack()

    _check_linalg_matrix(a, "lstsq")

    # B can be 1D or 2D.
    _check_linalg_1_or_2d_matrix(b, "lstsq")

    a_F_layout = a.layout == 'F'

    # the typing context is not easily accessible in `@overload` mode
    # so type unification etc. is done manually below
    a_np_dt = np_support.as_dtype(a.dtype)
    b_np_dt = np_support.as_dtype(b.dtype)

    np_shared_dt = np.promote_types(a_np_dt, b_np_dt)
    nb_shared_dt = np_support.from_dtype(np_shared_dt)

    # convert typing floats to np floats for use in the impl
    r_type = getattr(nb_shared_dt, "underlying_float", nb_shared_dt)
    if r_type.bitwidth == 32:
        real_dtype = np.float32
    else:
        real_dtype = np.float64

    # the lapack wrapper signature
    numba_ez_gelsd_sig = types.intc(
        types.char,  # kind
        types.intp,  # m
        types.intp,  # n
        types.intp,  # nrhs
        types.CPointer(nb_shared_dt),  # a
        types.intp,  # lda
        types.CPointer(nb_shared_dt),  # b
        types.intp,  # ldb
        types.CPointer(r_type),  # S
        types.float64,  # rcond
        types.CPointer(types.intc)  # rank
    )

    # the lapack wrapper function
    numba_ez_gelsd = types.ExternalFunction("numba_ez_gelsd",
                                            numba_ez_gelsd_sig)

    kind = ord(get_blas_kind(nb_shared_dt, "lstsq"))

    # The following functions select specialisations based on
    # information around 'b', a lot of this effort is required
    # as 'b' can be either 1D or 2D, and then there are
    # some optimisations available depending on real or complex
    # space.

    # get a specialisation for computing the number of RHS
    b_nrhs = _get_compute_nrhs(b)

    # get a specialised residual computation based on the dtype
    compute_res = _get_res_impl(nb_shared_dt, real_dtype, b)

    # b copy function
    b_copy_in = _get_copy_in_b_impl(b)

    # return blob function
    b_ret = _get_compute_return_impl(b)

    # check system is dimensionally valid function
    check_dimensionally_valid = _get_check_lstsq_dimensionally_valid_impl(a, b)

    def lstsq_impl(a, b, rcond=-1.0):
        n = a.shape[-1]
        m = a.shape[-2]
        nrhs = b_nrhs(b)

        # check the systems have no inf or NaN
        _check_finite_matrix(a)
        _check_finite_matrix(b)

        # check the systems is dimensionally valid
        check_dimensionally_valid(a, b)

        minmn = min(m, n)
        maxmn = max(m, n)

        # a is destroyed on exit, copy it
        acpy = a.astype(np_shared_dt)
        if a_F_layout:
            acpy = np.copy(acpy)
        else:
            acpy = np.asfortranarray(acpy)

        # b is overwritten on exit with the solution, copy allocate
        bcpy = np.empty((nrhs, maxmn), dtype=np_shared_dt).T
        # specialised copy in due to b being 1 or 2D
        b_copy_in(bcpy, b, nrhs)

        # Allocate returns
        s = np.empty(minmn, dtype=real_dtype)
        rank_ptr = np.empty(1, dtype=np.int32)

        r = numba_ez_gelsd(
            kind,  # kind
            m,  # m
            n,  # n
            nrhs,  # nrhs
            acpy.ctypes,  # a
            m,  # lda
            bcpy.ctypes,  # b
            maxmn,  # ldb
            s.ctypes,  # s
            rcond,  # rcond
            rank_ptr.ctypes  # rank
        )
        if r < 0:
            fatal_error_func()
            assert 0  # unreachable
        if r > 0:
            # A positive status means the underlying SVD did not converge;
            # the output buffers do not hold a valid solution.
            raise np.linalg.LinAlgError(
                "SVD did not converge in Linear Least Squares")

        # set rank to that which was computed
        rank = rank_ptr[0]

        # compute residuals
        if rank < n or m <= n:
            res = np.empty((0), dtype=real_dtype)
        else:
            # this requires additional dispatch as there's a faster
            # impl if the result is in the real domain (no abs() required)
            res = compute_res(bcpy, n, nrhs)

        # extract 'x', the solution
        x = b_ret(bcpy, n)

        # help liveness analysis
        acpy.size
        bcpy.size
        s.size
        rank_ptr.size

        return (x, res, rank, s[:minmn])

    return lstsq_impl
types.CPointer(types.uint64)) pre_check_state_sig_32 = types.uint32(types.uint32, types.uint32, types.CPointer(types.uint32)) pre_check_state_sig_64 = types.uint64(types.uint64, types.uint64, types.CPointer(types.uint64)) op_results_32 = types.Record.make_c_struct([ ('matrix_ele', types.complex128), ('state', types.uint32), ]) op_results_64 = types.Record.make_c_struct([('matrix_ele', types.complex128), ('state', types.uint64)]) op_sig_32 = types.intc(types.CPointer(op_results_32), types.char, types.intc, types.intc, types.CPointer(types.uint32)) op_sig_64 = types.intc(types.CPointer(op_results_64), types.char, types.intc, types.intc, types.CPointer(types.uint64)) count_particles_sig_32 = types.void(types.uint32, types.CPointer(types.intc), types.CPointer(types.intc)) count_particles_sig_64 = types.void(types.uint64, types.CPointer(types.intc), types.CPointer(types.intc)) __all__ = [ "map_sig_32", "map_sig_64", "next_state_sig_32", "next_state_sig_64", "op_func_sig_32", "op_func_sig_64", "user_basis" ] @njit
import os

PINK = np.array([255, 15, 255]) / 255.
YELLOW = np.array([255, 255, 15]) / 255.


@jit
def tss(a):
    """Return the total sum of squared differences of ``a`` from its mean."""
    deviations = a - np.mean(a)
    return np.sum(deviations**2)


@cfunc(intc(CPointer(float64), intp, CPointer(float64), voidptr))
def nbtss(values_ptr, len_values, result, data):
    """Total sum of squared differences, as a compiled C callback.

    Reads ``len_values`` float64 values from ``values_ptr``, stores the sum
    of squared differences from their mean in ``result[0]`` and returns 1.
    """
    values = carray(values_ptr, (len_values,), dtype=float64)

    # First pass: mean of the buffer.
    total = 0.0
    for idx in range(len_values):
        total += values[idx]
    mean = total / float64(len_values)

    # Second pass: accumulate squared deviations straight into the output.
    result[0] = 0
    for idx in range(len_values):
        result[0] += (values[idx] - mean)**2
    return 1
def lstsq_impl(a, b, rcond=-1.0):
    """Return an ``lstsq`` implementation specialised for types ``a``, ``b``.

    The outer function works on the array *types*: it validates them,
    promotes the two dtypes to a shared dtype, builds the
    ``numba_ez_gelsd`` external-function signature and selects helper
    specialisations that depend on ``b`` (which may be 1-D or 2-D).

    The returned inner function works on the actual arrays and returns
    ``(x, residuals, rank, singular_values)``.  It raises
    ``np.linalg.LinAlgError`` when an input contains infs/NaNs or when
    LAPACK reports that the computation did not converge; the
    dimension-validity helper may raise as well.
    """
    ensure_lapack()

    _check_linalg_matrix(a, "lstsq")

    # B can be 1D or 2D.
    _check_linalg_1_or_2d_matrix(b, "lstsq")

    a_F_layout = a.layout == 'F'

    # the typing context is not easily accessible in `@overload` mode
    # so type unification etc. is done manually below
    a_np_dt = np_support.as_dtype(a.dtype)
    b_np_dt = np_support.as_dtype(b.dtype)

    np_shared_dt = np.promote_types(a_np_dt, b_np_dt)
    nb_shared_dt = np_support.from_dtype(np_shared_dt)

    # convert typing floats to np floats for use in the impl
    r_type = getattr(nb_shared_dt, "underlying_float", nb_shared_dt)
    if r_type.bitwidth == 32:
        real_dtype = np.float32
    else:
        real_dtype = np.float64

    # the lapack wrapper signature
    numba_ez_gelsd_sig = types.intc(
        types.char,  # kind
        types.intp,  # m
        types.intp,  # n
        types.intp,  # nrhs
        types.CPointer(nb_shared_dt),  # a
        types.intp,  # lda
        types.CPointer(nb_shared_dt),  # b
        types.intp,  # ldb
        types.CPointer(r_type),  # S
        types.float64,  # rcond
        types.CPointer(types.intc)  # rank
    )

    # the lapack wrapper function
    numba_ez_gelsd = types.ExternalFunction("numba_ez_gelsd",
                                            numba_ez_gelsd_sig)

    kind = ord(get_blas_kind(nb_shared_dt, "lstsq"))

    # The following functions select specialisations based on
    # information around 'b', a lot of this effort is required
    # as 'b' can be either 1D or 2D, and then there are
    # some optimisations available depending on real or complex
    # space.

    # get a specialisation for computing the number of RHS
    b_nrhs = _get_compute_nrhs(b)

    # get a specialised residual computation based on the dtype
    compute_res = _get_res_impl(nb_shared_dt, real_dtype, b)

    # b copy function
    b_copy_in = _get_copy_in_b_impl(b)

    # return blob function
    b_ret = _get_compute_return_impl(b)

    # check system is dimensionally valid function
    check_dimensionally_valid = _get_check_lstsq_dimensionally_valid_impl(a, b)

    def lstsq_impl(a, b, rcond=-1.0):
        n = a.shape[-1]
        m = a.shape[-2]
        nrhs = b_nrhs(b)

        # check the systems have no inf or NaN
        _check_finite_matrix(a)
        _check_finite_matrix(b)

        # check the systems is dimensionally valid
        check_dimensionally_valid(a, b)

        minmn = min(m, n)
        maxmn = max(m, n)

        # a is destroyed on exit, copy it
        acpy = a.astype(np_shared_dt)
        if a_F_layout:
            acpy = np.copy(acpy)
        else:
            acpy = np.asfortranarray(acpy)

        # b is overwritten on exit with the solution, copy allocate
        bcpy = np.empty((nrhs, maxmn), dtype=np_shared_dt).T
        # specialised copy in due to b being 1 or 2D
        b_copy_in(bcpy, b, nrhs)

        # Allocate returns
        s = np.empty(minmn, dtype=real_dtype)
        rank_ptr = np.empty(1, dtype=np.int32)

        r = numba_ez_gelsd(
            kind,  # kind
            m,  # m
            n,  # n
            nrhs,  # nrhs
            acpy.ctypes,  # a
            m,  # lda
            bcpy.ctypes,  # b
            maxmn,  # ldb
            s.ctypes,  # s
            rcond,  # rcond
            rank_ptr.ctypes  # rank
        )
        if r < 0:
            fatal_error_func()
            assert 0  # unreachable
        if r > 0:
            # A positive status means the underlying SVD did not converge;
            # the output buffers do not hold a valid solution.
            raise np.linalg.LinAlgError(
                "SVD did not converge in Linear Least Squares")

        # set rank to that which was computed
        rank = rank_ptr[0]

        # compute residuals
        if rank < n or m <= n:
            res = np.empty((0), dtype=real_dtype)
        else:
            # this requires additional dispatch as there's a faster
            # impl if the result is in the real domain (no abs() required)
            res = compute_res(bcpy, n, nrhs)

        # extract 'x', the solution
        x = b_ret(bcpy, n)

        # help liveness analysis
        acpy.size
        bcpy.size
        s.size
        rank_ptr.size

        return (x, res, rank, s[:minmn])

    return lstsq_impl