def dpnp_cumprod_impl(a):
    """Build the ``cumprod`` implementation that calls the dpnp kernel.

    Parameters
    ----------
    a : array type
        Type object of the input; ``a.dtype`` selects the kernel and the
        dtype of the result buffer.

    Returns
    -------
    callable
        ``dpnp_impl(a)``, which allocates an output of ``a.size`` elements,
        hands it to ``common_impl`` together with the kernel, and returns it.
    """
    name = "cumprod"
    dpnp_lowering.ensure_dpnp(name)

    # dpnp source:
    # https://github.com/IntelPython/dpnp/blob/0.5.1/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp#L110
    #
    # Function declaration:
    # void dpnp_cumprod_c(void* array1_in, void* result1, size_t size)
    res_type = types.void
    sig = signature(res_type, types.voidptr, types.voidptr, types.intp)
    dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig)

    PRINT_DEBUG = dpnp_lowering.DEBUG

    # ``a.dtype`` is a type *instance*, so comparing it with ``==`` against
    # the ``types.Integer`` class can never be true; the int64 promotion was
    # therefore unreachable. ``isinstance`` expresses the intended check.
    # NOTE(review): assumes the dpnp kernel writes 64-bit results for every
    # integer input it supports -- confirm against the dpnp function table.
    if isinstance(a.dtype, types.Integer):
        ret_dtype = np.int64
    else:
        ret_dtype = a.dtype

    def dpnp_impl(a):
        # np.arange is only used to allocate a buffer of the right size and
        # dtype; its contents are handed to common_impl as the output.
        out = np.arange(0, a.size, 1, ret_dtype)
        common_impl(a, out, dpnp_func, PRINT_DEBUG)
        return out

    return dpnp_impl
def dpnp_det_impl(a):
    """Build the ``det`` implementation that calls the dpnp kernel.

    The returned ``dpnp_impl(a)`` raises ``ValueError`` when the last two
    dimensions of ``a`` differ, copies ``a`` into memory obtained from
    ``dpctl_functions.malloc_shared``, invokes the kernel and copies the
    result back.  A 2-d input yields a scalar, anything else an array of
    shape ``a.shape[:-2]``.
    """
    name = "det"
    dpnp_lowering.ensure_dpnp(name)

    # dpnp source:
    # https://github.com/IntelPython/dpnp/blob/0.4.0/dpnp/backend/custom_kernels_linalg.cpp#L83
    #
    # Function declaration:
    # void custom_det_c(void* array1_in, void* result1, size_t* shape, size_t ndim)
    ret_type = types.void
    sig = signature(
        ret_type,
        types.voidptr,
        types.voidptr,
        types.voidptr,
        types.intp,
    )
    dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig)

    PRINT_DEBUG = dpnp_lowering.DEBUG

    def dpnp_impl(a):
        last_dim = a.shape[-1]
        if a.shape[-2] != last_dim:
            raise ValueError("Input array must be square.")

        dpnp_ext._check_finite_matrix(a)

        if a.ndim == 2:
            out = np.empty((1,), dtype=a.dtype)
            out[0] = -4
        else:
            out = np.empty(a.shape[:-2], dtype=a.dtype)

        queue = dpctl_functions.get_current_queue()

        # Stage the input.
        in_nbytes = a.size * a.itemsize
        in_usm = dpctl_functions.malloc_shared(in_nbytes, queue)
        copy_evt = dpctl_functions.queue_memcpy(queue, in_usm, a.ctypes, in_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        # Run the kernel into a freshly allocated result buffer.
        res_nbytes = out.size * out.itemsize
        res_usm = dpctl_functions.malloc_shared(res_nbytes, queue)
        dpnp_func(in_usm, res_usm, a.shapeptr, a.ndim)

        # Bring the result back into ``out``.
        copy_evt = dpctl_functions.queue_memcpy(queue, out.ctypes, res_usm, res_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        dpctl_functions.free_with_queue(in_usm, queue)
        dpctl_functions.free_with_queue(res_usm, queue)

        dpnp_ext._dummy_liveness_func([out.size, a.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        if a.ndim == 2:
            return out[0]
        else:
            return out

    return dpnp_impl
def dpnp_matrix_rank_impl(M, tol=None, hermitian=False):
    """Build the ``matrix_rank`` implementation that calls the dpnp kernel.

    The returned ``dpnp_impl(M, tol=None, hermitian=False)`` raises
    ``ValueError`` if ``tol`` is given, if ``hermitian`` is truthy, or if
    ``M`` has more than two dimensions.  Otherwise it runs the kernel on a
    copy of ``M`` and returns the single element of the result buffer.
    """
    name = "matrix_rank"
    dpnp_lowering.ensure_dpnp(name)

    # dpnp source:
    # https://github.com/IntelPython/dpnp/blob/0.4.0/dpnp/backend/custom_kernels_linalg.cpp#L186
    #
    # Function declaration:
    # void custom_matrix_rank_c(void* array1_in, void* result1, size_t* shape, size_t ndim)
    ret_type = types.void
    sig = signature(
        ret_type,
        types.voidptr,
        types.voidptr,
        types.voidptr,
        types.intp,
    )
    dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [M.dtype.name, "NONE"], sig)

    PRINT_DEBUG = dpnp_lowering.DEBUG

    def dpnp_impl(M, tol=None, hermitian=False):
        # Reject the options this implementation does not handle.
        if tol is not None:
            raise ValueError("tol is not supported for np.linalg.matrix_rank(M)")
        if hermitian:
            raise ValueError("hermitian is not supported for np.linalg.matrix_rank(M)")
        if M.ndim > 2:
            raise ValueError(
                "np.linalg.matrix_rank(M) is only supported on 1 or 2-d arrays"
            )

        out = np.empty(1, dtype=M.dtype)

        queue = dpctl_functions.get_current_queue()

        in_nbytes = M.size * M.itemsize
        in_usm = dpctl_functions.malloc_shared(in_nbytes, queue)
        copy_evt = dpctl_functions.queue_memcpy(queue, in_usm, M.ctypes, in_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        res_nbytes = out.size * out.itemsize
        res_usm = dpctl_functions.malloc_shared(res_nbytes, queue)

        dpnp_func(in_usm, res_usm, M.shapeptr, M.ndim)

        copy_evt = dpctl_functions.queue_memcpy(queue, out.ctypes, res_usm, res_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        dpctl_functions.free_with_queue(in_usm, queue)
        dpctl_functions.free_with_queue(res_usm, queue)

        dpnp_ext._dummy_liveness_func([out.size, M.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        return out[0]

    return dpnp_impl
def dpnp_all_impl(a):
    """Build the ``all`` implementation that calls the dpnp kernel.

    The returned ``dpnp_impl(a)`` returns ``True`` for an empty input
    without touching the device; otherwise it runs the kernel on a copy of
    ``a`` and returns the single boolean written by it.
    """
    name = "all"
    dpnp_lowering.ensure_dpnp(name)

    # dpnp source:
    # https://github.com/IntelPython/dpnp/blob/0.6.2/dpnp/backend/kernels/dpnp_krnl_logic.cpp#L36
    #
    # Function declaration:
    # void dpnp_all_c(const void* array1_in, void* result1, const size_t size)
    ret_type = types.void
    sig = signature(ret_type, types.voidptr, types.voidptr, types.intp)
    dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig)

    PRINT_DEBUG = dpnp_lowering.DEBUG

    def dpnp_impl(a):
        if a.size == 0:
            return True

        out = np.empty(1, dtype=np.bool_)

        queue = dpctl_functions.get_current_queue()

        in_nbytes = a.size * a.itemsize
        in_usm = dpctl_functions.malloc_shared(in_nbytes, queue)
        copy_evt = dpctl_functions.queue_memcpy(queue, in_usm, a.ctypes, in_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        res_nbytes = out.size * out.itemsize
        res_usm = dpctl_functions.malloc_shared(res_nbytes, queue)

        dpnp_func(in_usm, res_usm, a.size)

        copy_evt = dpctl_functions.queue_memcpy(queue, out.ctypes, res_usm, res_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        dpctl_functions.free_with_queue(in_usm, queue)
        dpctl_functions.free_with_queue(res_usm, queue)

        dpnp_ext._dummy_liveness_func([a.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        # TODO: sometimes all() returns ndarray
        return out[0]

    return dpnp_impl
def dpnp_sort_impl(a):
    """Build the ``sort`` implementation that calls the dpnp kernel.

    The returned ``dpnp_impl(a)`` raises ``ValueError`` on an empty input;
    otherwise it returns a new 1-d array of ``a.size`` elements with the
    dtype of ``a``, filled by the kernel.
    """
    name = "sort"
    dpnp_lowering.ensure_dpnp(name)

    # dpnp source:
    # https://github.com/IntelPython/dpnp/blob/0.5.0/dpnp/backend/kernels/dpnp_krnl_sorting.cpp#L90
    #
    # Function declaration:
    # void dpnp_sort_c(void* array1_in, void* result1, size_t size)
    ret_type = types.void
    sig = signature(ret_type, types.voidptr, types.voidptr, types.intp)
    dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig)

    res_dtype = a.dtype
    PRINT_DEBUG = dpnp_lowering.DEBUG

    def dpnp_impl(a):
        if a.size == 0:
            raise ValueError("Passed Empty array")

        queue = dpctl_functions.get_current_queue()

        in_nbytes = a.size * a.itemsize
        in_usm = dpctl_functions.malloc_shared(in_nbytes, queue)
        copy_evt = dpctl_functions.queue_memcpy(queue, in_usm, a.ctypes, in_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        # np.arange only serves to allocate the result buffer.
        out = np.arange(0, a.size, 1, res_dtype)
        res_nbytes = out.size * out.itemsize
        res_usm = dpctl_functions.malloc_shared(res_nbytes, queue)

        dpnp_func(in_usm, res_usm, a.size)

        copy_evt = dpctl_functions.queue_memcpy(queue, out.ctypes, res_usm, res_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        dpctl_functions.free_with_queue(in_usm, queue)
        dpctl_functions.free_with_queue(res_usm, queue)

        dpnp_ext._dummy_liveness_func([a.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        return out

    return dpnp_impl
def dpnp_argmin_impl(a):
    """Build the ``argmin`` implementation that calls the dpnp kernel.

    The kernel is looked up for the pair (input dtype, int64).  The
    returned ``dpnp_impl(a)`` raises ``ValueError`` on an empty input and
    otherwise returns the single ``int64`` value written by the kernel.
    """
    name = "argmin"
    dpnp_lowering.ensure_dpnp(name)

    # dpnp source:
    # https://github.com/IntelPython/dpnp/blob/0.4.0/dpnp/backend/custom_kernels_searching.cpp#L56
    #
    # Function declaration:
    # void custom_argmin_c(void* array1_in, void* result1, size_t size)
    ret_type = types.void
    sig = signature(ret_type, types.voidptr, types.voidptr, types.intp)
    kernel_types = [a.dtype.name, np.dtype(np.int64).name]
    dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, kernel_types, sig)

    res_dtype = np.int64
    PRINT_DEBUG = dpnp_lowering.DEBUG

    def dpnp_impl(a):
        if a.size == 0:
            raise ValueError("Passed Empty array")

        queue = dpctl_functions.get_current_queue()

        in_nbytes = a.size * a.itemsize
        in_usm = dpctl_functions.malloc_shared(in_nbytes, queue)
        copy_evt = dpctl_functions.queue_memcpy(queue, in_usm, a.ctypes, in_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        out = np.empty(1, dtype=res_dtype)
        # The result is a single element, so one itemsize is allocated.
        res_usm = dpctl_functions.malloc_shared(out.itemsize, queue)

        dpnp_func(in_usm, res_usm, a.size)

        copy_evt = dpctl_functions.queue_memcpy(
            queue, out.ctypes, res_usm, out.size * out.itemsize
        )
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        dpctl_functions.free_with_queue(in_usm, queue)
        dpctl_functions.free_with_queue(res_usm, queue)

        dpnp_ext._dummy_liveness_func([a.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        return out[0]

    return dpnp_impl
def dpnp_sum_impl(a):
    """Build the ``sum`` implementation that calls the dpnp kernel.

    The returned ``dpnp_impl(a)`` allocates a one-element buffer with the
    dtype of ``a``, delegates to ``common_impl`` and returns that element.
    """
    name = "sum"
    dpnp_lowering.ensure_dpnp(name)

    # dpnp source:
    # https://github.com/IntelPython/dpnp/blob/0.6.1dev/dpnp/backend/kernels/dpnp_krnl_reduction.cpp#L59
    #
    # Function declaration:
    # void dpnp_sum_c(void* result_out,
    #                 const void* input_in,
    #                 const size_t* input_shape,
    #                 const size_t input_shape_ndim,
    #                 const long* axes,
    #                 const size_t axes_ndim,
    #                 const void* initial,
    #                 const long* where)
    ret_type = types.void
    arg_types = (
        types.voidptr,  # void* result_out,
        types.voidptr,  # const void* input_in,
        types.voidptr,  # const size_t* input_shape,
        types.intp,  # const size_t input_shape_ndim,
        types.voidptr,  # const long* axes,
        types.intp,  # const size_t axes_ndim,
        types.voidptr,  # const void* initial,
        types.voidptr,  # const long* where)
    )
    sig = signature(ret_type, *arg_types)
    dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig)

    PRINT_DEBUG = dpnp_lowering.DEBUG

    def dpnp_impl(a):
        result = np.empty(1, dtype=a.dtype)
        # NOTE(review): common_impl is defined elsewhere; presumably it
        # calls the kernel with the eight arguments declared above -- confirm.
        common_impl(a, result, dpnp_func, PRINT_DEBUG)
        return result[0]

    return dpnp_impl
def dpnp_repeat_impl(a, repeats):
    """Build the ``repeat`` implementation that calls the dpnp kernel.

    The returned ``dpnp_impl(a, repeats)`` raises ``ValueError`` for an
    empty input or an input with two or more dimensions; otherwise it
    returns a new 1-d array of ``a.size * repeats`` elements filled by the
    kernel.
    """
    name = "repeat"
    dpnp_lowering.ensure_dpnp(name)

    # dpnp source:
    # https://github.com/IntelPython/dpnp/blob/0.6.2/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp#L46
    #
    # Function declaration:
    # void dpnp_repeat_c(const void* array1_in, void* result1, const size_t repeats, const size_t size)
    ret_type = types.void
    sig = signature(ret_type, types.voidptr, types.voidptr, types.intp, types.intp)
    dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig)

    PRINT_DEBUG = dpnp_lowering.DEBUG

    def dpnp_impl(a, repeats):
        if a.size == 0:
            raise ValueError("Passed Empty array")

        if a.ndim >= 2:
            raise ValueError("Not supported in dpnp")

        total = a.size * repeats
        out = np.zeros(total, dtype=a.dtype)

        queue = dpctl_functions.get_current_queue()

        in_nbytes = a.size * a.itemsize
        in_usm = dpctl_functions.malloc_shared(in_nbytes, queue)
        copy_evt = dpctl_functions.queue_memcpy(queue, in_usm, a.ctypes, in_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        res_nbytes = out.size * out.itemsize
        res_usm = dpctl_functions.malloc_shared(res_nbytes, queue)

        dpnp_func(in_usm, res_usm, repeats, a.size)

        copy_evt = dpctl_functions.queue_memcpy(queue, out.ctypes, res_usm, res_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        dpctl_functions.free_with_queue(in_usm, queue)
        dpctl_functions.free_with_queue(res_usm, queue)

        dpnp_ext._dummy_liveness_func([a.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        return out

    return dpnp_impl
def dpnp_partition_impl(a, kth):
    """Build the ``partition`` implementation that calls the dpnp kernel.

    The returned ``dpnp_impl(a, kth)`` raises ``ValueError`` on an empty
    input.  Otherwise it passes ``a``, a copy of ``a`` and an output buffer
    of the same shape to the kernel and returns the output.
    """
    name = "partition"
    dpnp_lowering.ensure_dpnp(name)

    # dpnp source:
    # https://github.com/IntelPython/dpnp/blob/0.6.2/dpnp/backend/kernels/dpnp_krnl_sorting.cpp#L90
    #
    # Function declaration:
    # void dpnp_partition_c(
    #     void* array1_in, void* array2_in, void* result1, const size_t kth, const size_t* shape_, const size_t ndim)
    ret_type = types.void
    arg_types = (
        types.voidptr,
        types.voidptr,
        types.voidptr,
        types.intp,
        types.voidptr,
        types.intp,
    )
    sig = signature(ret_type, *arg_types)
    dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig)

    PRINT_DEBUG = dpnp_lowering.DEBUG

    def dpnp_impl(a, kth):
        if a.size == 0:
            raise ValueError("Passed Empty array")

        # NOTE(review): a negative kth is offset by a.ndim, not by the axis
        # length -- verify this is what the kernel expects.
        kth_ = kth if kth >= 0 else (a.ndim + kth)

        scratch = numba_dppy.dpnp.copy(a)
        out = np.empty(a.shape, dtype=a.dtype)

        queue = dpctl_functions.get_current_queue()

        in_nbytes = a.size * a.itemsize
        in_usm = dpctl_functions.malloc_shared(in_nbytes, queue)
        copy_evt = dpctl_functions.queue_memcpy(queue, in_usm, a.ctypes, in_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        scratch_nbytes = scratch.size * scratch.itemsize
        scratch_usm = dpctl_functions.malloc_shared(scratch_nbytes, queue)
        copy_evt = dpctl_functions.queue_memcpy(
            queue, scratch_usm, scratch.ctypes, scratch_nbytes
        )
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        res_nbytes = out.size * out.itemsize
        res_usm = dpctl_functions.malloc_shared(res_nbytes, queue)

        dpnp_func(in_usm, scratch_usm, res_usm, kth_, a.shapeptr, a.ndim)

        copy_evt = dpctl_functions.queue_memcpy(queue, out.ctypes, res_usm, res_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        dpctl_functions.free_with_queue(in_usm, queue)
        dpctl_functions.free_with_queue(scratch_usm, queue)
        dpctl_functions.free_with_queue(res_usm, queue)

        dpnp_ext._dummy_liveness_func([a.size, scratch.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        return out

    return dpnp_impl
def dpnp_dot_impl(a, b):
    """Build the ``dot`` implementation for the given operand ranks.

    The combination ``(a.ndim, b.ndim)`` selects which inner function is
    returned:

    * ``(2, 2)`` -- matrix @ matrix, via the ``dpnp_matmul`` kernel
    * ``(2, 1)`` -- matrix @ vector, via the ``dpnp_matmul`` kernel
    * ``(1, 2)`` -- vector @ matrix, via the ``dpnp_matmul`` kernel
    * ``(1, 1)`` -- vector . vector, via the ``dpnp_dot`` kernel

    Each inner function raises ``ValueError`` when the contracted
    dimensions differ.

    Raises
    ------
    AssertionError
        If the rank combination is none of the above.
    """
    dpnp_lowering.ensure_dpnp("dot")

    # dpnp_matmul source:
    # https://github.com/IntelPython/dpnp/blame/67a101c90cf253cfe9b9ba80ac397811ce94edee/dpnp/backend/kernels/dpnp_krnl_common.cpp#L322
    #
    # dpnp_dot source:
    # https://github.com/IntelPython/dpnp/blob/67a101c90cf253cfe9b9ba80ac397811ce94edee/dpnp/backend/kernels/dpnp_krnl_common.cpp#L79
    #
    # Both kernels share the same declaration shape:
    # void dpnp_<matmul|dot>_c(void* result_out,
    #                          const size_t result_size,
    #                          const size_t result_ndim,
    #                          const size_t* result_shape,
    #                          const size_t* result_strides,
    #                          const void* input1_in,
    #                          const size_t input1_size,
    #                          const size_t input1_ndim,
    #                          const size_t* input1_shape,
    #                          const size_t* input1_strides,
    #                          const void* input2_in,
    #                          const size_t input2_size,
    #                          const size_t input2_ndim,
    #                          const size_t* input2_shape,
    #                          const size_t* input2_strides)
    ret_type = types.void
    sig = signature(
        ret_type,
        types.voidptr,
        types.intp,
        types.intp,
        types.voidptr,
        types.voidptr,
        types.voidptr,
        types.intp,
        types.intp,
        types.voidptr,
        types.voidptr,
        types.voidptr,
        types.intp,
        types.intp,
        types.voidptr,
        types.voidptr,
    )

    res_dtype = get_res_dtype(a, b)

    PRINT_DEBUG = dpnp_lowering.DEBUG

    ndims = [a.ndim, b.ndim]
    if ndims == [2, 2]:
        dpnp_func = dpnp_ext.dpnp_func("dpnp_matmul", [a.dtype.name, "NONE"], sig)

        def dot_2_mm(a, b):
            m, k = a.shape
            _k, n = b.shape

            if _k != k:
                raise ValueError("Incompatible array sizes for np.dot(a, b)")

            out = np.empty((m, n), dtype=res_dtype)
            common_matmul_impl(dpnp_func, a, b, out, m, n, k, PRINT_DEBUG)

            return out

        return dot_2_mm
    elif ndims == [2, 1]:
        dpnp_func = dpnp_ext.dpnp_func("dpnp_matmul", [a.dtype.name, "NONE"], sig)

        def dot_2_mv(a, b):
            m, k = a.shape
            (_n,) = b.shape
            n = 1

            if _n != k:
                raise ValueError("Incompatible array sizes for np.dot(a, b)")

            out = np.empty((m,), dtype=res_dtype)
            common_matmul_impl(dpnp_func, a, b, out, m, n, k, PRINT_DEBUG)

            return out

        return dot_2_mv
    elif ndims == [1, 2]:
        dpnp_func = dpnp_ext.dpnp_func("dpnp_matmul", [a.dtype.name, "NONE"], sig)

        def dot_2_vm(a, b):
            (m,) = a.shape
            k, n = b.shape

            if m != k:
                raise ValueError("Incompatible array sizes for np.dot(a, b)")

            out = np.empty((n,), dtype=res_dtype)
            # NOTE(review): ``m`` here is the vector length (== k), unlike
            # the matrix @ vector case, which passes n = 1 -- confirm how
            # common_matmul_impl uses m, n and k.
            common_matmul_impl(dpnp_func, a, b, out, m, n, k, PRINT_DEBUG)

            return out

        return dot_2_vm
    elif ndims == [1, 1]:
        dpnp_func = dpnp_ext.dpnp_func("dpnp_dot", [a.dtype.name, "NONE"], sig)

        def dot_2_vv(a, b):
            (m,) = a.shape
            (n,) = b.shape

            if m != n:
                raise ValueError("Incompatible array sizes for np.dot(a, b)")

            out = np.empty(1, dtype=res_dtype)
            common_dot_impl(dpnp_func, a, b, out, m, PRINT_DEBUG)

            return out[0]

        return dot_2_vv
    else:
        # Raised explicitly (rather than ``assert 0``) so the failure is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError(
            "Unsupported operand ranks for np.dot(a, b)"
        )
def dpnp_eig_impl(a):
    """Build the ``eig`` implementation that calls the dpnp kernel.

    The result dtype is ``float32`` when ``a.dtype`` is ``types.float32``
    and ``float64`` otherwise.  The returned ``dpnp_impl(a)`` raises
    ``ValueError`` when the last two dimensions of ``a`` differ and returns
    a ``(wr, vr)`` tuple of shapes ``(n,)`` and ``(n, n)``; for ``n == 0``
    the empty buffers are returned without calling the kernel.
    """
    name = "eig"
    dpnp_lowering.ensure_dpnp(name)

    # dpnp source:
    # https://github.com/IntelPython/dpnp/blob/0.4.0/dpnp/backend/custom_kernels.cpp#L180
    #
    # Function declaration:
    # void dpnp_eig_c(const void* array_in, void* result1, void* result2, size_t size)
    ret_type = types.void
    sig = signature(
        ret_type,
        types.voidptr,
        types.voidptr,
        types.voidptr,
        types.intp,
    )
    dpnp_eig = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig)

    res_dtype = np.float32 if a.dtype == types.float32 else np.float64

    PRINT_DEBUG = dpnp_lowering.DEBUG

    def dpnp_impl(a):
        n = a.shape[-1]
        if a.shape[-2] != n:
            raise ValueError("Last 2 dimensions of the array must be square.")

        dpnp_ext._check_finite_matrix(a)

        wr = np.empty(n, dtype=res_dtype)
        vr = np.empty((n, n), dtype=res_dtype)

        if n == 0:
            return (wr, vr)

        queue = dpctl_functions.get_current_queue()

        in_nbytes = a.size * a.itemsize
        in_usm = dpctl_functions.malloc_shared(in_nbytes, queue)
        copy_evt = dpctl_functions.queue_memcpy(queue, in_usm, a.ctypes, in_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        wr_nbytes = wr.size * wr.itemsize
        vr_nbytes = vr.size * vr.itemsize
        wr_usm = dpctl_functions.malloc_shared(wr_nbytes, queue)
        vr_usm = dpctl_functions.malloc_shared(vr_nbytes, queue)

        dpnp_eig(in_usm, wr_usm, vr_usm, n)

        # Copy both results back, one after the other.
        copy_evt = dpctl_functions.queue_memcpy(queue, wr.ctypes, wr_usm, wr_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        copy_evt = dpctl_functions.queue_memcpy(queue, vr.ctypes, vr_usm, vr_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        dpctl_functions.free_with_queue(in_usm, queue)
        dpctl_functions.free_with_queue(wr_usm, queue)
        dpctl_functions.free_with_queue(vr_usm, queue)

        dpnp_ext._dummy_liveness_func([wr.size, vr.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        return (wr, vr)

    return dpnp_impl
def dpnp_cov_impl(a):
    """Build the ``cov`` implementation that calls the dpnp kernel.

    The input is converted to ``float64`` before being handed to the kernel
    unless ``a.dtype`` already is ``types.float64``; the result is always
    ``float64``.  The returned ``dpnp_impl(a)`` raises ``ValueError`` on an
    empty input.  A 2-d input yields a ``(rows, rows)`` array, a 1-d input
    yields a scalar.
    """
    name = "cov"
    dpnp_lowering.ensure_dpnp(name)

    # dpnp source:
    # https://github.com/IntelPython/dpnp/blob/0.4.0/dpnp/backend/custom_kernels_statistics.cpp#L51
    #
    # Function declaration:
    # void custom_cov_c(void* array1_in, void* result1, size_t nrows, size_t ncols)
    ret_type = types.void
    sig = signature(ret_type, types.voidptr, types.voidptr, types.intp, types.intp)
    dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig)

    res_dtype = np.float64
    copy_input_to_double = not (a.dtype == types.float64)
    PRINT_DEBUG = dpnp_lowering.DEBUG

    def dpnp_impl(a):
        if a.size == 0:
            raise ValueError("Passed Empty array")

        queue = dpctl_functions.get_current_queue()

        # We have to pass a array in double precision to DpNp
        if copy_input_to_double:
            a_copy_in_double = a.astype(np.float64)
        else:
            a_copy_in_double = a

        in_nbytes = a_copy_in_double.size * a_copy_in_double.itemsize
        in_usm = dpctl_functions.malloc_shared(in_nbytes, queue)
        copy_evt = dpctl_functions.queue_memcpy(
            queue, in_usm, a_copy_in_double.ctypes, in_nbytes
        )
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        if a.ndim == 2:
            rows = a.shape[0]
            cols = a.shape[1]
            out = np.empty((rows, rows), dtype=res_dtype)
        elif a.ndim == 1:
            # A vector is treated as a single row.
            rows = 1
            cols = a.shape[0]
            out = np.empty(rows, dtype=res_dtype)

        res_nbytes = out.size * out.itemsize
        res_usm = dpctl_functions.malloc_shared(res_nbytes, queue)

        dpnp_func(in_usm, res_usm, rows, cols)

        copy_evt = dpctl_functions.queue_memcpy(queue, out.ctypes, res_usm, res_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        dpctl_functions.free_with_queue(in_usm, queue)
        dpctl_functions.free_with_queue(res_usm, queue)

        dpnp_ext._dummy_liveness_func([a_copy_in_double.size, a.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        if a.ndim == 2:
            return out
        elif a.ndim == 1:
            return out[0]

    return dpnp_impl
def dpnp_amax_impl(a):
    """Build the ``max`` implementation that calls the dpnp kernel.

    The returned ``dpnp_impl(a)`` raises ``ValueError`` on an empty input
    and otherwise returns the single element written by the kernel, with
    the dtype of ``a``.
    """
    name = "max"
    dpnp_lowering.ensure_dpnp(name)

    # dpnp source:
    # https://github.com/IntelPython/dpnp/blob/e389248c709531b181be8bf33b1a270fca812a92/dpnp/backend/kernels/dpnp_krnl_statistics.cpp#L149
    #
    # Function declaration:
    # void dpnp_max_c(void* array1_in, void* result1, const size_t result_size, const size_t* shape, size_t ndim,
    #                 const size_t* axis, size_t naxis)
    #
    # We are using void * in case of size_t * as Numba currently does not have
    # any type to represent size_t *. Since, both the types are pointers,
    # if the compiler allows there should not be any mismatch in the size of
    # the container to hold different types of pointer.
    ret_type = types.void
    arg_types = (
        types.voidptr,
        types.voidptr,
        types.intp,
        types.voidptr,
        types.intp,
        types.voidptr,
        types.intp,
    )
    sig = signature(ret_type, *arg_types)
    dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig)

    PRINT_DEBUG = dpnp_lowering.DEBUG

    def dpnp_impl(a):
        if a.size == 0:
            raise ValueError("Passed Empty array")

        queue = dpctl_functions.get_current_queue()

        in_nbytes = a.size * a.itemsize
        in_usm = dpctl_functions.malloc_shared(in_nbytes, queue)
        copy_evt = dpctl_functions.queue_memcpy(queue, in_usm, a.ctypes, in_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        # The result is a single element of the input dtype.
        res_usm = dpctl_functions.malloc_shared(a.itemsize, queue)

        axis, naxis = 0, 0

        # NOTE(review): the third argument is declared ``result_size`` but
        # receives the input byte count -- confirm against the kernel.
        dpnp_func(in_usm, res_usm, in_nbytes, a.shapeptr, a.ndim, axis, naxis)

        out = np.empty(1, dtype=a.dtype)
        copy_evt = dpctl_functions.queue_memcpy(
            queue, out.ctypes, res_usm, out.size * out.itemsize
        )
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        dpctl_functions.free_with_queue(in_usm, queue)
        dpctl_functions.free_with_queue(res_usm, queue)

        dpnp_ext._dummy_liveness_func([a.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        return out[0]

    return dpnp_impl
def dpnp_diagonal_impl(a, offset=0):
    """Build the ``diagonal`` implementation that calls the dpnp kernel.

    A small ``tuplizer`` helper is generated with ``exec`` for the specific
    ``a.ndim`` so that the computed result shape (held in an array) can be
    turned into the ``a.ndim - 1`` individual values passed to ``np.empty``.

    The returned ``dpnp_impl(a, offset=0)`` raises ``ValueError`` on an
    empty input and otherwise returns a new array filled by the kernel.
    """
    name = "diagonal"
    dpnp_lowering.ensure_dpnp(name)

    # dpnp source:
    # https://github.com/IntelPython/dpnp/blob/e389248c709531b181be8bf33b1a270fca812a92/dpnp/backend/kernels/dpnp_krnl_indexing.cpp#L39
    #
    # Function declaration:
    # void dpnp_diagonal_c(
    #     void* array1_in, const size_t input1_size, void* result1, const size_t offset, size_t* shape, size_t* res_shape, const size_t res_ndim)
    ret_type = types.void
    arg_types = (
        types.voidptr,
        types.intp,
        types.voidptr,
        types.intp,
        types.voidptr,
        types.voidptr,
        types.intp,
    )
    sig = signature(ret_type, *arg_types)
    dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig)

    PRINT_DEBUG = dpnp_lowering.DEBUG

    # Source of a helper returning (a[0], a[1], ..., a[ndim - 2]).
    function_text = f"""\
def tuplizer(a):
    return ({", ".join(f"a[{i}]" for i in range(a.ndim - 1))})
"""
    namespace = {}
    exec(function_text, globals(), namespace)
    tuplizer = register_jitable(namespace["tuplizer"])

    def dpnp_impl(a, offset=0):
        if a.size == 0:
            raise ValueError("Passed Empty array")

        shorter = min(a.shape[0], a.shape[1])
        dims = np.zeros(a.ndim - 1, dtype=np.int64)

        # Leading entries are the dimensions after the first two.
        if a.ndim > 2:
            for i in range(a.ndim - 2):
                dims[i] = a.shape[i + 2]

        # The last entry is the diagonal length for the requested offset.
        if (shorter + offset) > a.shape[1]:
            dims[-1] = a.shape[1] - offset
        elif (shorter + offset) > a.shape[0]:
            dims[-1] = a.shape[0]
        else:
            dims[-1] = shorter + offset

        out_shape = tuplizer(dims)
        out = np.empty(out_shape, dtype=a.dtype)

        queue = dpctl_functions.get_current_queue()

        in_nbytes = a.size * a.itemsize
        in_usm = dpctl_functions.malloc_shared(in_nbytes, queue)
        copy_evt = dpctl_functions.queue_memcpy(queue, in_usm, a.ctypes, in_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        res_nbytes = out.size * out.itemsize
        res_usm = dpctl_functions.malloc_shared(res_nbytes, queue)

        dpnp_func(
            in_usm,
            in_nbytes,
            res_usm,
            offset,
            a.shapeptr,
            out.shapeptr,
            out.ndim,
        )

        copy_evt = dpctl_functions.queue_memcpy(queue, out.ctypes, res_usm, res_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        dpctl_functions.free_with_queue(in_usm, queue)
        dpctl_functions.free_with_queue(res_usm, queue)

        dpnp_ext._dummy_liveness_func([a.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        return out

    return dpnp_impl
def dpnp_take_impl(a, ind):
    """Build the ``take`` implementation that calls the dpnp kernel.

    The returned ``dpnp_impl(a, ind)`` raises ``ValueError`` when ``a`` is
    empty; otherwise it returns a new array with the shape of ``ind`` and
    the dtype of ``a``, filled by the kernel.
    """
    name = "take"
    dpnp_lowering.ensure_dpnp(name)

    # dpnp source:
    # https://github.com/IntelPython/dpnp/blob/ca6eb1b8fc561957402b6f258529f862c4a8f945/dpnp/backend/kernels/dpnp_krnl_indexing.cpp#L479
    #
    # Function declaration:
    # void dpnp_take_c(void* array1_in, const size_t array1_size, void* indices1, void* result1, size_t size)
    ret_type = types.void
    arg_types = (
        types.voidptr,
        types.intp,
        types.voidptr,
        types.voidptr,
        types.intp,
    )
    sig = signature(ret_type, *arg_types)
    dpnp_func = dpnp_ext.dpnp_func("dpnp_" + name, [a.dtype.name, "NONE"], sig)

    res_dtype = a.dtype
    PRINT_DEBUG = dpnp_lowering.DEBUG

    def dpnp_impl(a, ind):
        if a.size == 0:
            raise ValueError("Passed Empty array")

        queue = dpctl_functions.get_current_queue()

        # Stage the data array.
        in_nbytes = a.size * a.itemsize
        in_usm = dpctl_functions.malloc_shared(in_nbytes, queue)
        copy_evt = dpctl_functions.queue_memcpy(queue, in_usm, a.ctypes, in_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        # Stage the index array.
        idx_nbytes = ind.size * ind.itemsize
        idx_usm = dpctl_functions.malloc_shared(idx_nbytes, queue)
        copy_evt = dpctl_functions.queue_memcpy(queue, idx_usm, ind.ctypes, idx_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        # np.arange only serves to allocate the result buffer.
        out = np.arange(0, ind.size, 1, res_dtype).reshape(ind.shape)
        res_nbytes = out.size * out.itemsize
        res_usm = dpctl_functions.malloc_shared(res_nbytes, queue)

        dpnp_func(in_usm, in_nbytes, idx_usm, res_usm, ind.size)

        copy_evt = dpctl_functions.queue_memcpy(queue, out.ctypes, res_usm, res_nbytes)
        dpctl_functions.event_wait(copy_evt)
        dpctl_functions.event_delete(copy_evt)

        dpctl_functions.free_with_queue(in_usm, queue)
        dpctl_functions.free_with_queue(idx_usm, queue)
        dpctl_functions.free_with_queue(res_usm, queue)

        dpnp_ext._dummy_liveness_func([a.size, ind.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        return out

    return dpnp_impl