def common_impl(a, out, dpnp_func, print_debug):
    """Run ``dpnp_func`` on a device copy of ``a`` and copy the result to ``out``.

    The contents of ``a`` are copied into a buffer obtained from
    ``dpctl_functions.malloc_shared``; ``dpnp_func`` is called with that
    buffer, an output buffer, ``a.shapeptr``, ``a.ndim`` and zeroed
    ``axes``/``axes_ndim``/``initial``/``where`` arguments; finally
    ``out.size * out.itemsize`` bytes are copied from the output buffer into
    ``out`` and both buffers are freed.

    Parameters
    ----------
    a : array
        Input array; must not be empty.
    out : array
        Destination array that receives the result.
    dpnp_func : callable
        Kernel entry point (its exact semantics are not visible here).
    print_debug : bool
        When true, print ``"dpnp implementation"`` after the call.

    Raises
    ------
    ValueError
        If ``a`` has no elements.
    """
    if a.size == 0:
        raise ValueError("Passed Empty array")

    sycl_queue = dpctl_functions.get_current_queue()

    a_nbytes = a.size * a.itemsize
    out_nbytes = out.size * out.itemsize

    a_usm = dpctl_functions.malloc_shared(a_nbytes, sycl_queue)
    event = dpctl_functions.queue_memcpy(sycl_queue, a_usm, a.ctypes,
                                         a_nbytes)
    dpctl_functions.event_wait(event)
    dpctl_functions.event_delete(event)

    # The copy-back below reads out_nbytes bytes from this buffer, so it must
    # be at least that large.  The previous size (a.itemsize) is kept as a
    # lower bound so the buffer never shrinks compared to the old behaviour.
    out_usm = dpctl_functions.malloc_shared(max(a.itemsize, out_nbytes),
                                            sycl_queue)

    # No axis reduction / initial value / mask is requested from the kernel.
    axes, axes_ndim = 0, 0
    initial = 0
    where = 0

    dpnp_func(out_usm, a_usm, a.shapeptr, a.ndim, axes, axes_ndim, initial,
              where)

    event = dpctl_functions.queue_memcpy(sycl_queue, out.ctypes, out_usm,
                                         out_nbytes)
    dpctl_functions.event_wait(event)
    dpctl_functions.event_delete(event)

    dpctl_functions.free_with_queue(a_usm, sycl_queue)
    dpctl_functions.free_with_queue(out_usm, sycl_queue)

    # Presumably keeps ``a`` and ``out`` referenced until the copies above
    # have completed -- confirm against dpnp_ext.
    dpnp_ext._dummy_liveness_func([a.size, out.size])

    if print_debug:
        print("dpnp implementation")
예제 #2
0
def common_impl_multinomial(n, pvals, res, dpnp_func, print_debug):
    """Call ``dpnp_func`` with a device copy of ``pvals`` and fill ``res``.

    ``pvals`` is copied into a ``malloc_shared`` buffer, ``dpnp_func`` is
    invoked as ``dpnp_func(res_buf, n, pvals_buf, pvals.size, res.size)``,
    and the result buffer is copied back into ``res`` before both buffers
    are freed.  When ``print_debug`` is true, ``"dpnp implementation"`` is
    printed at the end.
    """
    queue = dpctl_functions.get_current_queue()

    res_nbytes = res.size * res.itemsize
    pvals_nbytes = pvals.size * pvals.itemsize

    res_buf = dpctl_functions.malloc_shared(res_nbytes, queue)
    pvals_buf = dpctl_functions.malloc_shared(pvals_nbytes, queue)

    # Host -> device copy of the probabilities.
    copy_in = dpctl_functions.queue_memcpy(queue, pvals_buf, pvals.ctypes,
                                           pvals_nbytes)
    dpctl_functions.event_wait(copy_in)
    dpctl_functions.event_delete(copy_in)

    dpnp_func(res_buf, n, pvals_buf, pvals.size, res.size)

    # Device -> host copy of the result.
    copy_out = dpctl_functions.queue_memcpy(queue, res.ctypes, res_buf,
                                            res_nbytes)
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(res_buf, queue)
    dpctl_functions.free_with_queue(pvals_buf, queue)

    # Presumably keeps ``res`` referenced until the copy-back is done.
    dpnp_ext._dummy_liveness_func([res.size])

    if print_debug:
        print("dpnp implementation")
예제 #3
0
    def dpnp_impl(a):
        """Reduce ``a`` to a single value of dtype ``res_dtype`` via ``dpnp_func``.

        ``a`` is copied into a ``malloc_shared`` buffer, ``dpnp_func`` is
        called as ``dpnp_func(a_buf, out_buf, a.size)``, and the single
        result element is copied back and returned.

        Raises ``ValueError`` when ``a`` has no elements.
        """
        if a.size == 0:
            raise ValueError("Passed Empty array")

        queue = dpctl_functions.get_current_queue()

        a_nbytes = a.size * a.itemsize
        a_buf = dpctl_functions.malloc_shared(a_nbytes, queue)
        copy_in = dpctl_functions.queue_memcpy(queue, a_buf, a.ctypes,
                                               a_nbytes)
        dpctl_functions.event_wait(copy_in)
        dpctl_functions.event_delete(copy_in)

        # One-element host array that receives the result.
        out = np.empty(1, dtype=res_dtype)
        out_buf = dpctl_functions.malloc_shared(out.itemsize, queue)

        dpnp_func(a_buf, out_buf, a.size)

        copy_out = dpctl_functions.queue_memcpy(queue, out.ctypes, out_buf,
                                                out.size * out.itemsize)
        dpctl_functions.event_wait(copy_out)
        dpctl_functions.event_delete(copy_out)

        dpctl_functions.free_with_queue(a_buf, queue)
        dpctl_functions.free_with_queue(out_buf, queue)

        # Presumably keeps ``a`` and ``out`` referenced until copies finish.
        dpnp_ext._dummy_liveness_func([a.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")
        return out[0]
예제 #4
0
    def dpnp_impl(a):
        """Apply ``dpnp_func`` to a square array and return a same-shape result.

        The last two dimensions of ``a`` must be equal, otherwise
        ``ValueError`` is raised.  When that dimension is zero a plain copy
        of ``a`` is returned without calling the kernel.  Otherwise ``a`` is
        copied to a ``malloc_shared`` buffer, ``dpnp_func`` is called as
        ``dpnp_func(a_buf, out_buf, a.shapeptr)`` and the output buffer is
        copied into the returned array.
        """
        n = a.shape[-1]
        if a.shape[-2] != n:
            raise ValueError("Input array must be square.")

        # The result has the shape and dtype of the input.
        out = a.copy()

        if n == 0:
            return out

        queue = dpctl_functions.get_current_queue()

        a_nbytes = a.size * a.itemsize
        a_buf = dpctl_functions.malloc_shared(a_nbytes, queue)
        copy_in = dpctl_functions.queue_memcpy(queue, a_buf, a.ctypes,
                                               a_nbytes)
        dpctl_functions.event_wait(copy_in)
        dpctl_functions.event_delete(copy_in)

        out_nbytes = out.size * out.itemsize
        out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

        dpnp_func(a_buf, out_buf, a.shapeptr)

        copy_out = dpctl_functions.queue_memcpy(queue, out.ctypes, out_buf,
                                                out_nbytes)
        dpctl_functions.event_wait(copy_out)
        dpctl_functions.event_delete(copy_out)

        dpctl_functions.free_with_queue(a_buf, queue)
        dpctl_functions.free_with_queue(out_buf, queue)

        # Presumably keeps ``out`` and ``a`` referenced until copies finish.
        dpnp_ext._dummy_liveness_func([out.size, a.size])

        if PRINT_DEBUG:
            print("dpnp implementation")
        return out
예제 #5
0
    def dpnp_impl(a, offset=0):
        """Compute an output shape from ``a``/``offset`` and fill it via ``dpnp_func``.

        The output has ``a.ndim - 1`` dimensions: the leading entries are
        ``a.shape[2:]`` and the last entry is derived from ``offset`` and
        the first two dimensions of ``a`` (see the branch below).  ``a`` is
        copied to a ``malloc_shared`` buffer, the kernel is called, and the
        result is copied back and returned.

        Raises ``ValueError`` when ``a`` has no elements.
        """
        if a.size == 0:
            raise ValueError("Passed Empty array")

        dim0 = a.shape[0]
        dim1 = a.shape[1]
        n = min(dim0, dim1)
        res_shape = np.zeros(a.ndim - 1, dtype=np.int64)

        # Trailing dimensions of ``a`` come first in the result shape.
        if a.ndim > 2:
            for i in range(a.ndim - 2):
                res_shape[i] = a.shape[i + 2]

        # Last entry of the result shape, clipped against the first two
        # dimensions of ``a``.
        shifted = n + offset
        if shifted > dim1:
            res_shape[-1] = dim1 - offset
        elif shifted > dim0:
            res_shape[-1] = dim0
        else:
            res_shape[-1] = shifted

        out = np.empty(tuplizer(res_shape), dtype=a.dtype)

        queue = dpctl_functions.get_current_queue()

        a_nbytes = a.size * a.itemsize
        a_buf = dpctl_functions.malloc_shared(a_nbytes, queue)
        copy_in = dpctl_functions.queue_memcpy(queue, a_buf, a.ctypes,
                                               a_nbytes)
        dpctl_functions.event_wait(copy_in)
        dpctl_functions.event_delete(copy_in)

        out_nbytes = out.size * out.itemsize
        out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

        dpnp_func(a_buf, a_nbytes, out_buf, offset, a.shapeptr, out.shapeptr,
                  out.ndim)

        copy_out = dpctl_functions.queue_memcpy(queue, out.ctypes, out_buf,
                                                out_nbytes)
        dpctl_functions.event_wait(copy_out)
        dpctl_functions.event_delete(copy_out)

        dpctl_functions.free_with_queue(a_buf, queue)
        dpctl_functions.free_with_queue(out_buf, queue)

        # Presumably keeps ``a`` and ``out`` referenced until copies finish.
        dpnp_ext._dummy_liveness_func([a.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        return out
예제 #6
0
def common_shape_impl(a, out, dpnp_func, PRINT_DEBUG):
    """Run a shape-aware ``dpnp_func`` on ``a`` and copy the result to ``out``.

    ``a`` is copied into a ``malloc_shared`` buffer, ``dpnp_func`` is called
    as ``dpnp_func(a_buf, out_buf, a.shapeptr, a.ndim)``, and
    ``out.size * out.itemsize`` bytes are copied from the output buffer into
    ``out``.  Both buffers are freed afterwards.

    Raises ``ValueError`` when ``a`` has no elements.
    """
    if a.size == 0:
        raise ValueError("Passed Empty array")

    queue = dpctl_functions.get_current_queue()

    a_nbytes = a.size * a.itemsize
    a_buf = dpctl_functions.malloc_shared(a_nbytes, queue)
    copy_in = dpctl_functions.queue_memcpy(queue, a_buf, a.ctypes, a_nbytes)
    dpctl_functions.event_wait(copy_in)
    dpctl_functions.event_delete(copy_in)

    out_nbytes = out.size * out.itemsize
    out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

    dpnp_func(a_buf, out_buf, a.shapeptr, a.ndim)

    copy_out = dpctl_functions.queue_memcpy(queue, out.ctypes, out_buf,
                                            out_nbytes)
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(a_buf, queue)
    dpctl_functions.free_with_queue(out_buf, queue)

    # Presumably keeps ``a`` and ``out`` referenced until copies finish.
    dpnp_ext._dummy_liveness_func([a.size, out.size])

    if PRINT_DEBUG:
        print("dpnp implementation")
예제 #7
0
    def dpnp_impl(a):
        """Reduce ``a`` to a single boolean via ``dpnp_func``.

        An empty input short-circuits to ``True``.  Otherwise ``a`` is
        copied to a ``malloc_shared`` buffer, ``dpnp_func`` is called as
        ``dpnp_func(a_buf, out_buf, a.size)`` and the single ``np.bool_``
        result is copied back and returned.
        """
        if a.size == 0:
            return True

        out = np.empty(1, dtype=np.bool_)

        queue = dpctl_functions.get_current_queue()

        a_nbytes = a.size * a.itemsize
        a_buf = dpctl_functions.malloc_shared(a_nbytes, queue)
        copy_in = dpctl_functions.queue_memcpy(queue, a_buf, a.ctypes,
                                               a_nbytes)
        dpctl_functions.event_wait(copy_in)
        dpctl_functions.event_delete(copy_in)

        out_nbytes = out.size * out.itemsize
        out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

        dpnp_func(a_buf, out_buf, a.size)

        copy_out = dpctl_functions.queue_memcpy(queue, out.ctypes, out_buf,
                                                out_nbytes)
        dpctl_functions.event_wait(copy_out)
        dpctl_functions.event_delete(copy_out)

        dpctl_functions.free_with_queue(a_buf, queue)
        dpctl_functions.free_with_queue(out_buf, queue)

        # Presumably keeps ``a`` and ``out`` referenced until copies finish.
        dpnp_ext._dummy_liveness_func([a.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        # TODO: sometimes all() returns ndarray
        return out[0]
예제 #8
0
    def dpnp_impl(a):
        """Run ``dpnp_func`` on a 1-d or 2-d array, optionally as float64.

        When ``copy_input_to_double`` is set, the data handed to the kernel
        is ``a.astype(np.float64)``; otherwise ``a`` itself is used.  For a
        2-d input the result is a ``(rows, rows)`` array of ``res_dtype``;
        for a 1-d input a single element is computed and returned as a
        scalar.  The kernel is called as
        ``dpnp_func(in_buf, out_buf, rows, cols)``.

        Raises ``ValueError`` when ``a`` has no elements.
        """
        if a.size == 0:
            raise ValueError("Passed Empty array")

        queue = dpctl_functions.get_current_queue()

        # We have to pass an array in double precision to DpNp.
        if copy_input_to_double:
            src = a.astype(np.float64)
        else:
            src = a

        src_nbytes = src.size * src.itemsize
        src_buf = dpctl_functions.malloc_shared(src_nbytes, queue)
        copy_in = dpctl_functions.queue_memcpy(queue, src_buf, src.ctypes,
                                               src_nbytes)
        dpctl_functions.event_wait(copy_in)
        dpctl_functions.event_delete(copy_in)

        if a.ndim == 2:
            rows = a.shape[0]
            cols = a.shape[1]
            out = np.empty((rows, rows), dtype=res_dtype)
        elif a.ndim == 1:
            # A 1-d input is treated as a single row.
            rows = 1
            cols = a.shape[0]
            out = np.empty(rows, dtype=res_dtype)

        out_nbytes = out.size * out.itemsize
        out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

        dpnp_func(src_buf, out_buf, rows, cols)

        copy_out = dpctl_functions.queue_memcpy(queue, out.ctypes, out_buf,
                                                out_nbytes)
        dpctl_functions.event_wait(copy_out)
        dpctl_functions.event_delete(copy_out)

        dpctl_functions.free_with_queue(src_buf, queue)
        dpctl_functions.free_with_queue(out_buf, queue)

        # Presumably keeps the arrays referenced until copies finish.
        dpnp_ext._dummy_liveness_func([src.size, a.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")
        if a.ndim == 2:
            return out
        elif a.ndim == 1:
            return out[0]
예제 #9
0
    def dpnp_impl(a):
        """Call ``dpnp_eig`` on a square array and return ``(wr, vr)``.

        ``wr`` has shape ``(n,)`` and ``vr`` shape ``(n, n)``, both of dtype
        ``res_dtype``, where ``n`` is the size of the last dimension of
        ``a``.  When ``n`` is zero the (empty) arrays are returned without
        calling the kernel.

        Raises ``ValueError`` when the last two dimensions of ``a`` differ.
        ``dpnp_ext._check_finite_matrix`` is also applied to ``a`` (its
        failure behaviour is not visible here).
        """
        n = a.shape[-1]
        if a.shape[-2] != n:
            raise ValueError("Last 2 dimensions of the array must be square.")

        dpnp_ext._check_finite_matrix(a)

        wr = np.empty(n, dtype=res_dtype)
        vr = np.empty((n, n), dtype=res_dtype)

        if n == 0:
            return wr, vr

        queue = dpctl_functions.get_current_queue()

        a_nbytes = a.size * a.itemsize
        a_buf = dpctl_functions.malloc_shared(a_nbytes, queue)
        copy_in = dpctl_functions.queue_memcpy(queue, a_buf, a.ctypes,
                                               a_nbytes)
        dpctl_functions.event_wait(copy_in)
        dpctl_functions.event_delete(copy_in)

        wr_nbytes = wr.size * wr.itemsize
        vr_nbytes = vr.size * vr.itemsize
        wr_buf = dpctl_functions.malloc_shared(wr_nbytes, queue)
        vr_buf = dpctl_functions.malloc_shared(vr_nbytes, queue)

        dpnp_eig(a_buf, wr_buf, vr_buf, n)

        # Copy both results back, one after the other.
        copy_wr = dpctl_functions.queue_memcpy(queue, wr.ctypes, wr_buf,
                                               wr_nbytes)
        dpctl_functions.event_wait(copy_wr)
        dpctl_functions.event_delete(copy_wr)
        copy_vr = dpctl_functions.queue_memcpy(queue, vr.ctypes, vr_buf,
                                               vr_nbytes)
        dpctl_functions.event_wait(copy_vr)
        dpctl_functions.event_delete(copy_vr)

        dpctl_functions.free_with_queue(a_buf, queue)
        dpctl_functions.free_with_queue(wr_buf, queue)
        dpctl_functions.free_with_queue(vr_buf, queue)

        # Presumably keeps ``wr`` and ``vr`` referenced until copies finish.
        dpnp_ext._dummy_liveness_func([wr.size, vr.size])

        if PRINT_DEBUG:
            print("dpnp implementation")
        return wr, vr
예제 #10
0
    def dpnp_impl(a, kth):
        """Run the partition kernel ``dpnp_func`` on ``a`` around index ``kth``.

        ``a`` and a copy of it are placed in ``malloc_shared`` buffers, the
        kernel is called as
        ``dpnp_func(a_usm, arr2_usm, out_usm, kth_, a.shapeptr, a.ndim)``,
        and the output buffer is copied into a new array with the shape and
        dtype of ``a``, which is returned.

        Parameters
        ----------
        a : array
            Input array; must not be empty.
        kth : int
            Element index to partition by.  A negative value counts from
            the end of the last axis, as in ``numpy.partition``.

        Raises
        ------
        ValueError
            If ``a`` has no elements.
        """
        if a.size == 0:
            raise ValueError("Passed Empty array")

        # A negative kth is an index relative to the length of the
        # partitioned axis, not to the number of dimensions.  The kernel
        # presumably works along the last axis -- confirm against dpnp.
        kth_ = kth if kth >= 0 else (a.shape[-1] + kth)

        arr2 = numba_dppy.dpnp.copy(a)

        out = np.empty(a.shape, dtype=a.dtype)

        sycl_queue = dpctl_functions.get_current_queue()

        a_nbytes = a.size * a.itemsize
        a_usm = dpctl_functions.malloc_shared(a_nbytes, sycl_queue)
        event = dpctl_functions.queue_memcpy(sycl_queue, a_usm, a.ctypes,
                                             a_nbytes)
        dpctl_functions.event_wait(event)
        dpctl_functions.event_delete(event)

        arr2_nbytes = arr2.size * arr2.itemsize
        arr2_usm = dpctl_functions.malloc_shared(arr2_nbytes, sycl_queue)
        event = dpctl_functions.queue_memcpy(sycl_queue, arr2_usm, arr2.ctypes,
                                             arr2_nbytes)
        dpctl_functions.event_wait(event)
        dpctl_functions.event_delete(event)

        out_nbytes = out.size * out.itemsize
        out_usm = dpctl_functions.malloc_shared(out_nbytes, sycl_queue)

        dpnp_func(a_usm, arr2_usm, out_usm, kth_, a.shapeptr, a.ndim)

        event = dpctl_functions.queue_memcpy(sycl_queue, out.ctypes, out_usm,
                                             out_nbytes)
        dpctl_functions.event_wait(event)
        dpctl_functions.event_delete(event)

        dpctl_functions.free_with_queue(a_usm, sycl_queue)
        dpctl_functions.free_with_queue(arr2_usm, sycl_queue)
        dpctl_functions.free_with_queue(out_usm, sycl_queue)

        # Presumably keeps the arrays referenced until copies finish.
        dpnp_ext._dummy_liveness_func([a.size, arr2.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        return out
예제 #11
0
    def dpnp_impl(a):
        """Run ``dpnp_func`` over the trailing square matrices of ``a``.

        For a 2-d input a scalar is returned; for higher-dimensional input
        an array of shape ``a.shape[:-2]`` is returned.  The kernel is
        called as ``dpnp_func(a_buf, out_buf, a.shapeptr, a.ndim)``.

        Raises ``ValueError`` when the last two dimensions of ``a`` differ.
        ``dpnp_ext._check_finite_matrix`` is also applied to ``a`` (its
        failure behaviour is not visible here).
        """
        n = a.shape[-1]
        if a.shape[-2] != n:
            raise ValueError("Input array must be square.")

        dpnp_ext._check_finite_matrix(a)

        if a.ndim == 2:
            out = np.empty((1, ), dtype=a.dtype)
            # Placeholder value; it is overwritten by the copy-back below.
            out[0] = -4
        else:
            out = np.empty(a.shape[:-2], dtype=a.dtype)

        queue = dpctl_functions.get_current_queue()

        a_nbytes = a.size * a.itemsize
        a_buf = dpctl_functions.malloc_shared(a_nbytes, queue)
        copy_in = dpctl_functions.queue_memcpy(queue, a_buf, a.ctypes,
                                               a_nbytes)
        dpctl_functions.event_wait(copy_in)
        dpctl_functions.event_delete(copy_in)

        out_nbytes = out.size * out.itemsize
        out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

        dpnp_func(a_buf, out_buf, a.shapeptr, a.ndim)

        copy_out = dpctl_functions.queue_memcpy(queue, out.ctypes, out_buf,
                                                out_nbytes)
        dpctl_functions.event_wait(copy_out)
        dpctl_functions.event_delete(copy_out)

        dpctl_functions.free_with_queue(a_buf, queue)
        dpctl_functions.free_with_queue(out_buf, queue)

        # Presumably keeps ``out`` and ``a`` referenced until copies finish.
        dpnp_ext._dummy_liveness_func([out.size, a.size])

        if PRINT_DEBUG:
            print("dpnp implementation")
        if a.ndim == 2:
            return out[0]
        else:
            return out
예제 #12
0
    def dpnp_impl(M, tol=None, hermitian=False):
        """Compute a single value of ``M.dtype`` from ``M`` via ``dpnp_func``.

        Only the default ``tol=None`` and ``hermitian=False`` are accepted,
        and ``M`` may have at most two dimensions; any other combination
        raises ``ValueError``.  The kernel is called as
        ``dpnp_func(m_buf, out_buf, M.shapeptr, M.ndim)`` and the single
        result element is returned.
        """
        if tol is not None:
            raise ValueError(
                "tol is not supported for np.linalg.matrix_rank(M)")
        if hermitian:
            raise ValueError(
                "hermitian is not supported for np.linalg.matrix_rank(M)")

        if M.ndim > 2:
            raise ValueError(
                "np.linalg.matrix_rank(M) is only supported on 1 or 2-d arrays"
            )

        out = np.empty(1, dtype=M.dtype)

        queue = dpctl_functions.get_current_queue()

        m_nbytes = M.size * M.itemsize
        m_buf = dpctl_functions.malloc_shared(m_nbytes, queue)
        copy_in = dpctl_functions.queue_memcpy(queue, m_buf, M.ctypes,
                                               m_nbytes)
        dpctl_functions.event_wait(copy_in)
        dpctl_functions.event_delete(copy_in)

        out_nbytes = out.size * out.itemsize
        out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

        dpnp_func(m_buf, out_buf, M.shapeptr, M.ndim)

        copy_out = dpctl_functions.queue_memcpy(queue, out.ctypes, out_buf,
                                                out_nbytes)
        dpctl_functions.event_wait(copy_out)
        dpctl_functions.event_delete(copy_out)

        dpctl_functions.free_with_queue(m_buf, queue)
        dpctl_functions.free_with_queue(out_buf, queue)

        # Presumably keeps ``out`` and ``M`` referenced until copies finish.
        dpnp_ext._dummy_liveness_func([out.size, M.size])

        if PRINT_DEBUG:
            print("dpnp implementation")
        return out[0]
예제 #13
0
    def dpnp_impl(a, repeats):
        """Fill an array of ``a.size * repeats`` elements via ``dpnp_func``.

        Only non-empty inputs with fewer than two dimensions are accepted;
        anything else raises ``ValueError``.  The kernel is called as
        ``dpnp_func(a_buf, out_buf, repeats, a.size)`` and the output
        buffer is copied into the returned array of dtype ``a.dtype``.
        """
        if a.size == 0:
            raise ValueError("Passed Empty array")

        if a.ndim >= 2:
            raise ValueError("Not supported in dpnp")

        out = np.zeros(a.size * repeats, dtype=a.dtype)

        queue = dpctl_functions.get_current_queue()

        a_nbytes = a.size * a.itemsize
        a_buf = dpctl_functions.malloc_shared(a_nbytes, queue)
        copy_in = dpctl_functions.queue_memcpy(queue, a_buf, a.ctypes,
                                               a_nbytes)
        dpctl_functions.event_wait(copy_in)
        dpctl_functions.event_delete(copy_in)

        out_nbytes = out.size * out.itemsize
        out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

        dpnp_func(a_buf, out_buf, repeats, a.size)

        copy_out = dpctl_functions.queue_memcpy(queue, out.ctypes, out_buf,
                                                out_nbytes)
        dpctl_functions.event_wait(copy_out)
        dpctl_functions.event_delete(copy_out)

        dpctl_functions.free_with_queue(a_buf, queue)
        dpctl_functions.free_with_queue(out_buf, queue)

        # Presumably keeps ``a`` and ``out`` referenced until copies finish.
        dpnp_ext._dummy_liveness_func([a.size, out.size])

        if PRINT_DEBUG:
            print("dpnp implementation")

        return out
예제 #14
0
def common_impl_multivariate_normal(mean, cov, size, check_valid, tol, res,
                                    dpnp_func, print_debug):
    """Call ``dpnp_func`` with device copies of ``mean``/``cov`` and fill ``res``.

    ``mean`` and ``cov`` are copied into ``malloc_shared`` buffers and the
    kernel is invoked as ``dpnp_func(res_buf, mean.size, mean_buf,
    mean.size, cov_buf, cov.size, res.size)``.  The result buffer is then
    copied into ``res`` and all buffers are freed.  ``size``,
    ``check_valid`` and ``tol`` are accepted but not used here.
    """
    queue = dpctl_functions.get_current_queue()

    res_nbytes = res.size * res.itemsize
    res_buf = dpctl_functions.malloc_shared(res_nbytes, queue)

    # Host -> device copy of the mean vector.
    mean_nbytes = mean.size * mean.itemsize
    mean_buf = dpctl_functions.malloc_shared(mean_nbytes, queue)
    copy_mean = dpctl_functions.queue_memcpy(queue, mean_buf, mean.ctypes,
                                             mean_nbytes)
    dpctl_functions.event_wait(copy_mean)
    dpctl_functions.event_delete(copy_mean)

    # Host -> device copy of the covariance data.
    cov_nbytes = cov.size * cov.itemsize
    cov_buf = dpctl_functions.malloc_shared(cov_nbytes, queue)
    copy_cov = dpctl_functions.queue_memcpy(queue, cov_buf, cov.ctypes,
                                            cov_nbytes)
    dpctl_functions.event_wait(copy_cov)
    dpctl_functions.event_delete(copy_cov)

    dpnp_func(res_buf, mean.size, mean_buf, mean.size, cov_buf, cov.size,
              res.size)

    copy_out = dpctl_functions.queue_memcpy(queue, res.ctypes, res_buf,
                                            res_nbytes)
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(res_buf, queue)
    dpctl_functions.free_with_queue(mean_buf, queue)
    dpctl_functions.free_with_queue(cov_buf, queue)

    # Presumably keeps ``res`` referenced until the copy-back is done.
    dpnp_ext._dummy_liveness_func([res.size])

    if print_debug:
        print("dpnp implementation")
예제 #15
0
def common_impl(low, high, res, dpnp_func, print_debug):
    """Fill ``res`` by calling ``dpnp_func(res_buf, low, high, res.size)``.

    ``check_range(low, high)`` is applied first (its behaviour is defined
    elsewhere).  The kernel writes into a ``malloc_shared`` buffer whose
    contents are then copied into ``res``; the buffer is freed afterwards.
    """
    check_range(low, high)

    queue = dpctl_functions.get_current_queue()

    res_nbytes = res.size * res.itemsize
    res_buf = dpctl_functions.malloc_shared(res_nbytes, queue)

    dpnp_func(res_buf, low, high, res.size)

    copy_out = dpctl_functions.queue_memcpy(queue, res.ctypes, res_buf,
                                            res_nbytes)
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)
    dpctl_functions.free_with_queue(res_buf, queue)

    # Presumably keeps ``res`` referenced until the copy-back is done.
    dpnp_ext._dummy_liveness_func([res.size])

    if print_debug:
        print("dpnp implementation")
예제 #16
0
def common_impl_hypergeometric(ngood, nbad, nsample, res, dpnp_func,
                               print_debug):
    """Fill ``res`` via ``dpnp_func(res_buf, ngood, nbad, nsample, res.size)``.

    The kernel writes into a ``malloc_shared`` buffer whose contents are
    then copied into ``res``; the buffer is freed afterwards.  When
    ``print_debug`` is true, ``"dpnp implementation"`` is printed.
    """
    queue = dpctl_functions.get_current_queue()

    res_nbytes = res.size * res.itemsize
    res_buf = dpctl_functions.malloc_shared(res_nbytes, queue)

    dpnp_func(res_buf, ngood, nbad, nsample, res.size)

    copy_out = dpctl_functions.queue_memcpy(queue, res.ctypes, res_buf,
                                            res_nbytes)
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(res_buf, queue)

    # Presumably keeps ``res`` referenced until the copy-back is done.
    dpnp_ext._dummy_liveness_func([res.size])

    if print_debug:
        print("dpnp implementation")
예제 #17
0
def common_impl_1_arg(arg1, res, dpnp_func, print_debug):
    """Fill ``res`` by calling ``dpnp_func(res_usm, arg1, res.size)``.

    The kernel writes into a ``malloc_shared`` buffer whose contents are
    then copied into ``res``; the buffer is freed afterwards.

    Parameters
    ----------
    arg1 : scalar
        Single argument forwarded to ``dpnp_func``.
    res : array
        Destination array that receives the result.
    dpnp_func : callable
        Kernel entry point (its exact semantics are not visible here).
    print_debug : bool
        When true, print ``"dpnp implementation"`` after the call.

    Raises
    ------
    ValueError
        If ``dpnp_func`` raises any exception.
    """
    sycl_queue = dpctl_functions.get_current_queue()

    res_nbytes = res.size * res.itemsize
    res_usm = dpctl_functions.malloc_shared(res_nbytes, sycl_queue)

    try:
        dpnp_func(res_usm, arg1, res.size)
    except Exception:
        # Release the buffer before reporting the failure; otherwise the
        # allocation would be leaked on this error path.
        dpctl_functions.free_with_queue(res_usm, sycl_queue)
        raise ValueError("Device not supported")

    event = dpctl_functions.queue_memcpy(sycl_queue, res.ctypes, res_usm,
                                         res_nbytes)
    dpctl_functions.event_wait(event)
    dpctl_functions.event_delete(event)
    dpctl_functions.free_with_queue(res_usm, sycl_queue)

    # Presumably keeps ``res`` referenced until the copy-back is done.
    dpnp_ext._dummy_liveness_func([res.size])

    if print_debug:
        print("dpnp implementation")
예제 #18
0
def common_dot_impl(dpnp_func, a, b, out, m, print_debug):
    """Call a two-input ``dpnp_func`` on ``a`` and ``b`` and fill ``out``.

    Both inputs are copied into ``malloc_shared`` buffers.  The kernel
    receives, for the result and for each input in turn, the buffer, the
    element count, the number of dimensions, the shape pointer and a
    strides pointer (the same ``np.array(1)`` is used for all three).  The
    output buffer is then copied into ``out`` and all buffers are freed.
    ``m`` is accepted but not used here.
    """
    queue = dpctl_functions.get_current_queue()

    # Host -> device copy of the first operand.
    a_nbytes = a.size * a.itemsize
    a_buf = dpctl_functions.malloc_shared(a_nbytes, queue)
    copy_a = dpctl_functions.queue_memcpy(queue, a_buf, a.ctypes, a_nbytes)
    dpctl_functions.event_wait(copy_a)
    dpctl_functions.event_delete(copy_a)

    # Host -> device copy of the second operand.
    b_nbytes = b.size * b.itemsize
    b_buf = dpctl_functions.malloc_shared(b_nbytes, queue)
    copy_b = dpctl_functions.queue_memcpy(queue, b_buf, b.ctypes, b_nbytes)
    dpctl_functions.event_wait(copy_b)
    dpctl_functions.event_delete(copy_b)

    out_nbytes = out.size * out.itemsize
    out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

    # Shared strides argument passed for the result and both inputs.
    strides = np.array(1)

    dpnp_func(
        # result
        out_buf, out.size, out.ndim, out.shapeptr, strides.ctypes,
        # first input
        a_buf, a.size, a.ndim, a.shapeptr, strides.ctypes,
        # second input
        b_buf, b.size, b.ndim, b.shapeptr, strides.ctypes,
    )

    copy_out = dpctl_functions.queue_memcpy(queue, out.ctypes, out_buf,
                                            out_nbytes)
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(a_buf, queue)
    dpctl_functions.free_with_queue(b_buf, queue)
    dpctl_functions.free_with_queue(out_buf, queue)

    # Presumably keeps the arrays referenced until copies finish.
    dpnp_ext._dummy_liveness_func([a.size, b.size, out.size])

    if print_debug:
        print("dpnp implementation")