Example #1
    def impl_test_binaryop_matvec(self, dtype):
        x = np.random.normal(scale=5.0, size=(3, 5)).astype(dtype)
        a = np.random.normal(scale=5.0, size=(1, 5)).astype(dtype)
        b = np.random.normal(scale=5.0, size=(3, 1)).astype(dtype)

        # the following two test correct broadcasting on 1D vectors
        c = np.random.normal(scale=5.0, size=(5, )).astype(dtype)
        d = np.random.normal(scale=5.0, size=(3, )).astype(dtype)
        x_gpu = gpuarray.to_gpu(x)
        a_gpu = gpuarray.to_gpu(a)
        b_gpu = gpuarray.to_gpu(b)
        c_gpu = gpuarray.to_gpu(c)
        d_gpu = gpuarray.to_gpu(d)
        out = gpuarray.empty(x.shape, dtype=dtype)
        # addition
        res = misc.add_matvec(x_gpu, a_gpu, out=out).get()
        assert np.allclose(res, x+a)
        assert np.allclose(misc.add_matvec(x_gpu, b_gpu).get(), x+b)
        assert np.allclose(misc.add_matvec(x_gpu, c_gpu).get(), x+c)
        assert_raises(ValueError, misc.add_matvec, x_gpu, d_gpu)
        # multiplication
        res = misc.mult_matvec(x_gpu, a_gpu, out=out).get()
        assert np.allclose(res, x*a)
        assert np.allclose(misc.mult_matvec(x_gpu, b_gpu).get(), x*b)
        assert np.allclose(misc.mult_matvec(x_gpu, c_gpu).get(), x*c)
        assert_raises(ValueError, misc.mult_matvec, x_gpu, d_gpu)
        # division
        res = misc.div_matvec(x_gpu, a_gpu, out=out).get()
        assert np.allclose(res, x/a)
        assert np.allclose(misc.div_matvec(x_gpu, b_gpu).get(), x/b)
        assert np.allclose(misc.div_matvec(x_gpu, c_gpu).get(), x/c)
        assert_raises(ValueError, misc.div_matvec, x_gpu, d_gpu)
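The broadcasting rules this test treats as ground truth are plain NumPy semantics: row vectors of shape (1, 5) or (5,) and column vectors of shape (3, 1) broadcast against a (3, 5) matrix, while a length-3 1D vector is rejected. A minimal host-only sketch of those rules (illustrative names, no GPU required):

import numpy as np

x = np.arange(15, dtype=np.float32).reshape(3, 5)
a = np.ones((1, 5), dtype=np.float32)  # row vector: broadcasts over rows
b = np.ones((3, 1), dtype=np.float32)  # column vector: broadcasts over columns
c = np.ones(5, dtype=np.float32)       # 1D, length 5: treated like a row
d = np.ones(3, dtype=np.float32)       # 1D, length 3: ambiguous for (3, 5)

assert (x + a).shape == (3, 5)
assert (x + b).shape == (3, 5)
assert (x + c).shape == (3, 5)
try:
    x + d  # NumPy raises ValueError here, and add_matvec must as well
except ValueError:
    pass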
Example #2
    def _rbf_kernel_vectorized_cublas(data1,
                                      data2,
                                      sigma=10):  # pragma: no cover
        """kernel for edge similarity computed with the vectorized method

        Args:
            data1 (TYPE): pssm data 1
            data2 (TYPE): pssm dta 2
            sigma (int, optional): exponent of the exponetial

        Returns:
            np.array: value of the rbk kernel for all the pairs
        """
        beta = 2 * sigma**2
        d1_ = gpuarray.to_gpu(data1.astype(np.float32))
        d2_ = gpuarray.to_gpu(data2.astype(np.float32))
        mgpu = -2 * culinalg.dot(d1_, d2_, transa='N', transb='T')
        vgpu = cumisc.sum(d1_**2, axis=1)[:, None]
        cumisc.add_matvec(mgpu, vgpu, out=mgpu)

        vgpu = cumisc.sum(d2_**2, axis=1)
        cumisc.add_matvec(mgpu, vgpu, out=mgpu)

        mcpu = mgpu.get()
        return np.exp(-mcpu / beta).reshape(-1)
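The kernel accumulates the standard expansion ||u - v||^2 = ||u||^2 + ||v||^2 - 2 u.v in place on the GPU before exponentiating. A host-side reference for checking the result (a sketch assuming only NumPy; the function name is illustrative):

import numpy as np

def rbf_kernel_cpu(data1, data2, sigma=10):
    """NumPy reference for _rbf_kernel_vectorized_cublas."""
    beta = 2 * sigma**2
    # pairwise squared distances via the dot-product expansion
    sq = (data1**2).sum(1)[:, None] + (data2**2).sum(1)[None, :] \
        - 2.0 * data1 @ data2.T
    return np.exp(-sq / beta).reshape(-1)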
Example #3
    def _forward_pass(self, activations):
        """Perform a forward pass on the network by computing the values
        of the neurons in the hidden layers and the output layer.

        Parameters
        ----------
        activations : list, length = n_layers - 1
            The ith element of the list holds the values of the ith layer.
        """
        hidden_activation = ACTIVATIONS[self.activation]
        # Iterate over the hidden layers
        for i in range(self.n_layers_ - 1):
            activations[i + 1] = safe_sparse_dot(activations[i],
                                                 self.coefs_[i])
            activations[i + 1] = cumisc.add_matvec(activations[i + 1],
                                                   self.intercepts_[i],
                                                   axis=1)

            # For the hidden layers
            if (i + 1) != (self.n_layers_ - 1):
                activations[i + 1] = hidden_activation(activations[i + 1])

        # For the last layer
        output_activation = ACTIVATIONS[self.out_activation_]
        activations[i + 1] = output_activation(activations[i + 1])

        return activations
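The loop body is an affine map followed by a broadcast bias add: add_matvec(..., axis=1) adds intercepts_[i] to every row of the activation matrix. A CPU mirror of the same loop (a sketch in plain NumPy; helper and argument names are illustrative):

import numpy as np

def forward_pass_cpu(activations, coefs, intercepts, hidden_act=np.tanh):
    """Host equivalent of the GPU loop above."""
    n_layers = len(coefs) + 1
    for i in range(n_layers - 1):
        # add_matvec(..., axis=1) is a row-wise broadcast add of the bias
        activations[i + 1] = activations[i] @ coefs[i] + intercepts[i][None, :]
        if (i + 1) != (n_layers - 1):
            activations[i + 1] = hidden_act(activations[i + 1])
    return activations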
Example #4
    def _dev_lin(self, devX, devW, devB):
        """Linear function on GPU.

        Returns:
            devH (gpuarray): GPU matrix with the result.
        """
        devH = misc.add_matvec(linalg.dot(devX, devW), devB, axis=1)
        return devH
Example #5
    def _dev_tanh(self, devX, devW, devB):
        """Hyperbolic tangent function on GPU.

        Returns:
            devH (gpuarray): GPU matrix with the result.
        """
        devH = misc.add_matvec(linalg.dot(devX, devW), devB, axis=1)
        cumath.tanh(devH, out=devH)
        return devH
Example #6
    def get_distances_to_centers(self, data):

        # make sure the array is c order
        data = np.asarray(data, dtype=np.float32, order='C')

        # ship to gpu
        data_gpu = gpuarray.to_gpu(data)

        # alloc space on gpu for distances
        dists_shape = (data.shape[0], self.centers.shape[0])
        dists_gpu = gpuarray.zeros(dists_shape, np.float32)

        # calc data norms on gpu
        data_norms = cumisc.sum(data_gpu**2, axis=1)

        # calc squared distances on gpu: ||x||^2 + ||c||^2 - 2 x.c^T
        cumisc.add_matvec(dists_gpu, self.center_norms, 1, dists_gpu)
        cumisc.add_matvec(dists_gpu, data_norms, 0, dists_gpu)
        culinalg.add_dot(data_gpu, self.centers_gpu,
            dists_gpu, transb='T', alpha=-2.0)
        return dists_gpu
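The three in-place calls accumulate ||x||^2 + ||c||^2 - 2 x.c^T, i.e. the squared Euclidean distance from every row of data to every center. A host-side check of that identity (a sketch assuming NumPy and SciPy; array names are illustrative):

import numpy as np
from scipy.spatial.distance import cdist

data = np.random.rand(100, 8).astype(np.float32)
centers = np.random.rand(4, 8).astype(np.float32)

# squared distances via the same expansion the GPU code uses
dists = (data**2).sum(1)[:, None] + (centers**2).sum(1)[None, :] \
    - 2.0 * data @ centers.T
assert np.allclose(dists, cdist(data, centers, 'sqeuclidean'), atol=1e-3)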
Example #7
    def _dev_sigm(self, devX, devW, devB):
        """Compute Sigmoid on GPU for a given array and return array."""

#        def sigm(a):
#            block = a._block
#            grid = (int(np.ceil(1.0 * np.prod(a.shape) / block[0])), 1)
#            dev_sigm.prepared_call(grid, block, a.gpudata)
#            return a

        devH = misc.add_matvec(linalg.dot(devX, devW), devB, axis=1)
        block = devH._block
        grid = (int(np.ceil(1.0 * np.prod(devH.shape) / block[0])), 1)
        self.dev_sigm.prepared_call(grid, block, devH.gpudata)
        return devH
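The prepared dev_sigm kernel is defined elsewhere in the class. A self-contained alternative with the same effect could use pycuda's ElementwiseKernel (a sketch, not the original kernel):

import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context
from pycuda import gpuarray
from pycuda.elementwise import ElementwiseKernel

# in-place logistic sigmoid, 1 / (1 + exp(-x)), over a float32 gpuarray
sigmoid = ElementwiseKernel(
    "float *x",
    "x[i] = 1.0f / (1.0f + expf(-x[i]))",
    "sigmoid_kernel")

devH = gpuarray.to_gpu(np.random.randn(4, 3).astype(np.float32))
sigmoid(devH)  # modifies devH in place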
Example #8
def demosaick_gpu(img):
    img = gp.to_gpu(img)
    p2x = im2col(img, _i2c2)
    cm.log(img + _eps, out=img)
    p1x = im2col(img, _i2c1)

    wA = p1x.shape[0]
    wB = p2x.shape[0]
    hA = p1x.shape[1]
    hB = p2x.shape[1]

    # Path 1
    p1x = p1x.reshape([wA * hA, 576])
    p1y = lg.dot(p1x, _wts.int1)
    cm.exp(p1y, out=p1y)

    p1y = p1y.reshape([wA * hA * 64, 3 * _ofac])
    p1x = lg.dot(p1y, _wts.int2)
    msc.add_matvec(p1x, _wts.int2b, out=p1x)
    p1x = p1x.reshape([wA * hA * 64 * 3, _ofac])

    # Path 2
    # conv1
    p2x = p2x.reshape([wB * hB, 64])
    p2y = lg.dot(p2x, _wts.c1)
    msc.add_matvec(p2y, _wts.c1b, out=p2y)
    gp.maximum(p2y, 0., p2y)
    p2y = p2y.reshape([wB, hB, _numsel])

    # conv2
    shI = [wB - 1, hB - 1, _numsel]
    shM = [(wB - 1) * (hB - 1), _numsel]
    p2x = gp.empty(shM, dtype=np.float32)
    pTT = gp.empty(shI, dtype=np.float32)

    pTT = pTT.reshape(shI)
    pTT[...] = p2y[0:-1, 0:-1, :]
    pTT = pTT.reshape(shM)
    p2x = lg.dot(pTT, _wts.c200)
    pTT = pTT.reshape(shI)
    pTT[...] = p2y[0:-1, 1:, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c201, p2x)
    pTT = pTT.reshape(shI)
    pTT[...] = p2y[1:, 0:-1, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c210, p2x)
    pTT = pTT.reshape(shI)
    pTT[...] = p2y[1:, 1:, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c211, p2x)
    msc.add_matvec(p2x, _wts.c2b, out=p2x)
    gp.maximum(p2x, 0., p2x)
    p2x = p2x.reshape(shI)

    # conv 3
    shI = [wB - 2, hB - 2, _numsel]
    shM = [(wB - 2) * (hB - 2), _numsel]
    p2y = gp.empty(shM, dtype=np.float32)
    pTT = gp.empty(shI, dtype=np.float32)

    pTT = pTT.reshape(shI)
    pTT[...] = p2x[0:-1, 0:-1, :]
    pTT = pTT.reshape(shM)
    p2y = lg.dot(pTT, _wts.c300)
    pTT = pTT.reshape(shI)
    pTT[...] = p2x[0:-1, 1:, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c301, p2y)
    pTT = pTT.reshape(shI)
    pTT[...] = p2x[1:, 0:-1, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c310, p2y)
    pTT = pTT.reshape(shI)
    pTT[...] = p2x[1:, 1:, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c311, p2y)
    msc.add_matvec(p2y, _wts.c3b, out=p2y)
    gp.maximum(p2y, 0., p2y)

    p2x = lg.dot(p2y, _wts.sout)

    msc.add_matvec(p2x, _wts.soutb, out=p2x)
    gp.maximum(p2x, 0., p2x)
    p2x = p2x.reshape(p1x.shape)

    # Combine
    p1x *= p2x
    p1 = msc.sum(p1x, axis=1)
    gp.maximum(p1, 0., p1)
    gp.minimum(p1, 1., p1)
    p1 = p1.reshape([wA, hA, 64 * 3])

    im = p2im(p1.get())

    return im
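Every convolution stage above ends with the same two in-place calls: a broadcast bias add via add_matvec(..., out=...) followed by a ReLU via gpuarray.maximum. A hypothetical helper that names the pattern (reusing the gp/msc aliases from the snippet):

def bias_relu_(x_gpu, b_gpu):
    """In-place bias add followed by ReLU for an (n, k) gpuarray."""
    msc.add_matvec(x_gpu, b_gpu, out=x_gpu)  # broadcast bias over rows
    gp.maximum(x_gpu, 0., x_gpu)             # elementwise max(x, 0)
    return x_gpu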
Example #9
    def _impl_test_binaryop_matvec(self, dtype):
        if issubclass(dtype, numbers.Integral):
            x = np.random.randint(1, 10, 15).reshape((3, 5)).astype(dtype)
            a = np.random.randint(1, 10, 5).reshape((1, 5)).astype(dtype)
            b = np.random.randint(1, 10, 3).reshape((3, 1)).astype(dtype)

            # the following two test correct broadcasting on 1D vectors
            c = np.random.randint(1, 10, 5).reshape((5, )).astype(dtype)
            d = np.random.randint(1, 10, 3).reshape((3, )).astype(dtype)
        else:
            x = np.random.normal(scale=5.0, size=(3, 5)).astype(dtype)
            a = np.random.normal(scale=5.0, size=(1, 5)).astype(dtype)
            b = np.random.normal(scale=5.0, size=(3, 1)).astype(dtype)

            # the following two test correct broadcasting on 1D vectors
            c = np.random.normal(scale=5.0, size=(5, )).astype(dtype)
            d = np.random.normal(scale=5.0, size=(3, )).astype(dtype)
        x_gpu = gpuarray.to_gpu(x)
        a_gpu = gpuarray.to_gpu(a)
        b_gpu = gpuarray.to_gpu(b)
        c_gpu = gpuarray.to_gpu(c)
        d_gpu = gpuarray.to_gpu(d)
        out = gpuarray.empty(x.shape, dtype=dtype)

        # addition
        res = misc.add_matvec(x_gpu, a_gpu, out=out).get()
        assert_allclose(res,
                        x + a,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])
        assert_allclose(misc.add_matvec(x_gpu, b_gpu).get(),
                        x + b,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])
        assert_allclose(misc.add_matvec(x_gpu, c_gpu).get(),
                        x + c,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])
        assert_raises(ValueError, misc.add_matvec, x_gpu, d_gpu)

        # multiplication
        res = misc.mult_matvec(x_gpu, a_gpu, out=out).get()
        assert_allclose(res,
                        x * a,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])
        assert_allclose(misc.mult_matvec(x_gpu, b_gpu).get(),
                        x * b,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])
        assert_allclose(misc.mult_matvec(x_gpu, c_gpu).get(),
                        x * c,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])
        assert_raises(ValueError, misc.mult_matvec, x_gpu, d_gpu)

        # division
        res = misc.div_matvec(x_gpu, a_gpu, out=out).get()
        if issubclass(dtype, numbers.Integral):
            assert_allclose(res,
                            x // a,
                            rtol=dtype_to_rtol[dtype],
                            atol=dtype_to_atol[dtype])
            assert_allclose(misc.div_matvec(x_gpu, b_gpu).get(),
                            x // b,
                            rtol=dtype_to_rtol[dtype],
                            atol=dtype_to_atol[dtype])
            assert_allclose(misc.div_matvec(x_gpu, c_gpu).get(),
                            x // c,
                            rtol=dtype_to_rtol[dtype],
                            atol=dtype_to_atol[dtype])
        else:
            assert_allclose(res,
                            x / a,
                            rtol=dtype_to_rtol[dtype],
                            atol=dtype_to_atol[dtype])
            assert_allclose(misc.div_matvec(x_gpu, b_gpu).get(),
                            x / b,
                            rtol=dtype_to_rtol[dtype],
                            atol=dtype_to_atol[dtype])
            assert_allclose(misc.div_matvec(x_gpu, c_gpu).get(),
                            x / c,
                            rtol=dtype_to_rtol[dtype],
                            atol=dtype_to_atol[dtype])

        assert_raises(ValueError, misc.div_matvec, x_gpu, d_gpu)
Example #10
    def add_mv(self, m, v, out):
        cumisc.add_matvec(m, v, out=out)
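A minimal driver for the wrapper (a sketch; assumes a CUDA context and the cumisc alias for skcuda.misc). Leaving axis unset lets add_matvec pick the matching axis itself:

import numpy as np
from pycuda import gpuarray

m = gpuarray.to_gpu(np.random.rand(3, 5).astype(np.float32))
v = gpuarray.to_gpu(np.random.rand(5).astype(np.float32))
out = gpuarray.empty_like(m)
cumisc.add_matvec(m, v, out=out)  # adds v to every row of m
assert np.allclose(out.get(), m.get() + v.get())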
Example #11
def squared_sum(a, b, method):
    """
    Compute squared summations of rows and then their pairwise summations.

    Parameters
    ----------
    a : ndarray
    b : ndarray
    method : str
        Chooses the workflow for the computations. It can be 'add_togpu',
        'togpu_misc_add' or 'togpu_cuda_add'.

    Returns
    -------
    out : GPUArray
        Compute the squared summation of each row for each of the ndarrays,
        giving us two 1D arrays. Then, compute their pairwise summations to
        produce a 2D array.
        There are three workflows, selected by 'method':

        'add_togpu' : Compute the squared sum of rows of the inputs and then
        perform broadcasted element-wise summations, all on CPU. Then,
        transfer this array to GPU as the output.

        'togpu_misc_add' : Compute the squared sum of rows of the inputs,
        giving us two 1D arrays. Transfer these two arrays onto GPU. Create a
        zeros array directly on GPU and, in two steps, add in the two summed
        arrays in a broadcasted manner using 'skcuda.misc.add_matvec' along
        the rows and columns, giving us the pairwise summations.

        'togpu_cuda_add' : Same as the previous one, but instead of using
        'skcuda.misc.add_matvec', we roll out our own CUDA kernel, with the
        idea of having more control, specifically making use of threads and
        blocks, and in the process attaining the best possible performance.

    """

    c_gpu = None  # Initialize output

    if method == "add_togpu":
        c = np.einsum('ij,ij->i', a, a)[:, None] + np.einsum('ij,ij->i', b, b)
        c_gpu = gpuarray.to_gpu(c)

    elif method == "togpu_misc_add":
        a1_gpu = gpuarray.to_gpu(np.einsum('ij,ij->i', a, a)[:, None])
        b1_gpu = gpuarray.to_gpu(np.einsum('ij,ij->i', b, b))

        M, N = a.shape[0], b.shape[0]
        c_gpu = gpuarray.zeros((M, N), dtype=np.float32)
        misc.add_matvec(c_gpu, a1_gpu, out=c_gpu)
        misc.add_matvec(c_gpu, b1_gpu, out=c_gpu)

    elif method == "togpu_cuda_add":
        c_gpu = addvecs(np.einsum('ij,ij->i', a, a),
                        np.einsum('ij,ij->i', b, b))

    else:
        raise ValueError("Invalid method.")

    return c_gpu
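A minimal driver for squared_sum (a sketch; assumes the module's gpuarray/misc imports and a CUDA context, and uses float32 inputs so dtypes match the zeros array allocated in the 'togpu_misc_add' branch):

import numpy as np

a = np.random.rand(128, 16).astype(np.float32)
b = np.random.rand(64, 16).astype(np.float32)

c_gpu = squared_sum(a, b, method='togpu_misc_add')

# CPU reference for the pairwise row-norm sums
ref = (a**2).sum(axis=1)[:, None] + (b**2).sum(axis=1)
assert np.allclose(c_gpu.get(), ref, atol=1e-3)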