def convert_to_cuDFs(data, labels):  # consider dlpack
    """Build cuDF DataFrames for data and labels from cupy arrays.

    Args:
        data: Array indexed as ``data[:, i]``; columns 0, 1 and 2 become the
            ``x``, ``y`` and ``z`` columns of the data frame.
        labels: Values stored in the single ``labels`` column.

    Returns:
        tuple: ``(data_cDF, labels_cDF)``.
    """
    labels_cDF = cudf.DataFrame([('labels', labels)])
    # Each column is passed through asfortranarray before it is handed
    # to cuDF.
    columns = [(name, cupy.asfortranarray(data[:, idx]))
               for idx, name in enumerate(('x', 'y', 'z'))]
    data_cDF = cudf.DataFrame(columns)
    return data_cDF, labels_cDF
Exemple #2
0
 def __mul__(self, other):
     """Compute ``self * other``.

     Scalars and 0-dim dense arrays scale the stored data. CSR and CSC
     operands are multiplied with ``cusparse.csrgemm``; any other sparse
     operand is converted with ``tocsr()`` and dispatched again. 1-dim and
     2-dim dense operands use ``cusparse.csrmv`` and ``cusparse.csrmm2``.
     Any other operand yields ``NotImplemented``.

     Raises:
         ValueError: If a dense operand has more than two dimensions.
     """
     if cupy.isscalar(other):
         self.sum_duplicates()
         return self._with_data(self.data * other)

     if isspmatrix_csr(other):
         self.sum_duplicates()
         other.sum_duplicates()
         return cusparse.csrgemm(self, other)

     if csc.isspmatrix_csc(other):
         self.sum_duplicates()
         other.sum_duplicates()
         return cusparse.csrgemm(self, other.T, transb=True)

     if base.isspmatrix(other):
         return self * other.tocsr()

     if base.isdense(other):
         ndim = other.ndim
         # Reject unsupported ranks before touching self.
         if ndim not in (0, 1, 2):
             raise ValueError('could not interpret dimensions')
         self.sum_duplicates()
         if ndim == 0:
             return self._with_data(self.data * other)
         dense = cupy.asfortranarray(other)
         if ndim == 1:
             return cusparse.csrmv(self, dense)
         return cusparse.csrmm2(self, dense)

     return NotImplemented
Exemple #3
0
def sgemm(A, B,
          dim_x=16, dim_y=16, blk_m=64, blk_n=64, blk_k=4,
          dim_xa=64, dim_ya=4, dim_xb=4, dim_yb=64):
    """Run the ``sgemm`` raw kernel on two float32 matrices.

    The kernel source is loaded with ``read_code(sgemm_file, ...)`` and is
    specialised with the tiling parameters given here (presumably computing
    ``C = A * B`` -- confirm against the kernel source).

    Args:
        A: float32 array of shape ``(m, k)``.
        B: float32 array of shape ``(k, n)``.
        dim_x, dim_y: Thread-block dimensions used for the launch.
        blk_m, blk_n, blk_k: Tile sizes passed to the kernel as ``BLK_*``.
        dim_xa, dim_ya, dim_xb, dim_yb: Passed to the kernel as ``DIM_XA``
            etc.; each pair must multiply to ``dim_x * dim_y``.

    Returns:
        A new Fortran-ordered float32 array of shape ``(m, n)``.

    Raises:
        AssertionError: If an input is not float32 or the thread layouts
            disagree.
        ValueError: If the inner dimensions of ``A`` and ``B`` differ.
    """
    assert A.dtype == cp.float32
    assert B.dtype == cp.float32
    assert(dim_x * dim_y == dim_xa * dim_ya == dim_xb * dim_yb)

    m, k = A.shape
    k_b, n = B.shape
    # Previously B's leading dimension silently replaced A's trailing one,
    # so mismatched operands reached the kernel with a wrong k.
    if k != k_b:
        raise ValueError(
            'shape mismatch: A is {} but B is {}'.format(A.shape, B.shape))

    # Inputs matrices need to be in Fortran order.
    A = cp.asfortranarray(A)
    B = cp.asfortranarray(B)

    C = cp.empty((m, n), dtype=cp.float32, order='F')

    config = {'DIM_X': dim_x, 'DIM_Y': dim_y,
              'BLK_M': blk_m, 'BLK_N': blk_n, 'BLK_K': blk_k,
              'DIM_XA': dim_xa, 'DIM_YA': dim_ya,
              'DIM_XB': dim_xb, 'DIM_YB': dim_yb,
              'THR_M': blk_m // dim_x, 'THR_N': blk_n // dim_y}
    code = read_code(sgemm_file, params=config)
    kern = cp.RawKernel(code, 'sgemm')

    # Integer ceil-division: one block per (blk_m x blk_n) tile of C,
    # independent of how `/` behaves for integers.
    grid = ((m + blk_m - 1) // blk_m, (n + blk_n - 1) // blk_n, 1)
    block = (dim_x, dim_y, 1)
    args = (m, n, k, A, B, C)
    # Two padded tiles; the factor 4 is presumably sizeof(float32).
    shared_mem = blk_k * (blk_m + 1) * 4 + blk_n * (blk_k + 1) * 4
    kern(grid, block, args=args, shared_mem=shared_mem)
    return C
Exemple #4
0
 def __mul__(self, other):
     """Compute ``self * other``.

     Scalars and 0-dim dense arrays scale the stored data. For the other
     operand kinds the product is formed from ``self.T`` with
     ``transa=True`` (or from ``self.tocsr()`` when only ``csrgemm2`` is
     available), using whichever cuSPARSE routine
     ``cusparse.check_availability`` reports. Unsupported operands yield
     ``NotImplemented``.

     Raises:
         NotImplementedError: If no suitable cuSPARSE routine is available.
         ValueError: If a dense operand has more than two dimensions.
     """
     if cupy.isscalar(other):
         self.sum_duplicates()
         return self._with_data(self.data * other)

     if cupyx.scipy.sparse.isspmatrix_csr(other):
         self.sum_duplicates()
         other.sum_duplicates()
         if cusparse.check_availability('csrgemm'):
             return cusparse.csrgemm(self.T, other, transa=True)
         if cusparse.check_availability('csrgemm2'):
             lhs = self.tocsr()
             lhs.sum_duplicates()
             return cusparse.csrgemm2(lhs, other)
         raise NotImplementedError

     if isspmatrix_csc(other):
         self.sum_duplicates()
         other.sum_duplicates()
         if cusparse.check_availability('csrgemm'):
             lhs = self.T
             rhs = other.T
             return cusparse.csrgemm(lhs, rhs, transa=True, transb=True)
         if cusparse.check_availability('csrgemm2'):
             lhs = self.tocsr()
             rhs = other.tocsr()
             lhs.sum_duplicates()
             rhs.sum_duplicates()
             return cusparse.csrgemm2(lhs, rhs)
         raise NotImplementedError

     if cupyx.scipy.sparse.isspmatrix(other):
         return self * other.tocsr()

     if base.isdense(other):
         ndim = other.ndim
         # Reject unsupported ranks before touching self.
         if ndim not in (0, 1, 2):
             raise ValueError('could not interpret dimensions')
         self.sum_duplicates()
         if ndim == 0:
             return self._with_data(self.data * other)
         if ndim == 1:
             if cusparse.check_availability('csrmv'):
                 matvec = cusparse.csrmv
             elif cusparse.check_availability('spmv'):
                 matvec = cusparse.spmv
             else:
                 raise NotImplementedError
             return matvec(self.T, cupy.asfortranarray(other), transa=True)
         if cusparse.check_availability('csrmm2'):
             matmat = cusparse.csrmm2
         elif cusparse.check_availability('spmm'):
             matmat = cusparse.spmm
         else:
             raise NotImplementedError
         return matmat(self.T, cupy.asfortranarray(other), transa=True)

     return NotImplemented
Exemple #5
0
def generate_synthetic_dataset(config):
    """Generate a two-coil 3-D point cloud and labels as cuDF DataFrames.

    Args:
        config: Mapping providing ``coilType``, ``nSamples``,
            ``coilDensity``, ``coil1StDev``, ``coil2StDev``,
            ``nGuidePointsPerCoil``, ``randomSeed`` and ``shuffleFlag``.

    Returns:
        tuple: ``(data, labels, elapsedTime)`` where ``data`` has columns
        ``x``, ``y``, ``z``, ``labels`` has the single column ``labels``,
        and ``elapsedTime`` is the wall-clock generation time in seconds.
    """
    coil_type = config['coilType']
    n_samples = config['nSamples']
    coil_density = config['coilDensity']
    coil1_std = config['coil1StDev']
    coil2_std = config['coil2StDev']
    n_guide_points = config['nGuidePointsPerCoil']
    seed = config['randomSeed']
    shuffle = config['shuffleFlag']

    t_start = time.time()

    coil1_centers, coil2_centers = gen_two_coils(nPoints=n_guide_points,
                                                 coilType=coil_type,
                                                 coilDensity=coil_density)
    per_coil = n_samples // 2
    n_dims = 3
    total = per_coil * 2

    # One blob set per coil, drawn around that coil's guide points.
    blobs = []
    for centers, std in ((coil1_centers, coil1_std),
                         (coil2_centers, coil2_std)):
        points, _ = cuml.make_blobs(n_samples=per_coil,
                                    n_features=n_dims,
                                    centers=centers,
                                    cluster_std=std,
                                    random_state=seed,
                                    dtype='float')
        blobs.append(points)

    # Interleave the coils: even rows come from coil 1, odd rows from coil 2.
    points_all = cupy.empty(shape=(total, n_dims), dtype='float32',
                            order='F')
    points_all[0::2] = blobs[0]
    points_all[1::2] = blobs[1]

    # Matching labels: 1 for coil 1 rows, 0 for coil 2 rows.
    labels_all = cupy.empty(shape=(total, 1), dtype='int', order='F')
    labels_all[0::2] = cupy.ones(shape=(per_coil, 1), dtype='int')
    labels_all[1::2] = cupy.zeros(shape=(per_coil, 1), dtype='int')

    if shuffle:
        cupy.random.seed(seed)
        perm = cupy.random.permutation(points_all.shape[0])
        points_all = cupy.asfortranarray(points_all[perm, :])
        labels_all = cupy.asfortranarray(labels_all[perm])

    data = cudf.DataFrame.from_gpu_matrix(points_all,
                                          columns=['x', 'y', 'z'])
    labels = cudf.DataFrame.from_gpu_matrix(labels_all, columns=['labels'])

    return data, labels, time.time() - t_start
Exemple #6
0
def _solve(a, b, cublas_handle, cusolver_handle):
    """Solve ``a x = b`` through a QR factorisation (geqrf, ormqr, trsm).

    Args:
        a: Coefficient matrix; treated as ``m x m`` where ``m`` comes from
            ``b``.
        b: Right-hand side, 1-dim of size ``m`` or 2-dim of shape ``(m, k)``.
        cublas_handle: Handle passed to the cuBLAS ``trsm`` call.
        cusolver_handle: Handle passed to the cuSOLVER calls.

    Returns:
        The Fortran-ordered ``b`` array after the three routines have run on
        its buffer.

    Raises:
        NotImplementedError: If ``a.dtype`` is not one of ``f``, ``d``,
            ``F``, ``D``.

    NOTE(review): ``asfortranarray`` may hand back the input itself when it
    is already Fortran-ordered, in which case the caller's arrays would be
    overwritten -- confirm callers pass copies.
    """
    a = cupy.asfortranarray(a)
    b = cupy.asfortranarray(b)
    dtype = a.dtype
    if b.ndim == 1:
        m, k = b.size, 1
    else:
        m, k = b.shape
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # (dtype code, routine-name prefix, name of the cuBLAS op used by ormqr)
    routine_table = (
        ('f', 's', 'CUBLAS_OP_T'),
        ('d', 'd', 'CUBLAS_OP_T'),
        ('F', 'c', 'CUBLAS_OP_C'),
        ('D', 'z', 'CUBLAS_OP_C'),
    )
    for code, prefix, op_name in routine_table:
        if dtype == code:
            geqrf = getattr(cusolver, prefix + 'geqrf')
            geqrf_bufferSize = getattr(cusolver, prefix + 'geqrf_bufferSize')
            ormqr = getattr(cusolver, prefix + 'ormqr')
            trans = getattr(cublas, op_name)
            trsm = getattr(cublas, prefix + 'trsm')
            break
    else:
        raise NotImplementedError(dtype)

    check_info = (
        cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed)

    # 1. QR decomposition (A = Q * R)
    buffersize = geqrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(m, dtype=dtype)
    geqrf(cusolver_handle, m, m, a.data.ptr, m, tau.data.ptr,
          workspace.data.ptr, buffersize, dev_info.data.ptr)
    check_info(geqrf, dev_info)

    # 2. ormqr (Q^T * B); reuses the workspace sized for geqrf
    ormqr(cusolver_handle, cublas.CUBLAS_SIDE_LEFT, trans, m, k, m, a.data.ptr,
          m, tau.data.ptr, b.data.ptr, m, workspace.data.ptr, buffersize,
          dev_info.data.ptr)
    check_info(ormqr, dev_info)

    # 3. trsm (X = R^{-1} * (Q^T * B))
    trsm(cublas_handle, cublas.CUBLAS_SIDE_LEFT, cublas.CUBLAS_FILL_MODE_UPPER,
         cublas.CUBLAS_OP_N, cublas.CUBLAS_DIAG_NON_UNIT, m, k, 1, a.data.ptr,
         m, b.data.ptr, m)
    return b
Exemple #7
0
def dense2csr(x):
    """Converts a dense matrix in CSR format.

    Args:
        x (cupy.ndarray): A matrix to be converted.

    Returns:
        cupy.sparse.csr_matrix: A converted matrix.

    """
    assert x.ndim == 2
    dense = cupy.asfortranarray(x)
    n_rows, n_cols = dense.shape
    handle = device.get_cusparse_handle()
    descr = MatDescriptor.create()

    # First pass: count non-zeros per row; the total is written to a
    # host-side 0-dim array.
    total = numpy.empty((), dtype='i')
    row_counts = cupy.empty(n_rows, 'i')
    _call_cusparse(
        'nnz', dense.dtype,
        handle, cusparse.CUSPARSE_DIRECTION_ROW, n_rows, n_cols,
        descr.descriptor, dense.data.ptr, n_rows, row_counts.data.ptr,
        total.ctypes.data)

    # Second pass: allocate the CSR buffers and fill them.
    nnz = int(total)
    values = cupy.empty(nnz, dense.dtype)
    row_ptr = cupy.empty(n_rows + 1, 'i')
    col_idx = cupy.empty(nnz, 'i')
    _call_cusparse(
        'dense2csr', dense.dtype,
        handle, n_rows, n_cols, descr.descriptor,
        dense.data.ptr, n_rows, row_counts.data.ptr,
        values.data.ptr, row_ptr.data.ptr, col_idx.data.ptr)

    # Note that a descriptor is recreated
    return cupy.sparse.csr_matrix((values, col_idx, row_ptr),
                                  shape=dense.shape)
Exemple #8
0
def _fftn(a, s, axes, norm, direction, value_type='C2C', order='A', plan=None,
          overwrite_x=False, out=None):
    """Prepare ``a`` and run an n-dimensional FFT through ``_exec_fftn``.

    Args:
        a: Input array.
        s: Requested shape along the transformed axes, or ``None``.
        axes: Axes to transform, or ``None``.
        norm: ``None`` or ``'ortho'``.
        direction: Forwarded to ``_exec_fftn``.
        value_type: Transform kind, forwarded to the helpers.
        order: ``'C'``, ``'F'`` or ``'A'`` (follow the layout of ``a``,
            falling back to C order when it is not contiguous).
        plan, overwrite_x, out: Forwarded to ``_exec_fftn``.

    Returns:
        ``a`` itself when there are no axes to transform, otherwise the
        result of ``_exec_fftn``.

    Raises:
        ValueError: For an invalid ``norm`` or ``order``.
    """
    if norm not in (None, 'ortho'):
        raise ValueError('Invalid norm value %s, should be None or "ortho".'
                         % norm)

    axes, axes_sorted = _prep_fftn_axes(a.ndim, s, axes)
    if not axes_sorted:
        return a
    a = _convert_dtype(a, value_type)

    if order not in ('A', 'C', 'F'):
        raise ValueError('Unsupported order: {}'.format(order))
    if order == 'A':
        # Follow the input layout; non-contiguous input becomes C order.
        if a.flags.f_contiguous:
            order = 'F'
        else:
            if not a.flags.c_contiguous:
                a = cupy.ascontiguousarray(a)
            order = 'C'

    # Note: need to call _cook_shape prior to sorting the axes
    a = _cook_shape(a, s, axes, value_type, order=order)

    # _cook_shape may return an array that is no longer in the chosen layout.
    if order == 'C':
        if not a.flags.c_contiguous:
            a = cupy.ascontiguousarray(a)
    elif not a.flags.f_contiguous:
        a = cupy.asfortranarray(a)

    return _exec_fftn(a, direction, value_type, norm=norm, axes=axes_sorted,
                      overwrite_x=overwrite_x, plan=plan, out=out)
Exemple #9
0
def _fftn(a,
          s,
          axes,
          norm,
          direction,
          value_type='C2C',
          order='A',
          plan=None,
          overwrite_x=False,
          out=None):
    """Prepare ``a`` and run an n-dimensional FFT through ``_exec_fftn``.

    Args:
        a: Input array.
        s: Requested shape along the transformed axes, or ``None``.
        axes: Axes to transform, or ``None``.
        norm: ``None`` or ``'ortho'``.
        direction: Forwarded to ``_exec_fftn``.
        value_type: Transform kind; only ``'C2C'`` accepts an empty axes
            list.
        order: ``'C'``, ``'F'`` or ``'A'`` (follow the layout of ``a``,
            falling back to C order when it is not contiguous).
        plan, overwrite_x, out: Forwarded to ``_exec_fftn``.

    Returns:
        ``a`` itself for a ``'C2C'`` transform over no axes, otherwise the
        result of ``_exec_fftn``.

    Raises:
        ValueError: For an invalid ``norm`` or ``order``, or when any
            dimension of the prepared array is smaller than 1.
        IndexError: When there are no axes and ``value_type`` is not
            ``'C2C'``.
    """
    if norm not in (None, 'ortho'):
        raise ValueError('Invalid norm value %s, should be None or "ortho".' %
                         norm)

    axes, axes_sorted = _prep_fftn_axes(a.ndim, s, axes, value_type)
    if not axes_sorted:
        if value_type != 'C2C':
            raise IndexError('list index out of range')
        return a
    a = _convert_dtype(a, value_type)

    if order not in ('A', 'C', 'F'):
        raise ValueError('Unsupported order: {}'.format(order))
    if order == 'A':
        # Follow the input layout; non-contiguous input becomes C order.
        if a.flags.f_contiguous:
            order = 'F'
        else:
            if not a.flags.c_contiguous:
                a = cupy.ascontiguousarray(a)
            order = 'C'

    # Note: need to call _cook_shape prior to sorting the axes
    a = _cook_shape(a, s, axes, value_type, order=order)

    for extent in a.shape:
        if extent < 1:
            raise ValueError(
                'Invalid number of FFT data points (%d) specified.' % extent)

    # _cook_shape may return an array that is no longer in the chosen layout.
    if order == 'C':
        if not a.flags.c_contiguous:
            a = cupy.ascontiguousarray(a)
    elif not a.flags.f_contiguous:
        a = cupy.asfortranarray(a)

    # _cook_shape tells us input shape only, and not output shape
    last_axis = axes_sorted[-1]
    out_size = _get_fftn_out_size(a.shape, s, last_axis, value_type)

    return _exec_fftn(a,
                      direction,
                      value_type,
                      norm=norm,
                      axes=axes_sorted,
                      overwrite_x=overwrite_x,
                      plan=plan,
                      out=out,
                      out_size=out_size)
 def test_get_multigpu(self, dtype):
     # Build a Fortran-ordered array while device 1 is current, then call
     # get() while device 0 is current and compare with the numpy reference.
     shape = (2, 3)
     with cuda.Device(1):
         on_dev1 = cupy.asfortranarray(
             testing.shaped_arange(shape, xp=cupy, dtype=dtype))
     with cuda.Device(0):
         fetched = on_dev1.get()
     reference = testing.shaped_arange(shape, xp=numpy, dtype=dtype)
     np_testing.assert_array_equal(fetched, reference)
Exemple #11
0
 def test_get_multigpu(self, dtype, order):
     # Build a Fortran-ordered array while device 1 is current, then call
     # get() while device 0 is current and compare with the numpy reference.
     shape = (2, 3)
     with cuda.Device(1):
         on_dev1 = cupy.asfortranarray(
             testing.shaped_arange(shape, cupy, dtype, order))
     with cuda.Device(0):
         fetched = on_dev1.get()
     reference = testing.shaped_arange(shape, numpy, dtype, order)
     np_testing.assert_array_equal(fetched, reference)
Exemple #12
0
def _fftn(a,
          s,
          axes,
          norm,
          direction,
          value_type='C2C',
          order='A',
          plan=None,
          overwrite_x=False,
          out=None):
    """Prepare ``a`` and run an n-dimensional FFT through ``_exec_fftn``.

    Args:
        a: Input array.
        s: Requested shape along the transformed axes, or ``None``.
        axes: Axes to transform. When ``None``, the last ``len(s)`` axes are
            used (all axes if ``s`` is also ``None``).
        norm: ``None`` or ``'ortho'``.
        direction: Forwarded to ``_exec_fftn``.
        value_type: Transform kind, forwarded to the helpers.
        order: ``'C'``, ``'F'`` or ``'A'`` (follow the layout of ``a``,
            falling back to C order when it is not contiguous).
        plan, overwrite_x, out: Forwarded to ``_exec_fftn``.

    Returns:
        The result of ``_exec_fftn``.

    Raises:
        ValueError: For an invalid ``norm`` or ``order``, or when ``s`` and
            ``axes`` are both given with different lengths.
    """
    if norm not in (None, 'ortho'):
        raise ValueError('Invalid norm value %s, should be None or "ortho".' %
                         norm)

    a = _convert_dtype(a, value_type)

    if s is not None and axes is not None and len(s) != len(axes):
        raise ValueError('Shape and axes have different lengths.')

    if axes is None:
        dim = a.ndim if s is None else len(s)
        # Default to the trailing `dim` axes, written as negative indices.
        axes = six.moves.range(-dim, 0)
    axes = tuple(axes)

    if order not in ('A', 'C', 'F'):
        raise ValueError('Unsupported order: {}'.format(order))
    if order == 'A':
        # Follow the input layout; non-contiguous input becomes C order.
        if a.flags.f_contiguous:
            order = 'F'
        else:
            if not a.flags.c_contiguous:
                a = cupy.ascontiguousarray(a)
            order = 'C'

    a = _cook_shape(a, s, axes, value_type, order=order)
    # _cook_shape may return an array that is no longer in the chosen layout.
    if order == 'C':
        if not a.flags.c_contiguous:
            a = cupy.ascontiguousarray(a)
    elif not a.flags.f_contiguous:
        a = cupy.asfortranarray(a)

    # sort the provided axes in ascending order
    axes = tuple(sorted(np.mod(axes, a.ndim)))

    return _exec_fftn(a,
                      direction,
                      value_type,
                      norm=norm,
                      axes=axes,
                      overwrite_x=overwrite_x,
                      plan=plan,
                      out=out)
Exemple #13
0
 def test_overwrite_x_with_contiguous_view(self, dtype):
     # Test case for: https://github.com/cupy/cupy/issues/3079
     # With overwrite_x=True on a contiguous view, the output is expected
     # to reuse the view's own memory.
     base_arr = testing.shaped_random(self.shape, cupy, dtype)
     if self.data_order == 'C':
         # C-contiguous view: leading half of the first axis
         view = base_arr[:base_arr.shape[0] // 2, ...]
     else:
         # F-contiguous view: leading half of the last axis
         base_arr = cupy.asfortranarray(base_arr)
         view = base_arr[..., :base_arr.shape[-1] // 2]
     view_ptr = view.data.ptr
     result = cupyx.scipy.fftpack.fftn(view, overwrite_x=True)
     assert result.data.ptr == view_ptr
Exemple #14
0
def my_conv2(S1, sig, varargin=None):
    """Smooth ``S1`` with a normalised Gaussian filter along chosen axes.

    Args:
        S1: The matrix to be filtered along a choice of axes.
        sig: Either a scalar or a sequence of scalars, one for each axis to
            be filtered. NOTE(review): the early-exit test compares ``sig``
            directly with 0.25, which looks like it assumes a scalar --
            confirm the behaviour for sequences.
        varargin: Optional axis or axes to filter. When omitted, axis index
            1 is used (the original note called this "axis 2", presumably
            counting from 1).

    Returns:
        ``S1`` itself when ``sig <= 0.25``; otherwise the filtered array with
        its axes restored to their original order.
    """
    # S1 is the matrix to be filtered along a choice of axes
    # sig is either a scalar or a sequence of scalars, one for each axis to be filtered
    # varargin can be the dimensions to do filtering, if len(sig) != x.shape
    # if sig is scalar and no axes are provided, the code below defaults to axis index 1
    if sig <= .25:
        return S1
    idims = 1
    if varargin is not None:
        idims = varargin
    idims = _make_vect(idims)
    if _is_vect(idims) and _is_vect(sig):
        sigall = sig
    else:
        # Repeat sig once per requested axis.
        sigall = np.tile(sig, len(idims))

    # NOTE: the loop variable `sig` rebinds the parameter of the same name.
    for sig, idim in zip(sigall, idims):
        Nd = S1.ndim
        # Bring axis `idim` to the front, keeping the other axes in order.
        S1 = cp.transpose(S1, [idim] + list(range(0, idim)) +
                          list(range(idim + 1, Nd)))
        dsnew = S1.shape
        # Flatten all trailing axes (column-major) so filtering runs on 2-D.
        S1 = cp.reshape(S1, (S1.shape[0], -1), order='F')
        dsnew2 = S1.shape

        # Gaussian kernel over [-tmax, tmax] with tmax = ceil(4 * sig),
        # normalised to sum to 1 and shaped as a column vector.
        tmax = ceil(4 * sig)
        dt = np.arange(-tmax, tmax + 1)
        gaus = np.exp(-dt**2 / (2 * sig**2))
        gaus = gaus[:, np.newaxis] / np.sum(gaus)

        # Filter a column of ones (followed by tmax zeros) with the same
        # kernel; the result is used below as a per-row divisor.
        cNorm = lfilter(gaus,
                        1,
                        cp.concatenate(
                            (cp.ones(dsnew2[0]), cp.zeros(tmax)))[:,
                                                                  np.newaxis],
                        axis=0)
        # Drop the first tmax outputs so the length matches S1's first axis.
        cNorm = cNorm[tmax:, :]
        # Filter the data after appending tmax zero rows, then drop the
        # first tmax outputs in the same way.
        S1 = lfilter(gaus,
                     1,
                     cp.asfortranarray(
                         cp.concatenate(
                             (S1, cp.zeros((tmax, dsnew2[1]), order='F')),
                             axis=0)),
                     axis=0)
        S1 = S1[tmax:, :]
        # Restore the transposed N-D shape before normalising.
        S1 = S1.reshape(dsnew, order='F')
        S1 = S1 / cNorm

        # Undo the initial transpose: move the leading axis back to `idim`.
        S1 = cp.transpose(
            S1,
            list(range(1, idim + 1)) + [0] + list(range(idim + 1, Nd)))
    return S1
Exemple #15
0
 def __mul__(self, other):
     """Compute ``self * other``.

     Scalars and 0-dim dense arrays scale the stored data. CSR and CSC
     operands are multiplied with ``cusparse.csrgemm``; any other sparse
     operand is converted with ``tocsr()`` and dispatched again. 1-dim
     dense operands use ``csrmvEx`` (or ``device_csrmv`` when CUB is
     enabled) if the matrix has stored entries and
     ``cusparse.csrmvExIsAligned`` accepts the operands, and
     ``cusparse.csrmv`` otherwise. 2-dim dense operands use
     ``cusparse.csrmm2``. Any other operand yields ``NotImplemented``.

     Raises:
         ValueError: If a dense operand has more than two dimensions.
     """
     if cupy.isscalar(other):
         self.sum_duplicates()
         return self._with_data(self.data * other)

     if isspmatrix_csr(other):
         self.sum_duplicates()
         other.sum_duplicates()
         return cusparse.csrgemm(self, other)

     if csc.isspmatrix_csc(other):
         self.sum_duplicates()
         other.sum_duplicates()
         return cusparse.csrgemm(self, other.T, transb=True)

     if base.isspmatrix(other):
         return self * other.tocsr()

     if base.isdense(other):
         ndim = other.ndim
         # Reject unsupported ranks before touching self.
         if ndim not in (0, 1, 2):
             raise ValueError('could not interpret dimensions')
         self.sum_duplicates()
         if ndim == 0:
             return self._with_data(self.data * other)
         if ndim == 2:
             return cusparse.csrmm2(self, cupy.asfortranarray(other))

         vec = cupy.asfortranarray(other)
         # csrmvEx does not work if nnz == 0
         if not (self.nnz > 0 and cusparse.csrmvExIsAligned(self, vec)):
             return cusparse.csrmv(self, vec)
         if cupy.cuda.cub_enabled and vec.flags.c_contiguous:
             return device_csrmv(self.shape[0], self.shape[1],
                                 self.nnz, self.data, self.indptr,
                                 self.indices, vec)
         return cusparse.csrmvEx(self, vec)

     return NotImplemented
Exemple #16
0
 def test_empty_like_reshape_contiguity2_cupy_only(self, dtype, order):
     # empty_like on a Fortran-ordered prototype with an explicit shape:
     # check the resulting layout and that the zero-filled contents match.
     proto = cupy.asfortranarray(
         testing.shaped_arange((2, 3, 4), cupy, dtype))
     b = cupy.empty_like(proto, order=order, shape=self.shape)
     b.fill(0)
     c = cupy.empty(self.shape)
     c.fill(0)
     if numpy.isscalar(self.shape):
         shape = (self.shape,)
     else:
         shape = self.shape
     # C layout is expected for an explicit C order, or for 'K'/None when
     # the requested rank differs from the prototype's.
     rank_differs = order in ['k', 'K', None] and len(shape) != proto.ndim
     if order in ['c', 'C'] or rank_differs:
         assert b.flags.c_contiguous
     else:
         assert b.flags.f_contiguous
     testing.assert_array_equal(b, c)
Exemple #17
0
def _solve(a, b):
    """Solve ``a x = b`` through a QR factorisation (geqrf, ormqr, trsm).

    Single-precision routines are used when ``a.dtype == 'f'``; every other
    dtype falls through to the double-precision routines.

    Args:
        a: Coefficient matrix; treated as ``m x m`` where ``m`` comes from
            ``b``.
        b: Right-hand side, 1-dim of size ``m`` or 2-dim of shape ``(m, k)``.

    Returns:
        The Fortran-ordered ``b`` array after the three routines have run on
        its buffer.
    """
    a = cupy.asfortranarray(a)
    b = cupy.asfortranarray(b)
    dtype = a.dtype
    if b.ndim == 1:
        m, k = b.size, 1
    else:
        m, k = b.shape
    cusolver_handle = device.get_cusolver_handle()
    cublas_handle = device.get_cublas_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # 's' routines for float32, 'd' routines otherwise (dtype == 'd').
    prefix = 's' if dtype == 'f' else 'd'
    geqrf = getattr(cusolver, prefix + 'geqrf')
    geqrf_bufferSize = getattr(cusolver, prefix + 'geqrf_bufferSize')
    ormqr = getattr(cusolver, prefix + 'ormqr')
    trsm = getattr(cublas, prefix + 'trsm')

    a_ptr = a.data.ptr
    b_ptr = b.data.ptr

    # 1. QR decomposition (A = Q * R)
    buffersize = geqrf_bufferSize(cusolver_handle, m, m, a_ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(m, dtype=dtype)
    geqrf(cusolver_handle, m, m, a_ptr, m, tau.data.ptr,
          workspace.data.ptr, buffersize, dev_info.data.ptr)
    _check_status(dev_info)
    # 2. ormqr (Q^T * B); reuses the workspace sized for geqrf
    ormqr(cusolver_handle, cublas.CUBLAS_SIDE_LEFT, cublas.CUBLAS_OP_T, m, k,
          m, a_ptr, m, tau.data.ptr, b_ptr, m, workspace.data.ptr,
          buffersize, dev_info.data.ptr)
    _check_status(dev_info)
    # 3. trsm (X = R^{-1} * (Q^T * B))
    trsm(cublas_handle, cublas.CUBLAS_SIDE_LEFT, cublas.CUBLAS_FILL_MODE_UPPER,
         cublas.CUBLAS_OP_N, cublas.CUBLAS_DIAG_NON_UNIT, m, k, 1, a_ptr,
         m, b_ptr, m)
    return b
Exemple #18
0
    def test_fftn_orders(self, dtype, enable_nd):
        # For both memory layouts, check that an 'nd' plan yields output
        # whose contiguity flags match the input's.
        for order in ['C', 'F']:
            arr = testing.shaped_random(self.shape, cupy, dtype)
            if order == 'F':
                arr = cupy.asfortranarray(arr)
            result = cupy.fft.fftn(arr, s=self.s, axes=self.axes)

            plan_type = _default_plan_type(arr, s=self.s, axes=self.axes)
            if plan_type != 'nd':
                # 1d planning case doesn't guarantee preserved contiguity
                continue
            # nd plans have output with contiguity matching the input
            self.assertEqual(result.flags.c_contiguous,
                             arr.flags.c_contiguous)
            self.assertEqual(result.flags.f_contiguous,
                             arr.flags.f_contiguous)
Exemple #19
0
    def test_noncontiguous_view(self, dtype):
        # fftn on a strided view must give the same result with and without
        # an explicit plan in effect.
        arr = testing.shaped_random(self.shape, cupy, dtype)
        if self.data_order == 'F':
            arr = cupy.asfortranarray(arr)
            # every other element along the last axis
            strided = arr[..., ::2]
        else:
            # every other element along the first axis
            strided = arr[::2, ...]

        # transform a non-contiguous view without pre-planning
        expected = cupyx.scipy.fftpack.fftn(strided)

        # create plan and then apply it to a non-contiguous view
        with cupyx.scipy.fftpack.get_fft_plan(strided.copy()):
            actual = cupyx.scipy.fftpack.fftn(strided)
        testing.assert_allclose(expected, actual)
Exemple #20
0
def gpu_ap_approximate(adj, features, alpha, k, fetch):
    """Apply the iterative ``adj``-based smoothing to ``features`` on the GPU.

    The feature matrix is processed in column tiles. For each tile the
    update ``new = adj.dot(new) + features`` is applied ``k - 1`` times and
    the result is scaled by ``alpha / (alpha + 1)``.

    Args:
        adj: Matrix converted to a cupy CSR matrix.
        features: Dense or scipy-sparse feature matrix; cast to float32.
        alpha: Scaling parameter.
        k: Iteration count (``k - 1`` products are performed).
        fetch: Row selector for the returned features, or ``None`` for all
            rows.

    Returns:
        tuple: ``(new_features, smooth_time)`` where ``smooth_time`` is the
        accumulated wall-clock time of the GPU computation.
    """
    features = features.astype(np.float32)
    result = features if fetch is None else features[fetch]
    if sp.issparse(result):
        result = result.todense()

    elapsed = 0
    adj = cp.sparse.csr_matrix(adj)
    adj.sum_duplicates()
    n_cols = features.shape[1]
    # Columns per tile; presumably sized to a fixed GPU memory budget.
    tile_width = 1024**3 // 4 // 2 // features.shape[0]
    for low in range(0, n_cols, tile_width):
        high = min(n_cols, low + tile_width)
        # transfer data to GPU (two independent copies of the same tile)
        if sp.issparse(features):
            current = cp.sparse.csr_matrix(features[:, low:high])
            original = cp.sparse.csr_matrix(features[:, low:high])
            current = current.todense()
            original = original.todense()
        else:
            current = cp.asarray(features[:, low:high])
            original = cp.asarray(features[:, low:high])
        current = cp.asfortranarray(current)
        current.device.synchronize()

        # calculate
        t_begin = time.time()
        for _ in range(k - 1):
            # new_feature = adj.dot(new_feature) + features
            current = cp.cusparse.csrmm2(adj, current, current)
            current += original
        current *= alpha / (alpha + 1)
        current.device.synchronize()
        elapsed += time.time() - t_begin

        # fetch
        host_tile = current.get() if fetch is None else current[fetch].get()
        result[:, low:high] = host_tile
    return result, elapsed
Exemple #21
0
    def test_contiguous_view(self, dtype):
        # Fortran-ordered case tests: https://github.com/cupy/cupy/issues/3079
        # fftn on a contiguous view must give the same result with and
        # without an explicit plan in effect.
        arr = testing.shaped_random(self.shape, cupy, dtype)
        if self.data_order == 'F':
            arr = cupy.asfortranarray(arr)
            # first slice along the last axis
            view = arr[..., 0]
        else:
            # first slice along the first axis
            view = arr[0, ...]

        # transform a contiguous view without pre-planning
        expected = cupyx.scipy.fftpack.fftn(view)

        # create plan and then apply it to a contiguous view
        with cupyx.scipy.fftpack.get_fft_plan(view):
            actual = cupyx.scipy.fftpack.fftn(view)
        testing.assert_allclose(expected, actual)
Exemple #22
0
def gpu_taubin_smoothing(step_transformor, features, repeat, fetch):
    """Repeatedly apply ``step_transformor`` to ``features`` on the GPU.

    The feature matrix is processed in column tiles; each tile is multiplied
    by the sparse transform ``repeat`` times.

    Args:
        step_transformor: Matrix converted to a cupy CSR matrix.
        features: Dense or scipy-sparse feature matrix.
        repeat: Number of products applied to each tile.
        fetch: Row selector for the returned features, or ``None`` for all
            rows.

    Returns:
        tuple: ``(new_features, smooth_time)`` where ``smooth_time`` is the
        accumulated wall-clock time of the GPU computation.
    """
    # TODO: transfer sparse features to GPU
    # TODO: only fetch necessary data
    elapsed = 0
    step_transformor = cp.sparse.csr_matrix(step_transformor)
    step_transformor.sum_duplicates()
    # Columns per tile; presumably sized to a fixed GPU memory budget.
    tile_width = 1024**3 // 4 // 4 // features.shape[0]
    # initialize the result container
    result = features if fetch is None else features[fetch]
    if sp.issparse(result):
        result = result.todense()

    n_cols = features.shape[1]
    for low in range(0, n_cols, tile_width):
        high = min(n_cols, low + tile_width)
        # transfer data to GPU
        if sp.issparse(features):
            tile = cp.sparse.csr_matrix(features[:, low:high]).todense()
        else:
            tile = cp.asarray(features[:, low:high])
        tile = cp.asfortranarray(tile)
        tile.device.synchronize()

        # calculate
        t_begin = time.time()
        for _ in range(repeat):
            tile = cp.cusparse.csrmm2(step_transformor, tile, tile)
            # tile = step_transformor.dot(tile)
        tile.device.synchronize()
        elapsed += time.time() - t_begin

        # fetch
        host_tile = tile.get() if fetch is None else tile[fetch].get()
        result[:, low:high] = host_tile
    return result, elapsed
Exemple #23
0
def dense2csc(x):
    """Converts a dense matrix in CSC format.

    Args:
        x (cupy.ndarray): A matrix to be converted.

    Returns:
        cupyx.scipy.sparse.csc_matrix: A converted matrix.

    Raises:
        RuntimeError: If ``dense2csc`` is reported as unavailable.

    """
    if not check_availability('dense2csc'):
        raise RuntimeError('dense2csc is not available.')

    assert x.ndim == 2
    x = cupy.asfortranarray(x)
    nnz = numpy.empty((), dtype='i')
    handle = device.get_cusparse_handle()
    m, n = x.shape

    descr = MatDescriptor.create()
    # The count is taken per column, so the buffer needs one entry per
    # column (n). It was previously sized by the row count (m), which is
    # too small whenever the matrix has more columns than rows.
    nnz_per_col = cupy.empty(n, 'i')
    _call_cusparse(
        'nnz', x.dtype,
        handle, cusparse.CUSPARSE_DIRECTION_COLUMN, m, n, descr.descriptor,
        x.data.ptr, m, nnz_per_col.data.ptr, nnz.ctypes.data)

    # The total count was written to the host-side 0-dim array.
    nnz = int(nnz)
    data = cupy.empty(nnz, x.dtype)
    indptr = cupy.empty(n + 1, 'i')
    indices = cupy.empty(nnz, 'i')

    _call_cusparse(
        'dense2csc', x.dtype,
        handle, m, n, descr.descriptor,
        x.data.ptr, m, nnz_per_col.data.ptr,
        data.data.ptr, indices.data.ptr, indptr.data.ptr)
    # Note that a descriptor is recreated
    csc = cupyx.scipy.sparse.csc_matrix((data, indices, indptr), shape=x.shape)
    csc._has_canonical_format = True
    return csc
Exemple #24
0
 def __mul__(self, other):
     """Compute ``self * other``.

     Scalars and 0-dim dense arrays scale the stored data. For the other
     operand kinds the product is formed from ``self.T`` with
     ``transa=True`` (or from ``self.tocsr()`` on the ``csrgemm2`` path),
     using whichever cuSPARSE routine is available. Several transposed
     code paths are skipped when ``runtime.is_hip`` is set. Unsupported
     operands yield ``NotImplemented``.

     Raises:
         NotImplementedError: If no suitable routine is available.
         ValueError: If a dense operand has more than two dimensions.
     """
     if cupy.isscalar(other):
         self.sum_duplicates()
         return self._with_data(self.data * other)

     if cupyx.scipy.sparse.isspmatrix_csr(other):
         self.sum_duplicates()
         other.sum_duplicates()
         if cusparse.check_availability('csrgemm') and not runtime.is_hip:
             # trans=True is still buggy as of ROCm 4.2.0
             return cusparse.csrgemm(self.T, other, transa=True)
         if cusparse.check_availability('csrgemm2'):
             lhs = self.tocsr()
             lhs.sum_duplicates()
             return cusparse.csrgemm2(lhs, other)
         raise NotImplementedError

     if isspmatrix_csc(other):
         self.sum_duplicates()
         other.sum_duplicates()
         if cusparse.check_availability('csrgemm') and not runtime.is_hip:
             # trans=True is still buggy as of ROCm 4.2.0
             lhs = self.T
             rhs = other.T
             return cusparse.csrgemm(lhs, rhs, transa=True, transb=True)
         if cusparse.check_availability('csrgemm2'):
             lhs = self.tocsr()
             rhs = other.tocsr()
             lhs.sum_duplicates()
             rhs.sum_duplicates()
             return cusparse.csrgemm2(lhs, rhs)
         raise NotImplementedError

     if cupyx.scipy.sparse.isspmatrix(other):
         return self * other.tocsr()

     if base.isdense(other):
         ndim = other.ndim
         # Reject unsupported ranks before touching self.
         if ndim not in (0, 1, 2):
             raise ValueError('could not interpret dimensions')
         self.sum_duplicates()
         if ndim == 0:
             return self._with_data(self.data * other)
         if ndim == 1:
             if cusparse.check_availability('csrmv') and not runtime.is_hip:
                 # trans=True is buggy as of ROCm 4.2.0
                 matvec = cusparse.csrmv
             elif (cusparse.check_availability('spmv')
                   and not runtime.is_hip):
                 # trans=True is buggy as of ROCm 4.2.0
                 # (I got HIPSPARSE_STATUS_INTERNAL_ERROR...)
                 matvec = cusparse.spmv
             else:
                 raise NotImplementedError
             return matvec(self.T, cupy.asfortranarray(other), transa=True)
         if (cusparse.check_availability('csrmm2')
                 and not runtime.is_hip):
             # trans=True is buggy as of ROCm 4.2.0
             matmat = cusparse.csrmm2
         elif cusparse.check_availability('spmm'):
             matmat = cusparse.spmm
         else:
             raise NotImplementedError
         return matmat(self.T, cupy.asfortranarray(other), transa=True)

     return NotImplemented
Exemple #25
0
 def test_astype_type_f_contiguous_no_copy(self, dtype, order):
     # astype to the same dtype with copy=False on a Fortran-ordered array
     # is expected to hand back the very same object.
     src = cupy.asfortranarray(
         testing.shaped_arange((2, 3, 4), cupy, dtype))
     converted = src.astype(dtype, order=order, copy=False)
     self.assertTrue(converted is src)
Exemple #26
0
 def __mul__(self, other):
     """Compute ``self * other``.

     Scalars and 0-dim dense arrays scale the stored data. Sparse operands
     use ``csrgemm2`` or ``csrgemm`` depending on availability; dense 1-dim
     operands use ``csrmvEx`` (or CUB's ``device_csrmv`` when that
     accelerator is enabled) if the matrix has stored entries and
     ``cusparse.csrmvExIsAligned`` accepts the operands, else
     ``csrmv``/``spmv``. Dense 2-dim operands use ``csrmm2``/``spmm``.
     Unsupported operands yield ``NotImplemented``.

     Raises:
         NotImplementedError: If no suitable cuSPARSE routine is available.
         ValueError: If a dense operand has more than two dimensions.
     """
     if cupy.isscalar(other):
         self.sum_duplicates()
         return self._with_data(self.data * other)

     if isspmatrix_csr(other):
         self.sum_duplicates()
         other.sum_duplicates()
         if cusparse.check_availability('csrgemm2'):
             return cusparse.csrgemm2(self, other)
         if cusparse.check_availability('csrgemm'):
             return cusparse.csrgemm(self, other)
         raise NotImplementedError

     if csc.isspmatrix_csc(other):
         self.sum_duplicates()
         other.sum_duplicates()
         if cusparse.check_availability('csrgemm'):
             return cusparse.csrgemm(self, other.T, transb=True)
         if cusparse.check_availability('csrgemm2'):
             rhs = other.tocsr()
             rhs.sum_duplicates()
             return cusparse.csrgemm2(self, rhs)
         raise NotImplementedError

     if base.isspmatrix(other):
         return self * other.tocsr()

     if base.isdense(other):
         ndim = other.ndim
         # Reject unsupported ranks before touching self.
         if ndim not in (0, 1, 2):
             raise ValueError('could not interpret dimensions')
         self.sum_duplicates()
         if ndim == 0:
             return self._with_data(self.data * other)

         if ndim == 2:
             if cusparse.check_availability('csrmm2'):
                 matmat = cusparse.csrmm2
             elif cusparse.check_availability('spmm'):
                 matmat = cusparse.spmm
             else:
                 raise NotImplementedError
             return matmat(self, cupy.asfortranarray(other))

         vec = cupy.asfortranarray(other)
         # csrmvEx does not work if nnz == 0
         if self.nnz > 0 and cusparse.csrmvExIsAligned(self, vec):
             for accel in _accelerator.get_routine_accelerators():
                 if (accel == _accelerator.ACCELERATOR_CUB
                         and vec.flags.c_contiguous):
                     return cub.device_csrmv(self.shape[0],
                                             self.shape[1], self.nnz,
                                             self.data, self.indptr,
                                             self.indices, vec)
             return cusparse.csrmvEx(self, vec)

         if cusparse.check_availability('csrmv'):
             matvec = cusparse.csrmv
         elif cusparse.check_availability('spmv'):
             matvec = cusparse.spmv
         else:
             raise NotImplementedError
         return matvec(self, vec)

     return NotImplemented
Exemple #27
0
def gesv(a, b):
    """Solve a linear matrix equation using cusolverDn<t>getr[fs]().

    Computes the solution to a system of linear equation ``ax = b``.

    Both operands are converted to the floating-point dtype obtained by
    promoting the dtype of ``a`` with single precision.  The factorization
    and the solve run on private Fortran-ordered copies, so the caller's
    arrays are never overwritten.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(M, M)``.
        b (cupy.ndarray): The matrix with dimension ``(M)`` or ``(M, K)``.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(M)`` or ``(M, K)``.

    Raises:
        ValueError: If ``a`` is not a square 2-D matrix, if ``b`` is neither
            1-D nor 2-D, if the leading dimensions of ``a`` and ``b`` differ,
            or if the dtype of ``a`` is not supported.
    """
    if a.ndim != 2:
        raise ValueError('a.ndim must be 2 (actual: {})'.format(a.ndim))
    if b.ndim not in (1, 2):
        raise ValueError('b.ndim must be 1 or 2 (actual: {})'.format(b.ndim))
    if a.shape[0] != a.shape[1]:
        raise ValueError('a must be a square matrix.')
    if a.shape[0] != b.shape[0]:
        raise ValueError('shape mismatch (a: {}, b: {}).'.format(
            a.shape, b.shape))

    # Pick the cuSOLVER routine prefix from the promoted dtype of ``a``.
    dtype = numpy.promote_types(a.dtype.char, 'f')
    if dtype == 'f':
        t = 's'
    elif dtype == 'd':
        t = 'd'
    elif dtype == 'F':
        t = 'c'
    elif dtype == 'D':
        t = 'z'
    else:
        raise ValueError('unsupported dtype (actual:{})'.format(a.dtype))
    helper = getattr(cusolver, t + 'getrf_bufferSize')
    getrf = getattr(cusolver, t + 'getrf')
    getrs = getattr(cusolver, t + 'getrs')

    n = b.shape[0]
    nrhs = b.shape[1] if b.ndim == 2 else 1
    a_data_ptr = a.data.ptr
    b_data_ptr = b.data.ptr
    a = cupy.asfortranarray(a, dtype=dtype)
    b = cupy.asfortranarray(b, dtype=dtype)
    # getrf/getrs overwrite their operands.  If asfortranarray handed back
    # the caller's own buffer (input already Fortran-ordered in the target
    # dtype), take a private copy.  The copy must stay Fortran-ordered:
    # ndarray.copy() defaults to order='C', which would hand cuSOLVER the
    # transposed layout.
    if a.data.ptr == a_data_ptr:
        a = a.copy(order='F')
    if b.data.ptr == b_data_ptr:
        b = b.copy(order='F')

    handle = device.get_cusolver_handle()
    dipiv = cupy.empty(n, dtype=numpy.int32)
    dinfo = cupy.empty(1, dtype=numpy.int32)
    lwork = helper(handle, n, n, a.data.ptr, n)
    dwork = cupy.empty(lwork, dtype=a.dtype)
    # LU factorization (A = L * U)
    getrf(handle, n, n, a.data.ptr, n, dwork.data.ptr, dipiv.data.ptr,
          dinfo.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        getrf, dinfo)
    # Solves Ax = b
    getrs(handle, cublas.CUBLAS_OP_N, n, nrhs, a.data.ptr, n, dipiv.data.ptr,
          b.data.ptr, n, dinfo.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        getrs, dinfo)
    return b
def create_rand_integers():
    """Return a (500, 20) float64 array of random integers in Fortran order.

    The values are drawn with ``cp.random.randint(30, ...)`` and then cast
    to ``cp.float64`` before the layout conversion.
    """
    drawn = cp.random.randint(30, size=(500, 20))
    as_float = drawn.astype(cp.float64)
    return cp.asfortranarray(as_float)
Exemple #29
0
# Post-processing of a clustering result: `mod`, `vertex`, `partition`,
# `result`, `XYZ_C` and `louvain_starttime` are defined before this point.
# NOTE(review): `cp` / `cd` are presumably cupy / cudf - confirm against
# the file's imports.
print('modularity', mod)
# Hand both columns to `cp` through DLPack.
vertex = cp.fromDlpack(vertex.to_dlpack())
partition = cp.fromDlpack(partition.to_dlpack())
# Reshape both to 1-D arrays of length XYZ_C.shape[0].
vertex = cp.reshape(vertex, XYZ_C.shape[0])
labelRE = cp.reshape(partition, XYZ_C.shape[0])
# Reorder the vertices and their labels by ascending vertex value.
index = cp.argsort(vertex)
vertex = cp.take_along_axis(vertex, index, axis=0)
labelRE = cp.take_along_axis(labelRE, index, axis=0)
# Debug output of the intermediate arrays.
print(result)
print(vertex)
print(labelRE)
print(index)
# Elapsed wall-clock time since `louvain_starttime`.
louvain_endtime = datetime.datetime.now()
print(louvain_endtime - louvain_starttime)
print(labelRE)
# Convert the labels back through DLPack for `cd`, then write them to CSV.
labelRE = cp.asfortranarray(labelRE)
labelRE = cd.from_dlpack(labelRE.toDlpack())
# Disabled alternative: labelRE = cd.DataFrame(labelRE)
print(labelRE)
df = cd.DataFrame({'label': labelRE})
df.to_csv('HW_AI/HW_Final/gpu.csv')

'''
plt.figure(1)
ax = plt.axes(projection='3d')
z = XYZ_C[:,2]
x = XYZ_C[:,0]
y = XYZ_C[:,1]
c = labelRE
ax.scatter(x, y, z, c = c, cmap = plt.get_cmap('jet'))
plt.title('Cluster result by modularity')
Exemple #30
0
def extractTemplatesfromSnippets(proc=None,
                                 probe=None,
                                 params=None,
                                 Nbatch=None,
                                 nPCs=None):
    """Derive template prototypes and PC waveforms from spike snippets.

    Very similar to extractPCfromSnippets, but in addition to the PC
    waveforms it returns the template "prototypes", obtained by a k-means
    style clustering of the 1D waveforms.

    Args:
        proc: Recording data, read through ``proc.flat``.
        probe: Object providing ``Nchan``.
        params: Object providing ``NT``, ``nskip``, ``nPCs``, ``nt0min`` and
            ``scaleproc``; it is also forwarded to the peak-detection and
            snippet helpers.
        Nbatch: Number of batches to walk over.
        nPCs: Number of templates / components; ``params.nPCs`` is used
            when this is falsy.

    Returns:
        tuple: ``(wTEMP, wPCA)`` - the unit-normalized cluster prototypes
        and the first ``nPCs`` left singular vectors of the snippet matrix.
    """
    batch_len = params.NT
    # only every `stride`-th batch is visited
    stride = params.nskip
    n_comp = nPCs or params.nPCs
    nt0min = params.nt0min
    n_chan = probe.Nchan
    batch_starts = np.arange(
        0, batch_len * Nbatch + 1, batch_len).astype(np.int64)

    def _unit_columns(w):
        # scale every column to unit Euclidean norm
        return w / cp.sum(w**2, axis=0)**.5

    n_collected = 0
    # one block of 1D spike snippets per visited batch
    snippet_blocks = []

    for ibatch in tqdm(range(0, Nbatch, stride), desc="Extracting templates"):
        first = n_chan * batch_starts[ibatch]
        raw = proc.flat[first:first + batch_len * n_chan].reshape(
            (-1, n_chan), order='F')

        # upload to the GPU and scale back to unit variance
        dataRAW = cp.asarray(raw, dtype=np.float32) / params.scaleproc

        # isolated spikes found in this batch
        row, col, _ = isolated_peaks_new(dataRAW, params)

        # voltage snippet of every peak, taken from the peak's channel
        block = get_SpikeSample(dataRAW, row, col, params)
        snippet_blocks.append(block)

        n_collected += block.shape[1]
        if n_collected > 1e5:
            break

    # stack all snippets column-wise into one Fortran-ordered float32 matrix
    stacked = cp.concatenate(snippet_blocks, axis=1).astype(np.float32)
    dd = cp.asfortranarray(stacked)

    # seed the clustering with randomly picked waveforms
    seed_cols = np.random.permutation(dd.shape[1])[:n_comp]
    wTEMP = _unit_columns(dd[:, seed_cols])

    for _ in range(10):
        # assign every waveform to the cluster it correlates with most
        cc = cp.dot(wTEMP.T, dd)
        imax = cp.argmax(cc, axis=0)
        amax = cc[imax, np.arange(cc.shape[1])]
        for j in range(n_comp):
            # correlation-weighted sum of the members is the new mean
            members = imax == j
            wTEMP[:, j] = cp.dot(dd[:, members], amax[members].T)
        wTEMP = _unit_columns(wTEMP)

    # the PCs are the left singular vectors of the snippet matrix
    U, _, _ = svdecon(dd)
    wPCA = U[:, :n_comp]

    # fix the arbitrary sign of the first PC so its negativity is downward
    wPCA[:, 0] = -wPCA[:, 0] * cp.sign(wPCA[nt0min, 0])

    return wTEMP, wPCA
Exemple #31
0
 def __mul__(self, other):
     """Multiply this sparse matrix by a scalar, sparse matrix or dense array.

     Dispatch order: scalar, CSR matrix, CSC matrix, any other sparse
     matrix (converted with ``tocsr()``), then dense arrays of 0, 1 or 2
     dimensions.  Each routine is chosen by availability.

     Returns:
         The product, or ``NotImplemented`` when ``other`` is none of the
         supported operand kinds.

     Raises:
         NotImplementedError: If no suitable cuSPARSE routine is available.
         ValueError: If a dense ``other`` has more than two dimensions.
     """
     if cupy.isscalar(other):
         self.sum_duplicates()
         return self._with_data(self.data * other)

     if isspmatrix_csr(other):
         self.sum_duplicates()
         other.sum_duplicates()
         if cusparse.check_availability('csrgemm2'):
             return cusparse.csrgemm2(self, other)
         if cusparse.check_availability('csrgemm'):
             return cusparse.csrgemm(self, other)
         raise NotImplementedError

     if csc.isspmatrix_csc(other):
         self.sum_duplicates()
         other.sum_duplicates()
         if cusparse.check_availability('csrgemm') and not runtime.is_hip:
             # trans=True is still buggy as of ROCm 4.2.0
             return cusparse.csrgemm(self, other.T, transb=True)
         if cusparse.check_availability('csrgemm2'):
             rhs = other.tocsr()
             rhs.sum_duplicates()
             return cusparse.csrgemm2(self, rhs)
         raise NotImplementedError

     if base.isspmatrix(other):
         return self * other.tocsr()

     if not base.isdense(other):
         return NotImplemented

     if other.ndim == 0:
         self.sum_duplicates()
         return self._with_data(self.data * other)

     if other.ndim == 1:
         self.sum_duplicates()
         other = cupy.asfortranarray(other)
         # extra padding is required to stay clear of the CUB bug,
         # see cupy/cupy#3679 for discussion
         indptr = self.indptr
         has_padding = (indptr.data.mem.size >
                        indptr.size * indptr.dtype.itemsize)
         # CUB spmv is buggy since CUDA 11.0, see
         # https://github.com/cupy/cupy/issues/3822#issuecomment-782607637
         before_cuda11 = cub._get_cuda_build_version() < 11000
         is_cub_safe = has_padding and before_cuda11
         for backend in _accelerator.get_routine_accelerators():
             if backend != _accelerator.ACCELERATOR_CUB:
                 continue
             if (runtime.is_hip or not is_cub_safe
                     or not other.flags.c_contiguous):
                 continue
             return cub.device_csrmv(self.shape[0], self.shape[1],
                                     self.nnz, self.data,
                                     self.indptr, self.indices,
                                     other)
         # csrmvEx does not work if nnz == 0
         if (cusparse.check_availability('csrmvEx') and self.nnz > 0
                 and cusparse.csrmvExIsAligned(self, other)):
             return cusparse.csrmvEx(self, other)
         if cusparse.check_availability('csrmv'):
             return cusparse.csrmv(self, other)
         if cusparse.check_availability('spmv'):
             return cusparse.spmv(self, other)
         raise NotImplementedError

     if other.ndim == 2:
         self.sum_duplicates()
         if cusparse.check_availability('csrmm2'):
             return cusparse.csrmm2(self, cupy.asfortranarray(other))
         if cusparse.check_availability('spmm'):
             return cusparse.spmm(self, cupy.asfortranarray(other))
         raise NotImplementedError

     raise ValueError('could not interpret dimensions')