Python Blas.axpy Examples

Programming Language: Python

Namespace/Package Name: numbapro.cudalib.cublas

Class/Type: Blas

Method/Function: axpy

Examples at hotexamples.com: 2

Python Blas.axpy - 2 examples found. These are the top rated real world Python examples of numbapro.cudalib.cublas.Blas.axpy extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

axpy(2)

Blas(1)

amax(1)

gemv(1)

Frequently Used Methods

axpy (2)

Blas (1)

amax (1)

gemv (1)

Example #1

Show file

File: kernels.py Project: elim723/MCEq

def kern_CUDA_dense(nsteps, dX, rho_inv, int_m, dec_m,
                    phi, grid_idcs, prog_bar=None):
    """`NVIDIA CUDA cuBLAS <https://developer.nvidia.com/cublas>`_ implementation 
    of forward-euler integration.
    
    Function requires a working :mod:`numbapro` installation. It is typically slower
    compared to :func:`kern_MKL_sparse` but it depends on your hardware.
    
    Args:
      nsteps (int): number of integration steps
      dX (numpy.array[nsteps]): vector of step-sizes :math:`\\Delta X_i` in g/cm**2
      rho_inv (numpy.array[nsteps]): vector of density values :math:`\\frac{1}{\\rho(X_i)}`
      int_m (numpy.array): interaction matrix :eq:`int_matrix` in dense or sparse representation
      dec_m (numpy.array): decay  matrix :eq:`dec_matrix` in dense or sparse representation
      phi (numpy.array): initial state vector :math:`\\Phi(X_0)` 
      prog_bar (object,optional): handle to :class:`ProgressBar` object
    Returns:
      numpy.array: state vector :math:`\\Phi(X_{nsteps})` after integration
    """
    
    calc_precision = None
    if config['CUDA_precision'] == 32:
        calc_precision = np.float32
    elif config['CUDA_precision'] == 64:
        calc_precision = np.float64
    else:
        raise Exception("kern_CUDA_dense(): Unknown precision specified.")    
    
    #=======================================================================
    # Setup GPU stuff and upload data to it
    #=======================================================================
    try:
        from numbapro.cudalib.cublas import Blas  # @UnresolvedImport
        from numbapro import cuda, float32  # @UnresolvedImport
    except ImportError:
        raise Exception("kern_CUDA_dense(): Numbapro CUDA libaries not " + 
                        "installed.\nCan not use GPU.")
    cubl = Blas()
    m, n = int_m.shape
    stream = cuda.stream()
    cu_int_m = cuda.to_device(int_m.astype(calc_precision), stream)
    cu_dec_m = cuda.to_device(dec_m.astype(calc_precision), stream)
    cu_curr_phi = cuda.to_device(phi.astype(calc_precision), stream)
    cu_delta_phi = cuda.device_array(phi.shape, dtype=calc_precision)
    for step in xrange(nsteps):
        if prog_bar:
            prog_bar.update(step)
        cubl.gemv(trans='T', m=m, n=n, alpha=float32(1.0), A=cu_int_m,
            x=cu_curr_phi, beta=float32(0.0), y=cu_delta_phi)
        cubl.gemv(trans='T', m=m, n=n, alpha=float32(rho_inv[step]),
            A=cu_dec_m, x=cu_curr_phi, beta=float32(1.0), y=cu_delta_phi)
        cubl.axpy(alpha=float32(dX[step]), x=cu_delta_phi, y=cu_curr_phi)

    return cu_curr_phi.copy_to_host()

Example #2

Show file

File: kernels.py Project: elim723/MCEq

def kern_CUDA_sparse(nsteps, dX, rho_inv, int_m, dec_m,
                    phi, grid_idcs, prog_bar=None):
    """`NVIDIA CUDA cuSPARSE <https://developer.nvidia.com/cusparse>`_ implementation 
    of forward-euler integration.
    
    Function requires a working :mod:`numbapro` installation.
    
    Note:
      Currently some bug in :mod:`numbapro` introduces unnecessary array copies and
      slows down the execution tremendously. 
    
    Args:
      nsteps (int): number of integration steps
      dX (numpy.array[nsteps]): vector of step-sizes :math:`\\Delta X_i` in g/cm**2
      rho_inv (numpy.array[nsteps]): vector of density values :math:`\\frac{1}{\\rho(X_i)}`
      int_m (numpy.array): interaction matrix :eq:`int_matrix` in dense or sparse representation
      dec_m (numpy.array): decay  matrix :eq:`dec_matrix` in dense or sparse representation
      phi (numpy.array): initial state vector :math:`\\Phi(X_0)` 
      prog_bar (object,optional): handle to :class:`ProgressBar` object
    Returns:
      numpy.array: state vector :math:`\\Phi(X_{nsteps})` after integration
    """
    calc_precision = None
    if config['CUDA_precision'] == 32:
        calc_precision = np.float32
    elif config['CUDA_precision'] == 64:
        calc_precision = np.float64
    else:
        raise Exception("kern_CUDA_sparse(): Unknown precision specified.")    
    print ("kern_CUDA_sparse(): Warning, the performance is slower than " + 
           "dense cuBLAS or any type of MKL.")
    #=======================================================================
    # Setup GPU stuff and upload data to it
    #=======================================================================
    try:
        from numbapro.cudalib import cusparse  # @UnresolvedImport
        from numbapro.cudalib.cublas import Blas
        from numbapro import cuda, float32  # @UnresolvedImport
    except ImportError:
        raise Exception("kern_CUDA_sparse(): Numbapro CUDA libaries not " + 
                        "installed.\nCan not use GPU.")
    cusp = cusparse.Sparse()
    cubl = Blas()
    m, n = int_m.shape
    int_m_nnz = int_m.nnz
    int_m_csrValA = cuda.to_device(int_m.data.astype(calc_precision))
    int_m_csrRowPtrA = cuda.to_device(int_m.indptr)
    int_m_csrColIndA = cuda.to_device(int_m.indices)
    
    dec_m_nnz = dec_m.nnz
    dec_m_csrValA = cuda.to_device(dec_m.data.astype(calc_precision))
    dec_m_csrRowPtrA = cuda.to_device(dec_m.indptr)
    dec_m_csrColIndA = cuda.to_device(dec_m.indices)
    
    cu_curr_phi = cuda.to_device(phi.astype(calc_precision))
    cu_delta_phi = cuda.device_array(phi.shape, dtype=calc_precision)

    descr = cusp.matdescr()
    descr.indexbase = cusparse.CUSPARSE_INDEX_BASE_ZERO
    
    for step in xrange(nsteps):
        if prog_bar and (step % 5 == 0):
            prog_bar.update(step)
        cusp.csrmv(trans='T', m=m, n=n, nnz=int_m_nnz,
                   descr=descr,
                   alpha=float32(1.0),
                   csrVal=int_m_csrValA,
                   csrRowPtr=int_m_csrRowPtrA,
                   csrColInd=int_m_csrColIndA,
                   x=cu_curr_phi, beta=float32(0.0), y=cu_delta_phi)
        cusp.csrmv(trans='T', m=m, n=n, nnz=dec_m_nnz,
                   descr=descr,
                   alpha=float32(rho_inv[step]),
                   csrVal=dec_m_csrValA,
                   csrRowPtr=dec_m_csrRowPtrA,
                   csrColInd=dec_m_csrColIndA,
                   x=cu_curr_phi, beta=float32(1.0), y=cu_delta_phi)
        cubl.axpy(alpha=float32(dX[step]), x=cu_delta_phi, y=cu_curr_phi)

    return cu_curr_phi.copy_to_host()