Python Blas.axpy Beispiele

Programmiersprache: Python

Namespace / Paketname: accelerate.cuda.blas

Klasse / Typ: Blas

Methode / Funktion: axpy

Beispiele auf hotexamples.com: 2

Python Blas.axpy – 2 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Methode accelerate.cuda.blas.Blas.axpy, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um deren Qualität zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

Blas(7)

asum(2)

axpy(2)

gemm(2)

gemv(1)

Häufig verwendete Methoden

Blas (7)

asum (2)

axpy (2)

gemm (2)

gemv (1)

Beispiel #1

Datei anzeigen

Datei: kernels.py Projekt: HansN87/MCEq

class CUDASparseContext(object):
    """Hold CSR matrices and state vectors on a CUDA device and step them.

    The interaction and decay matrices are uploaded once in CSR form; the
    state vector is uploaded with :meth:`set_phi`, advanced in place on the
    device with :meth:`do_step` and fetched back with :meth:`get_phi`.

    The floating point type of all device buffers is chosen from
    ``config['FP_precision']`` (32 or 64).
    """

    def __init__(self, int_m, dec_m, device_id=0):
        """Select the CUDA device, create library handles, upload matrices.

        Args:
          int_m: interaction matrix exposing ``shape``, ``nnz``, ``data``,
            ``indptr`` and ``indices`` (presumably a scipy.sparse CSR
            matrix -- confirm against the caller)
          dec_m: decay matrix with the same attributes as ``int_m``
          device_id (int): index of the CUDA device to run on

        Raises:
          Exception: if ``config['FP_precision']`` is neither 32 nor 64, or
            if the accelerate CUDA libraries cannot be imported
        """
        if config['FP_precision'] == 32:
            self.fl_pr = np.float32
        elif config['FP_precision'] == 64:
            self.fl_pr = np.float64
        else:
            raise Exception(
                "CUDASparseContext(): Unknown precision specified.")
        #======================================================================
        # Setup GPU stuff and upload data to it
        #======================================================================
        try:
            from accelerate.cuda.blas import Blas
            import accelerate.cuda.sparse as cusparse
            from accelerate.cuda import cuda
        except ImportError:
            raise Exception("kern_CUDA_sparse(): Numbapro CUDA libaries not " +
                            "installed.\nCan not use GPU.")

        # Honor the caller's device choice; this used to be hard-coded to
        # device 0, which silently ignored the ``device_id`` argument.
        cuda.select_device(device_id)
        self.cuda = cuda
        self.cusp = cusparse.Sparse()
        self.cubl = Blas()
        self.set_matrices(int_m, dec_m)

    def set_matrices(self, int_m, dec_m):
        """Upload both matrices in CSR form and allocate the work vector.

        Must be called after ``__init__`` has set ``self.cuda`` and
        ``self.cusp``. Values are cast to the configured float type; the
        index arrays are uploaded as they are.

        Args:
          int_m: interaction matrix (CSR attributes, see ``__init__``)
          dec_m: decay matrix (CSR attributes, see ``__init__``)
        """
        # Only needed for the index-base constant below.
        import accelerate.cuda.sparse as cusparse

        to_device = self.cuda.to_device

        self.m, self.n = int_m.shape
        self.int_m_nnz = int_m.nnz
        self.int_m_csrValA = to_device(int_m.data.astype(self.fl_pr))
        self.int_m_csrRowPtrA = to_device(int_m.indptr)
        self.int_m_csrColIndA = to_device(int_m.indices)

        # NOTE(review): do_step() reuses self.m / self.n for the decay matrix,
        # so dec_m is assumed to have the same shape as int_m -- confirm.
        self.dec_m_nnz = dec_m.nnz
        self.dec_m_csrValA = to_device(dec_m.data.astype(self.fl_pr))
        self.dec_m_csrRowPtrA = to_device(dec_m.indptr)
        self.dec_m_csrColIndA = to_device(dec_m.indices)

        # One descriptor with zero-based indexing is shared by both matrices.
        self.descr = self.cusp.matdescr()
        self.descr.indexbase = cusparse.CUSPARSE_INDEX_BASE_ZERO

        # Device buffer of length m that receives the matrix-vector products.
        self.cu_delta_phi = self.cuda.device_array_like(
            np.zeros(self.m, dtype=self.fl_pr))

    def set_phi(self, phi):
        """Upload ``phi`` (cast to the configured float type) as the state."""
        self.cu_curr_phi = self.cuda.to_device(phi.astype(self.fl_pr))

    def get_phi(self):
        """Return a host copy of the current device state vector.

        :meth:`set_phi` must have been called first, since it creates the
        device buffer that is read here.
        """
        return self.cu_curr_phi.copy_to_host()

    def do_step(self, rho_inv, dX):
        """Advance the device state vector by one step, in place.

        With the cuSPARSE ``csrmv`` convention ``y = alpha*A*x + beta*y`` and
        the cuBLAS ``axpy`` convention ``y = alpha*x + y``, the three calls
        below amount to::

            delta = int_m . phi
            delta = rho_inv * dec_m . phi + delta
            phi   = dX * delta + phi

        Args:
          rho_inv: scalar factor applied to the decay term (cast to the
            configured float type)
          dX: scalar step size (cast to the configured float type)
        """
        # delta_phi = int_m . phi  (beta = 0 discards the previous delta_phi)
        self.cusp.csrmv(trans='N',
                        m=self.m,
                        n=self.n,
                        nnz=self.int_m_nnz,
                        descr=self.descr,
                        alpha=self.fl_pr(1.0),
                        csrVal=self.int_m_csrValA,
                        csrRowPtr=self.int_m_csrRowPtrA,
                        csrColInd=self.int_m_csrColIndA,
                        x=self.cu_curr_phi,
                        beta=self.fl_pr(0.0),
                        y=self.cu_delta_phi)
        # delta_phi += rho_inv * dec_m . phi  (beta = 1 accumulates)
        self.cusp.csrmv(trans='N',
                        m=self.m,
                        n=self.n,
                        nnz=self.dec_m_nnz,
                        descr=self.descr,
                        alpha=self.fl_pr(rho_inv),
                        csrVal=self.dec_m_csrValA,
                        csrRowPtr=self.dec_m_csrRowPtrA,
                        csrColInd=self.dec_m_csrColIndA,
                        x=self.cu_curr_phi,
                        beta=self.fl_pr(1.0),
                        y=self.cu_delta_phi)
        # phi += dX * delta_phi
        self.cubl.axpy(alpha=self.fl_pr(dX),
                       x=self.cu_delta_phi,
                       y=self.cu_curr_phi)

Beispiel #2

Datei anzeigen

Datei: kernels.py Projekt: HansN87/MCEq

def kern_CUDA_dense(nsteps,
                    dX,
                    rho_inv,
                    int_m,
                    dec_m,
                    phi,
                    grid_idcs,
                    mu_egrid=None,
                    mu_dEdX=None,
                    mu_lidx_nsp=None,
                    prog_bar=None):
    """`NVIDIA CUDA cuBLAS <https://developer.nvidia.com/cublas>`_ implementation
    of forward-euler integration.

    Function requires a working :mod:`numbapro` installation. It is typically slower
    compared to :func:`kern_MKL_sparse` but it depends on your hardware.

    Each step performs, on the device::

        delta = int_m . phi
        delta = rho_inv[step] * dec_m . phi + delta
        phi   = dX[step] * delta + phi

    Args:
      nsteps (int): number of integration steps
      dX (numpy.array[nsteps]): vector of step-sizes :math:`\\Delta X_i` in g/cm**2
      rho_inv (numpy.array[nsteps]): vector of density values :math:`\\frac{1}{\\rho(X_i)}`
      int_m (numpy.array): interaction matrix :eq:`int_matrix` in dense or sparse representation
      dec_m (numpy.array): decay  matrix :eq:`dec_matrix` in dense or sparse representation
      phi (numpy.array): initial state vector :math:`\\Phi(X_0)`
      grid_idcs: not used by this solver; accepted so the signature stays
        unchanged for callers
      mu_egrid (optional): not used by this solver
      mu_dEdX (optional): not used by this solver
      mu_lidx_nsp (optional): not used by this solver
      prog_bar (object,optional): handle to :class:`ProgressBar` object
    Returns:
      tuple: ``(phi, [])`` where ``phi`` is the numpy.array state vector
      :math:`\\Phi(X_{nsteps})` after integration; the second element is
      always an empty list in this solver
    Raises:
      Exception: if ``config['FP_precision']`` is neither 32 nor 64, or if
        the accelerate CUDA libraries cannot be imported
      NotImplementedError: if ``config['enable_muon_energy_loss']`` is set
    """

    if config['FP_precision'] == 32:
        fl_pr = np.float32
    elif config['FP_precision'] == 64:
        fl_pr = np.float64
    else:
        raise Exception("kern_CUDA_dense(): Unknown precision specified.")

    if config['enable_muon_energy_loss']:
        raise NotImplementedError(
            'kern_CUDA_dense(): ' +
            'Energy loss not imlemented for this solver.')

    #=======================================================================
    # Setup GPU stuff and upload data to it
    #=======================================================================
    try:
        from accelerate.cuda.blas import Blas
        from accelerate.cuda import cuda
    except ImportError:
        raise Exception("kern_CUDA_dense(): Numbapro CUDA libaries not " +
                        "installed.\nCan not use GPU.")
    cubl = Blas()
    m, n = int_m.shape
    # NOTE(review): the uploads are issued on ``stream`` while the BLAS handle
    # is created without one and the stream is never synchronized explicitly
    # -- confirm that ordering between the copies and the first gemv holds.
    stream = cuda.stream()
    cu_int_m = cuda.to_device(int_m.astype(fl_pr), stream)
    cu_dec_m = cuda.to_device(dec_m.astype(fl_pr), stream)
    cu_curr_phi = cuda.to_device(phi.astype(fl_pr), stream)
    cu_delta_phi = cuda.device_array(phi.shape, dtype=fl_pr)

    from time import time
    start = time()

    # ``range`` keeps the loop valid on both Python 2 and Python 3.
    for step in range(nsteps):
        if prog_bar:
            prog_bar.update(step)
        # delta_phi = int_m . phi  (beta = 0 discards the previous delta_phi)
        cubl.gemv(trans='N',
                  m=m,
                  n=n,
                  alpha=fl_pr(1.0),
                  A=cu_int_m,
                  x=cu_curr_phi,
                  beta=fl_pr(0.0),
                  y=cu_delta_phi)
        # delta_phi += rho_inv[step] * dec_m . phi  (beta = 1 accumulates)
        cubl.gemv(trans='N',
                  m=m,
                  n=n,
                  alpha=fl_pr(rho_inv[step]),
                  A=cu_dec_m,
                  x=cu_curr_phi,
                  beta=fl_pr(1.0),
                  y=cu_delta_phi)
        # phi += dX[step] * delta_phi
        cubl.axpy(alpha=fl_pr(dX[step]), x=cu_delta_phi, y=cu_curr_phi)

    # Skip the report when no step ran: dividing by float(0) used to raise
    # ZeroDivisionError for nsteps == 0.
    if nsteps > 0:
        print("Performance: {0:6.2f}ms/iteration".format(
            1e3 * (time() - start) / float(nsteps)))

    return cu_curr_phi.copy_to_host(), []