コード例 #1
0
ファイル: mgpuengine.py プロジェクト: acopar/fast-fusion
    def __init__(self, n_proc=1, stop='p10', epsilon=6, dtype=np.float32):
        """Store the engine settings and create a CUDA context for this rank.

        Parameters
        ----------
        n_proc
            Stored as ``self.n_proc``.
        stop
            Stored as ``self.stop``.
        epsilon
            Stored as ``self.epsilon``.
        dtype
            Stored as ``self.dtype``.

        Besides the stored settings, this sets ``self.comm`` to
        ``MPI.COMM_WORLD``, ``self.rank`` to that communicator's rank,
        creates ``self.ctx`` on a CUDA device, calls ``linalg.init()`` and
        ``cusparse.init()``, and builds ``self.mod`` from ``gpucode.code``.
        """
        self.stop = stop
        self.epsilon = epsilon

        self.profile = False
        self._locals = {}
        self.dtype = dtype
        self.comm = MPI.COMM_WORLD
        self.rank = self.comm.rank
        self.n_proc = n_proc
        self.sparse = False

        # Wrap the rank around the number of visible devices: using the raw
        # rank as the device ordinal fails as soon as there are more ranks
        # than GPUs. For rank < device count the selected device is unchanged.
        ngpus = driver.Device.count()
        gpuid = self.rank % ngpus
        self.ctx = driver.Device(gpuid).make_context()

        # The libraries are initialised only after the context exists.
        linalg.init()
        cusparse.init()

        self.mod = SourceModule(gpucode.code)
コード例 #2
0
    def __enter__(self):
        """Load data, create a CUDA context and load the GPU-side state.

        Steps, in order: ``self.load()``; create ``self.ctx`` on the device
        whose index is ``self.rank`` modulo the device count; call
        ``cusparse.init()`` and ``linalg.init()``; ``self.load_gpu()``.
        """
        self.load()

        # Device index wraps around the number of devices reported by the
        # driver.
        device_index = self.rank % driver.Device.count()
        self.ctx = driver.Device(device_index).make_context()

        cusparse.init()
        linalg.init()

        self.load_gpu()
コード例 #3
0
    def __init__(self, n_proc=1, stop='p10', epsilon=6, dtype=np.float32):
        """Store the engine settings and create a CUDA context on device 0.

        ``stop`` and ``epsilon`` are stored as given.

        NOTE(review): ``n_proc`` and ``dtype`` are accepted but never read;
        ``self.n_proc`` is always 1 and ``self.dtype`` is always
        ``np.float32``. Confirm this is intentional.
        """
        # Caller-supplied settings.
        self.stop, self.epsilon = stop, epsilon

        # Fixed values, independent of the arguments.
        self.n_proc = 1
        self.rank = 0
        self.dtype = np.float32
        self.sparse = False
        self.profile = False

        # Empty containers, filled elsewhere.
        self.memory = {}
        self._locals = {}

        self.ctx = driver.Device(0).make_context()

        # The libraries are initialised only after the context exists.
        linalg.init()
        cusparse.init()

        self.mod = SourceModule(gpucode.code)
コード例 #4
0
from cuda_cffi import cusparse

import numpy as np
from numpy.testing import (run_module_suite, assert_raises, assert_equal,
                           assert_almost_equal)

from unittest import skipIf

import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv

import scipy.sparse  # TODO: refactor to remove this

# Module-level call: runs once when this module is imported, before any of
# the test functions below can be invoked.
cusparse.init()

# dtype groups; cusparse_dtypes is the real list followed by the complex list.
cusparse_real_dtypes = [np.float32, np.float64]
cusparse_complex_dtypes = [np.complex64, np.complex128]
cusparse_dtypes = cusparse_real_dtypes + cusparse_complex_dtypes
# Operation constants: non-transpose, transpose and conjugate transpose.
# NOTE(review): the CUSPARSE_OPERATION_* names are not imported in the
# visible part of this module -- confirm they are brought into scope
# (e.g. via a star import from cuda_cffi.cusparse).
trans_list = [CUSPARSE_OPERATION_NON_TRANSPOSE,
              CUSPARSE_OPERATION_TRANSPOSE,
              CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE]


def test_context_create_destroy():
    """Create a handle with cusparseCreate and pass it to cusparseDestroy."""
    # Passes when neither call raises.
    cusparseDestroy(cusparseCreate())


def test_get_version():
コード例 #5
0
from __future__ import division

from cuda_cffi.cusparse import *
from cuda_cffi.cusparse import (_csrgeamNnz, _csrgemmNnz)

from cuda_cffi import cusparse
# Module-level call placed between the imports: it runs at import time,
# before the pycuda imports further down are executed.
cusparse.init()

import numpy as np
from numpy.testing import assert_raises, assert_equal, assert_almost_equal

from unittest import skipIf

import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv

import scipy.sparse  # TODO: refactor to remove this

# dtype groups; cusparse_dtypes is the real list followed by the complex list.
cusparse_real_dtypes = [np.float32, np.float64]
cusparse_complex_dtypes = [np.complex64, np.complex128]
cusparse_dtypes = cusparse_real_dtypes + cusparse_complex_dtypes
# Operation constants: non-transpose, transpose and conjugate transpose.
trans_list = [CUSPARSE_OPERATION_NON_TRANSPOSE,
              CUSPARSE_OPERATION_TRANSPOSE,
              CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE]


def test_context_create_destroy():
    """Create a handle with cusparseCreate and pass it to cusparseDestroy."""
    # Passes when neither call raises.
    cusparseDestroy(cusparseCreate())