コード例 #1
0
ファイル: nvcc-test.py プロジェクト: simudream/codepy
    'PyObject* newShape = Py_BuildValue("(i)", intLength)',
    'PyObject* kwargs = Py_BuildValue("{sOsOs%s}", "shape", newShape, "dtype", type, "gpudata", diffResult)' % ptr_sz_uint_conv,
    'PyObject* GPUArrayClass = PyObject_GetAttrString(gpuArray, "__class__")',
    'PyObject* remoteResult = PyObject_Call(GPUArrayClass, args, kwargs)',
    'return remoteResult']


# Register the host-side function `adjacentDifference`, declared as taking a
# PyObject pointer named `gpuArray` and returning a PyObject pointer.  Its body
# is built by wrapping every entry of `statements` in a cgen Statement.
host_mod.add_function(
    FunctionBody(
        FunctionDeclaration(Pointer(Value("PyObject", "adjacentDifference")),
                            [Pointer(Value("PyObject", "gpuArray"))]),
        Block([Statement(x) for x in statements])))
# Add the Boost.Python extract header to the host module's preamble.
host_mod.add_to_preamble([Include('boost/python/extract.hpp')])

                                 
# Wrap the host module in a CUDA module and add cuda.h to its preamble.
cuda_mod = CudaModule(host_mod)
cuda_mod.add_to_preamble([Include('cuda.h')])

# C source fragments, kept as plain strings, for the generated CUDA code.
# Flat element index computed from the block and thread indices.
globalIndex = 'int index = blockIdx.x * blockDim.x + threadIdx.x'
# Difference between an element and its predecessor.
# NOTE(review): at index 0 this reads inputPtr[-1]; any guard lives in code
# that is not visible here -- confirm.
compute_diff = 'outputPtr[index] = inputPtr[index] - inputPtr[index-1]'
# Statement texts for the launch routine: allocate `length` elements of T with
# cuMemAlloc, launch diffKernel with 256 threads per block and enough blocks
# to cover `length`, then return the output device pointer.  The return value
# of cuMemAlloc is not checked.
launch = ['CUdeviceptr output',
          'cuMemAlloc(&output, sizeof(T) * length)',
          'int bSize = 256',
          'int gSize = (length-1)/bSize + 1',
          'diffKernel<<<gSize, bSize>>>((T*)inputPtr, length, (T*)output)',
          'return output']
diff =[
    Template('typename T',
             CudaGlobal(FunctionDeclaration(Value('void', 'diffKernel'),
                [Value('T*', 'inputPtr'),
                 Value('int', 'length'),
コード例 #2
0
ファイル: cuda.py プロジェクト: jabooth/PyOP2
def _cusp_solver(M, parameters):
    """Return a compiled CUSP solver module for ``M.ctype`` and ``parameters``.

    Compiled modules are kept in ``_cusp_cache`` under a key built from the
    matrix value type and the solver-related entries of ``parameters``; a
    truthy cached entry is returned without regenerating any code.  Otherwise
    a Boost.Python host module exposing ``solve`` and a CUDA module defining
    ``__cusp_solve`` are generated with cgen/codepy, compiled, cached and
    returned.

    :arg M: object whose ``ctype`` attribute is used as the C type of the
        ``ValueType`` typedef in the generated code.
    :arg parameters: mapping providing ``ksp_type``, ``pc_type``,
        ``ksp_rtol``, ``ksp_atol``, ``ksp_max_it``, ``ksp_gmres_restart`` and
        ``ksp_monitor``.
    :raises RuntimeError: if ``pc_type`` or ``ksp_type`` is not one of the
        types listed in the tables below.
    """
    def _key(ctype, params):
        # Every option that ends up in the generated source is part of the key.
        option_names = ('ksp_type', 'pc_type', 'ksp_rtol', 'ksp_atol',
                        'ksp_max_it', 'ksp_gmres_restart', 'ksp_monitor')
        return (ctype,) + tuple(params[name] for name in option_names)

    cached = _cusp_cache.get(_key(M.ctype, parameters))
    if cached:
        return cached

    import codepy.toolchain
    from cgen import FunctionBody, FunctionDeclaration, \
        Block, Statement, Include, Value
    from codepy.bpl import BoostPythonModule
    from codepy.cuda import CudaModule

    host_toolchain = codepy.toolchain.guess_toolchain()
    device_toolchain = codepy.toolchain.guess_nvcc_toolchain()
    cusp_home = os.environ.get('CUSP_HOME')
    if cusp_home is not None:
        device_toolchain.add_library('cusp', [cusp_home], [], [])

    host_mod = BoostPythonModule()
    nvcc_mod = CudaModule(host_mod)

    # Preamble of the device module: headers first, then the using-directive.
    device_headers = ('thrust/device_vector.h',
                      'thrust/fill.h',
                      'cusp/csr_matrix.h',
                      'cusp/krylov/cg.h',
                      'cusp/krylov/bicgstab.h',
                      'cusp/krylov/gmres.h',
                      'cusp/precond/diagonal.h',
                      'cusp/precond/smoothed_aggregation.h',
                      'cusp/precond/ainv.h',
                      'string')
    nvcc_mod.add_to_preamble([Include(header) for header in device_headers])
    nvcc_mod.add_to_preamble([Statement('using namespace std')])

    # PETSc preconditioner names mapped onto the CUSP declaration of `M`.
    pc_diagonal = Statement(
        'cusp::precond::diagonal< ValueType, cusp::device_memory >M(A)')
    pc_ainv = Statement(
        'cusp::precond::scaled_bridson_ainv< ValueType, cusp::device_memory >M(A)')
    pc_amg = Statement(
        'cusp::precond::smoothed_aggregation< IndexType, ValueType, cusp::device_memory >M(A)')
    pc_identity = Statement(
        'cusp::identity_operator< ValueType, cusp::device_memory >M(nrows, ncols)')
    preconditioners = {
        'diagonal': pc_diagonal,
        'jacobi': pc_diagonal,
        'ainv': pc_ainv,
        'ainvcusp': pc_ainv,
        'amg': pc_amg,
        'hypre': pc_amg,
        'none': pc_identity,
        None: pc_identity,
    }
    try:
        precond_call = preconditioners[parameters['pc_type']]
    except KeyError:
        raise RuntimeError("Cusp does not support preconditioner type %s" %
                           parameters['pc_type'])

    # Krylov solver names mapped onto the CUSP call.  The gmres text is
    # formatted eagerly, whichever solver ends up being selected.
    solvers = {
        'cg': Statement('cusp::krylov::cg(A, x, b, monitor, M)'),
        'bicgstab': Statement('cusp::krylov::bicgstab(A, x, b, monitor, M)'),
        'gmres': Statement(
            'cusp::krylov::gmres(A, x, b, %(ksp_gmres_restart)d, monitor, M)'
            % parameters),
    }
    try:
        solve_call = solvers[parameters['ksp_type']]
    except KeyError:
        raise RuntimeError("Cusp does not support solver type %s" %
                           parameters['ksp_type'])
    monitor = 'monitor(b, %(ksp_max_it)d, %(ksp_rtol)g, %(ksp_atol)g)' % parameters

    # Device-side entry point: raw device pointers plus the matrix sizes.
    device_decl = FunctionDeclaration(
        Value('void', '__cusp_solve'),
        [Value('CUdeviceptr', arg)
         for arg in ('_rowptr', '_colidx', '_csrdata', '_b', '_x')] +
        [Value('int', arg) for arg in ('nrows', 'ncols', 'nnz')])
    device_lines = [
        'typedef int IndexType',
        'typedef %s ValueType' % M.ctype,
        'typedef typename cusp::array1d_view< thrust::device_ptr<IndexType> > indices',
        'typedef typename cusp::array1d_view< thrust::device_ptr<ValueType> > values',
        'typedef cusp::csr_matrix_view< indices, indices, values, IndexType, ValueType, cusp::device_memory > matrix',
        'thrust::device_ptr< IndexType > rowptr((IndexType *)_rowptr)',
        'thrust::device_ptr< IndexType > colidx((IndexType *)_colidx)',
        'thrust::device_ptr< ValueType > csrdata((ValueType *)_csrdata)',
        'thrust::device_ptr< ValueType > d_b((ValueType *)_b)',
        'thrust::device_ptr< ValueType > d_x((ValueType *)_x)',
        'indices row_offsets(rowptr, rowptr + nrows + 1)',
        'indices column_indices(colidx, colidx + nnz)',
        'values matrix_values(csrdata, csrdata + nnz)',
        'values b(d_b, d_b + nrows)',
        'values x(d_x, d_x + ncols)',
        'thrust::fill(x.begin(), x.end(), (ValueType)0)',
        'matrix A(nrows, ncols, nnz, row_offsets, column_indices, matrix_values)',
    ]
    monitor_kind = 'verbose' if parameters['ksp_monitor'] else 'default'
    device_body = [Statement(line) for line in device_lines]
    device_body.append(
        Statement('cusp::%s_monitor< ValueType > %s' % (monitor_kind, monitor)))
    # The preconditioner declaration must precede the solver call using it.
    device_body.append(precond_call)
    device_body.append(solve_call)
    nvcc_function = FunctionBody(device_decl, Block(device_body))

    # Host preamble is filled in before the device function is registered;
    # this ordering is kept exactly as it was.
    host_mod.add_to_preamble([Include('boost/python/extract.hpp'),
                              Include('string')])
    host_mod.add_to_preamble([Statement('using namespace boost::python')])
    host_mod.add_to_preamble([Statement('using namespace std')])

    nvcc_mod.add_function(nvcc_function)

    # Host-side `solve`: unpack the Python objects, then call the device code.
    host_decl = FunctionDeclaration(
        Value('void', 'solve'),
        [Value('object', arg)
         for arg in ('_rowptr', '_colidx', '_csrdata', '_b', '_x',
                     '_nrows', '_ncols', '_nnz')])
    host_lines = [
        'CUdeviceptr rowptr = extract<CUdeviceptr>(_rowptr.attr("gpudata"))',
        'CUdeviceptr colidx = extract<CUdeviceptr>(_colidx.attr("gpudata"))',
        'CUdeviceptr csrdata = extract<CUdeviceptr>(_csrdata.attr("gpudata"))',
        'CUdeviceptr b = extract<CUdeviceptr>(_b.attr("gpudata"))',
        'CUdeviceptr x = extract<CUdeviceptr>(_x.attr("gpudata"))',
        'int nrows = extract<int>(_nrows)',
        'int ncols = extract<int>(_ncols)',
        'int nnz = extract<int>(_nnz)',
        '__cusp_solve(rowptr, colidx, csrdata, b, x, nrows, ncols, nnz)',
    ]
    host_mod.add_function(
        FunctionBody(host_decl,
                     Block([Statement(line) for line in host_lines])))

    # nvcc flags: build for the sm_20 architecture with -O3.
    for flag in ('-arch', 'sm_20', '-O3'):
        device_toolchain.cflags.append(flag)
    module = nvcc_mod.compile(host_toolchain, device_toolchain,
                              debug=configuration["debug"])

    _cusp_cache[_key(M.ctype, parameters)] = module
    return module
コード例 #3
0
ファイル: ThrustInterop.py プロジェクト: bbkiwi/SpyderWork

import pycuda
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np

from codepy.cgen import *
from codepy.bpl import BoostPythonModule
from codepy.cuda import CudaModule

# Host module, compiled for the CPU; this is the Boost.Python side.
host_mod = BoostPythonModule()

# Device module, compiled with NVCC; it is constructed around the host module.
nvcc_mod = CudaModule(host_mod)

# Describe the device module code.
# Headers the NVCC-compiled code includes.
nvcc_includes = [
    'thrust/sort.h',
    'thrust/device_vector.h',
    'cuda.h',
    ]
# Add one Include per header to the device module's preamble.
nvcc_mod.add_to_preamble([Include(x) for x in nvcc_includes])

#NVCC function
nvcc_function = FunctionBody(
    FunctionDeclaration(Value('void', 'my_sort'),
                        [Value('CUdeviceptr', 'input_ptr'),
コード例 #4
0
        Pointer, Module, Block, Initializer, Assign, \
        Include, Statement, If
from codepy.bpl import BoostPythonModule
from codepy.cuda import CudaModule

import codepy.jit, codepy.toolchain

# INFO: the following code closely follows the tutorial found at
# http://wiki.tiker.net/PyCuda/Examples/ThrustInterop
# Many thanks to Bryan Catanzaro!

# Host module, compiled for the CPU; this is the Boost.Python side.
host_mod = BoostPythonModule()

# Device module, compiled with NVCC; it is constructed around the host module.
nvcc_mod = CudaModule(host_mod)

# Describe the device module code.
# Headers the NVCC-compiled code includes.
nvcc_includes = [
    'thrust/sort.h',
    'thrust/binary_search.h',
    'thrust/device_vector.h',
    'cuda.h',
    ]
# Add one Include per header to the device module's preamble.
nvcc_mod.add_to_preamble([Include(x) for x in nvcc_includes])

#NVCC function
nvcc_functions = [
    FunctionBody(
コード例 #5
0
ファイル: cuda.py プロジェクト: POETSII/PyOP2
def _cusp_solver(M, parameters):
    """Return a compiled CUSP solver module for ``M.ctype`` and ``parameters``.

    Compiled modules are kept in ``_cusp_cache`` under a key built from the
    matrix value type and the solver-related entries of ``parameters``; a
    truthy cached entry is returned without regenerating any code.  Otherwise
    a Boost.Python host module exposing ``solve`` and a CUDA module defining
    ``__cusp_solve`` are generated with cgen/codepy, compiled, cached and
    returned.

    :arg M: object whose ``ctype`` attribute is used as the C type of the
        ``ValueType`` typedef in the generated code.
    :arg parameters: mapping providing ``ksp_type``, ``pc_type``,
        ``ksp_rtol``, ``ksp_atol``, ``ksp_max_it``, ``ksp_gmres_restart`` and
        ``ksp_monitor``.
    :raises RuntimeError: if ``pc_type`` or ``ksp_type`` is not one of the
        types listed in the tables below.
    """
    def _key(ctype, params):
        # Every option that ends up in the generated source is part of the key.
        option_names = ('ksp_type', 'pc_type', 'ksp_rtol', 'ksp_atol',
                        'ksp_max_it', 'ksp_gmres_restart', 'ksp_monitor')
        return (ctype,) + tuple(params[name] for name in option_names)

    cached = _cusp_cache.get(_key(M.ctype, parameters))
    if cached:
        return cached

    import codepy.toolchain
    from cgen import FunctionBody, FunctionDeclaration, \
        Block, Statement, Include, Value
    from codepy.bpl import BoostPythonModule
    from codepy.cuda import CudaModule

    host_toolchain = codepy.toolchain.guess_toolchain()
    device_toolchain = codepy.toolchain.guess_nvcc_toolchain()
    cusp_home = os.environ.get('CUSP_HOME')
    if cusp_home is not None:
        device_toolchain.add_library('cusp', [cusp_home], [], [])

    host_mod = BoostPythonModule()
    nvcc_mod = CudaModule(host_mod)

    # Preamble of the device module: headers first, then the using-directive.
    device_headers = ('thrust/device_vector.h',
                      'thrust/fill.h',
                      'cusp/csr_matrix.h',
                      'cusp/krylov/cg.h',
                      'cusp/krylov/bicgstab.h',
                      'cusp/krylov/gmres.h',
                      'cusp/precond/diagonal.h',
                      'cusp/precond/smoothed_aggregation.h',
                      'cusp/precond/ainv.h',
                      'string')
    nvcc_mod.add_to_preamble([Include(header) for header in device_headers])
    nvcc_mod.add_to_preamble([Statement('using namespace std')])

    # PETSc preconditioner names mapped onto the CUSP declaration of `M`.
    pc_diagonal = Statement(
        'cusp::precond::diagonal< ValueType, cusp::device_memory >M(A)')
    pc_ainv = Statement(
        'cusp::precond::scaled_bridson_ainv< ValueType, cusp::device_memory >M(A)')
    pc_amg = Statement(
        'cusp::precond::smoothed_aggregation< IndexType, ValueType, cusp::device_memory >M(A)')
    pc_identity = Statement(
        'cusp::identity_operator< ValueType, cusp::device_memory >M(nrows, ncols)')
    preconditioners = {
        'diagonal': pc_diagonal,
        'jacobi': pc_diagonal,
        'ainv': pc_ainv,
        'ainvcusp': pc_ainv,
        'amg': pc_amg,
        'hypre': pc_amg,
        'none': pc_identity,
        None: pc_identity,
    }
    try:
        precond_call = preconditioners[parameters['pc_type']]
    except KeyError:
        raise RuntimeError("Cusp does not support preconditioner type %s" %
                           parameters['pc_type'])

    # Krylov solver names mapped onto the CUSP call.  The gmres text is
    # formatted eagerly, whichever solver ends up being selected.
    solvers = {
        'cg': Statement('cusp::krylov::cg(A, x, b, monitor, M)'),
        'bicgstab': Statement('cusp::krylov::bicgstab(A, x, b, monitor, M)'),
        'gmres': Statement(
            'cusp::krylov::gmres(A, x, b, %(ksp_gmres_restart)d, monitor, M)'
            % parameters),
    }
    try:
        solve_call = solvers[parameters['ksp_type']]
    except KeyError:
        raise RuntimeError("Cusp does not support solver type %s" %
                           parameters['ksp_type'])
    monitor = 'monitor(b, %(ksp_max_it)d, %(ksp_rtol)g, %(ksp_atol)g)' % parameters

    # Device-side entry point: raw device pointers plus the matrix sizes.
    device_decl = FunctionDeclaration(
        Value('void', '__cusp_solve'),
        [Value('CUdeviceptr', arg)
         for arg in ('_rowptr', '_colidx', '_csrdata', '_b', '_x')] +
        [Value('int', arg) for arg in ('nrows', 'ncols', 'nnz')])
    device_lines = [
        'typedef int IndexType',
        'typedef %s ValueType' % M.ctype,
        'typedef typename cusp::array1d_view< thrust::device_ptr<IndexType> > indices',
        'typedef typename cusp::array1d_view< thrust::device_ptr<ValueType> > values',
        'typedef cusp::csr_matrix_view< indices, indices, values, IndexType, ValueType, cusp::device_memory > matrix',
        'thrust::device_ptr< IndexType > rowptr((IndexType *)_rowptr)',
        'thrust::device_ptr< IndexType > colidx((IndexType *)_colidx)',
        'thrust::device_ptr< ValueType > csrdata((ValueType *)_csrdata)',
        'thrust::device_ptr< ValueType > d_b((ValueType *)_b)',
        'thrust::device_ptr< ValueType > d_x((ValueType *)_x)',
        'indices row_offsets(rowptr, rowptr + nrows + 1)',
        'indices column_indices(colidx, colidx + nnz)',
        'values matrix_values(csrdata, csrdata + nnz)',
        'values b(d_b, d_b + nrows)',
        'values x(d_x, d_x + ncols)',
        'thrust::fill(x.begin(), x.end(), (ValueType)0)',
        'matrix A(nrows, ncols, nnz, row_offsets, column_indices, matrix_values)',
    ]
    monitor_kind = 'verbose' if parameters['ksp_monitor'] else 'default'
    device_body = [Statement(line) for line in device_lines]
    device_body.append(
        Statement('cusp::%s_monitor< ValueType > %s' % (monitor_kind, monitor)))
    # The preconditioner declaration must precede the solver call using it.
    device_body.append(precond_call)
    device_body.append(solve_call)
    nvcc_function = FunctionBody(device_decl, Block(device_body))

    # Host preamble is filled in before the device function is registered;
    # this ordering is kept exactly as it was.
    host_mod.add_to_preamble([Include('boost/python/extract.hpp'),
                              Include('string')])
    host_mod.add_to_preamble([Statement('using namespace boost::python')])
    host_mod.add_to_preamble([Statement('using namespace std')])

    nvcc_mod.add_function(nvcc_function)

    # Host-side `solve`: unpack the Python objects, then call the device code.
    host_decl = FunctionDeclaration(
        Value('void', 'solve'),
        [Value('object', arg)
         for arg in ('_rowptr', '_colidx', '_csrdata', '_b', '_x',
                     '_nrows', '_ncols', '_nnz')])
    host_lines = [
        'CUdeviceptr rowptr = extract<CUdeviceptr>(_rowptr.attr("gpudata"))',
        'CUdeviceptr colidx = extract<CUdeviceptr>(_colidx.attr("gpudata"))',
        'CUdeviceptr csrdata = extract<CUdeviceptr>(_csrdata.attr("gpudata"))',
        'CUdeviceptr b = extract<CUdeviceptr>(_b.attr("gpudata"))',
        'CUdeviceptr x = extract<CUdeviceptr>(_x.attr("gpudata"))',
        'int nrows = extract<int>(_nrows)',
        'int ncols = extract<int>(_ncols)',
        'int nnz = extract<int>(_nnz)',
        '__cusp_solve(rowptr, colidx, csrdata, b, x, nrows, ncols, nnz)',
    ]
    host_mod.add_function(
        FunctionBody(host_decl,
                     Block([Statement(line) for line in host_lines])))

    # nvcc flags: build for the sm_20 architecture with -O3.
    for flag in ('-arch', 'sm_20', '-O3'):
        device_toolchain.cflags.append(flag)
    module = nvcc_mod.compile(host_toolchain, device_toolchain,
                              debug=configuration["debug"])

    _cusp_cache[_key(M.ctype, parameters)] = module
    return module
コード例 #6
0
#!python

import pycuda
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np

from cgen import *
from codepy.bpl import BoostPythonModule
from codepy.cuda import CudaModule

# Host module, compiled for the CPU; this is the Boost.Python side.
host_mod = BoostPythonModule()

# Device module, compiled with NVCC; it is constructed around the host module.
nvcc_mod = CudaModule(host_mod)

# Describe the device module code.
# Headers the NVCC-compiled code includes.
nvcc_includes = [
    'thrust/sort.h',
    'thrust/device_vector.h',
    'cuda.h',
]
# Add one Include per header to the device module's preamble.
nvcc_mod.add_to_preamble([Include(x) for x in nvcc_includes])

#NVCC function
nvcc_function = FunctionBody(
    FunctionDeclaration(
        Value('void', 'my_sort'),
コード例 #7
0
ファイル: vectorAdd.py プロジェクト: neostoic/SpeakerDiarizer
# C source of Cleanup(): releases each d_* buffer with cudaFree and each h_*
# buffer with free when the pointer is non-null, then calls cudaThreadExit().
cleanup_s = """void Cleanup() {
    if (d_A) cudaFree(d_A);
    if (d_B) cudaFree(d_B);
    if (d_C) cudaFree(d_C);
    if (h_A) free(h_A);
    if (h_B) free(h_B);
    if (h_C) free(h_C);
    cutilSafeCall( cudaThreadExit() );
    return;}"""
# Register the rendered main body under the name "main".
# NOTE(review): c_main_rend is defined outside this excerpt -- confirm its type.
host.add_function(c_main_rend, fname="main")
# Each helper's source text is appended to the end of the module body, while
# its prototype is inserted at index 0 so it comes before the appended code.
host.module.mod_body.append(Line(cleanup_s))
host.module.mod_body.insert(0, FunctionDeclaration(Value('void', "Cleanup"),[]))
host.module.mod_body.append(Line(randominit_s))
# Prototype for RandomInit: a float32 pointer `data` and an int32 `n`.
host.module.mod_body.insert(0, FunctionDeclaration(Value('void', "RandomInit"),[Pointer(POD(numpy.float32,'data')),POD(numpy.int32,'n')]))

# CUDA module constructed around the host module, with cuda.h in its preamble.
cuda_mod = CudaModule(host.module)
cuda_mod.add_to_preamble([Include('cuda.h')])

# Add the rendered kernel source to the CUDA module as a single Line.
cuda_mod.add_to_module([Line(cu_kern_rend)])

# Body of launch_VecAdd, one C statement per list entry.  The entries must be
# comma-separated: adjacent string literals are otherwise concatenated by
# Python into a single element.  No trailing ';' is written here because each
# entry is wrapped in a cgen Statement, which supplies the terminator.
launch_statements = [
    'int threadsPerBlock = 256',
    'int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock',
    'VecAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, N)',
]
# Generated function launch_VecAdd, declared with three float pointers
# (d_A, d_B, d_C) and an int N; its body holds one cgen Statement per entry
# of launch_statements.
launch_func = FunctionBody(  
                FunctionDeclaration(Value('void', 'launch_VecAdd'),
                                [   Pointer(Value('float', 'd_A')), 
                                    Pointer(Value('float', 'd_B')), 
                                    Pointer(Value('float', 'd_C')), 
                                    Value('int', 'N')  ]),
                Block([Statement(s) for s in launch_statements]) )