Example #1
def init_all_devices():
    global DEVICES, DEVICE_INFO
    if DEVICES is not None:
        return DEVICES
    log.info("CUDA initialization (this may take a few seconds)")
    driver.init()
    DEVICES = []
    DEVICE_INFO = {}
    log("CUDA driver version=%s", driver.get_driver_version())
    ngpus = driver.Device.count()
    if ngpus==0:
        log.info("CUDA %s / PyCUDA %s, no devices found", ".".join([str(x) for x in driver.get_version()]), pycuda.VERSION_TEXT)
        return DEVICES
    da = driver.device_attribute
    cf = driver.ctx_flags
    for i in range(ngpus):
        device = None
        context = None
        devinfo = "gpu %i" % i
        try:
            device = driver.Device(i)
            devinfo = device_info(device)
            log(" + testing device %s: %s", i, devinfo)
            DEVICE_INFO[i] = devinfo
            host_mem = device.get_attribute(da.CAN_MAP_HOST_MEMORY)
            if not host_mem:
                log.warn("skipping device %s (cannot map host memory)", devinfo)
                continue
            context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
            try:
                log("   created context=%s", context)
                log("   api version=%s", context.get_api_version())
                free, total = driver.mem_get_info()
                log("   memory: free=%sMB, total=%sMB",  int(free/1024/1024), int(total/1024/1024))
                log("   multi-processors: %s, clock rate: %s", device.get_attribute(da.MULTIPROCESSOR_COUNT), device.get_attribute(da.CLOCK_RATE))
                log("   max block sizes: (%s, %s, %s)", device.get_attribute(da.MAX_BLOCK_DIM_X), device.get_attribute(da.MAX_BLOCK_DIM_Y), device.get_attribute(da.MAX_BLOCK_DIM_Z))
                log("   max grid sizes: (%s, %s, %s)", device.get_attribute(da.MAX_GRID_DIM_X), device.get_attribute(da.MAX_GRID_DIM_Y), device.get_attribute(da.MAX_GRID_DIM_Z))
                max_width = device.get_attribute(da.MAXIMUM_TEXTURE2D_WIDTH)
                max_height = device.get_attribute(da.MAXIMUM_TEXTURE2D_HEIGHT)
                log("   maximum texture size: %sx%s", max_width, max_height)
                log("   max pitch: %s", device.get_attribute(da.MAX_PITCH))
                SMmajor, SMminor = device.compute_capability()
                compute = (SMmajor<<4) + SMminor
                log("   compute capability: %#x (%s.%s)", compute, SMmajor, SMminor)
                if i==0:
                    #we print the list info "header" from inside the loop
                    #so that the log output is bunched up together
                    log.info("CUDA %s / PyCUDA %s, found %s device%s:",
                             ".".join([str(x) for x in driver.get_version()]), pycuda.VERSION_TEXT, ngpus, engs(ngpus))
                DEVICES.append(i)
                log.info("  + %s (memory: %s%% free, compute: %s.%s)", device_info(device), 100*free/total, SMmajor, SMminor)
            finally:
                context.pop()
        except Exception as e:
            log.error("error on device %s: %s", devinfo, e)
    return DEVICES
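
This function leans on module-level state and helpers that the excerpt does not show: the DEVICES/DEVICE_INFO globals, a callable log object, and the device_info/engs helpers. A minimal sketch of that scaffolding, with stand-ins for the project-specific pieces (the real definitions in the originating project may differ):

import logging
import pycuda
from pycuda import driver

DEVICES = None
DEVICE_INFO = None

class _Logger:
    # stand-in for the project's logger, which is callable for debug output
    def __init__(self, name="cuda"):
        self._log = logging.getLogger(name)
    def __call__(self, msg, *args):
        self._log.debug(msg, *args)
    def info(self, msg, *args):
        self._log.info(msg, *args)
    def warn(self, msg, *args):
        self._log.warning(msg, *args)
    def error(self, msg, *args):
        self._log.error(msg, *args)

log = _Logger()

def engs(n):
    # hypothetical pluralization helper: "device" vs "devices"
    return "s" if n != 1 else ""

def device_info(device):
    # hypothetical one-line device description
    return "%s (compute %s.%s)" % ((device.name(),) + device.compute_capability())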
Example #2
    def test_multi_context(self):
        if drv.get_version() < (2,0,0):
            return
        if drv.get_version() >= (2,2,0):
            if drv.Context.get_device().compute_mode == drv.compute_mode.EXCLUSIVE:
                return

        mem_a = drv.mem_alloc(50)
        ctx2 = drv.Context.get_device().make_context()
        mem_b = drv.mem_alloc(60)

        del mem_a
        del mem_b
        ctx2.detach()
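
For reference, the same create/allocate/detach pattern outside a test harness, as a minimal sketch (assumes only pycuda; note that destroying the current context also pops it off the thread's context stack):

import pycuda.driver as drv

drv.init()
ctx1 = drv.Device(0).make_context()   # created and made current
mem_a = drv.mem_alloc(50)             # belongs to ctx1
ctx2 = drv.Device(0).make_context()   # pushed on top of ctx1
mem_b = drv.mem_alloc(60)             # belongs to ctx2
del mem_a                             # freed in its owning context
del mem_b
ctx2.detach()                         # drop ctx2, as in the test; ctx1 is current again
ctx1.detach()                         # hypothetical cleanup of the outer context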
Example #3
def get_pycuda_info():
    init_all_devices()
    return {"version"               : pycuda.VERSION,
            "version.text"          : pycuda.VERSION_TEXT,
            "version.status"        : pycuda.VERSION_STATUS,
            "driver.version"        : driver.get_version(),
            "driver.driver_version" : driver.get_driver_version()}
Example #4
def print_gpu_info():
    print('CUDA version : %d.%d.%d' % cuda.get_version())
    gpu0 = cuda.Device(0)
    ngpu = gpu0.count()

    gpu_list = [cuda.Device(i) for i in range(ngpu)]
    gpu_groups = {}
    for gpu in gpu_list:
        name = gpu.name()
        if name not in gpu_groups:
            gpu_groups[name] = {'count' : 1}
            gpu_groups[name]['compute capability'] = gpu.compute_capability()
            gpu_groups[name]['global mem size'] = gpu.total_memory()
            gpu_groups[name]['multiprocessor'] = \
                    gpu.get_attribute(cuda.device_attribute.MULTIPROCESSOR_COUNT)
        else:
            gpu_groups[name]['count'] += 1

    for name, props in gpu_groups.items():
        print('Device : %d GPU' % props['count'])
        print('  name: %s' % name)
        print('  compute capability: %d.%d' % props['compute capability'])
        print('  multiprocessor: %d' % props['multiprocessor'])
        print('  global mem size: %1.2f %s' % \
                common.binary_prefix_nbytes(props['global mem size']) )
        print('')
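
The common.binary_prefix_nbytes helper is not shown in this excerpt; judging by the '%1.2f %s' format string it returns a (value, unit) pair. A hypothetical stand-in:

def binary_prefix_nbytes(nbytes):
    # hypothetical: scale a byte count to the largest binary prefix
    for factor, unit in ((1 << 30, 'GiB'), (1 << 20, 'MiB'), (1 << 10, 'KiB')):
        if nbytes >= factor:
            return nbytes / float(factor), unit
    return float(nbytes), 'bytes'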
Example #5
def get_info():
    return {
        "version": pycuda.VERSION,
        "version.text": pycuda.VERSION_TEXT,
        "version.status": pycuda.VERSION_STATUS,
        "driver.version": driver.get_version(),
        "driver.driver_version": driver.get_driver_version(),
    }
Example #6
def get_cuda_info():
    init_all_devices()
    return {
            "driver"    : {
                           "version"        : driver.get_version(),
                           "driver_version" : driver.get_driver_version(),
                           }
            }
Example #7
    def test_register_host_memory(self):
        if drv.get_version() < (4,):
            from py.test import skip
            skip("register_host_memory only exists on CUDA 4.0 and later")

        import sys
        if sys.platform == "darwin":
            from py.test import skip
            skip("register_host_memory is not supported on OS X")

        a = drv.aligned_empty((2**20,), np.float64, alignment=4096)
        drv.register_host_memory(a)
Example #8
    def __init__(self, device=0, iterations=7, compiler_options=None):
        """instantiate CudaFunctions object used for interacting with the CUDA device

        Instantiating this object will inspect and store certain device properties at
        runtime, which are used during compilation and/or execution of kernels by the
        kernel tuner. It also maintains a reference to the most recently compiled
        source module for copying data to constant memory before kernel launch.

        :param device: Number of CUDA device to use for this context
        :type device: int

        :param iterations: Number of iterations used while benchmarking a kernel, 7 by default.
        :type iterations: int
        """
        self.allocations = []
        self.texrefs = []
        if not drv:
            raise ImportError("Error: pycuda not installed, please install e.g. using 'pip install pycuda'.")

        drv.init()
        self.context = drv.Device(device).make_context()

        #inspect device properties
        devprops = {str(k): v for (k, v) in self.context.get_device().get_attributes().items()}
        self.max_threads = devprops['MAX_THREADS_PER_BLOCK']
        cc = str(devprops.get('COMPUTE_CAPABILITY_MAJOR', '0')) + str(devprops.get('COMPUTE_CAPABILITY_MINOR', '0'))
        if cc == "00":
            cc = self.context.get_device().compute_capability()
        self.cc = str(cc[0])+str(cc[1])
        self.iterations = iterations
        self.current_module = None
        self.compiler_options = compiler_options or []

        #select PyCUDA source module
        if int(self.cc) >= 35:
            self.source_mod = DynamicSourceModule
        else:
            self.source_mod = SourceModule
        if not self.source_mod:
            raise ImportError("Error: pycuda not correctly installed, please ensure pycuda is installed on the same CUDA installation as you're using right now")

        #collect environment information
        env = dict()
        env["device_name"] = self.context.get_device().name()
        env["cuda_version"] = ".".join([str(i) for i in drv.get_version()])
        env["compute_capability"] = self.cc
        env["iterations"] = self.iterations
        env["compiler_options"] = compiler_options
        env["device_properties"] = devprops
        self.env = env
        self.name = env["device_name"]
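
A minimal usage sketch; the explicit context.pop() at the end is an assumption about caller-side cleanup (the originating project releases the context in its own teardown code):

tuner = CudaFunctions(device=0, iterations=7)
print("device:", tuner.name)
print("compute capability:", tuner.cc)
print("CUDA toolkit:", tuner.env["cuda_version"])
tuner.context.pop()   # release the context created in __init__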
Example #9
    def test_register_host_memory(self):
        if drv.get_version() < (4,):
            from py.test import skip
            skip("register_host_memory only exists on CUDA 4.0 and later")

        import sys
        if sys.platform == "darwin":
            from py.test import skip
            skip("register_host_memory is not supported on OS X")

        a = drv.aligned_empty((2**20,), np.float64)
        a_pin = drv.register_host_memory(a)

        gpu_ary = drv.mem_alloc_like(a)
        stream = drv.Stream()
        drv.memcpy_htod_async(gpu_ary, a_pin, stream)
        drv.Context.synchronize()
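
Since the copy is queued on stream, a narrower alternative to the device-wide Context.synchronize() is to wait on that stream alone, for example:

stream.synchronize()     # block until work queued on this stream finishes
assert stream.is_done()  # the stream is now idle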
Example #10
def init_all_devices():
    global DEVICES
    if DEVICES is not None:
        return DEVICES
    log.info("CUDA initialization (this may take a few seconds)")
    driver.init()
    DEVICES = []
    log("CUDA driver version=%s", driver.get_driver_version())
    log.info("PyCUDA version=%s", pycuda.VERSION_TEXT)
    ngpus = driver.Device.count()
    log.info("CUDA version=%s found %s device(s):", ".".join([str(x) for x in driver.get_version()]), ngpus)
    da = driver.device_attribute
    cf = driver.ctx_flags
    for i in range(ngpus):
        device = None
        context = None
        try:
            device = driver.Device(i)
            log(" + testing device %s: %s", i, device_info(device))
            host_mem = device.get_attribute(da.CAN_MAP_HOST_MEMORY)
            if not host_mem:
                log.warn("skipping device %s (cannot map host memory)", device_info(device))
                continue
            context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
            log("   created context=%s", context)
            log("   api version=%s", context.get_api_version())
            free, total = driver.mem_get_info()
            log("   memory: free=%sMB, total=%sMB",  int(free/1024/1024), int(total/1024/1024))
            log("   multi-processors: %s, clock rate: %s", device.get_attribute(da.MULTIPROCESSOR_COUNT), device.get_attribute(da.CLOCK_RATE))
            log("   max block sizes: (%s, %s, %s)", device.get_attribute(da.MAX_BLOCK_DIM_X), device.get_attribute(da.MAX_BLOCK_DIM_Y), device.get_attribute(da.MAX_BLOCK_DIM_Z))
            log("   max grid sizes: (%s, %s, %s)", device.get_attribute(da.MAX_GRID_DIM_X), device.get_attribute(da.MAX_GRID_DIM_Y), device.get_attribute(da.MAX_GRID_DIM_Z))
            max_width = device.get_attribute(da.MAXIMUM_TEXTURE2D_WIDTH)
            max_height = device.get_attribute(da.MAXIMUM_TEXTURE2D_HEIGHT)
            log("   maximum texture size: %sx%s", max_width, max_height)
            log("   max pitch: %s", device.get_attribute(da.MAX_PITCH))
            SMmajor, SMminor = device.compute_capability()
            compute = (SMmajor<<4) + SMminor
            log("   compute capability: %#x (%s.%s)", compute, SMmajor, SMminor)
            try:
                DEVICES.append(i)
                log.info(" + %s (memory %s%% free, compute %#x)", device_info(device), 100*free/total, compute)
            finally:
                context.pop()
        except Exception as e:
            log.error("error on device %s: %s", (device or i), e)
Example #11
def gpu_stat():
    if torch.cuda.is_available():

        def pretty_bytes(nbytes, precision=1):
            # "nbytes" avoids shadowing the builtin bytes type
            abbrevs = ((1<<50, 'PB'), (1<<40, 'TB'), (1<<30, 'GB'), (1<<20, 'MB'), (1<<10, 'kB'), (1, 'bytes'))
            if nbytes == 1:
                return '1 byte'
            for factor, suffix in abbrevs:
                if nbytes >= factor:
                    break
            return '%.*f%s' % (precision, nbytes / factor, suffix)

        device = autoinit.device
        free, total = driver.mem_get_info()
        print()
        print('GPU Name: %s' % device.name())
        print('GPU Memory: %s' % pretty_bytes(device.total_memory()))
        print('CUDA Version: %s' % str(driver.get_version()))
        print('GPU Free Memory: %d%%' % (free * 100 // total))
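
For instance, the nested pretty_bytes helper (shown here as if lifted to module scope) turns raw byte counts into readable strings:

pretty_bytes(3 * (1 << 30))   # -> '3.0GB'
pretty_bytes(512)             # -> '512.0bytes'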
Example #12
    def init_gl(self, width, height):
        super(DenseDemo, self).init_gl(width, height)

        import pycuda.gl.autoinit
        print("CUDA version: %s" % str(drv.get_version()))
        print("CUDA driver version: %s" % drv.get_driver_version())
        print("CUDA device: %s" % pycuda.gl.autoinit.device.name())
        print("\tCompute capability: %s" % str(pycuda.gl.autoinit.device.compute_capability()))
        print("\tTotal memory: %s" % pycuda.gl.autoinit.device.total_memory())

        self.ffusion = FreenectFusion(kc.K_ir, kc.K_rgb, kc.T, side=128)
        self.bbox = self.ffusion.get_bounding_box()
        #freenect.sync_set_led(2)

        # Create a texture.
        self.gl_rgb_texture = gl.glGenTextures(1)
        gl.glBindTexture(gl.GL_TEXTURE_2D, self.gl_rgb_texture)
        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR)
        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR)
Example #13
    def __init__(self, blocking=False, use_cache=True):
        self.blocking = blocking
        self.use_cache = use_cache
        self.logger = logging.getLogger(__name__)
        self.kernels = {}
        
        self.module_path = os.path.dirname(os.path.realpath(__file__))
        
        #Initialize cuda (must be first call to PyCUDA)
        cuda.init(flags=0)
        
        self.logger.info("PyCUDA version %s", str(pycuda.VERSION_TEXT))
        
        #Print some info about CUDA
        self.logger.info("CUDA version %s", str(cuda.get_version()))
        self.logger.info("Driver version %s",  str(cuda.get_driver_version()))

        self.cuda_device = cuda.Device(0)
        self.logger.info("Using '%s' GPU", self.cuda_device.name())
        self.logger.debug(" => compute capability: %s", str(self.cuda_device.compute_capability()))
        self.logger.debug(" => memory: %d MB", self.cuda_device.total_memory() / (1024*1024))

        # Create the CUDA context
        if (self.blocking):
            self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_BLOCKING_SYNC)
            self.logger.warning("Using blocking context")
        else:
            self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_AUTO)
        
        self.logger.info("Created context handle <%s>", str(self.cuda_context.handle))

        #Create cache dir for cubin files
        if (self.use_cache):
            self.cache_path = os.path.join(self.module_path, "cuda_cache") 
            if not os.path.isdir(self.cache_path):
                os.mkdir(self.cache_path)
            self.logger.debug("Using CUDA cache dir %s", self.cache_path)
Example #14
        }
    }
    """)
func = mod.get_function("double_array")
func(struct_arr, block=(32, 1, 1), grid=(2, 1))

print("doubled arrays")
print(array1)
print(array2)

func(numpy.uintp(do2_ptr), block=(32, 1, 1), grid=(1, 1))
print("doubled second only")
print(array1)
print(array2)

if cuda.get_version() < (4, ):
    func.prepare("P", block=(32, 1, 1))
    func.prepared_call((2, 1), struct_arr)
else:
    func.prepare("P")
    block = (32, 1, 1)
    func.prepared_call((2, 1), block, struct_arr)


print("doubled again")
print(array1)
print(array2)

if cuda.get_version() < (4, ):
    func.prepared_call((1, 1), do2_ptr)
else:
Example #15
#
# Example based on dnorm from RCUDA
# Timing code from http://wiki.tiker.net/PyCuda/Examples/SimpleSpeedTest
#

import pycuda.autoinit
import pycuda.driver as drv
import numpy as np
import scipy as sp
from scipy.stats import norm
from pycuda.compiler import SourceModule

# Versions:
print(drv.get_version())         # CUDA toolkit version as a tuple
print(drv.get_driver_version())  # driver version as an integer

m = SourceModule("""
#include <stdio.h>
__global__ void dnorm_kernel(float *vals, float *x, int N, float mu, float sigma, int dbg)
{
    int myblock = blockIdx.x;   // 1D-grid
    int blocksize = blockDim.x; // 1D-block
    int subthread = threadIdx.x;
    int idx = myblock * blocksize + subthread;
    if (idx < N) {
        if (dbg){
            printf("thread idx: %04d\\t x[%d] = %f\\t (n=%d,mu=%f,sigma=%f)\\n",idx,idx,x[idx],N,mu,sigma);
        }
        float std = (x[idx] - mu)/sigma;
        float e = exp( - 0.5 * std * std);
        vals[idx] = e / ( sigma * sqrt(2 * 3.141592653589793));
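
The excerpt cuts off inside the kernel string; assuming the module compiles as in the original, the host-side launch might look like the following sketch (grid/block sizes are illustrative):

N = 1024
x = np.random.randn(N).astype(np.float32)
vals = np.zeros_like(x)
dnorm = m.get_function("dnorm_kernel")
dnorm(drv.Out(vals), drv.In(x), np.int32(N),
      np.float32(0.0), np.float32(1.0), np.int32(0),
      block=(256, 1, 1), grid=((N + 255) // 256, 1))
# compare against scipy's reference implementation
np.testing.assert_allclose(vals, norm.pdf(x), rtol=1e-4)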
Example #16
#!/usr/bin/env python
from __future__ import division, print_function, absolute_import
import functools
import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv

from cuda_cffi.misc import init

toolkit_version = drv.get_version()

if toolkit_version < (7, 0, 0):
    raise ImportError("cuSOLVER not present prior to v7.0 of the CUDA toolkit")

"""
Python interface to cuSOLVER functions.

Note: You may need to set the environment variable CUDA_ROOT to the base of
your CUDA installation.
"""
# import low level cuSOLVER python wrappers and constants

try:
    from cuda_cffi._cusolver_cffi import *
except Exception as e:
    print(repr(e))
    estr = "autogenerattion and import of cuSOLVER wrappers failed\n"
    estr += ("Try setting the CUDA_ROOT environment variable to the base of "
             "your CUDA installation.  The autogeneration script tries to "
             "find the CUSOLVER headers in CUDA_ROOT/include/\n")
Example #17
    #from brian.experimental.ccodegen import AutoCompiledNonlinearStateUpdater
    set_global_preferences(usecodegen=False)

    #duration = 10*second
    #N = 1000
    #domonitor = False

    duration = 1000 * ms
    N = 100
    domonitor = False
    showfinal = False
    forcesync = True
    method = 'gpu' # methods are 'c', 'python' and 'gpu'

    if drv.get_version() == (2, 0, 0): # cuda version
        precision = 'float'
    elif drv.get_version() > (2, 0, 0):
        precision = 'double'
    else:
        raise Exception("CUDA 2.0 required")
    #precision = 'float'
    import buffering
    buffering.DEBUG_BUFFER_CACHE = False

#    eqs = Equations('''
#    #dV/dt = -V*V/(10*ms) : 1
#    dV/dt = cos(2*pi*t/(100*ms))/(10*ms) : 1
#    #dV/dt = -V*V*V*V*V/(100*ms) : 1
#    #dW/dt = -W*W*W*W*W/(100*ms) : 1
#    #dV/dt = cos(2*pi*t/(100*ms))/(10*ms) : 1
Example #18
def log_sys_info():
    log.info("PyCUDA version=%s", ".".join([str(x) for x in driver.get_version()]))
    log.info("PyCUDA driver version=%s", driver.get_driver_version())