Esempio n. 1
0
def get_default_handle():
    """Return the cuDNN handle cached for the current device.

    Handles are kept per device in the module-level ``_handles`` dict and
    created on first request. Whenever the current process id differs from
    the recorded ``_pid`` the cache is reset and ``shutdown`` is registered
    with ``atexit`` before the new pid is recorded.
    """
    global _handles, _pid

    current_pid = os.getpid()
    if current_pid != _pid:
        # Nothing has been set up for this process id yet.
        _handles = {}
        atexit.register(shutdown)
        _pid = current_pid

    current_device = cuda.Context.get_device()
    if current_device not in _handles:
        # First request for this device: create and cache its handle.
        _handles[current_device] = libcudnn.cudnnCreate()
    return _handles[current_device]
Esempio n. 2
0
import math
import numpy as np
import pycuda.autoinit
import libcudnn
from gputensor import GPUTensor

# Element type used for the input array and the output tensor.
dt = np.float16 

# Host array of shape (1, 1, 4, 4) with every element equal to 2.0.
xh = np.ones((1,1,4,4), dtype=dt) * 2.0
# print(xh)

cudnn_context = libcudnn.cudnnCreate()

print("CUDNN Version: %d" % libcudnn.cudnnGetVersion())

# x wraps the host data; y is built from the same shape and dtype only.
x = GPUTensor(xh)
y = GPUTensor(xh.shape, dtype=dt)

# Element type used for the four single-element parameter tensors below.
pdt = np.float32

# NOTE(review): the names suggest batch-normalization scale, bias, mean and
# variance -- confirm against the cuDNN call that consumes them.
w = GPUTensor(np.ones(1).reshape(1,1,1,1), dtype=pdt)
bias = GPUTensor(np.zeros(1).reshape(1,1,1,1), dtype=pdt)
mean = GPUTensor(np.ones(1).reshape(1,1,1,1), dtype=pdt)
var = GPUTensor(np.ones(1).reshape(1,1,1,1) * 0.5, dtype=pdt)
# Descriptors for the input and output tensors, printed for inspection.
x_desc = x.get_cudnn_tensor_desc()
y_desc = y.get_cudnn_tensor_desc()
print(x_desc)
print(y_desc)

# Descriptor taken from ``var``; w, bias, mean and var are all built with the
# same (1, 1, 1, 1) shape and dtype.
param_desc = var.get_cudnn_tensor_desc()
Esempio n. 3
0
# Two drv.Event objects: start_bench() records the first, end_bench() records
# the second and measures the time between them.
start, end = (drv.Event(), drv.Event())

def start_bench():
    """Record the module-level ``start`` event."""
    start.record()

def end_bench(op):
    """Stop the timer and print the per-iteration time and throughput.

    Records the module-level ``end`` event, waits for it to complete, and
    divides the time elapsed since ``start`` by the module-level ``repeat``
    count. The throughput figure is derived from ``conv.flops``.

    Args:
        op: Label for the benchmarked operation; only used in the output.
    """
    end.record()
    end.synchronize()
    msecs = end.time_since(start) / repeat
    # flops / (ms * 1e6) == flops / seconds / 1e9
    gflops = conv.flops / (msecs * 1000000.0)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid Python 3 (the bare print statement is not).
    print("%7.3f msecs %8.3f gflops (%s: %s)" % (msecs, gflops, op, conv))

ng = NervanaGPU(stochastic_round=False, bench=True)

# Create a cuDNN context
cudnn = libcudnn.cudnnCreate()

# One convolution descriptor, four tensor descriptors and two filter
# descriptors.
# NOTE(review): presumably I/O = input/output, E/B = error and back-propagated
# deltas, F/U = filters and filter updates -- confirm against the code that
# configures them.
C_desc = libcudnn.cudnnCreateConvolutionDescriptor()
I_desc = libcudnn.cudnnCreateTensorDescriptor()
O_desc = libcudnn.cudnnCreateTensorDescriptor()
E_desc = libcudnn.cudnnCreateTensorDescriptor()
B_desc = libcudnn.cudnnCreateTensorDescriptor()
F_desc = libcudnn.cudnnCreateFilterDescriptor()
U_desc = libcudnn.cudnnCreateFilterDescriptor()

# Look up the cuDNN constants by name: tensor layout, element type,
# convolution mode and forward-algorithm preference.
NCHW_fmt  = libcudnn.cudnnTensorFormat['CUDNN_TENSOR_NCHW']
cu_dtype  = libcudnn.cudnnDataType['CUDNN_DATA_FLOAT']
conv_mode = libcudnn.cudnnConvolutionMode['CUDNN_CROSS_CORRELATION']
fwd_pref  = libcudnn.cudnnConvolutionFwdPreference['CUDNN_CONVOLUTION_FWD_NO_WORKSPACE']
# CUDNN_CONVOLUTION_FWD_NO_WORKSPACE
Esempio n. 4
0
import pycuda.autoinit
import pycuda.driver as drv
from pycuda import gpuarray
import libcudnn, ctypes
import numpy as np

# Create a cuDNN context
cudnn_context = libcudnn.cudnnCreate()

# Look up the cuDNN constants by name: tensor layout, element type,
# convolution mode and forward-algorithm preference.
tensor_format = libcudnn.cudnnTensorFormat['CUDNN_TENSOR_NCHW']
data_type = libcudnn.cudnnDataType['CUDNN_DATA_FLOAT']
convolution_mode = libcudnn.cudnnConvolutionMode['CUDNN_CROSS_CORRELATION']
convolution_fwd_pref = libcudnn.cudnnConvolutionFwdPreference['CUDNN_CONVOLUTION_FWD_PREFER_FASTEST']

# Two pycuda.driver events: start_bench() records the first, end_bench()
# records the second and measures the time between them.
start, end = (drv.Event(), drv.Event())

def start_bench():
    """Record the module-level ``start`` event."""
    start.record()

def end_bench(op):
    """Stop the timer and print the time elapsed since ``start``.

    Records the module-level ``end`` event, waits for it to complete and
    prints the elapsed time formatted with three decimals.

    Args:
        op: Accepted as part of the signature; not used by this function.
    """
    end.record()
    end.synchronize()
    elapsed = end.time_since(start)
    report = "%7.3f msecs" % elapsed
    print(report)

# Problem-size constants.
# NOTE(review): presumably batch size, input/output channel counts and the
# input's spatial extent -- confirm against the descriptor setup that
# consumes them.
n_input = 64
filters_in = 128
filters_out = 128
height_in = 112
width_in = 112
Esempio n. 5
0
def start_bench():
    """Record the module-level ``start`` object (the timer's starting point)."""
    start.record()


def end_bench(op):
    """Stop the timer and print the per-iteration time and throughput.

    Records the module-level ``end`` event, waits for it to complete, and
    divides the time elapsed since ``start`` by the module-level ``repeat``
    count. The throughput figure is derived from ``conv.flops``.

    Args:
        op: Label for the benchmarked operation; only used in the output.
    """
    end.record()
    end.synchronize()
    msecs = end.time_since(start) / repeat
    # flops / (ms * 1e6) == flops / seconds / 1e9
    gflops = conv.flops / (msecs * 1000000.0)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid Python 3 (the bare print statement is not).
    print("%7.3f msecs %8.3f gflops (%s: %s)" % (msecs, gflops, op, conv))


ng = NervanaGPU(stochastic_round=False, bench=True)

# Create a cuDNN context
cudnn = libcudnn.cudnnCreate()

# One convolution descriptor, four tensor descriptors and two filter
# descriptors.
# NOTE(review): presumably I/O = input/output, E/B = error and back-propagated
# deltas, F/U = filters and filter updates -- confirm against the code that
# configures them.
C_desc = libcudnn.cudnnCreateConvolutionDescriptor()
I_desc = libcudnn.cudnnCreateTensorDescriptor()
O_desc = libcudnn.cudnnCreateTensorDescriptor()
E_desc = libcudnn.cudnnCreateTensorDescriptor()
B_desc = libcudnn.cudnnCreateTensorDescriptor()
F_desc = libcudnn.cudnnCreateFilterDescriptor()
U_desc = libcudnn.cudnnCreateFilterDescriptor()

# Look up the cuDNN constants by name: tensor layout, element type,
# convolution mode and forward-algorithm preference.
NCHW_fmt = libcudnn.cudnnTensorFormat['CUDNN_TENSOR_NCHW']
cu_dtype = libcudnn.cudnnDataType['CUDNN_DATA_FLOAT']
conv_mode = libcudnn.cudnnConvolutionMode['CUDNN_CROSS_CORRELATION']
fwd_pref = libcudnn.cudnnConvolutionFwdPreference[
    'CUDNN_CONVOLUTION_FWD_NO_WORKSPACE']
Esempio n. 6
0
import pycuda.driver as drv
from pycuda import gpuarray
import libcudnn, ctypes
import numpy as np

# Sizes used to configure the RNN and its tensor descriptors.
inputsize = 100
hiddensize = 200
seqlength = 50
minibatch = 8
numlayers = 2
# Raw enum values handed to cudnnSetRNNDescriptor below.
# NOTE(review): 0 presumably selects CUDNN_LINEAR_INPUT, CUDNN_UNIDIRECTIONAL,
# CUDNN_RNN_RELU and CUDNN_DATA_FLOAT respectively -- confirm against cudnn.h.
inputmode = 0
direction = 0
mode = 0
datatype = 0

handle = libcudnn.cudnnCreate()

rnndesc = libcudnn.cudnnCreateRNNDescriptor()
dropoutdesc = libcudnn.cudnnCreateDropoutDescriptor()
# Must be reached through the module: the file only does ``import libcudnn``,
# so the bare name would raise NameError.
libcudnn.cudnnSetDropoutDescriptor(dropoutdesc, handle, 0, 0, 0, 0)
libcudnn.cudnnSetRNNDescriptor(rnndesc, hiddensize, seqlength, numlayers,
                               dropoutdesc, inputmode, direction, mode, datatype)


# One tensor descriptor per sequence step. ``range`` works on Python 2 and 3,
# and a plain loop replaces the list comprehension used only for side effects.
xdescs = [libcudnn.cudnnCreateTensorDescriptor() for _ in range(seqlength)]
for xdesc in xdescs:
    # NOTE(review): the literals 0 and 3 are presumably the data type and the
    # number of dimensions -- confirm against the wrapper's signature.
    libcudnn.cudnnSetTensorNdDescriptor(xdesc, 0, 3, [inputsize, minibatch, seqlength])

hxdesc = libcudnn.cudnnCreateTensorDescriptor()
libcudnn.cudnnSetTensorNdDescriptor(hxdesc, 0, 3, [hiddensize, minibatch, numlayers])

cxdesc = libcudnn.cudnnCreateTensorDescriptor()