コード例 #1
0
ファイル: test_cublas.py プロジェクト: landersson/misc
def test_saved(precision):
    """Check a GPU matrix product against ``np.dot`` using saved operands.

    Loads ``a<precision>.npy`` and ``b<precision>.npy`` (paths are relative),
    multiplies them with NumPy and with ``gpu_tensor_gemm``, prints
    diagnostics about both sets of operands, and passes the two products to
    ``check_results``.
    """
    host_a = np.load("a%d.npy" % precision)
    host_b = np.load("b%d.npy" % precision)
    found_nans = np.isnan(host_a).any() or np.isnan(host_b).any()
    print("NANS:", found_nans)

    # Host-side reference product.
    expected = np.dot(host_a, host_b)
    print(host_b)
    for tag, host in (("A:", host_a), ("B:", host_b), ("C:", expected)):
        print(tag, host.shape, host.dtype)

    dev_a = GPUTensor(host_a)
    dev_b = GPUTensor(host_b)
    dev_c = gpu_tensor_gemm(context.cublas, dev_a, dev_b)

    # Layout details of the device-side tensors.
    for tag, dev in (("A:", dev_a), ("B:", dev_b), ("C:", dev_c)):
        print(tag, dev.shape, dev.strides, dev.size, dev.mem_size,
              str(dev.flags.c_contiguous))

    check_results(expected, dev_c.get())
コード例 #2
0
ファイル: inference.py プロジェクト: landersson/misc
class BatchNormalization(Layer):
    """Inference-only batch normalization evaluated through cuDNN.

    The layer reads four parameter files listed in ``config["parameterFiles"]``:
    index 0 -> ``W``, 1 -> ``bias``, 2 -> ``average``, 3 -> ``variance``.
    All of them are uploaded as float32 tensors of shape ``(1, nelem, 1, 1)``
    where ``nelem`` is the length of the variance array.
    """

    def __init__(self, config):
        """Load the parameters described by ``config`` onto the GPU.

        ``config`` must provide ``affine`` (truthy), ``eps``, ``baseDir``,
        ``parameterFiles`` and ``varianceFormat``.
        """
        super().__init__("BatchNormalization")

        assert config["affine"], "BatchNormalization requires config['affine']"

        self.eps = config["eps"]

        # The variance is loaded on the host first because it may have to be
        # converted before it is uploaded.
        variance = np.load(os.path.join(config["baseDir"], config["parameterFiles"][3]))
        nelem = variance.shape[0]

        if config["varianceFormat"] == "variance" and libcudnn.cudnnGetVersion() < 5000:
            # When the library reports a version below 5000 the stored
            # variance is replaced by 1 / sqrt(variance + eps).
            variance += self.eps
            variance = np.reciprocal(np.sqrt(variance))

        param_shape = (1, nelem, 1, 1)
        self.variance = GPUTensor(variance, dtype=np.float32, shape=param_shape)

        self.W = self.load_tensor(config, 0, dtype=np.float32, shape=param_shape)
        self.bias = self.load_tensor(config, 1, dtype=np.float32, shape=param_shape)
        self.average = self.load_tensor(config, 2, dtype=np.float32, shape=param_shape)

        # All four parameter tensors share one shape, so a single descriptor
        # (taken from ``average``) is used for them in fprop().
        self.param_desc = self.average.get_cudnn_tensor_desc()
        self.in_desc = None
        self.out_desc = None

    def configure(self, input):
        """Allocate an output tensor shaped like ``input`` and refresh descriptors."""
        self.output = GPUTensor(input.shape, input.dtype)

        # Destroy descriptors left over from a previous configure() call.
        if self.in_desc:
            libcudnn.cudnnDestroyTensorDescriptor(self.in_desc.ptr)
        if self.out_desc:
            libcudnn.cudnnDestroyTensorDescriptor(self.out_desc.ptr)

        self.in_desc = input.get_cudnn_tensor_desc()
        self.out_desc = self.output.get_cudnn_tensor_desc()

    def fprop(self, input):
        """Run cuDNN's batch-norm inference on ``input``, writing ``self.output``."""
        # The call passes alpha=1.0 and beta=0.0.
        # NOTE(review): an earlier comment here quoted
        #   y := alpha*y + beta*(bnScale*(x-estimatedMean)/sqrt(epsilon+estimatedVariance)+bnBias)
        # which looks like it has alpha and beta swapped relative to the usual
        # cuDNN blending convention (dst = alpha*result + beta*dst) -- confirm
        # against the cuDNN reference before relying on it.
        libcudnn.cudnnBatchNormalizationForwardInference(
            context.cudnn,
            libcudnn.cudnnBatchNormMode['CUDNN_BATCHNORM_SPATIAL'],
            1.0, 0.0,
            self.in_desc.ptr, input.get_gpu_voidp(),
            self.out_desc.ptr, self.output.get_gpu_voidp(),
            self.param_desc.ptr,
            self.W.get_gpu_voidp(), self.bias.get_gpu_voidp(),
            self.average.get_gpu_voidp(), self.variance.get_gpu_voidp(),
            self.eps)

        self.check_truth()

    def __str__(self):
        # The parameters are stored as (1, nelem, 1, 1); axis 1 carries the
        # element count (axis 0 is always 1 and would print "1x1").
        return "BatchNormalization: %dx%d" % (self.W.shape[1], self.bias.shape[1])
コード例 #3
0
class Linear(Layer):
    """Fully connected layer: a cuBLAS GEMM with ``W`` followed by a bias add."""

    def __init__(self, config):
        """Load the weight matrix (file 0) and the bias (file 1) from ``config``."""
        super().__init__("Linear")

        self.W = self.load_tensor(config, 0)
        # The bias is stored as (1, rows-of-W, 1, 1) so it can be added to the
        # 4-D output tensor.
        bias_shape = (1, self.W.shape[0], 1, 1)
        self.bias = self.load_tensor(config, 1, shape=bias_shape)
        self.b_desc = self.bias.get_cudnn_tensor_desc()

    def configure(self, input):
        """Validate the element count of ``input`` and allocate the output."""
        num_outputs = self.W.shape[0]
        num_inputs = self.W.shape[1]
        elems_per_image = np.prod(input.shape)

        assert (elems_per_image == num_inputs)

        self.output = GPUTensor((1, num_outputs, 1, 1), dtype=input.dtype)
        self.output_desc = self.output.get_cudnn_tensor_desc()

        if self.truth is None:
            return
        print("OUTPUT TRUTH SHAPE:", self.truth.shape, self.output.shape)

    def fprop(self, input):
        """Compute ``W @ input`` into ``self.output`` and add the bias."""
        num_outputs = self.W.shape[0]
        num_inputs = self.W.shape[1]

        # Column-vector shapes for the GEMM. The bias is later added to
        # self.output, so output_2d presumably aliases self.output's memory --
        # TODO confirm reshape() returns a view.
        input_2d = input.reshape((num_inputs, 1))
        output_2d = self.output.reshape(num_outputs, 1)

        cublas_dot.cublas_gemm(context.cublas, self.W, input_2d, output_2d)

        # Bias add with both scaling factors set to 1.0.
        bias_ptr = self.bias.get_gpu_voidp()
        out_ptr = self.output.get_gpu_voidp()
        libcudnn.cudnnAddTensor(context.cudnn, 1.0, self.b_desc.ptr, bias_ptr,
                                1.0, self.output_desc.ptr, out_ptr)

        self.check_truth()

    def __str__(self):
        rows = self.W.shape[0]
        cols = self.W.shape[1]
        return "Linear: %dx%d" % (rows, cols)
コード例 #4
0
    def configure(self, input):
        """Allocate an output shaped like ``input`` and rebuild both descriptors."""
        self.output = GPUTensor(input.shape, input.dtype)

        # Release descriptors created by an earlier configure() call, if any.
        for stale_desc in (self.in_desc, self.out_desc):
            if stale_desc:
                libcudnn.cudnnDestroyTensorDescriptor(stale_desc.ptr)

        self.in_desc = input.get_cudnn_tensor_desc()
        self.out_desc = self.output.get_cudnn_tensor_desc()
コード例 #5
0
ファイル: inference.py プロジェクト: landersson/misc
class Linear(Layer):
    """Dense layer evaluated as a cuBLAS GEMM plus a cuDNN tensor add for the bias."""

    def __init__(self, config):
        """Read parameter file 0 into ``W`` and file 1 into ``bias``."""
        super().__init__("Linear")

        self.W = self.load_tensor(config, 0)
        out_features = self.W.shape[0]
        # Bias laid out as (1, out_features, 1, 1) to match the output tensor.
        self.bias = self.load_tensor(config, 1, shape=(1, out_features, 1, 1))
        self.b_desc = self.bias.get_cudnn_tensor_desc()

    def configure(self, input):
        """Check that ``input`` holds ``W.shape[1]`` elements; allocate the output."""
        out_features = self.W.shape[0]
        in_features = self.W.shape[1]
        elems_per_image = np.prod(input.shape)

        assert (elems_per_image == in_features)

        output_shape = (1, out_features, 1, 1)
        self.output = GPUTensor(output_shape, dtype=input.dtype)
        self.output_desc = self.output.get_cudnn_tensor_desc()

        have_truth = self.truth is not None
        if have_truth:
            print("OUTPUT TRUTH SHAPE:", self.truth.shape, self.output.shape)

    def fprop(self, input):
        """Multiply ``W`` by the flattened input, then add the bias to the output."""
        out_features = self.W.shape[0]
        in_features = self.W.shape[1]

        # Both operands are presented to the GEMM as single-column matrices.
        input_2d = input.reshape((in_features, 1))
        output_2d = self.output.reshape(out_features, 1)

        cublas_dot.cublas_gemm(context.cublas, self.W, input_2d, output_2d)

        # Add the bias onto self.output (both scaling factors are 1.0).
        libcudnn.cudnnAddTensor(
            context.cudnn,
            1.0, self.b_desc.ptr, self.bias.get_gpu_voidp(),
            1.0, self.output_desc.ptr, self.output.get_gpu_voidp())

        self.check_truth()

    def __str__(self):
        out_features = self.W.shape[0]
        in_features = self.W.shape[1]
        return "Linear: %dx%d" % (out_features, in_features)
コード例 #6
0
    def configure(self, input):
        """Allocate the output tensor and prepare cuDNN state for ``input``.

        ``input.shape`` is indexed as (images, channels, height, width). The
        method builds fresh tensor descriptors, cross-checks the computed
        output size against cuDNN, asks cuDNN for a forward algorithm and
        allocates the workspace it requests.
        """
        batch = input.shape[0]
        channels = input.shape[1]
        rows = input.shape[2]
        cols = input.shape[3]

        assert (channels == self.num_filter_channels)

        out_width = int((1.0 * cols + 2 * self.padW - self.kW) / self.dW + 1)
        out_height = int((1.0 * rows + 2 * self.padH - self.kH) / self.dH + 1)

        output_shape = (batch, self.num_filter_maps, out_height, out_width)
        self.output = GPUTensor(output_shape, input.dtype)

        # Drop descriptors created by a previous configure() call.
        for stale_desc in (self.in_desc, self.out_desc):
            if stale_desc:
                libcudnn.cudnnDestroyTensorDescriptor(stale_desc.ptr)

        self.in_desc = input.get_cudnn_tensor_desc()

        # cuDNN reports four output dimensions; only the last two are compared
        # with the sizes computed above.
        _, _, cudnn_height, cudnn_width = libcudnn.cudnnGetConvolution2dForwardOutputDim(
            self.conv_desc, self.in_desc.ptr, self.filt_desc)

        assert (out_width == cudnn_width)
        assert (out_height == cudnn_height)

        self.out_desc = self.output.get_cudnn_tensor_desc()

        # Let cuDNN choose the forward algorithm for the configured preference.
        self.algo = libcudnn.cudnnGetConvolutionForwardAlgorithm(
            context.cudnn,
            self.in_desc.ptr,
            self.filt_desc,
            self.conv_desc,
            self.out_desc.ptr,
            self.convolution_fwd_pref,
            0)

        print("Convolution::configure: algo=%s" % str(self.algo.value))

        self.ws_size = libcudnn.cudnnGetConvolutionForwardWorkspaceSize(
            context.cudnn,
            self.in_desc.ptr,
            self.filt_desc,
            self.conv_desc,
            self.out_desc.ptr,
            self.algo)

        # A zero-byte workspace is represented by the integer 0 instead of an
        # allocation.
        if self.ws_size.value > 0:
            self.ws_ptr = drv.mem_alloc(self.ws_size.value)
        else:
            self.ws_ptr = 0

        print("Convolution::configure: workspace size=%d" % self.ws_size.value)
コード例 #7
0
    def configure(self, input):
        """Verify the size of ``input`` against ``W`` and allocate the output.

        The total number of elements in ``input`` must equal ``W.shape[1]``;
        the output is a ``(1, W.shape[0], 1, 1)`` tensor of the input's dtype.
        """
        num_outputs = self.W.shape[0]
        num_inputs = self.W.shape[1]
        elems_per_image = np.prod(input.shape)

        assert (elems_per_image == num_inputs)

        self.output = GPUTensor((1, num_outputs, 1, 1), dtype=input.dtype)
        self.output_desc = self.output.get_cudnn_tensor_desc()

        if self.truth is None:
            return
        print("OUTPUT TRUTH SHAPE:", self.truth.shape, self.output.shape)
コード例 #8
0
def benchmark(datasrc, model):
    """Time data loading, preprocessing and repeated evaluation of ``model``.

    Args:
        datasrc: object whose ``get_item()`` returns a ``(label, data)`` pair.
        model: object providing ``dtype``, ``normalize(data)`` and
            ``evaluate(tensor)``.

    Prints the load time, the preparation time and the mean evaluation time
    over 100 iterations. The profiler is started for the last iteration only.
    """
    # perf_counter() is monotonic and high resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    start = time.perf_counter()
    _, data = datasrc.get_item()
    print("Data load time: %.2fms" % ((time.perf_counter() - start) * 1000.0))

    start = time.perf_counter()
    # Move axis 2 to the front and prepend an axis of length 1
    # (presumably HWC image -> 1xCxHxW -- confirm against the data source).
    data = np.ascontiguousarray(np.expand_dims(np.rollaxis(data, 2),
                                               0)).astype(model.dtype)
    data = model.normalize(data)
    print("Data prep time: %.2fms" % ((time.perf_counter() - start) * 1000.0))

    input_tensor = GPUTensor(data)

    # One untimed warm-up evaluation.
    model.evaluate(input_tensor)

    num_iterations = 100
    print("Timing %d iterations..." % num_iterations)
    start = time.perf_counter()
    for i in range(num_iterations):
        if i == num_iterations - 1:
            drv.start_profiler()
        y = model.evaluate(input_tensor)
        # NOTE(review): this print runs inside the timed loop and is part of
        # the measured time.
        print(y)
    drv.stop_profiler()

    et = (time.perf_counter() - start) * 1000 / num_iterations
    # Guard against a zero interval so the report itself cannot raise.
    fps = 1000.0 / et if et > 0 else float("inf")
    print("Model eval time: %.2fms = %.1ffps" % (et, fps))
コード例 #9
0
ファイル: inference.py プロジェクト: landersson/misc
    def __init__(self, config):
        """Load the batch-norm parameters named in ``config`` onto the GPU.

        Parameter files 0, 1, 2 and 3 become ``W``, ``bias``, ``average`` and
        ``variance``; each is stored as a float32 ``(1, nelem, 1, 1)`` tensor.
        """
        super().__init__("BatchNormalization")

        assert(config["affine"])

        self.eps = config["eps"]

        # Read the variance on the host so it can be converted before upload.
        variance_path = os.path.join(config["baseDir"], config["parameterFiles"][3])
        variance = np.load(variance_path)
        nelem = variance.shape[0]

        stored_as_variance = config["varianceFormat"] == "variance"
        if stored_as_variance and libcudnn.cudnnGetVersion() < 5000:
            # Replace the variance by 1 / sqrt(variance + eps).
            variance += self.eps
            variance = np.reciprocal(np.sqrt(variance))

        param_shape = (1, nelem, 1, 1)
        self.variance = GPUTensor(variance, dtype=np.float32, shape=param_shape)

        self.W = self.load_tensor(config, 0, dtype=np.float32, shape=param_shape)
        self.bias = self.load_tensor(config, 1, dtype=np.float32, shape=param_shape)
        self.average = self.load_tensor(config, 2, dtype=np.float32, shape=param_shape)

        # One descriptor, taken from ``average``, describes the parameter shape.
        self.param_desc = self.average.get_cudnn_tensor_desc()
        self.in_desc = None
        self.out_desc = None
コード例 #10
0
ファイル: inference.py プロジェクト: landersson/misc
    def configure(self, input):
        """Allocate the pooled output tensor and (re)create cuDNN descriptors.

        ``input.shape`` is indexed as (images, channels, height, width). The
        window must fit inside the input in both spatial dimensions.
        """
        in_images = input.shape[0]
        in_channels = input.shape[1]
        in_height = input.shape[2]
        in_width = input.shape[3]

        assert in_width >= self.kW, "pooling window wider than the input"
        assert in_height >= self.kH, "pooling window taller than the input"

        # floor((in - kernel + 2 * pad) / stride) + 1; the floor is applied to
        # the quotient rather than to the numerator.
        out_width = int(math.floor((in_width - self.kW + 2 * self.padW) / self.dW)) + 1
        out_height = int(math.floor((in_height - self.kH + 2 * self.padH) / self.dH)) + 1

        self.output = GPUTensor((in_images, in_channels, out_height, out_width), input.dtype)

        # Release state from an earlier configure() call. The pooling
        # descriptor is the raw handle returned by cudnnCreatePoolingDescriptor,
        # whereas tensor descriptors are wrapper objects whose handle lives in
        # ``.ptr`` (as used by the other layers in this file).
        if self.pool_desc:
            libcudnn.cudnnDestroyPoolingDescriptor(self.pool_desc)
        if self.in_desc:
            libcudnn.cudnnDestroyTensorDescriptor(self.in_desc.ptr)
        if self.out_desc:
            libcudnn.cudnnDestroyTensorDescriptor(self.out_desc.ptr)

        self.in_desc = input.get_cudnn_tensor_desc()
        self.out_desc = self.output.get_cudnn_tensor_desc()

        # NOTE(review): the pooling mode is fixed to max pooling here -- confirm
        # whether other modes are meant to be supported by this method.
        self.pool_desc = libcudnn.cudnnCreatePoolingDescriptor()
        libcudnn.cudnnSetPooling2dDescriptor(
            self.pool_desc,
            libcudnn.cudnnPoolingMode["CUDNN_POOLING_MAX"],
            self.kH, self.kW, self.padH, self.padW, self.dH, self.dW)
コード例 #11
0
    def load_tensor(self, config, index, dtype=None, shape=None):
        """Create a GPUTensor from the ``index``-th parameter file of ``config``.

        The file path is ``config["baseDir"]`` joined with
        ``config["parameterFiles"][index]``. When ``dtype`` is None the value
        of ``config["dtype"]`` is used. ``shape`` is forwarded unchanged.
        """
        param_path = os.path.join(config["baseDir"],
                                  config["parameterFiles"][index])
        effective_dtype = config["dtype"] if dtype is None else dtype
        return GPUTensor(param_path, effective_dtype, shape)
コード例 #12
0
ファイル: inference.py プロジェクト: landersson/misc
    def configure(self, input):
        """Create an output tensor with the input's shape/dtype and new descriptors."""
        self.output = GPUTensor(input.shape, input.dtype)

        # Descriptors from a previous configuration are destroyed before being
        # replaced.
        previous_descs = (self.in_desc, self.out_desc)
        for desc in previous_descs:
            if desc:
                libcudnn.cudnnDestroyTensorDescriptor(desc.ptr)

        self.in_desc = input.get_cudnn_tensor_desc()
        self.out_desc = self.output.get_cudnn_tensor_desc()
コード例 #13
0
    def configure(self, input):
        """Allocate the pooled output tensor and (re)create cuDNN descriptors.

        ``input.shape`` is indexed as (images, channels, height, width). The
        window must fit inside the input in both spatial dimensions.
        """
        in_images = input.shape[0]
        in_channels = input.shape[1]
        in_height = input.shape[2]
        in_width = input.shape[3]

        assert in_width >= self.kW, "pooling window wider than the input"
        assert in_height >= self.kH, "pooling window taller than the input"

        # floor((in - kernel + 2 * pad) / stride) + 1; the floor is applied to
        # the quotient rather than to the numerator.
        out_width = int(
            math.floor((in_width - self.kW + 2 * self.padW) / self.dW)) + 1
        out_height = int(
            math.floor((in_height - self.kH + 2 * self.padH) / self.dH)) + 1

        self.output = GPUTensor(
            (in_images, in_channels, out_height, out_width), input.dtype)

        # Release state from an earlier configure() call. The pooling
        # descriptor is the raw handle returned by cudnnCreatePoolingDescriptor,
        # whereas tensor descriptors are wrapper objects whose handle lives in
        # ``.ptr`` (as used by the other layers in this file).
        if self.pool_desc:
            libcudnn.cudnnDestroyPoolingDescriptor(self.pool_desc)
        if self.in_desc:
            libcudnn.cudnnDestroyTensorDescriptor(self.in_desc.ptr)
        if self.out_desc:
            libcudnn.cudnnDestroyTensorDescriptor(self.out_desc.ptr)

        self.in_desc = input.get_cudnn_tensor_desc()
        self.out_desc = self.output.get_cudnn_tensor_desc()

        # NOTE(review): the pooling mode is fixed to max pooling here -- confirm
        # whether other modes are meant to be supported by this method.
        self.pool_desc = libcudnn.cudnnCreatePoolingDescriptor()
        libcudnn.cudnnSetPooling2dDescriptor(
            self.pool_desc,
            libcudnn.cudnnPoolingMode["CUDNN_POOLING_MAX"],
            self.kH,
            self.kW,
            self.padH,
            self.padW,
            self.dH,
            self.dW)
コード例 #14
0
    def __init__(self, config):
        """Upload the batch-norm parameters referenced by ``config``.

        Files 0..3 of ``config["parameterFiles"]`` are loaded as ``W``,
        ``bias``, ``average`` and ``variance`` respectively, all as float32
        tensors shaped ``(1, nelem, 1, 1)``.
        """
        super().__init__("BatchNormalization")

        assert (config["affine"])

        self.eps = config["eps"]

        # The variance goes through NumPy first so it can be rewritten on the
        # host when needed.
        variance_file = os.path.join(config["baseDir"],
                                     config["parameterFiles"][3])
        variance = np.load(variance_file)
        nelem = variance.shape[0]

        wants_conversion = config["varianceFormat"] == "variance"
        if wants_conversion and libcudnn.cudnnGetVersion() < 5000:
            # Store 1 / sqrt(variance + eps) in place of the raw variance.
            variance += self.eps
            variance = np.reciprocal(np.sqrt(variance))

        shape_4d = (1, nelem, 1, 1)
        self.variance = GPUTensor(variance, dtype=np.float32, shape=shape_4d)

        self.W = self.load_tensor(config, 0, dtype=np.float32, shape=shape_4d)
        self.bias = self.load_tensor(config, 1, dtype=np.float32,
                                     shape=shape_4d)
        self.average = self.load_tensor(config, 2, dtype=np.float32,
                                        shape=shape_4d)

        # Descriptor for the shared parameter shape, taken from ``average``.
        self.param_desc = self.average.get_cudnn_tensor_desc()
        self.in_desc = None
        self.out_desc = None
コード例 #15
0
ファイル: inference.py プロジェクト: landersson/misc
    def configure(self, input):
        """Allocate a ``(1, W.shape[0], 1, 1)`` output after checking the input size.

        The product of ``input.shape`` must equal ``W.shape[1]``.
        """
        out_features = self.W.shape[0]
        in_features = self.W.shape[1]
        elems_per_image = np.prod(input.shape)

        assert(elems_per_image == in_features)

        output_shape = (1, out_features, 1, 1)
        self.output = GPUTensor(output_shape, dtype=input.dtype)
        self.output_desc = self.output.get_cudnn_tensor_desc()

        have_truth = self.truth is not None
        if have_truth:
            print("OUTPUT TRUTH SHAPE:", self.truth.shape, self.output.shape)
コード例 #16
0
ファイル: inference.py プロジェクト: landersson/misc
    def configure(self, input):
        """Prepare output storage, descriptors, algorithm and workspace.

        ``input.shape`` is indexed as (images, channels, height, width). The
        channel count must match ``self.num_filter_channels``, and the output
        size computed here must agree with the one reported by cuDNN.
        """
        n_images = input.shape[0]
        n_channels = input.shape[1]
        height = input.shape[2]
        width = input.shape[3]

        assert(n_channels == self.num_filter_channels)

        out_width = int((1.0 * width + 2*self.padW - self.kW) / self.dW + 1)
        out_height = int((1.0 * height + 2*self.padH - self.kH) / self.dH + 1)

        self.output = GPUTensor(
            (n_images, self.num_filter_maps, out_height, out_width),
            input.dtype)

        # Tensor descriptors from an earlier call are destroyed first.
        for old_desc in (self.in_desc, self.out_desc):
            if old_desc:
                libcudnn.cudnnDestroyTensorDescriptor(old_desc.ptr)

        self.in_desc = input.get_cudnn_tensor_desc()

        # Only the last two of the four reported dimensions are checked.
        _, _, reported_height, reported_width = libcudnn.cudnnGetConvolution2dForwardOutputDim(
            self.conv_desc, self.in_desc.ptr, self.filt_desc)

        assert(out_width == reported_width)
        assert(out_height == reported_height)

        self.out_desc = self.output.get_cudnn_tensor_desc()

        # Ask cuDNN for a forward algorithm under the configured preference.
        self.algo = libcudnn.cudnnGetConvolutionForwardAlgorithm(
            context.cudnn, self.in_desc.ptr, self.filt_desc, self.conv_desc,
            self.out_desc.ptr, self.convolution_fwd_pref, 0)

        print("Convolution::configure: algo=%s" % str(self.algo.value))

        self.ws_size = libcudnn.cudnnGetConvolutionForwardWorkspaceSize(
            context.cudnn, self.in_desc.ptr, self.filt_desc, self.conv_desc,
            self.out_desc.ptr, self.algo)

        # No allocation is made for an empty workspace; 0 stands in for it.
        if self.ws_size.value > 0:
            self.ws_ptr = drv.mem_alloc(self.ws_size.value)
        else:
            self.ws_ptr = 0

        print("Convolution::configure: workspace size=%d" % self.ws_size.value)
コード例 #17
0
ファイル: test_cudnn_bn.py プロジェクト: landersson/misc
import math
import numpy as np
import pycuda.autoinit
import libcudnn
from gputensor import GPUTensor

# Element type of the input/output tensors.
dt = np.float16

# 1x1x4x4 host input with every element equal to 2.
xh = np.full((1, 1, 4, 4), 2.0, dtype=dt)

cudnn_context = libcudnn.cudnnCreate()

print("CUDNN Version: %d" % libcudnn.cudnnGetVersion())

x = GPUTensor(xh)
y = GPUTensor(xh.shape, dtype=dt)

# Element type of the batch-norm parameter tensors.
pdt = np.float32

# Single-element parameters: w = 1, bias = 0, mean = 1, var = 0.5.
param_shape = (1, 1, 1, 1)
w = GPUTensor(np.ones(param_shape), dtype=pdt)
bias = GPUTensor(np.zeros(param_shape), dtype=pdt)
mean = GPUTensor(np.ones(param_shape), dtype=pdt)
var = GPUTensor(np.full(param_shape, 0.5), dtype=pdt)

x_desc = x.get_cudnn_tensor_desc()
y_desc = y.get_cudnn_tensor_desc()
print(x_desc)
print(y_desc)

# The descriptor for the parameters is taken from ``var``.
param_desc = var.get_cudnn_tensor_desc()
print(param_desc)
コード例 #18
0
ファイル: inference.py プロジェクト: landersson/misc
class Convolution(SlidingLayer):
    """2-D convolution evaluated with cuDNN, followed by a bias add.

    Parameter file 0 holds the filter tensor ``W`` (its first two axes are
    used as filter-map and filter-channel counts); file 1 holds the bias,
    stored as ``(1, num_filter_maps, 1, 1)``.
    """

    # Cross-correlation is used rather than cuDNN's 'CUDNN_CONVOLUTION' mode.
    convolution_mode = libcudnn.cudnnConvolutionMode['CUDNN_CROSS_CORRELATION']
    convolution_fwd_pref = libcudnn.cudnnConvolutionFwdPreference['CUDNN_CONVOLUTION_FWD_PREFER_FASTEST']

    def __init__(self, config, name="Convolution"):
        """Load filters and bias and create the filter/convolution descriptors."""
        super().__init__(config, name)
        self.output = None
        self.in_desc = None
        self.out_desc = None

        # Blend factors passed to cudnnConvolutionForward in fprop().
        self.alpha = 1.0
        self.beta = 0.0

        self.W = self.load_tensor(config, 0)
        self.num_filter_maps = self.W.shape[0]
        self.num_filter_channels = self.W.shape[1]

        bias_shape = (1, self.num_filter_maps, 1, 1)
        self.bias = self.load_tensor(config, 1, shape=bias_shape)
        self.b_desc = self.bias.get_cudnn_tensor_desc()

        self.filt_desc = libcudnn.cudnnCreateFilterDescriptor()
        print("FILT:", self.W.dtype, gputensor.np_2_cudnn_dtype[self.W.dtype])
        print("FILT:", self.W.shape, self.num_filter_maps, self.num_filter_channels, self.kH, self.kW)
        libcudnn.cudnnSetFilter4dDescriptor(
            self.filt_desc,
            gputensor.np_2_cudnn_dtype[self.W.dtype],
            self.num_filter_maps,
            self.num_filter_channels,
            self.kH,
            self.kW)

        self.conv_desc = libcudnn.cudnnCreateConvolutionDescriptor()
        libcudnn.cudnnSetConvolution2dDescriptor(
            self.conv_desc,
            self.padH, self.padW,
            self.dH, self.dW,
            1, 1,
            self.convolution_mode)

    def __del__(self):
        # Intentionally a no-op: the filter and convolution descriptors are
        # not destroyed here.
        pass

    def configure(self, input):
        """Allocate the output and set up descriptors, algorithm and workspace.

        ``input.shape`` is indexed as (images, channels, height, width).
        """
        n_images = input.shape[0]
        n_channels = input.shape[1]
        height = input.shape[2]
        width = input.shape[3]

        assert(n_channels == self.num_filter_channels)

        out_width = int((1.0 * width + 2*self.padW - self.kW) / self.dW + 1)
        out_height = int((1.0 * height + 2*self.padH - self.kH) / self.dH + 1)

        self.output = GPUTensor(
            (n_images, self.num_filter_maps, out_height, out_width),
            input.dtype)

        # Destroy tensor descriptors left over from a previous call.
        for old_desc in (self.in_desc, self.out_desc):
            if old_desc:
                libcudnn.cudnnDestroyTensorDescriptor(old_desc.ptr)

        self.in_desc = input.get_cudnn_tensor_desc()

        # Cross-check the spatial size computed above with cuDNN's answer.
        _, _, reported_height, reported_width = libcudnn.cudnnGetConvolution2dForwardOutputDim(
            self.conv_desc, self.in_desc.ptr, self.filt_desc)

        assert(out_width == reported_width)
        assert(out_height == reported_height)

        self.out_desc = self.output.get_cudnn_tensor_desc()

        # Ask cuDNN for a forward algorithm under the configured preference.
        self.algo = libcudnn.cudnnGetConvolutionForwardAlgorithm(
            context.cudnn, self.in_desc.ptr, self.filt_desc, self.conv_desc,
            self.out_desc.ptr, self.convolution_fwd_pref, 0)

        print("Convolution::configure: algo=%s" % str(self.algo.value))

        self.ws_size = libcudnn.cudnnGetConvolutionForwardWorkspaceSize(
            context.cudnn, self.in_desc.ptr, self.filt_desc, self.conv_desc,
            self.out_desc.ptr, self.algo)

        # An empty workspace is represented by the integer 0.
        if self.ws_size.value > 0:
            self.ws_ptr = drv.mem_alloc(self.ws_size.value)
        else:
            self.ws_ptr = 0

        print("Convolution::configure: workspace size=%d" % self.ws_size.value)

    def fprop(self, input):
        """Convolve ``input`` into ``self.output`` and add the bias."""
        # The workspace address is handed to cuDNN as a void pointer.
        ws_data = ctypes.c_void_p(int(self.ws_ptr))

        self.start.record()
        libcudnn.cudnnConvolutionForward(
            context.cudnn, self.alpha,
            self.in_desc.ptr, input.get_gpu_voidp(),
            self.filt_desc, self.W.get_gpu_voidp(),
            self.conv_desc, self.algo, ws_data, self.ws_size.value,
            self.beta,
            self.out_desc.ptr, self.output.get_gpu_voidp())

        # Bias add onto the convolution result (both scaling factors are 1.0).
        libcudnn.cudnnAddTensor(
            context.cudnn,
            1.0, self.b_desc.ptr, self.bias.get_gpu_voidp(),
            1.0, self.out_desc.ptr, self.output.get_gpu_voidp())

        self.check_truth()

    def __str__(self):
        base_text = SlidingLayer.__str__(self)
        return "%s, W=%s, b=%s" % (base_text, self.W.shape, self.bias.shape)
コード例 #19
0
ファイル: test_cudnn_pooling.py プロジェクト: landersson/misc
import math
import numpy as np
import pycuda.autoinit
import libcudnn
from gputensor import GPUTensor

# 1x1x4x4 host input whose entry at (row m, column x) is 1 + m * x.
xh = np.array([[[[1 + m * x for x in range(4)] for m in range(4)]]],
              dtype=np.float32)
print(xh)
print(xh.shape)

cudnn_context = libcudnn.cudnnCreate()

x = GPUTensor(xh)

x_desc = x.get_cudnn_tensor_desc()
print(x_desc)

# Pooling window, stride and padding (width, height).
kW, kH = 2, 2
dW, dH = 1, 1
padW, padH = 0, 0
in_width = x.shape[3]
in_height = x.shape[2]

# Output size: (in - kernel + 2 * pad) // stride + 1.
out_width = int((in_width - kW + 2 * padW) // dW) + 1
out_height = int((in_height - kH + 2 * padH) // dH) + 1

print("Ot:", out_width, out_height)
コード例 #20
0
ファイル: test_cudnn_bn.py プロジェクト: landersson/misc
import math
import numpy as np
import pycuda.autoinit
import libcudnn
from gputensor import GPUTensor

# dtype of the data tensors x and y.
dt = np.float16

# Host input: a 1x1x4x4 block filled with 2.
xh = np.full((1, 1, 4, 4), 2.0, dtype=dt)

cudnn_context = libcudnn.cudnnCreate()

print("CUDNN Version: %d" % libcudnn.cudnnGetVersion())

x = GPUTensor(xh)
y = GPUTensor(xh.shape, dtype=dt)

# dtype of the parameter tensors.
pdt = np.float32

# One-element parameter tensors: w = 1, bias = 0, mean = 1, var = 0.5.
unit_shape = (1, 1, 1, 1)
w = GPUTensor(np.ones(unit_shape), dtype=pdt)
bias = GPUTensor(np.zeros(unit_shape), dtype=pdt)
mean = GPUTensor(np.ones(unit_shape), dtype=pdt)
var = GPUTensor(np.full(unit_shape, 0.5), dtype=pdt)

x_desc = x.get_cudnn_tensor_desc()
y_desc = y.get_cudnn_tensor_desc()
print(x_desc)
print(y_desc)

# Parameter descriptor, obtained from ``var``.
param_desc = var.get_cudnn_tensor_desc()
print(param_desc)
コード例 #21
0
class BatchNormalization(Layer):
    """Inference-only batch normalization evaluated through cuDNN.

    Four parameter files from ``config["parameterFiles"]`` are used:
    index 0 -> ``W``, 1 -> ``bias``, 2 -> ``average``, 3 -> ``variance``.
    Each is uploaded as a float32 tensor of shape ``(1, nelem, 1, 1)``, where
    ``nelem`` is the length of the variance array.
    """

    def __init__(self, config):
        """Load the parameters described by ``config`` onto the GPU.

        ``config`` must provide ``affine`` (truthy), ``eps``, ``baseDir``,
        ``parameterFiles`` and ``varianceFormat``.
        """
        super().__init__("BatchNormalization")

        assert config["affine"], "BatchNormalization requires config['affine']"

        self.eps = config["eps"]

        # The variance is loaded on the host first because it may have to be
        # converted before it is uploaded.
        variance = np.load(
            os.path.join(config["baseDir"], config["parameterFiles"][3]))
        nelem = variance.shape[0]

        if (config["varianceFormat"] == "variance"
                and libcudnn.cudnnGetVersion() < 5000):
            # When the library reports a version below 5000 the stored
            # variance is replaced by 1 / sqrt(variance + eps).
            variance += self.eps
            variance = np.reciprocal(np.sqrt(variance))

        param_shape = (1, nelem, 1, 1)
        self.variance = GPUTensor(variance,
                                  dtype=np.float32,
                                  shape=param_shape)

        self.W = self.load_tensor(config,
                                  0,
                                  dtype=np.float32,
                                  shape=param_shape)
        self.bias = self.load_tensor(config,
                                     1,
                                     dtype=np.float32,
                                     shape=param_shape)
        self.average = self.load_tensor(config,
                                        2,
                                        dtype=np.float32,
                                        shape=param_shape)

        # All four parameter tensors share one shape, so a single descriptor
        # (taken from ``average``) is used for them in fprop().
        self.param_desc = self.average.get_cudnn_tensor_desc()
        self.in_desc = None
        self.out_desc = None

    def configure(self, input):
        """Allocate an output tensor shaped like ``input`` and refresh descriptors."""
        self.output = GPUTensor(input.shape, input.dtype)

        # Destroy descriptors left over from a previous configure() call.
        if self.in_desc:
            libcudnn.cudnnDestroyTensorDescriptor(self.in_desc.ptr)
        if self.out_desc:
            libcudnn.cudnnDestroyTensorDescriptor(self.out_desc.ptr)

        self.in_desc = input.get_cudnn_tensor_desc()
        self.out_desc = self.output.get_cudnn_tensor_desc()

    def fprop(self, input):
        """Run cuDNN's batch-norm inference on ``input``, writing ``self.output``."""
        # The call passes alpha=1.0 and beta=0.0.
        # NOTE(review): an earlier comment here quoted
        #   y := alpha*y + beta*(bnScale*(x-estimatedMean)/sqrt(epsilon+estimatedVariance)+bnBias)
        # which looks like it has alpha and beta swapped relative to the usual
        # cuDNN blending convention (dst = alpha*result + beta*dst) -- confirm
        # against the cuDNN reference before relying on it.
        libcudnn.cudnnBatchNormalizationForwardInference(
            context.cudnn,
            libcudnn.cudnnBatchNormMode['CUDNN_BATCHNORM_SPATIAL'], 1.0, 0.0,
            self.in_desc.ptr, input.get_gpu_voidp(), self.out_desc.ptr,
            self.output.get_gpu_voidp(), self.param_desc.ptr,
            self.W.get_gpu_voidp(), self.bias.get_gpu_voidp(),
            self.average.get_gpu_voidp(), self.variance.get_gpu_voidp(),
            self.eps)

        self.check_truth()

    def __str__(self):
        # The parameters are stored as (1, nelem, 1, 1); axis 1 carries the
        # element count (axis 0 is always 1 and would print "1x1").
        return "BatchNormalization: %dx%d" % (self.W.shape[1],
                                              self.bias.shape[1])
コード例 #22
0
ファイル: test_cudnn_addbias.py プロジェクト: landersson/misc
import math
import numpy as np
import pycuda.autoinit
import libcudnn
from gputensor import GPUTensor

# Host input of shape (1, 3, 3, 3): every channel holds the same 3x3 plane
# whose entry at (row, col) is row + col + 1.
xh = np.array(
    [[[[row + col + 1 for col in range(3)] for row in range(3)]
      for _ in range(3)]],
    dtype=np.float32)
print(xh.shape)
print(xh)
cudnn_context = libcudnn.cudnnCreate()

x = GPUTensor(xh)
x_desc = x.get_cudnn_tensor_desc()
print(x_desc)

print("X:\n", x.get())

# One bias value per channel, laid out as (1, 3, 1, 1).
bias_host = np.arange(1, 4, dtype=np.float32).reshape((1, 3, 1, 1))
b = GPUTensor(bias_host)
b_desc = b.get_cudnn_tensor_desc()

print(x.shape)
print(b.shape)

# Add the bias tensor into x on the device; both scaling factors are 1.0.
bias_ptr = b.get_gpu_voidp()
data_ptr = x.get_gpu_voidp()
libcudnn.cudnnAddTensor(cudnn_context, 1.0, b_desc.ptr, bias_ptr,
                        1.0, x_desc.ptr, data_ptr)

print("X2:\n", x.get())
コード例 #23
0
class Pooling(SlidingLayer):
    """Pooling layer whose forward pass is executed by cuDNN.

    Kernel size (``kH``/``kW``), stride (``dH``/``dW``) and padding
    (``padH``/``padW``) are read from attributes that this class does not
    assign; presumably ``SlidingLayer.__init__`` derives them from ``config``
    (confirm against the base class).
    """

    class Mode(enum.IntEnum):
        """Pooling flavours a caller can request."""
        MAX = 1
        AVG = 2

    def __init__(self, mode, config, name="Pooling"):
        """Store the requested mode and reset all descriptor slots.

        Args:
            mode: requested pooling mode (see ``Mode``).
            config: layer configuration; must contain a false ``ceil_mode``.
            name: layer name forwarded to the base class.
        """
        super().__init__(config, name)
        self.mode = mode

        # configure() only implements floor-mode output sizing.
        assert (config["ceil_mode"] == False)

        self.alpha = 1.0
        self.beta = 0.0

        self.pool_desc = None
        self.in_desc = None
        self.out_desc = None

    @staticmethod
    def _pooled_extent(size, kernel, pad, stride):
        """Return floor((size - kernel + 2 * pad) / stride) + 1."""
        return int(math.floor(float(size - kernel + 2 * pad) / stride)) + 1

    def configure(self, input):
        """Size the output tensor and (re)build the cuDNN descriptors.

        Reads the four leading entries of ``input.shape`` as
        (images, channels, height, width). Descriptors kept from an earlier
        call are destroyed before new ones are created.
        """
        in_images = input.shape[0]
        in_channels = input.shape[1]
        in_height = input.shape[2]
        in_width = input.shape[3]

        assert (in_width >= self.kW)
        assert (in_height >= self.kH)

        out_width = self._pooled_extent(in_width, self.kW, self.padW, self.dW)
        out_height = self._pooled_extent(in_height, self.kH, self.padH,
                                         self.dH)

        self.output = GPUTensor(
            (in_images, in_channels, out_height, out_width), input.dtype)

        if self.pool_desc:
            libcudnn.cudnnDestroyPoolingDescriptor(self.pool_desc)
        # The tensor descriptors are wrapper objects; cuDNN is given their
        # ``.ptr`` handle, exactly as fprop() does.
        if self.in_desc:
            libcudnn.cudnnDestroyTensorDescriptor(self.in_desc.ptr)
        if self.out_desc:
            libcudnn.cudnnDestroyTensorDescriptor(self.out_desc.ptr)

        self.in_desc = input.get_cudnn_tensor_desc()
        self.out_desc = self.output.get_cudnn_tensor_desc()

        self.pool_desc = libcudnn.cudnnCreatePoolingDescriptor()
        # NOTE(review): self.mode is stored but not consulted here; the
        # descriptor is always built with CUDNN_POOLING_MAX, so Mode.AVG is
        # not honoured. Confirm the intended cuDNN average mode before wiring
        # it in.
        libcudnn.cudnnSetPooling2dDescriptor(
            self.pool_desc,
            libcudnn.cudnnPoolingMode["CUDNN_POOLING_MAX"],
            # libcudnn.cudnnNanPropagation["CUDNN_NOT_PROPAGATE_NAN"],
            self.kH,
            self.kW,
            self.padH,
            self.padW,
            self.dH,
            self.dW)

    def fprop(self, input):
        """Run the cuDNN pooling forward pass from ``input`` to ``self.output``.

        Uses the descriptors prepared by ``configure`` and finishes by
        calling ``check_truth``.
        """
        libcudnn.cudnnPoolingForward(context.cudnn, self.pool_desc,
                                     self.alpha, self.in_desc.ptr,
                                     input.get_gpu_voidp(), self.beta,
                                     self.out_desc.ptr,
                                     self.output.get_gpu_voidp())

        self.check_truth()
コード例 #24
0
import math
import numpy as np
import pycuda.autoinit
import libcudnn
from gputensor import GPUTensor

def _pooled_extent(size, kernel, pad, stride):
    """Output length along one axis for the given window, padding and stride."""
    return int((math.floor(1.0 * size - kernel + 2 * pad) / stride) + 1)


# 4x4 single-channel plane with entry 1 + row * col, shaped (1, 1, 4, 4).
xh = np.array(
    [[[[1 + row * col for col in range(4)] for row in range(4)]]],
    dtype=np.float32)
print(xh)
print(xh.shape)

cudnn_context = libcudnn.cudnnCreate()

x = GPUTensor(xh)

x_desc = x.get_cudnn_tensor_desc()
print(x_desc)

# Pooling window, stride and padding (width, height).
kW, kH = 2, 2
dW, dH = 1, 1
padW, padH = 0, 0
in_height, in_width = x.shape[2], x.shape[3]

out_width = _pooled_extent(in_width, kW, padW, dW)
out_height = _pooled_extent(in_height, kH, padH, dH)

print("Ot:", out_width, out_height)
コード例 #25
0
class Convolution(SlidingLayer):
    """2-D convolution layer with a per-filter bias, executed through cuDNN.

    The filter weights are tensor 0 and the bias is tensor 1 as returned by
    ``load_tensor``. Kernel size, stride and padding come from ``kH``/``kW``,
    ``dH``/``dW`` and ``padH``/``padW``, which this class does not assign
    (presumably set by ``SlidingLayer`` -- confirm against the base class).
    """

    # cuDNN settings shared by every instance.
    convolution_mode = libcudnn.cudnnConvolutionMode['CUDNN_CROSS_CORRELATION']
    convolution_fwd_pref = libcudnn.cudnnConvolutionFwdPreference[
        'CUDNN_CONVOLUTION_FWD_PREFER_FASTEST']

    def __init__(self, config, name="Convolution"):
        """Load weights and bias and create the filter and convolution descriptors."""
        super().__init__(config, name)
        self.output = None

        weights = self.load_tensor(config, 0)
        self.W = weights

        self.alpha, self.beta = 1.0, 0.0
        self.in_desc = self.out_desc = None

        # The first two weight axes are taken as (filter maps, channels).
        n_maps, n_channels = weights.shape[0], weights.shape[1]
        self.num_filter_maps = n_maps
        self.num_filter_channels = n_channels

        # The bias is requested as (1, maps, 1, 1).
        bias_shape = (1, n_maps, 1, 1)
        self.bias = self.load_tensor(config, 1, shape=bias_shape)
        self.b_desc = self.bias.get_cudnn_tensor_desc()

        self.filt_desc = libcudnn.cudnnCreateFilterDescriptor()
        filter_dtype = gputensor.np_2_cudnn_dtype[weights.dtype]
        print("FILT:", weights.dtype, filter_dtype)
        print("FILT:", weights.shape, n_maps, n_channels, self.kH, self.kW)
        libcudnn.cudnnSetFilter4dDescriptor(self.filt_desc, filter_dtype,
                                            n_maps, n_channels,
                                            self.kH, self.kW)

        self.conv_desc = libcudnn.cudnnCreateConvolutionDescriptor()
        libcudnn.cudnnSetConvolution2dDescriptor(
            self.conv_desc,
            self.padH, self.padW,
            self.dH, self.dW,
            1, 1,
            self.convolution_mode)

    def __del__(self):
        """Intentionally a no-op; the filter/convolution descriptors are not destroyed here."""
        pass

    def configure(self, input):
        """Size the output, rebuild tensor descriptors and pick a cuDNN algorithm.

        Reads the four leading entries of ``input.shape`` as
        (images, channels, height, width). The locally computed output size
        is asserted against the size cuDNN reports, then the forward
        algorithm and its workspace are selected and allocated.
        """

        def extent(size, pad, kernel, stride):
            # Output length along one spatial axis.
            return int((1.0 * size + 2 * pad - kernel) / stride + 1)

        batch = input.shape[0]
        channels = input.shape[1]
        rows = input.shape[2]
        cols = input.shape[3]

        assert (channels == self.num_filter_channels)

        out_cols = extent(cols, self.padW, self.kW, self.dW)
        out_rows = extent(rows, self.padH, self.kH, self.dH)

        self.output = GPUTensor(
            (batch, self.num_filter_maps, out_rows, out_cols), input.dtype)

        # Drop descriptors kept from an earlier configure() call.
        for stale_desc in (self.in_desc, self.out_desc):
            if stale_desc:
                libcudnn.cudnnDestroyTensorDescriptor(stale_desc.ptr)

        self.in_desc = input.get_cudnn_tensor_desc()

        # Cross-check the computed spatial size against cuDNN's own answer
        # (the first two returned values are ignored).
        _, _, cudnn_rows, cudnn_cols = \
            libcudnn.cudnnGetConvolution2dForwardOutputDim(
                self.conv_desc, self.in_desc.ptr, self.filt_desc)

        assert (out_cols == cudnn_cols)
        assert (out_rows == cudnn_rows)

        self.out_desc = self.output.get_cudnn_tensor_desc()

        # Ask cuDNN which forward algorithm to use for these descriptors.
        self.algo = libcudnn.cudnnGetConvolutionForwardAlgorithm(
            context.cudnn,
            self.in_desc.ptr, self.filt_desc, self.conv_desc,
            self.out_desc.ptr,
            self.convolution_fwd_pref, 0)

        print("Convolution::configure: algo=%s" % str(self.algo.value))

        self.ws_size = libcudnn.cudnnGetConvolutionForwardWorkspaceSize(
            context.cudnn,
            self.in_desc.ptr, self.filt_desc, self.conv_desc,
            self.out_desc.ptr,
            self.algo)

        # Only allocate device memory when a workspace is actually needed.
        workspace_bytes = self.ws_size.value
        if workspace_bytes > 0:
            self.ws_ptr = drv.mem_alloc(workspace_bytes)
        else:
            self.ws_ptr = 0

        print("Convolution::configure: workspace size=%d" % self.ws_size.value)

    def fprop(self, input):
        """Convolve ``input`` into ``self.output``, add the bias, then ``check_truth``."""
        workspace = ctypes.c_void_p(int(self.ws_ptr))

        # ``start`` is not assigned in this class; presumably a timing event
        # owned by the base class (confirm).
        self.start.record()

        src_ptr = input.get_gpu_voidp()
        weight_ptr = self.W.get_gpu_voidp()
        dst_ptr = self.output.get_gpu_voidp()
        libcudnn.cudnnConvolutionForward(
            context.cudnn, self.alpha,
            self.in_desc.ptr, src_ptr,
            self.filt_desc, weight_ptr,
            self.conv_desc, self.algo,
            workspace, self.ws_size.value,
            self.beta,
            self.out_desc.ptr, dst_ptr)

        # Add the bias tensor into the convolution result on the device.
        bias_ptr = self.bias.get_gpu_voidp()
        result_ptr = self.output.get_gpu_voidp()
        libcudnn.cudnnAddTensor(context.cudnn, 1.0,
                                self.b_desc.ptr, bias_ptr,
                                1.0,
                                self.out_desc.ptr, result_ptr)

        self.check_truth()

    def __str__(self):
        """Return the base-class summary followed by the weight and bias shapes."""
        base_summary = SlidingLayer.__str__(self)
        return "%s, W=%s, b=%s" % (base_summary, self.W.shape,
                                   self.bias.shape)
コード例 #26
0
    num_errors = 0
    # A requested image count of 0 means "use every item of the data source".
    num = datasrc.num_items() if args.num_images == 0 else args.num_images

    for i in range(num):
        yt, data = datasrc.get_item()
        # Move axis 2 to the front (presumably HWC -> CHW; confirm against the
        # data source), add a leading axis of length 1 and make the result
        # contiguous in the model's dtype.
        data = np.ascontiguousarray(np.expand_dims(np.rollaxis(data, 2),
                                                   0)).astype(model.dtype)
        data = model.normalize(data)
        input_tensor = GPUTensor(data)
        y = model.evaluate(input_tensor)
        print(y, yt)
        if y != yt:
            num_errors += 1

    # An empty run would otherwise raise ZeroDivisionError in the summary.
    error_rate = 1.0 * num_errors / num if num else 0.0
    print("DONE: %d images classified, error rate=%.4f" % (num, error_rate))
コード例 #27
0
ファイル: inference.py プロジェクト: landersson/misc
class Pooling(SlidingLayer):
    """Pooling layer whose forward pass is executed by cuDNN.

    Kernel size (``kH``/``kW``), stride (``dH``/``dW``) and padding
    (``padH``/``padW``) are read from attributes that this class does not
    assign; presumably ``SlidingLayer.__init__`` derives them from ``config``
    (confirm against the base class).
    """

    class Mode(enum.IntEnum):
        """Pooling flavours a caller can request."""
        MAX = 1
        AVG = 2

    def __init__(self, mode, config, name="Pooling"):
        """Store the requested mode and reset all descriptor slots.

        Args:
            mode: requested pooling mode (see ``Mode``).
            config: layer configuration; must contain a false ``ceil_mode``.
            name: layer name forwarded to the base class.
        """
        super().__init__(config, name)
        self.mode = mode

        # configure() only implements floor-mode output sizing.
        assert (config["ceil_mode"] == False)

        self.alpha = 1.0
        self.beta = 0.0

        self.pool_desc = None
        self.in_desc = None
        self.out_desc = None

    @staticmethod
    def _pooled_extent(size, kernel, pad, stride):
        """Return floor((size - kernel + 2 * pad) / stride) + 1."""
        return int(math.floor(float(size - kernel + 2 * pad) / stride)) + 1

    def configure(self, input):
        """Size the output tensor and (re)build the cuDNN descriptors.

        Reads the four leading entries of ``input.shape`` as
        (images, channels, height, width). Descriptors kept from an earlier
        call are destroyed before new ones are created.
        """
        in_images = input.shape[0]
        in_channels = input.shape[1]
        in_height = input.shape[2]
        in_width = input.shape[3]

        assert (in_width >= self.kW)
        assert (in_height >= self.kH)

        out_width = self._pooled_extent(in_width, self.kW, self.padW, self.dW)
        out_height = self._pooled_extent(in_height, self.kH, self.padH,
                                         self.dH)

        self.output = GPUTensor(
            (in_images, in_channels, out_height, out_width), input.dtype)

        if self.pool_desc:
            libcudnn.cudnnDestroyPoolingDescriptor(self.pool_desc)
        # The tensor descriptors are wrapper objects; cuDNN is given their
        # ``.ptr`` handle, exactly as fprop() does.
        if self.in_desc:
            libcudnn.cudnnDestroyTensorDescriptor(self.in_desc.ptr)
        if self.out_desc:
            libcudnn.cudnnDestroyTensorDescriptor(self.out_desc.ptr)

        self.in_desc = input.get_cudnn_tensor_desc()
        self.out_desc = self.output.get_cudnn_tensor_desc()

        self.pool_desc = libcudnn.cudnnCreatePoolingDescriptor()
        # NOTE(review): self.mode is stored but not consulted here; the
        # descriptor is always built with CUDNN_POOLING_MAX, so Mode.AVG is
        # not honoured. Confirm the intended cuDNN average mode before wiring
        # it in.
        libcudnn.cudnnSetPooling2dDescriptor(
            self.pool_desc,
            libcudnn.cudnnPoolingMode["CUDNN_POOLING_MAX"],
            # libcudnn.cudnnNanPropagation["CUDNN_NOT_PROPAGATE_NAN"],
            self.kH,
            self.kW,
            self.padH,
            self.padW,
            self.dH,
            self.dW)

    def fprop(self, input):
        """Run the cuDNN pooling forward pass from ``input`` to ``self.output``.

        Uses the descriptors prepared by ``configure`` and finishes by
        calling ``check_truth``.
        """
        libcudnn.cudnnPoolingForward(context.cudnn, self.pool_desc,
                                     self.alpha, self.in_desc.ptr,
                                     input.get_gpu_voidp(), self.beta,
                                     self.out_desc.ptr,
                                     self.output.get_gpu_voidp())

        self.check_truth()
コード例 #28
0
ファイル: test_cudnn_softmax.py プロジェクト: landersson/misc
import numpy as np
import pycuda.autoinit
import libcudnn
import ctypes
from gputensor import GPUTensor

# scipy.misc.logsumexp was removed in SciPy 1.3; prefer scipy.special and
# fall back only for old installations that lack it.
try:
    from scipy.special import logsumexp
except ImportError:
    from scipy.misc import logsumexp

xo = np.array([1, 2, 3, 4, 5, 6, 7, 8], dtype=np.float32)
# Host-side log-softmax of the eight values, printed for comparison.
print(np.log(np.exp(xo) / np.sum(np.exp(xo))))
xn = xo.reshape((1, 8, 1, 1))

print("LOGSUMEXP", logsumexp(xn))
print(xn.shape)
x = GPUTensor(xn)
y = GPUTensor((1, 8, 1, 1), dtype=np.float32)
print(x.shape, x.dtype)
print(y.shape, y.dtype)

cudnn_context = libcudnn.cudnnCreate()

x_desc = x.get_cudnn_tensor_desc()
y_desc = y.get_cudnn_tensor_desc()

# print(libcudnn.cudnnGetTensor4dDescriptor(x_desc))
# exit(0)

# cuDNN softmax settings: log-softmax algorithm, channel mode.
algo = libcudnn.cudnnSoftmaxAlgorithm["CUDNN_SOFTMAX_LOG"]
mode = libcudnn.cudnnSoftmaxMode['CUDNN_SOFTMAX_MODE_CHANNEL']
# mode = libcudnn.cudnnSoftmaxMode['CUDNN_SOFTMAX_MODE_INSTANCE']

alpha = 1.0
コード例 #29
0
ファイル: test_cudnn_addbias.py プロジェクト: landersson/misc
import math
import numpy as np
import pycuda.autoinit
import libcudnn
from gputensor import GPUTensor

# Host input of shape (1, 3, 3, 3): every channel holds the same 3x3 plane
# whose entry at (row, col) is row + col + 1.
xh = np.array(
    [[[[row + col + 1 for col in range(3)] for row in range(3)]
      for _ in range(3)]],
    dtype=np.float32)
print(xh.shape)
print(xh)
cudnn_context = libcudnn.cudnnCreate()

x = GPUTensor(xh)
x_desc = x.get_cudnn_tensor_desc()
print(x_desc)

print("X:\n", x.get())

# One bias value per channel, laid out as (1, 3, 1, 1).
bias_host = np.arange(1, 4, dtype=np.float32).reshape((1, 3, 1, 1))
b = GPUTensor(bias_host)
b_desc = b.get_cudnn_tensor_desc()

print(x.shape)
print(b.shape)

# Add the bias tensor into x on the device; both scaling factors are 1.0.
bias_ptr = b.get_gpu_voidp()
data_ptr = x.get_gpu_voidp()
libcudnn.cudnnAddTensor(cudnn_context, 1.0, b_desc.ptr, bias_ptr,
                        1.0, x_desc.ptr, data_ptr)

print("X2:\n", x.get())
コード例 #30
0
ファイル: test_cudnn_fp16.py プロジェクト: landersson/misc
import math
import numpy as np
import pycuda.autoinit
import libcudnn
from gputensor import GPUTensor

# 4x4 half-precision plane with entry 1 + row * col, shaped (1, 1, 4, 4).
xh = np.array(
    [[[[1 + row * col for col in range(4)] for row in range(4)]]],
    dtype=np.float16)
yh = xh + 0.5
print(xh)
print(yh)
print(xh.shape)

cudnn_context = libcudnn.cudnnCreate()

x = GPUTensor(xh)
y = GPUTensor(yh)
print(x.dtype, y.dtype)

x_desc = x.get_cudnn_tensor_desc()
y_desc = y.get_cudnn_tensor_desc()
print(x_desc)

# Add x into y on the device; both scaling factors are 1.0.
x_ptr = x.get_gpu_voidp()
y_ptr = y.get_gpu_voidp()
libcudnn.cudnnAddTensor(cudnn_context, 1.0, x_desc.ptr, x_ptr,
                        1.0, y_desc.ptr, y_ptr)

# Fetch the result back to the host, then show the device tensor and dtype.
yh2 = y.get()
print(y)
print(y.dtype)
コード例 #31
0
ファイル: test_cudnn_softmax.py プロジェクト: landersson/misc
import numpy as np
import pycuda.autoinit
import libcudnn
import ctypes
from gputensor import GPUTensor

# scipy.misc.logsumexp was removed in SciPy 1.3; prefer scipy.special and
# fall back only for old installations that lack it.
try:
    from scipy.special import logsumexp
except ImportError:
    from scipy.misc import logsumexp

xo = np.array([1, 2, 3, 4, 5, 6, 7, 8], dtype=np.float32)
# Host-side log-softmax of the eight values, printed for comparison.
print(np.log(np.exp(xo) / np.sum(np.exp(xo))))
xn = xo.reshape((1, 8, 1, 1))

print("LOGSUMEXP", logsumexp(xn))
print(xn.shape)
x = GPUTensor(xn)
y = GPUTensor((1, 8, 1, 1), dtype=np.float32)
print(x.shape, x.dtype)
print(y.shape, y.dtype)

cudnn_context = libcudnn.cudnnCreate()

x_desc = x.get_cudnn_tensor_desc()
y_desc = y.get_cudnn_tensor_desc()

# print(libcudnn.cudnnGetTensor4dDescriptor(x_desc))
# exit(0)

# cuDNN softmax settings: log-softmax algorithm, channel mode.
algo = libcudnn.cudnnSoftmaxAlgorithm["CUDNN_SOFTMAX_LOG"]
mode = libcudnn.cudnnSoftmaxMode['CUDNN_SOFTMAX_MODE_CHANNEL']
# mode = libcudnn.cudnnSoftmaxMode['CUDNN_SOFTMAX_MODE_INSTANCE']

alpha = 1.0
beta = 0.0
コード例 #32
0
import math
import numpy as np
import pycuda.autoinit
import libcudnn
from gputensor import GPUTensor

# 4x4 half-precision plane with entry 1 + row * col, shaped (1, 1, 4, 4).
xh = np.array(
    [[[[1 + row * col for col in range(4)] for row in range(4)]]],
    dtype=np.float16)
yh = xh + 0.5
print(xh)
print(yh)
print(xh.shape)

cudnn_context = libcudnn.cudnnCreate()

x = GPUTensor(xh)
y = GPUTensor(yh)
print(x.dtype, y.dtype)

x_desc = x.get_cudnn_tensor_desc()
y_desc = y.get_cudnn_tensor_desc()
print(x_desc)

# Add x into y on the device; both scaling factors are 1.0.
x_ptr = x.get_gpu_voidp()
y_ptr = y.get_gpu_voidp()
libcudnn.cudnnAddTensor(cudnn_context, 1.0, x_desc.ptr, x_ptr,
                        1.0, y_desc.ptr, y_ptr)

# Fetch the result back to the host, then show the device tensor and dtype.
yh2 = y.get()
print(y)
print(y.dtype)