Exemple #1
0
    def fprop(self, input):

        # print("\nConvolution::fprop: alpha=%f, beta=%f" % (self.alpha, self.beta))
        
        ws_data = ctypes.c_void_p(int(self.ws_ptr))

        self.start.record()
        libcudnn.cudnnConvolutionForward(context.cudnn, self.alpha, 
                self.in_desc.ptr, input.get_gpu_voidp(),
                self.filt_desc, self.W.get_gpu_voidp(), 
                self.conv_desc, self.algo, ws_data, self.ws_size.value, self.beta, 
                self.out_desc.ptr, self.output.get_gpu_voidp())

        libcudnn.cudnnAddTensor(context.cudnn, 1.0, self.b_desc.ptr, self.bias.get_gpu_voidp(),
                1.0, self.out_desc.ptr, self.output.get_gpu_voidp())

        self.check_truth()
Exemple #2
0
    libcudnn.cudnnSetTensor4dDescriptor(O_desc, NCHW_fmt, cu_dtype, N, K, P, Q)
    libcudnn.cudnnSetTensor4dDescriptor(E_desc, NCHW_fmt, cu_dtype, N, K, P, Q)
    libcudnn.cudnnSetFilter4dDescriptor(F_desc, cu_dtype, K, C, R, S)
    libcudnn.cudnnSetFilter4dDescriptor(U_desc, cu_dtype, K, C, R, S)

    algo    = libcudnn.cudnnGetConvolutionForwardAlgorithm(cudnn, I_desc, F_desc, C_desc, O_desc, fwd_pref, 0)
    ws_size = libcudnn.cudnnGetConvolutionForwardWorkspaceSize(cudnn, I_desc, F_desc, C_desc, O_desc, algo)

    #print algo.value, ws_size.value

    ws_ptr  = drv.mem_alloc(ws_size.value) if ws_size.value > 0 else 0
    ws_data = ctypes.c_void_p(int(ws_ptr))

    start_bench()
    for r in (range(repeat)):
        libcudnn.cudnnConvolutionForward(cudnn, alpha, I_desc, I_data, F_desc, F_data, C_desc, algo, ws_data, ws_size.value, beta, O_desc, O_data)
    end_bench("fprop")

    ws_ptr = None

    start_bench()
    for r in (range(repeat)):
        libcudnn.cudnnConvolutionBackwardData(cudnn, alpha, F_desc, F_data, E_desc, E_data, C_desc, beta, B_desc, B_data)
    end_bench("bprop")

    start_bench()
    for r in (range(repeat)):
        libcudnn.cudnnConvolutionBackwardFilter(cudnn, alpha, I_desc, I_data, E_desc, E_data, C_desc, beta, U_desc, U_data)
    end_bench("updat")

X_data = ctypes.c_void_p(int(X.gpudata))
filters_data = ctypes.c_void_p(int(filters.gpudata))
Y_data = ctypes.c_void_p(int(Y.gpudata))

# Perform convolution
algo = libcudnn.cudnnGetConvolutionForwardAlgorithm(cudnn_context, X_desc,
    filters_desc, conv_desc, Y_desc, convolution_fwd_pref, 0)

print("Cudnn algorithm = %d" % algo.value)

ws_size = libcudnn.cudnnGetConvolutionForwardWorkspaceSize(cudnn_context, X_desc, filters_desc, conv_desc, Y_desc, algo)
ws_ptr  = drv.mem_alloc(ws_size.value) if ws_size.value > 0 else 0
ws_data = ctypes.c_void_p(int(ws_ptr))

start_bench()

libcudnn.cudnnConvolutionForward(cudnn_context, alpha, X_desc, X_data,
    filters_desc, filters_data, conv_desc, algo, ws_data, ws_size.value, beta,
    Y_desc, Y_data)

end_bench("fprop")

ws_ptr = None

# Clean up
libcudnn.cudnnDestroyTensorDescriptor(X_desc)
libcudnn.cudnnDestroyTensorDescriptor(Y_desc)
libcudnn.cudnnDestroyFilterDescriptor(filters_desc)
libcudnn.cudnnDestroyConvolutionDescriptor(conv_desc)
libcudnn.cudnnDestroy(cudnn_context)
Exemple #4
0
    convolution_mode)

# Get output dimensions (first two values are n_input and filters_out)
_, _, height_output, width_output = libcudnn.cudnnGetConvolution2dForwardOutputDim(
    conv_desc, X_desc, filters_desc)

# Output tensor
Y = gpuarray.empty((n_input, filters_out, height_output, width_output), np.float32)
Y_desc = libcudnn.cudnnCreateTensorDescriptor()
libcudnn.cudnnSetTensor4dDescriptor(Y_desc, tensor_format, data_type, n_input,
    filters_out, height_output, width_output)

# Get pointers to GPU memory
X_data = ctypes.c_void_p(int(X.gpudata))
filters_data = ctypes.c_void_p(int(filters.gpudata))
Y_data = ctypes.c_void_p(int(Y.gpudata))

# Perform convolution
algo = libcudnn.cudnnGetConvolutionForwardAlgorithm(cudnn_context, X_desc,
    filters_desc, conv_desc, Y_desc, convolution_fwd_pref, 0)
libcudnn.cudnnConvolutionForward(cudnn_context, alpha, X_desc, X_data,
    filters_desc, filters_data, conv_desc, algo, None, 0, beta,
    Y_desc, Y_data)

# Clean up
libcudnn.cudnnDestroyTensorDescriptor(X_desc)
libcudnn.cudnnDestroyTensorDescriptor(Y_desc)
libcudnn.cudnnDestroyFilterDescriptor(filters_desc)
libcudnn.cudnnDestroyConvolutionDescriptor(conv_desc)
libcudnn.cudnnDestroy(cudnn_context)