def create_convolution_descriptor(pad, stride, dtype,
                                  mode=cudnn.CUDNN_CROSS_CORRELATION):
    """Create and configure a cuDNN convolution descriptor.

    Args:
        pad: Sequence of per-dimension padding values. Its length selects
            the 2-D or the N-d setup path.
        stride: Sequence of per-dimension strides; must be as long as
            ``pad``.
        dtype: Data type of the convolution. It is only consulted on the
            N-d path when ``_cudnn_version >= 3000``.
        mode: Convolution mode forwarded unchanged to cuDNN.

    Returns:
        The ``Descriptor`` built from ``cudnn.createConvolutionDescriptor()``
        and ``cudnn.destroyConvolutionDescriptor``.

    Raises:
        ValueError: If ``pad`` and ``stride`` differ in length.
    """
    # The descriptor is created before validation, as in the original.
    desc = Descriptor(cudnn.createConvolutionDescriptor(),
                      cudnn.destroyConvolutionDescriptor)

    ndim = len(pad)
    if len(stride) != ndim:
        raise ValueError('pad and stride must be of same length')

    if ndim == 2:
        pad_h, pad_w = pad[0], pad[1]
        stride_h, stride_w = stride[0], stride[1]
        # The two literal 1s sit where the N-d path passes its all-ones
        # "upscale" array -- presumably the 2-D upscale factors.
        cudnn.setConvolution2dDescriptor(
            desc.value, pad_h, pad_w, stride_h, stride_w, 1, 1, mode)
        return desc

    # Keep the wrappers bound to locals until the cuDNN call has returned;
    # only their ``.data`` attribute is handed over.
    pad_arr = _to_ctypes_array(pad)
    stride_arr = _to_ctypes_array(stride)
    upscale_arr = _to_ctypes_array((1, ) * ndim)

    if _cudnn_version < 3000:
        cudnn.setConvolutionNdDescriptor_v2(
            desc.value, ndim, pad_arr.data, stride_arr.data,
            upscale_arr.data, mode)
        return desc

    data_type = get_data_type(dtype)
    # TODO(takagi) Temporarily use computing precision of FP32 for
    #     storing precision of FP16.
    if dtype == numpy.float16:
        data_type = cudnn.CUDNN_DATA_FLOAT
    cudnn.setConvolutionNdDescriptor_v3(
        desc.value, ndim, pad_arr.data, stride_arr.data,
        upscale_arr.data, mode, data_type)
    return desc
def create_convolution_descriptor(pad, stride, dtype, mode=cudnn.CUDNN_CROSS_CORRELATION):
    """Return a configured cuDNN convolution descriptor.

    A two-element ``pad`` is configured through the 2-D setter; any other
    length goes through the N-d setter, whose variant (``_v2`` or ``_v3``)
    is chosen from ``_cudnn_version``.

    Args:
        pad: Per-dimension padding sequence.
        stride: Per-dimension stride sequence, same length as ``pad``.
        dtype: Convolution data type; read only by the ``_v3`` N-d setter
            path.
        mode: Convolution mode passed through to cuDNN.

    Returns:
        A ``Descriptor`` wrapping the newly created convolution descriptor.

    Raises:
        ValueError: When ``len(pad) != len(stride)``.
    """
    descriptor = Descriptor(cudnn.createConvolutionDescriptor(), cudnn.destroyConvolutionDescriptor)

    n_dims = len(pad)
    if n_dims != len(stride):
        raise ValueError("pad and stride must be of same length")

    if n_dims != 2:
        # These stay in named locals so the wrapped buffers outlive the
        # cuDNN call that receives their ``.data`` values.
        pads = _to_ctypes_array(pad)
        strides = _to_ctypes_array(stride)
        upscales = _to_ctypes_array((1,) * n_dims)

        if _cudnn_version >= 3000:
            cudnn_dtype = get_data_type(dtype)
            # TODO(takagi) Temporarily use computing precision of FP32 for
            #     storing precision of FP16.
            if dtype == numpy.float16:
                cudnn_dtype = cudnn.CUDNN_DATA_FLOAT
            cudnn.setConvolutionNdDescriptor_v3(
                descriptor.value,
                n_dims,
                pads.data,
                strides.data,
                upscales.data,
                mode,
                cudnn_dtype,
            )
        else:
            cudnn.setConvolutionNdDescriptor_v2(
                descriptor.value,
                n_dims,
                pads.data,
                strides.data,
                upscales.data,
                mode,
            )
    else:
        # 2-D case: scalars are passed directly; the two 1s occupy the slot
        # the N-d path fills with its all-ones array.
        cudnn.setConvolution2dDescriptor(
            descriptor.value,
            pad[0],
            pad[1],
            stride[0],
            stride[1],
            1,
            1,
            mode,
        )

    return descriptor
def create_convolution_descriptor(pad, stride, dtype,
                                  mode=cudnn.CUDNN_CROSS_CORRELATION,
                                  dilation=(1, 1),
                                  use_tensor_core=False):
    """Create and configure a cuDNN convolution descriptor.

    Args:
        pad: Sequence of per-dimension padding values. A length of 2 takes
            the 2-D setup path; any other length takes the N-d path.
        stride: Sequence of per-dimension strides; must be as long as
            ``pad``.
        dtype: Data type used to pick the compute type. ``numpy.float16``
            is mapped to ``cudnn.CUDNN_DATA_FLOAT``.
        mode: Convolution mode forwarded unchanged to cuDNN.
        dilation: Sequence of per-dimension dilation factors. On the N-d
            path an all-ones value of any length (including the default
            ``(1, 1)``) means "no dilation"; a non-unit value must have one
            entry per dimension.
        use_tensor_core: If true, the 2-D path sets
            ``cudnn.CUDNN_TENSOR_OP_MATH`` on the descriptor when
            ``_cudnn_version >= 7000``. It is not consulted on the N-d path.

    Returns:
        The ``Descriptor`` built from ``cudnn.createConvolutionDescriptor()``
        and ``cudnn.destroyConvolutionDescriptor``.

    Raises:
        ValueError: If ``pad`` and ``stride`` differ in length, if a
            non-unit ``dilation`` is requested with cuDNN older than 6.0,
            or if a non-unit ``dilation`` does not have one entry per
            dimension on the N-d path.
    """
    desc = Descriptor(cudnn.createConvolutionDescriptor(),
                      cudnn.destroyConvolutionDescriptor)
    ndim = len(pad)
    if ndim != len(stride):
        raise ValueError('pad and stride must be of same length')

    if ndim == 2:
        if _cudnn_version < 6000:
            if dilation[0] != 1 or dilation[1] != 1:
                raise ValueError('dilation must be one when cudnn < 6.0')
        if _cudnn_version >= 5000:
            compute_type = get_data_type(dtype)
            # TODO(takagi) Temporarily use computing precision of FP32 for
            #     storing precision of FP16.
            if dtype == numpy.float16:
                compute_type = cudnn.CUDNN_DATA_FLOAT
            cudnn.setConvolution2dDescriptor_v5(
                desc.value, pad[0], pad[1], stride[0], stride[1],
                dilation[0], dilation[1], mode, compute_type)

            if _cudnn_version >= 7000 and use_tensor_core:
                math_type = cudnn.CUDNN_TENSOR_OP_MATH
                cudnn.setConvolutionMathType(desc.value, math_type)
        else:
            # Reaching here implies cuDNN < 5.0, so the check above has
            # already rejected any non-unit dilation.
            cudnn.setConvolution2dDescriptor_v4(
                desc.value, pad[0], pad[1], stride[0], stride[1], 1, 1, mode)
    else:
        nd_dilation = tuple(dilation)
        if all(d == 1 for d in nd_dilation):
            # "No dilation": accept any length so the 2-element default
            # keeps working for every ndim.
            nd_dilation = (1, ) * ndim
        else:
            # Previously a non-unit dilation was silently replaced by ones
            # here; now it is either honoured or rejected explicitly.
            if len(nd_dilation) != ndim:
                raise ValueError('pad and dilation must be of same length')
            if _cudnn_version < 6000:
                raise ValueError('dilation must be one when cudnn < 6.0')

        # Keep the wrappers bound to locals until the cuDNN call has
        # returned; only their ``.data`` attribute is handed over.
        c_pad = _to_ctypes_array(pad)
        c_stride = _to_ctypes_array(stride)
        c_dilation = _to_ctypes_array(nd_dilation)
        compute_type = get_data_type(dtype)
        # TODO(takagi) Temporarily use computing precision of FP32 for
        #     storing precision of FP16.
        if dtype == numpy.float16:
            compute_type = cudnn.CUDNN_DATA_FLOAT
        cudnn.setConvolutionNdDescriptor_v3(
            desc.value, ndim, c_pad.data, c_stride.data, c_dilation.data,
            mode, compute_type)

    return desc