Esempio n. 1
0
def create_convolution_descriptor(pad, stride, dtype, mode=cudnn.CUDNN_CROSS_CORRELATION):
    """Create and configure a cuDNN convolution descriptor.

    The number of entries in ``pad`` selects the code path: exactly two
    entries use the 2-D setter, anything else uses one of the N-d setters,
    chosen by ``_cudnn_version``.

    Args:
        pad: Sequence of padding values, one per dimension.
        stride: Sequence of stride values; must be as long as ``pad``.
        dtype: Data type of the convolution. Only read on the N-d path when
            ``_cudnn_version >= 3000``; ``numpy.float16`` is replaced by
            ``cudnn.CUDNN_DATA_FLOAT`` there.
        mode: Convolution mode forwarded unchanged to the cuDNN setter.

    Returns:
        The ``Descriptor`` wrapping the newly created cuDNN handle.

    Raises:
        ValueError: If ``pad`` and ``stride`` differ in length.
    """
    conv_desc = Descriptor(cudnn.createConvolutionDescriptor(), cudnn.destroyConvolutionDescriptor)
    n_spatial = len(pad)
    if len(stride) != n_spatial:
        raise ValueError("pad and stride must be of same length")

    if n_spatial == 2:
        # The two literal 1s sit where the N-d path passes its all-ones
        # "upscale" array -- presumably the x/y upscale factors.
        handle = conv_desc.value
        pad_0, pad_1 = pad[0], pad[1]
        stride_0, stride_1 = stride[0], stride[1]
        cudnn.setConvolution2dDescriptor(handle, pad_0, pad_1, stride_0, stride_1, 1, 1, mode)
        return conv_desc

    # General N-d case: the setters take array arguments, so convert first.
    pad_arr = _to_ctypes_array(pad)
    stride_arr = _to_ctypes_array(stride)
    upscale_arr = _to_ctypes_array((1,) * n_spatial)

    if _cudnn_version < 3000:
        # The v2 setter takes no data type argument.
        cudnn.setConvolutionNdDescriptor_v2(
            conv_desc.value, n_spatial, pad_arr.data, stride_arr.data, upscale_arr.data, mode
        )
        return conv_desc

    cudnn_dtype = get_data_type(dtype)
    # TODO(takagi) Temporarily use computing precision of FP32 for
    #     storing precision of FP16.
    if dtype == numpy.float16:
        cudnn_dtype = cudnn.CUDNN_DATA_FLOAT
    cudnn.setConvolutionNdDescriptor_v3(
        conv_desc.value, n_spatial, pad_arr.data, stride_arr.data, upscale_arr.data, mode, cudnn_dtype
    )
    return conv_desc
Esempio n. 2
0
def create_convolution_descriptor(pad,
                                  stride,
                                  dtype,
                                  mode=cudnn.CUDNN_CROSS_CORRELATION):
    """Return a cuDNN convolution descriptor set up from ``pad``/``stride``.

    Args:
        pad: Sequence of padding values; its length decides between the 2-D
            setter (length 2) and the N-d setters (any other length).
        stride: Sequence of stride values with the same length as ``pad``.
        dtype: Data type of the convolution. It is only used by the N-d
            path when ``_cudnn_version >= 3000``; in that case
            ``numpy.float16`` is swapped for ``cudnn.CUDNN_DATA_FLOAT``.
        mode: Convolution mode handed to cuDNN as-is.

    Returns:
        A ``Descriptor`` holding the configured convolution descriptor.

    Raises:
        ValueError: If ``len(pad) != len(stride)``.
    """
    descriptor = Descriptor(cudnn.createConvolutionDescriptor(),
                            cudnn.destroyConvolutionDescriptor)
    n_dims = len(pad)
    if len(stride) != n_dims:
        raise ValueError('pad and stride must be of same length')

    if n_dims != 2:
        # N-d setters expect array arguments rather than scalars.
        pad_arr = _to_ctypes_array(pad)
        stride_arr = _to_ctypes_array(stride)
        ones_arr = _to_ctypes_array((1, ) * n_dims)
        if _cudnn_version < 3000:
            # The v2 entry point has no data type parameter.
            cudnn.setConvolutionNdDescriptor_v2(
                descriptor.value, n_dims, pad_arr.data, stride_arr.data,
                ones_arr.data, mode)
        else:
            cudnn_dtype = get_data_type(dtype)
            # TODO(takagi) Temporarily use computing precision of FP32 for
            #     storing precision of FP16.
            if dtype == numpy.float16:
                cudnn_dtype = cudnn.CUDNN_DATA_FLOAT
            cudnn.setConvolutionNdDescriptor_v3(
                descriptor.value, n_dims, pad_arr.data, stride_arr.data,
                ones_arr.data, mode, cudnn_dtype)
    else:
        # 2-D case: scalars are passed directly; the two 1s occupy the
        # slots that the N-d path fills with its all-ones array.
        cudnn.setConvolution2dDescriptor(
            descriptor.value, pad[0], pad[1], stride[0], stride[1], 1, 1,
            mode)

    return descriptor
Esempio n. 3
0
def create_convolution_descriptor(pad,
                                  stride,
                                  dtype,
                                  mode=cudnn.CUDNN_CROSS_CORRELATION,
                                  dilation=(1, 1),
                                  use_tensor_core=False):
    """Create a cuDNN convolution descriptor, honouring the cuDNN version.

    Args:
        pad: Sequence of padding values; length 2 selects the 2-D setters,
            any other length selects the N-d setter.
        stride: Sequence of stride values with the same length as ``pad``.
        dtype: Data type used to derive the compute type. ``numpy.float16``
            is replaced by ``cudnn.CUDNN_DATA_FLOAT``. Not read on the 2-D
            path when ``_cudnn_version < 5000``.
        mode: Convolution mode forwarded to cuDNN.
        dilation: Pair of dilation factors. Only read on the 2-D path, and
            must be ``(1, 1)`` when ``_cudnn_version < 6000``.
        use_tensor_core: When truthy, and on the 2-D path with
            ``_cudnn_version >= 7000``, the descriptor's math type is set
            to ``cudnn.CUDNN_TENSOR_OP_MATH``.

    Returns:
        The configured ``Descriptor``.

    Raises:
        ValueError: If ``pad`` and ``stride`` differ in length, or if a
            non-unit dilation is requested for 2-D with
            ``_cudnn_version < 6000``.
    """
    conv_desc = Descriptor(cudnn.createConvolutionDescriptor(),
                           cudnn.destroyConvolutionDescriptor)
    n_dims = len(pad)
    if len(stride) != n_dims:
        raise ValueError('pad and stride must be of same length')

    if n_dims != 2:
        # NOTE(review): this path always passes unit dilation and never
        # looks at ``dilation`` or ``use_tensor_core`` -- confirm intended.
        pad_arr = _to_ctypes_array(pad)
        stride_arr = _to_ctypes_array(stride)
        dilation_arr = _to_ctypes_array((1, ) * n_dims)
        nd_compute_type = get_data_type(dtype)
        # TODO(takagi) Temporarily use computing precision of FP32 for
        #     storing precision of FP16.
        if dtype == numpy.float16:
            nd_compute_type = cudnn.CUDNN_DATA_FLOAT
        cudnn.setConvolutionNdDescriptor_v3(
            conv_desc.value, n_dims, pad_arr.data, stride_arr.data,
            dilation_arr.data, mode, nd_compute_type)
        return conv_desc

    # 2-D from here on.
    if _cudnn_version < 6000 and (dilation[0] != 1 or dilation[1] != 1):
        raise ValueError('dilation must be one when cudnn < 6.0')

    if _cudnn_version < 5000:
        # The v4 setter takes no compute type; the two 1s fill the slots
        # where the v5 call passes the dilation factors.
        cudnn.setConvolution2dDescriptor_v4(
            conv_desc.value, pad[0], pad[1], stride[0], stride[1], 1, 1,
            mode)
        return conv_desc

    compute_type = get_data_type(dtype)
    # TODO(takagi) Temporarily use computing precision of FP32 for
    #     storing precision of FP16.
    if dtype == numpy.float16:
        compute_type = cudnn.CUDNN_DATA_FLOAT
    cudnn.setConvolution2dDescriptor_v5(
        conv_desc.value, pad[0], pad[1], stride[0], stride[1],
        dilation[0], dilation[1], mode, compute_type)

    # The math type is only set when the caller asked for tensor cores and
    # the version check passes.
    if _cudnn_version >= 7000 and use_tensor_core:
        tensor_op_math = cudnn.CUDNN_TENSOR_OP_MATH
        cudnn.setConvolutionMathType(conv_desc.value, tensor_op_math)

    return conv_desc
Esempio n. 4
0
def create_convolution_descriptor(pad, stride, mode=cudnn.CUDNN_CROSS_CORRELATION):
    """Create a cuDNN convolution descriptor for the given padding and stride.

    Args:
        pad: Sequence of padding values; two entries select the 2-D setter,
            any other count selects the N-d (v2) setter.
        stride: Sequence of stride values with the same length as ``pad``.
        mode: Convolution mode forwarded unchanged to cuDNN.

    Returns:
        The ``Descriptor`` wrapping the configured cuDNN handle.

    Raises:
        ValueError: If ``pad`` and ``stride`` differ in length.
    """
    conv_desc = Descriptor(cudnn.createConvolutionDescriptor(), cudnn.destroyConvolutionDescriptor)
    n_spatial = len(pad)
    if len(stride) != n_spatial:
        raise ValueError("pad and stride must be of same length")

    if n_spatial == 2:
        # Scalars go straight in; the two 1s correspond to the all-ones
        # "upscale" array used by the N-d call below.
        cudnn.setConvolution2dDescriptor(
            conv_desc.value, pad[0], pad[1], stride[0], stride[1], 1, 1, mode
        )
        return conv_desc

    # N-d case: the setter takes array arguments.
    pad_arr = _to_ctypes_array(pad)
    stride_arr = _to_ctypes_array(stride)
    upscale_arr = _to_ctypes_array((1,) * n_spatial)
    cudnn.setConvolutionNdDescriptor_v2(
        conv_desc.value, n_spatial, pad_arr.data, stride_arr.data, upscale_arr.data, mode
    )
    return conv_desc
Esempio n. 5
0
def create_convolution_descriptor(pad, stride,
                                  mode=cudnn.CUDNN_CROSS_CORRELATION):
    """Build a cuDNN convolution descriptor from ``pad`` and ``stride``.

    Args:
        pad: Sequence of padding values. A length of two uses the 2-D
            setter; any other length uses the N-d setter.
        stride: Sequence of stride values with the same length as ``pad``.
        mode: Convolution mode passed through to cuDNN.

    Returns:
        A ``Descriptor`` holding the configured convolution descriptor.

    Raises:
        ValueError: If ``pad`` and ``stride`` differ in length.
    """
    descriptor = Descriptor(cudnn.createConvolutionDescriptor(),
                            cudnn.destroyConvolutionDescriptor)
    n_dims = len(pad)
    if len(stride) != n_dims:
        raise ValueError('pad and stride must be of same length')

    if n_dims != 2:
        # N-d case: an all-ones tuple supplies the upscale factors, and the
        # converted arrays are handed to the setter directly.
        ones = (1,) * n_dims
        handle = descriptor.value
        pad_arr = _to_ctypes_array(pad)
        stride_arr = _to_ctypes_array(stride)
        upscale_arr = _to_ctypes_array(ones)
        cudnn.setConvolutionNdDescriptor(
            handle, n_dims, pad_arr, stride_arr, upscale_arr, mode)
    else:
        # 2-D case: scalar arguments, with 1 for both upscale slots.
        cudnn.setConvolution2dDescriptor(
            descriptor.value, pad[0], pad[1], stride[0], stride[1],
            1, 1, mode)

    return descriptor