Example #1
File: env.py Project: vitteemou/DLS
def check_cudnn():
    result = {}
    try:
        result['available'] = dnn.dnn_available()
        # dnn.version() returns -1 for cuDNN v1, otherwise a version tuple
        version = dnn.version()
        if version != -1 and len(version) > 0:
            result['version'] = str(version[0])
    except Exception:
        result['available'] = False
    return result
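A minimal usage sketch, assuming the snippet runs against Theano's old GPU backend where the cuDNN wrappers live in theano.sandbox.cuda.dnn; the import and the calling code below are illustrative and not part of the original file.

# Illustrative usage (assumed import path for the old Theano GPU backend)
from theano.sandbox.cuda import dnn

info = check_cudnn()
if info.get('available'):
    print('cuDNN available, version:', info.get('version', 'unknown'))
else:
    print('cuDNN not available')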
Example #2
def test_dnn_conv_grad():
    if not cuda.dnn.dnn_available() or dnn.version() == -1:
        raise SkipTest("alpha != 1.0 not supported in cudnn v1")

    b = 1
    c = 4
    f = 3
    ih = 2
    iw = 8
    kh = 2
    kw = 2
    img_val = numpy.random.random((b, c, ih, iw)).astype("float32")
    kern_val = numpy.random.random((f, c, kh, kw)).astype("float32")
    out_val = numpy.random.random((b, f, ih - kh + 1, iw - kw + 1)).astype("float32")

    def dconv(img, kern, out):
        desc = dnn.GpuDnnConvDesc(border_mode="valid", subsample=(1, 1), conv_mode="conv")(img.shape, kern.shape)
        return dnn.GpuDnnConv()(img, kern, out, desc, alpha=0.5, beta=0.75)

    def dconvi(img, kern, out):
        desc = dnn.GpuDnnConvDesc(border_mode="valid", subsample=(1, 1), conv_mode="conv")(img.shape, kern.shape)
        return dnn.GpuDnnConvGradI()(kern, out, img, desc, alpha=-1.0, beta=0.0)

    def dconvw(img, kern, out):
        desc = dnn.GpuDnnConvDesc(border_mode="valid", subsample=(1, 1), conv_mode="conv")(img.shape, kern.shape)
        return dnn.GpuDnnConvGradW()(img, out, kern, desc, alpha=0.75, beta=-1.0)

    utt.verify_grad(dconv, [img_val, kern_val, out_val])
    utt.verify_grad(dconvi, [img_val, kern_val, out_val])
    utt.verify_grad(dconvw, [img_val, kern_val, out_val])
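The alpha and beta arguments above follow cuDNN's accumulation convention: the op's result is alpha times the fresh convolution plus beta times the supplied output buffer, which is why a pre-filled out_val is passed in. A minimal sketch of that arithmetic, with a hypothetical conv_result standing in for the raw convolution output:

import numpy

# Hypothetical raw convolution output with the valid-mode shape (b, f, ih - kh + 1, iw - kw + 1)
conv_result = numpy.random.random((1, 3, 1, 7)).astype("float32")
prior_out = numpy.random.random(conv_result.shape).astype("float32")

alpha, beta = 0.5, 0.75
# the accumulation the test exercises: alpha * conv + beta * out
new_out = alpha * conv_result + beta * prior_out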
Example #3
    def test_conv3d_gradi(self):
        if not (cuda.dnn.dnn_available() and dnn.version() >= (2000, 2000)):
            raise SkipTest('"CuDNN 3D convolution requires CuDNN v2')
        ftensor5 = T.TensorType(dtype="float32", broadcastable=(False, ) * 5)
        img = ftensor5('img')
        kerns = ftensor5('kerns')
        out = ftensor5('out')
        img_val = numpy.asarray(numpy.random.rand(8, 4, 6, 7, 5),
                                dtype='float32')
        kern_vals = numpy.asarray(numpy.random.rand(9, 4, 5, 1, 2),
                                  dtype='float32')

        for params in product(['valid', 'full'], [(1, 1, 1), (2, 2, 2)],
                              ['conv', 'cross']):
            out_vals = numpy.zeros(dnn.GpuDnnConv3d.get_out_shape(
                img_val.shape,
                kern_vals.shape,
                border_mode=params[0],
                subsample=params[1]),
                                   dtype='float32')

            desc = dnn.GpuDnnConvDesc(border_mode=params[0],
                                      subsample=params[1],
                                      conv_mode=params[2])(img.shape,
                                                           kerns.shape)
            conv_grad_i = dnn.GpuDnnConv3dGradI()(
                kerns,
                out,
                img,
                desc,
            )
            self._compile_and_check([kerns, out, img], [conv_grad_i],
                                    [kern_vals, out_vals, img_val],
                                    dnn.GpuDnnConv3dGradI)
Example #4
    def test_conv3d_gradi(self):
        if not (cuda.dnn.dnn_available() and dnn.version() >= (2000, 2000)):
            raise SkipTest('"CuDNN 3D convolution requires CuDNN v2')
        ftensor5 = T.TensorType(dtype="float32", broadcastable=(False,) * 5)
        img = ftensor5("img")
        kerns = ftensor5("kerns")
        out = ftensor5("out")
        img_val = numpy.asarray(numpy.random.rand(8, 4, 6, 7, 5), dtype="float32")
        kern_vals = numpy.asarray(numpy.random.rand(9, 4, 5, 1, 2), dtype="float32")

        for params in product(["valid", "full"], [(1, 1, 1), (2, 2, 2)], ["conv", "cross"]):
            out_vals = numpy.zeros(
                dnn.GpuDnnConv3d.get_out_shape(
                    img_val.shape, kern_vals.shape, border_mode=params[0], subsample=params[1]
                ),
                dtype="float32",
            )

            desc = dnn.GpuDnnConvDesc(border_mode=params[0], subsample=params[1], conv_mode=params[2])(
                img.shape, kerns.shape
            )
            conv_grad_i = dnn.GpuDnnConv3dGradI()(kerns, out, img, desc)
            self._compile_and_check(
                [kerns, out, img], [conv_grad_i], [kern_vals, out_vals, img_val], dnn.GpuDnnConv3dGradI
            )
Example #5
File: pool.py Project: zhjpqq/denet
    def __init__(self,
                 layers,
                 size=(2, 2),
                 stride=None,
                 pad=(0, 0),
                 mode="max",
                 ignore_border=True,
                 json_param={}):
        super().__init__(layer_index=len(layers))

        self.input = layers[-1].output
        self.input_shape = layers[-1].output_shape

        self.size = json_param.get("size", size)
        self.pad = json_param.get("pad", pad)
        self.ignore_border = json_param.get("ignoreBorder", ignore_border)
        self.mode = json_param.get("mode", mode)
        self.stride = json_param.get("stride", stride)
        if self.stride is None:
            self.stride = self.size

        # output dimensions
        if self.ignore_border:
            h = int(
                math.floor(
                    (self.input_shape[2] + 2 * self.pad[0] - self.size[0]) /
                    self.stride[0])) + 1
            w = int(
                math.floor(
                    (self.input_shape[3] + 2 * self.pad[1] - self.size[1]) /
                    self.stride[1])) + 1
        else:
            h = int(
                math.ceil(
                    (self.input_shape[2] + 2 * self.pad[0]) / self.stride[0]))
            w = int(
                math.ceil(
                    (self.input_shape[3] + 2 * self.pad[1]) / self.stride[1]))

        # the Theano optimizer sometimes fails to select cuDNN pooling, so use it explicitly when available
        use_cudnn = (dnn.dnn_available() and dnn.version() >= (4000, 4000)
                     and self.ignore_border)
        if use_cudnn:
            self.output = dnn.dnn_pool(self.input,
                                       ws=self.size,
                                       pad=self.pad,
                                       stride=self.stride,
                                       mode=self.mode)
        else:
            self.output = tensor.signal.pool.pool_2d(
                self.input,
                ds=self.size,
                padding=self.pad,
                ignore_border=self.ignore_border,
                st=self.stride,
                mode=self.mode)

        self.output_shape = (self.input_shape[0], self.input_shape[1], h, w)
        logging.verbose("Adding", self)
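A quick numeric check of the output-size arithmetic used above, with hypothetical shapes; this mirrors the ignore_border=True branch.

import math

# Hypothetical shapes: (batch, channels, height, width)
input_shape = (32, 64, 28, 28)
size, stride, pad = (2, 2), (2, 2), (0, 0)

# ignore_border=True: floor((dim + 2*pad - size) / stride) + 1
h = int(math.floor((input_shape[2] + 2 * pad[0] - size[0]) / stride[0])) + 1
w = int(math.floor((input_shape[3] + 2 * pad[1] - size[1]) / stride[1])) + 1
print(h, w)  # 14 14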
Example #6
    def get_op_params(self):
        if self.inplace:
            inpl_def = [('CONV_INPLACE', '1')]
        else:
            inpl_def = []
        if version() == -1:
            alg_def = ('CONV_ALGO', '0')
        else:
            # it seems only this works for nd convolutions?
            alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM')
        return [alg_def] + inpl_def
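For context, Theano's COp machinery is expected to turn these (name, value) pairs into C preprocessor defines when the op's C code is compiled (an assumption about how get_op_params is consumed); the list returned above amounts to something like the following:

# Roughly equivalent C defines (illustrative only):
#   #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
#   #define CONV_INPLACE 1
params = [('CONV_ALGO', 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM'),
          ('CONV_INPLACE', '1')]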
Example #7
def test_dnn_conv_grad():
    if not cuda.dnn.dnn_available() or dnn.version() == -1:
        raise SkipTest('alpha != 1.0 not supported in cudnn v1')

    b = 1
    c = 4
    f = 3
    ih = 2
    iw = 8
    kh = 2
    kw = 2
    img_val = numpy.random.random((b, c, ih, iw)).astype('float32')
    kern_val = numpy.random.random((f, c, kh, kw)).astype('float32')
    out_val = numpy.random.random(
        (b, f, ih - kh + 1, iw - kw + 1)).astype('float32')

    def dconv(img, kern, out):
        desc = dnn.GpuDnnConvDesc(border_mode='valid',
                                  subsample=(1, 1),
                                  conv_mode='conv')(img.shape, kern.shape)
        return dnn.GpuDnnConv()(img, kern, out, desc, alpha=0.5, beta=0.75)

    def dconvi(img, kern, out):
        desc = dnn.GpuDnnConvDesc(border_mode='valid',
                                  subsample=(1, 1),
                                  conv_mode='conv')(img.shape, kern.shape)
        return dnn.GpuDnnConvGradI()(kern,
                                     out,
                                     img,
                                     desc,
                                     alpha=-1.0,
                                     beta=0.0)

    def dconvw(img, kern, out):
        desc = dnn.GpuDnnConvDesc(border_mode='valid',
                                  subsample=(1, 1),
                                  conv_mode='conv')(img.shape, kern.shape)
        return dnn.GpuDnnConvGradW()(img,
                                     out,
                                     kern,
                                     desc,
                                     alpha=0.75,
                                     beta=-1.0)

    utt.verify_grad(dconv, [img_val, kern_val, out_val])
    utt.verify_grad(dconvi, [img_val, kern_val, out_val])
    utt.verify_grad(dconvw, [img_val, kern_val, out_val])
Example #8
    def test_conv3d_gradw(self):
        if not (cuda.dnn.dnn_available() and dnn.version() >= (2000, 2000)):
            raise SkipTest('"CuDNN 3D convolution requires CuDNN v2')
        ftensor5 = T.TensorType(dtype="float32", broadcastable=(False,) * 5)
        img = ftensor5('img')
        kerns = ftensor5('kerns')
        out = ftensor5('out')
        img_val = numpy.asarray(
            numpy.random.rand(9, 2, 4, 8, 7),
            dtype='float32'
        )
        kern_vals = numpy.asarray(
            numpy.random.rand(11, 2, 3, 1, 4),
            dtype='float32'
        )

        for params in product(
            ['valid', 'full'],
            [(1, 1, 1), (2, 2, 2)],
            ['conv', 'cross']
        ):
            out_vals = numpy.zeros(
                dnn.GpuDnnConv3d.get_out_shape(img_val.shape, kern_vals.shape,
                                               border_mode=params[0],
                                               subsample=params[1]),
                dtype='float32')

            desc = dnn.GpuDnnConvDesc(
                border_mode=params[0],
                subsample=params[1],
                conv_mode=params[2]
            )(img.shape, out.shape)
            conv_grad_w = dnn.GpuDnnConv3dGradW()(
                img,
                out,
                kerns,
                desc,
            )
            self._compile_and_check(
                [img, out, kerns],
                [conv_grad_w],
                [img_val, out_vals, kern_vals],
                dnn.GpuDnnConv3dGradW
            )
Example #9
def get_conv3d_test_cases():
    # Every element of test_shapes follows the format
    # [input_shape, filter_shape, subsample]
    test_shapes = [
        [(128, 3, 5, 5, 5), (64, 3, 1, 2, 4), (1, 1, 1)],
        [(8, 4, 20, 12, 15), (5, 4, 6, 12, 4), (2, 2, 2)],
        [(8, 1, 20, 12, 15), (5, 1, 6, 12, 4), (3, 3, 3)],
        [(8, 1, 20, 12, 15), (5, 1, 6, 12, 4), (3, 2, 1)],
        [(8, 1, 20, 12, 15), (5, 1, 6, 12, 4), (3, 2, 1)],
        # Test with 1x1x1 filters
        [(8, 1, 10, 10, 10), (10, 1, 1, 1, 1), (1, 1, 1)],
        # Test with dimensions larger than 1024 (thread block dim)
        [(1025, 1, 2, 3, 4), (5, 1, 1, 2, 3), (1, 1, 1)],
        [(8, 1, 2, 3, 4), (1025, 1, 1, 2, 3), (1, 1, 1)],
        [(8, 1025, 2, 3, 4), (5, 1025, 1, 1, 2), (1, 1, 1)],
        [(8, 1, 1030, 3, 4), (5, 1, 1025, 1, 1), (1, 1, 1)],
        [(8, 1, 2, 1030, 4), (5, 1, 2, 1025, 1), (1, 1, 1)],
        [(8, 1, 2, 3, 1030), (5, 1, 1, 2, 1025), (1, 1, 1)],
        # The equivalent of this caused a crash with conv2d
        [(1, 1, 1, 44800, 1), (6, 1, 1, 1, 1), (1, 1, 1)],
    ]

    # With border mode 'full', test with kernel bigger than image in some/all
    # dimensions
    test_shapes_full = [
        [(6, 2, 2, 2, 2), (4, 2, 3, 1, 1), (1, 1, 1)],
        [(6, 2, 2, 2, 2), (4, 2, 1, 3, 1), (1, 1, 1)],
        [(6, 2, 2, 2, 2), (4, 2, 1, 1, 3), (1, 1, 1)],
        [(6, 2, 2, 2, 2), (4, 2, 5, 5, 5), (1, 1, 1)],
    ]
    border_modes = ["valid", "full", (1, 2, 3), (3, 2, 1), 1, 2]
    conv_modes = ["conv", "cross"]

    if cuda.dnn.dnn_available() and dnn.version() >= (3000, 3000):
        itt = chain(product(test_shapes, border_modes, conv_modes), product(test_shapes_full, ["full"], conv_modes))
    else:
        # CuDNN, before V3, did not support kernels larger than the inputs,
        # even if the original inputs were padded so they would be larger than
        # the kernels. If using a version older than V3 don't run the tests
        # with kernels larger than the unpadded inputs.
        itt = product(test_shapes, border_modes, conv_modes)

    return itt
Example #10
def get_conv3d_test_cases():
    # Every element of test_shapes follows the format
    # [input_shape, filter_shape, subsample]
    test_shapes = [
        [(128, 3, 5, 5, 5), (64, 3, 1, 2, 4), (1, 1, 1)],
        [(8, 4, 20, 12, 15), (5, 4, 6, 12, 4), (2, 2, 2)],
        [(8, 1, 20, 12, 15), (5, 1, 6, 12, 4), (3, 3, 3)],
        [(8, 1, 20, 12, 15), (5, 1, 6, 12, 4), (3, 2, 1)],
        [(8, 1, 20, 12, 15), (5, 1, 6, 12, 4), (3, 2, 1)],
        # Test with 1x1x1 filters
        [(8, 1, 10, 10, 10), (10, 1, 1, 1, 1), (1, 1, 1)],
        # Test with dimensions larger than 1024 (thread block dim)
        [(1025, 1, 2, 3, 4), (5, 1, 1, 2, 3), (1, 1, 1)],
        [(8, 1, 2, 3, 4), (1025, 1, 1, 2, 3), (1, 1, 1)],
        [(8, 1025, 2, 3, 4), (5, 1025, 1, 1, 2), (1, 1, 1)],
        [(8, 1, 1030, 3, 4), (5, 1, 1025, 1, 1), (1, 1, 1)],
        [(8, 1, 2, 1030, 4), (5, 1, 2, 1025, 1), (1, 1, 1)],
        [(8, 1, 2, 3, 1030), (5, 1, 1, 2, 1025), (1, 1, 1)],
        # The equivalent of this caused a crash with conv2d
        [(1, 1, 1, 44800, 1), (6, 1, 1, 1, 1), (1, 1, 1)]
    ]

    # With border mode 'full', test with kernel bigger than image in some/all
    # dimensions
    test_shapes_full = [[(6, 2, 2, 2, 2), (4, 2, 3, 1, 1), (1, 1, 1)],
                        [(6, 2, 2, 2, 2), (4, 2, 1, 3, 1), (1, 1, 1)],
                        [(6, 2, 2, 2, 2), (4, 2, 1, 1, 3), (1, 1, 1)],
                        [(6, 2, 2, 2, 2), (4, 2, 5, 5, 5), (1, 1, 1)]]
    border_modes = ['valid', 'full', (1, 2, 3), (3, 2, 1), 1, 2]
    conv_modes = ['conv', 'cross']

    if cuda.dnn.dnn_available() and dnn.version() >= (3000, 3000):
        itt = chain(product(test_shapes, border_modes, conv_modes),
                    product(test_shapes_full, ['full'], conv_modes))
    else:
        # CuDNN, before V3, did not support kernels larger than the inputs,
        # even if the original inputs were padded so they would be larger than
        # the kernels. If using a version older than V3 don't run the tests
        # with kernels larger than the unpadded inputs.
        itt = product(test_shapes, border_modes, conv_modes)

    return itt
Example #11
def test_conv3d_bwd():

    if not (cuda.dnn.dnn_available() and dnn.version() >= (2000, 2000)):
        raise SkipTest('"CuDNN 3D convolution requires CuDNN v2')

    def run_conv3d_bwd(inputs_shape, filters_shape, subsample,
                       border_mode, conv_mode):

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

        # Compile a theano function for the CuDNN implementation
        conv = dnn.dnn_conv3d(img=inputs, kerns=filters,
                              border_mode=border_mode, subsample=subsample,
                              conv_mode=conv_mode)

        grad_i, grad_w = theano.tensor.grad(conv.sum(), [inputs, filters])

        f = theano.function([], [grad_i, grad_w], mode=mode_with_gpu)

        # If conv_mode is 'conv' the reference implementation should use
        # filters flipped according to the width, height and time axis
        if conv_mode == 'conv':
            flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
        else:
            flipped_filters = filters

        # If border mode is anything but 'valid', the reference implementation
        # should operate on padded inputs
        if border_mode == 'valid':
            padded_inputs = inputs
        else:
            if border_mode == 'full':
                pad_per_dim = [filters_shape[i] - 1 for i in range(2, 5)]
            else:
                if isinstance(border_mode, int):
                    pad_per_dim = [border_mode] * 3
                else:
                    pad_per_dim = border_mode

            pad_before_after = ([(0, 0), (0, 0)] +
                                [(p, p) for p in pad_per_dim])
            padded_inputs_val = numpy.pad(inputs_val, pad_before_after,
                                          'constant')
            padded_inputs = shared(padded_inputs_val)

        # Compile a theano function for the reference implementation
        conv_ref = theano.tensor.nnet.conv3D(
            V=padded_inputs.dimshuffle(0, 2, 3, 4, 1),
            W=flipped_filters.dimshuffle(0, 2, 3, 4, 1),
            b=bias, d=subsample)
        (grad_padded_i_ref,
         grad_w_ref) = theano.tensor.grad(conv_ref.sum(),
                                          [padded_inputs, filters])

        # Recover grad_i_ref from grad_padded_i_ref
        if border_mode == 'valid':
            grad_i_ref = grad_padded_i_ref
        else:
            shp = grad_padded_i_ref.shape
            grad_i_ref = grad_padded_i_ref[
                :, :,
                pad_per_dim[0]:shp[2] - pad_per_dim[0],
                pad_per_dim[1]:shp[3] - pad_per_dim[1],
                pad_per_dim[2]:shp[4] - pad_per_dim[2]]

        f_ref = theano.function([], [grad_i_ref, grad_w_ref])

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref[0], res[0])
        utt.assert_allclose(res_ref[1], res[1])

    test_cases = get_conv3d_test_cases()
    for (i_shape, f_shape, subsample), border_mode, conv_mode in test_cases:
        yield (run_conv3d_bwd, i_shape, f_shape, subsample, border_mode,
               conv_mode)
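A small worked example of the reference padding computed above for border_mode='full', using one of the filter shapes from get_conv3d_test_cases; purely illustrative.

filters_shape = (5, 4, 6, 12, 4)                                     # from the test shapes above
pad_per_dim = [filters_shape[i] - 1 for i in range(2, 5)]            # [5, 11, 3]
pad_before_after = [(0, 0), (0, 0)] + [(p, p) for p in pad_per_dim]
# -> [(0, 0), (0, 0), (5, 5), (11, 11), (3, 3)]: pad both sides of each spatial axis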
Example #12
def test_conv3d_fwd():

    if not (cuda.dnn.dnn_available() and dnn.version() >= (2000, 2000)):
        raise SkipTest('"CuDNN 3D convolution requires CuDNN v2')

    def run_conv3d_fwd(inputs_shape, filters_shape, subsample,
                       border_mode, conv_mode):

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        # Scale down the input values to prevent very large absolute errors
        # due to float rounding
        inputs_val /= 10
        filters_val /= 10

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

        # Compile a theano function for the CuDNN implementation
        conv = dnn.dnn_conv3d(img=inputs, kerns=filters,
                              border_mode=border_mode, subsample=subsample,
                              conv_mode=conv_mode)
        f = theano.function([], conv, mode=mode_with_gpu)

        # If conv_mode is 'conv' the reference implementation should use
        # filters flipped according to the width, height and time axis
        if conv_mode == 'conv':
            flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
        else:
            flipped_filters = filters

        # If border mode is anything but 'valid', the reference implementation
        # should operate on padded inputs
        if border_mode == 'valid':
            padded_inputs = inputs
        else:
            if border_mode == 'full':
                pad_per_dim = [filters_shape[i] - 1 for i in range(2, 5)]
            else:
                if isinstance(border_mode, int):
                    pad_per_dim = [border_mode] * 3
                else:
                    pad_per_dim = border_mode

            pad_before_after = ([(0, 0), (0, 0)] +
                                [(p, p) for p in pad_per_dim])
            padded_inputs_val = numpy.pad(inputs_val, pad_before_after,
                                          'constant')
            padded_inputs = shared(padded_inputs_val)

        # Compile a theano function for the reference implementation
        conv_ref = theano.tensor.nnet.conv3D(
            V=padded_inputs.dimshuffle(0, 2, 3, 4, 1),
            W=flipped_filters.dimshuffle(0, 2, 3, 4, 1),
            b=bias, d=subsample)
        f_ref = theano.function([], conv_ref.dimshuffle(0, 4, 1, 2, 3))

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)

    test_cases = get_conv3d_test_cases()
    for (i_shape, f_shape, subsample), border_mode, conv_mode in test_cases:
        yield (run_conv3d_fwd, i_shape, f_shape, subsample, border_mode,
               conv_mode)
Example #13
def test_conv3d_bwd():

    if not (cuda.dnn.dnn_available() and dnn.version() >= (2000, 2000)):
        raise SkipTest('"CuDNN 3D convolution requires CuDNN v2')

    def run_conv3d_bwd(inputs_shape, filters_shape, subsample, border_mode,
                       conv_mode):

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

        # Compile a theano function for the CuDNN implementation
        conv = dnn.dnn_conv3d(img=inputs,
                              kerns=filters,
                              border_mode=border_mode,
                              subsample=subsample,
                              conv_mode=conv_mode)

        grad_i, grad_w = theano.tensor.grad(conv.sum(), [inputs, filters])

        f = theano.function([], [grad_i, grad_w], mode=mode_with_gpu)

        # If conv_mode is 'conv' the reference implementation should use
        # filters flipped according to the width, height and time axis
        if conv_mode == 'conv':
            flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
        else:
            flipped_filters = filters

        # If border mode is anything but 'valid', the reference implementation
        # should operate on padded inputs
        if border_mode == 'valid':
            padded_inputs = inputs
        else:
            if border_mode == 'full':
                pad_per_dim = [filters_shape[i] - 1 for i in range(2, 5)]
            else:
                if isinstance(border_mode, int):
                    pad_per_dim = [border_mode] * 3
                else:
                    pad_per_dim = border_mode

            pad_before_after = ([(0, 0),
                                 (0, 0)] + [(p, p) for p in pad_per_dim])
            padded_inputs_val = numpy.pad(inputs_val, pad_before_after,
                                          'constant')
            padded_inputs = shared(padded_inputs_val)

        # Compile a theano function for the reference implementation
        conv_ref = theano.tensor.nnet.conv3D(
            V=padded_inputs.dimshuffle(0, 2, 3, 4, 1),
            W=flipped_filters.dimshuffle(0, 2, 3, 4, 1),
            b=bias,
            d=subsample)
        (grad_padded_i_ref,
         grad_w_ref) = theano.tensor.grad(conv_ref.sum(),
                                          [padded_inputs, filters])

        # Recover grad_i_ref from grad_padded_i_ref
        if border_mode == 'valid':
            grad_i_ref = grad_padded_i_ref
        else:
            shp = grad_padded_i_ref.shape
            grad_i_ref = grad_padded_i_ref[
                :, :,
                pad_per_dim[0]:shp[2] - pad_per_dim[0],
                pad_per_dim[1]:shp[3] - pad_per_dim[1],
                pad_per_dim[2]:shp[4] - pad_per_dim[2]]

        f_ref = theano.function([], [grad_i_ref, grad_w_ref])

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref[0], res[0])
        utt.assert_allclose(res_ref[1], res[1])

    test_cases = get_conv3d_test_cases()
    for (i_shape, f_shape, subsample), border_mode, conv_mode in test_cases:
        yield (run_conv3d_bwd, i_shape, f_shape, subsample, border_mode,
               conv_mode)
Example #14
def test_conv3d_fwd():

    if not (cuda.dnn.dnn_available() and dnn.version() >= (2000, 2000)):
        raise SkipTest('"CuDNN 3D convolution requires CuDNN v2')

    def run_conv3d_fwd(inputs_shape, filters_shape, subsample, border_mode,
                       conv_mode):

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        # Scale down the input values to prevent very large absolute errors
        # due to float rounding
        inputs_val /= 10
        filters_val /= 10

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

        # Compile a theano function for the CuDNN implementation
        conv = dnn.dnn_conv3d(img=inputs,
                              kerns=filters,
                              border_mode=border_mode,
                              subsample=subsample,
                              conv_mode=conv_mode)
        f = theano.function([], conv, mode=mode_with_gpu)

        # If conv_mode is 'conv' the reference implementation should use
        # filters flipped according to the width, height and time axis
        if conv_mode == 'conv':
            flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
        else:
            flipped_filters = filters

        # If border mode is anything but 'valid', the reference implementation
        # should operate on padded inputs
        if border_mode == 'valid':
            padded_inputs = inputs
        else:
            if border_mode == 'full':
                pad_per_dim = [filters_shape[i] - 1 for i in range(2, 5)]
            else:
                if isinstance(border_mode, int):
                    pad_per_dim = [border_mode] * 3
                else:
                    pad_per_dim = border_mode

            pad_before_after = ([(0, 0),
                                 (0, 0)] + [(p, p) for p in pad_per_dim])
            padded_inputs_val = numpy.pad(inputs_val, pad_before_after,
                                          'constant')
            padded_inputs = shared(padded_inputs_val)

        # Compile a theano function for the reference implementation
        conv_ref = theano.tensor.nnet.conv3D(
            V=padded_inputs.dimshuffle(0, 2, 3, 4, 1),
            W=flipped_filters.dimshuffle(0, 2, 3, 4, 1),
            b=bias,
            d=subsample)
        f_ref = theano.function([], conv_ref.dimshuffle(0, 4, 1, 2, 3))

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)

    test_cases = get_conv3d_test_cases()
    for (i_shape, f_shape, subsample), border_mode, conv_mode in test_cases:
        yield (run_conv3d_fwd, i_shape, f_shape, subsample, border_mode,
               conv_mode)
Example #15
def local_dnn3d_convi_alpha_merge(node, *inputs):
    if not dnn_available() or version() == -1:
        return None
    return [GpuDnn3dConvGradI()(*inputs)]
Example #16
    def c_code_cache_version(self):
        return (2, version())