Example No. 1
    def apply(self, input_):
        """Perform the convolution.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 5D tensor with the axes representing batch size, number of
            channels, height, width and time.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 5D tensor of filtered images (feature maps) with dimensions
            representing batch size, number of filters, feature map height,
            feature map width and feature map time.
        """
        if self.use_bias:
            W, b = self.parameters
        else:
            W, = self.parameters

        if self.cudnn_impl:
            output = dnn_conv3d(input_,
                                W,
                                subsample=tuple(self.kernel_stride),
                                border_mode=self.padding)
        else:
            output = GpuCorr3dMM(subsample=tuple(self.step),
                                 pad=self.padding)(input_, W)
        if self.use_bias:
            if self.shared_bias:
                output += b.dimshuffle('x', 0, 'x', 'x', 'x')
            else:
                output += b.dimshuffle('x', 0, 1, 2, 3)
        return output
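
The docstring above fixes the 5D axis order (batch, channels, height, width, time), and the shared bias is added by broadcasting a 1D vector across every axis except the filter axis. A minimal NumPy sketch of that dimshuffle-style broadcast, with shapes invented for illustration (not taken from the example above):

import numpy as np

output = np.zeros((2, 4, 6, 6, 3), dtype='float32')  # (batch, filters, h, w, t)
b = np.arange(4, dtype='float32')                    # one bias per filter

# dimshuffle('x', 0, 'x', 'x', 'x') corresponds to reshaping b to (1, 4, 1, 1, 1)
biased = output + b.reshape(1, -1, 1, 1, 1)
assert biased.shape == (2, 4, 6, 6, 3)
assert np.allclose(biased[:, 1], 1.0)                # each map is shifted by its own bias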
Example No. 2
    def run_conv_valid(self,
                       inputs_shape,
                       filters_shape,
                       border_mode='valid',
                       filter_dilation=(1, 1, 1),
                       subsample=(1, 1, 1),
                       verify_grad=False):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)

        conv_ref = Corr3dMM(border_mode=border_mode,
                            filter_dilation=filter_dilation,
                            subsample=subsample)(inputs.dimshuffle(
                                0, 4, 1, 2,
                                3), filters.dimshuffle(0, 4, 1, 2, 3))
        conv_ref = conv_ref.dimshuffle(0, 2, 3, 4, 1)
        f_ref = theano.function([], conv_ref, mode='FAST_RUN')

        conv = GpuCorr3dMM(border_mode=border_mode,
                           filter_dilation=filter_dilation,
                           subsample=subsample)(inputs.dimshuffle(
                               0, 4, 1, 2,
                               3), filters.dimshuffle(0, 4, 1, 2, 3))
        conv = conv.dimshuffle(0, 2, 3, 4, 1)
        f = theano.function([], conv, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)

        if verify_grad:
            utt.verify_grad(GpuCorr3dMM(border_mode=border_mode,
                                        filter_dilation=filter_dilation,
                                        subsample=subsample),
                            [
                                inputs_val.transpose(0, 4, 1, 2, 3),
                                filters_val.transpose(0, 4, 1, 2, 3)
                            ],
                            mode=mode_with_gpu)
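
For reference, the result shape of the valid-mode test above follows from the channel-last layouts it uses; the helper below is a hypothetical illustration, not part of the test suite:

# With operands of shapes (batch, d1, d2, d3, c) and (nf, k1, k2, k3, c),
# the dimshuffled valid correlation returns (batch, o1, o2, o3, nf) with
# o_i = (d_i - k_i) // stride_i + 1.
def valid_output_shape(inputs_shape, filters_shape, subsample=(1, 1, 1)):
    batch, d1, d2, d3, _ = inputs_shape
    nf, k1, k2, k3, _ = filters_shape
    dims = [(d - k) // s + 1
            for d, k, s in zip((d1, d2, d3), (k1, k2, k3), subsample)]
    return (batch, dims[0], dims[1], dims[2], nf)

print(valid_output_shape((2, 8, 8, 8, 3), (4, 3, 3, 3, 3)))  # (2, 6, 6, 6, 4)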
Example No. 3
    def fprop_conv(self, state_below, stride=1):
        """Forward-propagate `state_below` through the conv path.

        :param state_below: 5D tensor with axes (batch, chl, x, y, z)
        """
        import theano
        import theano.tensor as T
        from theano.sandbox.cuda.blas import GpuCorr3dMM
        assert state_below.ndim == 5
        stride = int(stride)
        corr = GpuCorr3dMM(subsample=(stride, stride, stride))
        conv_rst = corr(state_below, sharedX(self.get_conv_coeff()))
        conv_rst += sharedX(self.bias).dimshuffle('x', 0, 'x', 'x', 'x')
        sqr = T.square(conv_rst)
        vsum = T.tensordot(sharedX(self.outhid_conn), sqr, axes=[1, 1])
        vsum = vsum.dimshuffle(1, 0, 2, 3, 4)
        return T.sqrt(vsum)
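
The last three lines implement an L2-style pooling over channels: squared filter responses are mixed by outhid_conn and the square root is taken. A NumPy sketch of that tensordot/dimshuffle combination, with shapes invented for illustration:

import numpy as np

batch, chl, n_hid = 2, 6, 4
sqr = np.random.rand(batch, chl, 3, 3, 3)  # squared conv responses
conn = np.random.rand(n_hid, chl)          # outhid_conn: hidden-to-channel weights

# tensordot over the channel axis followed by the dimshuffle equals
# einsum('hc,bcxyz->bhxyz', conn, sqr)
vsum = np.tensordot(conn, sqr, axes=[1, 1]).transpose(1, 0, 2, 3, 4)
ref = np.einsum('hc,bcxyz->bhxyz', conn, sqr)
assert np.allclose(vsum, ref)
print(np.sqrt(vsum).shape)                 # (2, 4, 3, 3, 3)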
Example No. 4
    def run_conv_valid(self, inputs_shape, filters_shape,
                       subsample=(1, 1, 1)):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))
        conv_ref = theano.tensor.nnet.conv3D(V=inputs, W=filters,
                                             b=bias, d=subsample)
        conv = GpuCorr3dMM(border_mode="valid",
                           subsample=subsample)(inputs.dimshuffle(0, 4, 1, 2, 3),
                                                filters.dimshuffle(0, 4, 1, 2, 3))
        conv = conv.dimshuffle(0, 2, 3, 4, 1)

        f_ref = theano.function([], conv_ref)
        f = theano.function([], conv, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
Example No. 5
    def __init__(
            self,
            input_layer,
            num_filters,
            filter_size,
            strides=(1, 1, 1),
            border_mode=None,
            W=lasagne.init.Normal(std=0.001),  # usually 0.01
            b=lasagne.init.Constant(0.),
            nonlinearity=lasagne.nonlinearities.rectify,
            pad=None,
            flip_filters=True,
            **kwargs):
        """
        input_shape: (frames, height, width)
        W_shape: (out, in, kern_frames, kern_height, kern_width)
        """
        super(Conv3dMMLayer, self).__init__(input_layer, **kwargs)

        # TODO note that lasagne allows 'untied' biases, the same shape
        # as the input filters.
        self.num_filters = num_filters
        self.filter_size = filter_size
        if strides is None:
            self.strides = (1, 1, 1)
        else:
            self.strides = tuple(strides)
        self.flip_filters = flip_filters
        if nonlinearity is None:
            self.nonlinearity = lasagne.nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        if border_mode is not None and pad is not None:
            raise RuntimeError(
                "You cannot specify both 'border_mode' and 'pad'. To avoid ambiguity, please specify only one of them."
            )
        elif border_mode is None and pad is None:
            # no option specified, default to valid mode
            self.pad = (0, 0, 0)
        elif border_mode is not None:
            if border_mode == 'valid':
                self.pad = (0, 0, 0)
            elif border_mode == 'full':
                self.pad = (self.filter_size[0] - 1, self.filter_size[1] - 1,
                            self.filter_size[2] - 1)
            elif border_mode == 'same':
                # only works for odd filter size, but the even filter size case is probably not worth supporting.
                self.pad = ((self.filter_size[0] - 1) // 2,
                            (self.filter_size[1] - 1) // 2,
                            (self.filter_size[2] - 1) // 2)
            else:
                raise RuntimeError(
                    "Unsupported border_mode for Conv3dLayer: %s" %
                    border_mode)
        else:
            self.pad = tuple(pad)

        self.W = self.add_param(W, self.get_W_shape(), name='W')
        if b is None:
            self.b = None
        else:
            self.b = self.add_param(b, (num_filters, ),
                                    name='b',
                                    regularizable=False)
        self.corr_mm_op = GpuCorr3dMM(subsample=self.strides, pad=self.pad)
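
The border_mode handling above maps 'valid', 'full' and 'same' onto explicit per-axis padding for GpuCorr3dMM. A standalone restatement of that rule, for illustration only (not the layer's code; 'same' assumes odd filter sizes):

def pad_for_border_mode(border_mode, filter_size):
    if border_mode == 'valid':
        return (0, 0, 0)
    if border_mode == 'full':
        return tuple(fs - 1 for fs in filter_size)
    if border_mode == 'same':
        return tuple((fs - 1) // 2 for fs in filter_size)
    raise RuntimeError("Unsupported border_mode: %s" % border_mode)

print(pad_for_border_mode('full', (5, 5, 5)))  # (4, 4, 4)
print(pad_for_border_mode('same', (5, 5, 5)))  # (2, 2, 2)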
Example No. 6
import sys

print(sys.version)

from voxnet import isovox
from theano.sandbox.cuda.basic_ops import gpu_contiguous
from theano.sandbox.cuda.blas import GpuCorr3dMM
import numpy as np

vis_mat = np.load('../../voxnet/scripts/weights.npz')
W1 = vis_mat['conv1.W']  # size(32,1,5,5,5)
W2 = vis_mat['conv2.W']  # size(32,32,3,3,3)

contiguous_W1 = gpu_contiguous(W1)
contiguous_W2 = gpu_contiguous(W2)
strides = (1, 1, 1)
pad = (2, 2, 2)
corr_mm_op = GpuCorr3dMM(subsample=strides, pad=pad)(contiguous_W2,
                                                     contiguous_W1)
print(corr_mm_op)

size = 32
wviz = W1[:, 0, :, :, :]
for i in range(wviz.shape[0]):
    w = wviz[i, :, :, :]
    # center the filter in the volume
    fz = len(w)
    xd = np.zeros((size, size, size))
    pad = (size - fz) // 2
    xd[pad:pad + fz, pad:pad + fz, pad:pad + fz] = w
    # keep only the largest values
    t = 0.2
    xd[xd < t] = 0
    # store as png
Example No. 7
    def conv_and_add_bias(self, x):
        x = gpu_contiguous(x)
        rval = GpuCorr3dMM(subsample=tuple(self.kernel_stride))(x,
                                                                self.filters)
        rval = rval + self.bias.dimshuffle('x', 0, 'x', 'x', 'x')
        return rval
Example No. 8
def conv3d(x,
           kernel,
           strides=(1, 1, 1),
           border_mode='valid',
           dim_ordering=_IMAGE_DIM_ORDERING,
           volume_shape=None,
           filter_shape=None,
           conv_algo="GpuCorr3dMM"):
    '''
    Run on cuDNN if available.
    border_mode: string, "same" or "valid".
    conv_algo: string,
               "conv3d2d": internally reshapes the data and performs 2d convs
               "dnn_conv3d": uses cuDNN's 3d convolution
               "GpuCorr3dMM": performs a correlation, not a convolution
                              (filter not flipped); uses the "Toeplitz"
                              matrix (which means it needs a little more memory)
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    if border_mode not in {'same', 'valid'}:
        raise Exception('Invalid border mode: ' + str(border_mode))

    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one.
        # TH input shape: (samples, input_depth, conv_dim1, conv_dim2, conv_dim3)
        # TF input shape: (samples, conv_dim1, conv_dim2, conv_dim3, input_depth)
        # TH kernel shape: (out_depth, input_depth, kernel_dim1, kernel_dim2, kernel_dim3)
        # TF kernel shape: (kernel_dim1, kernel_dim2, kernel_dim3, input_depth, out_depth)
        x = x.dimshuffle((0, 4, 1, 2, 3))
        kernel = kernel.dimshuffle((4, 3, 0, 1, 2))
        if volume_shape:
            volume_shape = (volume_shape[0], volume_shape[4], volume_shape[1],
                            volume_shape[2], volume_shape[3])
        if filter_shape:
            filter_shape = (filter_shape[4], filter_shape[3], filter_shape[0],
                            filter_shape[1], filter_shape[2])

    if border_mode == 'same':
        assert (strides == (1, 1, 1))
        pad_dim1 = (kernel.shape[2] - 1)
        pad_dim2 = (kernel.shape[3] - 1)
        pad_dim3 = (kernel.shape[4] - 1)
        output_shape = (x.shape[0], x.shape[1], x.shape[2] + pad_dim1,
                        x.shape[3] + pad_dim2, x.shape[4] + pad_dim3)
        output = T.zeros(output_shape)
        indices = (slice(None), slice(None),
                   slice(pad_dim1 // 2, x.shape[2] + pad_dim1 // 2),
                   slice(pad_dim2 // 2, x.shape[3] + pad_dim2 // 2),
                   slice(pad_dim3 // 2, x.shape[4] + pad_dim3 // 2))
        x = T.set_subtensor(output[indices], x)
        border_mode = 'valid'

    border_mode_3d = (border_mode, border_mode, border_mode)
    if conv_algo == "conv3d2d":
        conv_out = conv3d2d.conv3d(signals=x.dimshuffle(0, 2, 1, 3, 4),
                                   filters=kernel.dimshuffle(0, 2, 1, 3, 4),
                                   border_mode=border_mode_3d)
        conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)
    elif conv_algo == "dnn_conv3d":
        conv_out = dnn_conv3d(img=x, kerns=kernel, border_mode=border_mode)
    elif conv_algo == "GpuCorr3dMM":
        conv_out = GpuCorr3dMM()(x, kernel)
    else:
        raise ValueError("Unknown algorithm to perform 3d convolution: " +
                         str(conv_algo))

    # support strides by manually slicing the output
    if strides != (1, 1, 1):
        conv_out = conv_out[:, :, ::strides[0], ::strides[1], ::strides[2]]

    if dim_ordering == 'tf':
        conv_out = conv_out.dimshuffle((0, 2, 3, 4, 1))

    return conv_out
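
The 'same' branch above zero-pads the volume by kernel - 1 per spatial axis, centres the input with set_subtensor, runs a valid correlation, and only then emulates strides by slicing the full-resolution result. A short shape sketch of why this reproduces 'same' output sizes (the helper is hypothetical):

def same_then_valid_size(in_size, k, stride=1):
    padded = in_size + (k - 1)             # zero-padded spatial size
    valid = padded - (k - 1)               # valid conv output == original size
    return (valid + stride - 1) // stride  # slicing with step = stride

for k in (3, 5, 7):
    assert same_then_valid_size(10, k) == 10
print(same_then_valid_size(10, 3, stride=2))  # 5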