def apply(self, input_):
    """Convolve a 5D input with the brick's filters and add the bias.

    Parameters
    ----------
    input_ : :class:`~tensor.TensorVariable`
        A 5D tensor with the axes representing batch size, number of
        channels, height, width and time.

    Returns
    -------
    output : :class:`~tensor.TensorVariable`
        A 5D tensor of filtered images (feature maps) with dimensions
        representing batch size, number of filters, feature map height,
        feature map width and feature map time.

    """
    # ``self.parameters`` holds (filters, bias) when the bias is enabled
    # and only (filters,) otherwise.
    if self.use_bias:
        filters, bias = self.parameters
    else:
        filters, = self.parameters

    if self.cudnn_impl:
        output = dnn_conv3d(input_, filters,
                            subsample=tuple(self.kernel_stride),
                            border_mode=self.padding)
    else:
        # NOTE(review): this branch reads ``self.step`` while the cuDNN
        # branch reads ``self.kernel_stride`` -- confirm both attributes
        # exist on the brick and are meant to carry the same value.
        correlate = GpuCorr3dMM(subsample=tuple(self.step),
                                pad=self.padding)
        output = correlate(input_, filters)

    if self.use_bias:
        # A shared bias has a single axis that is broadcast over batch
        # and the three trailing axes; otherwise the bias supplies four
        # axes and is broadcast over the batch axis only.
        if self.shared_bias:
            pattern = ('x', 0, 'x', 'x', 'x')
        else:
            pattern = ('x', 0, 1, 2, 3)
        output += bias.dimshuffle(*pattern)
    return output
def run_conv_valid(self, inputs_shape, filters_shape, border_mode='valid',
                   filter_dilation=(1, 1, 1), subsample=(1, 1, 1),
                   verify_grad=False):
    """Compare ``GpuCorr3dMM`` against ``Corr3dMM`` on random float32 data.

    Random inputs and filters of the requested shapes are stored in
    shared variables.  Both ops are built with the same ``border_mode``,
    ``filter_dilation`` and ``subsample``; their operands are
    dimshuffled with ``(0, 4, 1, 2, 3)`` and their results with
    ``(0, 2, 3, 4, 1)``.  The reference graph is compiled in
    ``'FAST_RUN'`` mode and the GPU graph with ``mode_with_gpu``; the
    two results must be close.  When ``verify_grad`` is true the
    gradient of the GPU op is checked as well, on the transposed raw
    arrays.
    """
    op_kwargs = dict(border_mode=border_mode,
                     filter_dilation=filter_dilation,
                     subsample=subsample)

    def last_axis_to_second(variable):
        # Move axis 4 to position 1.
        return variable.dimshuffle(0, 4, 1, 2, 3)

    def second_axis_to_last(variable):
        # Inverse permutation of ``last_axis_to_second``.
        return variable.dimshuffle(0, 2, 3, 4, 1)

    # Draw the inputs first, then the filters, so the random stream is
    # consumed in a fixed order.
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)

    # Reference implementation.
    reference_op = Corr3dMM(**op_kwargs)
    conv_ref = second_axis_to_last(
        reference_op(last_axis_to_second(inputs),
                     last_axis_to_second(filters)))
    f_ref = theano.function([], conv_ref, mode='FAST_RUN')

    # GPU implementation under test.
    gpu_op = GpuCorr3dMM(**op_kwargs)
    conv = second_axis_to_last(
        gpu_op(last_axis_to_second(inputs),
               last_axis_to_second(filters)))
    f = theano.function([], conv, mode=mode_with_gpu)

    res_ref = f_ref()
    res = f()
    utt.assert_allclose(res_ref, res)

    if verify_grad:
        grad_inputs = [inputs_val.transpose(0, 4, 1, 2, 3),
                       filters_val.transpose(0, 4, 1, 2, 3)]
        utt.verify_grad(GpuCorr3dMM(**op_kwargs), grad_inputs,
                        mode=mode_with_gpu)
def fprop_conv(self, state_below, stride=1):
    """Build the symbolic forward pass of this layer using a 3D correlation.

    The input is correlated with the layer's convolution coefficients,
    the bias is added along axis 1, the result is squared, contracted
    with ``self.outhid_conn`` over axis 1 of both operands, and the
    square root of that sum is returned.

    :param state_below: 5D symbolic input laid out as batch, chl, x, y, z
    :param stride: converted with ``int`` and used as the subsample step
        on each of the three trailing axes
    :return: symbolic expression for the square root of the pooled squares
    """
    import theano
    import theano.tensor as T
    from theano.sandbox.cuda.blas import GpuCorr3dMM

    assert state_below.ndim == 5

    step = int(stride)
    correlate = GpuCorr3dMM(subsample=(step, step, step))

    filters = sharedX(self.get_conv_coeff())
    response = correlate(state_below, filters)
    # The bias has one axis; broadcast it over every axis except axis 1.
    response += sharedX(self.bias).dimshuffle('x', 0, 'x', 'x', 'x')

    squared = T.square(response)
    pooled = T.tensordot(sharedX(self.outhid_conn), squared, axes=[1, 1])
    # tensordot puts the remaining axis of ``outhid_conn`` first; swap it
    # with the following axis.
    pooled = pooled.dimshuffle(1, 0, 2, 3, 4)
    return T.sqrt(pooled)
def run_conv_valid(self, inputs_shape, filters_shape, subsample=(1, 1, 1)):
    """Compare ``GpuCorr3dMM`` in ``"valid"`` mode against ``conv3D``.

    Random float32 inputs and filters of the requested shapes are put in
    shared variables, together with an all-zero bias of length
    ``filters_shape[0]`` for the reference op.  The GPU op receives its
    operands dimshuffled with ``(0, 4, 1, 2, 3)`` and its result is
    dimshuffled with ``(0, 2, 3, 4, 1)`` before the two outputs are
    compared.
    """
    # Inputs are drawn before filters so the random stream is consumed
    # in a fixed order.
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    zero_bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

    # Reference graph.
    conv_ref = theano.tensor.nnet.conv3D(V=inputs, W=filters, b=zero_bias,
                                         d=subsample)

    # GPU graph under test.
    gpu_op = GpuCorr3dMM(border_mode="valid", subsample=subsample)
    shuffled_inputs = inputs.dimshuffle(0, 4, 1, 2, 3)
    shuffled_filters = filters.dimshuffle(0, 4, 1, 2, 3)
    conv = gpu_op(shuffled_inputs, shuffled_filters)
    conv = conv.dimshuffle(0, 2, 3, 4, 1)

    f_ref = theano.function([], conv_ref)
    f = theano.function([], conv, mode=mode_with_gpu)

    res_ref = f_ref()
    res = f()
    utt.assert_allclose(res_ref, res)
def __init__(
        self,
        input_layer,
        num_filters,
        filter_size,
        strides=(1, 1, 1),
        border_mode=None,
        W=lasagne.init.Normal(std=0.001),  # usually 0.01
        b=lasagne.init.Constant(0.),
        nonlinearity=lasagne.nonlinearities.rectify,
        pad=None,
        flip_filters=True,
        **kwargs):
    """Set up a 3D correlation layer backed by ``GpuCorr3dMM``.

    input_shape: (frames, height, width)
    W_shape: (out, in, kern_frames, kern_height, kern_width)

    ``strides=None`` is treated as ``(1, 1, 1)`` and ``nonlinearity=None``
    as the identity.  Padding comes from either ``border_mode``
    (``'valid'``, ``'full'`` or ``'same'``) or an explicit ``pad``
    triple; giving both raises ``RuntimeError``, giving neither means no
    padding.  ``b=None`` creates a layer without a bias parameter.
    Remaining keyword arguments are forwarded to the parent constructor.
    """
    super(Conv3dMMLayer, self).__init__(input_layer, **kwargs)

    # TODO note that lasagne allows 'untied' biases, the same shape
    # as the input filters.
    self.num_filters = num_filters
    self.filter_size = filter_size
    self.strides = (1, 1, 1) if strides is None else tuple(strides)
    self.flip_filters = flip_filters
    self.nonlinearity = (lasagne.nonlinearities.identity
                         if nonlinearity is None else nonlinearity)

    if border_mode is not None and pad is not None:
        raise RuntimeError(
            "You cannot specify both 'border_mode' and 'pad'. "
            "To avoid ambiguity, please specify only one of them.")

    if border_mode is None:
        # Either an explicit pad was given, or nothing was specified and
        # the layer defaults to valid mode (no padding).
        self.pad = (0, 0, 0) if pad is None else tuple(pad)
    elif border_mode == 'valid':
        self.pad = (0, 0, 0)
    elif border_mode == 'full':
        kernel = self.filter_size
        self.pad = (kernel[0] - 1, kernel[1] - 1, kernel[2] - 1)
    elif border_mode == 'same':
        # only works for odd filter size, but the even filter size case
        # is probably not worth supporting.
        kernel = self.filter_size
        self.pad = ((kernel[0] - 1) // 2,
                    (kernel[1] - 1) // 2,
                    (kernel[2] - 1) // 2)
    else:
        raise RuntimeError(
            "Unsupported border_mode for Conv3dLayer: %s" % border_mode)

    self.W = self.add_param(W, self.get_W_shape(), name='W')
    if b is None:
        self.b = None
    else:
        self.b = self.add_param(b, (num_filters, ), name='b',
                                regularizable=False)

    self.corr_mm_op = GpuCorr3dMM(subsample=self.strides, pad=self.pad)
# Script: load trained voxnet convolution weights, build a GpuCorr3dMM
# expression from the two weight tensors, and prepare one thresholded
# size**3 volume per first-layer filter for visualisation.
print(sys.version)

from voxnet import isovox
from theano.sandbox.cuda.basic_ops import gpu_contiguous
from theano.sandbox.cuda.blas import GpuCorr3dMM
import numpy as np

vis_mat = np.load('../../voxnet/scripts/weights.npz')
W1 = vis_mat['conv1.W']  # size(32,1,5,5,5)
W2 = vis_mat['conv2.W']  # size(32,32,3,3,3)
contiguous_W1 = gpu_contiguous(W1)
contiguous_W2 = gpu_contiguous(W2)

strides = (1, 1, 1)
pad = (2, 2, 2)
corr_mm_op = GpuCorr3dMM(subsample=strides, pad=pad)(contiguous_W2,
                                                     contiguous_W1)
print(corr_mm_op)

size = 32
# Keep only input channel 0 of every first-layer filter.
wviz = W1[:, 0, :, :, :]
for i in range(wviz.shape[0]):
    w = wviz[i, :, :, :]
    # centerize the plot
    fz = len(w)
    xd = np.zeros((size, size, size))
    # Floor division: the offset is used as a slice bound and must be an
    # int.  ``/`` produces a float under Python 3 and makes the slicing
    # below raise TypeError.  (``pad`` is rebound here, as in the original
    # script; the padding tuple above has already been consumed.)
    pad = (size - fz) // 2
    xd[pad:pad + fz, pad:pad + fz, pad:pad + fz] = w
    # only visualize the largest value
    t = 0.2
    xd[xd < t] = 0
    # store as png
def conv_and_add_bias(self, x):
    """Correlate ``x`` with ``self.filters`` and add ``self.bias``.

    The input is passed through ``gpu_contiguous`` before the
    ``GpuCorr3dMM`` op, which is built with ``self.kernel_stride`` as
    its subsample step.  The bias has one axis and is broadcast along
    every axis of the 5D result except axis 1.
    """
    contiguous_input = gpu_contiguous(x)
    correlate = GpuCorr3dMM(subsample=tuple(self.kernel_stride))
    response = correlate(contiguous_input, self.filters)
    broadcast_bias = self.bias.dimshuffle('x', 0, 'x', 'x', 'x')
    return response + broadcast_bias
def conv3d(x, kernel, strides=(1, 1, 1), border_mode='valid',
           dim_ordering=_IMAGE_DIM_ORDERING, volume_shape=None,
           filter_shape=None, conv_algo="GpuCorr3dMM"):
    '''Apply a 3D convolution with the backend selected by ``conv_algo``.

    x: 5D symbolic input, laid out according to ``dim_ordering``.
    kernel: 5D symbolic filter bank, laid out according to
        ``dim_ordering``.
    strides: three ints. Strides are applied by slicing the unit-stride
        output, and must be ``(1, 1, 1)`` when ``border_mode`` is
        "same".
    border_mode: string, "same" or "valid". "same" is implemented by
        zero-padding the input and then running a "valid" convolution.
    dim_ordering: "th" (channels on axis 1) or "tf" (channels on the
        last axis).
    volume_shape, filter_shape: optional shape tuples in ``dim_ordering``
        layout. They are reordered for "tf" but not passed to any
        backend.
    conv_algo: string,
        "conv3d2d": internally reshapes the data and performs 2d convs
        "dnn_conv3d": uses CuDNNs 3d convolution
        "GpuCorr3dMM": performs a correlation, not a convolution (filter
            not flipped), uses the "Toeplitz"-matrix (which means it
            needs a little more memory)

    Returns the symbolic output in the same ``dim_ordering`` layout as
    the input.

    Raises ``Exception`` for an unknown ``dim_ordering`` or
    ``border_mode`` and ``ValueError`` for an unknown ``conv_algo``.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    if border_mode not in {'same', 'valid'}:
        raise Exception('Invalid border mode: ' + str(border_mode))

    # Accept any sequence (e.g. a list) for strides; the comparisons
    # below are against tuples.
    strides = tuple(strides)

    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one.
        # TH input shape: (samples, input_depth, conv_dim1, conv_dim2, conv_dim3)
        # TF input shape: (samples, conv_dim1, conv_dim2, conv_dim3, input_depth)
        # TH kernel shape: (out_depth, input_depth, kernel_dim1, kernel_dim2, kernel_dim3)
        # TF kernel shape: (kernel_dim1, kernel_dim2, kernel_dim3, input_depth, out_depth)
        x = x.dimshuffle((0, 4, 1, 2, 3))
        kernel = kernel.dimshuffle((4, 3, 0, 1, 2))
        if volume_shape:
            volume_shape = (volume_shape[0], volume_shape[4],
                            volume_shape[1], volume_shape[2],
                            volume_shape[3])
        if filter_shape:
            filter_shape = (filter_shape[4], filter_shape[3],
                            filter_shape[0], filter_shape[1],
                            filter_shape[2])

    if border_mode == 'same':
        assert (strides == (1, 1, 1))
        # Embed x in a zero volume enlarged by (kernel size - 1) along
        # each convolved axis, so that a "valid" convolution returns an
        # output with the spatial size of x.
        pad_dim1 = (kernel.shape[2] - 1)
        pad_dim2 = (kernel.shape[3] - 1)
        pad_dim3 = (kernel.shape[4] - 1)
        output_shape = (x.shape[0], x.shape[1],
                        x.shape[2] + pad_dim1,
                        x.shape[3] + pad_dim2,
                        x.shape[4] + pad_dim3)
        output = T.zeros(output_shape)
        indices = (slice(None), slice(None),
                   slice(pad_dim1 // 2, x.shape[2] + pad_dim1 // 2),
                   slice(pad_dim2 // 2, x.shape[3] + pad_dim2 // 2),
                   slice(pad_dim3 // 2, x.shape[4] + pad_dim3 // 2))
        x = T.set_subtensor(output[indices], x)
        border_mode = 'valid'

    border_mode_3d = (border_mode, border_mode, border_mode)

    if conv_algo == "conv3d2d":
        # conv3d2d receives its operands with axes 1 and 2 swapped; the
        # same swap is undone on the result.
        conv_out = conv3d2d.conv3d(signals=x.dimshuffle(0, 2, 1, 3, 4),
                                   filters=kernel.dimshuffle(0, 2, 1, 3, 4),
                                   border_mode=border_mode_3d)
        conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)
    elif conv_algo == "dnn_conv3d":
        conv_out = dnn_conv3d(img=x, kerns=kernel, border_mode=border_mode)
    elif conv_algo == "GpuCorr3dMM":
        # border_mode is always 'valid' at this point; the op is built
        # with its default arguments (presumably 'valid' as well --
        # confirm against the Theano version in use).
        conv_out = GpuCorr3dMM()(x, kernel)
    else:
        raise ValueError("Unknown algorithm to perform 3d convolution")

    # support strides by manually slicing the output
    if strides != (1, 1, 1):
        conv_out = conv_out[:, :, ::strides[0], ::strides[1], ::strides[2]]

    if dim_ordering == 'tf':
        conv_out = conv_out.dimshuffle((0, 2, 3, 4, 1))

    return conv_out