def apply(self, input_):
    """Convolve a 5D input with the brick's filters and add the bias, if any.

    Parameters
    ----------
    input_ : :class:`~tensor.TensorVariable`
        A 5D tensor with the axes representing batch size, number of
        channels, height, width and time.

    Returns
    -------
    output : :class:`~tensor.TensorVariable`
        A 5D tensor of filtered images (feature maps) with dimensions
        representing batch size, number of filters, feature map height,
        feature map width and feature map time.

    """
    # ``self.parameters`` holds (W, b) when a bias is used, otherwise (W,).
    bias = None
    if self.use_bias:
        weights, bias = self.parameters
    else:
        weights, = self.parameters

    # NOTE(review): the cuDNN path reads ``self.kernel_stride`` while the
    # GpuCorr3dMM path reads ``self.step`` -- confirm both are intended.
    if not self.cudnn_impl:
        correlate = GpuCorr3dMM(subsample=tuple(self.step),
                                pad=self.padding)
        output = correlate(input_, weights)
    else:
        output = dnn_conv3d(input_, weights,
                            subsample=tuple(self.kernel_stride),
                            border_mode=self.padding)

    if self.use_bias:
        # A shared bias has one value per filter and is broadcast over the
        # three trailing axes; otherwise the bias carries those axes itself.
        if self.shared_bias:
            pattern = ('x', 0, 'x', 'x', 'x')
        else:
            pattern = ('x', 0, 1, 2, 3)
        output += bias.dimshuffle(*pattern)
    return output
def run_conv3d_fwd(inputs_shape, filters_shape, subsample,
                   border_mode, conv_mode):
    """Check the forward pass of ``dnn.dnn_conv3d`` against ``conv3D``.

    Random inputs and filters of the given shapes are convolved once with
    the cuDNN op and once with ``theano.tensor.nnet.conv3D``; the two
    results must agree according to ``utt.assert_allclose``.
    """
    img_data = numpy.random.random(inputs_shape).astype('float32')
    kern_data = numpy.random.random(filters_shape).astype('float32')

    # Keep the magnitudes small so that float rounding does not produce
    # very large absolute errors.
    img_data /= 10
    kern_data /= 10

    inputs = shared(img_data)
    filters = shared(kern_data)
    bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

    # cuDNN implementation under test.
    conv = dnn.dnn_conv3d(img=inputs, kerns=filters,
                          border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)
    f = theano.function([], conv, mode=mode_with_gpu)

    # In 'conv' mode the reference implementation gets filters flipped
    # along the width, height and time axes.
    flipped_filters = filters
    if conv_mode == 'conv':
        flipped_filters = filters[:, :, ::-1, ::-1, ::-1]

    # For any border mode other than 'valid' the reference implementation
    # operates on explicitly zero-padded inputs.
    if border_mode == 'valid':
        padded_inputs = inputs
    else:
        if border_mode == 'full':
            pad_per_dim = [filters_shape[axis] - 1 for axis in range(2, 5)]
        elif isinstance(border_mode, int):
            pad_per_dim = [border_mode] * 3
        else:
            pad_per_dim = border_mode
        pad_widths = [(0, 0), (0, 0)]
        pad_widths = pad_widths + [(p, p) for p in pad_per_dim]
        padded_inputs = shared(numpy.pad(img_data, pad_widths, 'constant'))

    # Reference implementation: conv3D takes the channel axis last.
    conv_ref = theano.tensor.nnet.conv3D(
        V=padded_inputs.dimshuffle(0, 2, 3, 4, 1),
        W=flipped_filters.dimshuffle(0, 2, 3, 4, 1),
        b=bias, d=subsample)
    f_ref = theano.function([], conv_ref.dimshuffle(0, 4, 1, 2, 3))

    # Evaluate the reference first, then the cuDNN version, and compare.
    utt.assert_allclose(f_ref(), f())
def classify(X, w, g, b, w2, g2, b2, w3, g3, b3, w4, g4, b4,
             wfc1, wfc2, wy):
    """Build the classifier graph and return its softmax output.

    The network consists of four identical stages (3D convolution with
    unit stride and a border of 1, 3D batch normalisation, ReLU, then
    2x2x2 max pooling with stride 2), followed by two ReLU dense layers
    and a softmax output layer.
    """
    stages = ((w, g, b), (w2, g2, b2), (w3, g3, b3), (w4, g4, b4))

    features = X
    for kernels, gain, shift in stages:
        conved = dnn_conv3d(features, kernels,
                            subsample=(1, 1, 1), border_mode=(1, 1, 1))
        activated = relu(batchnorm3d(conved, g=gain, b=shift))
        features = dnn_pool(activated, ws=(2, 2, 2), stride=(2, 2, 2),
                            mode='max')

    # Collapse everything but the batch axis before the dense layers.
    flat = T.flatten(features, 2)
    hidden = relu(T.dot(flat, wfc1))
    hidden = relu(T.dot(hidden, wfc2))
    return softmax(T.dot(hidden, wy))
def convolve(self, input, **kwargs):
    """Apply the layer's cuDNN 3D convolution to ``input``.

    ``self.pad == 'same'`` is translated into an explicit per-axis border
    of ``filter_size // 2``; any other value is passed on unchanged.
    """
    # 'cross' (no filter flipping) is the default, consistent with corrmm.
    if self.flip_filters:
        conv_mode = 'conv'
    else:
        conv_mode = 'cross'

    padding = self.pad
    if padding == 'same':
        padding = tuple(size // 2 for size in self.filter_size)

    return dnn.dnn_conv3d(img=input, kerns=self.W,
                          subsample=self.stride,
                          border_mode=padding,
                          conv_mode=conv_mode)
def convolve(self, input, **kwargs):
    """Return the cuDNN 3D convolution of ``input`` with ``self.W``.

    A ``'same'`` padding request is converted to the tuple
    ``filter_size // 2`` before being handed to ``dnn.dnn_conv3d``.
    """
    # Filters are only flipped on request; the default is 'cross',
    # consistent with corrmm.
    mode_by_flip = {True: 'conv', False: 'cross'}
    conv_mode = mode_by_flip[bool(self.flip_filters)]

    border = self.pad
    if border == 'same':
        border = tuple(extent // 2 for extent in self.filter_size)

    conv_kwargs = dict(img=input,
                       kerns=self.W,
                       subsample=self.stride,
                       border_mode=border,
                       conv_mode=conv_mode)
    return dnn.dnn_conv3d(**conv_kwargs)
def get_output_for(self, input, *args, **kwargs):
    """Compute the layer output, supporting fractional strides.

    If every stride is at least 1 a regular cuDNN 3D convolution is used.
    If any stride is below 1, the output is obtained as the gradient of a
    strided convolution over a zero tensor of the output shape, with
    ``input`` supplied as the known gradient of that convolution.  The
    bias (when present) is added per filter and the nonlinearity applied.
    """
    if self.flip_filters:
        conv_mode = 'conv'
    else:
        conv_mode = 'cross'

    fractional = any(s < 1.0 for s in self.strides)
    if not fractional:
        conved = dnn.dnn_conv3d(img=input,
                                kerns=self.W,
                                subsample=self.strides,
                                border_mode=self.pad,
                                conv_mode=conv_mode)
    else:
        # Fractionally strided convolution.
        # NOTE(review): int() truncates, so the reciprocal of each stride
        # is assumed to be (numerically) an integer -- confirm.
        upsampling = tuple(int(1.0 / s) for s in self.strides)

        canvas_shape = list(self.output_shape)
        if canvas_shape[0] is None:
            # Unknown batch size: take it from the symbolic input.
            canvas_shape[0] = input.shape[0]
        canvas = T.alloc(0., *canvas_shape)

        # Convolve in the opposite direction (filter axes 0 and 1 swapped)
        # and take the gradient with respect to the canvas.
        forward = dnn.dnn_conv3d(img=canvas,
                                 kerns=self.W.transpose(1, 0, 2, 3, 4),
                                 subsample=upsampling,
                                 border_mode=self.pad,
                                 conv_mode=conv_mode)
        conved = T.grad(forward.sum(), wrt=canvas,
                        known_grads={forward: input})

    if self.b is None:
        return self.nonlinearity(conved)
    return self.nonlinearity(
        conved + self.b.dimshuffle('x', 0, 'x', 'x', 'x'))
def get_output_for(self, input, **kwargs):
    """Convolve ``input``, add the optional bias and apply the nonlinearity.

    With ``untie_biases`` the bias keeps its own three trailing axes;
    otherwise it has one value per filter and is broadcast over them.
    """
    # 'cross' (no filter flipping) is the default, consistent with corrmm.
    if self.flip_filters:
        conv_mode = 'conv'
    else:
        conv_mode = 'cross'

    conved = dnn.dnn_conv3d(img=input, kerns=self.W,
                            subsample=self.stride,
                            border_mode=self.pad,
                            conv_mode=conv_mode)

    if self.b is None:
        return self.nonlinearity(conved)

    if self.untie_biases:
        pattern = ('x', 0, 1, 2, 3)
    else:
        pattern = ('x', 0, 'x', 'x', 'x')
    return self.nonlinearity(conved + self.b.dimshuffle(*pattern))
def compute_output(self, network, in_vw):
    """Create the weight and the cuDNN 3D convolution output of this node.

    Hyperparameters read from ``network``: ``num_filters``,
    ``filter_size``, ``conv_stride``/``stride`` (default ``(1, 1, 1)``),
    ``conv_pad``/``pad`` (default ``(0, 0, 0)``), ``conv_mode`` (default
    ``"conv"``) and ``inits``.  The result is registered as the
    ``"default"`` output together with its inferred shape.
    """
    # Gather hyperparameters.
    num_filters = network.find_hyperparameter(["num_filters"])
    filter_size = network.find_hyperparameter(["filter_size"])
    stride = network.find_hyperparameter(["conv_stride", "stride"],
                                         (1, 1, 1))
    requested_pad = network.find_hyperparameter(["conv_pad", "pad"],
                                                (0, 0, 0))
    pad = conv.conv_parse_pad(filter_size, requested_pad)
    # Convolution rather than cross-correlation is the default, to stay
    # compatible with the standard (non-cuDNN) conv2d.
    conv_mode = network.find_hyperparameter(["conv_mode"], "conv")
    inits = list(toolz.concat(
        network.find_hyperparameters(["inits"], [])))
    assert len(filter_size) == 3
    assert conv_mode in ["conv", "cross"]

    # Weight laid out as (filters, input channels) + filter extents.
    weight_shape = (num_filters, in_vw.shape[1]) + tuple(filter_size)
    weight_vw = network.create_vw(
        name="weight",
        is_shared=True,
        shape=weight_shape,
        tags={"parameter", "weight"},
        inits=inits,
    )
    W = weight_vw.variable

    out_var = dnn.dnn_conv3d(img=in_vw.variable,
                             kerns=W,
                             border_mode=pad,
                             subsample=stride,
                             conv_mode=conv_mode)
    out_shape = conv.conv_output_shape(input_shape=in_vw.shape,
                                       num_filters=num_filters,
                                       axes=(2, 3, 4),
                                       conv_shape=filter_size,
                                       strides=stride,
                                       pads=pad)

    network.create_vw("default", variable=out_var, shape=out_shape,
                      tags={"output"})
def compute_output(self, network, in_vw):
    """Upsample ``in_vw`` via the gradient of a strided 3D convolution.

    A zero tensor with the (upsampled) output shape is convolved down to
    the input resolution; the node output is the gradient of that
    convolution with respect to the zero tensor, using the node input as
    the known gradient of the convolution result.
    """
    num_filters = network.find_hyperparameter(['num_filters'])
    stride = network.find_hyperparameter(['upsample_factor'])
    filter_size = network.find_hyperparameter(['filter_size'])
    pad = treeano.nodes.conv.conv_parse_pad(filter_size, 'same')

    # Axis 0 of W is the channel count of the low-res layer and axis 1
    # that of the hi-res layer, because W describes a convolution from
    # hi-res to low-res.
    weight_shape = (in_vw.shape[1], num_filters) + filter_size
    W = network.create_vw(
        name='weight',
        is_shared=True,
        shape=weight_shape,
        tags={'parameter', 'weight'},
        default_inits=[],
    ).variable

    # Static and symbolic output shapes: replace the channel count and
    # scale the three trailing axes by the upsampling factors.
    out_shape = list(in_vw.shape)
    symbolic_shape = list(in_vw.symbolic_shape())
    out_shape[1] = symbolic_shape[1] = num_filters
    for axis, factor in zip((2, 3, 4), stride):
        if out_shape[axis] is not None:
            out_shape[axis] = out_shape[axis] * factor
        symbolic_shape[axis] = symbolic_shape[axis] * factor
    out_shape = tuple(out_shape)
    symbolic_shape = tuple(symbolic_shape)

    hi_res = T.zeros(symbolic_shape)
    low_res = dnn.dnn_conv3d(img=hi_res, kerns=W, border_mode=pad,
                             subsample=stride)
    out_var = T.grad(None, wrt=hi_res,
                     known_grads={low_res: in_vw.variable})

    network.create_vw('default', variable=out_var, shape=out_shape,
                      tags={'output'})
def get_output_for(self, input, **kwargs):
    """Return ``nonlinearity(conv3d(input) [+ bias])`` for this layer.

    The bias is skipped when ``self.b`` is ``None``.  Untied biases are
    added with their own three trailing axes; tied biases are broadcast
    per filter.
    """
    # By default we assume 'cross', consistent with corrmm; filters are
    # flipped ('conv') only when requested.
    conv_mode = ('cross', 'conv')[bool(self.flip_filters)]

    conved = dnn.dnn_conv3d(
        img=input,
        kerns=self.W,
        subsample=self.stride,
        border_mode=self.pad,
        conv_mode=conv_mode,
    )

    activation = conved
    if self.b is not None:
        if self.untie_biases:
            activation = conved + self.b.dimshuffle('x', 0, 1, 2, 3)
        else:
            activation = conved + self.b.dimshuffle('x', 0, 'x', 'x', 'x')
    return self.nonlinearity(activation)
def compute_output(self, network, in_vw):
    """Register an upsampled output built from a convolution gradient.

    The output has ``num_filters`` channels and its three trailing axes
    are the input's multiplied by ``upsample_factor``.  It is computed as
    the gradient of a 'same'-padded, strided ``dnn.dnn_conv3d`` with
    respect to a zero tensor of the output shape, with the node input as
    the known gradient of the convolution.
    """
    find = network.find_hyperparameter
    num_filters = find(['num_filters'])
    stride = find(['upsample_factor'])
    filter_size = find(['filter_size'])
    pad_name = 'same'
    pad = treeano.nodes.conv.conv_parse_pad(filter_size, pad_name)

    # The 0th element of the weight shape is the number of channels in
    # the low-res layer and the 1st element is that of the hi-res layer,
    # because W is a convolution from hi-res to low-res.
    weight_vw = network.create_vw(
        name='weight',
        is_shared=True,
        shape=(in_vw.shape[1], num_filters,) + filter_size,
        tags={'parameter', 'weight'},
        default_inits=[],
    )
    W = weight_vw.variable

    static_dims = list(in_vw.shape)
    symbolic_dims = list(in_vw.symbolic_shape())
    static_dims[1] = num_filters
    symbolic_dims[1] = num_filters
    for axis, scale in zip((2, 3, 4), stride):
        # A static extent may be unknown (None); the symbolic one is
        # always scaled.
        if static_dims[axis] is not None:
            static_dims[axis] *= scale
        symbolic_dims[axis] *= scale
    out_shape = tuple(static_dims)

    x = T.zeros(tuple(symbolic_dims))
    conved = dnn.dnn_conv3d(img=x, kerns=W, border_mode=pad,
                            subsample=stride)
    out_var = T.grad(None, wrt=x, known_grads={conved: in_vw.variable})

    network.create_vw(
        'default',
        variable=out_var,
        shape=out_shape,
        tags={'output'},
    )
def conv3d(x, kernel, strides = (1, 1, 1), padding = 'same', dim_order = 'th'):
    """cuDNN 3D convolution with selectable axis order and padding.

    ``x`` and ``kernel`` are shuffled from ``dim_order`` into the internal
    order, convolved with ``dnn.dnn_conv3d`` and shuffled back.  With
    ``padding='same'`` the convolution runs in 'half' mode and, for every
    filter axis of even size, the first output slice along that axis is
    dropped.

    Raises ``Exception`` for an unknown ``dim_order`` or ``padding`` and
    ``NotImplementedError`` when ``env.get_device()`` is the empty string.
    """
    valid_arguments = dim_order in dim_orders and padding in paddings
    if not valid_arguments:
        raise Exception('Error dim_order or padding parameter.')
    if env.get_device() == '':
        raise NotImplementedError

    x = _shuffle(x, dim3d[dim_order], dim3d[AR])
    kernel = _shuffle(kernel, kernel3d[dim_order], kernel3d[AR])

    border = 'half' if padding == 'same' else padding
    conv = dnn.dnn_conv3d(x, kernel, border_mode = border, subsample = strides)

    if border == 'half':
        # The filter shape is needed as concrete numbers here.
        kernel_dims = kernel.shape.eval()
        for axis in (2, 3, 4):
            if kernel_dims[axis] % 2 == 0:
                # Even filter size: drop the leading slice on this axis.
                index = [slice(None)] * 5
                index[axis] = slice(1, None)
                conv = conv[tuple(index)]

    return _shuffle(conv, dim3d[AR], dim3d[dim_order])
def compute_output(self, network, in_vw):
    """Create the weight and cuDNN 3D convolution output for this node.

    Reads ``num_filters``, ``filter_size``, ``conv_stride``/``stride``
    (default ``(1, 1, 1)``), ``conv_pad``/``pad`` (default ``(0, 0, 0)``)
    and ``conv_mode`` (default ``"conv"``) from ``network`` and registers
    the convolution result as the ``"default"`` output.
    """
    lookup = network.find_hyperparameter

    # Gather hyperparameters.
    num_filters = lookup(["num_filters"])
    filter_size = lookup(["filter_size"])
    stride = lookup(["conv_stride", "stride"], (1, 1, 1))
    pad = lookup(["conv_pad", "pad"], (0, 0, 0))
    pad = conv.conv_parse_pad(filter_size, pad)
    # By default do convolution instead of cross-correlation, to be
    # compatible with the standard (non-cuDNN) conv2d.
    conv_mode = lookup(["conv_mode"], "conv")
    assert len(filter_size) == 3
    assert conv_mode in ["conv", "cross"]

    # Create the weight: (filters, input channels) + filter extents.
    num_channels = in_vw.shape[1]
    weight_vw = network.create_vw(
        name="weight",
        is_shared=True,
        shape=(num_filters, num_channels) + tuple(filter_size),
        tags={"parameter", "weight"},
        default_inits=[],
    )

    out_var = dnn.dnn_conv3d(
        img=in_vw.variable,
        kerns=weight_vw.variable,
        border_mode=pad,
        subsample=stride,
        conv_mode=conv_mode,
    )
    out_shape = conv.conv_output_shape(
        input_shape=in_vw.shape,
        num_filters=num_filters,
        axes=(2, 3, 4),
        conv_shape=filter_size,
        strides=stride,
        pads=pad,
    )

    network.create_vw(
        "default",
        variable=out_var,
        shape=out_shape,
        tags={"output"},
    )
def get_output(self, train):
    """Return the activated cuDNN 3D cross-correlation of the layer input.

    The layer input is reordered with ``dimshuffle(0, 2, 3, 4, 1)``,
    convolved with ``self.W`` in 'cross' mode and reordered back with
    ``dimshuffle(0, 4, 1, 2, 3)``; the bias is broadcast along axis 2 of
    the result before the activation is applied.

    Parameters
    ----------
    train : passed through unchanged to ``self.get_input``.

    Raises
    ------
    ImportError
        If cuDNN is unavailable or the Theano device is not a GPU; there
        is no non-cuDNN fallback.
    """
    X = self.get_input(train).dimshuffle(0, 2, 3, 4, 1)

    # Guard clause: this layer only has a cuDNN implementation.
    if not (dnn.dnn_available() and theano.config.device[:3] == 'gpu'):
        raise ImportError(
            "cuDNN not available: %s" % dnn.dnn_available.msg)

    conv_out = dnn.dnn_conv3d(img=X,
                              kerns=self.W,
                              border_mode=self.padding,
                              subsample=self.subsample,
                              conv_mode='cross')
    conv_out = conv_out.dimshuffle(0, 4, 1, 2, 3)

    # NOTE(review): the bias is attached to axis 2 of the reordered
    # output -- confirm this matches the intended channel axis.
    return self.activation(
        conv_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x'))
def convolve(self, input, **kwargs):
    """
    Convolve ``input`` over ``self.convolution_axes``.

    Reshape and move the desired channels to the back, and the other
    channels to the front: every axis that is neither a channel axis nor
    a convolution axis is moved to the front and flattened into one
    leading axis, the channel axes are flattened into a second axis and
    the convolution axes follow.  Depending on the number of convolution
    axes the result is computed with ``T.dot`` (0), ``dnn.dnn_conv``
    (1 or 2) or ``dnn.dnn_conv3d`` (3); afterwards the original axis
    layout is restored.

    Raises ``RuntimeError`` for more than three convolution axes.

    NOTE(review): the code calls ``.remove``/``.append`` on the result of
    ``range(...)``, compares lists against ``range(...)`` and uses
    ``xrange``, so it presumably targets Python 2 only -- confirm.
    NOTE(review): the source had lost its indentation; the nesting below
    is reconstructed from the statements themselves -- verify against the
    upstream file.
    """
    # Axis permutation: remaining axes first, then channel axes, then
    # convolution axes.
    dim_order = range(len(self.input_shape))
    for channel in self.channel_axes:
        dim_order.remove(channel)
    for axis in self.convolution_axes:
        dim_order.remove(axis)
    for channel in self.channel_axes:
        dim_order.append(channel)
    for axis in self.convolution_axes:
        dim_order.append(axis)
    # Only shuffle when the permutation is not the identity.
    if dim_order != range(len(self.input_shape)):
        input = input.dimshuffle(*dim_order)

    # flatten other axes, flatten channel axes
    standard_input_shape = [
        -1, np.prod(
            [self.input_shape[channel] for channel in self.channel_axes])
    ] + [self.input_shape[axis] for axis in self.convolution_axes]
    input = input.reshape(standard_input_shape)

    target_shape, dim_shuffle_width = None, None
    if any([w != 1 for w in self.width]):
        # Split every convolution axis into (size // width, width), move
        # the width factors in front of the channel axis and fold them
        # into the leading axis.
        target_shape = standard_input_shape[:2]
        dim_shuffle_width = [0]
        for i, width in enumerate(self.width):
            target_shape.append(standard_input_shape[2 + i] // width)
            target_shape.append(width)
            dim_shuffle_width += [3 + i * 2]
        dim_shuffle_width += [1]
        for i in xrange(len(self.convolution_axes)):
            dim_shuffle_width += [2 + i * 2]
        input = input.reshape(target_shape).dimshuffle(*dim_shuffle_width)
        target_shape = [-1, target_shape[1]]
        for i, width in enumerate(self.width):
            target_shape.append(standard_input_shape[2 + i] // width)
        input = input.reshape(target_shape)

    # by default we assume 'cross', consistent with corrmm.
    conv_mode = 'conv' if self.flip_filters else 'cross'
    border_mode = self.pad
    if border_mode == 'same':
        border_mode = tuple(s // 2 for s in self.filter_mask_size)

    if len(self.convolution_axes) == 0:
        # No convolution axes: a plain matrix product with the filters.
        conved_result = T.dot(input, self.filter_tensor.dimshuffle(1, 0))
    elif len(self.convolution_axes) == 1:
        """ perform convolution as 2d convolution"""
        conved_result = dnn.dnn_conv(
            img=input.dimshuffle(0, 1, 2, 'x'),
            kerns=self.filter_tensor.dimshuffle(0, 1, 2, 'x'),
            subsample=self.stride + (1, ),
            border_mode=border_mode + (0, ),
            conv_mode=conv_mode)[:, :, :, 0]  # drop the unused dimension
    elif len(self.convolution_axes) == 2:
        conved_result = dnn.dnn_conv(img=input,
                                     kerns=self.filter_tensor,
                                     subsample=self.stride,
                                     border_mode=border_mode,
                                     conv_mode=conv_mode)
    elif len(self.convolution_axes) == 3:
        conved_result = dnn.dnn_conv3d(img=input,
                                       kerns=self.filter_tensor,
                                       subsample=self.stride,
                                       border_mode=border_mode,
                                       conv_mode=conv_mode)
    else:
        raise RuntimeError("Only supports 1D, 2D and 3D convolutions")

    if any([w != 1 for w in self.width]):
        # Undo the width split applied before the convolution.
        conv_output_shape = self.get_convolutional_output_shape(
            self.input_shape)
        target_shape = target_shape[:1] + list(self.width) + [
            self.num_filters
        ] + [os // w for os, w in zip(conv_output_shape, self.width)]
        # Inverse permutation of dim_shuffle_width.
        reverse_dimshuffle = [
            dim_shuffle_width.index(i)
            for i in xrange(len(dim_shuffle_width))
        ]
        conved_result = conved_result.reshape(target_shape).dimshuffle(
            *reverse_dimshuffle)
        ###############
        # merge the wide kernels again with their original axis
        ###############
        target_shape = [-1, self.num_filters] + conv_output_shape
        conved_result = conved_result.reshape(target_shape)

    # Sizes of the axes that were flattened into the leading axis; unknown
    # (None) sizes become -1 for reshape.
    output_shape = list(self.input_shape)
    output_shape = [
        i for j, i in enumerate(output_shape)
        if j not in self.channel_axes and j not in self.convolution_axes
    ]
    output_shape = [i if i is not None else -1 for i in output_shape]
    # NOTE(review): the result of this call is discarded; it looks
    # redundant with the identical call inside the reshape below.
    self.get_convolutional_output_shape(self.input_shape)
    result = conved_result.reshape(
        output_shape + list(self.filter_shape) +
        self.get_convolutional_output_shape(self.input_shape))

    # Restore the original axis order (inverse of dim_order).
    reverse_dimshuffle = [
        dim_order.index(i) for i in xrange(len(self.input_shape))
    ]
    if reverse_dimshuffle != range(len(self.input_shape)):
        result = result.dimshuffle(*reverse_dimshuffle)
    return result
def test_conv():
    """Compare convolution implementations against each other.

    The first group (disabled by ``if False``) checks ``conv``/``dot``
    against ``T.dot``, ``conv2d``, ``conv3d`` and ``np.convolve``.  The
    second group (enabled) checks ``dnn.dnn_conv3d`` against the
    dimshuffled ``conv3d``, first alone, then followed by max pooling,
    then followed by pooling and a softmax.

    NOTE(review): the source had lost its indentation; the extent of the
    two ``if`` bodies is reconstructed -- verify against upstream.
    """
    if False:
        # 1x1 "convolution" expressed as a dot product.
        sig_shape = (100000, 20)
        fil_shape = (20, 30)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(
            np.float32)  # (nof, z, ch, xf, yf)
        x = T.TensorType('float32', (False, ) * 2, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)
        y1 = T.dot(x, W)
        y2 = dot(x, W, 1)
        g1 = theano.grad(T.log(y1).sum(), [x])
        g2 = theano.grad(T.log(y2).sum(), [x])
        f1 = utils.make_func([x], y1, profile_execution=10)
        f2 = utils.make_func([x], y2, profile_execution=10)
        r1 = f1(x_val)
        r2 = f2(x_val)
        assert np.allclose(r1, r2)

        # 2D convolution with a 1x1 filter: conv vs. conv2d.
        sig_shape = (1, 5, 300, 200)
        fil_shape = (7, 5, 1, 1)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(
            np.float32)  # (nof, z, ch, xf, yf)
        x = T.TensorType('float32', (False, ) * 4, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)
        y1 = conv(x, W, w_shape=fil_shape)
        y2 = conv2d(x, W)
        g1 = theano.grad(T.log(y1).sum(), [x])
        g2 = theano.grad(T.log(y2).sum(), [x])
        f1 = utils.make_func([x], y1, profile_execution=5)
        f2 = utils.make_func([x], y2, profile_execution=5)
        r1 = f1(x_val)
        r2 = f2(x_val)
        assert np.allclose(r1, r2)

        # 3D convolution: conv vs. conv3d.
        sig_shape = (1, 100, 5, 300, 200)
        # x_shape = (None, 100, 5, 300, 200)
        fil_shape = (7, 3, 5, 3, 3)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(
            np.float32)  # (nof, z, ch, xf, yf)
        x = T.TensorType('float32', (False, ) * 5, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)
        y1 = conv(x, W, x_shape=sig_shape, w_shape=fil_shape)
        y2 = conv3d(x, W)
        g1 = theano.grad(T.log(y1).sum(), [x])
        g2 = theano.grad(T.log(y2).sum(), [x])
        f1 = utils.make_func([x], y1, profile_execution=5)
        f2 = utils.make_func([x], y2, profile_execution=5)
        r1 = f1(x_val)
        r2 = f2(x_val)
        assert np.allclose(r1, r2)

        # 1D convolution: conv vs. np.convolve in 'valid' mode.
        sig_shape = (1, 1, 100)
        fil_shape = (1, 1, 20)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(
            np.float32)  # (nof, z, ch, xf, yf)
        x = T.TensorType('float32', (False, ) * 3, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)
        y1 = conv(x, W, w_shape=fil_shape)
        f1 = elektronn2.neuromancer.graphutils.make_func([x], y1,
                                                         profile_execution=10)
        r1 = f1(x_val)
        r2 = np.convolve(x_val[0, 0], W_val[0, 0], mode='valid')[None, None]
        assert np.allclose(r1, r2)

    if True:
        sig_shape = (1, 5, 100, 300, 200)
        fil_shape = (7, 5, 3, 3, 3)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(
            np.float32)  # (nof, ch, zf, xf, yf)
        x = T.TensorType('float32', (False, ) * 5, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)

        # test conv
        y1 = dnn.dnn_conv3d(x, W, border_mode='valid')
        y2 = conv3d(x.dimshuffle(0, 2, 1, 3, 4),
                    W.dimshuffle(0, 2, 1, 3, 4)).dimshuffle(0, 2, 1, 3, 4)
        f1 = elektronn2.neuromancer.graphutils.make_func([x], y1,
                                                         profile_execution=5)
        f2 = elektronn2.neuromancer.graphutils.make_func([x], y2,
                                                         profile_execution=5)
        r3 = np.array(f1(x_val))
        r4 = f2(x_val)
        assert np.allclose(
            r3, r4
        )  # cuDNN and the reshaped conv2d3d give the same result, but cuDNN is faster!

        # Convolution followed by 2x2x2 max pooling.
        y1 = dnn.dnn_conv3d(x, W, border_mode='valid')
        y1 = dnn.dnn_pool(y1, (2, 2, 2),
                          stride=(2, 2, 2),
                          pad=(0, 0, 0),
                          mode='max')
        f1 = elektronn2.neuromancer.graphutils.make_func([x], y1,
                                                         profile_execution=5)
        r3 = np.array(f1(x_val))
        y2 = conv3d(x.dimshuffle(0, 2, 1, 3, 4), W.dimshuffle(0, 2, 1, 3, 4))
        y2 = pooling(y2, (2, 2, 2))
        f2 = elektronn2.neuromancer.graphutils.make_func([x], y2,
                                                         profile_execution=5)
        r4 = f2(x_val)
        assert np.allclose(r3, r4.transpose(
            0, 2, 1, 3, 4))  # pooling also works, but it is not so much faster anymore

        # Convolution, pooling and softmax.  The cuDNN softmax op is
        # applied to a 4D view (flatten(4)) and the result is reshaped
        # back to the original shape.
        y1 = dnn.dnn_conv3d(x, W, border_mode='valid')
        y1 = dnn.dnn_pool(y1, (2, 2, 2),
                          stride=(2, 2, 2),
                          pad=(0, 0, 0),
                          mode='max')
        sm = dnn.GpuDnnSoftmax('bc01', 'fast', 'channel')
        sh = y1.shape
        y1 = sm(y1.flatten(4)).reshape(sh)
        f1 = elektronn2.neuromancer.graphutils.make_func([x], y1,
                                                         profile_execution=5)
        r3 = np.array(f1(x_val))
        y2 = conv3d(x.dimshuffle(0, 2, 1, 3, 4), W.dimshuffle(0, 2, 1, 3, 4))
        y2 = pooling(y2, (2, 2, 2))
        y2 = softmax(y2, axis=2)
        f2 = elektronn2.neuromancer.graphutils.make_func([x], y2,
                                                         profile_execution=5)
        r4 = f2(x_val)
        assert np.allclose(r3, r4.transpose(0, 2, 1, 3, 4),
                           atol=1e-5)  # softmax also works but the difference is ~1e-5
def conv(x, w, axis_order=None, conv_dim=None, x_shape=None, w_shape=None,
         border_mode='valid', stride=None):
    """
    Apply appropriate convolution depending on input and filter dimensionality.
    If input ``w_shape`` is known, conv might be replaced by tensordot

    There are static assumptions which axes are spatial.

    Parameters
    ----------
    x: T.Tensor
        | Input data (mini-batch).
        | Tensor of shape ``(b, f, x)``, ``(b, f, x, y)``,
          ``(b, z, f, x, y)`` or ``(b,f,x,y,z)``.
    w: T.Tensor
        | Set of convolution filter weights.
        | Tensor of shape ``(f_out, f_in, x)``, ``(f_out, f_in, x, y)``,
          ``(f_out, z, f_in, x, y)`` or ``(f_out, f_in, x, y, z)``.
    axis_order: str
        | (only relevant for 3d)
        | ``'dnn'`` ``(b,f,x,y(,z))`` or ``'theano'`` ``(b, z, f, x, y)``.
    conv_dim: int
        Dimensionality of the applied convolution (not the absolute dim of
        the inputs).  Inferred as ``x.ndim - 2`` when omitted.
    x_shape: tuple
        shape tuple (``TaggedShape`` supported).  Ignored (treated as
        ``None``) when it contains ``None``.
    w_shape: tuple
        shape tuple, see ``w``.
    border_mode: str
        * ``'valid'``: only apply filter to complete patches of the image.
          Generates output of shape: image_shape -filter_shape + 1.
        * ``'full'`` zero-pads image to multiple of filter shape to generate
          output of shape: image_shape + filter_shape - 1.
        * ``'same'``: requires ``w_shape`` with odd filter sizes and is
          executed as ``'half'`` mode.
    stride: tuple
        | (tuple of len 2)
        | Factor by which to subsample the output.  Strides are rejected
          for 3d convolutions.

    Returns
    -------
    T.Tensor
        Set of feature maps generated by convolution.

    Raises
    ------
    ValueError
        On dimension mismatches between ``x``, ``w`` and ``conv_dim``, for
        ``conv_dim > 3``, or for ``'same'`` mode with even filter sizes.
    NotImplementedError
        If ``stride`` is given for a 3d convolution.

    NOTE(review): the source had lost its indentation; the nesting below
    is reconstructed from the statements themselves -- verify against the
    upstream file.
    """
    if (x_shape is None) or (None in x_shape):  # variable batch size or so
        x_shape = None

    assert axis_order in ['dnn', 'theano', None]

    if conv_dim is not None:
        if x.ndim!=conv_dim+2 or w.ndim!=conv_dim+2:
            raise ValueError("Cannot perform %id conv on input and filter of"
                             "dim %i, %i" % (conv_dim, x.ndim, w.ndim))
    else:  # infer conv_dim
        conv_dim = x.ndim-2
        if w.ndim!=conv_dim+2:
            raise ValueError("Dimension mismatch for conv: tried to do %id conv"
                             "on %id input x. This requires %id filter, but got"
                             "%id" % (conv_dim, x.ndim, x.ndim, w.ndim))
    if conv_dim>3:
        raise ValueError("Input tensor dim to big. No conv for dim>5.")

    if border_mode=='same':
        assert w_shape is not None
        if not np.all(np.remainder(w_shape[-conv_dim:], 2) == 1):
            raise ValueError('For "same"-mode convolution, filter shapes '
                             'must be odd in all dimensions.')
        border_mode='half'
        crop_full = False
    else:
        crop_full = False

    # A convolution whose filter has extent 1 along every spatial axis is
    # a dot product over the feature axis.
    use_tensordot = False
    if (w_shape is not None) and (stride is None):  # cannot use tensordot with strides
        if conv_dim<3 or axis_order=='dnn':
            use_tensordot = np.all(np.equal(w_shape[2:], 1))
        else:  # theano order for 3d conv
            use_tensordot = w_shape[1] == 1 and np.all(np.equal(w_shape[3:], 1))

    y = None
    if conv_dim==1:
        # 1d conv is run as a 2d conv with a dummy trailing axis that is
        # dropped again afterwards.
        x = x.dimshuffle(0, 1, 2, 'x')
        w = w.dimshuffle(0, 1, 2, 'x')
        if w_shape is not None:
            w_shape = list(w_shape) + [1, ]
        if x_shape is not None:
            x_shape = list(x_shape) + [1,]
        if stride is None:
            stride = (1, 1)
        y = conv2d(x, w, x_shape, w_shape, border_mode, subsample=stride)
        y = y[:, :, :, 0]

    elif conv_dim==2:
        if stride is None:
            stride = (1, 1)
        if use_tensordot:
            logger.debug("Using dot for 2d conv")
            w = w[:, :, 0, 0].T  # (f_in, f_out) (5, 7)
            y = dot(x, w, axis=1)
        elif dnn_avail and config.use_manual_cudnn_conv:
            logger.debug("Using cuDNN 2dconv")
            y = dnn.dnn_conv(x, w, border_mode, subsample=stride, algo=dnn_algo)
        else:  # fallback to theano
            y = conv2d(x, w, x_shape, w_shape, border_mode, subsample=stride)

    elif conv_dim==3:
        assert axis_order in ['dnn', 'theano']
        use_dnn = dnn_avail
        if not config.use_manual_cudnn_conv:
            use_dnn = False
        # NOTE(review): w_shape is indexed here without a None check
        # although its default is None -- confirm callers always pass it
        # for 3d convolutions.
        if w_shape[2]==1 and config.use_manual_cudnn_conv_not_w1:
            use_dnn = False
            logger.debug("Ignoring manual 3d cuDNN conv because kernel is "
                         "1 for first axis")
            # then theano automatically uses dnn 2d conv which
            # has faster gradient than dnn 3d conv

        if stride is not None:
            raise NotImplementedError("Cannot use strided conv with 3d conv")

        if use_tensordot:
            logger.debug("Using dot for 3d conv")
            if axis_order=='theano':
                w = w[:, 0, :, 0, 0].T  # (f_in, f_out)
                y = dot(x, w, axis=2)
            elif axis_order=='dnn':
                w = w[:, :, 0, 0, 0].T  # (f_in, f_out)
                y = dot(x, w, axis=1)
        elif use_dnn:
            if stride is None:
                stride = (1, 1, 1)
            if axis_order=='dnn':
                logger.debug("Using cuDNN 3dconv")
                y = dnn.dnn_conv3d(x, w, border_mode, subsample=stride,
                                   algo=dnn_algo)  # (b, f, x, y, z)
            else:
                if config.show_axis_order_warning:
                    logger.warning("cuDNN available but axis order is "
                                   "for theano (z before f). This leads to possibly "
                                   "inefficient dimshuffles. use cuDNN axis order.\n"
                                   "Using dnn 3dconv")
                # Swap axes 1 and 2 before and after the cuDNN call.
                x = x.dimshuffle(0,2,1,3,4)
                w = w.dimshuffle(0, 2, 1, 3, 4)
                y = dnn.dnn_conv3d(x, w, border_mode, subsample=stride,
                                   algo=dnn_algo)  # (b, f, x, y, z)
                y = y.dimshuffle(0,2,1,3,4)
        else:  # fallback to theano
            if axis_order=='theano':
                logger.debug("Using theano 3dconv")
                y = conv3d(x, w, x_shape, w_shape, border_mode)  # (b, z, f, x, y)
            else:
                if config.use_manual_cudnn_conv and not dnn_avail:
                    if config.show_axis_order_warning:
                        logger.warning("cuDNN not available but axis order is"
                                       "for cuDNN (z after features). This leads to possibly "
                                       "inefficient dimshuffles Use theano axis order or "
                                       "install cuDNN.\nUsing theano 3dconv")
                # Swap axes 1 and 2 before and after the conv3d call.
                x = x.dimshuffle(0,2,1,3,4)
                w = w.dimshuffle(0, 2, 1, 3, 4)
                # Also swap shapes!
                w_shape = list(w_shape)
                z,f = w_shape[1], w_shape[2]
                w_shape[2] = z
                w_shape[1] = f
                if x_shape is not None:
                    x_shape = list(x_shape)
                    z,f = x_shape[1], x_shape[2]
                    x_shape[2] = z
                    x_shape[1] = f
                y = conv3d(x, w, x_shape, w_shape, border_mode)  # (b, z, f, x, y)
                y = y.dimshuffle(0,2,1,3,4)

    if crop_full:  # Unreachable code. Remove this if it stays unneeded.
        # NOTE(review): crop_full is False on every path above.  Should
        # this ever be revived, ``np.int`` is presumably unavailable in
        # recent NumPy releases -- confirm before re-enabling.
        cropper = []
        off = np.divide(w_shape[-conv_dim:], 2).astype(np.int)
        k = 0
        if axis_order=='theano' and conv_dim==3:
            for i in range(y.ndim):
                if i in [1,3,4]:
                    cropper.append(slice(off[k], -off[k]))
                    k += 1
                else:
                    cropper.append(slice(None))
        else:
            for i in range(y.ndim):
                if i >= y.ndim - conv_dim:
                    cropper.append(slice(off[k], -off[k]))
                    k += 1
                else:
                    cropper.append(slice(None))

        cropper = tuple(cropper)
        y = y[cropper]

    return y
def conv3d(x, kernel, strides=(1, 1, 1), border_mode='valid',
           dim_ordering='th', image_shape=None, filter_shape=None):
    '''3D convolution; runs on cuDNN if available.

    # Arguments
        x: 5D input tensor.
        kernel: 5D filter tensor.
        strides: tuple of three ints, one stride per convolved axis.
        border_mode: string, "same" or "valid".
        dim_ordering: "th" (channels on axis 1) or "tf" (channels last).
            For "tf", `x` and `kernel` are shuffled to the "th" layout
            before the convolution and the result is shuffled back.
        image_shape: optional shape of `x` in `dim_ordering` layout.
        filter_shape: optional shape of `kernel` in `dim_ordering` layout.

    # Returns
        A 5D tensor in the layout given by `dim_ordering`.

    # Raises
        Exception: if `dim_ordering` is neither "th" nor "tf".
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one.
        # TH input shape: (samples, input_depth, rows, cols, time)
        # TH kernel shape: (depth, input_depth, rows, cols, time)
        # TF input shape: (samples, rows, cols, time, input_depth)
        # TF kernel shape: (rows, cols, time, input_depth, depth)
        x = x.dimshuffle((0, 4, 1, 2, 3))
        kernel = kernel.dimshuffle((4, 3, 0, 1, 2))
        # The shape hints are converted as well, although nothing below
        # currently consumes them.
        if image_shape:
            image_shape = (image_shape[0], image_shape[4],
                           image_shape[1], image_shape[2], image_shape[3])
        if filter_shape:
            filter_shape = (filter_shape[4], filter_shape[3],
                            filter_shape[0], filter_shape[1],
                            filter_shape[2])

    if _on_gpu() and dnn.dnn_available():
        if border_mode == 'same':
            # cuDNN takes explicit per-axis padding; the kernel is
            # evaluated to obtain its concrete shape.
            np_kernel = kernel.eval()
            border_mode = tuple(s // 2 for s in np_kernel.shape[2:])
        conv_out = dnn.dnn_conv3d(img=x,
                                  kerns=kernel,
                                  border_mode=border_mode,
                                  subsample=strides)
    else:
        if border_mode == 'same':
            # Emulate 'same' by zero-padding the input and running a
            # 'valid' convolution.
            assert(strides == (1, 1, 1))
            pad_dim1 = (kernel.shape[2] - 1)
            pad_dim2 = (kernel.shape[3] - 1)
            pad_dim3 = (kernel.shape[4] - 1)
            output_shape = (x.shape[0], x.shape[1],
                            x.shape[2] + pad_dim1,
                            x.shape[3] + pad_dim2,
                            x.shape[4] + pad_dim3)
            output = T.zeros(output_shape)
            indices = (slice(None), slice(None),
                       slice(pad_dim1 // 2, x.shape[2] + pad_dim1 // 2),
                       slice(pad_dim2 // 2, x.shape[3] + pad_dim2 // 2),
                       slice(pad_dim3 // 2, x.shape[4] + pad_dim3 // 2))
            x = T.set_subtensor(output[indices], x)
            border_mode = 'valid'

        border_mode_3d = (border_mode, border_mode, border_mode)
        # conv3d2d.conv3d is called with axes 1 and 2 swapped, and the
        # result is swapped back.
        conv_out = conv3d2d.conv3d(signals=x.dimshuffle(0, 2, 1, 3, 4),
                                   filters=kernel.dimshuffle(0, 2, 1, 3, 4),
                                   border_mode=border_mode_3d)
        conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)

        # support strides by manually slicing the output
        if strides != (1, 1, 1):
            conv_out = conv_out[:, :, ::strides[0], ::strides[1],
                                ::strides[2]]

    if dim_ordering == 'tf':
        # Inverse of the (0, 4, 1, 2, 3) shuffle applied to the input:
        # move the channel axis back to the end.  The previous 4-element
        # pattern (0, 2, 3, 1) did not cover all five axes of the result.
        conv_out = conv_out.dimshuffle((0, 2, 3, 4, 1))
    return conv_out
def run_conv3d_bwd(inputs_shape, filters_shape, subsample,
                   border_mode, conv_mode):
    """Check the gradients of ``dnn.dnn_conv3d`` against ``conv3D``.

    The gradients of the summed convolution output with respect to the
    inputs and the filters are computed with the cuDNN op and with
    ``theano.tensor.nnet.conv3D`` and compared with
    ``utt.assert_allclose``.
    """
    img_data = numpy.random.random(inputs_shape).astype('float32')
    kern_data = numpy.random.random(filters_shape).astype('float32')

    inputs = shared(img_data)
    filters = shared(kern_data)
    bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

    # cuDNN implementation under test.
    conv = dnn.dnn_conv3d(img=inputs, kerns=filters,
                          border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)
    grad_i, grad_w = theano.tensor.grad(conv.sum(), [inputs, filters])
    f = theano.function([], [grad_i, grad_w], mode=mode_with_gpu)

    # In 'conv' mode the reference implementation gets filters flipped
    # along the width, height and time axes.
    flipped_filters = filters
    if conv_mode == 'conv':
        flipped_filters = filters[:, :, ::-1, ::-1, ::-1]

    # For any border mode other than 'valid' the reference implementation
    # operates on explicitly zero-padded inputs.
    needs_padding = border_mode != 'valid'
    if needs_padding:
        if border_mode == 'full':
            pad_per_dim = [filters_shape[axis] - 1 for axis in range(2, 5)]
        elif isinstance(border_mode, int):
            pad_per_dim = [border_mode] * 3
        else:
            pad_per_dim = border_mode
        pad_widths = [(0, 0), (0, 0)]
        pad_widths = pad_widths + [(p, p) for p in pad_per_dim]
        padded_inputs = shared(numpy.pad(img_data, pad_widths, 'constant'))
    else:
        padded_inputs = inputs

    # Reference implementation: conv3D takes the channel axis last.
    conv_ref = theano.tensor.nnet.conv3D(
        V=padded_inputs.dimshuffle(0, 2, 3, 4, 1),
        W=flipped_filters.dimshuffle(0, 2, 3, 4, 1),
        b=bias, d=subsample)
    reference_grads = theano.tensor.grad(conv_ref.sum(),
                                         [padded_inputs, filters])
    (grad_padded_i_ref, grad_w_ref) = reference_grads

    # Recover the gradient for the unpadded inputs by cutting the padding
    # off again.
    if needs_padding:
        shp = grad_padded_i_ref.shape
        crop = tuple(slice(pad_per_dim[k], shp[2 + k] - pad_per_dim[k])
                     for k in range(3))
        grad_i_ref = grad_padded_i_ref[(slice(None), slice(None)) + crop]
    else:
        grad_i_ref = grad_padded_i_ref

    f_ref = theano.function([], [grad_i_ref, grad_w_ref])

    # Evaluate the reference first, then the cuDNN version, and compare
    # the input gradient before the filter gradient.
    res_ref = f_ref()
    res = f()
    for position in (0, 1):
        utt.assert_allclose(res_ref[position], res[position])
def test_dnn_conv3d_alpha_output_merge():
    """Check 3D cuDNN conv graphs with and without the merge optimizations.

    The same expressions (forward output plus an extra term, and
    kernel/image updates using the gradients) are compiled twice: once
    with ``mode_with_gpu`` and once with the six
    ``local_dnn_conv*_alpha_merge`` / ``local_dnn_conv*_output_merge``
    optimizations excluded. The op feeding each output is inspected and
    the numerical results of both functions are compared.

    Skipped when cuDNN is not available.
    """
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    tensor5 = T.TensorType(broadcastable=(False,) * 5, dtype="float32")
    img = tensor5()
    kern = tensor5()
    out = tensor5()

    # Problem sizes: batch, channels, filters, then image and kernel
    # extents along the three trailing axes.
    batch, channels, n_filters = 1, 4, 3
    img_t, img_h, img_w = 10, 5, 8
    kern_t, kern_h, kern_w = 3, 2, 6

    img_val = numpy.random.random(
        (batch, channels, img_t, img_h, img_w)).astype("float32")
    kern_val = numpy.random.random(
        (n_filters, channels, kern_t, kern_h, kern_w)).astype("float32")
    out_val = numpy.random.random(
        (batch, n_filters,
         img_t - kern_t + 1,
         img_h - kern_h + 1,
         img_w - kern_w + 1)).astype("float32")

    conv = dnn.dnn_conv3d(img, kern)
    gw = theano.grad(conv.sum(), kern)
    gi = theano.grad(conv.sum(), img)

    lr = numpy.asarray(0.05, dtype="float32")

    if cuda.dnn.version() != -1:
        fr = lr * (conv + out)
        wr = kern + lr * gw
        ir = img + lr * gi
    else:
        # Can't merge alpha with cudnn v1
        fr = conv + out
        wr = kern + gw
        ir = img + gi

    def producer_op(fn, index):
        # Op that computes the first input of the node producing the
        # given output of the compiled function.
        return fn.maker.fgraph.outputs[index].owner.inputs[0].owner.op

    f1 = theano.function([img, kern, out], [fr, wr, ir], mode=mode_with_gpu)
    assert isinstance(producer_op(f1, 0), dnn.GpuDnnConv)
    assert isinstance(producer_op(f1, 1), dnn.GpuDnnConvGradW)
    assert isinstance(producer_op(f1, 2), dnn.GpuDnnConvGradI)

    # Same graph with every alpha/output merge optimization switched off.
    mode = mode_with_gpu
    for opt_name in ("local_dnn_conv_alpha_merge",
                     "local_dnn_convw_alpha_merge",
                     "local_dnn_convi_alpha_merge",
                     "local_dnn_conv_output_merge",
                     "local_dnn_convw_output_merge",
                     "local_dnn_convi_output_merge"):
        mode = mode.excluding(opt_name)

    f2 = theano.function([img, kern, out], [fr, wr, ir], mode=mode)

    # NOTE(review): these checks use the ``GpuDnnConv3d*`` classes while
    # the checks on f1 use ``GpuDnnConv*`` -- confirm this asymmetry is
    # intended.
    assert not isinstance(producer_op(f2, 0), dnn.GpuDnnConv3d)
    assert not isinstance(producer_op(f2, 1), dnn.GpuDnnConv3dGradW)
    assert not isinstance(producer_op(f2, 2), dnn.GpuDnnConv3dGradI)

    out_f1 = f1(img_val, kern_val, out_val)
    out_f2 = f2(img_val, kern_val, out_val)

    assert len(out_f1) == len(out_f2)
    for merged, unmerged in zip(out_f1, out_f2):
        utt.assert_allclose(merged, unmerged)
def run_conv3d_bwd(inputs_shape, filters_shape, subsample,
                   border_mode, conv_mode):
    """Compare cuDNN 3D convolution gradients with ``conv3D`` gradients.

    Random float32 inputs and filters are drawn, the gradients of
    ``dnn.dnn_conv3d(...).sum()`` with respect to both are computed on the
    GPU, and the results are checked against the gradients of the
    reference ``theano.tensor.nnet.conv3D`` graph.

    Parameters
    ----------
    inputs_shape : tuple of int
        Shape of the random input array; the graph treats it as 5D.
    filters_shape : tuple of int
        Shape of the random filter array; entries 2, 3 and 4 are the
        kernel extents used to derive the 'full' padding.
    subsample : tuple of int
        Strides, forwarded to both implementations.
    border_mode : {'valid', 'full'}, int or sequence of 3 ints
        Border handling for the cuDNN op. The reference op is emulated by
        explicitly zero-padding the inputs.
    conv_mode : str
        Forwarded to the cuDNN op. For 'conv' the reference filters are
        reversed along the last three axes.
    """
    # NOTE(review): this file appears to contain an earlier, identical
    # definition of this function -- confirm whether both are needed.
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')

    inputs = shared(inputs_val)
    filters = shared(filters_val)
    bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

    # cuDNN implementation: gradients w.r.t. inputs and filters.
    conv = dnn.dnn_conv3d(img=inputs, kerns=filters,
                          border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)
    grad_i, grad_w = theano.tensor.grad(conv.sum(), [inputs, filters])
    f = theano.function([], [grad_i, grad_w], mode=mode_with_gpu)

    # A true convolution is emulated in the reference implementation by
    # reversing the filters along the three trailing axes.
    flipped_filters = (filters[:, :, ::-1, ::-1, ::-1]
                       if conv_mode == 'conv' else filters)

    # Every border mode other than 'valid' is emulated by zero-padding
    # the reference inputs symmetrically on the three trailing axes.
    needs_padding = border_mode != 'valid'
    if needs_padding:
        if border_mode == 'full':
            pads = [filters_shape[axis] - 1 for axis in (2, 3, 4)]
        elif isinstance(border_mode, int):
            pads = [border_mode] * 3
        else:
            pads = border_mode
        pad_widths = [(0, 0), (0, 0)] + [(p, p) for p in pads]
        padded_inputs_val = numpy.pad(inputs_val, pad_widths, 'constant')
        padded_inputs = shared(padded_inputs_val)
    else:
        padded_inputs = inputs

    # Reference implementation; conv3D is fed with the channel axis
    # moved to the last position.
    conv_ref = theano.tensor.nnet.conv3D(
        V=padded_inputs.dimshuffle(0, 2, 3, 4, 1),
        W=flipped_filters.dimshuffle(0, 2, 3, 4, 1),
        b=bias, d=subsample)
    grad_padded_i_ref, grad_w_ref = theano.tensor.grad(
        conv_ref.sum(), [padded_inputs, filters])

    # Strip the padding border so the reference input gradient lines up
    # with the gradient of the unpadded inputs.
    if needs_padding:
        padded_shape = grad_padded_i_ref.shape
        crop = (slice(None), slice(None)) + tuple(
            slice(p, padded_shape[axis] - p)
            for axis, p in zip((2, 3, 4), pads))
        grad_i_ref = grad_padded_i_ref[crop]
    else:
        grad_i_ref = grad_padded_i_ref

    f_ref = theano.function([], [grad_i_ref, grad_w_ref])

    # Evaluate the reference first, then cuDNN, and compare pairwise.
    res_ref = f_ref()
    res = f()
    for expected, actual in zip(res_ref, res):
        utt.assert_allclose(expected, actual)
def conv3d(x, kernel, strides=(1, 1, 1), border_mode='valid',
           dim_ordering=_IMAGE_DIM_ORDERING, volume_shape=None,
           filter_shape=None, conv_algo="GpuCorr3dMM"):
    '''Apply a bank of 3D filters to a 5D tensor.

    # Arguments
        x: 5D input tensor, laid out according to `dim_ordering`.
        kernel: 5D filter tensor, laid out according to `dim_ordering`.
        strides: sequence of 3 ints. Strides are applied by subsampling
            the stride-1 result, so they do not reduce the amount of
            computation.
        border_mode: string, "same" or "valid". "same" is implemented by
            zero-padding `x` and then running a "valid" operation; it
            requires `strides == (1, 1, 1)`.
        dim_ordering: string, "th" or "tf".
            "th": x is (samples, input_depth, dim1, dim2, dim3) and
            kernel is (out_depth, input_depth, kdim1, kdim2, kdim3).
            "tf": x is (samples, dim1, dim2, dim3, input_depth) and
            kernel is (kdim1, kdim2, kdim3, input_depth, out_depth).
        volume_shape: accepted for interface compatibility; not used.
        filter_shape: accepted for interface compatibility; not used.
        conv_algo: string, one of
            "conv3d2d": internally reshapes the data and performs
                2d convolutions.
            "dnn_conv3d": uses cuDNN's 3d convolution.
            "GpuCorr3dMM": performs a correlation, not a convolution
                (filter not flipped); uses the "Toeplitz" matrix, which
                means it needs a little more memory.

    # Returns
        A 5D tensor in the same `dim_ordering` as the input.

    # Raises
        Exception: if `dim_ordering` or `border_mode` is not supported.
        ValueError: if `conv_algo` is not one of the supported names.
        AssertionError: if `border_mode` is "same" and `strides` is not
            `(1, 1, 1)`.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
    if border_mode not in {'same', 'valid'}:
        raise Exception('Invalid border mode: ' + str(border_mode))
    # Reject unknown algorithms up front with a real exception; raising a
    # plain string is itself a TypeError in Python 3.
    if conv_algo not in {'conv3d2d', 'dnn_conv3d', 'GpuCorr3dMM'}:
        raise ValueError('Unknown algorithm to perform 3d convolution: ' +
                         str(conv_algo))

    # Accept lists as well as tuples so the comparisons below behave the
    # same for both.
    strides = tuple(strides)

    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one.
        # TH input shape: (samples, input_depth, conv_dim1, conv_dim2, conv_dim3)
        # TF input shape: (samples, conv_dim1, conv_dim2, conv_dim3, input_depth)
        # TH kernel shape: (out_depth, input_depth, kernel_dim1, kernel_dim2, kernel_dim3)
        # TF kernel shape: (kernel_dim1, kernel_dim2, kernel_dim3, input_depth, out_depth)
        x = x.dimshuffle((0, 4, 1, 2, 3))
        kernel = kernel.dimshuffle((4, 3, 0, 1, 2))

    if border_mode == 'same':
        assert (strides == (1, 1, 1))
        # Zero-pad each of the three trailing axes by (kernel_size - 1)
        # in total, so that a 'valid' pass returns the input extent.
        pad_dim1 = (kernel.shape[2] - 1)
        pad_dim2 = (kernel.shape[3] - 1)
        pad_dim3 = (kernel.shape[4] - 1)
        output_shape = (x.shape[0], x.shape[1],
                        x.shape[2] + pad_dim1,
                        x.shape[3] + pad_dim2,
                        x.shape[4] + pad_dim3)
        output = T.zeros(output_shape)
        indices = (slice(None), slice(None),
                   slice(pad_dim1 // 2, x.shape[2] + pad_dim1 // 2),
                   slice(pad_dim2 // 2, x.shape[3] + pad_dim2 // 2),
                   slice(pad_dim3 // 2, x.shape[4] + pad_dim3 // 2))
        x = T.set_subtensor(output[indices], x)
        border_mode = 'valid'

    if conv_algo == "conv3d2d":
        # conv3d2d is called with axes 1 and 2 swapped relative to the
        # 'th' layout, so swap them on the way in and on the way out.
        border_mode_3d = (border_mode, border_mode, border_mode)
        conv_out = conv3d2d.conv3d(signals=x.dimshuffle(0, 2, 1, 3, 4),
                                   filters=kernel.dimshuffle(0, 2, 1, 3, 4),
                                   border_mode=border_mode_3d)
        conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)
    elif conv_algo == "dnn_conv3d":
        conv_out = dnn_conv3d(img=x, kerns=kernel, border_mode=border_mode)
    else:
        # "GpuCorr3dMM"; no bias is involved and `volume_shape` is not
        # needed, so the default `volume_shape=None` is fine here.
        conv_out = GpuCorr3dMM()(x, kernel)

    # support strides by manually slicing the output
    if strides != (1, 1, 1):
        conv_out = conv_out[:, :, ::strides[0], ::strides[1], ::strides[2]]

    if dim_ordering == 'tf':
        conv_out = conv_out.dimshuffle((0, 2, 3, 4, 1))

    return conv_out