def lcn_3d_input(data, kernel_shape, n_maps):
    """Apply local contrast normalisation to a batch of 5-D volumes.

    :param data: [examples, depth, filters, height, width]
    :param kernel_shape: sequence of three ints; entry 0 is the kernel extent
        along the depth axis, entries 1 and 2 along height and width
    :param n_maps: int, number of feature maps the kernel spans (axis 2)
    :return: new_x: [examples, depth, filters, height, width]
    """
    # create symbolic variable for the input data
    ftensor5 = T.TensorType('float32', [False] * 5)
    x = ftensor5()

    # create 3d filter that spans across all channels / feature maps
    # todo: kernel is not really in 3d; need 3d implementation instead of 2d
    #       repeated across third dimension
    # todo: alternative is to keep 2d kernel and extend short range given data
    #       size in z-plane; change first kernel_sh.
    filter_shape = (1, kernel_shape[0], n_maps, kernel_shape[1], kernel_shape[2])
    filters = np.resize(gaussian_filter(kernel_shape[1]), filter_shape)
    filters = filters / np.sum(filters)
    filters = sharedX(filters)

    # convolve filter with input signal
    convolution_out = conv3d(
        signals=x,
        filters=filters,
        signals_shape=data.shape,
        filters_shape=filter_shape,
        border_mode='valid'
    )

    # Region of the full-size tensor covered by the 'valid' convolution.
    # A half-width of 0 (kernel extent 1) must map to an open-ended slice:
    # the naive ``mid:-mid`` would be ``0:-0`` and select nothing.
    # NOTE(review): the interior only matches the convolution output for odd
    # kernel extents — confirm callers never pass even sizes.
    depth_sl, row_sl, col_sl = [
        slice(int(k) // 2, -(int(k) // 2) or None) for k in kernel_shape[:3]
    ]
    interior = (slice(None), depth_sl, slice(None), row_sl, col_sl)

    # for each pixel, remove mean of the surrounding neighborhood
    mean = T.tile(convolution_out, (1, 1, n_maps, 1, 1))
    padded_mean = T.zeros_like(x)
    padded_mean = T.set_subtensor(padded_mean[interior], mean)
    centered_data = data - padded_mean

    # scale down norm of the patch if norm is bigger than 1
    sum_sqr_xx = conv3d(signals=T.sqr(data), filters=filters)
    denominator = T.tile(T.sqrt(sum_sqr_xx), (1, 1, n_maps, 1, 1))
    padded_denominator = T.ones_like(x)
    padded_denominator = T.set_subtensor(padded_denominator[interior], denominator)
    per_img_mean = padded_denominator.mean(axis=[1, 2, 3, 4])
    divisor = T.largest(
        per_img_mean.dimshuffle(0, 'x', 'x', 'x', 'x'),
        padded_denominator
    )
    new_x = centered_data / T.maximum(1., divisor)

    # compile theano function and evaluate it on the supplied data
    f = theano.function([x], new_x)
    return f(data)
def set_output(self):
    """Build ``self._output`` from the zero-padded previous-layer and fc-layer outputs.

    Both inputs are embedded in a zero tensor enlarged by ``2 * padding`` along
    axes 1, 3 and 4, convolved with ``self.Wh.val`` / ``self.Wx.val``
    respectively, summed, and offset by the bias broadcast over axis 2.
    """
    padding = self._padding

    def _zero_pad(shape, value):
        # Allocate the enlarged zero tensor, then write ``value`` into its centre.
        canvas = tensor.alloc(0.0,  # Value to fill the tensor
                              shape[0],
                              shape[1] + 2 * padding[1],
                              shape[2],
                              shape[3] + 2 * padding[3],
                              shape[4] + 2 * padding[4])
        return tensor.set_subtensor(
            canvas[:,
                   padding[1]:padding[1] + shape[1],
                   :,
                   padding[3]:padding[3] + shape[3],
                   padding[4]:padding[4] + shape[4]],
            value)

    padded_input = _zero_pad(self._input_shape, self._prev_layer.output)
    # The fc-layer output goes into its OWN padded tensor; the original code
    # wrote it into ``padded_input`` by mistake, which is only harmless when
    # both shapes happen to coincide.
    padded_x_input = _zero_pad(self._fc_layer._output_shape, self._fc_layer.output)

    fc_output = conv3d2d.conv3d(padded_x_input, self.Wx.val)
    self._output = conv3d2d.conv3d(padded_input, self.Wh.val) + \
        fc_output + self.b.val.dimshuffle('x', 'x', 0, 'x', 'x')
def convolve(self, input, **kwargs):
    """Convolve ``input`` with ``self.W`` in 'valid' mode.

    Axes 1 and 2 of both operands are swapped before the call because conv3d
    expects [n_images, depth, channels, height, width]; the same swap is
    applied to the result before it is returned. ``kwargs`` is accepted but
    unused.
    """
    swap_12 = (0, 2, 1, 3, 4)
    shuffled_weights = self.W.dimshuffle(*swap_12)
    shuffled_input = input.dimshuffle(*swap_12)
    result = conv3d(shuffled_input, shuffled_weights,
                    signals_shape=None, filters_shape=None,
                    border_mode='valid')
    return result.dimshuffle(*swap_12)
def feedforward(self, inp=None, reshape=False, activation=None):
    """Convolve ``inp`` with the layer weights, add the bias and apply the activation.

    Argument inp is expected of the form:
        inp.shape = (numimages, z, fmapsin, y, x)    [3D]
        inp.shape = (numimages, fmapsin, y, x)       [2D]
    Setting reshape to True assumes that the 3D input is of the form
    (numimages, fmapsin, y, x, z).

    :param inp: input variable; defaults to ``self.x`` when None
    :param reshape: if True and ``self.dim == 3``, dimshuffle the input first
    :param activation: callable applied to the result; defaults to
        ``self.activation`` when None
    :return: the activated output (also stored in ``self.y``), or None when
        ``self.dim`` is neither 2 nor 3
    """
    # Parse input. Compare against None explicitly: symbolic variables and
    # arrays do not have a reliable truth value.
    if inp is None:
        inp = self.x
    if activation is None:
        activation = self.activation

    # Reshape if requested
    # NOTE(review): this permutation does not obviously yield the
    # (numimages, z, fmapsin, y, x) layout documented above — confirm.
    if self.dim == 3 and reshape:
        inp = inp.dimshuffle(0, 2, 3, 4, 1)

    # Convolve
    if self.dim == 2:
        # IW.shape = (numimages, fmapsout, y, x)
        IW = conv.conv2d(input=inp, filters=self.W, border_mode='full')
        self.y = activation(IW + self.b.dimshuffle('x', 0, 'x', 'x'))
        return self.y
    elif self.dim == 3:
        # IW.shape = (numstacks, z, fmapsout, y, x)
        IW = conv3d2d.conv3d(signals=inp, filters=self.W, border_mode='full')
        self.y = activation(IW + self.b.dimshuffle('x', 'x', 0, 'x', 'x'))
        return self.y
def dot(self):
    """Return ``conv3d`` applied to the input ``self.x`` and the weights ``self.w``."""
    return conv3d(self.x, self.w)
def conv3d(x, kernel, strides=(1, 1, 1), border_mode='valid', dim_ordering='th',
           volume_shape=None, filter_shape=None):
    '''3-D convolution built on ``conv3d2d.conv3d``.

    x: 5-D input; for 'th' ordering (samples, input_depth, dim1, dim2, dim3),
        for 'tf' ordering (samples, dim1, dim2, dim3, input_depth).
    kernel: 5-D kernel; 'th' (out_depth, input_depth, k1, k2, k3),
        'tf' (k1, k2, k3, input_depth, out_depth).
    strides: applied by slicing the dense output; must be (1, 1, 1) for
        border_mode 'same'.
    border_mode: string, "same" or "valid".
    volume_shape, filter_shape: permuted for 'tf' ordering but otherwise
        unused by this function.

    Raises Exception for an unknown dim_ordering or border_mode.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
    if border_mode not in {'same', 'valid'}:
        raise Exception('Invalid border mode: ' + str(border_mode))

    channels_last = dim_ordering == 'tf'
    if channels_last:
        # Bring the TF layout (channels last) into the TH layout (channels
        # second) for both the volume and the kernel.
        x = x.dimshuffle((0, 4, 1, 2, 3))
        kernel = kernel.dimshuffle((4, 3, 0, 1, 2))
        if volume_shape:
            volume_shape = tuple(volume_shape[i] for i in (0, 4, 1, 2, 3))
        if filter_shape:
            filter_shape = tuple(filter_shape[i] for i in (4, 3, 0, 1, 2))

    if border_mode == 'same':
        assert(strides == (1, 1, 1))
        # Emulate 'same' by zero-padding the input and running a 'valid' conv.
        spatial_axes = (2, 3, 4)
        pads = [kernel.shape[axis] - 1 for axis in spatial_axes]
        padded_shape = (x.shape[0], x.shape[1]) + tuple(
            x.shape[axis] + pad for axis, pad in zip(spatial_axes, pads))
        canvas = T.zeros(padded_shape)
        region = (slice(None), slice(None)) + tuple(
            slice(pad // 2, x.shape[axis] + pad // 2)
            for axis, pad in zip(spatial_axes, pads))
        x = T.set_subtensor(canvas[region], x)
        border_mode = 'valid'

    # conv3d2d wants axes 1 and 2 swapped relative to the TH layout.
    swap_12 = (0, 2, 1, 3, 4)
    conv_out = conv3d2d.conv3d(signals=x.dimshuffle(*swap_12),
                               filters=kernel.dimshuffle(*swap_12),
                               border_mode=(border_mode,) * 3)
    conv_out = conv_out.dimshuffle(*swap_12)

    # support strides by manually slicing the output
    if strides != (1, 1, 1):
        conv_out = conv_out[:, :, ::strides[0], ::strides[1], ::strides[2]]

    if channels_last:
        conv_out = conv_out.dimshuffle((0, 2, 3, 4, 1))
    return conv_out
def _forward(self):
    """Declare weights and bias and build the symbolic layer output.

    The input is zero-padded first when ``self.border_mode == 'same'``. The
    convolution is then delegated to the backend named by
    ``self.implementation`` ('conv3d2d', 'conv3D' or 'dnn_conv3d'); any other
    value raises NotImplementedError. The result is stored in
    ``self.output_in`` and, after optional stride slicing, in ``self.output``.
    """
    inpt = self.inpt

    weight_shape = (self.n_output, self.filter_depth, self.n_inpt,
                    self.filter_height, self.filter_width)
    self.weights = self.declare(weight_shape)
    self.bias = self.declare((self.n_output, ))

    if self.border_mode == 'same':
        pad_h = self.filter_height - 1
        pad_w = self.filter_width - 1
        pad_d = self.filter_depth - 1
        if pad_h > 0 or pad_w > 0 or pad_d > 0:
            # Embed the input in the middle of a larger zero tensor.
            padded_shape = (inpt.shape[0],
                            inpt.shape[1] + pad_d,
                            inpt.shape[2],
                            inpt.shape[3] + pad_h,
                            inpt.shape[4] + pad_w)
            canvas = T.zeros(padded_shape)
            region = (slice(None),
                      slice(pad_d // 2, inpt.shape[1] + pad_d // 2),
                      slice(None),
                      slice(pad_h // 2, inpt.shape[3] + pad_h // 2),
                      slice(pad_w // 2, inpt.shape[4] + pad_w // 2))
            inpt = T.set_subtensor(canvas[region], inpt)

    backend = self.implementation
    if backend == 'conv3d2d':
        self.output_in = conv3d(signals=inpt, filters=self.weights)
        if self.use_bias:
            self.output_in = self.output_in + self.bias.dimshuffle(
                'x', 'x', 0, 'x', 'x')
    elif backend == 'conv3D':
        # conv3D receives kernels flipped along axes 1, 3 and 4 and takes the
        # bias as an argument, so a zero vector stands in when bias is off.
        flipped = self.weights[:, ::-1, :, ::-1, ::-1]
        if self.use_bias:
            bias = self.bias
        else:
            bias = T.zeros(self.bias.shape)
        self.output_in = conv3D(V=inpt.dimshuffle(0, 3, 4, 1, 2),
                                W=flipped.dimshuffle(0, 3, 4, 1, 2),
                                b=bias, d=(1, 1, 1))
        self.output_in = self.output_in.dimshuffle(0, 3, 4, 1, 2)
    elif backend == 'dnn_conv3d':
        self.output_in = theano.sandbox.cuda.dnn.dnn_conv3d(
            img=inpt.dimshuffle(0, 2, 1, 3, 4),
            kerns=self.weights.dimshuffle(0, 2, 1, 3, 4))
        self.output_in = self.output_in.dimshuffle(0, 2, 1, 3, 4)
        if self.use_bias:
            self.output_in = self.output_in + self.bias.dimshuffle(
                'x', 'x', 0, 'x', 'x')
    else:
        raise NotImplementedError(
            'This class only supports conv3d2d, conv3D and dnn_conv3d')

    self.output = self.output_in

    # Strides are realised by sub-sampling the dense output.
    if self.strides != (1, 1, 1):
        step_h, step_w, step_d = self.strides[0], self.strides[1], self.strides[2]
        self.output = self.output[:, ::step_d, :, ::step_h, ::step_w]
def get_output(self, train):
    """Return the activated 3-D convolution of the layer input.

    :param train: forwarded to ``self.get_input``
    :return: ``self.activation(conv(X, W) + b)``

    For border modes other than 'valid' the input is zero-padded and a
    'valid' convolution is run, because both conv3d2d.conv3d and nnet.conv3D
    only support the 'valid' border mode. 'same' requires a subsample of
    (1, 1, 1).
    """
    X = self.get_input(train)
    border_mode = self.border_mode

    if border_mode != 'valid':
        if border_mode == 'same':
            assert(self.subsample == (1, 1, 1))
            pad_z = (self.nb_depth - self.subsample[0])
            pad_x = (self.nb_row - self.subsample[1])
            pad_y = (self.nb_col - self.subsample[2])
        else:  # full
            pad_z = (self.nb_depth - 1) * 2
            pad_x = (self.nb_row - 1) * 2
            pad_y = (self.nb_col - 1) * 2

        input_shape = X.shape
        output_shape = (input_shape[0], input_shape[1],
                        input_shape[2] + pad_z,
                        input_shape[3] + pad_x,
                        input_shape[4] + pad_y)
        output = T.zeros(output_shape)
        indices = (slice(None), slice(None),
                   slice(pad_z // 2, input_shape[2] + pad_z // 2),
                   slice(pad_x // 2, input_shape[3] + pad_x // 2),
                   slice(pad_y // 2, input_shape[4] + pad_y // 2))
        X = T.set_subtensor(output[indices], X)
        border_mode = 'valid'

    # The dimshuffled kernels are kept in locals. The original reassigned
    # ``self.W`` to a permuted expression, which replaced the stored variable
    # and, on the GPU path, left it in a different axis order after each call.
    if on_gpu():
        W_shape = (self.W_shape[0], self.W_shape[2], self.W_shape[1],
                   self.W_shape[3], self.W_shape[4])
        conv_out = conv3d2d.conv3d(signals=X.dimshuffle(0, 2, 1, 3, 4),
                                   filters=self.W.dimshuffle(0, 2, 1, 3, 4),
                                   filters_shape=W_shape,
                                   border_mode=border_mode)
        conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)
    else:
        # conv3D adds its ``b`` argument itself. Pass zeros so the bias is
        # added exactly once below, matching the GPU path.
        conv_out = T.nnet.conv3D(V=X.dimshuffle(0, 2, 3, 4, 1),
                                 W=self.W.dimshuffle(0, 2, 3, 4, 1),
                                 b=T.zeros_like(self.b), d=self.subsample)
        conv_out = conv_out.dimshuffle(0, 4, 1, 2, 3)

    output = self.activation(conv_out + self.b.dimshuffle('x', 0, 'x', 'x', 'x'))
    return output
def compute_output(self, network, in_vw):
    """Create the weight and the 'valid' 3-D convolution output for this node.

    Reads ``num_filters``, ``filter_size``, stride, pad and ``inits`` from the
    network's hyperparameters. Only a 3-element filter size, pad "valid" and
    stride (1, 1, 1) are accepted (enforced with asserts). Registers the
    result as the "default" variable tagged "output".
    """
    # gather hyperparameters
    n_filters = network.find_hyperparameter(["num_filters"])
    kernel_size = network.find_hyperparameter(["filter_size"])
    stride = network.find_hyperparameter(["conv_stride", "stride"],
                                         (1, 1, 1))
    pad = network.find_hyperparameter(["conv_pad", "pad"], "valid")
    found_inits = network.find_hyperparameters(["inits"], [])
    inits = list(toolz.concat(found_inits))
    assert len(kernel_size) == 3
    assert pad == "valid"
    assert stride == (1, 1, 1)

    # create weight
    n_channels = in_vw.shape[1]
    filter_shape = (n_filters, n_channels) + tuple(kernel_size)
    weight_vw = network.create_vw(
        name="weight",
        is_shared=True,
        shape=filter_shape,
        tags={"parameter", "weight"},
        inits=inits,
    )
    W = weight_vw.variable

    from theano.tensor.nnet.conv3d2d import conv3d

    # conv3d takes signals in order: (batch, time, channels, row, column)
    # and filters in order: (out channel, time, in channels, row, column),
    # whereas W is stored channel-first like the other convolutions, so axes
    # 1 and 2 are swapped on the way in.
    axis_order = (0, 2, 1, 3, 4)
    signals = in_vw.variable.dimshuffle(*axis_order)
    filters = W.dimshuffle(*axis_order)
    signals_shape = [in_vw.shape[axis] for axis in axis_order]
    filters_shape = [filter_shape[axis] for axis in axis_order]
    out_var = conv3d(signals=signals,
                     filters=filters,
                     signals_shape=signals_shape,
                     filters_shape=filters_shape,
                     # HACK as of 20150916, conv3d does a check
                     # if isinstance(border_mode, str), so we manually
                     # cast as a string
                     border_mode=str("valid"))

    out_shape = conv_output_shape(input_shape=in_vw.shape,
                                  num_filters=n_filters,
                                  axes=(2, 3, 4),
                                  conv_shape=kernel_size,
                                  strides=stride,
                                  pads=conv_parse_pad(kernel_size, pad))
    network.create_vw(
        "default",
        variable=out_var,
        shape=out_shape,
        tags={"output"},
    )
def __init__(self, filters, signal_shape, filter_shape,
             input_axes=('b', 0, 1, 't', 'c'), batch_size=None,
             output_axes=('b', 0, 1, 't', 'c'),
             kernel_stride=(1, 1, 1), pad=0, message='', partial_sum=None):
    """Store the configuration of a 3-D convolution operator.

    :param filters: GPU shared variable holding the kernels (must expose
        ``get_value`` and have 'Cuda' in its type name)
    :param signal_shape: stored as ``self.signal_shape``
    :param filter_shape: stored as ``self.filter_shape``; entry 0 sizes the
        dummy bias
    :param input_axes: only ('b', 0, 1, 't', 'c') is supported
    :param batch_size: must be None
    :param output_axes: stored as ``self.output_axes``
    :param kernel_stride: length-3 sequence; only all-ones is supported
    :param pad: stored as ``self.pad``
    :param message: must be the empty string
    :param partial_sum: stored as ``self.partial_sum``
    :raises ValueError: for an unsupported ``kernel_stride``
    :raises NotImplementedError: for unsupported message / batch_size /
        input_axes
    """
    if len(kernel_stride) != 3:
        raise ValueError("kernel_stride must have length 3")
    elif kernel_stride[0] != kernel_stride[1]:
        raise ValueError("only values of kernel_stride with both "
                         "elements equal are supported currently")
    if message != '':
        raise NotImplementedError()
    if batch_size is not None:
        raise NotImplementedError()
    if input_axes != ('b', 0, 1, 't', 'c'):
        raise NotImplementedError()

    # Normalise to a tuple before comparing: a list such as [1, 1, 1] never
    # equals the tuple (1, 1, 1), which made the old default always raise.
    if tuple(kernel_stride) != (1, 1, 1):
        raise ValueError("only values of kernel_stride with value of 1 "
                         " are supported currently")

    self.input_axes = input_axes
    self.output_axes = output_axes

    # NOTE(review): conv3d is called with no arguments here; confirm it is an
    # op factory in this module rather than the convolution function itself.
    self.conv3d_op = conv3d()

    # filters should be a GPU shared variable.
    # I guess you could GpuFromHost them every time,
    # but if you're using this class you probably care
    # about performance and want to be at least warned
    # that this is happening
    assert hasattr(filters, 'get_value')
    assert 'Cuda' in str(type(filters))
    self._filters = filters
    self.pad = pad
    self.partial_sum = partial_sum
    self.kernel_stride = kernel_stride
    self.signal_shape = signal_shape
    self.filter_shape = filter_shape

    # Add a dummy b for interface issue
    self.b = sharedX(np.zeros((filter_shape[0])))
def set_output(self):
    """Set ``self._output`` to the sum of two biased convolutions.

    The previous-layer and current-layer outputs are each zero-padded by
    ``padding`` on both sides of axes 1, 3 and 4, convolved with ``Wh`` /
    ``Wx`` and offset by ``bh`` / ``bx`` broadcast over axis 2.
    """
    padding = self._padding
    input_shape = self._input_shape

    padded_dims = (input_shape[0],
                   input_shape[1] + 2 * padding[1],
                   input_shape[2],
                   input_shape[3] + 2 * padding[3],
                   input_shape[4] + 2 * padding[4])
    centre = (slice(None),
              slice(padding[1], padding[1] + input_shape[1]),
              slice(None),
              slice(padding[3], padding[3] + input_shape[3]),
              slice(padding[4], padding[4] + input_shape[4]))

    def _embed(value):
        # Zero tensor of the padded size with ``value`` written into the centre.
        canvas = tensor.alloc(0.0, *padded_dims)
        return tensor.set_subtensor(canvas[centre], value)

    padded_input_prev = _embed(self._prev_layer.output)
    padded_input_curr = _embed(self._curr_layer.output)

    bias_axes = ('x', 'x', 0, 'x', 'x')
    prev_out = conv3d2d.conv3d(padded_input_prev, self.Wh.val) \
        + self.bh.val.dimshuffle(*bias_axes)
    curr_out = conv3d2d.conv3d(padded_input_curr, self.Wx.val) \
        + self.bx.val.dimshuffle(*bias_axes)
    self._output = prev_out + curr_out
def symb_forward(self, symb_input):
    """Return the symbolic 3-D convolution of ``symb_input`` with the layer weights.

    symb_input shape: (n_input, depth, channels, height, width).
    Raises NotImplementedError when the input has fewer than 5 dimensions.
    The bias, broadcast over axis 2, is added when ``self.with_bias`` is set.
    """
    if not symb_input.ndim >= 5:
        raise NotImplementedError("3D convolution requires a dimension >= 5")

    result = conv3d2d.conv3d(symb_input, self.weight,
                             filters_shape=self.w_shape,
                             border_mode=self.border_mode)
    if not self.with_bias:
        return result
    return result + self.bias.dimshuffle("x", "x", 0, "x", "x")
def compute_output(self, network, in_vw):
    """Create the weight and the 'valid' 3-D convolution output for this node.

    Hyperparameters read from ``network``: ``num_filters``, ``filter_size``,
    stride (default (1, 1, 1)) and pad (default "valid"). Anything other than
    a 3-element filter size, pad "valid" and unit stride trips an assert.
    The output is registered as the "default" variable tagged "output".
    """
    # gather hyperparameters
    lookup = network.find_hyperparameter
    num_filters = lookup(["num_filters"])
    filter_size = lookup(["filter_size"])
    stride = lookup(["conv_stride", "stride"], (1, 1, 1))
    pad = lookup(["conv_pad", "pad"], "valid")
    assert len(filter_size) == 3
    assert pad == "valid"
    assert stride == (1, 1, 1)

    # create weight; stored as (out channels, in channels, *filter_size)
    filter_shape = (num_filters, in_vw.shape[1]) + tuple(filter_size)
    W = network.create_vw(
        name="weight",
        is_shared=True,
        shape=filter_shape,
        tags={"parameter", "weight"},
        default_inits=[],
    ).variable

    from theano.tensor.nnet.conv3d2d import conv3d

    # conv3d takes signals in order: (batch, time, channels, row, column)
    # and filters in order: (out channel, time, in channels, row, column)
    # but W keeps the layout shared with the other convolutions, hence the
    # swap of axes 1 and 2 for every operand and shape.
    order = (0, 2, 1, 3, 4)

    def _permute(shape):
        return [shape[o] for o in order]

    out_var = conv3d(
        signals=in_vw.variable.dimshuffle(*order),
        filters=W.dimshuffle(*order),
        signals_shape=_permute(in_vw.shape),
        filters_shape=_permute(filter_shape),
        # HACK as of 20150916, conv3d does a check
        # if isinstance(border_mode, str), so we manually
        # cast as a string
        border_mode=str("valid"))

    pads = conv_parse_pad(filter_size, pad)
    out_shape = conv_output_shape(input_shape=in_vw.shape,
                                  num_filters=num_filters,
                                  axes=(2, 3, 4),
                                  conv_shape=filter_size,
                                  strides=stride,
                                  pads=pads)
    network.create_vw(
        "default",
        variable=out_var,
        shape=out_shape,
        tags={"output"},
    )
def apply(self, graph):
    """Read the node inputs from ``graph``, convolve, and write the "output" key.

    Keys read: "input", "num_filters", "filter_size", plus "stride" and "pad"
    with defaults (1, 1, 1) and "valid". Only a 3-element filter size, pad
    "valid" and unit stride pass the asserts. The weight is obtained through
    ``th_utils.read_key_with_state_default`` under the key "weight".
    """
    in_vw = graph.read_key(key="input")
    n_filters = graph.read_key(key="num_filters")
    kernel_size = graph.read_key(key="filter_size")
    stride = graph.read_key_with_default(key="stride", default=(1, 1, 1))
    pad = graph.read_key_with_default(key="pad", default="valid")
    assert len(kernel_size) == 3
    assert pad == "valid"
    assert stride == (1, 1, 1)

    # create weight
    n_channels = in_vw.shape[1]
    filter_shape = (n_filters, n_channels) + tuple(kernel_size)
    weight_tags = {"weight": True,
                   "linear_weight": True,
                   "in_axes": (1,),
                   "out_axes": (0,),
                   "shape": filter_shape,
                   "dtype": fX}
    W = th_utils.read_key_with_state_default(
        graph=graph,
        key="weight",
        tags=weight_tags,
        state_tags={"parameter": True,
                    "state": True}
    ).var

    from theano.tensor.nnet.conv3d2d import conv3d

    # conv3d takes signals in order: (batch, time, channels, row, column)
    # and filters in order: (out channel, time, in channels, row, column);
    # W is kept in the layout used by the other convolutions, so axes 1 and 2
    # are exchanged here.
    order = (0, 2, 1, 3, 4)
    shuffled_signals = in_vw.variable.dimshuffle(*order)
    shuffled_filters = W.dimshuffle(*order)
    out_var = conv3d(signals=shuffled_signals,
                     filters=shuffled_filters,
                     signals_shape=[in_vw.shape[o] for o in order],
                     filters_shape=[filter_shape[o] for o in order],
                     # HACK as of 20150916, conv3d does a check
                     # if isinstance(border_mode, str), so we manually
                     # cast as a string
                     border_mode=str("valid"))

    out_shape = conv_output_shape(input_shape=in_vw.shape,
                                  num_filters=n_filters,
                                  axes=(2, 3, 4),
                                  conv_shape=kernel_size,
                                  strides=stride,
                                  pads=conv_parse_pad(kernel_size, pad))
    graph.write_key(key="output", value=VariableWrapper(out_var, out_shape))
def kernel_3d_center_surround_filter(symbolic_input, model=None, name=None, config=None):
    """
        Function to be used to initialize a `Node`.

        Comparable to the VirtualRetina OPL layer, this node computes a
        center-surround signal. To do this it creates a big composite kernel.

        :param symbolic_input: variable that is convolved with the kernel
        :param model: passed to the filter builders as ``retina``; its
            ``config`` is read for 'input-luminosity-range', so it must not
            be None in practice
        :param name: prefix for the variable names; a fresh UUID string is
            generated per call when omitted
        :param config: dict of filter parameters; defaults to an empty dict
        :return: dict describing the node (output variable, parameter
            variables, node type/description and ``get_num_inputs``)
    """
    # Build the defaults per call: a ``uuid.uuid4()`` default is evaluated
    # once at definition time and is not a string, so ``name + '_kernel'``
    # could never work with it.
    if name is None:
        name = str(uuid.uuid4())
    if config is None:
        config = {}

    _kernel = dtensor5(name + '_kernel')
    output_variable = conv3d(symbolic_input, _kernel)
    output_variable.name = name + '_output'

    parameter_variables = [_kernel]

    node_type = '3d Kernel Filter Node'
    epsilon = float(config.get('epsilon', 0.000000001))

    # Numeric building blocks of the composite kernel.
    num_E_n_C = m_en_filter(int(config.get('center-n__uint', 0)),
                            float(config.get('center-tau__sec', 0.0001)),
                            normalize=True, retina=model)
    num_G_C = m_g_filter(float(config.get('center-sigma__deg', 0.05)),
                         float(config.get('center-sigma__deg', 0.05)),
                         retina=model, normalize=True, even=False)
    num_TwuTu_C = m_t_filter(float(config.get('undershoot', {}).get('tau__sec', 0.001)),
                             float(config.get('undershoot', {}).get('relative-weight', 1.0)),
                             normalize=True, retina=model, epsilon=0.0000000000001)
    num_E_S = m_e_filter(float(config.get('surround-tau__sec', 0.001)),
                         retina=model, normalize=True)
    num_G_S = m_g_filter(float(config.get('surround-sigma__deg', 0.15)),
                         float(config.get('surround-sigma__deg', 0.15)),
                         retina=model, normalize=True, even=False)
    num_Reshape_C_S = fake_filter(num_G_S, num_E_S)
    num_lambda_OPL = config.get('opl-amplification', 0.25) / model.config.get('input-luminosity-range', 255.0)
    num_w_OPL = config.get('opl-relative-weight', 0.7)

    center_filter = retina_base.conv(retina_base.conv(num_E_n_C, num_TwuTu_C),
                                     num_G_C)
    num_kernel = retina_base.minimize_filter(
        num_lambda_OPL * (
            retina_base.conv(center_filter, num_Reshape_C_S)
            - num_w_OPL * retina_base.conv(retina_base.conv(center_filter, num_E_S), num_G_S)),
        filter_epsilon=epsilon)

    node_description = lambda: 'Convolution ' + str(num_kernel.shape)

    def get_num_inputs(num_input_variable):
        # Map each symbolic parameter to its numeric value.
        return dict(zip(parameter_variables, [num_kernel]))

    return {
        'output_variable': output_variable,
        'accept_dimensions': [3],
        'parameter_variables': parameter_variables,
        'state_variables': [],
        'inital_states': [],
        'updated_state_variables': [],
        # Report this node's own type and description; the previous literals
        # ('2d Gauss Filter Node', 'Recursive Filtering') were left over from
        # another node and ignored the values computed above.
        'node_type': node_type,
        'node_description': node_description,
        'get_num_inputs': get_num_inputs
    }
def __init__(self, config, kernel_center=None, kernel_surround=None, name=None):
    """Build the symbolic center-surround graph and compile it.

    :param config: dict; read for 'name', 'amplification',
        'input-luminosity-range' and 'relative-weight'
    :param kernel_center: numeric center kernel; defaults to ones of shape
        (1, 1, 1, 1, 1)
    :param kernel_surround: numeric surround kernel; same default
    :param name: prefix for the symbolic variable names; a UUID string is
        generated when omitted
    """
    self.config = config
    if name is None:
        name = str(uuid.uuid4())
    self.kernel_center = kernel_center if kernel_center is not None else np.ones((1, 1, 1, 1, 1))
    self.kernel_surround = kernel_surround if kernel_surround is not None else np.ones((1, 1, 1, 1, 1))
    self.name = self.config.get('name', name)

    # Symbolic inputs and kernels.
    self._I = dtensor5(name + '_I')
    self._kernel_C = dtensor5(name + '_k_C')
    self._kernel_S = dtensor5(name + '_k_S')
    self._C = conv3d(self._I, self._kernel_C)
    self._S = conv3d(self._C, self._kernel_S)
    self._Reshape_C_S = dtensor5(name + '_Reshape_C_S')
    self._lambda_OPL = T.dscalar(name + '_lambda_OPL')
    # Give the weight its own label; it previously reused '_lambda_OPL',
    # making the two scalars indistinguishable by name.
    self._w_OPL = T.dscalar(name + '_w_OPL')
    self._I_OPL = self._lambda_OPL * (conv3d(self._C, self._Reshape_C_S) - self._w_OPL * self._S)

    self.input_variables = [self._I]
    self.internal_variables = [self._kernel_C, self._kernel_S, self._Reshape_C_S,
                               self._lambda_OPL, self._w_OPL]
    self.output_variable = self._I_OPL
    self.compute_function = theano.function(self.input_variables + self.internal_variables,
                                            self.output_variable)

    # Numeric values for the internal variables.
    self.num_Reshape_C_S = fake_filter(self.kernel_center)
    self.num_lambda_OPL = self.config.get('amplification', 0.25) / self.config.get('input-luminosity-range', 255.0)
    self.num_w_OPL = self.config.get('relative-weight', 0.7)
    self.state = None
def symb_forward(self, symb_input):
    """Build the symbolic output of the 3-D convolution layer.

    symb_input shape: (n_input, depth, channels, height, width).
    Inputs with fewer than 5 dimensions raise NotImplementedError. When
    ``self.with_bias`` is true the bias is added, broadcast over every axis
    except axis 2.
    """
    too_few_dims = symb_input.ndim < 5
    if too_few_dims:
        raise NotImplementedError(
            '3D convolution requires a dimension >= 5')

    conv_kwargs = dict(filters_shape=self.w_shape,
                       border_mode=self.border_mode)
    conv_output = conv3d2d.conv3d(symb_input, self.weight, **conv_kwargs)

    if self.with_bias:
        conv_output = conv_output + self.bias.dimshuffle('x', 'x', 0, 'x', 'x')
    return conv_output
def conv3d(x, kernel, strides=(1, 1, 1), border_mode='valid'):
    '''3-D convolution of ``x`` with ``kernel``.

    border_mode: string; 'valid' is passed through, 'same' (requires strides
    of (1, 1, 1)) and any other value (treated as 'full') are emulated by
    zero-padding the input, since both conv3d2d.conv3d and nnet.conv3D only
    support the 'valid' border mode.

    The GPU path uses conv3d2d.conv3d and requires strides (1, 1, 1); the
    other path uses T.nnet.conv3D with ``d=strides``.
    '''
    if border_mode != 'valid':
        kz, kx, ky = kernel.shape[2], kernel.shape[3], kernel.shape[4]
        if border_mode == 'same':
            assert(strides == (1, 1, 1))
            pad_z = (kz - strides[0])
            pad_x = (kx - strides[1])
            pad_y = (ky - strides[2])
        else:  # full
            pad_z = (kz - 1) * 2
            pad_x = (kx - 1) * 2
            pad_y = (ky - 1) * 2

        # Write the input into the centre of a zero tensor of the padded size.
        in_shape = x.shape
        canvas = T.zeros((in_shape[0], in_shape[1],
                          in_shape[2] + pad_z,
                          in_shape[3] + pad_x,
                          in_shape[4] + pad_y))
        centre = (slice(None), slice(None),
                  slice(pad_z // 2, in_shape[2] + pad_z // 2),
                  slice(pad_x // 2, in_shape[3] + pad_x // 2),
                  slice(pad_y // 2, in_shape[4] + pad_y // 2))
        x = T.set_subtensor(canvas[centre], x)
        border_mode = 'valid'

    if _on_gpu():
        assert(strides == (1, 1, 1))
        # Swap axes 1 and 2 to the order conv3d2d expects, then swap back.
        swap_12 = (0, 2, 1, 3, 4)
        conv_out = conv3d2d.conv3d(signals=x.dimshuffle(*swap_12),
                                   filters=kernel.dimshuffle(*swap_12),
                                   border_mode=border_mode)
        return conv_out.dimshuffle(*swap_12)

    # Move axis 1 to the end for conv3D, then restore it afterwards.
    conv_out = T.nnet.conv3D(V=x.dimshuffle(0, 2, 3, 4, 1),
                             W=kernel.dimshuffle(0, 2, 3, 4, 1),
                             b=None, d=strides)
    return conv_out.dimshuffle(0, 4, 1, 2, 3)
def forward(self, x, batch_size, run_time):
    """Convolve, max-pool, add bias, activate and flatten to two dimensions.

    ``x`` is reshaped to ``(batch_size,) + self.image_shape`` before a 'valid'
    convolution with ``self.w``. ``run_time`` is accepted but not used here.
    """
    batched_shape = (batch_size,) + self.image_shape
    volume = x.reshape(batched_shape)

    # Convolve input feature maps with filters
    conv_out = conv3d2d.conv3d(signals=volume,
                               filters=self.w,
                               signals_shape=batched_shape,
                               filters_shape=self.filter_shape,
                               border_mode='valid')

    # Axes 1 and 2 are swapped around the pooling call because of the pooling
    # function's prototype, then swapped back.
    swap_12 = [0, 2, 1, 3, 4]
    pooled = max_pool_3d(conv_out.dimshuffle(swap_12), self.poolsize,
                         ignore_border=True)
    pre_activation = pooled.dimshuffle(swap_12) \
        + self.b.dimshuffle('x', 'x', 0, 'x', 'x')
    activated = self.neuron_type.activation_function(pre_activation)
    return activated.flatten(2)
def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
    """Wire the layer: reshape the input, convolve, pool, add bias, activate.

    Sets ``self.inpt``, ``self.pooled_out``, ``self.activation``,
    ``self.output`` and ``self.output_dropout``. ``inpt_dropout`` and
    ``mini_batch_size`` are accepted but unused here.
    """
    self.inpt = inpt.reshape(self.image_shape)

    # conv3d takes as input (Batch, Z, n_feature maps, Y, X);
    # we hold (Batch, n_feature_maps, X, Y, Z), so every operand and shape is
    # permuted with the same index list.
    # OUTPUTS: (N, Z - z_filter + 1, n_features, Y - y_filter + 1, X - x_filter + 1)
    to_conv_order = [0, 4, 1, 3, 2]
    permuted_filter_shape = [self.filter_shape[idx] for idx in to_conv_order]
    permuted_image_shape = [self.image_shape[idx] for idx in to_conv_order]
    conv_out = conv3d(
        signals=self.inpt.dimshuffle(*to_conv_order),
        filters=self.w.dimshuffle(*to_conv_order),
        filters_shape=permuted_filter_shape,
        signals_shape=permuted_image_shape)
    conv_out = conv_out.dimshuffle(0, 2, 4, 3, 1)

    self.pooled_out = pool.pool_3d(input=conv_out,
                                   ws=self.poolsize,
                                   ignore_border=True)

    # dimshuffle broadcasts the bias vector across all axes except axis 1
    broadcast_bias = self.b.dimshuffle('x', 0, 'x', 'x', 'x')
    self.activation = self.pooled_out + broadcast_bias
    self.output = self.activation_fn(self.activation)
    self.output_dropout = self.output  # no dropout in the convolutional layers
def set_output(self):
    """Set ``self._output`` to the biased 3-D convolution of the previous layer.

    When the padding entries sum to more than zero, the previous layer's
    output is first embedded in a zero tensor enlarged by ``2 * padding`` on
    axes 1, 3 and 4; otherwise it is convolved as-is.
    """
    padding = self._padding
    input_shape = self._input_shape

    if not np.sum(self._padding) > 0:
        padded_input = self._prev_layer.output
    else:
        padded_dims = (input_shape[0],
                       input_shape[1] + 2 * padding[1],
                       input_shape[2],
                       input_shape[3] + 2 * padding[3],
                       input_shape[4] + 2 * padding[4])
        canvas = tensor.alloc(0.0, *padded_dims)  # 0.0 is the fill value
        centre = (slice(None),
                  slice(padding[1], padding[1] + input_shape[1]),
                  slice(None),
                  slice(padding[3], padding[3] + input_shape[3]),
                  slice(padding[4], padding[4] + input_shape[4]))
        padded_input = tensor.set_subtensor(canvas[centre],
                                            self._prev_layer.output)

    convolved = conv3d2d.conv3d(padded_input, self.W.val)
    self._output = convolved + self.b.val.dimshuffle('x', 'x', 0, 'x', 'x')
def get_output_for(self, input, *args, **kwargs):
    """Return the nonlinearity applied to the 'valid' 3-D convolution of ``input``.

    input is bct01

    based on
    https://github.com/lpigou/Theano-3D-ConvNet/blob/master/convnet3d/convnet3d.py
    released as public domain.
    """
    input_shape = self.input_layer.get_output_shape()
    t, h, w = input_shape[2], input_shape[3], input_shape[4]
    input_c = input_shape[1]
    batch_size = input_shape[0]
    filter_t, filter_h, filter_w = self.filter_size

    input_btc01 = input.dimshuffle([0, 2, 1, 3, 4])  # bct01 -> btc01
    # Pass the shuffled input; the original referenced an undefined name
    # ``btc01`` here and raised NameError.
    out_btc01 = conv3d2d.conv3d(
        signals=input_btc01,
        filters=self.W,
        signals_shape=(batch_size, t, input_c, h, w),
        filters_shape=(self.num_filters, filter_t, input_c, filter_h, filter_w),
        border_mode='valid')
    out_bct01 = out_btc01.dimshuffle([0, 2, 1, 3, 4])  # btc01 -> bct01

    if self.b is not None:
        out_bct01 = out_bct01 + self.b.dimshuffle('x', 0, 'x', 'x', 'x')

    return self.nonlinearity(out_bct01)
def get_output_for(self, input, *args, **kwargs):
    """Convolve ``input`` in 3-D ('valid'), add the optional bias, apply the nonlinearity.

    input is bct01

    based on
    https://github.com/lpigou/Theano-3D-ConvNet/blob/master/convnet3d/convnet3d.py
    released as public domain.
    """
    batch_size, input_c, t, h, w = self.input_layer.get_output_shape()[:5]
    filter_t, filter_h, filter_w = self.filter_size

    swap_12 = [0, 2, 1, 3, 4]
    input_btc01 = input.dimshuffle(swap_12)  # bct01 -> btc01
    # ``signals`` must be the shuffled input defined above; the undefined
    # name ``btc01`` used previously made every call fail with NameError.
    out_btc01 = conv3d2d.conv3d(
        signals=input_btc01,
        filters=self.W,
        signals_shape=(batch_size, t, input_c, h, w),
        filters_shape=(self.num_filters, filter_t, input_c, filter_h, filter_w),
        border_mode='valid')
    out_bct01 = out_btc01.dimshuffle(swap_12)  # btc01 -> bct01

    if self.b is not None:
        out_bct01 = out_bct01 + self.b.dimshuffle('x', 0, 'x', 'x', 'x')

    return self.nonlinearity(out_bct01)
def get_reconstructed_input(self):
    """ Computes the reconstructed input given the values of the hidden
    layer.

    The hidden activations are convolved with ``self.W_prime``, embedded in a
    tensor filled with -100, repeated along the last three axes by integer
    index division with the pool size, and passed through a sigmoid after
    adding ``self.b_prime``.
    """
    repeated_conv = conv3d(
        self.hidden,
        self.W_prime,
    )

    # Floor division keeps the border width and the shape entries integral
    # under true division as well; plain ``/`` would produce floats there.
    bp = (self.filter_shape[1] - 1) // 2
    # A zero border must map to an open-ended slice: ``0:-0`` selects nothing.
    inner = slice(bp, -bp or None)

    repeated_conv = repeated_conv.dimshuffle(0, 2, 1, 3, 4)
    zeropad = T.zeros((self.image_shape[0],
                       1,
                       self.image_shape[1] // self.poolsize[0],
                       self.image_shape[3] // self.poolsize[1],
                       self.image_shape[4] // self.poolsize[2])) - 100
    repeated_conv = T.set_subtensor(zeropad[:, :, inner, inner, inner],
                                    repeated_conv)

    z = repeated_conv
    # presumably z is (n_batch, 1, n/2, n/2, n/2) at this point — TODO confirm
    shp = z.shape
    zp = z.reshape((shp[0] * shp[1], shp[2], shp[3], shp[4]))

    # Un-pool by indexing every spatial axis with i // poolsize.
    # NOTE(review): the same index (built from x1.shape[3] and poolsize[1])
    # is reused for all three axes — confirm cubic volumes/pools are assumed.
    iid = [T.arange(self.x1.shape[3]) // self.poolsize[1]]
    c = zp[:, :, :, iid]
    c = c[:, :, iid]
    c = c[:, iid].reshape(self.x1.shape)

    z = T.nnet.sigmoid(c + self.b_prime.dimshuffle('x', 'x', 0, 'x', 'x'))
    return z
def decoderfeedforward(self, inp=None, reshape=False, activation=None):
    """Map the layer output back towards the input space with flipped kernels.

    Argument inp is expected of the form:
        inp.shape = (numimages, z, fmapsout, y, x)    [3D]
        inp.shape = (numimages, fmapsout, y, x)       [2D]
    This layer tries to map a (numimages, z, fmapsout, y, x) image to
    (numimages, z, fmapsin, y, x). The convolution filters come from
    ``self.flipconvfilter()``.

    :param inp: input variable; defaults to ``self.y`` when None
    :param reshape: if True and ``self.dim == 3``, dimshuffle the input first
    :param activation: callable applied to the result; defaults to
        ``self.activation`` when None
    :return: the reconstruction (also stored in ``self.xr``), or None when
        ``self.dim`` is neither 2 nor 3
    """
    # Parse input. Compare against None explicitly: symbolic variables and
    # arrays do not have a reliable truth value.
    if inp is None:
        inp = self.y
    if activation is None:
        activation = self.activation

    # Reshape if requested
    if self.dim == 3 and reshape:
        inp = inp.dimshuffle(0, 2, 3, 4, 1)

    if self.dim == 2:
        # Flip conv. kernel
        Wt = self.flipconvfilter()
        # IW.shape = (numimages, fmapsin, y, x)
        IW = conv.conv2d(input=inp, filters=Wt, border_mode='full')
        # Honour the requested activation; the original always applied
        # self.activation and silently ignored the argument.
        self.xr = activation(IW + self.bp.dimshuffle('x', 0, 'x', 'x'))
        return self.xr
    elif self.dim == 3:
        # Flip conv. kernel
        Wt = self.flipconvfilter()
        # IW.shape = (numstacks, z, fmapsin, y, x)
        IW = conv3d2d.conv3d(signals=inp, filters=Wt, border_mode='full')
        self.xr = activation(IW + self.bp.dimshuffle('x', 'x', 0, 'x', 'x'))
        return self.xr
def __init__(self, rng, input, filter_shape, image_shape, W=None, b=None):
    """Create a 3-D convolution layer followed by ``relu``.

    signals_shape = (batchsize, in_time, in_channels, in_height, in_width)
    filters_shape = (flt_channels, flt_time, in_channels, flt_height, flt_width)

    :param rng: random generator with a ``uniform`` method, used only when
        ``W`` is not supplied
    :param input: symbolic input signal
    :param filter_shape: see filters_shape above
    :param image_shape: see signals_shape above; its channel entry (index 2)
        must match the filter's
    :param W: optional pre-built weight variable
    :param b: optional pre-built bias variable
    """
    self.input = input
    assert image_shape[2] == filter_shape[2]

    # Uniform initialisation bound derived from the fan-in / fan-out counts.
    fan_in = numpy.prod(filter_shape[2:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[3:]))
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))

    if W is not None:
        self.W = W
    else:
        initial_weights = numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX)
        self.W = theano.shared(initial_weights, borrow=True, name='W')

    if b is not None:
        self.b = b
    else:
        initial_bias = numpy.zeros((filter_shape[0], ),
                                   dtype=theano.config.floatX)
        self.b = theano.shared(value=initial_bias, borrow=True, name='b')

    conv_out5D = conv3d2d.conv3d(signals=input,
                                 filters=self.W,
                                 signals_shape=image_shape,
                                 filters_shape=filter_shape)

    # activation
    # NOTE(review): the bias uses a 4-D broadcast pattern against a 5-D
    # tensor; presumably this relies on left-padding during broadcasting so
    # the bias lines up with axis 2 — confirm.
    broadcast_bias = self.b.dimshuffle('x', 0, 'x', 'x')
    self.output = relu(conv_out5D + broadcast_bias)

    # store parameters of this layer
    self.params = [self.W, self.b]
def __model(self):
    """Build the symbolic network output and store it in ``self.out``.

    Every layer is a 'valid' conv3d with ``self.w[i]`` plus the bias
    ``self.b[i]`` broadcast over all axes but axis 2. The hidden
    nonlinearity follows ``self.activation``: 'sig'/'Sig' selects the
    sigmoid, 'relu'/'ReLU' selects max(., 0), anything else the hyperbolic
    tangent (the default). The last layer always uses a sigmoid.
    """
    def _affine(signal, index):
        # Convolution with layer ``index`` plus its broadcast bias.
        return (conv3d(signal, self.w[index], border_mode='valid')
                + self.b[index].dimshuffle('x', 'x', 0, 'x', 'x'))

    # Customizable non-linearity, added 6/8/15
    if (self.activation == 'sig') or (self.activation == 'Sig'):
        def nonlinearity(z):
            return T.nnet.sigmoid(z)
    elif (self.activation == 'relu') or (self.activation == 'ReLU'):
        def nonlinearity(z):
            return T.maximum(z, 0)
    else:  # nonlin == 'tanh'
        def nonlinearity(z):
            return T.tanh(z)

    # Prepare input tensor
    Xin = self.X.dimshuffle(3, 0, 'x', 1, 2)

    # Layer 1: input layer
    out = nonlinearity(_affine(Xin, 0))

    # Every other hidden layer in the network, as defined by filter_shapes
    for layer in range(1, self.net_shape.shape[0] - 1):
        out = nonlinearity(_affine(out, layer))

    # Output layer
    out = T.nnet.sigmoid(_affine(out, -1))

    # Reshuffle the dimensions so that the last three are the xyz dimensions
    # and the second one is the number of affinity graph types (for each
    # dimension)
    self.out = out.dimshuffle(2, 1, 3, 4, 0)
def __init__(self, rng, filter_shape, image_shape, poolsize):
    """Build a 3-D conv + max-pool + sigmoid layer with tied decoder weights.

    The layer owns its input: ``self.x`` is a symbolic matrix that is reshaped
    to the 5-D ``image_shape`` before the convolution.

    :param rng: random generator exposing ``uniform(low, high, size)``.
    :param filter_shape: 5-element filter shape passed to ``conv3d``.
        NOTE(review): the border width is derived from ``filter_shape[1]``
        alone and applied to three axes, so the filter is presumably cubic
        with an odd edge length -- confirm against callers.
    :param image_shape: 5-element signal shape (stored as an int16 shared
        variable).
    :param poolsize: pooling factors handed to ``max_pool_3d``.
    """
    self.image_shape = theano.shared(
        value=np.asarray(image_shape, dtype='int16'), borrow=True)
    self.poolsize = poolsize
    self.x = T.matrix(name='input')
    self.x1 = self.x.reshape(self.image_shape, ndim=5)
    self.filter_shape = filter_shape

    # Glorot-style uniform initialisation, following the 2-D LeNet recipe.
    fan_in = np.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) /
               np.prod(poolsize))
    W_bound = np.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(
        np.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX
        ),
        borrow=True
    )

    # Tied "decoder" weights: flip axes 1, 3 and 4, then swap axes 0 and 2.
    self.W_prime = self.W[:, ::-1, :, ::-1, ::-1]
    self.W_prime = self.W_prime.dimshuffle(2, 1, 0, 3, 4)

    # One bias per entry of filter axis 0 (encoder) / filter axis 2 (decoder).
    b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
    bp_values = np.zeros((filter_shape[2],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)
    self.b_prime = theano.shared(value=bp_values, borrow=True)

    # convolve input feature maps with filters
    conv_out = conv3d(self.x1, self.W)
    conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)

    # Border lost on each side.  Floor division keeps this an integer on
    # Python 3 as well; a float here cannot be used as a slice index.
    bp = (filter_shape[1] - 1) // 2
    # ``bp:-bp`` would be an empty slice for bp == 0, so leave the end open.
    inner = slice(bp, -bp if bp else None)

    # Canvas of the input's size, filled with the constant -100; the
    # convolution result is written into its centre.
    zeropad = T.zeros((self.image_shape[0], filter_shape[0],
                       self.image_shape[1], self.image_shape[3],
                       self.image_shape[4])) - 100
    conv_out = T.set_subtensor(zeropad[:, :, inner, inner, inner], conv_out)
    self.conv_out = conv_out

    # downsample each feature map individually, using maxpooling
    self.pooled_out = max_pool_3d(
        input=conv_out,
        ds=poolsize,
        ignore_border=True
    )

    # The bias is a vector; broadcast it along axis 1 of the pooled output,
    # then swap axes 1 and 2 back.
    self.hidden = T.nnet.sigmoid(
        self.pooled_out + self.b.dimshuffle('x', 0, 'x', 'x', 'x'))
    self.hidden = self.hidden.dimshuffle(0, 2, 1, 3, 4)

    # store parameters of this layer
    self.params = [self.W, self.b]
def test_conv3d(border_mode):
    """Compare ``conv3d`` with the ``pyconv3d`` reference and verify gradients.

    The scenario runs twice: first with a filter spanning 5 frames (also
    checking the diagonal-subtensor view traces), then with a single-frame
    filter, which covers the patched implementation for ``Tf == 1``.
    """
    if ndimage is None or not theano.config.cxx:
        pytest.skip("conv3d2d tests need SciPy and a c++ compiler")

    # FAST_COMPILE is replaced by FAST_RUN; any other mode is used as is.
    mode = (
        theano.compile.mode.get_mode("FAST_RUN")
        if theano.config.mode == "FAST_COMPILE"
        else theano.compile.mode.get_default_mode()
    )
    shared = theano.tensor._shared

    def ramp(dims):
        # Deterministic float32 tensor 0, 1, 2, ... laid out in ``dims``.
        return np.arange(int(np.prod(dims))).reshape(dims).astype("float32")

    def run_scenario(frames_large, frames_small, check_traces):
        # --- forward pass against the pure-python reference ---------------
        signals = ramp((3, 10, 3, 32, 32))
        filters = ramp((32, frames_large, 3, 5, 5))

        started = time.time()
        pyres = pyconv3d(signals, filters, border_mode)
        print(time.time() - started)

        s_signals = shared(signals)
        s_filters = shared(filters)
        s_output = shared(signals * 0)

        out = conv3d(
            s_signals,
            s_filters,
            signals_shape=signals.shape,
            filters_shape=filters.shape,
            border_mode=border_mode,
        )

        newconv3d = theano.function([], [], updates={s_output: out}, mode=mode)
        if check_traces:
            check_diagonal_subtensor_view_traces(newconv3d)

        started = time.time()
        newconv3d()
        print(time.time() - started)
        utt.assert_allclose(pyres, s_output.get_value(borrow=True))

        # --- gradient graph compiles and runs -----------------------------
        gsignals, gfilters = theano.grad(out.sum(), [s_signals, s_filters])
        gnewconv3d = theano.function(
            [],
            [],
            updates=[(s_filters, gfilters), (s_signals, gsignals)],
            mode=mode,
            name="grad",
        )
        if check_traces:
            check_diagonal_subtensor_view_traces(gnewconv3d)

        started = time.time()
        gnewconv3d()
        print("grad", time.time() - started)

        # --- numerical gradient check on small random tensors -------------
        small_signals = np.random.rand(3, 3, 3, 5, 5).astype("float32")
        small_filters = np.random.rand(4, frames_small, 3, 2, 2).astype("float32")
        utt.verify_grad(
            lambda s, f: conv3d(s, f, border_mode=border_mode),
            [small_signals, small_filters],
            eps=1e-1,
            mode=mode,
        )

    run_scenario(5, 2, True)
    # Additional run covering the patched implementation for filters with Tf=1.
    run_scenario(1, 1, False)
def __init__(self, input, n_in_maps, n_out_maps, kernel_shape, video_shape,
             batch_size, activation, rng, layer_name="Conv", borrow=True,
             W=None, b=None):
    """3-D convolution layer with zero padding that emulates 'same' mode.

    video_shape: (frames, height, width)
    kernel_shape: (frames, height, width)
    W_shape: (out, in, kern_frames, kern_height, kern_width)

    ``input`` is padded with zeros on axes 2, 3 and 4 by ``(k - 1) // 2`` on
    each side before a 'valid' convolution is applied.

    NOTE(review): ``W`` is created as (out, in, frames, h, w) but handed to
    ``conv3d`` with ``filters_shape`` (out, frames, in, h, w) and without a
    dimshuffle -- confirm this layout mismatch is intended.
    """
    # Mirror every constructor argument as an attribute.  This must remain
    # the first statement so no other local leaks into the instance.
    self.__dict__.update(locals())
    del self.self

    # init W -- ``is not None`` because ``!=`` on an ndarray is element-wise.
    if W is not None:
        W_val = W
    else:
        # fan in: filter time x filter height x filter width x input maps
        fan_in = prod(kernel_shape) * n_in_maps
        norm_scale = sqrt(2. / fan_in)
        W_shape = (n_out_maps, n_in_maps) + kernel_shape
        W_val = _asarray(rng.normal(loc=0, scale=norm_scale, size=W_shape),
                         dtype=floatX)
    self.W = shared(value=W_val, borrow=borrow, name=layer_name + '_W')
    self.params = [self.W]

    # init bias
    if b is not None:
        b_val = b
    else:
        b_val = zeros((n_out_maps, ), dtype=floatX)
    self.b = shared(b_val, name=layer_name + "_b", borrow=borrow)
    self.params.append(self.b)

    # Zero pad to simulate a 'same' convolution.  Floor division keeps the
    # pad sizes integral on Python 3 too (they are used as shapes below).
    pad_T, pad_H, pad_W = ((k - 1) // 2 for k in kernel_shape)
    N = input.shape[0]
    C = n_in_maps
    TT, HH, WW = video_shape
    T_zeros = T.zeros((N, C, pad_T, HH, WW))
    H_zeros = T.zeros((N, C, TT + 2 * pad_T, pad_H, WW))
    W_zeros = T.zeros((N, C, TT + 2 * pad_T, HH + 2 * pad_H, pad_W))
    paddedT = T.concatenate([T_zeros, input, T_zeros], axis=2)
    paddedTH = T.concatenate([H_zeros, paddedT, H_zeros], axis=3)
    paddedTHW = T.concatenate([W_zeros, paddedTH, W_zeros], axis=4)

    # 3D convolution; dimshuffle: last 3 dimensions must be (in, h, w)
    n_fr = video_shape[0] + 2 * pad_T
    h = video_shape[1] + 2 * pad_H
    w = video_shape[2] + 2 * pad_W
    n_fr_k, h_k, w_k = kernel_shape
    out = conv3d(signals=paddedTHW.dimshuffle([0, 2, 1, 3, 4]),
                 filters=self.W,
                 signals_shape=(batch_size, n_fr, n_in_maps, h, w),
                 filters_shape=(n_out_maps, n_fr_k, n_in_maps, h_k, w_k),
                 border_mode='valid').dimshuffle([0, 2, 1, 3, 4])

    # One bias per output map (axis 1 after the dimshuffle back).
    out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')
    self.output = activation(out)
def conv(x, w, axis_order=None, conv_dim=None, x_shape=None, w_shape=None,
         border_mode='valid', stride=None):
    """
    Apply appropriate convolution depending on input and filter dimensionality.
    If input ``w_shape`` is known, conv might be replaced by tensordot

    There are static assumptions which axes are spatial.

    Parameters
    ----------
    x: T.Tensor
        | Input data (mini-batch).
        | Tensor of shape ``(b, f, x)``, ``(b, f, x, y)``, ``(b, z, f, x, y)``
          or ``(b,f,x,y,z)``.
    w: T.Tensor
        | Set of convolution filter weights.
        | Tensor of shape ``(f_out, f_in, x)``, ``(f_out, f_in, x, y)``,
          ``(f_out, z, f_in, x, y)`` or ``(f_out, f_in, x, y, z)``.
    axis_order: str
        | (only relevant for 3d)
        | ``'dnn'`` ``(b,f,x,y(,z))`` or ``'theano'`` ``(b, z, f, x, y)``.
    conv_dim: int
        Dimensionality of the applied convolution (not the absolute dim of
        the inputs). Inferred from ``x.ndim`` when None.
    x_shape: tuple
        shape tuple (``TaggedShape`` supported). Ignored if it contains None.
    w_shape: tuple
        shape tuple, see ``w``. Required for ``border_mode='same'``;
        optional otherwise.
    border_mode: str
        * ``'valid'``: only apply filter to complete patches of the image.
          Generates output of shape: image_shape -filter_shape + 1.
        * ``'full'`` zero-pads image to multiple of filter shape to generate
          output of shape: image_shape + filter_shape - 1.
        * ``'same'``: translated to ``'half'``; filter shapes must be odd.
    stride: tuple
        | (tuple of len 2)
        | Factor by which to subsample the output. Not supported for 3d.

    Returns
    -------
    T.Tensor
        Set of feature maps generated by convolution.

    Raises
    ------
    ValueError
        On a dimensionality mismatch, ``conv_dim > 3`` or even filter sizes
        with ``border_mode='same'``.
    NotImplementedError
        If ``stride`` is given for a 3d convolution.
    """
    if (x_shape is None) or (None in x_shape):  # variable batch size or so
        x_shape = None

    assert axis_order in ['dnn', 'theano', None]

    if conv_dim is not None:
        if x.ndim!=conv_dim+2 or w.ndim!=conv_dim+2:
            raise ValueError("Cannot perform %id conv on input and filter of"
                             "dim %i, %i" % (conv_dim, x.ndim, w.ndim))
    else:  # infer conv_dim
        conv_dim = x.ndim-2
        if w.ndim!=conv_dim+2:
            raise ValueError("Dimension mismatch for conv: tried to do %id conv"
                             "on %id input x. This requires %id filter, but got"
                             "%id" % (conv_dim, x.ndim, x.ndim, w.ndim))

    if conv_dim>3:
        raise ValueError("Input tensor dim to big. No conv for dim>5.")

    if border_mode=='same':
        assert w_shape is not None
        if not np.all(np.remainder(w_shape[-conv_dim:], 2) == 1):
            raise ValueError('For "same"-mode convolution, filter shapes '
                             'must be odd in all dimensions.')
        border_mode='half'
        crop_full = False
    else:
        crop_full = False

    # A convolution whose filter has extent 1 along every spatial axis is a
    # plain dot product over the feature axis.
    use_tensordot = False
    if (w_shape is not None) and (stride is None):  # cannot use tensordot with strides
        if conv_dim<3 or axis_order=='dnn':
            use_tensordot = np.all(np.equal(w_shape[2:], 1))
        else:  # theano order for 3d conv
            use_tensordot = w_shape[1] == 1 and np.all(np.equal(w_shape[3:], 1))

    y = None
    if conv_dim==1:
        # Run the 1d case as a 2d convolution with a dummy trailing axis.
        x = x.dimshuffle(0, 1, 2, 'x')
        w = w.dimshuffle(0, 1, 2, 'x')
        if w_shape is not None:
            w_shape = list(w_shape) + [1, ]
        if x_shape is not None:
            x_shape = list(x_shape) + [1,]
        if stride is None:
            stride = (1, 1)
        y = conv2d(x, w, x_shape, w_shape, border_mode, subsample=stride)
        y = y[:, :, :, 0]

    elif conv_dim==2:
        if stride is None:
            stride = (1, 1)
        if use_tensordot:
            logger.debug("Using dot for 2d conv")
            w = w[:, :, 0, 0].T  # (f_in, f_out) (5, 7)
            y = dot(x, w, axis=1)
        elif dnn_avail and config.use_manual_cudnn_conv:
            logger.debug("Using cuDNN 2dconv")
            y = dnn.dnn_conv(x, w, border_mode, subsample=stride, algo=dnn_algo)
        else:  # fallback to theano
            y = conv2d(x, w, x_shape, w_shape, border_mode, subsample=stride)

    elif conv_dim==3:
        assert axis_order in ['dnn', 'theano']
        use_dnn = dnn_avail
        if not config.use_manual_cudnn_conv:
            use_dnn = False
        # w_shape is optional: only inspect it when it was provided.
        if (w_shape is not None and w_shape[2]==1
                and config.use_manual_cudnn_conv_not_w1):
            use_dnn = False
            logger.debug("Ignoring manual 3d cuDNN conv because kernel is "
                         "1 for first axis")
            # then theano automatically uses dnn 2d conv which
            # has faster gradient than dnn 3d conv

        if stride is not None:
            raise NotImplementedError("Cannot use strided conv with 3d conv")

        if use_tensordot:
            logger.debug("Using dot for 3d conv")
            if axis_order=='theano':
                w = w[:, 0, :, 0, 0].T  # (f_in, f_out)
                y = dot(x, w, axis=2)
            elif axis_order=='dnn':
                w = w[:, :, 0, 0, 0].T  # (f_in, f_out)
                y = dot(x, w, axis=1)

        elif use_dnn:
            if stride is None:
                stride = (1, 1, 1)
            if axis_order=='dnn':
                logger.debug("Using cuDNN 3dconv")
                y = dnn.dnn_conv3d(x, w, border_mode, subsample=stride,
                                   algo=dnn_algo)  # (b, f, x, y, z)
            else:
                if config.show_axis_order_warning:
                    logger.warning("cuDNN available but axis order is "
                                   "for theano (z before f). This leads to possibly "
                                   "inefficient dimshuffles. use cuDNN axis order.\n"
                                   "Using dnn 3dconv")
                x = x.dimshuffle(0,2,1,3,4)
                w = w.dimshuffle(0, 2, 1, 3, 4)
                y = dnn.dnn_conv3d(x, w, border_mode, subsample=stride,
                                   algo=dnn_algo)  # (b, f, x, y, z)
                y = y.dimshuffle(0,2,1,3,4)

        else:  # fallback to theano
            if axis_order=='theano':
                logger.debug("Using theano 3dconv")
                y = conv3d(x, w, x_shape, w_shape, border_mode)  # (b, z, f, x, y)
            else:
                if config.use_manual_cudnn_conv and not dnn_avail:
                    if config.show_axis_order_warning:
                        logger.warning("cuDNN not available but axis order is"
                                       "for cuDNN (z after features). This leads to possibly "
                                       "inefficient dimshuffles Use theano axis order or "
                                       "install cuDNN.\nUsing theano 3dconv")
                x = x.dimshuffle(0,2,1,3,4)
                w = w.dimshuffle(0, 2, 1, 3, 4)
                # Also swap shapes (axes 1 and 2), but only those we know.
                if w_shape is not None:
                    w_shape = list(w_shape)
                    w_shape[1], w_shape[2] = w_shape[2], w_shape[1]
                if x_shape is not None:
                    x_shape = list(x_shape)
                    x_shape[1], x_shape[2] = x_shape[2], x_shape[1]
                y = conv3d(x, w, x_shape, w_shape, border_mode)  # (b, z, f, x, y)
                y = y.dimshuffle(0,2,1,3,4)

    if crop_full:
        # Unreachable code (crop_full is never True). Remove this if it stays
        # unneeded.
        cropper = []
        off = np.divide(w_shape[-conv_dim:], 2).astype(int)
        k = 0
        if axis_order=='theano' and conv_dim==3:
            for i in range(y.ndim):
                if i in [1,3,4]:
                    cropper.append(slice(off[k], -off[k]))
                    k += 1
                else:
                    cropper.append(slice(None))
        else:
            for i in range(y.ndim):
                if i >= y.ndim - conv_dim:
                    cropper.append(slice(off[k], -off[k]))
                    k += 1
                else:
                    cropper.append(slice(None))
        cropper = tuple(cropper)
        y = y[cropper]

    return y
def test_conv():
    # Manual comparison/benchmark script for the convolution wrappers.
    # The first section is switched off with ``if False``; the second section
    # compares cuDNN 3d convolution, pooling and softmax with the
    # conv3d-based equivalents and is the only part that runs.
    # NOTE(review): the nesting under the two ``if`` statements is inferred --
    # confirm against the original layout.
    if False:
        # --- dense case: T.dot vs. the ``dot`` helper ----------------------
        sig_shape = (100000, 20)
        fil_shape = (20, 30)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(
            np.float32)
        x = T.TensorType('float32', (False, ) * 2, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)
        y1 = T.dot(x, W)
        y2 = dot(x, W, 1)
        g1 = theano.grad(T.log(y1).sum(), [x])
        g2 = theano.grad(T.log(y2).sum(), [x])
        f1 = utils.make_func([x], y1, profile_execution=10)
        f2 = utils.make_func([x], y2, profile_execution=10)
        r1 = f1(x_val)
        r2 = f2(x_val)
        assert np.allclose(r1, r2)

        # --- 2d case with a 1x1 filter: ``conv`` vs. ``conv2d`` -------------
        sig_shape = (1, 5, 300, 200)
        fil_shape = (7, 5, 1, 1)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(
            np.float32)
        x = T.TensorType('float32', (False, ) * 4, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)
        y1 = conv(x, W, w_shape=fil_shape)
        y2 = conv2d(x, W)
        g1 = theano.grad(T.log(y1).sum(), [x])
        g2 = theano.grad(T.log(y2).sum(), [x])
        f1 = utils.make_func([x], y1, profile_execution=5)
        f2 = utils.make_func([x], y2, profile_execution=5)
        r1 = f1(x_val)
        r2 = f2(x_val)
        assert np.allclose(r1, r2)

        # --- 3d case: ``conv`` vs. ``conv3d`` -------------------------------
        sig_shape = (1, 100, 5, 300, 200)
        # x_shape = (None, 100, 5, 300, 200)
        fil_shape = (7, 3, 5, 3, 3)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(
            np.float32)  # (nof, z, ch, xf, yf)
        x = T.TensorType('float32', (False, ) * 5, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)
        y1 = conv(x, W, x_shape=sig_shape, w_shape=fil_shape)
        y2 = conv3d(x, W)
        g1 = theano.grad(T.log(y1).sum(), [x])
        g2 = theano.grad(T.log(y2).sum(), [x])
        f1 = utils.make_func([x], y1, profile_execution=5)
        f2 = utils.make_func([x], y2, profile_execution=5)
        r1 = f1(x_val)
        r2 = f2(x_val)
        assert np.allclose(r1, r2)

        # --- 1d case: ``conv`` vs. numpy's 'valid' convolution --------------
        sig_shape = (1, 1, 100)
        fil_shape = (1, 1, 20)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(
            np.float32)
        x = T.TensorType('float32', (False, ) * 3, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)
        y1 = conv(x, W, w_shape=fil_shape)
        f1 = elektronn2.neuromancer.graphutils.make_func([x], y1, profile_execution=10)
        r1 = f1(x_val)
        r2 = np.convolve(x_val[0, 0], W_val[0, 0], mode='valid')[None, None]
        assert np.allclose(r1, r2)

    if True:
        sig_shape = (1, 5, 100, 300, 200)
        fil_shape = (7, 5, 3, 3, 3)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(
            np.float32)  # (nof, ch, zf, xf, yf)
        x = T.TensorType('float32', (False, ) * 5, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)

        # test conv: cuDNN 3d conv vs. conv3d on dimshuffled input/filters
        y1 = dnn.dnn_conv3d(x, W, border_mode='valid')
        y2 = conv3d(x.dimshuffle(0, 2, 1, 3, 4),
                    W.dimshuffle(0, 2, 1, 3, 4)).dimshuffle(0, 2, 1, 3, 4)
        f1 = elektronn2.neuromancer.graphutils.make_func([x], y1, profile_execution=5)
        f2 = elektronn2.neuromancer.graphutils.make_func([x], y2, profile_execution=5)
        r3 = np.array(f1(x_val))
        r4 = f2(x_val)
        # cuDNN and the reshaped conv2d3d give the same result, but cuDNN is
        # faster (original author's observation).
        assert np.allclose(
            r3, r4
        )

        # test conv + max-pooling
        y1 = dnn.dnn_conv3d(x, W, border_mode='valid')
        y1 = dnn.dnn_pool(y1, (2, 2, 2), stride=(2, 2, 2), pad=(0, 0, 0), mode='max')
        f1 = elektronn2.neuromancer.graphutils.make_func([x], y1, profile_execution=5)
        r3 = np.array(f1(x_val))
        y2 = conv3d(x.dimshuffle(0, 2, 1, 3, 4),
                    W.dimshuffle(0, 2, 1, 3, 4))
        y2 = pooling(y2, (2, 2, 2))
        f2 = elektronn2.neuromancer.graphutils.make_func([x], y2, profile_execution=5)
        r4 = f2(x_val)
        # Pooling also works, but the speed advantage is no longer as large
        # (original author's observation).
        assert np.allclose(r3, r4.transpose(
            0, 2, 1, 3, 4))

        # test conv + max-pooling + softmax
        y1 = dnn.dnn_conv3d(x, W, border_mode='valid')
        y1 = dnn.dnn_pool(y1, (2, 2, 2), stride=(2, 2, 2), pad=(0, 0, 0), mode='max')
        sm = dnn.GpuDnnSoftmax('bc01', 'fast', 'channel')
        sh = y1.shape
        # The cuDNN softmax is applied to a 4-d view, then reshaped back.
        y1 = sm(y1.flatten(4)).reshape(sh)
        f1 = elektronn2.neuromancer.graphutils.make_func([x], y1, profile_execution=5)
        r3 = np.array(f1(x_val))
        y2 = conv3d(x.dimshuffle(0, 2, 1, 3, 4),
                    W.dimshuffle(0, 2, 1, 3, 4))
        y2 = pooling(y2, (2, 2, 2))
        y2 = softmax(y2, axis=2)
        f2 = elektronn2.neuromancer.graphutils.make_func([x], y2, profile_execution=5)
        r4 = f2(x_val)
        # Softmax also works, but the results differ by ~1e-5, hence atol.
        assert np.allclose(r3, r4.transpose(0, 2, 1, 3, 4), atol=1e-5)
def __init__(self, rng, input, filter_shape, image_shape, poolsize, stride,
             max_out, maxout_size, activation, W = None, b = None,
             alpha = None, batch_norm = False, p = 0.5 , verbose = True):
    """3-D convolution layer with optional pooling, batch norm and maxout.

    :param rng: numpy-style random generator (``randint``, ``uniform``).
    :param input: symbolic signal handed to ``conv3d``.
    :param filter_shape: 5-element filter shape handed to ``conv3d``.
    :param image_shape: 5-element signal shape handed to ``conv3d``.
    :param poolsize: 3 pooling factors; pooling is applied only when
        ``poolsize[1] > 1``.
    :param stride: 3 strides; every entry must be 1.
    :param max_out: 1 = maxout, 2 = meanout, 3 = mixout; any other value
        leaves the activation output untouched.
    :param maxout_size: number of interleaved feature-map groups combined by
        the maxout variants.
    :param activation: callable applied to the pre-activation.
    :param W, b, alpha: optional pre-built shared variables.
    :param batch_norm: normalise over axes (0, 2, 3) before the activation.
    :param p: unused by this constructor.
    :param verbose: print a summary of the layer configuration.
    """
    channels = image_shape[1]
    height = image_shape[3]
    width = image_shape[4]
    # Number of bias/scale entries; kept as a true int because it is used as
    # an array shape.
    n_maps = int(filter_shape[0] // poolsize[0])

    # Created before the weights so the draw order from ``rng`` is unchanged.
    srng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999))

    if verbose is True:
        # Single-argument print() calls behave identically on Python 2 and 3.
        print(" --> initializing 3D convolutional layer with " + str(filter_shape[0]) + " kernels")
        print(" ....... kernel size [" + str(filter_shape[1]) + " X " + str(filter_shape[3]) + " X " + str(filter_shape[4]) +"]")
        print(" ....... pooling size [" + str(poolsize[0]) + " X " + str(poolsize[1]) + " X " + str(poolsize[2]) + "]")
        print(" ....... stride size [" + str(stride[0]) + " X " + str(stride[1]) + " X " + str(stride[2]) + "]")
        print(" ....... maxout size [" + str(maxout_size) + "]")
        print(" ....... input size [" + str(height)+ " X " + str(width) + "]")
        print(" ....... input number of feature maps is " +str(channels))
        print(" ....... output size is [" + str(n_maps) + " X " + str((height - filter_shape[3] + 1 ) // (poolsize[1] * stride[1]) ) + " X " + str((width - filter_shape[4] + 1 ) // (poolsize[2] * stride[2]) ) + "]")

    self.input = input

    # Strided convolution is not implemented.
    assert stride[2] == 1
    assert stride[1] == 1
    assert stride[0] == 1

    # Glorot-style uniform initialisation, following the 2-D LeNet recipe.
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))

    if W is None:
        self.W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size =filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )
    else:
        self.W = W

    # the bias is a 1D tensor -- one bias per output feature map
    if b is None:
        b_values = numpy.zeros((n_maps,), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)
    else:
        self.b = b

    # per-map scale used by batch normalisation
    if alpha is None:
        alpha_values = numpy.ones((n_maps,), dtype=theano.config.floatX)
        self.alpha = theano.shared(value=alpha_values, borrow = True)
    else:
        self.alpha = alpha

    # convolve input feature maps with filters
    conv_out = conv3d(
        signals=self.input,
        filters=self.W,
        signals_shape=image_shape,
        filters_shape=filter_shape,
    )

    # downsample each feature map individually, using maxpooling
    if poolsize[1] > 1:
        pool_out = maxpool_3D(
            input=conv_out,
            ds=poolsize
        )
    else:
        pool_out = conv_out

    # Collapse axis 1 by summation; the result is 4-D.
    pool_out = pool_out.sum(axis = 1, keepdims = False)

    if batch_norm is True:
        mean = pool_out.mean( (0,2,3), keepdims = True )
        std = pool_out.std( (0,2,3), keepdims = True )
        std += 0.001  # fudge factor to avoid division by zero
        # Scale the *centred* activations; the mean was previously computed
        # and stored but never actually removed.
        self.pool_out = pool_out - mean
        self.output = activation(
            self.pool_out * (self.alpha.dimshuffle('x', 0, 'x', 'x') / std)
            + self.b.dimshuffle('x', 0, 'x', 'x'))
    else:
        self.output = activation(pool_out + self.b.dimshuffle('x', 0, 'x', 'x'))

    # max_out = 1: Ian Goodfellow et al. "Maxout Networks" on arXiv (JMLR).
    # max_out = 2, max_out = 3: Yu, Dingjun, et al. "Mixed Pooling for
    # Convolutional Neural Networks." Rough Sets and Knowledge Technology.
    # Springer International Publishing, 2014. 364-375.
    # The same is also implemented in the MLP layers.
    if max_out in (1, 2, 3):
        # Interleaved groups of feature maps that get combined.
        pieces = [self.output[:, i::maxout_size, :, :]
                  for i in range(maxout_size)]
        if not pieces:
            # Historic behaviour for maxout_size < 1.
            self.output = None
        else:
            piece_max = pieces[0]
            piece_mean = pieces[0]
            for i, piece in enumerate(pieces[1:], 1):
                piece_max = T.maximum(piece_max, piece)
                # Proper running mean: the previous mean summarises i pieces.
                piece_mean = (piece_mean * i + piece) / (i + 1)

            if max_out == 1:    # maxout
                self.output = piece_max
            elif max_out == 2:  # meanout
                self.output = piece_mean
            elif len(pieces) == 1:
                # Mixing a single piece with itself is the identity.
                self.output = pieces[0]
            else:               # mixout: random blend of max and mean
                lambd = srng.uniform(piece_mean.shape, low=0.0, high=1.0)
                self.output = lambd * piece_max + (1 - lambd) * piece_mean

    # store parameters of this layer
    self.params = [self.W, self.b]
def __init__(self, rng, input, filter_shape, temporal_filter, image_shape,
             poolsize=(2, 2), outputType = 'rl'):
    """
    Temporal filtering layer: a fixed-shape 1-D filter applied along axis 1.

    :param rng: unused by this constructor.

    :param input: symbolic 5-D signal, of shape image_shape

    :type filter_shape: tuple or list of length 5
    :param filter_shape: passed to conv3d as filters_shape; entry 1 is the
        number of filter taps along axis 1

    :param temporal_filter: initial filter values, reshaped to
        (1, filter_shape[1], 1, 1, 1)

    :type image_shape: tuple or list of length 5
    :param image_shape: shape of the unpadded input

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor passed to max_pool_2d

    :param outputType: 'rl' for a rectified-linear output, 'l' for a linear
        one; any other value leaves self.output unset
    """
    self.input = input

    n_taps = filter_shape[1]
    kernel_dims = (1, n_taps, 1, 1, 1)
    float_type = theano.config.floatX

    self.W = theano.shared(
        value=numpy.reshape(temporal_filter, kernel_dims).astype(float_type),
        name='W', borrow=True)
    self.W_helper = theano.shared(
        value=numpy.zeros(kernel_dims, dtype=float_type),
        name='W_helper', borrow=True)
    self.W_helper2 = theano.shared(
        value=numpy.zeros(kernel_dims, dtype=float_type),
        name='W_helper2', borrow=True)

    # parameters of this layer
    self.params = [self.W]
    self.params_helper = [self.W_helper]
    self.params_helper2 = [self.W_helper2]

    # To keep the length of axis 1 under a 'valid' convolution, the signal is
    # written into a zero buffer that is (n_taps - 1) entries longer.  All of
    # the padding sits in front of the signal.
    lead = numpy.floor((n_taps - 1) / 2.0).astype(int)
    trail = numpy.ceil((n_taps - 1) / 2.0).astype(int)
    image_shape_pad = list(image_shape)
    image_shape_pad[1] += lead + trail

    zero_buffer = theano.shared(
        value=numpy.zeros(image_shape_pad, dtype=float_type), borrow=True)
    input_padded = T.set_subtensor(
        zero_buffer[:, (lead + trail):, :, :, :], input)

    conv_out = conv3d2d.conv3d(
        signals=input_padded,            # Ns, Ts, C, Hs, Ws
        filters=self.W,                  # Nf, Tf, C, Hf, Wf
        signals_shape=image_shape_pad,
        filters_shape=filter_shape,
        border_mode='valid')

    # downsample each feature map individually, using maxpooling
    self.lin_output = downsample.max_pool_2d(
        input=conv_out, ds=poolsize, ignore_border=True)

    if outputType == 'rl':
        # rectified linear
        self.output = self.lin_output*(self.lin_output>0)
    elif outputType == 'l':
        # linear
        self.output = self.lin_output
# Symbolic 5-D input of the network.
X = ftensor5('x')

# Filter banks of the three 3-D convolution stages plus the weights of the
# final stage.  The shape notes are the original author's.
w1 = init_weights([13, 3, 1, 11, 11])  # out after (1, 4, 4) max-pooling: [examples, 28, filters, 62, 62]
w2 = init_weights([13, 3, 13, 6, 6])  # out after (2, 4, 4) max-pooling: [examples, 13, filters, 15, 15]
w3 = init_weights([13, 3, 13, 4, 4])  # out after (4, 4, 4) max-pooling: [examples, 3, filters, 3, 3]
w4 = init_weights([13, 13 * 3 * 3 * 3])  # out: [examples, filters]

# Alternative, currently disabled layer stack:
# w4 = init_weights([13, 2, 13, 3, 3]) # out after max-pooling: [examples, 1, filters, 13, 13]
# w5 = init_weights([13, 13, 2, 2]) # out after (4, 4) max-pooling: [examples, filters, 3, 3]
# w6 = init_weights([13, 13 * 3 * 3])

#######################
# Stage 1: convolution -> (1, 4, 4) max-pooling -> local contrast normalisation.
# Axes 1 and 2 are swapped for max_pool_3d and swapped back for lcn_3d.
conv_out1 = conv3d(
    signals=X,
    filters=w1
)
pool_out1 = maxpool3d.max_pool_3d(
    conv_out1.dimshuffle((0, 2, 1, 3, 4)),
    (1, 4, 4)
)
lcn_out1 = lcn_3d(pool_out1.dimshuffle((0, 2, 1, 3, 4)), [3, 5, 5], 13)  # [examples, 28, filters, 62, 62]

#######################
# Stage 2: convolution of the normalised stage-1 output.
conv_out2 = conv3d(
    signals=lcn_out1,
    filters=w2
)
def _forward(self):
    """Declare the parameters and build the symbolic 3-D convolution output.

    Reads ``self.inpt`` and stores the pre-stride result in
    ``self.output_in`` and the (optionally strided) result in
    ``self.output``.  ``self.implementation`` selects the backend:
    'conv3d2d', 'conv3D' or 'dnn_conv3d'; anything else raises
    NotImplementedError.
    """
    signal = self.inpt

    # Parameters are declared weights first, bias second.
    self.weights = self.declare((
        self.n_output, self.filter_depth, self.n_inpt,
        self.filter_height, self.filter_width
    ))
    self.bias = self.declare((self.n_output,))

    if self.border_mode == 'same':
        # Total amount of zero padding per axis.
        extra_h = self.filter_height - 1
        extra_w = self.filter_width - 1
        extra_d = self.filter_depth - 1
        if extra_h > 0 or extra_w > 0 or extra_d > 0:
            in_shape = signal.shape
            canvas = T.zeros((
                in_shape[0],
                in_shape[1] + extra_d,
                in_shape[2],
                in_shape[3] + extra_h,
                in_shape[4] + extra_w
            ))
            # Place the signal into the zero canvas, offset by half the
            # padding on axes 1, 3 and 4.
            off_d, off_h, off_w = extra_d // 2, extra_h // 2, extra_w // 2
            window = (
                slice(None),
                slice(off_d, in_shape[1] + off_d),
                slice(None),
                slice(off_h, in_shape[3] + off_h),
                slice(off_w, in_shape[4] + off_w)
            )
            signal = T.set_subtensor(canvas[window], signal)

    def plus_bias(pre_act):
        # Add the bias along axis 2 when the layer uses one.
        if self.use_bias:
            return pre_act + self.bias.dimshuffle('x', 'x', 0, 'x', 'x')
        return pre_act

    backend = self.implementation
    if backend == 'conv3d2d':
        result = plus_bias(conv3d(signals=signal, filters=self.weights))
    elif backend == 'conv3D':
        # conv3D receives flipped filters and takes the bias as an argument
        # (zeros when the layer has no bias).
        flipped = self.weights[:, ::-1, :, ::-1, ::-1]
        if self.use_bias:
            bias = self.bias
        else:
            bias = T.zeros(self.bias.shape)
        result = conv3D(
            V=signal.dimshuffle(0, 3, 4, 1, 2),
            W=flipped.dimshuffle(0, 3, 4, 1, 2),
            b=bias,
            d=(1, 1, 1)
        ).dimshuffle(0, 3, 4, 1, 2)
    elif backend == 'dnn_conv3d':
        # cuDNN works on the layout with axes 1 and 2 swapped.
        swapped = theano.sandbox.cuda.dnn.dnn_conv3d(
            img=signal.dimshuffle(0, 2, 1, 3, 4),
            kerns=self.weights.dimshuffle(0, 2, 1, 3, 4)
        )
        result = plus_bias(swapped.dimshuffle(0, 2, 1, 3, 4))
    else:
        raise NotImplementedError('This class only supports conv3d2d, conv3D and dnn_conv3d')

    self.output_in = result

    if self.strides == (1, 1, 1):
        self.output = self.output_in
    else:
        # Strides are emulated by subsampling: strides[2] on axis 1,
        # strides[0] on axis 3 and strides[1] on axis 4.
        step_h, step_w, step_d = (self.strides[0], self.strides[1],
                                  self.strides[2])
        self.output = self.output_in[:, ::step_d, :, ::step_h, ::step_w]
def __init__(self, input, n_in_maps, n_out_maps, kernel_shape, video_shape,
             batch_size, activation, layer_name="Conv",
             rng=RandomState(1234), borrow=True, W=None, b=None):
    """3-D convolution layer ('valid' mode, no padding).

    video_shape: (frames, height, width)
    kernel_shape: (frames, height, width)
    W_shape: (out, in, kern_frames, kern_height, kern_width)

    When ``activation`` is ``relu`` or ``softplus`` the weights are drawn
    with a fixed scale of 0.01 and the bias starts at one; otherwise the
    scale is ``2 / sqrt(fan_in)`` and the bias starts at zero.

    NOTE(review): the default ``rng`` is created once, when the function is
    defined, so all layers relying on the default share one random stream.
    NOTE(review): ``W`` is created as (out, in, frames, h, w) but handed to
    ``conv3d`` with ``filters_shape`` (out, frames, in, h, w) and without a
    dimshuffle -- confirm this layout mismatch is intended.
    """
    # Mirror every constructor argument as an attribute.  This must remain
    # the first statement so no other local leaks into the instance.
    self.__dict__.update(locals())
    del self.self

    rectifier_like = activation in (relu, softplus)

    # init W -- ``is not None`` because ``!=`` on an ndarray is element-wise.
    if W is not None:
        W_val = W
    else:
        # fan in: filter time x filter height x filter width x input maps
        fan_in = prod(kernel_shape) * n_in_maps
        norm_scale = 2. * sqrt(1. / fan_in)
        if rectifier_like:
            norm_scale = 0.01
        W_shape = (n_out_maps, n_in_maps) + kernel_shape
        W_val = _asarray(rng.normal(loc=0, scale=norm_scale, size=W_shape),
                         dtype=floatX)
    self.W = shared(value=W_val, borrow=borrow, name=layer_name + '_W')
    self.params = [self.W]

    # init bias
    if b is not None:
        b_val = b
    elif rectifier_like:
        b_val = ones((n_out_maps, ), dtype=floatX)
    else:
        b_val = zeros((n_out_maps, ), dtype=floatX)
    self.b = shared(b_val, name=layer_name + "_b", borrow=borrow)
    self.params.append(self.b)

    # 3D convolution; dimshuffle: last 3 dimensions must be (in, h, w)
    n_fr, h, w = video_shape
    n_fr_k, h_k, w_k = kernel_shape

    # border_mode='valid' means no padding: every spatial/temporal axis of
    # the output shrinks by (kernel size - 1).
    out = conv3d(signals=input.dimshuffle([0, 2, 1, 3, 4]),
                 filters=self.W,
                 signals_shape=(batch_size, n_fr, n_in_maps, h, w),
                 filters_shape=(n_out_maps, n_fr_k, n_in_maps, h_k, w_k),
                 border_mode='valid').dimshuffle([0, 2, 1, 3, 4])

    # One bias per output map (axis 1 after the dimshuffle back).
    out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')
    self.output = activation(out)
def __init__(self, input, input_shape, filter_shape, pool, activation_func,
             enable_dropout, use_fragment_pooling, reshape_output,
             mfp_offsets, mfp_strides, input_layer=None, W=None, b=None,
             pooling_mode='max', affinity=False):
    """Build a 3D convolution layer with optional pooling, dropout and softmax.

    The layer computes ``conv3d(signals=input, filters=W, border_mode='valid')``,
    optionally pools it, optionally applies a dropout gate, adds the bias
    (broadcast on axis 2) and applies the activation. Class probabilities
    are a softmax over axis 2 of the pre-activation output.

    Args:
        input: symbolic 5D input tensor.
        input_shape: 5-tuple; ``input_shape[2]`` must equal
            ``filter_shape[2]``; ``input_shape[0]`` may be None.
        filter_shape: 5-tuple; ``filter_shape[0]`` is the number of filters.
        pool: pooling shape handed to ``pooling.pooling3d``.
        activation_func: 'tanh', 'relu'/'ReLU', 'linear'/'none'/'None'/None,
            'abs', 'sigmoid' or 'maxout <r>' with integer r >= 2.
        enable_dropout: if true, multiply by a rescaled binomial gate whose
            rate is the shared variable ``self.activation_noise`` (0.5).
        use_fragment_pooling: pool through ``self.fragmentpool``.
        reshape_output: reshape the class probabilities via
            ``self.reshapeoutput``.
        mfp_offsets, mfp_strides: fragment-pooling state of the preceding
            layer; updated by ``self.fragmentpool`` when it is used.
        input_layer: stored as ``self.input_layer``.
        W, b: optional parameters, either ``np.ndarray`` (wrapped in a
            float32 shared variable) or a Theano tensor variable.
        pooling_mode: ``mode`` argument of ``pooling.pooling3d``.
        affinity: unsupported; raises when true.

    Raises:
        NotImplementedError: for an unknown ``activation_func``.
        RuntimeError: if ``affinity`` is true.
    """
    assert len(filter_shape) == 5
    assert input_shape[2] == filter_shape[2]

    self.input = input
    self.pool = pool
    self.number_of_filters = filter_shape[0]
    self.filter_shape = filter_shape
    self.activation_func = activation_func
    self.input_shape = input_shape
    self.input_layer = input_layer
    self.mfp_strides = mfp_strides
    self.mfp_offsets = mfp_offsets
    self.reshape_output = reshape_output

    # Single-argument print(): same text under Python 2 and Python 3.
    print("3DConv: input= %s \tfilter= %s" % (input_shape, filter_shape))

    if W is None:
        W_values = np.asarray(
            initWeights(filter_shape, scale='glorot', mode='normal', pool=pool),
            dtype='float32')
        self.W = theano.shared(W_values, name='W_conv', borrow=True)
    elif isinstance(W, np.ndarray):
        self.W = theano.shared(W.astype(np.float32), name='W_conv', borrow=True)
    else:
        assert isinstance(
            W, T.TensorVariable), "W must be either np.ndarray or theano var"
        self.W = W

    # The bias is a 1D tensor -- one bias per output feature map.
    if b is None:
        n_out = filter_shape[0]
        if activation_func in ('relu', 'ReLU'):
            norm = filter_shape[1] * filter_shape[3] * filter_shape[4]
            b_values = np.asarray(
                initWeights((n_out, ), scale=1.0 / norm, mode='const'),
                dtype='float32')
        elif activation_func == 'sigmoid':
            b_values = np.asarray(
                initWeights((n_out, ), scale=0.5, mode='const'),
                dtype='float32')
        else:  # e.g. activation_func == 'tanh'
            b_values = np.asarray(
                initWeights((n_out, ), scale=1e-6, mode='fix-uni'),
                dtype='float32')
        self.b = theano.shared(value=b_values, borrow=True, name='b_conv')
    elif isinstance(b, np.ndarray):
        self.b = theano.shared(b.astype(np.float32), name='b_conv', borrow=True)
    else:
        assert isinstance(
            b, T.TensorVariable), "b must be either np.ndarray or theano var"
        self.b = b

    # store parameters of this layer
    self.params = [self.W, self.b]

    # convolve input feature maps with filters
    self.mode = theano.compile.get_default_mode()
    self.conv_out = conv3d(
        signals=input,
        filters=self.W,
        border_mode='valid',
        filters_shape=filter_shape
    )

    # down-sample each feature map individually
    # NOTE(review): for a tuple/list `pool`, `pool != 1` is a plain Python
    # comparison and is always True; kept as-is because skipping
    # `fragmentpool` could change the mfp state - confirm intended.
    if np.any(pool != 1):
        pool_func = lambda x: pooling.pooling3d(
            x, pool_shape=pool, mode=pooling_mode)
        if use_fragment_pooling:
            pooled_out, self.mfp_offsets, self.mfp_strides = self.fragmentpool(
                self.conv_out, pool, mfp_offsets, mfp_strides, pool_func)
        else:
            pooled_out = pool_func(self.conv_out)
    else:
        pooled_out = self.conv_out

    if enable_dropout:
        print("Dropout: ACTIVE")
        self.activation_noise = theano.shared(np.float32(0.5), name='Dropout Rate')
        rng = T.shared_randomstreams.RandomStreams(int(time.time()))
        p = 1 - self.activation_noise
        # The gate is drawn over axes 1, 3, 4 and broadcast over axes 0 and 2.
        self.dropout_gate = 1.0 / p * rng.binomial(
            (pooled_out.shape[1], pooled_out.shape[3], pooled_out.shape[4]),
            1, p, dtype='float32')
        pooled_out = pooled_out * self.dropout_gate.dimshuffle(
            ('x', 0, 'x', 1, 2))

    lin_output = pooled_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')
    self.lin_output = lin_output

    r = 1  # maxout reduction factor, forwarded to getOutputShape
    if activation_func == 'tanh':
        self.activation_func = 'tanh'
        self.output = T.tanh(lin_output)
    elif activation_func in ['ReLU', 'relu']:  # rectified linear unit
        self.activation_func = 'relu'
        self.output = lin_output * (lin_output > 0)
    elif activation_func in ['linear', 'none', 'None', None]:
        self.activation_func = 'linear'
        self.output = lin_output
    elif activation_func in ['abs']:
        self.activation_func = 'abs'
        self.output = T.abs_(lin_output)
    elif activation_func in ['sigmoid']:
        self.activation_func = 'sigmoid'
        self.output = T.nnet.sigmoid(lin_output)
    elif (hasattr(activation_func, 'startswith')
          and activation_func.startswith("maxout")):
        # Non-string values fall through to NotImplementedError below
        # instead of failing with AttributeError on `.startswith`.
        r = int(activation_func.split(" ")[1])
        assert r >= 2
        self.output = pooling.maxout(lin_output, factor=r, axis=2)
    else:
        raise NotImplementedError()

    output_shape = getOutputShape(
        (1 if input_shape[0] is None else input_shape[0], ) + input_shape[1:],
        filter_shape, pool, use_fragment_pooling, r)

    print("Output= %s Dropout %s Act: %s pool: %s" % (
        output_shape, "ON," if enable_dropout else "OFF,",
        activation_func, pooling_mode))
    self.output_shape = output_shape

    if affinity:
        raise RuntimeError("Dont use this code")
    else:
        sh = lin_output.shape
        # Shape with axis 2 (the softmax axis) moved to the front; used to
        # reshape the probabilities back after the softmax.
        sh = (sh[2], sh[0], sh[1], sh[3], sh[4])
        # Move axis 2 to the front, flatten the rest, transpose: one row per
        # position, one column per feature map.
        self.class_probabilities = T.nnet.softmax(
            lin_output.dimshuffle((2, 0, 1, 3, 4)).flatten(2).dimshuffle(
                (1, 0)))
        # NOTE(review): nesting of the following statements was
        # reconstructed from a whitespace-collapsed source - confirm.
        if reshape_output:
            self.reshapeoutput(sh)
            if use_fragment_pooling:
                self.fragmentstodense(sh)

            self.prob_shape = getProbShape(output_shape, self.mfp_strides)
            print("Class Prob Output = %s" % (self.prob_shape, ))
        # prediction = class whose "probability" is maximal (symbolic)
        self.class_prediction = T.argmax(self.class_probabilities, axis=1)
import litus import theano import theano.tensor as T import numpy as np import matplotlib.pylab as plt from theano.tensor.nnet.conv3d2d import conv3d from theano.tensor.signal.conv import conv2d import uuid dtensor5 = T.TensorType('float64', (False,)*5) A = dtensor5('A') B = dtensor5('B') C = conv3d(A,B) _conv_func = theano.function(inputs=[A,B], outputs=C) def conv(a,b,padding_things_equal=[1,3,4],padding_things_tail=[1],*args,**kwargs): a_ = a.copy() b_ = b.copy() a_ = np.pad(a_,[(s-1,s-1) for si,s in enumerate(b_.shape)],mode='constant') return _conv_func(a_,b_) ### Functions to create filters # def minimize_filter(f,filter_epsilon = 0.0, minimize_xy=True, minimize_t_tail=True, minimize_t_start=False): """ reduces a filter by taking of the sides if they are smaller than :py:obj:`filter_epsilon`
def __init__(self, rng, input, filter_shape, image_shape, W=None, b=None,
             poolsize=(2, 2), activation=relu):
    """
    Allocate a 3D convolution layer with shared variable internal parameters.

    No pooling is applied; ``poolsize`` is accepted but not used.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: dtensor5
    :param input: symbolic tensor, of shape image_shape

    :type filter_shape: tuple or list of length 5
    :param filter_shape: passed to conv3d as ``filters_shape``; index 0 is the
                         number of filters and index 2 the number of input
                         feature maps (it must equal ``image_shape[2]``).
                         Presumably (filters, time, channels, height, width)
                         as in conv3d2d - confirm against the conv3d import.

    :type image_shape: tuple or list of length 5
    :param image_shape: passed to conv3d as ``signals_shape``; index 2 is the
                        number of input feature maps.

    :param W: optional shared variable holding the filters
    :param b: optional shared variable holding the biases
    :param poolsize: unused
    :param activation: callable applied to ``conv_out + bias``
    """
    assert image_shape[2] == filter_shape[2]
    self.input = input
    self.image_shape = image_shape
    self.filter_shape = filter_shape

    if W is None:
        # inputs to each hidden unit: every filter axis except the
        # number of filters
        fan_in = numpy.prod(filter_shape[1:])
        # Each lower-layer unit receives gradient from
        # "num filters * filter extent" units. The channel axis (index 2,
        # see the assert above) is not part of the filter extent, so it is
        # left out; the previous `prod(filter_shape[2:])` counted the
        # channels and dropped axis 1 instead.
        fan_out = (filter_shape[0] * filter_shape[1] *
                   numpy.prod(filter_shape[3:]))
        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)

    if b is None:
        # the bias is a 1D tensor -- one bias per filter
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        b = theano.shared(value=b_values, borrow=True)

    self.W = W
    self.b = b

    # convolve input feature maps with filters
    conv_out = conv3d(signals=input, filters=self.W,
                      signals_shape=image_shape, filters_shape=filter_shape)

    pooled_out = conv_out  # no pooling

    # Broadcast the bias explicitly on axis 2 of the 5D output. The former
    # 4D pattern ('x', 0, 'x', 'x') produced the same result only through
    # implicit left-padding of the missing dimension.
    intermediate = pooled_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')

    self.output = activation(intermediate)

    # store parameters of this layer
    self.params = [self.W, self.b]
def conv3d(x, kernel, strides=(1, 1, 1), border_mode='valid',
           dim_ordering=_IMAGE_DIM_ORDERING, volume_shape=None,
           filter_shape=None, conv_algo="GpuCorr3dMM"):
    '''
    3D convolution with a selectable backend.

    x: 5D input tensor, laid out according to dim_ordering.
    kernel: 5D kernel tensor, laid out according to dim_ordering.
    strides: 3-tuple; emulated by slicing the unstrided output.
        Must be (1, 1, 1) when border_mode is "same".
    border_mode: string, "same" or "valid". "same" is implemented by
        zero-padding the input and running a "valid" convolution.
    dim_ordering: "th" (channels on axis 1) or "tf" (channels last).
    volume_shape, filter_shape: optional static shapes; they are only
        permuted for "tf" ordering and not used otherwise.
    conv_algo: string,
        "conv3d2d": internally reshapes the data and performs 2d convs
        "dnn_conv3d": uses cuDNN's 3d convolution
        "GpuCorr3dMM": performs a correlation, not a convolution
                       (filter not flipped), uses the "Toeplitz"-matrix
                       (which means it needs a little more memory)

    Returns the output tensor in the caller's dim_ordering.

    Raises Exception for an unknown dim_ordering or border_mode and
    ValueError for an unknown conv_algo.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
    if border_mode not in {'same', 'valid'}:
        raise Exception('Invalid border mode: ' + str(border_mode))

    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one.
        # TH input shape: (samples, input_depth, conv_dim1, conv_dim2, conv_dim3)
        # TF input shape: (samples, conv_dim1, conv_dim2, conv_dim3, input_depth)
        # TH kernel shape: (out_depth, input_depth, kernel_dim1, kernel_dim2, kernel_dim3)
        # TF kernel shape: (kernel_dim1, kernel_dim2, kernel_dim3, input_depth, out_depth)
        x = x.dimshuffle((0, 4, 1, 2, 3))
        kernel = kernel.dimshuffle((4, 3, 0, 1, 2))
        if volume_shape:
            volume_shape = (volume_shape[0], volume_shape[4],
                            volume_shape[1], volume_shape[2], volume_shape[3])
        if filter_shape:
            filter_shape = (filter_shape[4], filter_shape[3],
                            filter_shape[0], filter_shape[1], filter_shape[2])

    if border_mode == 'same':
        assert (strides == (1, 1, 1))
        pad_dim1 = (kernel.shape[2] - 1)
        pad_dim2 = (kernel.shape[3] - 1)
        pad_dim3 = (kernel.shape[4] - 1)
        output_shape = (x.shape[0], x.shape[1],
                        x.shape[2] + pad_dim1,
                        x.shape[3] + pad_dim2,
                        x.shape[4] + pad_dim3)
        output = T.zeros(output_shape)
        # Centre the input inside the zero canvas.
        indices = (slice(None), slice(None),
                   slice(pad_dim1 // 2, x.shape[2] + pad_dim1 // 2),
                   slice(pad_dim2 // 2, x.shape[3] + pad_dim2 // 2),
                   slice(pad_dim3 // 2, x.shape[4] + pad_dim3 // 2))
        x = T.set_subtensor(output[indices], x)
        border_mode = 'valid'

    border_mode_3d = (border_mode, border_mode, border_mode)

    if conv_algo == "conv3d2d":
        # conv3d2d is called with axes 1 and 2 swapped; swap them back after.
        conv_out = conv3d2d.conv3d(signals=x.dimshuffle(0, 2, 1, 3, 4),
                                   filters=kernel.dimshuffle(0, 2, 1, 3, 4),
                                   border_mode=border_mode_3d)
        conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)
    elif conv_algo == "dnn_conv3d":
        conv_out = dnn_conv3d(img=x, kerns=kernel, border_mode=border_mode)
    elif conv_algo == "GpuCorr3dMM":
        # An unused `bias = np.zeros(volume_shape[1])` used to be built
        # here; it crashed whenever volume_shape was left at None.
        conv_out = GpuCorr3dMM()(x, kernel)
    else:
        # Raising a bare string is itself a TypeError; raise a real exception.
        raise ValueError("Unknown algorithm to perform 3d convolution")

    # support strides by manually slicing the output
    if strides != (1, 1, 1):
        conv_out = conv_out[:, :, ::strides[0], ::strides[1], ::strides[2]]

    if dim_ordering == 'tf':
        conv_out = conv_out.dimshuffle((0, 2, 3, 4, 1))

    return conv_out
def compile_theano_3d_upscale_blur_and_combine_function(
        upsampling_factor, blur_sigma, kernal_scale):
    """
    Returns a Theano function of two float32 3D tensors.

    The compiled function upsamples ("unpools") its first input by repeating
    every element ``upsampling_factor`` times along each axis, takes the
    elementwise maximum with the second input, zero-pads the result by
    ``2 * k`` on every side (k = half the kernel size) and convolves it in
    'valid' mode with a normalised 3D Gaussian kernel of standard deviation
    ``blur_sigma``.

    Args:
        upsampling_factor: integer repeat count per axis, must be >= 1.
        blur_sigma: Gaussian standard deviation, must be non-zero.
        kernal_scale: kernel size is about ``kernal_scale * blur_sigma``,
            forced to be odd.

    Raises:
        ValueError: if ``blur_sigma`` is 0 or ``upsampling_factor`` < 1.
    """
    if blur_sigma == 0 or upsampling_factor < 1:
        # A bare `raise` outside an except block only yields an unrelated
        # "no active exception" error; report the real problem instead.
        raise ValueError(
            "blur_sigma must be non-zero and upsampling_factor >= 1 "
            "(got blur_sigma=%r, upsampling_factor=%r)"
            % (blur_sigma, upsampling_factor))

    # Make sure the total kernel size is odd.
    size = int(kernal_scale * blur_sigma + 1)
    if size % 2 == 0:
        size = int(kernal_scale * blur_sigma)
    # k is half the kernel size, not counting the middle element.
    k = int(size / 2)

    print('kernel size: ' + str(size))
    print('blur sigma: ' + str(blur_sigma))
    print('upsampling factor: ' + str(upsampling_factor))

    # Prepare the 1 x 1 x (2k+1) x (2k+1) x (2k+1) Gaussian kernel in numpy.
    x = np.arange(-k, k + 1)
    y = np.arange(-k, k + 1)
    z = np.arange(-k, k + 1)
    X, Y, Z = np.meshgrid(x, y, z)
    Gv = np.exp(-(X**2 + Y**2 + Z**2) / (2.0 * blur_sigma**2))
    Gv = (Gv / np.sum(Gv.reshape(-1))).astype(np.float32)
    Gv = np.tile(Gv, (1, 1, 1, 1, 1))  # adds the two leading unit axes

    G_kernel = Gv
    print("filter size: " + str(G_kernel.shape))
    # Move the last axis to position 1, matching the dimshuffle applied to
    # the padded signal below.
    G_kernel = np.transpose(G_kernel, (0, 4, 1, 2, 3))
    print("new filter size: " + str(G_kernel.shape))

    # Predicted occupancy grid, with two unit axes prepended.
    input_1 = T.ftensor3()
    M1 = T.shape_padleft(input_1, 2)

    # Second occupancy grid; it is combined without upsampling.
    input_2 = T.ftensor3()
    M2 = T.shape_padleft(input_2, 2)

    # Upsample the first input along the three trailing axes.
    R2 = (M1.repeat(upsampling_factor, axis=2)
            .repeat(upsampling_factor, axis=3)
            .repeat(upsampling_factor, axis=4))
    MM2 = M2

    # Combine before blurring.
    R2 = T.maximum(R2, MM2)

    pad = 2 * k
    input_shape = R2.shape
    output_shape = (input_shape[0], input_shape[1],
                    input_shape[2] + 2 * pad,
                    input_shape[3] + 2 * pad,
                    input_shape[4] + 2 * pad)
    R2_padded = T.zeros(output_shape, R2.dtype)
    # Explicit end indices: the former `pad:-pad` form is an empty slice
    # when pad == 0 (kernel of size 1).
    R2_padded = T.set_subtensor(
        R2_padded[:, :,
                  pad:input_shape[2] + pad,
                  pad:input_shape[3] + pad,
                  pad:input_shape[4] + pad],
        R2)

    R2_padded = R2_padded.dimshuffle(0, 4, 1, 2, 3)
    # The convolution used to be built twice with identical arguments.
    R3 = conv3d(R2_padded, G_kernel, border_mode="valid")
    R3 = R3.dimshuffle(0, 2, 3, 4, 1)

    upscale_blur_and_combine_fn = theano.function(
        inputs=[input_1, input_2],
        outputs=R3,
    )
    return upscale_blur_and_combine_fn
if __name__ == '__main__':
    from theano.tensor.nnet import conv2d
    from theano.tensor.nnet.conv3d2d import conv3d

    # Demo: convolve each of the 36 patches with its own filter bank by
    # running a single conv3d call and keeping only the matching
    # (patch i, filter i) entries of the result.
    n_patches, n_filters = 36, 512

    x = T.tensor4()
    h = T.tensor4()

    # Repeat the input along axis 1 once per filter, then insert a
    # broadcastable axis at position 1; the filters get a leading one.
    tiled = T.concatenate([x] * n_filters, 1)
    signals = tiled.dimshuffle(0, 'x', 1, 2, 3)
    kernels = h.dimshuffle('x', 0, 1, 2, 3)

    all_pairs = conv3d(signals, kernels, border_mode='half')
    conv_out3 = T.stack([all_pairs[i, i] for i in range(n_patches)])

    func_3 = nn.function([x, h], conv_out3)

    # From here on x and h hold concrete float32 test data.
    x = np.random.rand(n_patches, 1, 10, 10).astype('float32')
    h = np.random.rand(n_patches, n_filters, 3, 3).astype('float32')
    print(func_3(x, h).shape)
def check_conv3d(border_mode, mode=mode_without_gpu, shared=theano.tensor._shared):
    """Compare conv3d with the reference ``pyconv3d`` and check its gradient.

    Two rounds are run: temporal filter length 5 (gradient check with
    length 2), then the Tf=1 case that exercises the patched implementation
    (gradient check with length 1). Only the first round inspects the
    compiled functions with ``check_diagonal_subtensor_view_traces``.

    Raises SkipTest when SciPy's ``ndimage`` is unavailable.
    """
    if ndimage is None:
        raise SkipTest("conv3d2d tests need SciPy")

    # (Tf for the value check, Tf for verify_grad, inspect traces?)
    rounds = ((5, 2, True), (1, 1, False))

    for tf_value, tf_grad, inspect_traces in rounds:
        Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32
        Nf, Tf, C, Hf, Wf = 32, tf_value, 3, 5, 5

        signals = numpy.arange(Ns * Ts * C * Hs * Ws)
        signals = signals.reshape(Ns, Ts, C, Hs, Ws).astype("float32")
        filters = numpy.arange(Nf * Tf * C * Hf * Wf)
        filters = filters.reshape(Nf, Tf, C, Hf, Wf).astype("float32")

        # reference result
        t0 = time.time()
        pyres = pyconv3d(signals, filters, border_mode)
        print(time.time() - t0)

        s_signals = shared(signals)
        s_filters = shared(filters)
        s_output = shared(signals * 0)

        out = conv3d(
            s_signals,
            s_filters,
            signals_shape=signals.shape,
            filters_shape=filters.shape,
            border_mode=border_mode
        )

        # forward pass, result written into s_output
        newconv3d = theano.function([], [], updates={s_output: out}, mode=mode)
        if inspect_traces:
            check_diagonal_subtensor_view_traces(newconv3d)

        t0 = time.time()
        newconv3d()
        print(time.time() - t0)
        utt.assert_allclose(pyres, s_output.get_value(borrow=True))

        # gradient graph compiles and runs
        gsignals, gfilters = theano.grad(out.sum(), [s_signals, s_filters])
        gnewconv3d = theano.function(
            [], [],
            updates=[(s_filters, gfilters), (s_signals, gsignals)],
            mode=mode,
            name="grad")
        if inspect_traces:
            check_diagonal_subtensor_view_traces(gnewconv3d)

        t0 = time.time()
        gnewconv3d()
        print("grad", time.time() - t0)

        # numerical gradient check on a small random problem
        Ns, Ts, C, Hs, Ws = 3, 3, 3, 5, 5
        Nf, Tf, C, Hf, Wf = 4, tf_grad, 3, 2, 2

        signals = numpy.random.rand(Ns, Ts, C, Hs, Ws).astype("float32")
        filters = numpy.random.rand(Nf, Tf, C, Hf, Wf).astype("float32")
        utt.verify_grad(
            lambda s, f: conv3d(s, f, border_mode=border_mode),
            [signals, filters],
            eps=1e-1,
            mode=mode)
def test_conv3d(border_mode):
    """Compare conv3d with the reference ``pyconv3d`` and check its gradient.

    Runs with FAST_RUN when the configured mode is FAST_COMPILE, otherwise
    with the default mode. Two rounds are executed: temporal filter length
    5 (gradient check with length 2), then the Tf=1 case covering the
    patched implementation (gradient check with length 1). Only the first
    round calls ``check_diagonal_subtensor_view_traces``.

    Raises SkipTest without SciPy's ``ndimage`` or a C++ compiler.
    """
    if ndimage is None or not theano.config.cxx:
        raise SkipTest("conv3d2d tests need SciPy and a c++ compiler")

    fast_compile = theano.config.mode == 'FAST_COMPILE'
    if fast_compile:
        mode = theano.compile.mode.get_mode('FAST_RUN')
    else:
        mode = theano.compile.mode.get_default_mode()

    shared = theano.tensor._shared

    # (Tf for the value check, Tf for verify_grad, inspect traces?)
    for tf_value, tf_grad, inspect_traces in ((5, 2, True), (1, 1, False)):
        Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32
        Nf, Tf, C, Hf, Wf = 32, tf_value, 3, 5, 5

        signals = np.arange(Ns * Ts * C * Hs * Ws)
        signals = signals.reshape(Ns, Ts, C, Hs, Ws).astype('float32')
        filters = np.arange(Nf * Tf * C * Hf * Wf)
        filters = filters.reshape(Nf, Tf, C, Hf, Wf).astype('float32')

        # reference result
        t0 = time.time()
        pyres = pyconv3d(signals, filters, border_mode)
        print(time.time() - t0)

        s_signals = shared(signals)
        s_filters = shared(filters)
        s_output = shared(signals * 0)

        out = conv3d(s_signals, s_filters,
                     signals_shape=signals.shape,
                     filters_shape=filters.shape,
                     border_mode=border_mode)

        # forward pass, result written into s_output
        newconv3d = theano.function([], [],
                                    updates={s_output: out},
                                    mode=mode)
        if inspect_traces:
            check_diagonal_subtensor_view_traces(newconv3d)

        t0 = time.time()
        newconv3d()
        print(time.time() - t0)
        utt.assert_allclose(pyres, s_output.get_value(borrow=True))

        # gradient graph compiles and runs
        gsignals, gfilters = theano.grad(out.sum(), [s_signals, s_filters])
        gnewconv3d = theano.function([], [],
                                     updates=[(s_filters, gfilters),
                                              (s_signals, gsignals)],
                                     mode=mode,
                                     name='grad')
        if inspect_traces:
            check_diagonal_subtensor_view_traces(gnewconv3d)

        t0 = time.time()
        gnewconv3d()
        print('grad', time.time() - t0)

        # numerical gradient check on a small random problem
        Ns, Ts, C, Hs, Ws = 3, 3, 3, 5, 5
        Nf, Tf, C, Hf, Wf = 4, tf_grad, 3, 2, 2

        signals = np.random.rand(Ns, Ts, C, Hs, Ws).astype('float32')
        filters = np.random.rand(Nf, Tf, C, Hf, Wf).astype('float32')
        utt.verify_grad(lambda s, f: conv3d(s, f, border_mode=border_mode),
                        [signals, filters], eps=1e-1, mode=mode)
def __init__(self, rng, input, video_shape, filter_shape):
    """
    Build a 3D convolution layer: ``conv3d(input, W) + b`` (no activation).

    :param rng: a random number generator used to initialize weights
    :param input: symbolic video tensor of shape video_shape
    :param video_shape: (batch, frame number of video (input temporal length),
                         number of feature maps (channels),
                         frame height of video, frame width of video)
    :param filter_shape: (number of output feature maps, filter temporal length,
                          number of input feature maps (channels),
                          filter height, filter width)

    ``self.output`` is the 'valid' convolution result with one bias per
    output feature map added on axis 2; ``self.params`` is ``[W, b]``.
    """
    # channel counts of video and filters must agree
    assert video_shape[2] == filter_shape[2]

    self.input = input

    # fan_in: every filter axis except the output maps;
    # fan_out: every filter axis except the input channels.
    n_in = numpy.prod(filter_shape[1:])
    n_out = numpy.prod(filter_shape[0:2]) * numpy.prod(filter_shape[3:])

    # uniform initialisation within +/- sqrt(6 / (fan_in + fan_out))
    limit = numpy.sqrt(6. / (n_in + n_out))
    initial_W = numpy.asarray(
        rng.uniform(low=-limit, high=limit, size=filter_shape),
        dtype=theano.config.floatX)
    self.W = theano.shared(initial_W, borrow=True)

    # the bias is a 1D tensor -- one bias per output feature map
    initial_b = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
    self.b = theano.shared(value=initial_b, borrow=True)

    # convolve input feature maps with filters
    convolved = conv3d(
        signals=input,     # (batch, time, in channel, height, width)
        filters=self.W,    # (out channel, time, in channel, height, width)
        signals_shape=video_shape,
        filters_shape=filter_shape,
        border_mode='valid')

    # add bias to every feature map
    bias_5d = self.b.dimshuffle('x', 'x', 0, 'x', 'x')
    self.output = convolved + bias_5d

    # store parameters of this layer
    self.params = [self.W, self.b]

    # keep track of model input
    self.input = input
def conv3d(
    x,
    kernel,
    strides=(1, 1, 1),
    border_mode="valid",
    dim_ordering=_IMAGE_DIM_ORDERING,
    volume_shape=None,
    filter_shape=None,
):
    """
    3D convolution built on ``conv3d2d.conv3d``.

    border_mode: string, "same" or "valid". "same" zero-pads the input and
        then runs a "valid" convolution; it requires strides == (1, 1, 1).
    dim_ordering: "th" (channels on axis 1) or "tf" (channels last).
    strides: emulated by slicing the unstrided output.
    volume_shape, filter_shape: only permuted for "tf"; otherwise unused.
    """
    if dim_ordering not in {"th", "tf"}:
        raise Exception("Unknown dim_ordering " + str(dim_ordering))
    if border_mode not in {"same", "valid"}:
        raise Exception("Invalid border mode: " + str(border_mode))

    channels_last = dim_ordering == "tf"

    if channels_last:
        # Bring tensors into TH layout:
        # input  (samples, d1, d2, d3, in)  -> (samples, in, d1, d2, d3)
        # kernel (k1, k2, k3, in, out)      -> (out, in, k1, k2, k3)
        x = x.dimshuffle((0, 4, 1, 2, 3))
        kernel = kernel.dimshuffle((4, 3, 0, 1, 2))
        if volume_shape:
            volume_shape = tuple(volume_shape[i] for i in (0, 4, 1, 2, 3))
        if filter_shape:
            filter_shape = tuple(filter_shape[i] for i in (4, 3, 0, 1, 2))

    if border_mode == "same":
        assert strides == (1, 1, 1)
        spatial_axes = (2, 3, 4)
        # total zero padding per spatial axis
        extra = [kernel.shape[axis] - 1 for axis in spatial_axes]

        padded_shape = (x.shape[0], x.shape[1]) + tuple(
            x.shape[axis] + pad for axis, pad in zip(spatial_axes, extra)
        )
        canvas = T.zeros(padded_shape)
        # centre the input inside the zero canvas
        window = (slice(None), slice(None)) + tuple(
            slice(pad // 2, x.shape[axis] + pad // 2)
            for axis, pad in zip(spatial_axes, extra)
        )
        x = T.set_subtensor(canvas[window], x)
        border_mode = "valid"

    # conv3d2d is called with axes 1 and 2 swapped; swap them back after.
    swap = (0, 2, 1, 3, 4)
    conv_out = conv3d2d.conv3d(
        signals=x.dimshuffle(*swap),
        filters=kernel.dimshuffle(*swap),
        border_mode=(border_mode,) * 3,
    )
    conv_out = conv_out.dimshuffle(*swap)

    # support strides by manually slicing the output
    if strides != (1, 1, 1):
        step1, step2, step3 = strides[0], strides[1], strides[2]
        conv_out = conv_out[:, :, ::step1, ::step2, ::step3]

    if channels_last:
        conv_out = conv_out.dimshuffle((0, 2, 3, 4, 1))

    return conv_out
def conv3d(x, kernel, strides=(1, 1, 1), border_mode='valid', dim_ordering='th',
           image_shape=None, filter_shape=None):
    '''
    3D convolution; runs on cuDNN if available, otherwise on conv3d2d.

    x: 5D input tensor, laid out according to dim_ordering.
    kernel: 5D kernel tensor, laid out according to dim_ordering.
    strides: 3-tuple. Passed to cuDNN as ``subsample``; on the conv3d2d
        path it is emulated by slicing the output and must be (1, 1, 1)
        when border_mode is "same".
    border_mode: string, "same" or "valid".
    dim_ordering: "th" (channels on axis 1) or "tf" (channels last).
    image_shape, filter_shape: only permuted for "tf"; otherwise unused.

    Returns the output tensor in the caller's dim_ordering.
    Raises Exception for an unknown dim_ordering.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one.
        # TH input shape: (samples, input_depth, rows, cols, time)
        # TH kernel shape: (depth, input_depth, rows, cols, time)
        # TF input shape: (samples, rows, cols, time, input_depth)
        # TF kernel shape: (rows, cols, time, input_depth, depth)
        x = x.dimshuffle((0, 4, 1, 2, 3))
        kernel = kernel.dimshuffle((4, 3, 0, 1, 2))
        if image_shape:
            image_shape = (image_shape[0], image_shape[4],
                           image_shape[1], image_shape[2], image_shape[3])
        if filter_shape:
            filter_shape = (filter_shape[4], filter_shape[3],
                            filter_shape[0], filter_shape[1], filter_shape[2])

    if _on_gpu() and dnn.dnn_available():
        if border_mode == 'same':
            # cuDNN takes explicit per-axis padding; this needs the
            # concrete kernel shape, hence the eval().
            np_kernel = kernel.eval()
            border_mode = tuple(s // 2 for s in np_kernel.shape[2:])
        conv_out = dnn.dnn_conv3d(img=x,
                                  kerns=kernel,
                                  border_mode=border_mode,
                                  subsample=strides)
    else:
        if border_mode == 'same':
            assert(strides == (1, 1, 1))
            pad_dim1 = (kernel.shape[2] - 1)
            pad_dim2 = (kernel.shape[3] - 1)
            pad_dim3 = (kernel.shape[4] - 1)
            output_shape = (x.shape[0], x.shape[1],
                            x.shape[2] + pad_dim1,
                            x.shape[3] + pad_dim2,
                            x.shape[4] + pad_dim3)
            output = T.zeros(output_shape)
            # Centre the input inside the zero canvas.
            indices = (slice(None), slice(None),
                       slice(pad_dim1 // 2, x.shape[2] + pad_dim1 // 2),
                       slice(pad_dim2 // 2, x.shape[3] + pad_dim2 // 2),
                       slice(pad_dim3 // 2, x.shape[4] + pad_dim3 // 2))
            x = T.set_subtensor(output[indices], x)
            border_mode = 'valid'

        border_mode_3d = (border_mode, border_mode, border_mode)
        # conv3d2d is called with axes 1 and 2 swapped; swap them back after.
        conv_out = conv3d2d.conv3d(signals=x.dimshuffle(0, 2, 1, 3, 4),
                                   filters=kernel.dimshuffle(0, 2, 1, 3, 4),
                                   border_mode=border_mode_3d)
        conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)

        # Support strides by manually slicing the output. Kept on this path
        # only: the cuDNN call already received the strides as `subsample`.
        if strides != (1, 1, 1):
            conv_out = conv_out[:, :, ::strides[0], ::strides[1], ::strides[2]]

    if dim_ordering == 'tf':
        # Back to channels-last. The pattern must name all five axes; the
        # former (0, 2, 3, 1) was a 4D pattern applied to a 5D tensor.
        conv_out = conv_out.dimshuffle((0, 2, 3, 4, 1))

    return conv_out
dtype='float64' ) ) wdec = theano.shared( np.asarray( np.random.randn(1,9,23,9,9), dtype='float64' ) ) bdec = theano.shared( np.asarray( np.zeros(1,), dtype='float64' ) ) params = [ wenc,benc,wdec,bdec ] convenc = max_nonlin( conv3d(x,wenc) + benc.dimshuffle('x','x',0,'x','x') ) convdec = T.nnet.sigmoid( conv3d(convenc,wdec) + bdec.dimshuffle('x','x',0,'x','x') ) def loss(ypred,ytrue): r = 0.1 x = T.sum((((ytrue - ypred)**2)*ytrue))/(1+T.sum(ytrue)) y = T.sum((((ytrue - ypred)**2)*(1-ytrue)))/T.sum(1.0-ytrue) return r*x + (1-r)*y cost = loss(convdec,ytrue) grads = T.grad(cost,params) gradwenc = T.grad(cost,wenc)
def __init__(self, input, input_shape, filter_shape, pool, activation_func,
             enable_dropout, use_fragment_pooling, reshape_output,
             mfp_offsets, mfp_strides, input_layer=None, W=None, b=None,
             pooling_mode='max', affinity=False):
    """Build a 3D convolution layer with optional pooling, dropout and softmax.

    The layer computes ``conv3d(signals=input, filters=W, border_mode='valid')``,
    optionally pools it, optionally applies a dropout gate, adds the bias
    (broadcast on axis 2) and applies the activation. Class probabilities
    are a softmax over axis 2 of the pre-activation output.

    Args:
        input: symbolic 5D input tensor.
        input_shape: 5-tuple; ``input_shape[2]`` must equal
            ``filter_shape[2]``; ``input_shape[0]`` may be None.
        filter_shape: 5-tuple; ``filter_shape[0]`` is the number of filters.
        pool: pooling shape handed to ``pooling.pooling3d``.
        activation_func: 'tanh', 'relu'/'ReLU', 'linear'/'none'/'None'/None,
            'abs', 'sigmoid' or 'maxout <r>' with integer r >= 2.
        enable_dropout: if true, multiply by a rescaled binomial gate whose
            rate is the shared variable ``self.activation_noise`` (0.5).
        use_fragment_pooling: pool through ``self.fragmentpool``.
        reshape_output: reshape the class probabilities via
            ``self.reshapeoutput``.
        mfp_offsets, mfp_strides: fragment-pooling state of the preceding
            layer; updated by ``self.fragmentpool`` when it is used.
        input_layer: stored as ``self.input_layer``.
        W, b: optional parameters, either ``np.ndarray`` (wrapped in a
            float32 shared variable) or a Theano tensor variable.
        pooling_mode: ``mode`` argument of ``pooling.pooling3d``.
        affinity: unsupported; raises when true.

    Raises:
        NotImplementedError: for an unknown ``activation_func``.
        RuntimeError: if ``affinity`` is true.
    """
    assert len(filter_shape) == 5
    assert input_shape[2] == filter_shape[2]

    self.input = input
    self.pool = pool
    self.number_of_filters = filter_shape[0]
    self.filter_shape = filter_shape
    self.activation_func = activation_func
    self.input_shape = input_shape
    self.input_layer = input_layer
    self.mfp_strides = mfp_strides
    self.mfp_offsets = mfp_offsets
    self.reshape_output = reshape_output

    # Single-argument print(): same text under Python 2 and Python 3.
    print("3DConv: input= %s \tfilter= %s" % (input_shape, filter_shape))

    if W is None:
        W_values = np.asarray(
            initWeights(filter_shape, scale='glorot', mode='normal', pool=pool),
            dtype='float32')
        self.W = theano.shared(W_values, name='W_conv', borrow=True)
    elif isinstance(W, np.ndarray):
        self.W = theano.shared(W.astype(np.float32), name='W_conv', borrow=True)
    else:
        assert isinstance(
            W, T.TensorVariable), "W must be either np.ndarray or theano var"
        self.W = W

    # The bias is a 1D tensor -- one bias per output feature map.
    if b is None:
        n_out = filter_shape[0]
        if activation_func in ('relu', 'ReLU'):
            norm = filter_shape[1] * filter_shape[3] * filter_shape[4]
            b_values = np.asarray(
                initWeights((n_out, ), scale=1.0 / norm, mode='const'),
                dtype='float32')
        elif activation_func == 'sigmoid':
            b_values = np.asarray(
                initWeights((n_out, ), scale=0.5, mode='const'),
                dtype='float32')
        else:  # e.g. activation_func == 'tanh'
            b_values = np.asarray(
                initWeights((n_out, ), scale=1e-6, mode='fix-uni'),
                dtype='float32')
        self.b = theano.shared(value=b_values, borrow=True, name='b_conv')
    elif isinstance(b, np.ndarray):
        self.b = theano.shared(b.astype(np.float32), name='b_conv', borrow=True)
    else:
        assert isinstance(
            b, T.TensorVariable), "b must be either np.ndarray or theano var"
        self.b = b

    # store parameters of this layer
    self.params = [self.W, self.b]

    # convolve input feature maps with filters
    self.mode = theano.compile.get_default_mode()
    self.conv_out = conv3d(
        signals=input,
        filters=self.W,
        border_mode='valid',
        filters_shape=filter_shape
    )

    # down-sample each feature map individually
    # NOTE(review): for a tuple/list `pool`, `pool != 1` is a plain Python
    # comparison and is always True; kept as-is because skipping
    # `fragmentpool` could change the mfp state - confirm intended.
    if np.any(pool != 1):
        pool_func = lambda x: pooling.pooling3d(
            x, pool_shape=pool, mode=pooling_mode)
        if use_fragment_pooling:
            pooled_out, self.mfp_offsets, self.mfp_strides = self.fragmentpool(
                self.conv_out, pool, mfp_offsets, mfp_strides, pool_func)
        else:
            pooled_out = pool_func(self.conv_out)
    else:
        pooled_out = self.conv_out

    if enable_dropout:
        print("Dropout: ACTIVE")
        self.activation_noise = theano.shared(np.float32(0.5), name='Dropout Rate')
        rng = T.shared_randomstreams.RandomStreams(int(time.time()))
        p = 1 - self.activation_noise
        # The gate is drawn over axes 1, 3, 4 and broadcast over axes 0 and 2.
        self.dropout_gate = 1.0 / p * rng.binomial(
            (pooled_out.shape[1], pooled_out.shape[3], pooled_out.shape[4]),
            1, p, dtype='float32')
        pooled_out = pooled_out * self.dropout_gate.dimshuffle(
            ('x', 0, 'x', 1, 2))

    lin_output = pooled_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')
    self.lin_output = lin_output

    r = 1  # maxout reduction factor, forwarded to getOutputShape
    if activation_func == 'tanh':
        self.activation_func = 'tanh'
        self.output = T.tanh(lin_output)
    elif activation_func in ['ReLU', 'relu']:  # rectified linear unit
        self.activation_func = 'relu'
        self.output = lin_output * (lin_output > 0)
    elif activation_func in ['linear', 'none', 'None', None]:
        self.activation_func = 'linear'
        self.output = lin_output
    elif activation_func in ['abs']:
        self.activation_func = 'abs'
        self.output = T.abs_(lin_output)
    elif activation_func in ['sigmoid']:
        self.activation_func = 'sigmoid'
        self.output = T.nnet.sigmoid(lin_output)
    elif (hasattr(activation_func, 'startswith')
          and activation_func.startswith("maxout")):
        # Non-string values fall through to NotImplementedError below
        # instead of failing with AttributeError on `.startswith`.
        r = int(activation_func.split(" ")[1])
        assert r >= 2
        self.output = pooling.maxout(lin_output, factor=r, axis=2)
    else:
        raise NotImplementedError()

    output_shape = getOutputShape(
        (1 if input_shape[0] is None else input_shape[0], ) + input_shape[1:],
        filter_shape, pool, use_fragment_pooling, r)

    print("Output= %s Dropout %s Act: %s pool: %s" % (
        output_shape, "ON," if enable_dropout else "OFF,",
        activation_func, pooling_mode))
    self.output_shape = output_shape

    if affinity:
        raise RuntimeError("Dont use this code")
    else:
        sh = lin_output.shape
        # Shape with axis 2 (the softmax axis) moved to the front; used to
        # reshape the probabilities back after the softmax.
        sh = (sh[2], sh[0], sh[1], sh[3], sh[4])
        # Move axis 2 to the front, flatten the rest, transpose: one row per
        # position, one column per feature map.
        self.class_probabilities = T.nnet.softmax(
            lin_output.dimshuffle((2, 0, 1, 3, 4)).flatten(2).dimshuffle(
                (1, 0)))
        # NOTE(review): nesting of the following statements was
        # reconstructed from a whitespace-collapsed source - confirm.
        if reshape_output:
            self.reshapeoutput(sh)
            if use_fragment_pooling:
                self.fragmentstodense(sh)

            self.prob_shape = getProbShape(output_shape, self.mfp_strides)
            print("Class Prob Output = %s" % (self.prob_shape, ))
        # prediction = class whose "probability" is maximal (symbolic)
        self.class_prediction = T.argmax(self.class_probabilities, axis=1)
def deconv3d(x, kernel, strides=(1, 1, 1), border_mode='valid',
             dim_ordering='th', volume_shape=None, filter_shape=None):
    '''Emulate a 3D transposed convolution on top of ``conv3d2d.conv3d``.

    The kernel is reversed along its three spatial axes and convolved with
    the (optionally zero-padded) input at stride 1; strides are then applied
    by slicing the result.

    # Arguments
        x: symbolic 5D input.
            'th' layout: (samples, input_depth, conv_dim1, conv_dim2, conv_dim3)
            'tf' layout: (samples, conv_dim1, conv_dim2, conv_dim3, input_depth)
        kernel: symbolic 5D kernel.
            'th' layout: (out_depth, input_depth, kernel_dim1, kernel_dim2, kernel_dim3)
            'tf' layout: (kernel_dim1, kernel_dim2, kernel_dim3, input_depth, out_depth)
        strides: sequence of three ints (tuple or list).
        border_mode: string, "same" or "valid".
        dim_ordering: string, "th" or "tf".
        volume_shape: optional input shape; only permuted for 'tf', not
            otherwise consumed by this function.
        filter_shape: optional kernel shape; only permuted for 'tf', not
            otherwise consumed by this function.

    # Returns
        The convolution output, in the layout selected by `dim_ordering`.

    # Raises
        Exception: if `dim_ordering` or `border_mode` is not recognised.
        ValueError: if `border_mode` is "same" and `strides` is not all ones.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
    if border_mode not in {'same', 'valid'}:
        raise Exception('Invalid border mode: ' + str(border_mode))

    # Normalise so that a list such as [1, 1, 1] compares equal to (1, 1, 1).
    strides = tuple(strides)

    # Explicit raise instead of `assert`, which disappears under `python -O`.
    if border_mode == 'same' and strides != (1, 1, 1):
        raise ValueError('border_mode "same" requires strides (1, 1, 1), got '
                         + str(strides))

    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one: bring everything into TH layout.
        x = x.dimshuffle((0, 4, 1, 2, 3))
        kernel = kernel.dimshuffle((4, 3, 0, 1, 2))
        if volume_shape:
            volume_shape = (volume_shape[0], volume_shape[4],
                            volume_shape[1], volume_shape[2], volume_shape[3])
        if filter_shape:
            filter_shape = (filter_shape[4], filter_shape[3],
                            filter_shape[0], filter_shape[1], filter_shape[2])

    if border_mode == 'same':
        # Embed x in a zero volume enlarged by (kernel size - 1) per spatial
        # axis, so the 'valid' convolution below returns x's spatial size.
        pad_dim1 = kernel.shape[2] - 1
        pad_dim2 = kernel.shape[3] - 1
        pad_dim3 = kernel.shape[4] - 1
        output_shape = (x.shape[0], x.shape[1],
                        x.shape[2] + pad_dim1,
                        x.shape[3] + pad_dim2,
                        x.shape[4] + pad_dim3)
        output = T.zeros(output_shape)
        indices = (slice(None), slice(None),
                   slice(pad_dim1 // 2, x.shape[2] + pad_dim1 // 2),
                   slice(pad_dim2 // 2, x.shape[3] + pad_dim2 // 2),
                   slice(pad_dim3 // 2, x.shape[4] + pad_dim3 // 2))
        x = T.set_subtensor(output[indices], x)
        border_mode = 'valid'

    border_mode_3d = (border_mode, border_mode, border_mode)

    #### TRANSPOSED KERNELS ####
    # flip the filters again, since the original Convolution3D implemented in
    # the keras backend (theano.tensor.nnet.conv3d2d.conv3d) does it as well
    # this way we emulate the transposed convolution
    # idx 2, 3, 4 are kernel size dimensions
    filters_flip = kernel[:, :, ::-1, ::-1, ::-1]

    # perform the convolution; axes 1 and 2 are swapped on the way in
    # and swapped back on the way out
    conv_out = conv3d2d.conv3d(signals=x.dimshuffle(0, 2, 1, 3, 4),
                               filters=filters_flip.dimshuffle(0, 2, 1, 3, 4),
                               border_mode=border_mode_3d)
    conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)

    # support strides by manually slicing the output
    if strides != (1, 1, 1):
        conv_out = conv_out[:, :, ::strides[0], ::strides[1], ::strides[2]]

    if dim_ordering == 'tf':
        conv_out = conv_out.dimshuffle((0, 2, 3, 4, 1))

    return conv_out
def test_conv3d(mode=mode_without_gpu, shared=theano.tensor._shared):
    """Compare ``conv3d`` with the ``pyconv3d`` reference and verify its gradient.

    Two filter configurations are exercised in turn: one with Tf=5 and one
    with Tf=1. For each, the forward result is compared against ``pyconv3d``,
    the gradient function is compiled and run, and ``utt.verify_grad`` is
    called on small random inputs. Wall-clock timings are printed.

    :param mode: compilation mode handed to ``theano.function`` and
        ``utt.verify_grad``.
    :param shared: constructor used to wrap the numpy inputs as shared
        variables.
    :raises SkipTest: when ``ndimage`` is None.
    """
    if ndimage is None:
        raise SkipTest("conv3d2d tests need SciPy")

    def ramp(shape):
        # float32 array holding 0, 1, 2, ... arranged in `shape`
        d0, d1, d2, d3, d4 = shape
        return numpy.arange(d0 * d1 * d2 * d3 * d4).reshape(shape).astype('float32')

    def compare_with_reference(signal_shape, filter_shape, check_traces):
        # Forward pass vs. pyconv3d, then compile and run the gradient update.
        signals = ramp(signal_shape)
        filters = ramp(filter_shape)

        start = time.time()
        expected = pyconv3d(signals, filters)
        print(time.time() - start)

        s_signals = shared(signals)
        s_filters = shared(filters)
        s_output = shared(signals * 0)

        out = conv3d(s_signals, s_filters,
                     signals_shape=signals.shape,
                     filters_shape=filters.shape)

        forward = theano.function([], [], updates={s_output: out}, mode=mode)
        if check_traces:
            check_diagonal_subtensor_view_traces(forward)
        start = time.time()
        forward()
        print(time.time() - start)
        utt.assert_allclose(expected, s_output.get_value(borrow=True))

        gsignals, gfilters = theano.grad(out.sum(), [s_signals, s_filters])
        backward = theano.function(
            [], [],
            updates=[(s_filters, gfilters), (s_signals, gsignals)],
            mode=mode,
            name='grad')
        if check_traces:
            check_diagonal_subtensor_view_traces(backward)
        start = time.time()
        backward()
        print('grad', time.time() - start)

    def check_gradient(signal_shape, filter_shape):
        # Numerical gradient verification on small random float32 inputs.
        signals = numpy.random.rand(*signal_shape).astype('float32')
        filters = numpy.random.rand(*filter_shape).astype('float32')
        utt.verify_grad(conv3d, [signals, filters], eps=1e-1, mode=mode)

    # Shapes are (Ns, Ts, C, Hs, Ws) for signals and (Nf, Tf, C, Hf, Wf)
    # for filters.
    compare_with_reference((3, 10, 3, 32, 32), (32, 5, 3, 5, 5), True)
    check_gradient((3, 3, 3, 5, 5), (4, 2, 3, 2, 2))

    # Additional Test that covers the case of patched implementation for
    # filter with Tf=1 (no diagonal-subtensor trace checks in this case).
    compare_with_reference((3, 10, 3, 32, 32), (32, 1, 3, 5, 5), False)
    check_gradient((3, 3, 3, 5, 5), (4, 1, 3, 2, 2))
# Symbolic float64 5-D tensor type with no broadcastable axis; `x` is the
# network input and `ytrue` the target.
tensor5 = T.TensorType(dtype='float64', broadcastable=(False,) * 5)
x = tensor5()
ytrue = tensor5()

# Encoder weights/bias: the 23 bias entries match the leading axis of `wenc`.
# NOTE(review): axis meaning depends on which `conv3d` is in scope - confirm.
wenc = theano.shared(
    np.asarray(np.random.randn(23, 9, 3, 9, 9), dtype='float64'))
benc = theano.shared(np.zeros(23, dtype='float64'))

# Decoder weights/bias: a single output filter whose axis 2 has size 23.
wdec = theano.shared(
    np.asarray(np.random.randn(1, 9, 23, 9, 9), dtype='float64'))
bdec = theano.shared(np.zeros(1, dtype='float64'))

params = [wenc, benc, wdec, bdec]

# Each bias is broadcast along every axis except axis 2 before being added
# to the convolution output.
convenc = max_nonlin(
    conv3d(x, wenc) + benc.dimshuffle(('x', 'x', 0, 'x', 'x')))
convdec = T.nnet.sigmoid(
    conv3d(convenc, wdec) + bdec.dimshuffle(('x', 'x', 0, 'x', 'x')))


def loss(ypred, ytrue):
    """Weighted sum of two normalised squared-error terms.

    The first term weights the squared error by `ytrue` and divides by
    ``1 + sum(ytrue)``; the second weights it by ``1 - ytrue`` and divides
    by ``sum(1 - ytrue)``. They are mixed 0.1 / 0.9.

    NOTE(review): presumably `ytrue` is a 0/1 mask - confirm. The second
    denominator has no ``+ 1`` guard, so it is zero when `ytrue` is all ones.
    """
    mix = 0.1
    sq_err = (ytrue - ypred) ** 2
    on_term = T.sum(sq_err * ytrue) / (1 + T.sum(ytrue))
    off_term = T.sum(sq_err * (1 - ytrue)) / T.sum(1.0 - ytrue)
    return mix * on_term + (1 - mix) * off_term


cost = loss(convdec, ytrue)
grads = T.grad(cost, params)
gradwenc = T.grad(cost, wenc)