def __init__(self, numpy_rng, input, input_shape, filter_shape, poolsize,
             activation, W=None, b=None, border_mode = 'valid',
             use_fast = False):
    """Build a convolution layer followed by max-pooling.

    numpy_rng    -- random generator used to draw the initial weights
                    (only consulted when ``W`` is None)
    input        -- symbolic 4D input; stored as-is (no reshape is applied)
    input_shape  -- shape tuple; index 1 must equal ``filter_shape[1]``
    filter_shape -- shape tuple of the filter bank
    poolsize     -- pooling window; the fast path only uses ``poolsize[0]``
    activation   -- callable applied to (convolution + bias)
    W, b         -- optional pre-built shared variables
    border_mode  -- forwarded to ``conv.conv2d`` on the non-fast path only
    use_fast     -- when true, use the FilterActs / MaxPool ops instead of
                    ``conv.conv2d`` / ``downsample.max_pool_2d``

    Sets ``output``, ``params`` and ``delta_params`` (momentum buffers).
    """
    assert input_shape[1] == filter_shape[1]

    self.input = input
    self.input_shape = input_shape
    self.filter_shape = filter_shape
    self.poolsize = poolsize
    self.activation = activation

    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))

    if W is None:
        # Uniform initialisation in (-limit, limit).
        limit = numpy.sqrt(6. / (fan_in + fan_out))
        weights = numpy.asarray(
            numpy_rng.uniform(low=-limit, high=limit, size=filter_shape),
            dtype=theano.config.floatX)
        if activation == T.nnet.sigmoid:
            weights *= 4
        W = theano.shared(value=weights, name='W')
    self.W = W

    if b is None:
        # One bias per output feature map, initialised to zero.
        b = theano.shared(
            value=numpy.zeros((filter_shape[0],),
                              dtype=theano.config.floatX),
            name='b')
    self.b = b

    # Buffers holding the previous update, used for momentum.
    self.delta_W = theano.shared(
        value=numpy.zeros(filter_shape, dtype=theano.config.floatX),
        name='delta_W')
    self.delta_b = theano.shared(
        value=numpy.zeros_like(self.b.get_value(borrow=True),
                               dtype=theano.config.floatX),
        name='delta_b')

    if not use_fast:
        # Plain Theano path.
        conv_out = conv.conv2d(input=self.input, filters=self.W,
                               filter_shape=filter_shape,
                               image_shape=input_shape,
                               border_mode=border_mode)
        y_out = activation(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        pooled_out = downsample.max_pool_2d(input=y_out, ds=poolsize,
                                            ignore_border=True)
    else:
        # pylearn2 path: axes are rotated to the right before the ops and
        # rotated back afterwards. border_mode and input_shape are not
        # used here.
        rotated_input = gpu_contiguous(self.input.dimshuffle(1, 2, 3, 0))
        rotated_filters = gpu_contiguous(self.W.dimshuffle(1, 2, 3, 0))
        rotated_conv = FilterActs()(rotated_input, rotated_filters)
        rotated_act = activation(
            rotated_conv + self.b.dimshuffle(0, 'x', 'x', 'x'))
        # Only a square pooling window is supported by this op.
        pooler = MaxPool(ds=poolsize[0], stride=poolsize[0])
        pooled_out = pooler(rotated_act).dimshuffle(3, 0, 1, 2)

    self.output = pooled_out
    self.params = [self.W, self.b]
    self.delta_params = [self.delta_W, self.delta_b]
def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape,
                   subsample=(1, 1, 1)):
    """Compare GpuCorr3dMM_gradWeights against convGrad3D on random data.

    Random float32 inputs and output-gradients are drawn with the given
    shapes, both graphs are compiled and evaluated, and the results are
    checked with ``utt.assert_allclose``.
    """
    inputs = shared(numpy.random.random(inputs_shape).astype('float32'))
    dCdH = shared(numpy.random.random(dCdH_shape).astype('float32'))

    reference = theano.tensor.nnet.convGrad3D(V=inputs, dCdH=dCdH,
                                              WShape=filters_shape,
                                              d=subsample)

    # The GEMM op takes the channel axis in position 1.
    img = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))
    topgrad = gpu_contiguous(dCdH.dimshuffle(0, 4, 1, 2, 3))

    # The filter shape is only passed explicitly when subsampling is used.
    extra = {}
    if not (subsample == (1, 1, 1)):
        extra['shape'] = filters_shape[1:4]
    gemm_result = GpuCorr3dMM_gradWeights(subsample=subsample)(
        img, topgrad, **extra)
    gemm_result = gemm_result.dimshuffle(0, 2, 3, 4, 1)

    f_ref = theano.function([], reference)
    f = theano.function([], gemm_result, mode=mode_with_gpu)

    expected = f_ref()
    actual = f()
    utt.assert_allclose(expected, actual)
def dnn_3dconv(img, kerns, subsample=(1, 1), conv_mode='conv'):
    """
    GPU 3d convolution using cuDNN from NVIDIA.

    The memory layout to use is 'bc012', that is 'batch', 'channel',
    'first dim', 'second dim', 'third dim' in that order.

    :param img: images to do the convolution over
    :param kerns: convolution filters
    :param subsample: perform subsampling of the output (default: (1, 1))
    :param conv_mode: passed unchanged to ``GpuDnnConv3dDesc``
        (default: 'conv')

    :warning: The cuDNN library only works with GPU that have a compute
        capability of 3.0 or higher. This means that older GPU will not
        work with this Op.
    """
    # NOTE(review): the default subsample has two entries although the
    # convolution is 3d -- confirm what GpuDnnConv3dDesc expects.
    contiguous_img = gpu_contiguous(img)
    contiguous_kerns = gpu_contiguous(kerns)

    descriptor = GpuDnnConv3dDesc(subsample=tuple(subsample),
                                  conv_mode=conv_mode)()
    out_shape = GpuDnn3dConv.get_out_shape(contiguous_img.shape,
                                           contiguous_kerns.shape,
                                           descriptor.owner.op.subsample)
    # Uninitialised output buffer that the convolution writes into.
    out_buffer = gpu_alloc_empty(*out_shape)
    return GpuDnn3dConv()(contiguous_img, contiguous_kerns, out_buffer,
                          descriptor)
def local_gpu_fft_conv(node):
    """
    gpu_conv -> gpu_fft_conv_op

    Returns a one-element replacement list when the node is a GpuConv that
    can be expressed with GpuFFTConvOp, and None otherwise.
    """
    conv_op = node.op
    if not isinstance(conv_op, GpuConv):
        return

    if (conv_op.border_mode=='full' and conv_op.subsample==(1,1)):
        image, kernel = node.inputs
        image = gpu_contiguous(image)
        kernel = gpu_contiguous(kernel)
        fft_op = GpuFFTConvOp(conv_op.border_mode, check=conv_op.verbose)
        return [fft_op(image, kernel)]

    if (config.GpuFFTConvOp.valid and conv_op.border_mode=='valid' and
            conv_op.subsample==(1,1) and conv_op.kshp and conv_op.imshp):
        # A valid convolution is obtained by running the full FFT
        # convolution and keeping only the central window.
        kernel_shape = conv_op.kshp
        image_shape = conv_op.imshp[1:]
        top = kernel_shape[0]-1
        left = kernel_shape[1]-1
        height = image_shape[0]-kernel_shape[0]+1
        width = image_shape[1]-kernel_shape[1]+1

        image = gpu_contiguous(node.inputs[0])
        kernel = gpu_contiguous(node.inputs[1])
        full_result = GpuFFTConvOp("full", check=conv_op.verbose)(image, kernel)
        cropped = full_result[:, :, top:top+height, left:left+width]
        cropped = cuda.gpu_from_host(cropped)
        return [cropped]
def f_conv(self, x, spec, in_dim, weight_name):
    """Apply a 2D convolution described by ``spec`` to ``x``.

    ``spec`` is ``(layer_type, dims)`` where ``dims`` holds the number of
    filters, the (square) filter side and the (square) stride, in that
    order. ``in_dim`` is the per-example input shape with channels first.

    Returns ``(y, output_size)``: the symbolic result and the per-example
    output shape ``(num_filters,) + spatial shape``.
    """
    layer_type, dims = spec
    n_out = dims[0]
    kernel = (dims[1], dims[1])
    step = (dims[2], dims[2])
    # 'convf' layers use a full convolution, everything else a valid one.
    mode = 'full' if 'convf' in layer_type else 'valid'
    n_in = in_dim[0]

    W = self.weight(self.rand_init_conv((n_out, n_in) + kernel),
                    weight_name)

    if step == (1, 1):
        assert self.p.batch_size == self.p.valid_batch_size
        y = conv2d(x, W,
                   image_shape=(2*self.p.batch_size, ) + in_dim,
                   filter_shape=((n_out, n_in) + kernel),
                   border_mode=mode)
    else:
        # Strided convolutions go through the GEMM-based GPU op.
        corr = GpuCorrMM(subsample=step, border_mode=mode, pad=(0, 0))
        y = corr(gpu_contiguous(x), gpu_contiguous(W))

    spatial = ConvOp.getOutputShape(in_dim[1:], kernel, step, mode)
    output_size = ((n_out,) + spatial)
    return y, output_size
def get_output_for(self, input, *args, **kwargs):
    """Return the symbolic output of the layer for ``input``.

    When ``self.dimshuffle`` is set, input and filters are converted from
    bc01 to c01b before the op and the result is converted back.
    """
    filters = self.W
    if self.dimshuffle:
        filters = filters.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        input = input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b

    if self.flip_filters:
        # Reverse both spatial axes of the (c01b) filters.
        filters = filters[:, ::-1, ::-1, :]

    contiguous_filters = gpu_contiguous(filters)
    contiguous_input = gpu_contiguous(input)
    conved = self.filter_acts_op(contiguous_input, contiguous_filters)

    if self.b is not None:
        # Untied biases are c01-shaped, tied biases have one entry per
        # channel; both are broadcast over the trailing batch axis.
        pattern = (0, 1, 2, 'x') if self.untie_biases else (0, 'x', 'x', 'x')
        conved += self.b.dimshuffle(*pattern)

    conved = self.nonlinearity(conved)

    if not self.dimshuffle:
        return conved
    return conved.dimshuffle(3, 0, 1, 2)  # c01b to bc01
def build_graph(self, state_below):
    """Build the symbolic output of this layer for ``state_below``.

    The result is computed with ``GpuDnnConvGradI`` (the cuDNN gradient of
    a convolution with respect to its input), the bias is added per
    channel, and the activation named by ``self.activ`` is applied.
    """
    filters = self.filters
    nfilters = self.nfilters
    b = self.b
    border_mode = self.border_mode
    # activ = self.activ
    batch_size = state_below.shape[0]
    # Spatial output size is derived from axes 1 and 2 of the input, i.e.
    # before the channel axis is moved to position 1 below.
    out_size = DeConvNet.infer_size(state_below.shape[1:3], filters.shape[2:], self.stride, self.border_mode)
    out_shape = [batch_size, nfilters, out_size[0], out_size[1]]
    # Move the last axis to position 1 for the cuDNN op.
    state_below = state_below.dimshuffle(0, 3, 1, 2)
    filters = gpu_contiguous(filters)
    state_below = gpu_contiguous(state_below)
    out_shape = tensor.stack(out_shape)
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=self.stride, conv_mode='conv')(out_shape, filters.shape)
    pred = GpuDnnConvGradI()(
        filters, state_below, gpu_alloc_empty(*out_shape), desc)
    pred += b.dimshuffle('x', 0, 'x', 'x')
    # Move the channel axis back to the end.
    pred = pred.dimshuffle(0, 2, 3, 1)
    # NOTE(review): eval() executes whatever string is stored in
    # self.activ; this is only safe if that string never comes from
    # untrusted input. Consider an explicit name -> function table.
    return eval(self.activ)(pred)
def grad(self, inp, grads):
    """Return the gradient expression built by ``ProbMaxPoolGrad``.

    ``inp`` is ``(x, top_down)`` and ``grads`` holds the gradients with
    respect to the two outputs of this op, in the same order.
    """
    x, top_down = inp
    grad_p, grad_h = grads
    grad_p = gpu_contiguous(grad_p)
    grad_h = gpu_contiguous(grad_h)
    # Re-apply the op to obtain its two forward outputs.
    pooled, hidden = self(x, top_down)
    grad_op = ProbMaxPoolGrad(self.ds, self.stride, self.start)
    return grad_op(pooled, hidden, grad_p, grad_h)
def deconv(self, X, subsample=(2, 2), border_mode=(2, 2), conv_mode='conv', atype='sigmoid'):
    """
    sets up dummy convolutional forward pass and uses its grad as deconv
    currently only tested/working with same padding

    X           -- symbolic input
    subsample   -- per-axis factor by which axes 2 and 3 of X are scaled
                   to obtain the output shape
    border_mode -- padding forwarded to GpuDnnConvDesc
    conv_mode   -- forwarded to GpuDnnConvDesc
    atype       -- activation type forwarded to activation_fn_th

    Returns the activation applied to (deconvolution + per-channel bias).
    """
    # Always work on C-contiguous operands; a copy is only made when the
    # input is not already contiguous.
    img = gpu_contiguous(X)
    kerns = gpu_contiguous(self.W)

    # Uninitialised output buffer. Its shape also describes the "image" of
    # the dummy forward convolution, so the descriptor reuses it instead of
    # allocating a second identical buffer.
    out = gpu_alloc_empty(img.shape[0], kerns.shape[1],
                          img.shape[2]*subsample[0],
                          img.shape[3]*subsample[1])

    # Convolution descriptor shared by the cuDNN convolution ops.
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)(out.shape, kerns.shape)

    # The convolution gradient with respect to the inputs.
    d_img = GpuDnnConvGradI()(kerns, img, out, desc)
    return activation_fn_th(d_img + self.b.dimshuffle('x', 0, 'x', 'x'), atype=atype)
def run_gradinput(self, inputs_shape, filters_shape, subsample=(1, 1, 1)):
    """Compare GpuCorr3dMM_gradInputs against convTransp3D on random data.

    The reference is evaluated first because its result shape is needed to
    tell the GEMM op the bottom shape when subsampling is used.
    """
    inputs = shared(numpy.random.random(inputs_shape).astype('float32'))
    filters = shared(numpy.random.random(filters_shape).astype('float32'))
    bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))

    reference = theano.tensor.nnet.convTransp3D(W=filters, b=bias,
                                                d=subsample, H=inputs)
    f_ref = theano.function([], reference)
    res_ref = f_ref()

    # Get bottom shape using convTransp3D
    bottom = shared(numpy.random.random(res_ref.shape).astype('float32'))

    # The GEMM op takes the channel axis in position 1.
    weight = gpu_contiguous(filters.dimshuffle(0, 4, 1, 2, 3))
    top = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))

    call_kwargs = {'kern': weight, 'topgrad': top}
    if not (subsample == (1, 1, 1)):
        # The output shape is only passed explicitly when subsampling.
        call_kwargs['shape'] = bottom.shape[1:4]
    gemm_result = GpuCorr3dMM_gradInputs(subsample=subsample)(**call_kwargs)
    gemm_result = gemm_result.dimshuffle(0, 2, 3, 4, 1)

    f = theano.function([], gemm_result, mode=mode_with_gpu)
    res = f()
    utt.assert_allclose(res_ref, res)
def run_gradinput(self, inputs_shape, filters_shape, subsample=(1, 1, 1)):
    """Compare GpuCorr3dMM_gradInputs against Corr3dMM_gradInputs.

    Both ops receive the same random float32 operands; the results of the
    'FAST_RUN' reference and the GPU function are checked with
    ``utt.assert_allclose``.
    """
    inputs = shared(numpy.random.random(inputs_shape).astype('float32'))
    filters = shared(numpy.random.random(filters_shape).astype('float32'))

    # Expected bottom extent per spatial axis:
    # (top - 1) * stride + filter size.
    bottom_dims = [
        (inputs_shape[axis] - 1) * subsample[axis - 1] + filters_shape[axis]
        for axis in (1, 2, 3)
    ]
    bottom_shape = theano.shared(numpy.array(bottom_dims))

    # Both ops take the channel axis in position 1.
    weight = gpu_contiguous(filters.dimshuffle(0, 4, 1, 2, 3))
    top = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))

    call_kwargs = {'kern': weight, 'topgrad': top}
    if not (subsample == (1, 1, 1)):
        # The output shape is only passed explicitly when subsampling.
        call_kwargs['shape'] = bottom_shape
    conv_ref = Corr3dMM_gradInputs(subsample=subsample)(**call_kwargs)
    conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(**call_kwargs)

    f_ref = theano.function([], conv_ref, mode='FAST_RUN')
    f = theano.function([], conv_gemm, mode=mode_with_gpu)

    expected = f_ref()
    actual = f()
    utt.assert_allclose(expected, actual)
def _deconv2d(X, w, subsample=(1, 1), border_mode=(0, 0), conv_mode='conv'):
    """
    from Alec (https://github.com/Newmu/dcgan_code/blob/master/lib/ops.py)

    Sets up a dummy convolutional forward pass and uses its gradient with
    respect to the input as the deconvolution.
    Currently only tested/working with same padding.
    """
    contiguous_x = gpu_contiguous(X)
    contiguous_w = gpu_contiguous(w)

    # Uninitialised output buffer: axes 2 and 3 of the input are scaled by
    # the subsampling factors.
    rows = contiguous_x.shape[2]*subsample[0]
    cols = contiguous_x.shape[3]*subsample[1]
    out = gpu_alloc_empty(contiguous_x.shape[0], contiguous_w.shape[1],
                          rows, cols)

    make_desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                               conv_mode=conv_mode)
    desc = make_desc(out.shape, contiguous_w.shape)

    return GpuDnnConvGradI()(contiguous_w, contiguous_x, out, desc)
def __init__(self, rngs, input_layer, Lshape, traits, activation):
    """Create a convolution layer backed by the FilterActs op.

    rngs        -- sequence of random generators; only the first is used
    input_layer -- previous layer; its ``output(False)`` is the input
    Lshape      -- ``(input shape, filter shape)``; their channel entries
                   (index 1) must match
    traits      -- dict read for 'l2decay', 'padding' and 'initW'
    activation  -- accepted for interface compatibility, not used here

    Sets ``conv_out`` (bc01) and ``params``.
    """
    super(ConvLayer, self).__init__(input_layer, traits, "Conv")

    self.rng = rngs[0]
    self.l2decay = traits['l2decay']

    filter_shape = Lshape[1]
    # The number of input channels must match number of filter channels
    assert Lshape[0][1] == filter_shape[1]

    self.pad = traits['padding']
    self.W = NNl.gen_weights(self.rng, filter_shape, 0, traits['initW'])

    # Using Alex K.'s fast CUDA conv, courtesy of S. Dieleman
    self.x = self.input_layer.output(False)
    fast_conv = FilterActs(pad=self.pad, partial_sum=1)

    # The op works on c01b tensors, so convert from bc01 and back.
    x_c01b = gpu_contiguous(self.x.dimshuffle(1, 2, 3, 0))
    w_c01b = gpu_contiguous(self.W.dimshuffle(1, 2, 3, 0))
    self.conv_out = fast_conv(x_c01b, w_c01b).dimshuffle(3, 0, 1, 2)

    # store parameters of this layer
    self.params = [self.W]
def output(self):
    """Return the symbolic output: convolution, max-pooling, bias, then
    the optional activation and optional flattening to 2D."""
    # The cuda-convnet ops work on c01b tensors.
    x_c01b = self.x.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
    w_c01b = self.W.dimshuffle(1, 2, 3, 0)  # bc01 to c01b

    convolve = FilterActs(stride=1, partial_sum=1)
    conv_c01b = convolve(gpu_contiguous(x_c01b), gpu_contiguous(w_c01b))

    # Square, non-overlapping pooling window taken from pool_shape[0].
    pooler = MaxPool(ds=self.pool_shape[0], stride=self.pool_shape[0])
    pooled = pooler(conv_c01b).dimshuffle(3, 0, 1, 2)  # c01b to bc01

    result = pooled + self.b.dimshuffle('x', 0, 'x', 'x')

    if self.actfun is not None:
        result = self.actfun(result)

    if self.flatten is True:
        result = result.flatten(2)

    return result
def output(self, input=None, dropout_active=True, *args, **kwargs):
    """Return the symbolic output of this layer.

    When ``input`` is None it is fetched from ``self.input_layer``. The
    bias is subtracted from the input, dropout is optionally applied, and
    the result is passed through ``self.image_acts_op`` and the
    nonlinearity.
    """
    if input is None:
        input = self.input_layer.output(dropout_active=dropout_active, *args, **kwargs)

    # Untied biases are c01-shaped, tied biases have one entry per channel.
    bias_pattern = (0, 1, 2, 'x') if self.untie_biases else (0, 'x', 'x', 'x')
    input -= self.b.dimshuffle(*bias_pattern)

    if dropout_active and (self.dropout > 0.):
        keep = 1 - self.dropout
        mask = layers.srng.binomial(input.shape, p=keep, dtype='int32').astype('float32')
        # Rescaling here means the weights need no rescaling at test time.
        input = input / keep * mask

    contiguous_input = gpu_contiguous(input)
    contiguous_filters = gpu_contiguous(self.W)

    if self.stride != 1:
        # With a stride the op is given the target spatial size.
        _, x, y, _ = self.get_output_shape()
        deconved = self.image_acts_op(contiguous_input, contiguous_filters, as_tensor_variable((x, y)))
    else:
        deconved = self.image_acts_op(contiguous_input, contiguous_filters)

    return self.nonlinearity(deconved)
def output(self, input=None, dropout_active=True, *args, **kwargs):
    """Return the symbolic output of this circular-convolution layer.

    input          -- symbolic input; when None it is fetched from
                      ``self.input_layer``
    dropout_active -- apply dropout when true and ``self.dropout > 0``

    Axis 1 of the input is extended by ``filter_size - stride`` entries
    copied from its start, so that the valid convolution performed by
    ``self.filter_acts_op`` amounts to a circular one along that axis.
    """
    # Identity test: ``== None`` is elementwise on array-like inputs.
    if input is None:
        input = self.input_layer.output(dropout_active=dropout_active, *args, **kwargs)

    if dropout_active and (self.dropout > 0.):
        retain_prob = 1 - self.dropout
        mask = layers.srng.binomial(input.shape, p=retain_prob, dtype='int32').astype('float32')
        # apply the input mask and rescale the input accordingly. By doing
        # this it's no longer necessary to rescale the weights at test time.
        input = input / retain_prob * mask

    # pad input so the valid convolution amounts to a circular one.
    # we need to copy (filter_size - stride) values from one side to the
    # other
    wrap = self.filter_size - self.stride
    input_padded = T.zeros((input.shape[0], input.shape[1] + wrap,
                            input.shape[2], input.shape[3]))
    input_padded = T.set_subtensor(
        input_padded[:, :input.shape[1], :, :], input)
    input_padded = T.set_subtensor(
        input_padded[:, input.shape[1]:, :, :], input[:, :wrap, :, :])

    contiguous_input = gpu_contiguous(input_padded)
    contiguous_filters = gpu_contiguous(self.W)
    conved = self.filter_acts_op(contiguous_input, contiguous_filters)

    if self.untie_biases:
        conved += self.b.dimshuffle(0, 1, 2, 'x')
    else:
        conved += self.b.dimshuffle(0, 'x', 'x', 'x')

    return self.nonlinearity(conved)
def fprop(self, input):
    """Forward pass: convolution, max-pooling, bias and activation, with
    ``apply_format`` applied to parameters and intermediate results.

    Stores the formatted parameters in ``self.w_comp`` / ``self.b_comp``
    and returns the formatted, flattened (2D) activation.
    """
    fmt = self.format
    precision = self.comp_precision

    # we reduce the precision of parameters for the computations
    self.w_comp = apply_format(fmt, self.W, precision, self.w_range)
    self.b_comp = apply_format(fmt, self.b, precision, self.b_range)

    input = input.reshape(self.image_shape)

    # convolution (the cuda-convnet ops work on c01b tensors)
    x_c01b = input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
    w_c01b = self.w_comp.dimshuffle(1, 2, 3, 0) *self.scale  # bc01 to c01b
    convolve = FilterActs(stride=self.filter_stride,
                          partial_sum=self.partial_sum,
                          pad = self.zero_pad)
    conv_c01b = convolve(gpu_contiguous(x_c01b), gpu_contiguous(w_c01b))

    # downsample each feature map individually, using maxpooling
    pooler = MaxPool(ds=self.pool_shape, stride=self.pool_stride)
    pooled_out = pooler(conv_c01b).dimshuffle(3, 0, 1, 2)  # c01b to bc01

    # bias
    biased = pooled_out + self.b_comp.dimshuffle('x', 0, 'x', 'x')*self.scale
    pooled_out = apply_format(fmt, biased, precision, self.z_range)

    # activation
    pooled_out = self.activation(pooled_out)
    return apply_format(fmt, pooled_out.flatten(2), precision, self.y_range)
def get_output_for(self, input, **kwargs):
    """Return the symbolic output of the layer for ``input``.

    When ``self.dimshuffle`` is set, input and filters are converted from
    bc01 to c01b before the op and the result is converted back. Strided
    results are truncated to the size given by ``conv_output_length``.
    """
    filters = self.W
    if self.dimshuffle:
        filters = filters.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        input = input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b

    if self.flip_filters:
        filters = filters[:, ::-1, ::-1, :]  # flip top-down, left-right

    contiguous_filters = gpu_contiguous(filters)
    contiguous_input = gpu_contiguous(input)
    conved = self.filter_acts_op(contiguous_input, contiguous_filters)

    # NOTE(review): self.stride is indexed below, so it looks like a
    # sequence and this comparison with 1 would then always be true --
    # confirm the intended check.
    if self.stride != 1:
        # cuda-convnet calculates a non-standard strided output shape,
        # so we need to truncate the output in this case
        if isinstance(self.pad, tuple):
            pad = self.pad
        else:
            pad = (self.pad,) * 2
        true_rows = conv_output_length(input.shape[1], self.filter_size[0],
                                       self.stride[0], pad[0])
        true_columns = conv_output_length(input.shape[2], self.filter_size[1],
                                          self.stride[1], pad[1])
        conved = conved[:, :true_rows, :true_columns, :]

    if self.b is not None:
        # Untied biases are c01-shaped, tied biases have one entry per
        # channel; both are broadcast over the trailing batch axis.
        pattern = (0, 1, 2, "x") if self.untie_biases else (0, "x", "x", "x")
        conved += self.b.dimshuffle(*pattern)

    conved = self.nonlinearity(conved)

    if not self.dimshuffle:
        return conved
    return conved.dimshuffle(3, 0, 1, 2)  # c01b to bc01
def output(self, input=None, dropout_active=True, *args, **kwargs):
    """Return the symbolic output of this convolution layer.

    input          -- symbolic input; when None it is fetched from
                      ``self.input_layer``
    dropout_active -- apply dropout when true and ``self.dropout > 0``

    The (optionally dropped-out) input is passed through
    ``self.filter_acts_op`` with the layer's filters, the bias is added
    and the nonlinearity is applied.
    """
    # Identity test: ``== None`` is elementwise on array-like inputs.
    if input is None:
        input = self.input_layer.output(dropout_active=dropout_active, *args, **kwargs)

    if dropout_active and (self.dropout > 0.):
        retain_prob = 1 - self.dropout
        mask = layers.srng.binomial(input.shape, p=retain_prob, dtype='int32').astype('float32')
        # apply the input mask and rescale the input accordingly. By doing
        # this it's no longer necessary to rescale the weights at test time.
        input = input / retain_prob * mask

    contiguous_input = gpu_contiguous(input)
    contiguous_filters = gpu_contiguous(self.W)
    conved = self.filter_acts_op(contiguous_input, contiguous_filters)

    if self.untie_biases:
        conved += self.b.dimshuffle(0, 1, 2, 'x')
    else:
        conved += self.b.dimshuffle(0, 'x', 'x', 'x')

    return self.nonlinearity(conved)
def __init__(self, numpy_rng=None, input = None, filter_shape=(2, 1, 5, 5),
             poolsize=(1, 1), activation=T.nnet.sigmoid, flatten = False,
             use_fast = False):
    """Build a convolution layer followed by max-pooling.

    numpy_rng    -- random generator used to draw the initial weights; a
                    fresh ``numpy.random.RandomState()`` is used when None
    input        -- symbolic 4D input
    filter_shape -- shape tuple of the filter bank
    poolsize     -- pooling window; the fast path only uses ``poolsize[0]``
    activation   -- callable applied to (convolution + bias)
    flatten      -- flatten the output to 2D when true
    use_fast     -- when true, use the pylearn2 FilterActs / MaxPool ops
                    instead of ``conv.conv2d`` / ``downsample.max_pool_2d``

    Sets ``W``, ``b``, ``output`` and ``params``.
    """
    self.type = 'conv'

    self.input = input
    self.filter_shape = filter_shape
    self.poolsize = poolsize

    self.activation = activation
    self.flatten = flatten

    # The signature advertises numpy_rng=None, so make that usable instead
    # of failing on ``None.uniform``.
    if numpy_rng is None:
        numpy_rng = numpy.random.RandomState()

    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))

    # initialize weights with random weights
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    initial_W = numpy.asarray(
        numpy_rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
        dtype=theano.config.floatX)
    if activation == T.nnet.sigmoid:
        initial_W *= 4
    self.W = theano.shared(value = initial_W, name = 'W')

    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, name='b')

    # convolve input feature maps with filters
    if use_fast:
        # Imported lazily so the GPU-only dependencies are needed only
        # when the fast path is requested.
        from theano.sandbox.cuda.basic_ops import gpu_contiguous
        from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
        from pylearn2.sandbox.cuda_convnet.pool import MaxPool

        input_shuffled = self.input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        filters_shuffled = self.W.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        conv_op = FilterActs()
        contiguous_input = gpu_contiguous(input_shuffled)
        contiguous_filters = gpu_contiguous(filters_shuffled)
        conv_out_shuffled = conv_op(contiguous_input, contiguous_filters)
        y_out_shuffled = activation(
            conv_out_shuffled + self.b.dimshuffle(0, 'x', 'x', 'x'))
        # Square, non-overlapping pooling window taken from poolsize[0].
        pool_op = MaxPool(ds=poolsize[0], stride=poolsize[0])
        # c01b back to bc01.
        self.output = pool_op(y_out_shuffled).dimshuffle(3, 0, 1, 2)
    else:
        conv_out = conv.conv2d(input=self.input, filters=self.W,
                               filter_shape=filter_shape)
        y_out = activation(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        # downsample each feature map individually, using maxpooling
        self.output = downsample.max_pool_2d(input=y_out, ds=poolsize,
                                             ignore_border=True)

    if self.flatten:
        self.output = self.output.flatten(2)

    self.params = [self.W, self.b]
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), conv_mode='conv'):
    """Convolve ``img`` with ``kerns`` using the cuDNN ``GpuDnnConv`` op.

    ``border_mode``, ``subsample`` and ``conv_mode`` are forwarded to
    ``GpuDnnConvDesc``. Both operands are made C-contiguous first.
    """
    contiguous_img = gpu_contiguous(img)
    contiguous_kerns = gpu_contiguous(kerns)
    make_desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                               conv_mode=conv_mode)
    descriptor = make_desc(contiguous_img.shape, contiguous_kerns.shape)
    return GpuDnnConv()(contiguous_img, contiguous_kerns, descriptor)
def f(x):
    """Symbolic forward pass: convolution, optional max-pooling, bias,
    optional activation and optional flattening to 2D."""
    if satisfy_convnet:
        # cuda-convnet path; padding is derived from the border mode.
        if self._border_mode == 'valid':
            pad = 0
        elif self._border_mode == 'same':
            pad = (self._filter_shape[2] - 1) // 2
        else:
            pad = self._filter_shape[2] - 1
        fast_conv = FilterActs(stride=1, partial_sum=1, pad=pad)
        # The op works on c01b tensors, so convert from bc01 and back.
        x = gpu_contiguous(x.dimshuffle(1, 2, 3, 0))
        W = gpu_contiguous(self._W.dimshuffle(1, 2, 3, 0))
        z = fast_conv(x, W).dimshuffle(3, 0, 1, 2)
    else:
        z = T.nnet.conv.conv2d(input=x, filters=self._W,
                               filter_shape=self._filter_shape,
                               border_mode=self._border_mode)

    if self._pool_shape != (1, 1):
        z = max_pool_2d(input=z, ds=self._pool_shape, ignore_border=True)

    # A float bias is added directly; otherwise it is broadcast per channel.
    if isinstance(self._b, float):
        bias_term = self._b
    else:
        bias_term = self._b.dimshuffle('x', 0, 'x', 'x')
    z = z + bias_term

    if self._active_func is not None:
        z = self._active_func(z)

    if self._flatten:
        z = z.flatten(2)

    return z
def deconv(X, w, subsample=(1, 1), border_mode=(0, 0), conv_mode='conv'):
    """Deconvolve ``X`` with ``w`` via the cuDNN convolution input-gradient.

    X           -- symbolic input
    w           -- filters
    subsample   -- per-axis factor by which axes 2 and 3 of X are scaled
                   to obtain the output shape
    border_mode -- padding forwarded to GpuDnnConvDesc
    conv_mode   -- forwarded to GpuDnnConvDesc

    Returns the result of ``GpuDnnConvGradI``.
    """
    img = gpu_contiguous(X)
    kerns = gpu_contiguous(w)

    # Uninitialised output buffer. Its shape also describes the "image" of
    # the dummy forward convolution, so the descriptor reuses it instead of
    # allocating a second identical buffer.
    out = gpu_alloc_empty(img.shape[0], kerns.shape[1],
                          img.shape[2]*subsample[0],
                          img.shape[3]*subsample[1])
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)(out.shape, kerns.shape)

    d_img = GpuDnnConvGradI()(kerns, img, out, desc)
    return d_img
def __init__(self, rng, input, filter_shape=None,W=None, b=None,
             init='something', border=None, subsample=(1,1)):
    """Build a deconvolution layer from the cuDNN convolution
    input-gradient op.

    rng          -- random generator; used only when ``W`` is None and
                    ``init`` is not 'zero'
    input        -- symbolic 4D input
    filter_shape -- shape of the filter bank; required when ``W`` is None
    W, b         -- optional pre-built shared variables
    init         -- 'zero' gives all-zero weights, anything else a uniform
                    initialisation
    border       -- 'same' or 'valid'
    subsample    -- per-axis upsampling factor

    Raises NotImplementedError for any other ``border`` value.
    Sets ``W``, ``b``, ``filter_shape``, ``border``, ``border_padding``,
    ``subsample``, ``output``, ``params`` and ``input``.
    """
    # weight and bias init if none are given.
    if W is None:
        if init == 'zero':
            W_values = np.zeros(filter_shape, dtype=theano.config.floatX)
            W = theano.shared(value=W_values, name='W_conv', borrow=True)
        else:
            fan_in = np.prod(filter_shape[1:])
            fan_out = (filter_shape[0] * np.prod(filter_shape[2:]))
            # initialize weights with random weights
            W_bound = np.sqrt(6. / (fan_in + fan_out))
            W = theano.shared(
                np.asarray(
                    rng.uniform(low=-W_bound, high=W_bound,
                                size=filter_shape),
                    dtype=theano.config.floatX
                ),
                name='W_conv',
                borrow=True
            )

    # The concrete filter shape always comes from W itself, so a caller
    # who passes W without filter_shape is handled too.
    actual_filter_shape = W.shape.eval()

    if b is None:
        # the bias is a 1D tensor with one entry per leading filter axis
        b_values = np.zeros((int(actual_filter_shape[0]),),
                            dtype=theano.config.floatX)
        b = theano.shared(value=b_values, name='b_conv', borrow=True)

    self.W = W
    self.b = b
    self.filter_shape = actual_filter_shape
    self.border = border

    # This is largely based on
    # https://github.com/Newmu/dcgan_code/blob/master/lib/ops.py#L85
    # with some minor changes.
    if border == 'same':
        assert self.filter_shape[2] % 2 == 1 and self.filter_shape[3] % 2 == 1
        self.border_padding = ((self.filter_shape[2]-1)//2,
                               (self.filter_shape[3]-1)//2)
        out = basic_ops.gpu_alloc_empty(
            input.shape[0], self.W.shape[1],
            input.shape[2]*subsample[0],
            input.shape[3]*subsample[1])
    elif border == 'valid':
        self.border_padding = (0,0)
        out = basic_ops.gpu_alloc_empty(
            input.shape[0], self.W.shape[1],
            input.shape[2]*subsample[0]+(self.filter_shape[2]-1),
            input.shape[3]*subsample[1]+(self.filter_shape[3]-1))
    else:
        # Returning a value from __init__ is itself an error and hid the
        # real problem; raise the intended exception instead.
        raise NotImplementedError(
            "unsupported border mode %r; expected 'same' or 'valid'"
            % (border,))

    self.subsample = subsample

    # The bias is removed from the input before the deconvolution.
    img = basic_ops.gpu_contiguous(input - self.b.dimshuffle('x', 0, 'x', 'x'))
    kerns = basic_ops.gpu_contiguous(self.W)
    desc = dnn.GpuDnnConvDesc(
        border_mode=self.border_padding,
        subsample=self.subsample,
        conv_mode='conv')(
            basic_ops.gpu_alloc_empty(img.shape[0], kerns.shape[1],
                                      img.shape[2]*subsample[0],
                                      img.shape[3]*subsample[1]).shape,
            kerns.shape)
    d_img = dnn.GpuDnnConvGradI()(kerns, img, out, desc)
    self.output = d_img

    # store parameters of this layer
    self.params = [self.W, self.b]
    self.input = input
def make_node(self, X, DY):
    """Create the Apply node for this op.

    Both inputs are converted to contiguous CUDA variables and must be
    4-dimensional float32; the single output has the same type as ``X``.
    """
    X = gpu_contiguous(as_cuda_ndarray_variable(X))
    DY = gpu_contiguous(as_cuda_ndarray_variable(DY))

    # dtype checks first, then rank checks.
    assert X.dtype == "float32"
    assert DY.dtype == "float32"
    assert X.ndim == 4
    assert DY.ndim == 4

    node_inputs = [X, DY]
    node_outputs = [X.type()]
    return theano.Apply(self, node_inputs, node_outputs)
def make_node(self, X, sizes):
    """Create the Apply node for this op.

    ``X`` must be a 4-dimensional float32 variable and ``sizes`` a
    2-dimensional float32 variable; both are converted to contiguous CUDA
    variables. The single output has the same type as ``X``.
    """
    X = gpu_contiguous(as_cuda_ndarray_variable(X))
    sizes = gpu_contiguous(as_cuda_ndarray_variable(sizes))

    assert X.dtype == "float32"
    assert X.ndim == 4
    assert sizes.dtype == "float32"
    assert sizes.ndim == 2

    node_inputs = [X, sizes]
    node_outputs = [X.type()]
    return theano.Apply(self, node_inputs, node_outputs)
def compileActivation(self, net, layerNum):
    """Build the symbolic activation of this convolution layer and append
    it to ``net.varArrayA``.

    The input is ``net.x`` for layer 0 and the previous entry of
    ``net.varArrayA`` otherwise. It is reshaped to 4D, convolved (through
    FilterActs when ``self.optimized`` is set, ``T.nnet.conv2d``
    otherwise), biased, optionally max-pooled, flattened back to 2D and
    passed to ``self.activation``. Nothing is returned.
    """
    variable = net.x if layerNum == 0 else net.varArrayA[layerNum - 1]
    #Calc shapes for reshape function on-the-fly. Assume we have square images as input.
    sX = T.cast(T.sqrt(T.shape(variable)[0] / self.kernel_shape[1]), 'int16')
    #Converts input from 2 to 4 dimensions
    Xr = T.reshape(variable.T, (T.shape(variable)[1], self.kernel_shape[1], sX, sX))
    if self.optimized:
        # Expected side of the (square) output map; the op result is
        # cropped to this size below.
        out_size = T.cast( T.ceil((T.shape(Xr)[-1] - T.shape(net.varWeights[layerNum]['w'])[-1] + 1) / np.float32(self.stride)), 'int32')
        conv_op = FilterActs(stride=self.stride)
        input_shuffled = Xr.dimshuffle(1, 2, 3, 0) # bc01 to c01b
        filters_shuffled = net.varWeights[layerNum]['w'].dimshuffle(1, 2, 3, 0) # bc01 to c01b
        filters_flipped = filters_shuffled[:, ::-1, ::-1, :] # flip rows and columns
        contiguous_input = gpu_contiguous(input_shuffled)
        # When dropout is enabled the filters are multiplied by the
        # layer's dropout mask, broadcast over the first and last axes.
        contiguous_filters = gpu_contiguous(filters_flipped * (net.dropOutVectors[layerNum].dimshuffle('x', 0, 1, 'x') if self.dropout else 1.0))
        a = conv_op(contiguous_input, contiguous_filters)
        a = a[:, :out_size, :out_size, :]
        #Add bias
        a = a + net.varWeights[layerNum]['b'].dimshuffle(0, 'x', 'x', 'x')
    else:
        # Same computation in bc01 layout; the dropout mask is broadcast
        # over the first two axes here.
        a = T.nnet.conv2d(Xr, net.varWeights[layerNum]['w'] * (net.dropOutVectors[layerNum].dimshuffle('x', 'x', 0, 1) if self.dropout else 1.0), border_mode='valid', subsample=(self.stride, self.stride))
        #Add bias
        a = a + net.varWeights[layerNum]['b'].dimshuffle('x', 0, 'x', 'x')
    if self.pooling:
        if self.optimized:
            #Pooling
            # ds - side of square pool window
            # stride - Defines the stride size between successive pooling squares.
            # Setting this parameter smaller than sizeX produces overlapping pools.
            # Setting it equal to sizeX gives the usual, non-overlapping pools. Values greater than sizeX are not allowed.
            pool_op = MaxPool(ds=self.pooling_shape, stride=self.pooling_shape)
            contiguous_input = gpu_contiguous(a)
            a = pool_op(contiguous_input)
            a = a.dimshuffle(3, 0, 1, 2) # c01b to bc01
        else:
            #a = downsample.max_pool_2d(a, (self.pooling_shape, self.pooling_shape), ignore_border=False)
            a = pool.max_pool2D(a, (self.pooling_shape, self.pooling_shape), ignore_border=False)
    else:
        if self.optimized:
            a = a.dimshuffle(3, 0, 1, 2) # c01b to bc01
    # Back to 2D, transposed to match the layout of the layer input.
    a = T.flatten(a, outdim=2).T
    #Sigmoid
    # NOTE(review): the activation also receives self.pool_size; its
    # meaning is not visible here -- confirm against the activation
    # implementation.
    a = self.activation(a, self.pool_size)
    net.varArrayA.append(a)
def cgemm_batched(a, b, trans_a, trans_b, m, n, k, alpha, batch_size):
    '''
    Batch matrix multiplications. The matrices are expected to be stored
    contiguously in memory.

    Operands whose Python type name does not contain 'Cuda' are passed
    through ``gpu_contiguous`` before being handed to ``CGEMMBatched``.
    The remaining arguments are forwarded to the ``CGEMMBatched``
    constructor unchanged.
    '''
    # The contiguous version must replace the operand itself; binding it to
    # a throw-away name would hand the original variable to the op.
    if 'Cuda' not in str(type(a)):
        a = gpu_contiguous(a)
    if 'Cuda' not in str(type(b)):
        b = gpu_contiguous(b)
    op = CGEMMBatched(trans_a, trans_b, m, n, k, alpha, batch_size)
    return op(a, b)
def local_conv_dnn(node):
    """Replace a GpuConv node by GpuDnnConv.

    Only unit subsampling with border mode 'full' or 'valid' is handled;
    in every other case None is returned.
    """
    if not isinstance(node.op, GpuConv):
        return None
    if (node.op.subsample != (1, 1) or
            node.op.border_mode not in ['full', 'valid']):
        return None

    image, kernel = node.inputs
    dnn_op = GpuDnnConv(node.op.border_mode)
    return [dnn_op(gpu_contiguous(image), gpu_contiguous(kernel))]
def local_conv_dnn(node):
    """Replace a GpuConv node by a ``dnn_conv`` call.

    Only border modes 'full' and 'valid' are handled; in every other case
    None is returned. The subsampling of the original op is forwarded.
    """
    if not isinstance(node.op, GpuConv):
        return None
    if node.op.border_mode not in ['full', 'valid']:
        return None

    image, kernel = node.inputs
    replacement = dnn_conv(gpu_contiguous(image), gpu_contiguous(kernel),
                           border_mode=node.op.border_mode,
                           subsample=node.op.subsample)
    return [replacement]
def fft_to_01bc(imgs, n_imgs, n_channels, img_shape):
    '''
    Batch FFT of bc01 images to 01bc form.

    ``imgs`` is passed through ``gpu_contiguous`` first unless its Python
    type name already contains 'Cuda'.
    '''
    fft_op = FFT_TO_01BC(n_imgs, n_channels, img_shape)
    already_cuda = 'Cuda' in str(type(imgs))
    if not already_cuda:
        imgs = gpu_contiguous(imgs)
    return fft_op(imgs)
# benchmark cudnn, convolution with kernel flipping # if hasattr(theano.sandbox.cuda, 'dnn') and 'dnn' not in skip_tests: # benchmark_three_ways('(auto) theano.sandbox.cuda.dnn.GpuDnnConv', # sharedX, sharedY, sharedW, X, Y, gW, gX, # mode.including('conv_dnn')) # benchmark caffe-like gemm convolution # Mimic THEANO_FLAGS=optimizer_excluding=conv_dnn if 'gemm' not in skip_tests and 'caffe' not in skip_tests: # benchmark_three_ways('(auto) theano.sandbox.cuda.blas.GpuCorrMM', # sharedX, sharedY, sharedW, X, Y, gW, gX, # mode.excluding('conv_dnn')) # benchmark caffe-like gemm convolution again, directly, w/o kernel flipping Y = theano.sandbox.cuda.blas.GpuCorrMM(subsample=(dh, dw))( gpu_contiguous(X), gpu_contiguous(sharedW)) gW = theano.grad(None, wrt=sharedW, known_grads={Y: sharedY}) gX = theano.grad(None, wrt=X, known_grads={Y: sharedY}) benchmark_three_ways('(manual) theano.sandbox.cuda.blas.GpuCorrMM', sharedX, sharedY, sharedW, X, Y, gW, gX) # benchmark nvidia convolution directly # if hasattr(theano.sandbox.cuda, 'dnn') and 'dnn' not in skip_tests: # Y = theano.sandbox.cuda.dnn.dnn_conv(X, sharedW, 'valid', # subsample=(dh, dw)) # gW = theano.grad(None, wrt=sharedW, known_grads={Y: sharedY}) # gX = theano.grad(None, wrt=X, known_grads={Y: sharedY}) # benchmark_three_ways( # '(manual conv) theano.sandbox.cuda.dnn.GpuDnnConv', # sharedX, sharedY, sharedW, X, Y, gW, gX) # if int(os.environ.get('DNN_CORR', 0)):
def __init__(self, numpy_rng=None, input=None, is_input_layer=False,
             input_shape=(1, 28, 28), filter_shape=(2, 1, 5, 5),
             poolsize=(1, 1), activation=T.tanh, flatten=False,
             border_mode='valid', non_maximum_erasing=False,
             W=None, b=None):
    """Build a convolution + max-pooling layer on the cuda-convnet ops.

    Shapes are passed in bc01 order and permuted to c01b (channels, rows,
    columns, batch) here, because ``FilterActs`` and ``MaxPool`` are applied
    to c01b tensors.

    :param numpy_rng: random generator used to draw the initial weights
        when ``W`` is None
    :param input: symbolic input.  When ``is_input_layer`` is true it is
        reshaped to ``input_shape`` and permuted to c01b; otherwise it is
        used as given (presumably already c01b -- confirm against callers)
    :param is_input_layer: whether ``input`` still needs the reshape/permute
    :param input_shape: bc01 shape; indices 0..3 are read.
        NOTE(review): the default has only three entries and therefore
        fails on ``input_shape[3]`` -- callers must pass a 4-tuple
    :param filter_shape: bc01 filter shape (n_filters, n_channels, rows,
        cols)
    :param poolsize: pooling factor; only ``poolsize[0]`` is handed to
        ``MaxPool`` (square window, stride equal to the window)
    :param activation: elementwise nonlinearity applied before pooling
    :param flatten: if true the output is permuted back to bc01 and
        flattened to two dimensions
    :param border_mode: accepted for interface compatibility; not used here
    :param non_maximum_erasing: if true the output is the un-pooled
        activation with every value that is not the maximum of its pooling
        window set to zero
    :param W: optional shared variable holding the filters (c01b)
    :param b: optional shared variable holding the biases
    """
    assert input_shape[1] == filter_shape[1]
    if is_input_layer:
        self.input = input.reshape(input_shape).dimshuffle(1, 2, 3, 0)
    else:
        self.input = input

    # Now reconstruct the input_shape and filter_shape in c01b order
    input_shape = (input_shape[1], input_shape[2], input_shape[3],
                   input_shape[0])
    filter_shape = (filter_shape[1], filter_shape[2], filter_shape[3],
                    filter_shape[0])

    self.input_shape = input_shape
    self.filter_shape = filter_shape
    self.poolsize = poolsize

    self.activation = activation
    self.flatten = flatten

    fan_in = numpy.prod(filter_shape[:3])
    fan_out = (filter_shape[3] * numpy.prod(filter_shape[1:3]) /
               numpy.prod(poolsize))

    # initialize weights with random weights
    if W is None:
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        initial_W = numpy.asarray(
            numpy_rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX)
        if activation == T.nnet.sigmoid:
            initial_W *= 4
        W = theano.shared(value=initial_W, name='W')
    self.W = W

    # the bias is a 1D tensor -- one bias per output feature map
    if b is None:
        b_values = numpy.zeros((filter_shape[3], ),
                               dtype=theano.config.floatX)
        b = theano.shared(value=b_values, name='b')
    self.b = b

    # for momentum
    self.delta_W = theano.shared(
        value=numpy.zeros(filter_shape, dtype=theano.config.floatX),
        name='delta_W')
    self.delta_b = theano.shared(
        value=numpy.zeros_like(self.b.get_value(borrow=True),
                               dtype=theano.config.floatX),
        name='delta_b')

    # convolve input feature maps with filters
    conv_op = FilterActs()
    contiguous_input = gpu_contiguous(self.input)
    contiguous_filters = gpu_contiguous(self.W)
    conv_out = conv_op(contiguous_input, contiguous_filters)
    # c01b: the output-channel axis comes first, so the bias sits on axis 0
    y_out = activation(conv_out + self.b.dimshuffle(0, 'x', 'x', 'x'))

    pool_op = MaxPool(ds=poolsize[0], stride=poolsize[0])
    pooled_out = pool_op(y_out)

    if non_maximum_erasing:
        # Up-sample the pooled map back onto the activation grid.  In c01b
        # the spatial axes are 1 and 2 (axis 3 is the batch), and the pool
        # window is square with side poolsize[0], so both axes are repeated
        # by that factor.
        pool = poolsize[0]
        po = pooled_out.repeat(pool, axis=1).repeat(pool, axis=2)
        self.output = T.eq(y_out, po) * y_out
    else:
        self.output = pooled_out

    if flatten:
        self.output = self.output.dimshuffle(3, 0, 1, 2)  # c01b to bc01
        self.output = self.output.flatten(2)

    self.params = [self.W, self.b]
    self.delta_params = [self.delta_W, self.delta_b]
def conv3d(signals, filters, signals_shape=None, filters_shape=None,
           border_mode='valid', fast_conv=False, stride=1):
    """Convolve spatio-temporal filters with a movie.

    It flips the filters.

    :param signals: timeseries of images whose pixels have color channels.
            shape: [Ns, Ts, C, Hs, Ws]
    :param filters: spatio-temporal filters
            shape: [Nf, Tf, C, Hf, Wf]
    :param signals_shape: None or a tuple/list with the shape of signals
    :param filters_shape: None or a tuple/list with the shape of filters
    :param border_mode: The only one tested is 'valid'.  A single string is
        applied to the time, height and width axes alike.
    :param fast_conv: if true, the 2d convolutions are carried out with the
        pylearn2 cuda-convnet ``FilterActs`` op instead of
        ``tensor.nnet.conv2d``
    :param stride: stride handed to ``FilterActs``; only used when
        ``fast_conv`` is true

    :raises NotImplementedError: when the height and width border modes
        differ, for the 'same' spatial mode, and for 'full'/'same' on the
        time axis
    :raises ValueError: for an unknown border mode

    :note: Work on the GPU.
           Another way to define signals: (batch, time, in channel, row,
           column)
           Another way to define filters: (out channel,time,in channel, row,
           column)

    :see: Someone made a script that shows how to swap the axes between
          both 3d convolution implementations in Theano. See the last
          `attachment
          <https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_.
    """
    if isinstance(border_mode, str):
        border_mode = (border_mode, border_mode, border_mode)

    if signals_shape is None:
        _signals_shape_5d = signals.shape
    else:
        _signals_shape_5d = signals_shape

    if filters_shape is None:
        _filters_shape_5d = filters.shape
    else:
        _filters_shape_5d = filters_shape

    # Fold the time axis into the leading axis so that a 2d convolution can
    # be used: every frame is convolved with every filter time-slice.
    _signals_shape_4d = (
        _signals_shape_5d[0] * _signals_shape_5d[1],
        _signals_shape_5d[2],
        _signals_shape_5d[3],
        _signals_shape_5d[4],
    )
    _filters_shape_4d = (
        _filters_shape_5d[0] * _filters_shape_5d[1],
        _filters_shape_5d[2],
        _filters_shape_5d[3],
        _filters_shape_5d[4],
    )

    if border_mode[1] != border_mode[2]:
        raise NotImplementedError('height and width bordermodes must match')
    conv2d_signal_shape = _signals_shape_4d
    conv2d_filter_shape = _filters_shape_4d
    if signals_shape is None:
        conv2d_signal_shape = None
    if filters_shape is None:
        conv2d_filter_shape = None

    if not fast_conv:
        out_4d = tensor.nnet.conv2d(
            signals.reshape(_signals_shape_4d),
            filters.reshape(_filters_shape_4d),
            image_shape=conv2d_signal_shape,
            filter_shape=conv2d_filter_shape,
            border_mode=border_mode[1])  # ignoring border_mode[2]
    else:
        from theano.sandbox.cuda.basic_ops import gpu_contiguous
        from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
        signals_4d = signals.reshape(_signals_shape_4d)
        filters_4d = filters.reshape(_filters_shape_4d)

        # inputs have to be c01b
        signals_c01b = signals_4d.dimshuffle(1, 2, 3, 0)
        filters_c01b = filters_4d.dimshuffle(1, 2, 3, 0)
        op = FilterActs(stride=stride, partial_sum=1)
        contiguous_input = gpu_contiguous(signals_c01b)
        contiguous_filters = gpu_contiguous(filters_c01b)
        out_4d = op(contiguous_input, contiguous_filters)
        out_4d = out_4d.dimshuffle(3, 0, 1, 2)  # c01b back to bc01

        # NOTE(review): the nesting of this shortcut could not be recovered
        # from the whitespace-collapsed source; it is placed in the fast
        # path because it hard-codes 'valid' output sizes -- confirm.
        if _filters_shape_5d[1] == 1:  # no time convolution
            out_5d = out_4d.reshape((
                _signals_shape_5d[0],  # b
                _signals_shape_5d[1],  # time
                _filters_shape_5d[0],  # c
                _signals_shape_5d[3] - (_filters_shape_5d[3] - 1),
                _signals_shape_5d[4] - (_filters_shape_5d[4] - 1),
            ))
            return out_5d

    # reshape the output to restore its original size
    # shape = Ns, Ts, Nf, Tf, W-Wf+1, H-Hf+1
    if border_mode[1] == 'valid':
        out_tmp = out_4d.reshape((
            _signals_shape_5d[0],  # Ns
            _signals_shape_5d[1],  # Ts
            _filters_shape_5d[0],  # Nf
            _filters_shape_5d[1],  # Tf
            _signals_shape_5d[3] - _filters_shape_5d[3] + 1,
            _signals_shape_5d[4] - _filters_shape_5d[4] + 1,
        ))
    elif border_mode[1] == 'full':
        out_tmp = out_4d.reshape((
            _signals_shape_5d[0],  # Ns
            _signals_shape_5d[1],  # Ts
            _filters_shape_5d[0],  # Nf
            _filters_shape_5d[1],  # Tf
            _signals_shape_5d[3] + _filters_shape_5d[3] - 1,
            _signals_shape_5d[4] + _filters_shape_5d[4] - 1,
        ))
    elif border_mode[1] == 'same':
        raise NotImplementedError()
    else:
        raise ValueError('invalid border mode', border_mode[1])

    # now sum out along the Tf to get the output
    # but we have to sum on a diagonal through the Tf and Ts submatrix.
    if border_mode[0] == 'valid':
        out_5d = diagonal_subtensor(out_tmp, 1, 3).sum(axis=3)
    elif border_mode[0] in ('full', 'same'):
        raise NotImplementedError('sequence border mode', border_mode[0])
    else:
        # Report the time-axis mode that was actually rejected.
        raise ValueError('invalid border mode', border_mode[0])
    return out_5d
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
             conv_mode='conv', direction_hint=None):
    """
    GPU convolution using cuDNN from NVIDIA.

    The memory layout to use is 'bc01', that is 'batch', 'channel',
    'first dim', 'second dim' in that order.

    :param img: images to do the convolution over
    :param kerns: convolution filters
    :param border_mode: one of 'valid', 'full'; additionally, the padding
        size could be directly specified by an integer or a pair of integers
    :param subsample: perform subsampling of the output (default: (1, 1))
    :param conv_mode: perform convolution (kernels flipped) or
        cross-correlation. One of 'conv', 'cross'. (default: 'conv')
    :param direction_hint: Used by graph optimizers to change algorithm
        choice. By default, GpuDnnConv will be used to carry out the
        convolution. If border_mode is 'valid', subsample is (1,1) and
        direction_hint is 'bprop weights', it will use GpuDnnConvGradW.
        If border_mode is 'full', subsample is (1,1) and direction_hint is
        *not* 'forward!', it will use GpuDnnConvGradI.
        This parameter is used internally by graph optimizers and may be
        removed at any time without a deprecation period. You have been
        warned.

    :warning: The cuDNN library only works with GPU that have a compute
        capability of 3.0 or higher.  This means that older GPU will not
        work with this Op.
    """
    if (border_mode == 'valid' and subsample == (1, 1) and
            direction_hint == 'bprop weights'):
        # GpuDnnConvGradW was requested: phrase the computation as the
        # weight gradient of a 'fake' valid cross-correlation.
        img_t = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
        # The flip has to be done by hand here; these kernels are not the
        # ones conv_mode='conv' would flip inside GpuDnnConvGradW.
        oriented = kerns[:, :, ::-1, ::-1] if conv_mode == 'conv' else kerns
        kerns_t = gpu_contiguous(oriented.dimshuffle(1, 0, 2, 3))
        out_rows = img_t.shape[2] - kerns_t.shape[2] + 1
        out_cols = img_t.shape[3] - kerns_t.shape[3] + 1
        out_shape = theano.tensor.stack(kerns_t.shape[1], img_t.shape[1],
                                        out_rows, out_cols)
        make_desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
                                   conv_mode='cross')
        desc = make_desc(img_t.shape, out_shape)
        grad_w = GpuDnnConvGradW()(img_t, kerns_t, desc,
                                   out_shape[2], out_shape[3])
        return as_cuda_ndarray_variable(grad_w.dimshuffle(1, 0, 2, 3))

    if (border_mode == 'full' and subsample == (1, 1) and
            direction_hint != 'forward!'):
        # A full convolution is the input gradient of a valid one, which
        # GpuDnnConvGradI computes faster; set up that 'fake' convolution.
        img_c = gpu_contiguous(img)
        kerns_t = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
        swapped_mode = 'cross' if conv_mode == 'conv' else 'conv'
        full_rows = img_c.shape[2] + kerns_t.shape[2] - 1
        full_cols = img_c.shape[3] + kerns_t.shape[3] - 1
        out_shape = theano.tensor.stack(img_c.shape[0], kerns_t.shape[1],
                                        full_rows, full_cols)
        make_desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
                                   conv_mode=swapped_mode)
        desc = make_desc(out_shape, kerns_t.shape)
        return GpuDnnConvGradI()(kerns_t, img_c, desc,
                                 out_shape[2], out_shape[3])

    # Standard case: GpuDnnConv with the requested padding and stride.
    img_c = gpu_contiguous(img)
    kerns_c = gpu_contiguous(kerns)
    make_desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                               conv_mode=conv_mode)
    desc = make_desc(img_c.shape, kerns_c.shape)
    return GpuDnnConv()(img_c, kerns_c, desc)
def grad(self, inp, grads):
    """Return the gradient of the pooling output with respect to its input.

    :param inp: one-element sequence holding the op input ``x``
    :param grads: one-element sequence holding the gradient w.r.t. the
        output
    :returns: one-element list with the ``MaxPoolGrad`` result
    """
    (x,) = inp
    (gz,) = grads
    contiguous_gz = gpu_contiguous(gz)
    # The gradient op also takes the forward result, so it is rebuilt here.
    pooled = self(x)
    grad_op = MaxPoolGrad(self.ds, self.stride, self.start)
    return [grad_op(x, pooled, contiguous_gz)]
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2),
             cuda_convnet=0, W=None, b=None, activation=T.tanh,
             border_mode='valid', partial_sum=1, pad=0):
    """
    Allocate a LeNetConvPoolLayer with shared variable internal parameters.

    The layer convolves, max-pools, adds a per-feature-map bias and applies
    ``activation``.  With ``cuda_convnet`` set, the pylearn2 cuda-convnet
    ops are used and the channel/feature-map axes are read from the c01b
    positions of the shapes; otherwise Theano's ``conv2d`` /
    ``max_pool_2d`` are used with bc01 shapes.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols);
                     with cuda_convnet it is handed to MaxPool as
                     (ds, stride)

    :param cuda_convnet: truthy to use the cuda-convnet ops
    :param W: optional initial filter values (array-like)
    :param b: optional initial bias values (array-like)
    :param activation: elementwise nonlinearity applied to the output
    :param border_mode: forwarded to ``conv.conv2d`` (Theano path only)
    :param partial_sum: forwarded to ``FilterActs`` (cuda-convnet path only)
    :param pad: forwarded to ``FilterActs`` (cuda-convnet path only)
    """
    # The channel count sits on axis 0 for cuda-convnet, axis 1 otherwise.
    channel_axis = 0 if cuda_convnet else 1
    assert image_shape[channel_axis] == filter_shape[channel_axis]
    self.input = input

    float_x = theano.config.floatX

    if W is not None:
        W_values = numpy.asarray(W, dtype=float_x)
    else:
        if cuda_convnet:
            n_in = numpy.prod(filter_shape[0:3])
            n_out = (filter_shape[3] * numpy.prod(filter_shape[1:3]) /
                     numpy.prod(poolsize))
            # TODO: correct numpy.prod(poolsize). Theano's max_pool_2d uses
            # a tuple to signify (x,y) poolsize and doesn't overlap these.
            # cuda-convnet uses the tuple to signify a square pool of size
            # x^2, with a stride y. So only if x == y in this case does
            # numpy.prod(poolsize) work.
        else:
            # every hidden unit sees
            # "num input feature maps * filter height * filter width" inputs
            n_in = numpy.prod(filter_shape[1:])
            # every lower-layer unit gets gradient from
            # "num output feature maps * filter height * filter width" /
            # pooling size
            n_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                     numpy.prod(poolsize))
        # uniform initialisation in [-limit, limit]
        limit = numpy.sqrt(6. / (n_in + n_out))
        W_values = numpy.asarray(
            rng.uniform(low=-limit, high=limit, size=filter_shape),
            dtype=float_x)

    # one bias per output feature map
    if b is not None:
        b_values = numpy.asarray(b, dtype=float_x)
    else:
        n_maps = filter_shape[3] if cuda_convnet else filter_shape[0]
        b_values = numpy.zeros((n_maps, ), dtype=float_x)

    self.W = theano.shared(value=W_values, borrow=True)
    self.b = theano.shared(value=b_values, borrow=True)

    # convolution
    if cuda_convnet:
        filter_acts = FilterActs(partial_sum=partial_sum, pad=pad)
        conv_out = filter_acts(gpu_contiguous(input),
                               gpu_contiguous(self.W))
    else:
        conv_out = conv.conv2d(input=input, filters=self.W,
                               image_shape=image_shape,
                               filter_shape=filter_shape,
                               border_mode=border_mode)

    # max-pooling; skipped for a window of 1 (cuda-convnet only looks at
    # poolsize[0], the Theano path needs both entries to be 1)
    if poolsize[0] == 1 and (cuda_convnet or poolsize[1] == 1):
        pooled_out = conv_out
    elif cuda_convnet:
        max_pool = MaxPool(ds=poolsize[0], stride=poolsize[1])
        pooled_out = max_pool(gpu_contiguous(conv_out))
    else:
        pooled_out = downsample.max_pool_2d(input=conv_out, ds=poolsize,
                                            ignore_border=True)

    # The bias vector is broadcast over every axis except the feature-map
    # axis: axis 0 in c01b, axis 1 in bc01.
    if cuda_convnet:
        bias_pattern = (0, 'x', 'x', 'x')
    else:
        bias_pattern = ('x', 0, 'x', 'x')
    self.output = activation(pooled_out + self.b.dimshuffle(*bias_pattern))

    # store parameters of this layer
    self.params = [self.W, self.b]
def __init__(self, numpy_rng=None, input=None, filter_shape=(2, 1, 5, 5),
             poolsize=(1, 1), activation=T.nnet.sigmoid, flatten=False,
             use_fast=False):
    """Build a convolution + activation + max-pooling layer.

    :param numpy_rng: random generator used to draw the initial weights
    :param input: symbolic input tensor
    :param filter_shape: (n_filters, n_input_maps, filter rows, filter cols)
    :param poolsize: pooling factor; the fast path only uses ``poolsize[0]``
    :param activation: elementwise nonlinearity applied before pooling
    :param flatten: if true the output is flattened to two dimensions
    :param use_fast: if true use the pylearn2 cuda-convnet ops (tensors are
        permuted to c01b and back), otherwise Theano's ``conv2d`` and
        ``max_pool_2d``
    """
    self.type = 'conv'

    self.input = input
    self.filter_shape = filter_shape
    self.poolsize = poolsize
    self.activation = activation
    self.flatten = flatten

    float_x = theano.config.floatX
    n_in = numpy.prod(filter_shape[1:])
    n_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
             numpy.prod(poolsize))

    # uniform initialisation in [-limit, limit], widened 4x for sigmoid
    limit = numpy.sqrt(6. / (n_in + n_out))
    initial_W = numpy.asarray(
        numpy_rng.uniform(low=-limit, high=limit, size=filter_shape),
        dtype=float_x)
    if activation == T.nnet.sigmoid:
        initial_W *= 4
    self.W = theano.shared(value=initial_W, name='W')

    # one bias per output feature map
    self.b = theano.shared(
        value=numpy.zeros((filter_shape[0], ), dtype=float_x), name='b')

    if use_fast:
        from theano.sandbox.cuda.basic_ops import gpu_contiguous
        from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
        from pylearn2.sandbox.cuda_convnet.pool import MaxPool

        images_c01b = self.input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        filters_c01b = self.W.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        filter_acts = FilterActs()
        conv_c01b = filter_acts(gpu_contiguous(images_c01b),
                                gpu_contiguous(filters_c01b))
        act_c01b = activation(conv_c01b +
                              self.b.dimshuffle(0, 'x', 'x', 'x'))
        max_pool = MaxPool(ds=poolsize[0], stride=poolsize[0])
        # pool in c01b, then permute back to bc01
        self.output = max_pool(act_c01b).dimshuffle(3, 0, 1, 2)
    else:
        conv_out = conv.conv2d(input=self.input, filters=self.W,
                               filter_shape=filter_shape)
        act = activation(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        # downsample each feature map individually, using maxpooling
        self.output = downsample.max_pool_2d(input=act, ds=poolsize,
                                             ignore_border=True)

    if self.flatten:
        self.output = self.output.flatten(2)

    self.params = [self.W, self.b]
def output(self, *args, **kwargs):
    """Return this layer's pooled output.

    All arguments are forwarded to ``self.input_layer.output``; its result
    is passed through ``gpu_contiguous`` and then through ``self.pool_op``.
    """
    upstream = self.input_layer.output(*args, **kwargs)
    pool_input = gpu_contiguous(upstream)
    return self.pool_op(pool_input)
def __init__(self, numpy_rng=None, input=None, input_shape=(256, 1, 28, 28),
             filter_shape=(2, 1, 5, 5), poolsize=(1, 1), activation=T.tanh,
             flatten=False, border_mode='valid', non_maximum_erasing=False,
             W=None, b=None, use_fast=False, testing=False):
    """Build a convolution + activation + max-pooling layer (bc01 layout).

    :param numpy_rng: random generator used to draw the initial weights
        when ``W`` is None
    :param input: symbolic input; reshaped to ``input_shape``
    :param input_shape: (batch, channels, rows, cols)
    :param filter_shape: (n_filters, channels, filter rows, filter cols)
    :param poolsize: pooling factor; the fast path only uses ``poolsize[0]``
    :param activation: elementwise nonlinearity applied before pooling
    :param flatten: if true the output is flattened to two dimensions
    :param border_mode: forwarded to ``conv.conv2d`` (not used by the fast
        path)
    :param non_maximum_erasing: if true the output is the un-pooled
        activation with every value that is not the maximum of its pooling
        window set to zero
    :param W: optional shared variable holding the filters
    :param b: optional shared variable holding the biases
    :param use_fast: if true use the pylearn2 cuda-convnet ops
    :param testing: if true the batch size is taken from ``input`` itself
        and no static image shape is handed to ``conv2d``
    """
    self.type = 'conv'

    assert input_shape[1] == filter_shape[1]
    if testing:
        # Variable batch size: keep the symbolic leading dimension.
        self.input = input.reshape((input.shape[0], input_shape[1],
                                    input_shape[2], input_shape[3]))
        input_shape = None
    else:
        self.input = input.reshape(input_shape)
    self.filter_shape = filter_shape
    self.poolsize = poolsize

    self.activation = activation
    self.flatten = flatten

    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))

    # initialize weights with random weights
    if W is None:
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        initial_W = numpy.asarray(
            numpy_rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX)
        if activation == T.nnet.sigmoid:
            initial_W *= 4
        W = theano.shared(value=initial_W, name='W')
    self.W = W

    # the bias is a 1D tensor -- one bias per output feature map
    if b is None:
        b_values = numpy.zeros((filter_shape[0], ),
                               dtype=theano.config.floatX)
        b = theano.shared(value=b_values, name='b')
    self.b = b

    # for momentum
    self.delta_W = theano.shared(
        value=numpy.zeros(filter_shape, dtype=theano.config.floatX),
        name='delta_W')
    self.delta_b = theano.shared(
        value=numpy.zeros_like(self.b.get_value(borrow=True),
                               dtype=theano.config.floatX),
        name='delta_b')

    # convolve input feature maps with filters
    if use_fast:
        from theano.sandbox.cuda.basic_ops import gpu_contiguous
        from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
        from pylearn2.sandbox.cuda_convnet.pool import MaxPool
        input_shuffled = self.input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        filters_shuffled = self.W.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        conv_op = FilterActs()
        contiguous_input = gpu_contiguous(input_shuffled)
        contiguous_filters = gpu_contiguous(filters_shuffled)
        conv_out_shuffled = conv_op(contiguous_input, contiguous_filters)
        y_out_shuffled = activation(
            conv_out_shuffled + self.b.dimshuffle(0, 'x', 'x', 'x'))
        pool_op = MaxPool(ds=poolsize[0], stride=poolsize[0])
        pooled_out = pool_op(y_out_shuffled).dimshuffle(3, 0, 1, 2)
        # Non-maximum erasing below needs the un-pooled activation in the
        # same bc01 layout as pooled_out; without this the fast path
        # raised NameError on y_out.
        y_out = y_out_shuffled.dimshuffle(3, 0, 1, 2)  # c01b to bc01
    else:
        conv_out = conv.conv2d(input=self.input, filters=self.W,
                               filter_shape=filter_shape,
                               image_shape=input_shape,
                               border_mode=border_mode)
        y_out = activation(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        # downsample each feature map individually, using maxpooling
        pooled_out = downsample.max_pool_2d(input=y_out, ds=poolsize,
                                            ignore_border=True)

    if non_maximum_erasing:
        # Up-sample the pooled map onto the activation grid and keep only
        # the entries that equal their window maximum.
        ds = tuple(poolsize)
        po = pooled_out.repeat(ds[0], axis=2).repeat(ds[1], axis=3)
        self.output = T.eq(y_out, po) * y_out
    else:
        self.output = pooled_out

    if flatten:
        self.output = self.output.flatten(2)

    self.params = [self.W, self.b]
    self.delta_params = [self.delta_W, self.delta_b]
def __init__(self, input, image_shape, filter_shape, convstride, padsize,
             group, poolsize, poolstride, bias_init, lrn=False,
             lib_conv='cudnn',
             ):
    """Build a conv -> ReLU -> (max-pool) -> (LRN) layer.

    lib_conv can be cudnn or cudaconvnet.  With 'cudnn' the shapes are read
    as bc01 (channels on axis 1, filters on axis 0); with 'cudaconvnet'
    they are read as c01b (channels on axis 0, filters on axis 3).

    :param input: symbolic input tensor in the layout of ``lib_conv``
    :param image_shape: shape of ``input``
    :param filter_shape: shape of the filter bank
    :param convstride: convolution stride (same in both directions)
    :param padsize: padding handed to the convolution
    :param group: 1 for a plain convolution, 2 to split channels and
        filters into two independent halves
    :param poolsize: pooling window; 1 disables pooling
    :param poolstride: pooling stride
    :param bias_init: initial bias value handed to ``Weight``
    :param lrn: if true apply ``CrossChannelNormalization`` to the output
    :param lib_conv: 'cudnn' or 'cudaconvnet'
    """
    assert group in [1, 2]
    assert lib_conv in ['cudnn', 'cudaconvnet']

    self.filter_size = filter_shape
    self.convstride = convstride
    self.padsize = padsize
    self.poolsize = poolsize
    self.poolstride = poolstride
    if lib_conv == 'cudnn':
        self.channel = image_shape[1]
    else:
        self.channel = image_shape[0]
    self.lrn = lrn
    self.lib_conv = lib_conv

    self.filter_shape = np.asarray(filter_shape)
    self.image_shape = np.asarray(image_shape)

    if self.lrn:
        self.lrn_func = CrossChannelNormalization()

    if group == 1:
        self.W = Weight(self.filter_shape)
        if lib_conv == 'cudnn':
            self.b = Weight(self.filter_shape[0], bias_init, std=0)
        else:
            self.b = Weight(self.filter_shape[3], bias_init, std=0)
    else:
        # Each half gets half the channels and half the filters.  Floor
        # division keeps the integer shape arrays integral on Python 3 too.
        if lib_conv == 'cudnn':
            self.filter_shape[1] //= 2
            self.filter_shape[0] //= 2
            self.image_shape[1] //= 2
            # NOTE(review): in bc01, axis 0 of image_shape is the batch
            # size; halving it looks unintended, but self.image_shape is
            # not read again in this method -- confirm.
            self.image_shape[0] //= 2
        else:
            self.filter_shape[0] //= 2
            self.filter_shape[3] //= 2
            self.image_shape[0] //= 2
            self.image_shape[3] //= 2
        self.W0 = Weight(self.filter_shape)
        self.W1 = Weight(self.filter_shape)
        if lib_conv == 'cudnn':
            self.b0 = Weight(self.filter_shape[0], bias_init, std=0)
            self.b1 = Weight(self.filter_shape[0], bias_init, std=0)
        else:
            self.b0 = Weight(self.filter_shape[3], bias_init, std=0)
            self.b1 = Weight(self.filter_shape[3], bias_init, std=0)

    # Integer split point for the two channel groups.
    half = self.channel // 2

    if lib_conv == 'cudaconvnet':
        self.conv_op = FilterActs(pad=self.padsize, stride=self.convstride,
                                  partial_sum=1)

        # Conv
        if group == 1:
            contiguous_input = gpu_contiguous(input)
            contiguous_filters = gpu_contiguous(self.W.val)
            conv_out = self.conv_op(contiguous_input, contiguous_filters)
            conv_out = conv_out + self.b.val.dimshuffle(0, 'x', 'x', 'x')
        else:
            contiguous_input0 = gpu_contiguous(input[:half, :, :, :])
            contiguous_filters0 = gpu_contiguous(self.W0.val)
            conv_out0 = self.conv_op(
                contiguous_input0, contiguous_filters0)
            conv_out0 = conv_out0 + \
                self.b0.val.dimshuffle(0, 'x', 'x', 'x')

            contiguous_input1 = gpu_contiguous(input[half:, :, :, :])
            contiguous_filters1 = gpu_contiguous(self.W1.val)
            conv_out1 = self.conv_op(
                contiguous_input1, contiguous_filters1)
            conv_out1 = conv_out1 + \
                self.b1.val.dimshuffle(0, 'x', 'x', 'x')
            conv_out = T.concatenate([conv_out0, conv_out1], axis=0)

        # ReLu
        conv_out = T.maximum(conv_out, 0)
        self.output = gpu_contiguous(conv_out)

        # Pooling
        if self.poolsize != 1:
            self.pool_op = MaxPool(ds=poolsize, stride=poolstride)
            self.output = self.pool_op(self.output)

    else:
        if group == 1:
            conv_out = dnn.dnn_conv(img=input,
                                    kerns=self.W.val,
                                    subsample=(convstride, convstride),
                                    border_mode=padsize,
                                    )
            conv_out = conv_out + self.b.val.dimshuffle('x', 0, 'x', 'x')
        else:
            input1, input2 = \
                theano.tensor.split(input, [half, half], 2, axis=1)
            conv_out0 = \
                dnn.dnn_conv(img=input1,
                             kerns=self.W0.val,
                             subsample=(convstride, convstride),
                             border_mode=padsize,
                             )
            conv_out0 = conv_out0 + \
                self.b0.val.dimshuffle('x', 0, 'x', 'x')

            conv_out1 = \
                dnn.dnn_conv(img=input2,
                             kerns=self.W1.val,
                             subsample=(convstride, convstride),
                             border_mode=padsize,
                             )
            conv_out1 = conv_out1 + \
                self.b1.val.dimshuffle('x', 0, 'x', 'x')
            # self.conv_out = conv_out1
            conv_out = T.concatenate([conv_out0, conv_out1], axis=1)

        # ReLu
        self.output = T.maximum(conv_out, 0)

        # Pooling
        if self.poolsize != 1:
            self.output = dnn.dnn_pool(self.output,
                                       ws=(poolsize, poolsize),
                                       stride=(poolstride, poolstride))

    # LRN
    if self.lrn:
        self.output = self.lrn_func(self.output)

    if group == 1:
        self.params = [self.W.val, self.b.val]
        self.weight_type = ['W', 'b']
    else:
        self.params = [self.W0.val, self.b0.val, self.W1.val, self.b1.val]
        self.weight_type = ['W', 'b', 'W', 'b']

    # Single-argument call form prints the same text on Python 2 and 3.
    print("conv ({}) layer with shape_in: {}".format(lib_conv,
                                                      str(image_shape)))
def ifft_to_bc01(ffts, n_imgs, n_channels, img_shape):
    '''
    Batch inverse FFT of 01bc spectra back to bc01 images.

    Builds an ``IFFT_TO_BC01`` op from ``n_imgs``, ``n_channels`` and
    ``img_shape`` and applies it to ``ffts``.  When the type name of
    ``ffts`` does not contain 'Cuda', ``ffts`` is first passed through
    ``gpu_contiguous``.
    '''
    ifft_op = IFFT_TO_BC01(n_imgs, n_channels, img_shape)
    already_cuda = 'Cuda' in str(type(ffts))
    gpu_ffts = ffts if already_cuda else gpu_contiguous(ffts)
    return ifft_op(gpu_ffts)
import sys
# The voxnet package lives one directory up; extend the path before
# importing it.
sys.path.append("../")
print(sys.version)
from voxnet import isovox
from theano.sandbox.cuda.basic_ops import gpu_contiguous
from theano.sandbox.cuda.blas import GpuCorr3dMM
import numpy as np

vis_mat = np.load('../../voxnet/scripts/weights.npz')
W1 = vis_mat['conv1.W']  # size(32,1,5,5,5)
W2 = vis_mat['conv2.W']  # size(32,32,3,3,3)

contiguous_W1 = gpu_contiguous(W1)
contiguous_W2 = gpu_contiguous(W2)
strides = (1, 1, 1)
pad = (2, 2, 2)
# Correlate the second-layer filters with the first-layer filters.
corr_mm_op = GpuCorr3dMM(subsample=strides, pad=pad)(contiguous_W2,
                                                     contiguous_W1)
print(corr_mm_op)

size = 32
wviz = W1[:, 0, :, :, :]
for i in range(wviz.shape[0]):
    w = wviz[i, :, :, :]
    # centerize the plot
    fz = len(w)
    xd = np.zeros((size, size, size))
    # Floor division: the offset is used as a slice bound and must stay an
    # integer on Python 3 as well.
    pad = (size - fz) // 2
    xd[pad:pad + fz, pad:pad + fz, pad:pad + fz] = w
    # only visualize the largest value
def __init__(self, input, image_shape, filter_shape, convstride, padsize,
             group, poolsize, poolstride, bias_init, lrn=False):
    """Build a cuda-convnet conv -> ReLU -> (max-pool) -> (LRN) layer.

    Shapes are read in c01b order: channels on axis 0, filters on axis 3.

    :param input: symbolic input tensor (c01b)
    :param image_shape: shape of ``input``
    :param filter_shape: shape of the filter bank
    :param convstride: convolution stride
    :param padsize: padding handed to ``FilterActs``
    :param group: 1 for a plain convolution, 2 to split channels and
        filters into two independent halves
    :param poolsize: pooling window; 1 disables pooling
    :param poolstride: pooling stride
    :param bias_init: initial bias value handed to ``Weight``
    :param lrn: if true apply ``CrossChannelNormalization`` to the output
    """
    self.filter_size = filter_shape
    self.convstride = convstride
    self.padsize = padsize
    self.poolsize = poolsize
    self.poolstride = poolstride
    self.channel = image_shape[0]
    self.lrn = lrn
    assert group in [1, 2]

    self.filter_shape = np.asarray(filter_shape)
    self.image_shape = np.asarray(image_shape)

    if self.lrn:
        self.lrn_func = CrossChannelNormalization()

    if group == 1:
        self.W = Weight(self.filter_shape)
        self.b = Weight(self.filter_shape[3], bias_init, std=0)
    else:
        # Each half gets half the channels and half the filters; floor
        # division keeps the values integral on Python 3 too.
        self.filter_shape[0] = self.filter_shape[0] // 2
        self.filter_shape[3] = self.filter_shape[3] // 2
        self.image_shape[0] = self.image_shape[0] // 2
        self.image_shape[3] = self.image_shape[3] // 2
        self.W0 = Weight(self.filter_shape)
        self.W1 = Weight(self.filter_shape)
        self.b0 = Weight(self.filter_shape[3], bias_init, std=0)
        self.b1 = Weight(self.filter_shape[3], bias_init, std=0)

    self.conv_op = FilterActs(pad=self.padsize, stride=self.convstride,
                              partial_sum=1)

    # Conv
    if group == 1:
        contiguous_input = gpu_contiguous(input)
        contiguous_filters = gpu_contiguous(self.W.val)
        conv_out = self.conv_op(contiguous_input, contiguous_filters)
        conv_out = conv_out + self.b.val.dimshuffle(0, 'x', 'x', 'x')
    else:
        # Integer split point for the two channel groups.
        half = self.channel // 2

        contiguous_input0 = gpu_contiguous(input[:half, :, :, :])
        contiguous_filters0 = gpu_contiguous(self.W0.val)
        conv_out0 = self.conv_op(contiguous_input0, contiguous_filters0)
        conv_out0 = conv_out0 + \
            self.b0.val.dimshuffle(0, 'x', 'x', 'x')

        contiguous_input1 = gpu_contiguous(input[half:, :, :, :])
        contiguous_filters1 = gpu_contiguous(self.W1.val)
        conv_out1 = self.conv_op(contiguous_input1, contiguous_filters1)
        conv_out1 = conv_out1 + \
            self.b1.val.dimshuffle(0, 'x', 'x', 'x')
        conv_out = T.concatenate([conv_out0, conv_out1], axis=0)

    # ReLu
    self.output = T.maximum(conv_out, 0)

    # Pooling
    if self.poolsize != 1:
        self.pool_op = MaxPool(ds=poolsize, stride=poolstride)
        self.output = self.pool_op(self.output)

    # LRN
    if self.lrn:
        # lrn_input = gpu_contiguous(self.output)
        self.output = self.lrn_func(self.output)

    if group == 1:
        self.params = [self.W.val, self.b.val]
        self.weight_type = ['W', 'b']
    else:
        self.params = [self.W0.val, self.b0.val, self.W1.val, self.b1.val]
        self.weight_type = ['W', 'b', 'W', 'b']

    # Single-argument call form prints the same text on Python 2 and 3.
    print("conv layer with shape_in: " + str(image_shape))
def local_softmax_dnn(node):
    """Rewrite a ``GpuSoftmax`` node as a ``GpuDnnSoftmax`` node.

    The first input is padded with two broadcastable trailing axes so it
    can be fed to the 'bc01' softmax in 'accurate' / 'channel' mode; the
    result is then reduced back to its first two axes.

    :param node: graph node whose ``op`` is inspected
    :returns: a one-element list with the replacement output when
        ``node.op`` is a ``GpuSoftmax``; ``None`` otherwise
    """
    if not isinstance(node.op, GpuSoftmax):
        return None
    padded = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
    softmax_op = GpuDnnSoftmax('bc01', 'accurate', 'channel')
    softmax_4d = softmax_op(gpu_contiguous(padded))
    return [as_cuda_ndarray_variable(softmax_4d.dimshuffle(0, 1))]
# NOTE(review): script excerpt; X, Wcnn, T, FilterActs and gpu_contiguous are
# defined outside this fragment.

# Each row of X is reshaped into a single-channel square image whose side is
# the square root of the row length; Wcnn is treated the same way.
sX = T.cast(T.sqrt(T.shape(X)[1]), 'int16')
Xr = T.reshape(X, (T.shape(X)[0], 1, sX, sX))
sW = T.cast(T.sqrt(T.shape(Wcnn)[1]), 'int16')
Wr = T.reshape(Wcnn, (T.shape(Wcnn)[0], 1, sW, sW))

#Convolve
#res = T.nnet.conv2d(Xr, Wr, border_mode='valid', subsample=(2, 2))
#res = T.nnet.conv2d(Xr, Wr, border_mode='full')
# Half of the 'valid' output side length; used below to crop the result.
size = T.floor((T.shape(Xr)[-1] - T.shape(Wr)[-1] + 1) / 2)
conv_op = FilterActs(stride=1)
input_shuffled = Xr.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
filters_shuffled = Wr.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
contiguous_input = gpu_contiguous(input_shuffled)
contiguous_filters = gpu_contiguous(filters_shuffled)
res = conv_op(contiguous_input, contiguous_filters)
res = res.dimshuffle(3, 0, 1, 2)  # c01b to bc01
# Keep only the leading size x size corner of every output map.
res = res[:, :, :size, :size]

#Add bias
#res = res + Bcnn.dimshuffle('x', 0, 'x', 'x')
#res = res + Bcnn.reshape((T.shape(Bcnn)[0],)).dimshuffle(0, 'x', 'x', 'x')

#Sigmoid
#res = 1 / (1 + T.exp(-res))

#Pooling
#pool_shape = (2, 2)
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(1, 1),
             conv_stride=(1, 1), pool_stride=None):
    """
    Allocate a LeNetConvPoolLayer with shared variable internal parameters.

    The convolution and pooling are carried out with the pylearn2
    cuda-convnet ops, so tensors are permuted from bc01 to c01b and back.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols);
                     only ``poolsize[0]`` is handed to ``MaxPool``

    :type conv_stride: tuple or list of length 2
    :param conv_stride: convolution stride; only ``conv_stride[0]`` is
                        handed to ``FilterActs``

    :type pool_stride: tuple or list of length 2, or None
    :param pool_stride: pooling stride; defaults to ``poolsize``.  Only
                        ``pool_stride[0]`` is handed to ``MaxPool``
    """
    if pool_stride is None:
        pool_stride = poolsize

    assert image_shape[1] == filter_shape[1]
    self.input = input

    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = numpy.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width" /
    #   pooling size
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))
    # initialize weights with random weights
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(
        numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX),
        borrow=True)

    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)

    # convolve input feature maps with filters (cuda-convnet replaces the
    # conv.conv2d call this layer was derived from)
    input_shuffled = input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
    filters_shuffled = self.W.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
    conv_op = FilterActs(stride=conv_stride[0], partial_sum=1)
    contiguous_input = gpu_contiguous(input_shuffled)
    contiguous_filters = gpu_contiguous(filters_shuffled)
    conv_out_shuffled = conv_op(contiguous_input, contiguous_filters)

    # downsample each feature map individually, using maxpooling
    # (cuda-convnet replaces downsample.max_pool_2d here)
    pool_op = MaxPool(ds=poolsize[0], stride=pool_stride[0])
    pooled_out_shuffled = pool_op(conv_out_shuffled)
    pooled_out = pooled_out_shuffled.dimshuffle(3, 0, 1, 2)  # c01b to bc01

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map
    # width & height
    self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

    # store parameters of this layer
    self.params = [self.W, self.b]
def make_node(self, X):
    """Build the Apply node for this op.

    ``X`` is converted with ``as_cuda_ndarray_variable`` and made
    contiguous; it must then be a 4-dimensional float32 variable.  The
    single output has the same type as the converted input.
    """
    gpu_x = gpu_contiguous(as_cuda_ndarray_variable(X))
    assert gpu_x.dtype == "float32"
    assert gpu_x.ndim == 4
    outputs = [gpu_x.type()]
    return theano.Apply(self, [gpu_x], outputs)
def __init__(self, input, image_shape, filter_shape, convstride, padsize,
             group, poolsize, poolstride, bias_init, lrn=False,
             lib_conv='cudnn',
             ):
    '''
    Build a conv -> ReLU -> (max-pool) -> (LRN) layer with a c01b interface.

    lib_conv can be cudnn (recommended) or cudaconvnet.  Input, filters and
    output are c01b in both cases; the cudnn path permutes to bc01
    internally and permutes the result back.

    :param input: symbolic input tensor (c01b)
    :param image_shape: shape of ``input``
    :param filter_shape: shape of the filter bank (c01b)
    :param convstride: convolution stride (same in both directions)
    :param padsize: padding handed to the convolution
    :param group: 1 for a plain convolution, 2 to split channels and
        filters into two independent halves
    :param poolsize: pooling window; 1 disables pooling
    :param poolstride: pooling stride
    :param bias_init: initial bias value handed to ``Weight``
    :param lrn: if true apply ``CrossChannelNormalization`` to the output
    :param lib_conv: 'cudnn' or 'cudaconvnet'

    :raises NotImplementedError: for any other ``lib_conv``
    '''
    self.filter_size = filter_shape
    self.convstride = convstride
    self.padsize = padsize
    self.poolsize = poolsize
    self.poolstride = poolstride
    self.channel = image_shape[0]
    self.lrn = lrn
    self.lib_conv = lib_conv
    assert group in [1, 2]

    self.filter_shape = np.asarray(filter_shape)
    self.image_shape = np.asarray(image_shape)

    if self.lrn:
        self.lrn_func = CrossChannelNormalization()

    if group == 1:
        self.W = Weight(self.filter_shape)
        self.b = Weight(self.filter_shape[3], bias_init, std=0)
    else:
        # Each half gets half the channels and half the filters; floor
        # division keeps the values integral on Python 3 too.
        self.filter_shape[0] = self.filter_shape[0] // 2
        self.filter_shape[3] = self.filter_shape[3] // 2
        self.image_shape[0] = self.image_shape[0] // 2
        self.image_shape[3] = self.image_shape[3] // 2
        self.W0 = Weight(self.filter_shape)
        self.W1 = Weight(self.filter_shape)
        self.b0 = Weight(self.filter_shape[3], bias_init, std=0)
        self.b1 = Weight(self.filter_shape[3], bias_init, std=0)

    # Integer split point for the two channel groups.
    half = self.channel // 2

    if lib_conv == 'cudaconvnet':
        self.conv_op = FilterActs(pad=self.padsize, stride=self.convstride,
                                  partial_sum=1)

        # Conv
        if group == 1:
            contiguous_input = gpu_contiguous(input)
            contiguous_filters = gpu_contiguous(self.W.val)
            conv_out = self.conv_op(contiguous_input, contiguous_filters)
            conv_out = conv_out + self.b.val.dimshuffle(0, 'x', 'x', 'x')
        else:
            contiguous_input0 = gpu_contiguous(input[:half, :, :, :])
            contiguous_filters0 = gpu_contiguous(self.W0.val)
            conv_out0 = self.conv_op(contiguous_input0,
                                     contiguous_filters0)
            conv_out0 = conv_out0 + \
                self.b0.val.dimshuffle(0, 'x', 'x', 'x')

            contiguous_input1 = gpu_contiguous(input[half:, :, :, :])
            contiguous_filters1 = gpu_contiguous(self.W1.val)
            conv_out1 = self.conv_op(contiguous_input1,
                                     contiguous_filters1)
            conv_out1 = conv_out1 + \
                self.b1.val.dimshuffle(0, 'x', 'x', 'x')
            conv_out = T.concatenate([conv_out0, conv_out1], axis=0)

        # ReLu
        self.output = T.maximum(conv_out, 0)

        # Pooling
        if self.poolsize != 1:
            self.pool_op = MaxPool(ds=poolsize, stride=poolstride)
            self.output = self.pool_op(self.output)

    elif lib_conv == 'cudnn':

        input_shuffled = input.dimshuffle(3, 0, 1, 2)  # c01b to bc01
        # in01out to outin01
        # print image_shape_shuffled
        # print filter_shape_shuffled
        if group == 1:
            W_shuffled = self.W.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
            conv_out = dnn.dnn_conv(img=input_shuffled,
                                    kerns=W_shuffled,
                                    subsample=(convstride, convstride),
                                    border_mode=padsize,
                                    )
            conv_out = conv_out + self.b.val.dimshuffle('x', 0, 'x', 'x')
        else:
            W0_shuffled = \
                self.W0.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
            conv_out0 = \
                dnn.dnn_conv(img=input_shuffled[:, :half, :, :],
                             kerns=W0_shuffled,
                             subsample=(convstride, convstride),
                             border_mode=padsize,
                             )
            conv_out0 = conv_out0 + \
                self.b0.val.dimshuffle('x', 0, 'x', 'x')
            W1_shuffled = \
                self.W1.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
            conv_out1 = \
                dnn.dnn_conv(img=input_shuffled[:, half:, :, :],
                             kerns=W1_shuffled,
                             subsample=(convstride, convstride),
                             border_mode=padsize,
                             )
            conv_out1 = conv_out1 + \
                self.b1.val.dimshuffle('x', 0, 'x', 'x')
            conv_out = T.concatenate([conv_out0, conv_out1], axis=1)

        # ReLu
        self.output = T.maximum(conv_out, 0)

        # Pooling
        if self.poolsize != 1:
            self.output = dnn.dnn_pool(self.output,
                                       ws=(poolsize, poolsize),
                                       stride=(poolstride, poolstride))

        self.output = self.output.dimshuffle(1, 2, 3, 0)  # bc01 to c01b

    else:
        # The exception used to be constructed but never raised, so an
        # unknown backend produced a layer without an output.
        raise NotImplementedError(
            "lib_conv can only be cudaconvnet or cudnn")

    # LRN
    if self.lrn:
        # lrn_input = gpu_contiguous(self.output)
        self.output = self.lrn_func(self.output)

    if group == 1:
        self.params = [self.W.val, self.b.val]
        self.weight_type = ['W', 'b']
    else:
        self.params = [self.W0.val, self.b0.val, self.W1.val, self.b1.val]
        self.weight_type = ['W', 'b', 'W', 'b']

    # Single-argument call form prints the same text on Python 2 and 3.
    print("conv ({}) layer with shape_in: {}".format(
        lib_conv, str(image_shape)))
# NOTE(review): loop-body excerpt from a larger benchmark routine.  mode, X,
# Y, gW, gX, sharedX, sharedY, sharedW, dh, dw, FilterActs, gpu_contiguous
# and benchmark_three_ways are defined outside this fragment, and the
# `continue` below belongs to a loop that is not visible here.

# Benchmark the graph with the 'cudnn' optimization included in the mode.
mode = mode.including('cudnn')
benchmark_three_ways(
    '(experimental, auto) theano.sandbox.cuda.dnn.GpuDnnConv',
    sharedX, sharedY, sharedW, X, Y, gW, gX, mode)

# benchmark caffe-like gemm convolution
# Mimic Theano flag THEANO_FLAGS=optimizer_including=conv_gemm
mode = theano.compile.get_default_mode()
mode = mode.including('conv_gemm')
benchmark_three_ways(
    '(experimental, auto) theano.sandbox.cuda.blas.GpuCorrMM',
    sharedX, sharedY, sharedW, X, Y, gW, gX, mode)

# benchmark caffe-like gemm convolution again, directly, w/o kernel flipping
Y = theano.sandbox.cuda.blas.GpuCorrMM(subsample=(dh, dw))(
    gpu_contiguous(X), gpu_contiguous(sharedW))
# Gradients w.r.t. the filters and the input; sharedY is supplied as the
# known gradient of Y, so no scalar cost is needed (hence cost=None).
gW = theano.grad(None, wrt=sharedW, known_grads={Y: sharedY})
gX = theano.grad(None, wrt=X, known_grads={Y: sharedY})
benchmark_three_ways(
    '(experimental, manual) theano.sandbox.cuda.blas.GpuCorrMM',
    sharedX, sharedY, sharedW, X, Y, gW, gX, mode)

# Drop the names bound to the shared variables before the next benchmark.
del sharedX
del sharedY
del sharedW

# benchmark cuda-convnet convolution
# we use the pylearn2 wrapper for cuda-convnet (http://benanne.github.io/2014/04/03/faster-convolutions-in-theano.html)
if FilterActs is None:
    continue  # skip cuda-convnet if pylearn2 wrapper is not available

#(channels, rows, columns, batch_size)
def __init__(self, input, image_shape, filter_shape, convstride, padsize,
             group, poolsize, poolstride, bias_init, lrn=False,
             lib_conv='cudnn', verbose=False):
    '''
    Build the symbolic graph: convolution -> ReLU -> max-pool -> LRN.

    lib_conv can be cudnn (recommended), corrmm or cudaconvnet.

    Parameters
    ----------
    input : symbolic 4D tensor; the inline comments treat it as c01b
        (channels, rows, columns, batch).
    image_shape : sequence of 4 ints; entry 0 is used as the number of
        input channels.
    filter_shape : sequence of 4 ints; entry 1 is used as the kernel
        height, entry 3 as the number of biases (one per filter).
    convstride : int, convolution stride applied on both spatial axes.
    padsize : int, zero padding applied on both spatial axes.
    group : 1 or 2. With 2 the input channels are split in two halves,
        each half is convolved with its own weights (W0/b0, W1/b1) and
        the two results are concatenated.
    poolsize, poolstride : ints; pooling is skipped when poolsize == 1.
    bias_init : forwarded to ``Weight`` when creating the biases.
    lrn : if true, ``CrossChannelNormalization`` is applied to the output.
    lib_conv : 'cudaconvnet', 'corrmm' or 'cudnn'.
    verbose : if true, print a one-line summary once the layer is built.

    Raises
    ------
    AssertionError : if group is not 1 or 2.
    NotImplementedError : if lib_conv names an unknown backend.
    '''
    self.filter_size = filter_shape
    self.convstride = convstride
    self.padsize = padsize
    self.poolsize = poolsize
    self.poolstride = poolstride
    self.channel = image_shape[0]
    self.lrn = lrn
    self.lib_conv = lib_conv
    self.verbose = verbose
    assert group in [1, 2]

    # np.array (a copy) rather than np.asarray: the shapes are halved in
    # place below and must never write into an ndarray owned by the caller.
    self.filter_shape = np.array(filter_shape)
    self.image_shape = np.array(image_shape)

    if self.lrn:
        self.lrn_func = CrossChannelNormalization()

    if group == 1:
        self.W = Weight(self.filter_shape)
        self.b = Weight(self.filter_shape[3], bias_init, std=0)
    else:
        # Floor division keeps the sizes integral on Python 2 and 3 alike.
        self.filter_shape[0] = self.filter_shape[0] // 2
        self.filter_shape[3] = self.filter_shape[3] // 2
        self.image_shape[0] = self.image_shape[0] // 2
        self.image_shape[3] = self.image_shape[3] // 2
        self.W0 = Weight(self.filter_shape)
        self.W1 = Weight(self.filter_shape)
        self.b0 = Weight(self.filter_shape[3], bias_init, std=0)
        self.b1 = Weight(self.filter_shape[3], bias_init, std=0)

    # Index at which the input channels are split when group == 2.
    half_channel = self.channel // 2

    if lib_conv == 'cudaconvnet':
        self.conv_op = FilterActs(pad=self.padsize, stride=self.convstride,
                                  partial_sum=1)

        from theano.sandbox.cuda.basic_ops import gpu_contiguous

        # Conv (everything stays in c01b, channel axis is 0)
        if group == 1:
            contiguous_input = gpu_contiguous(input)
            contiguous_filters = gpu_contiguous(self.W.val)
            conv_out = self.conv_op(contiguous_input, contiguous_filters)
            conv_out = conv_out + self.b.val.dimshuffle(0, 'x', 'x', 'x')
        else:
            contiguous_input0 = gpu_contiguous(
                input[:half_channel, :, :, :])
            contiguous_filters0 = gpu_contiguous(self.W0.val)
            conv_out0 = self.conv_op(
                contiguous_input0, contiguous_filters0)
            conv_out0 = conv_out0 + \
                self.b0.val.dimshuffle(0, 'x', 'x', 'x')

            contiguous_input1 = gpu_contiguous(
                input[half_channel:, :, :, :])
            contiguous_filters1 = gpu_contiguous(self.W1.val)
            conv_out1 = self.conv_op(
                contiguous_input1, contiguous_filters1)
            conv_out1 = conv_out1 + \
                self.b1.val.dimshuffle(0, 'x', 'x', 'x')
            conv_out = T.concatenate([conv_out0, conv_out1], axis=0)

        # ReLu
        conv_out = gpu_contiguous(conv_out)
        self.output = T.maximum(conv_out, 0)

        # Pooling
        if self.poolsize != 1:
            self.pool_op = MaxPool(ds=poolsize, stride=poolstride)
            self.output = self.pool_op(self.output)

    elif lib_conv == 'corrmm':

        from theano.sandbox.cuda.basic_ops import gpu_contiguous
        from theano.sandbox.cuda.blas import GpuCorrMM

        # 'half' pads (kernel_rows // 2, kernel_cols // 2). Only use the
        # shortcut when that is exactly (padsize, padsize); the previous
        # (k - 1) / 2 test picked 'half' for even kernels as well, which
        # pads one pixel more than requested.
        if filter_shape[1] // 2 == padsize == filter_shape[2] // 2:
            border_mode = 'half'
        else:
            border_mode = (padsize, padsize)
        self.corr_mm_op = GpuCorrMM(subsample=(convstride, convstride),
                                    border_mode=border_mode)
        # NOTE(review): the kernels are flipped before the correlation op,
        # presumably so this backend matches the convolution computed by
        # the other two -- confirm.
        flip_filters = True
        input_shuffled = input.dimshuffle(3, 0, 1, 2)  # c01b to bc01

        if group == 1:
            filters = self.W.val.dimshuffle(3, 0, 1, 2)
            if flip_filters:
                # flip top-down, left-right
                filters = filters[:, :, ::-1, ::-1]
            contiguous_filters = gpu_contiguous(filters)
            contiguous_input = gpu_contiguous(input_shuffled)
            conv_out = self.corr_mm_op(contiguous_input,
                                       contiguous_filters)
            conv_out = conv_out + self.b.val.dimshuffle('x', 0, 'x', 'x')
        else:
            W0_shuffled = \
                self.W0.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
            if flip_filters:
                W0_shuffled = W0_shuffled[:, :, ::-1, ::-1]
            contiguous_filters0 = gpu_contiguous(W0_shuffled)
            contiguous_input0 = gpu_contiguous(
                input_shuffled[:, :half_channel, :, :])
            conv_out0 = self.corr_mm_op(contiguous_input0,
                                        contiguous_filters0)
            conv_out0 = conv_out0 + \
                self.b0.val.dimshuffle('x', 0, 'x', 'x')

            W1_shuffled = \
                self.W1.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
            if flip_filters:
                W1_shuffled = W1_shuffled[:, :, ::-1, ::-1]
            contiguous_filters1 = gpu_contiguous(W1_shuffled)
            contiguous_input1 = gpu_contiguous(
                input_shuffled[:, half_channel:, :, :])
            conv_out1 = self.corr_mm_op(contiguous_input1,
                                        contiguous_filters1)
            conv_out1 = conv_out1 + \
                self.b1.val.dimshuffle('x', 0, 'x', 'x')
            conv_out = T.concatenate([conv_out0, conv_out1], axis=1)

        # ReLu
        self.output = T.maximum(conv_out, 0)

        # Pooling
        if self.poolsize != 1:
            from theano.tensor.signal import downsample
            self.output = downsample.max_pool_2d(
                self.output,
                ds=(poolsize, poolsize),
                st=(poolstride, poolstride),
                ignore_border=False,
                padding=(0, 0),
                mode='max',
            )

        self.output = self.output.dimshuffle(1, 2, 3, 0)  # bc01 to c01b

    elif lib_conv == 'cudnn':

        input_shuffled = input.dimshuffle(3, 0, 1, 2)  # c01b to bc01
        # in01out to outin01
        if group == 1:
            W_shuffled = self.W.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
            conv_out = dnn.dnn_conv(img=input_shuffled,
                                    kerns=W_shuffled,
                                    subsample=(convstride, convstride),
                                    border_mode=padsize,
                                    )
            conv_out = conv_out + self.b.val.dimshuffle('x', 0, 'x', 'x')
        else:
            W0_shuffled = \
                self.W0.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
            conv_out0 = \
                dnn.dnn_conv(img=input_shuffled[:, :half_channel, :, :],
                             kerns=W0_shuffled,
                             subsample=(convstride, convstride),
                             border_mode=padsize,
                             )
            conv_out0 = conv_out0 + \
                self.b0.val.dimshuffle('x', 0, 'x', 'x')

            W1_shuffled = \
                self.W1.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
            conv_out1 = \
                dnn.dnn_conv(img=input_shuffled[:, half_channel:, :, :],
                             kerns=W1_shuffled,
                             subsample=(convstride, convstride),
                             border_mode=padsize,
                             )
            conv_out1 = conv_out1 + \
                self.b1.val.dimshuffle('x', 0, 'x', 'x')
            conv_out = T.concatenate([conv_out0, conv_out1], axis=1)

        # ReLu
        self.output = T.maximum(conv_out, 0)

        # Pooling
        if self.poolsize != 1:
            self.output = dnn.dnn_pool(self.output,
                                       ws=(poolsize, poolsize),
                                       stride=(poolstride, poolstride))

        self.output = self.output.dimshuffle(1, 2, 3, 0)  # bc01 to c01b

    else:
        # The exception used to be built but never raised, so an unknown
        # backend fell through and left the layer without an output.
        raise NotImplementedError(
            "lib_conv can only be cudaconvnet, corrmm or cudnn")

    # LRN
    if self.lrn:
        # lrn_input = gpu_contiguous(self.output)
        self.output = self.lrn_func(self.output)

    if group == 1:
        self.params = [self.W.val, self.b.val]
        self.weight_type = ['W', 'b']
    else:
        self.params = [self.W0.val, self.b0.val, self.W1.val, self.b1.val]
        self.weight_type = ['W', 'b', 'W', 'b']

    if self.verbose:
        # Single-argument call: prints the same text on Python 2 and 3.
        print("conv ({}) layer with shape_in: {}".format(
            lib_conv, str(image_shape)))
def __init__(
        self,
        input,
        image_shape,
        filter_shape,
        convstride,
        padsize,
        group,
        poolsize,
        poolstride,
        bias_init,
        lrn=False,
        lib_conv='cudnn',
        poolpadsize=(0, 0),
        caffe_style=False,
        Bn=False,
):
    '''
    Build the symbolic graph: convolution -> (batch norm) -> ReLU -> pool.

    lib_conv can be cudnn (recommended) or cudaconvnet. Batch
    normalisation, caffe_style and poolpadsize only take effect on the
    cudnn path.

    Parameters
    ----------
    input : symbolic 4D tensor; the inline comments treat it as c01b
        (channels, rows, columns, batch).
    image_shape : sequence of 4 ints; entry 0 is used as the number of
        input channels.
    filter_shape : sequence of 4 ints; entry 3 is used as the number of
        filters (bias and batch-norm parameter size).
    convstride, padsize : ints, stride and zero padding of the
        convolution on both spatial axes.
    group : 1 or 2. With 2 the input channels are split in two halves,
        each convolved with its own weights, and the results are
        concatenated.
    poolsize, poolstride : ints; pooling is skipped when poolsize == 1.
    bias_init : forwarded to ``Weight`` when creating the biases.
    lrn : if true a ``CrossChannelNormalization`` op is created as
        ``self.lrn_func``; it is not applied inside this constructor.
    poolpadsize : padding forwarded to ``dnn.dnn_pool``.
    caffe_style : if true both spatial axes are reversed before pooling
        and reversed back afterwards (presumably to reproduce Caffe's
        pooling window alignment -- confirm).
    Bn : if true, normalise the convolution output with the shared
        variables ``self.mean`` / ``self.var`` / ``self.gamma`` /
        ``self.beta`` (initialised to an identity transform).

    Raises
    ------
    AssertionError : if group is not 1 or 2.
    NotImplementedError : if lib_conv names an unknown backend.
    '''
    self.filter_size = filter_shape
    self.convstride = convstride
    self.padsize = padsize
    self.poolsize = poolsize
    self.poolstride = poolstride
    self.channel = image_shape[0]
    self.lrn = lrn
    self.lib_conv = lib_conv
    # assert input.shape==image_shape
    assert group in [1, 2]

    # np.array (a copy) rather than np.asarray: the shapes are halved in
    # place below and must never write into an ndarray owned by the
    # caller (filter_shape[3] is read again for the batch-norm sizes).
    self.filter_shape = np.array(filter_shape)
    self.image_shape = np.array(image_shape)

    if self.lrn:
        self.lrn_func = CrossChannelNormalization(alpha=0.0005, k=1)
        # self.lrn_func = CrossChannelNormalization(alpha=0.0005)

    if group == 1:
        self.W = Weight(self.filter_shape)
        self.b = Weight(self.filter_shape[3], bias_init, std=0)
    else:
        # Floor division keeps the sizes integral on Python 2 and 3 alike.
        self.filter_shape[0] = self.filter_shape[0] // 2
        self.filter_shape[3] = self.filter_shape[3] // 2
        self.image_shape[0] = self.image_shape[0] // 2
        self.image_shape[3] = self.image_shape[3] // 2
        self.W0 = Weight(self.filter_shape)
        self.W1 = Weight(self.filter_shape)
        self.b0 = Weight(self.filter_shape[3], bias_init, std=0)
        self.b1 = Weight(self.filter_shape[3], bias_init, std=0)

    # Index at which the input channels are split when group == 2.
    half_channel = self.channel // 2

    if lib_conv == 'cudaconvnet':
        self.conv_op = FilterActs(pad=self.padsize, stride=self.convstride,
                                  partial_sum=1)

        # Conv (everything stays in c01b, channel axis is 0)
        if group == 1:
            contiguous_input = gpu_contiguous(input)
            contiguous_filters = gpu_contiguous(self.W.val)
            conv_out = self.conv_op(contiguous_input, contiguous_filters)
            conv_out = conv_out + self.b.val.dimshuffle(0, 'x', 'x', 'x')
        else:
            contiguous_input0 = gpu_contiguous(
                input[:half_channel, :, :, :])
            contiguous_filters0 = gpu_contiguous(self.W0.val)
            conv_out0 = self.conv_op(contiguous_input0,
                                     contiguous_filters0)
            conv_out0 = conv_out0 + \
                self.b0.val.dimshuffle(0, 'x', 'x', 'x')

            contiguous_input1 = gpu_contiguous(
                input[half_channel:, :, :, :])
            contiguous_filters1 = gpu_contiguous(self.W1.val)
            conv_out1 = self.conv_op(contiguous_input1,
                                     contiguous_filters1)
            conv_out1 = conv_out1 + \
                self.b1.val.dimshuffle(0, 'x', 'x', 'x')
            conv_out = T.concatenate([conv_out0, conv_out1], axis=0)

        # ReLu
        self.output = T.maximum(conv_out, 0)

        # Pooling
        if self.poolsize != 1:
            self.pool_op = MaxPool(ds=poolsize, stride=poolstride)
            self.output = self.pool_op(self.output)

    elif lib_conv == 'cudnn':

        input_shuffled = input.dimshuffle(3, 0, 1, 2)  # c01b to bc01
        # in01out to outin01
        if group == 1:
            W_shuffled = self.W.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
            conv_out = dnn.dnn_conv(
                img=input_shuffled,
                kerns=W_shuffled,
                subsample=(convstride, convstride),
                border_mode=padsize,
            )
            conv_out = conv_out + self.b.val.dimshuffle('x', 0, 'x', 'x')
        else:
            W0_shuffled = \
                self.W0.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
            conv_out0 = \
                dnn.dnn_conv(img=input_shuffled[:, :half_channel, :, :],
                             kerns=W0_shuffled,
                             subsample=(convstride, convstride),
                             border_mode=padsize,
                             )
            conv_out0 = conv_out0 + \
                self.b0.val.dimshuffle('x', 0, 'x', 'x')

            W1_shuffled = \
                self.W1.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
            conv_out1 = \
                dnn.dnn_conv(img=input_shuffled[:, half_channel:, :, :],
                             kerns=W1_shuffled,
                             subsample=(convstride, convstride),
                             border_mode=padsize,
                             )
            conv_out1 = conv_out1 + \
                self.b1.val.dimshuffle('x', 0, 'x', 'x')
            conv_out = T.concatenate([conv_out0, conv_out1], axis=1)

        self.conv_out = conv_out

        if Bn:
            # Warning: this is only meant for the testing phase -- the
            # statistics are fixed shared variables, not batch estimates.
            self.mean = theano.shared(
                value=np.zeros((1, filter_shape[3], 1, 1),
                               dtype=theano.config.floatX),
                broadcastable=[True, False, True, True],
                name='mean',
                borrow=True)
            self.var = theano.shared(
                value=np.ones((1, filter_shape[3], 1, 1),
                              dtype=theano.config.floatX),
                broadcastable=[True, False, True, True],
                name='var',
                borrow=True)
            self.gamma = theano.shared(
                value=np.ones((filter_shape[3], ),
                              dtype=theano.config.floatX),
                name='gamma',
                borrow=True)
            self.beta = theano.shared(
                value=np.zeros((filter_shape[3], ),
                               dtype=theano.config.floatX),
                name='beta',
                borrow=True)
            # gamma/beta are stored as vectors (one entry per filter).
            # batch_normalization applies plain elementwise broadcasting,
            # so a raw vector would line up with the last (width) axis of
            # the bc01 activations; reshape to (1, C, 1, 1) so they scale
            # and shift along the channel axis instead.
            conv_out = batch_normalization(
                inputs=conv_out,
                gamma=self.gamma.dimshuffle('x', 0, 'x', 'x'),
                beta=self.beta.dimshuffle('x', 0, 'x', 'x'),
                mean=self.mean,
                std=T.sqrt(self.var),
                mode='high_mem')

        # ReLu
        # NOTE: after this line self.Bn holds the pre-activation tensor
        # (normalised when the Bn flag is set), not the boolean flag.
        self.Bn = conv_out
        self.output = T.maximum(conv_out, 0)

        # Pooling
        if caffe_style:
            self.output = self.output[:, :, ::-1, ::-1]
        if self.poolsize != 1:
            self.output = dnn.dnn_pool(self.output,
                                       ws=(poolsize, poolsize),
                                       stride=(poolstride, poolstride),
                                       pad=poolpadsize)
        if caffe_style:
            self.output = self.output[:, :, ::-1, ::-1]

        self.output = self.output.dimshuffle(1, 2, 3, 0)  # bc01 to c01b

    else:
        # The exception used to be built but never raised, so an unknown
        # backend fell through and left the layer without an output.
        raise NotImplementedError(
            "lib_conv can only be cudaconvnet or cudnn")

    if group == 1:
        # The batch-norm shared variables are deliberately not exposed as
        # trainable parameters, so the list is the same with or without Bn.
        # self.params = [self.W.val, self.b.val, self.beta, self.gamma,
        #                self.mean, self.var]
        # self.weight_type = ['W', 'b', 'b', 'b', 'b', 'b']
        self.params = [self.W.val, self.b.val]
        self.weight_type = ['W', 'b']
    else:
        self.params = [self.W0.val, self.b0.val, self.W1.val, self.b1.val]
        self.weight_type = ['W', 'b', 'W', 'b']

    # Single-argument call: prints the same text on Python 2 and 3.
    print("conv ({}) layer with shape_in: {}".format(
        lib_conv, str(image_shape)))
def make_node(self, inp):
    """Create the Apply node of this op for the single input ``inp``.

    The input is converted to a CUDA ndarray variable and made contiguous
    before use; it must then have dtype float32 (AssertionError
    otherwise). The node has one output, whose type is obtained from
    ``self.output_type``.
    """
    cuda_variable = basic_ops.as_cuda_ndarray_variable(inp)
    inp = basic_ops.gpu_contiguous(cuda_variable)
    assert inp.dtype == "float32"
    node_inputs = [inp]
    node_outputs = [self.output_type(inp)()]
    return theano.Apply(self, node_inputs, node_outputs)
def weighted_max_pool_c01b(c01b, pool_shape, pool_stride, start=0):
    """Apply a ``WeightedMaxPool`` op to ``c01b``.

    Only square pooling windows and equal strides on both axes are
    accepted: ``pool_shape`` and ``pool_stride`` are 2-element sequences
    whose two entries must match (AssertionError otherwise). ``start`` is
    forwarded unchanged to the op. The input is made GPU-contiguous
    before the op is applied.
    """
    window = pool_shape[0]
    assert window == pool_shape[1]
    step = pool_stride[0]
    assert step == pool_stride[1]

    pooling = WeightedMaxPool(window, step, start)
    return pooling(gpu_contiguous(c01b))
def call(self, x, mask=None):
    """Apply ``self.pool_op`` to a GPU-contiguous copy of ``x``.

    ``mask`` is accepted but not used.
    """
    return self.pool_op(gpu_contiguous(x))
def __init__(self, rng, input=None, filt_def=None, pool_def=(2, 2),
             activation=None, drop_rate=0., input_noise=0., bias_noise=0.,
             W=None, b=None, name="", W_scale=1.0):
    """Build a noisy convolution + max-pooling layer with cuda-convnet ops.

    Graph: input -> (gaussian noise) -> (masking noise) -> convolution ->
    (gaussian noise) -> max-pool -> + bias -> activation.

    Parameters
    ----------
    rng : numpy random generator; seeds the layer's CURAND stream and
        draws the initial filters.
    input : symbolic 4D tensor; the inline comments treat it as bc01.
    filt_def : filter bank shape; entry 0 is the number of filters (one
        bias each).
    pool_def : (ds, stride) pair forwarded to ``MaxPool``.
    activation : callable applied to the biased pooled output; defaults
        to ``relu_actfun``.
    drop_rate : masking noise is applied to the input when > 1e-4.
    input_noise : std of gaussian noise added to the input when > 1e-4.
    bias_noise : std of gaussian noise added to the convolution output
        when > 1e-4.
    W, b : optional pre-existing parameters to use (and share) instead of
        creating new ones; assumed to be Theano shared variables laid out
        like freshly created ones -- TODO confirm with callers. These
        arguments used to be silently ignored.
    name : prefix for the names of newly created shared variables.
    W_scale : multiplier applied to freshly initialised filters only.
    """
    # Setup a shared random generator for this layer
    # self.rng = theano.tensor.shared_randomstreams.RandomStreams(
    #     rng.randint(100000))
    self.rng = CURAND_RandomStreams(rng.randint(1000000))

    self.clean_input = input

    # Add gaussian noise to the input (if desired)
    if input_noise > 1e-4:
        self.fuzzy_input = input + self.rng.normal(
            size=input.shape, avg=0.0, std=input_noise,
            dtype=theano.config.floatX)
    else:
        self.fuzzy_input = input

    # Apply masking noise to the input (if desired)
    if drop_rate > 1e-4:
        self.noisy_input = self._drop_from_input(self.fuzzy_input,
                                                 drop_rate)
    else:
        self.noisy_input = self.fuzzy_input

    # Set the activation function for the conv filters
    if activation:
        self.activation = activation
    else:
        self.activation = lambda x: relu_actfun(x)

    # Filters: reuse the caller's variable when one is given, otherwise
    # initialise with small gaussian weights.
    if W is None:
        W_init = 0.01 * np.asarray(rng.normal(size=filt_def),
                                   dtype=theano.config.floatX)
        W = theano.shared(value=(W_scale * W_init),
                          name="{0:s}_W".format(name))
    self.W = W

    # The bias is a 1D tensor -- one bias per output feature map.
    if b is None:
        b_init = np.zeros((filt_def[0], ), dtype=theano.config.floatX) + 0.1
        b = theano.shared(value=b_init, name="{0:s}_b".format(name))
    self.b = b

    # convolve input feature maps with filters
    input_c01b = self.noisy_input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
    filters_c01b = self.W.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
    conv_op = FilterActs(stride=1, partial_sum=1)
    contig_input = gpu_contiguous(input_c01b)
    contig_filters = gpu_contiguous(filters_c01b)
    conv_out_c01b = conv_op(contig_input, contig_filters)

    if bias_noise > 1e-4:
        noisy_conv_out_c01b = conv_out_c01b + self.rng.normal(
            size=conv_out_c01b.shape, avg=0.0, std=bias_noise,
            dtype=theano.config.floatX)
    else:
        noisy_conv_out_c01b = conv_out_c01b

    # downsample each feature map individually, using maxpooling
    pool_op = MaxPool(ds=pool_def[0], stride=pool_def[1])
    mp_out_c01b = pool_op(noisy_conv_out_c01b)
    mp_out_bc01 = mp_out_c01b.dimshuffle(3, 0, 1, 2)  # c01b to bc01

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1,n_filters,1,1). Each bias will
    # thus be broadcasted across mini-batches and feature map
    # width & height
    self.noisy_linear_output = mp_out_bc01 + self.b.dimshuffle(
        'x', 0, 'x', 'x')
    self.linear_output = self.noisy_linear_output
    self.output = self.activation(self.noisy_linear_output)

    # store parameters of this layer
    self.params = [self.W, self.b]
def local_dnn3d_convi_output_merge(node, *inputs):
    """Rebuild a ``GpuDnn3dConvGradI`` application from ``inputs``.

    The third input is made GPU-contiguous first; every other input is
    passed through untouched. ``node`` is not used. Returns a one-element
    list holding the result of the op.
    """
    arguments = list(inputs)
    arguments[2] = gpu_contiguous(arguments[2])
    return [GpuDnn3dConvGradI()(*arguments)]
def conv_and_add_bias(self, x):
    """Correlate ``x`` with ``self.filters`` and add the per-channel bias.

    ``x`` is made GPU-contiguous, passed through ``GpuCorr3dMM`` with
    ``self.kernel_stride`` as subsample, and ``self.bias`` is broadcast
    along every axis except axis 1 of the 5D result.
    """
    contiguous_x = gpu_contiguous(x)
    corr_op = GpuCorr3dMM(subsample=tuple(self.kernel_stride))
    broadcast_bias = self.bias.dimshuffle('x', 0, 'x', 'x', 'x')
    return corr_op(contiguous_x, self.filters) + broadcast_bias
def compilePredictActivation(self, net, layerNum):
    """Append this layer's prediction-time activation to ``net.varArrayAc``.

    The layer input is ``net.x`` for the first layer and the previous
    entry of ``net.varArrayAc`` otherwise. It is reshaped from 2 to 4
    dimensions, convolved with ``net.varWeights[layerNum]['w']``, biased
    with ``net.varWeights[layerNum]['b']``, optionally max-pooled,
    flattened back to 2 dimensions and passed through
    ``self.activation``. ``self.optimized`` selects the cuda-convnet ops
    (c01b layout) over the stock Theano ops (bc01 layout).
    """
    if layerNum == 0:
        layer_input = net.x
    else:
        layer_input = net.varArrayAc[layerNum - 1]

    # Work out the image side on the fly; square input images are assumed.
    n_channels = self.kernel_shape[1]
    side = T.cast(T.sqrt(T.shape(layer_input)[0] / n_channels), 'int32')

    # 2D -> 4D
    images = T.reshape(layer_input.T,
                       (T.shape(layer_input)[1], n_channels, side, side))

    if self.optimized:
        weights = net.varWeights[layerNum]['w']
        out_size = T.cast(
            T.ceil((T.shape(images)[-1] - T.shape(weights)[-1] + 1) /
                   np.float32(self.stride)),
            'int32')

        conv_op = FilterActs(stride=self.stride)
        images_c01b = images.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        weights_c01b = net.varWeights[layerNum]['w'].dimshuffle(
            1, 2, 3, 0)  # bc01 to c01b
        # flip rows and columns
        weights_flipped = weights_c01b[:, ::-1, ::-1, :]
        keep_scale = self.dropout or 1.0

        a = conv_op(gpu_contiguous(images_c01b),
                    gpu_contiguous(weights_flipped * keep_scale))
        a = a[:, :out_size, :out_size, :]
        # Add bias (filter axis comes first in c01b)
        a = a + net.varWeights[layerNum]['b'].dimshuffle(0, 'x', 'x', 'x')
    else:
        if self.dropout:
            keep_mask = net.dropOutVectors[layerNum].dimshuffle(
                'x', 'x', 0, 1)
        else:
            keep_mask = 1.0
        a = T.nnet.conv2d(images,
                          net.varWeights[layerNum]['w'] * keep_mask,
                          border_mode='valid',
                          subsample=(self.stride, self.stride))
        # Add bias (filter axis comes second in bc01)
        a = a + net.varWeights[layerNum]['b'].dimshuffle('x', 0, 'x', 'x')

    if self.optimized:
        if self.pooling:
            # ds is the side of the square pooling window and stride the
            # step between successive windows. Both are set to
            # pooling_shape here, i.e. the usual non-overlapping pools (a
            # smaller stride would overlap them; a larger one is not
            # allowed).
            pool_op = MaxPool(ds=self.pooling_shape,
                              stride=self.pooling_shape)
            a = pool_op(gpu_contiguous(a.astype(theano.config.floatX)))
        # The cuda-convnet path always has to go back to bc01.
        a = a.dimshuffle(3, 0, 1, 2)  # c01b to bc01
    elif self.pooling:
        a = downsample.max_pool_2d(
            a, (self.pooling_shape, self.pooling_shape),
            ignore_border=False)

    a = T.flatten(a, outdim=2).T

    # Non-linearity (labelled "Sigmoid" in the original comment)
    a = self.activation(a, self.pool_size)
    net.varArrayAc.append(a)
def conv(data, filter_gen, feature_batch_size, num_feature_batches,
         data_batch_size, cuda_convnet=True, symmetric_relu=True,
         start_feature_batch=0, pool_type='avg', pool_size=14, pad=0,
         bias=1.0, ps=6):
    """Featurize ``data`` with generated filter banks: conv -> relu -> pool.

    For each of ``num_feature_batches`` filter banks obtained from
    ``filter_gen(feature_batch_size)``, the images are processed in chunks
    of ``data_batch_size``: convolution, ``relu(response - bias)`` (plus
    ``relu(-response - bias)`` when ``symmetric_relu`` is set, which
    doubles the number of output maps) and pooling with a window and
    stride of ``pool_size``.

    ``data`` is indexed as (images, channels, rows, columns).
    ``start_feature_batch`` only offsets the batch number in the progress
    message. Raises ``Exception('Unsupported pool type')`` for a
    ``pool_type`` other than 'avg' or 'max'.

    Returns ``(features, filters)`` where ``features`` is a float32 array
    of shape (images, maps, out_rows, out_cols) and ``filters`` is the
    concatenation along axis 0 of the collected filter banks.
    """
    # NOTE(review): the caller's flag is overridden here, so the plain
    # Theano branches below are currently unreachable.
    cuda_convnet = True

    out_rows = int(math.ceil((data.shape[2] - ps + 1) / float(pool_size)))
    out_cols = int(math.ceil((data.shape[3] - ps + 1) / float(pool_size)))
    n_maps = feature_batch_size * num_feature_batches
    if symmetric_relu:
        n_maps = 2 * n_maps

    # Items are joined with single spaces, exactly like a comma-separated
    # print statement would emit them.
    print(" ".join(["Out Shape ", str(out_rows), "x", str(out_cols), "x",
                    str(n_maps)]))

    n_images = data.shape[0]
    features = np.zeros((n_images, n_maps, out_rows, out_cols), 'float32')
    collected_filters = []

    # Convert to cuda-convnet order
    if cuda_convnet:
        data = data.transpose(1, 2, 3, 0)

    # POOL OP CREATION
    if cuda_convnet:
        if pool_type not in ('avg', 'max'):
            raise Exception('Unsupported pool type')
        pool_cls = AvgPool if pool_type == 'avg' else MaxPool
        pool_op = pool_cls(ds=pool_size, stride=pool_size)
    else:
        pool_op = lambda X: T.signal.pool.pool_2d(
            X, (pool_size, pool_size), ignore_border=False, mode='max')

    if cuda_convnet:
        conv_op = FilterActs(pad=pad)
    else:
        conv_op = lambda X, F: T.nnet.conv2d(X, F)

    # Axis holding the feature maps in the op outputs.
    map_axis = 0 if cuda_convnet else 1

    for j in range(num_feature_batches):
        bank = filter_gen(feature_batch_size)
        if cuda_convnet:
            bank = bank.transpose(1, 2, 3, 0)
        # NOTE(review): in cuda-convnet mode the bank is stored after the
        # transpose above, so the final concatenation along axis 0 joins
        # banks in that transposed order -- confirm this is the layout
        # callers expect.
        collected_filters.append(bank)
        bank_shared = shared(bank.astype('float32'))

        first_map = j * feature_batch_size
        last_map = (j + 1) * feature_batch_size
        if symmetric_relu:
            first_map *= 2
            last_map *= 2

        n_data_batches = int(np.ceil(n_images / float(data_batch_size)))
        for i in range(n_data_batches):
            start = i * data_batch_size
            end = min((i + 1) * data_batch_size, n_images)
            print(" ".join(["FEATURE BATCH #",
                            str(j + start_feature_batch),
                            "DATA BATCH #", str(i),
                            " SIZE IS ", str(end - start)]))

            if cuda_convnet:
                block = shared(data[:, :, :, start:end])
                block_gpu = gpu_contiguous(block)
                bank_gpu = gpu_contiguous(bank_shared)
            else:
                block = shared(data[start:end, :, :, :])

            # CONV
            response = conv_op(block_gpu, bank_gpu)

            # RELU + POOL
            positive = pool_op(T.nnet.relu(response - bias, 0))
            if symmetric_relu:
                negative = pool_op(T.nnet.relu(-1.0 * response - bias, 0))
                block_out = np.concatenate(
                    (positive.eval(), negative.eval()), axis=map_axis)
            else:
                block_out = np.array(positive.eval())

            if cuda_convnet:
                block_out = block_out.transpose(3, 0, 1, 2)

            # Replace the chunk held by the shared variable with an empty
            # array once its features have been computed.
            block.set_value([[[[]]]])
            features[start:end, first_map:last_map, :, :] = block_out

        bank_shared.set_value([[[[]]]])

    collected_filters = np.concatenate(collected_filters, axis=0)
    return (features, collected_filters)