def test_model(self, X, w1, w2, w3, w4, w5, w6, w_o, p_drop_conv, p_drop_hidden):
    """Build the symbolic forward pass used at test time and return the softmax output.

    The graph is six rectified 'valid' convolutions (biases ``self.b1`` ..
    ``self.b6``), with 2x2 max pooling after the 1st, 2nd and 4th of them,
    followed by a linear convolution with ``w_o``.  The result is flattened
    to two dimensions, normalised with the stored statistics ``self.r_m`` /
    ``self.r_s``, scaled and shifted by ``self.g`` / ``self.b`` and passed
    through a softmax.

    ``p_drop_conv`` and ``p_drop_hidden`` are accepted but never used here:
    no dropout is applied in this graph.
    """
    def conv_relu(inp, kernel, bias):
        # Rectified 'valid' convolution plus bias.
        return l.rectify(conv2d(inp, kernel, border_mode='valid') + bias)

    def halve(inp):
        # 2x2 max pooling that discards incomplete border windows.
        return max_pool_2d(inp, (2, 2), ignore_border=True)

    h = halve(conv_relu(X, w1, self.b1))
    h = halve(conv_relu(h, w2, self.b2))
    h = conv_relu(h, w3, self.b3)
    h = halve(conv_relu(h, w4, self.b4))
    h = conv_relu(h, w5, self.b5)
    h = conv_relu(h, w6, self.b6)

    # Output convolution has no non-linearity and no bias.
    scores = T.flatten(conv2d(h, w_o, border_mode='valid'), outdim=2)

    # Normalise with the stored statistics; 1e-4 guards against division by zero.
    normalised = ((scores - self.r_m) / (self.r_s + 1e-4)) * self.g + self.b
    return T.nnet.softmax(normalised)
def pool2d(x, pool_size, strides=(1, 1), border_mode='valid',
           dim_ordering='th', pool_mode='max'):
    """Apply 2D max or average pooling to ``x`` through Theano's pooling op.

    ``border_mode`` must be ``'valid'`` (``'same'`` is explicitly rejected),
    ``dim_ordering`` must be ``'th'`` or ``'tf'`` and ``pool_mode`` must be
    ``'max'`` or ``'avg'``; anything else raises ``Exception``.  For
    ``'tf'`` ordering the input is dimshuffled with ``(0, 3, 1, 2)`` before
    pooling and the result is dimshuffled back with ``(0, 2, 3, 1)``.
    """
    if border_mode == 'same':
        # TODO: add implementation for border_mode="same"
        raise Exception('border_mode="same" not supported with Theano.')
    if border_mode != 'valid':
        raise Exception('Invalid border mode: ' + str(border_mode))
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    channels_last = dim_ordering == 'tf'
    if channels_last:
        x = x.dimshuffle((0, 3, 1, 2))

    # Translate the public pooling mode into the name the Theano op expects.
    if pool_mode == 'max':
        theano_mode = 'max'
    elif pool_mode == 'avg':
        theano_mode = 'average_exc_pad'
    else:
        raise Exception('Invalid pooling mode: ' + str(pool_mode))

    # 'valid' border handling: no padding, incomplete windows are dropped.
    pooled = pool.max_pool_2d(x, ds=pool_size, st=strides,
                              ignore_border=True,
                              padding=(0, 0),
                              mode=theano_mode)

    if channels_last:
        pooled = pooled.dimshuffle((0, 2, 3, 1))
    return pooled
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2), non_linear="tanh"):
    """
    Set up a convolution + max-pooling layer backed by shared variables.

    :type rng: numpy.random.RandomState
    :param rng: random number generator used to draw the initial weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols)

    :type non_linear: str
    :param non_linear: "tanh" or "relu" apply that activation to
                       (convolution + bias) before pooling; any other value
                       pools the raw convolution and adds the bias afterwards
    """
    assert image_shape[1] == filter_shape[1]

    self.input = input
    self.filter_shape = filter_shape
    self.image_shape = image_shape
    self.poolsize = poolsize
    self.non_linear = non_linear

    # Every hidden unit sees "input feature maps * filter height * filter width" inputs.
    fan_in = numpy.prod(filter_shape[1:])
    # Every lower-layer unit gets gradient from
    # "output feature maps * filter height * filter width" / pooling size.
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize))

    # Weights are uniform: a fixed small range for "none"/"relu",
    # otherwise a range derived from fan_in and fan_out.
    if self.non_linear in ("none", "relu"):
        initial_W = rng.uniform(low=-0.01, high=0.01, size=filter_shape)
    else:
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        initial_W = rng.uniform(low=-W_bound, high=W_bound, size=filter_shape)
    self.W = theano.shared(numpy.asarray(initial_W, dtype=theano.config.floatX),
                           borrow=True, name="W_conv")

    # One zero-initialised bias per output feature map.
    self.b = theano.shared(value=numpy.zeros((filter_shape[0],), dtype=theano.config.floatX),
                           borrow=True, name="b_conv")

    # Convolve the input feature maps with the filters.
    conv_out = conv.conv2d(input=input, filters=self.W,
                           filter_shape=self.filter_shape,
                           image_shape=self.image_shape)

    # Reshape the bias so it broadcasts over batch, height and width.
    bias = self.b.dimshuffle('x', 0, 'x', 'x')

    if self.non_linear in ("tanh", "relu"):
        squash = T.tanh if self.non_linear == "tanh" else ReLU
        activated = squash(conv_out + bias)
        self.output = max_pool_2d(input=activated, ds=self.poolsize, ignore_border=True)
    else:
        downsampled = max_pool_2d(input=conv_out, ds=self.poolsize, ignore_border=True)
        self.output = downsampled + bias

    self.params = [self.W, self.b]
def pool2d(x, pool_size, strides=(1, 1), border_mode='valid',
           dim_ordering='th', pool_mode='max'):
    """Apply 2D max or average pooling to ``x`` using whichever Theano API is available.

    Parameters:
        x: symbolic 4D tensor; for ``dim_ordering='tf'`` it is dimshuffled
            with ``(0, 3, 1, 2)`` before pooling and back with
            ``(0, 2, 3, 1)`` afterwards.
        pool_size: pooling window, forwarded as ``ds``.
        strides: accepted for interface compatibility.
            NOTE(review): this value is not forwarded to the pooling call,
            so Theano falls back to its own default stride.  Confirm whether
            callers rely on that before wiring it through as ``st``.
        border_mode: only ``'valid'`` is supported.
        dim_ordering: ``'th'`` or ``'tf'``.
        pool_mode: ``'max'`` or ``'avg'``.

    Returns:
        The pooled symbolic tensor, in the same dimension ordering as ``x``.

    Raises:
        Exception: for ``border_mode='same'``, an unknown ``border_mode``,
            an unknown ``dim_ordering`` or an unknown ``pool_mode``.
    """
    if border_mode == 'same':
        # TODO: add implementation for border_mode="same"
        raise Exception('border_mode="same" not supported with Theano.')
    elif border_mode == 'valid':
        ignore_border = False
        padding = (0, 0)
    else:
        raise Exception('Invalid border mode: ' + str(border_mode))

    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    # Theano has no 'avg' mode: translate the public names into Theano's,
    # and reject anything else on BOTH code paths (previously the new-Theano
    # path forwarded pool_mode verbatim and skipped validation).
    theano_modes = {'max': 'max', 'avg': 'average_exc_pad'}
    if pool_mode not in theano_modes:
        raise Exception('Invalid pooling mode: ' + str(pool_mode))
    theano_mode = theano_modes[pool_mode]

    if dim_ordering == 'tf':
        x = x.dimshuffle((0, 3, 1, 2))

    # Older Theano releases only expose max_pool_2d; newer ones use pool_2d.
    pool_fn = pool.max_pool_2d if OLD_THEANO else pool.pool_2d
    pool_out = pool_fn(x, ds=pool_size,
                       ignore_border=ignore_border,
                       padding=padding,
                       mode=theano_mode)

    if dim_ordering == 'tf':
        pool_out = pool_out.dimshuffle((0, 2, 3, 1))
    return pool_out
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
    """Create a convolution -> max-pool -> tanh layer with shared parameters.

    ``filter_shape`` and ``image_shape`` are indexed as 4-element sequences
    and must agree on their second entry.  Weights are drawn uniformly from
    ``[-W_bound, W_bound]`` using ``rng``; biases start at zero, one per
    entry of ``filter_shape[0]``.  The symbolic result is stored in
    ``self.output`` and the trainable variables in ``self.params``.
    """
    assert image_shape[1] == filter_shape[1]
    self.input = input

    # Fan-in / fan-out determine the range of the uniform initialisation.
    n_in = numpy.prod(filter_shape[1:])
    n_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
             numpy.prod(poolsize))
    limit = numpy.sqrt(6. / (n_in + n_out))

    initial_W = numpy.asarray(
        rng.uniform(low=-limit, high=limit, size=filter_shape),
        dtype=theano.config.floatX)
    self.W = theano.shared(initial_W, borrow=True)

    initial_b = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
    self.b = theano.shared(value=initial_b, borrow=True)

    feature_maps = conv.conv2d(input=input, filters=self.W,
                               filter_shape=filter_shape,
                               image_shape=image_shape)
    downsampled = pool.max_pool_2d(input=feature_maps, ds=poolsize,
                                   ignore_border=True)

    # The bias is reshaped to broadcast over batch, height and width and is
    # added after pooling, just before the tanh.
    self.output = T.tanh(downsampled + self.b.dimshuffle('x', 0, 'x', 'x'))
    self.params = [self.W, self.b]
def test_convolution(self):
    """
    Build a convolution + tanh graph on a symbolic input and compile it.

    input: a 4D tensor corresponding to a mini-batch of input images.
    The shape of the tensor is as follows:
    [mini-batch size, number of input feature maps, image height, image width].

    Sets ``self.input``, ``self.W``, ``self.bias``, ``self.convolution`` and
    ``self.max_pooling`` as side effects; returns nothing.
    """
    self.input = T.tensor4(name='input')

    n_in_maps = self.numbers_of_feature_maps[0]
    n_out_maps = self.numbers_of_feature_maps[1]
    filter_rows = self.filter_shape[0]
    filter_cols = self.filter_shape[1]

    # Weights: uniform in [-1/w_bound, 1/w_bound].
    # (The previous code had "1,0/w_bound" -- a typo for "1.0/w_bound" --
    # which passed one positional argument too many to uniform().)
    W_shape = (n_out_maps, n_in_maps, filter_rows, filter_cols)
    w_bound = np.sqrt(n_in_maps * filter_rows * filter_cols)
    self.W = theano.shared(
        np.asarray(
            np.random.uniform(low=-1.0 / w_bound, high=1.0 / w_bound, size=W_shape),
            dtype=self.input.dtype),
        name='W')

    # Bias: one value per output feature map.  The dtype must come from the
    # symbolic input, not from the builtin ``input`` function.
    bias_shape = (n_out_maps,)
    self.bias = theano.shared(
        np.asarray(
            np.random.uniform(-.5, .5, size=bias_shape),
            dtype=self.input.dtype),
        name='b')

    # Convolution
    self.convolution = conv.conv2d(self.input, self.W)

    self.max_pooling = pool.max_pool_2d(
        input=self.convolution,
        ds=self.pooling_size,
        ignore_border=True
    )

    # NOTE(review): the activation is applied to the un-pooled convolution;
    # self.max_pooling is built but not part of the compiled output.
    output = T.tanh(self.convolution + self.bias.dimshuffle('x', 0, 'x', 'x'))

    # Compiling checks that the graph is well-formed; the compiled function
    # itself is not used afterwards.
    theano.function([self.input], output)
def predict(self, new_data, batch_size):
    """
    Build the layer's symbolic output for new data.

    :param new_data: symbolic 4D input fed to the convolution
    :param batch_size: number of examples in ``new_data``; used only to
                       describe the input shape to ``conv2d``
    :return: symbolic output after convolution, bias, the configured
             non-linearity (``self.non_linear``) and max pooling
    """
    # conv2d requires the image channel count to equal the filters' input
    # channel count, so take it from filter_shape instead of hard-coding 1.
    img_shape = (batch_size, self.filter_shape[1],
                 self.image_shape[2], self.image_shape[3])
    conv_out = conv.conv2d(input=new_data, filters=self.W,
                           filter_shape=self.filter_shape,
                           image_shape=img_shape)

    # The bias holds one value per feature map; reshape it so it broadcasts
    # over (batch, map, row, col).  Adding the bare vector would broadcast
    # it along the last axis instead.
    # NOTE(review): assumes self.b is the 1-D bias created in the
    # constructor -- confirm.
    bias = self.b.dimshuffle('x', 0, 'x', 'x')

    if self.non_linear == "tanh":
        conv_out_tanh = T.tanh(conv_out + bias)
        output = pool.max_pool_2d(input=conv_out_tanh, ds=self.poolsize,
                                  ignore_border=True)
    elif self.non_linear == "relu":
        conv_out_relu = ReLU(conv_out + bias)
        output = pool.max_pool_2d(input=conv_out_relu, ds=self.poolsize,
                                  ignore_border=True)
    else:
        # No non-linearity: pool first, then add the bias.
        pooled_out = pool.max_pool_2d(input=conv_out, ds=self.poolsize,
                                      ignore_border=True)
        output = pooled_out + bias
    return output
def myMaxPooling3d(image3dBC012, image3dBC012Shape, maxPoolingParameters):
    """Pool a 5D tensor over its three trailing axes using two 2D pooling passes.

    image3dBC012 dimensions: (batch, fms, r, c, z)
    maxPoolingParameters: [[dsr,dsc,dsz], [strr,strc,strz], [mirrorPad-r,-c,-z], mode]

    Returns a tuple ``(pooled tensor, shape list)`` where the shape list is
    computed here from ``image3dBC012Shape`` and not read from the graph.
    """
    poolSize = maxPoolingParameters[0]
    poolStride = maxPoolingParameters[1]
    mirrorPad = maxPoolingParameters[2]
    poolMode = maxPoolingParameters[3]

    paddedImage = mirrorFinalBordersOfImage(image3dBC012, mirrorPad)

    # Pass 1: the 2D op pools over the two trailing axes, i.e. (c, z).
    pooledOverCZ = pool.max_pool_2d(input=paddedImage,
                                    ds=(poolSize[1], poolSize[2]),
                                    ignore_border=True,
                                    st=(poolStride[1], poolStride[2]),
                                    padding=(0, 0),
                                    mode=poolMode)

    # Pass 2: move r to the last position and pool it with a (1, ds_r) window.
    rMovedLast = pooledOverCZ.dimshuffle(0, 1, 3, 4, 2)
    pooledOverR = pool.max_pool_2d(input=rMovedLast,
                                   ds=(1, poolSize[0]),
                                   ignore_border=True,
                                   st=(1, poolStride[0]),
                                   padding=(0, 0),
                                   mode=poolMode)

    # Restore the (batch, fms, r, c, z) axis order.
    pooled_out = pooledOverR.dimshuffle(0, 1, 4, 2, 3)

    # Shape bookkeeping, valid for ignore_border=True only: pooling is done
    # only over full windows of the mirror-padded image.
    pooledShape = [image3dBC012Shape[0], image3dBC012Shape[1]] + [
        int(ceil((image3dBC012Shape[2 + axis] + mirrorPad[axis] - poolSize[axis] + 1) / poolStride[axis]))
        for axis in range(3)
    ]

    return (pooled_out, pooledShape)
def model_util(self, X, w1, w2, w3, w4, w5, w6, w_o):
    """Return the flattened, un-normalised output of the convolutional stack.

    Six rectified 'valid' convolutions (biases ``self.b1`` .. ``self.b6``)
    are applied in order, with 2x2 max pooling after the 1st, 2nd and 4th.
    A final linear convolution with ``w_o`` follows and its result is
    flattened to two dimensions.
    """
    # (kernel, bias, pool afterwards?) for each rectified layer, in order.
    stages = (
        (w1, self.b1, True),
        (w2, self.b2, True),
        (w3, self.b3, False),
        (w4, self.b4, True),
        (w5, self.b5, False),
        (w6, self.b6, False),
    )

    h = X
    for kernel, bias, pooled in stages:
        h = l.rectify(conv2d(h, kernel, border_mode='valid') + bias)
        if pooled:
            h = max_pool_2d(h, (2, 2), ignore_border=True)

    return T.flatten(conv2d(h, w_o, border_mode='valid'), outdim=2)
def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
    """Wire ``inpt`` through convolution, max pooling and the activation.

    ``inpt`` is reshaped to ``self.image_shape`` and stored in ``self.inpt``.
    The result is stored in both ``self.output`` and
    ``self.output_dropout``.  ``inpt_dropout`` and ``mini_batch_size`` are
    not used by this method.
    """
    self.inpt = inpt.reshape(self.image_shape)

    feature_maps = conv.conv2d(input=self.inpt,
                               filters=self.w,
                               filter_shape=self.filter_shape,
                               image_shape=self.image_shape)
    downsampled = downsample.max_pool_2d(input=feature_maps,
                                         ds=self.poolsize,
                                         ignore_border=True)

    # Bias is reshaped to broadcast across every axis except the second.
    pre_activation = downsampled + self.b.dimshuffle('x', 0, 'x', 'x')
    self.output = self.activation_fn(pre_activation)

    # no dropout in the convolutional layers
    self.output_dropout = self.output
def get_output_for(self, input, **kwargs):
    """Return ``input`` pooled with this layer's stored pooling settings.

    Extra keyword arguments are accepted and ignored.
    """
    pooling_options = dict(
        ds=self.pool_size,
        st=self.stride,
        ignore_border=self.ignore_border,
        padding=self.pad,
        mode=self.mode,
    )
    return pool.max_pool_2d(input, **pooling_options)
def get_output_for(self, input, **kwargs):
    """Pool ``input`` along one axis by reusing the 2D pooling op.

    A trailing axis is appended, pooling is applied with a window, stride
    and padding of ``(value, 1)`` / ``(value, 1)`` / ``(value, 0)`` built
    from the first entries of ``self.pool_size``, ``self.stride`` and
    ``self.pad``, and the appended axis is then indexed away.  Extra keyword
    arguments are accepted and ignored.
    """
    window = (self.pool_size[0], 1)
    step = (self.stride[0], 1)
    margin = (self.pad[0], 0)

    # Append a dummy last axis so the 2D op sees two trailing dimensions.
    expanded = T.shape_padright(input, 1)
    result = pool.max_pool_2d(expanded,
                              ds=window,
                              st=step,
                              ignore_border=self.ignore_border,
                              padding=margin,
                              )
    # Drop the dummy axis again.
    return result[:, :, :, 0]
def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
    """Connect ``inpt`` to this layer and build its symbolic output.

    The input is reshaped to ``self.image_shape``, convolved with
    ``self.w``, max-pooled by ``self.poolsize``, offset by the bias and
    passed to ``self.activation_fn``.  ``self.output_dropout`` is set to the
    same expression as ``self.output``.  ``inpt_dropout`` and
    ``mini_batch_size`` are not used by this method.
    """
    reshaped = inpt.reshape(self.image_shape)
    self.inpt = reshaped

    convolved = conv.conv2d(input=self.inpt, filters=self.w,
                            filter_shape=self.filter_shape,
                            image_shape=self.image_shape)
    pooled = pool.max_pool_2d(input=convolved, ds=self.poolsize,
                              ignore_border=True)

    # Bias is reshaped to broadcast across every axis except the second.
    self.output = self.activation_fn(
        pooled + self.b.dimshuffle('x', 0, 'x', 'x'))

    # no dropout in the convolutional layers
    self.output_dropout = self.output
def myMaxPooling3d(image3dBC012, image3dBC012Shape, maxPoolingParameters):
    """Emulate 3D pooling by chaining two 2D pooling passes.

    image3dBC012 dimensions: (batch, fms, r, c, z)
    maxPoolingParameters: [[dsr,dsc,dsz], [strr,strc,strz], [mirrorPad-r,-c,-z], mode]

    Returns ``(pooled_out, shape)``: the pooled symbolic tensor and a list
    with the shape calculated from ``image3dBC012Shape``.
    """
    windowRCZ = maxPoolingParameters[0]
    strideRCZ = maxPoolingParameters[1]
    padRCZ = maxPoolingParameters[2]
    modeOfPooling = maxPoolingParameters[3]

    mirrored = mirrorFinalBordersOfImage(image3dBC012, padRCZ)

    # First pass pools the two trailing axes (c and z).
    firstPass = pool.max_pool_2d(input=mirrored,
                                 ds=(windowRCZ[1], windowRCZ[2]),
                                 ignore_border=True,
                                 st=(strideRCZ[1], strideRCZ[2]),
                                 padding=(0, 0),
                                 mode=modeOfPooling)

    # Second pass: rotate r to the end so it can be pooled, leaving the
    # axis before it untouched via a window/stride of 1.
    secondPass = pool.max_pool_2d(input=firstPass.dimshuffle(0, 1, 3, 4, 2),
                                  ds=(1, windowRCZ[0]),
                                  ignore_border=True,
                                  st=(1, strideRCZ[0]),
                                  padding=(0, 0),
                                  mode=modeOfPooling)

    # Back to (batch, fms, r, c, z).
    pooled_out = secondPass.dimshuffle(0, 1, 4, 2, 3)

    # The shape is computed by hand instead of queried from the graph.
    # This calculation is for ignore_border=True: only full windows of the
    # mirror-padded image are pooled.
    spatialShape = []
    for axis in range(3):
        usable = image3dBC012Shape[2 + axis] + padRCZ[axis] - windowRCZ[axis] + 1
        spatialShape.append(int(ceil(usable / strideRCZ[axis])))
    shapeAfterPooling = [image3dBC012Shape[0], image3dBC012Shape[1]] + spatialShape

    return (pooled_out, shapeAfterPooling)
def test_model_for_bigger_image(self, X, w1, w2, w3, w4, w5, w6, w_o, b1, b2, b3, b4, b5, b6, r_m, r_s, g, b):
    """Build the test-time graph with explicitly supplied parameters.

    The stack is six rectified 'valid' convolutions with 2x2 max pooling
    after the 1st, 2nd and 4th, then a linear convolution with ``w_o``.
    The output maps are reduced with a max over axes 2 and 3, not
    flattened, then normalised with ``r_m`` / ``r_s``, scaled by ``g`` and
    shifted by ``b``.

    Returns a tuple ``(softmax output, normalised scores, raw output maps)``.
    No attribute of ``self`` is read.
    """
    def relu_conv(inp, kernel, bias):
        return l.rectify(conv2d(inp, kernel, border_mode='valid') + bias)

    def halve(inp):
        return max_pool_2d(inp, (2, 2), ignore_border=True)

    h = halve(relu_conv(X, w1, b1))
    h = halve(relu_conv(h, w2, b2))
    h = relu_conv(h, w3, b3)
    h = halve(relu_conv(h, w4, b4))
    h = relu_conv(h, w5, b5)
    h = relu_conv(h, w6, b6)

    output_maps = conv2d(h, w_o, border_mode='valid')

    # Collapse axes 2 and 3 by taking the maximum response of each map.
    peak = T.max(output_maps, axis=(2, 3), keepdims=False)

    # 1e-4 guards against division by zero.
    scores = ((peak - r_m) / (r_s + 1e-4)) * g + b
    return T.nnet.softmax(scores), scores, output_maps
def fancy_max_pool(input_tensor, pool_shape, pool_stride, ignore_border=False):
    """Max pooling built on top of theano's ``max_pool_2d``.

    Scalars given for ``pool_shape`` / ``pool_stride`` are expanded to
    pairs.  All leading axes of ``input_tensor`` are merged into one, the
    built-in pooling is run once per offset on the stride grid (up to the
    least common multiple of pool size and stride along each axis), each
    result is subsampled, and the pieces are interleaved into one output
    that is reshaped back to the leading axes of the input.

    Presumably this emulates a stride that differs from the pool size --
    confirm against callers.  Not optimal, but it does the job.
    """
    # Normalise scalar / single-element arguments to (height, width) pairs.
    if isinstance(pool_shape, numbers.Number):
        pool_shape = (pool_shape,)
    if isinstance(pool_stride, numbers.Number):
        pool_stride = (pool_stride,)
    if len(pool_shape) == 1:
        pool_shape = pool_shape * 2
    if len(pool_stride) == 1:
        pool_stride = pool_stride * 2

    lcm_rows, lcm_cols = [_lcm(p, s) for p, s in zip(pool_shape, pool_stride)]
    keep_rows, keep_cols = lcm_rows // pool_shape[0], lcm_cols // pool_shape[1]

    # Merge every leading axis into one so the input becomes 3D.
    leading_shape = input_tensor.shape[:-2]
    n_planes = T.prod(leading_shape)
    trailing_shape = input_tensor.shape[-2:]
    flat_input = input_tensor.reshape(
        T.concatenate([[n_planes], trailing_shape]), ndim=3)

    # One pooled-and-subsampled piece per (row offset, column offset).
    pieces = []
    for row_offset in range(0, lcm_rows, pool_stride[0]):
        row_pieces = []
        pieces.append(row_pieces)
        for col_offset in range(0, lcm_cols, pool_stride[1]):
            pooled = max_pool_2d(flat_input[:, row_offset:, col_offset:],
                                 pool_shape, ignore_border=ignore_border)
            thinned = pooled[:, ::keep_rows, ::keep_cols]
            piece_shape = T.concatenate([[n_planes], thinned.shape[-2:]])
            row_pieces.append(thinned.reshape(piece_shape, ndim=3))

    # Total output size: heights summed down the first column of pieces,
    # widths summed across the first row.
    total_rows = T.sum([row_pieces[0].shape[1] for row_pieces in pieces])
    total_cols = T.sum([piece.shape[2] for piece in pieces[0]])
    output = T.zeros((n_planes, total_rows, total_cols),
                     dtype=input_tensor.dtype)

    # Interleave the pieces back into a single tensor.
    for i, row_pieces in enumerate(pieces):
        for j, piece in enumerate(row_pieces):
            output = T.set_subtensor(
                output[:, i::lcm_rows // pool_stride[0],
                       j::lcm_cols // pool_stride[1]],
                piece)

    # Restore the original leading axes.
    return output.reshape(T.concatenate([leading_shape, output.shape[1:]]),
                          ndim=input_tensor.ndim)
def test_convolution(self):
    """
    Build a convolution + tanh graph on a symbolic input and compile it.

    input: a 4D tensor corresponding to a mini-batch of input images.
    The shape of the tensor is as follows:
    [mini-batch size, number of input feature maps, image height, image width].

    Sets ``self.input``, ``self.W``, ``self.bias``, ``self.convolution`` and
    ``self.max_pooling`` as side effects; returns nothing.
    """
    self.input = T.tensor4(name='input')

    maps_in = self.numbers_of_feature_maps[0]
    maps_out = self.numbers_of_feature_maps[1]

    # Weights: uniform in [-1/w_bound, 1/w_bound].  The upper bound used to
    # be written "1, 0 / w_bound" (a typo for "1.0 / w_bound"), which handed
    # uniform() one positional argument too many.
    W_shape = (maps_out, maps_in, self.filter_shape[0], self.filter_shape[1])
    w_bound = np.sqrt(maps_in * self.filter_shape[0] * self.filter_shape[1])
    initial_W = np.random.uniform(low=-1.0 / w_bound, high=1.0 / w_bound,
                                  size=W_shape)
    self.W = theano.shared(np.asarray(initial_W, dtype=self.input.dtype),
                           name='W')

    # Bias: one value per output feature map.  Use the symbolic input's
    # dtype; the bare name ``input`` is the Python builtin.
    bias_shape = (maps_out, )
    initial_bias = np.random.uniform(-.5, .5, size=bias_shape)
    self.bias = theano.shared(np.asarray(initial_bias,
                                         dtype=self.input.dtype),
                              name='b')

    # Convolution
    self.convolution = conv.conv2d(self.input, self.W)
    self.max_pooling = pool.max_pool_2d(input=self.convolution,
                                        ds=self.pooling_size,
                                        ignore_border=True)

    # NOTE(review): the activation is applied to the un-pooled convolution;
    # self.max_pooling is built but not part of the compiled output.
    output = T.tanh(self.convolution +
                    self.bias.dimshuffle('x', 0, 'x', 'x'))

    # Compiling checks that the graph is well-formed; the compiled function
    # itself is not used afterwards.
    theano.function([self.input], output)
def load_model():
    """Rebuild the encoder/generator/discriminator graph from saved weights.

    Weights are read from ``faces_dcgan.pkl`` in the current working
    directory.  The function rebuilds the encoder, generator and
    discriminator graphs, defines the GAN and MSE costs and their Adam
    updates, compiles the training / cost functions, and makes sure the
    ``gen_images/`` directory exists.

    Returns:
        A compiled theano function taking ``(X, target)`` and returning
        ``[gX, target]``, where ``gX`` is the generator output for ``X``.

    NOTE(review): ``_train_g``, ``_train_d``, ``_train_e`` and ``_get_cost``
    are compiled but neither returned nor stored.  If loading time matters,
    confirm whether they can be dropped.
    """
    # SECURITY: pickle can execute arbitrary code while loading; only use
    # weight files from a trusted source.
    # The context manager closes the file handle, which the previous
    # pickle.load(open(...)) form never did.
    with open("faces_dcgan.pkl", "rb") as model_file:
        [e_params, g_params, d_params] = pickle.load(model_file)
    gwx = g_params[-1]
    dwy = d_params[-1]

    # inputs
    X = T.tensor4()

    ## encode layer
    e_layer_sizes = [128, 64, 32, 16, 8]
    e_filter_sizes = [3, 256, 256, 512, 1024]

    eX, e_params, e_layers = make_conv_set(X, e_layer_sizes, e_filter_sizes, "e", weights=e_params)

    ## generative layer
    g_layer_sizes = [8, 16, 32, 64, 128]
    g_num_filters = [1024, 512, 256, 256, 128]

    g_out, g_params, g_layers = make_conv_set(eX, g_layer_sizes, g_num_filters, "g", weights=g_params)
    g_params += [gwx]
    gX = tanh(deconv(g_out, gwx, subsample=(1, 1), border_mode=(2, 2)))

    ## discrim layer(s)
    df1 = 128
    d_layer_sizes = [128, 64, 32, 16, 8]
    d_filter_sizes = [3, df1, 2 * df1, 4 * df1, 8 * df1]

    def discrim(input, name, weights=None):
        # Convolutional stack followed by a sigmoid over a dot product with dwy.
        d_out, disc_params, d_layers = make_conv_set(input, d_layer_sizes, d_filter_sizes, name, weights=weights)
        d_flat = T.flatten(d_out, 2)

        disc_params += [dwy]
        y = sigmoid(T.dot(d_flat, dwy))

        return y, disc_params, d_layers

    # target outputs
    target = T.tensor4()

    p_real, d_params, d_layers = discrim(target, "d", weights=d_params)
    # we need to make sure the p_gen params are the same as the p_real params
    p_gen, d_params2, d_layers = discrim(gX, "d", weights=d_params)

    ## GAN costs
    d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
    d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()
    g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean()

    ## MSE encoding cost is done on an (averaged) downscaling of the image
    target_pool = max_pool_2d(target, (4, 4), mode="average_exc_pad", ignore_border=True)
    target_flat = T.flatten(target_pool, 2)
    gX_pool = max_pool_2d(gX, (4, 4), mode="average_exc_pad", ignore_border=True)
    gX_flat = T.flatten(gX_pool, 2)
    enc_cost = mse(gX_flat, target_flat).mean()

    ## generator cost is a linear combination of the discrim cost plus the MSE encoding cost
    d_cost = d_cost_real + d_cost_gen
    g_cost = g_cost_d + enc_cost / 10  ## if the enc_cost is weighted too highly it will take a long time to train

    ## N.B. e_cost and e_updates will only try and minimise MSE loss on the autoencoder (for debugging)
    e_cost = enc_cost

    cost = [g_cost_d, d_cost_real, enc_cost]

    elrt = sharedX(0.002)
    lrt = sharedX(lr)
    d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    e_updater = updates.Adam(lr=elrt, b1=b1, regularizer=updates.Regularizer(l2=l2))

    d_updates = d_updater(d_params, d_cost)
    g_updates = g_updater(e_params + g_params, g_cost)
    e_updates = e_updater(e_params, e_cost)

    # Function-call form prints the same text on Python 2 and Python 3 and
    # matches the print(...) call below.
    print('COMPILING')
    t = time()
    _train_g = theano.function([X, target], cost, updates=g_updates)
    _train_d = theano.function([X, target], cost, updates=d_updates)
    _train_e = theano.function([X, target], cost, updates=e_updates)
    _get_cost = theano.function([X, target], cost)
    print('%.2f seconds to compile theano functions' % (time() - t))

    img_dir = "gen_images/"
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)

    ae_encode = theano.function([X, target], [gX, target])
    return ae_encode
def _build_expression(self):
    """Create the symbolic input and the max-pooling expression over it.

    Stores a 4D tensor of dtype ``self.input_dtype`` in ``self.input_`` and
    the pooled expression in ``self.expression_``.  ``self.max_pool_stride``
    is handed to ``max_pool_2d`` as its second positional argument, and
    incomplete border windows are ignored.
    """
    self.input_ = T.tensor4(dtype=self.input_dtype)
    pooled = max_pool_2d(self.input_, self.max_pool_stride,
                         ignore_border=True)
    self.expression_ = pooled