def build_model_mnist():
    """Build a small two-conv-layer CNN plus linear output for MNIST.

    Returns an initialized ``Classifier`` over 28x28 single-channel images.
    """
    # First conv layer: 50 filters of 5x5, ReLU, 2x2 max pooling.
    layer0 = ConvolutionalLayer(activation=Rectifier().apply,
                                filter_size=(5, 5),
                                num_filters=50,
                                pooling_size=(2, 2),
                                weights_init=Uniform(width=0.1),
                                biases_init=Uniform(width=0.01),
                                name="layer_0")
    # Second conv layer: 20 filters of 3x3, ReLU, 2x2 max pooling.
    layer1 = ConvolutionalLayer(activation=Rectifier().apply,
                                filter_size=(3, 3),
                                num_filters=20,
                                pooling_size=(2, 2),
                                weights_init=Uniform(width=0.1),
                                biases_init=Uniform(width=0.01),
                                name="layer_1")
    convnet = ConvolutionalSequence([layer0, layer1], num_channels=1,
                                    image_size=(28, 28))
    convnet.initialize()

    # Flattened feature dimension feeding the 10-way output layer.
    output_dim = np.prod(convnet.get_dim('output'))
    mlp = MLP(activations=[Identity()],
              dims=[output_dim, 10],
              weights_init=Uniform(width=0.1),
              biases_init=Uniform(width=0.01),
              name="layer_2")
    mlp.initialize()

    classifier = Classifier(convnet, mlp)
    classifier.initialize()
    return classifier
def test_convolutional_sequence():
    """ConvolutionalSequence propagates channel counts and computes output."""
    x = tensor.tensor4('x')
    batch_size = 5
    pooling_size = 3
    act = Rectifier().apply

    conv = ConvolutionalLayer(act, (3, 3), 5,
                              (pooling_size, pooling_size),
                              weights_init=Constant(1.),
                              biases_init=Constant(5.))
    conv2 = ConvolutionalActivation(act, (2, 2), 4,
                                    weights_init=Constant(1.))
    seq = ConvolutionalSequence([conv, conv2], 4, image_size=(17, 13))
    seq.push_allocation_config()
    # Channel counts must have been pushed down through the sequence.
    assert conv.num_channels == 4
    assert conv2.num_channels == 5

    conv2.convolution.use_bias = False
    y = seq.apply(x)
    seq.initialize()
    func = function([x], y)

    x_val = numpy.ones((batch_size, 4, 17, 13),
                       dtype=theano.config.floatX)
    # All-ones weights make the expected activation a closed-form constant.
    y_val = numpy.ones((batch_size, 4, 4, 3)) * (9 * 4 + 5) * 4 * 5
    assert_allclose(func(x_val), y_val)
def create_model_bricks():
    """Build the 64x64 RGB convnet and the 40-way attribute MLP.

    Returns the initialized ``(convnet, mlp)`` pair.
    """
    # (filter_size, step, num_filters) for each conv block; a None step
    # means the brick's default stride.
    layer_specs = [
        ((4, 4), None, 32),
        ((3, 3), (2, 2), 32),
        ((4, 4), None, 64),
        ((3, 3), (2, 2), 64),
        ((3, 3), None, 128),
        ((3, 3), (2, 2), 128),
    ]
    layers = []
    for idx, (filter_size, step, num_filters) in enumerate(layer_specs, 1):
        conv_kwargs = {'filter_size': filter_size,
                       'num_filters': num_filters,
                       'name': 'conv{}'.format(idx)}
        if step is not None:
            conv_kwargs['step'] = step
        layers.append(Convolutional(**conv_kwargs))
        layers.append(
            SpatialBatchNormalization(name='batch_norm{}'.format(idx)))
        layers.append(Rectifier())

    convnet = ConvolutionalSequence(
        layers=layers,
        num_channels=3,
        image_size=(64, 64),
        use_bias=False,
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='convnet')
    convnet.initialize()

    mlp = BatchNormalizedMLP(
        activations=[Rectifier(), Logistic()],
        dims=[numpy.prod(convnet.get_dim('output')), 1000, 40],
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='mlp')
    mlp.initialize()
    return convnet, mlp
def test_convolutional_sequence_with_no_input_size():
    """A ConvolutionalSequence with no ``image_size`` (e.g. fed by an RNN).

    With untied biases the bias shape depends on the unknown image size,
    so allocation must fail; with tied biases it must succeed and produce
    a 4D output.
    """
    # suppose x is outputted by some RNN
    x = tensor.tensor4('x')
    filter_size = (1, 1)
    num_filters = 2
    num_channels = 1
    pooling_size = (1, 1)
    conv = Convolutional(filter_size, num_filters, tied_biases=False,
                         weights_init=Constant(1.),
                         biases_init=Constant(1.))
    act = Rectifier()
    pool = MaxPooling(pooling_size)

    bad_seq = ConvolutionalSequence([conv, act, pool], num_channels,
                                    tied_biases=False)
    # Fix: raw string — '\S' in a plain literal is an invalid escape
    # (DeprecationWarning since Python 3.6, SyntaxError in the future).
    assert_raises_regexp(ValueError, r'Cannot infer bias size \S+',
                         bad_seq.initialize)

    seq = ConvolutionalSequence([conv, act, pool], num_channels,
                                tied_biases=True)
    try:
        seq.initialize()
        out = seq.apply(x)
    except TypeError:
        assert False, "This should have succeeded"
    assert out.ndim == 4
def build_conv_layers(self, image=None):
    """Build the conv/pool/ReLU stack described by ``self.params``.

    Parameters
    ----------
    image : theano 4D tensor, optional
        Input spectrogram batch; a fresh symbolic ``ftensor4`` is created
        when omitted.

    Returns
    -------
    tuple
        The symbolic output of the sequence and the output dimensions
        reported by ``get_dim('output')``.
    """
    # Fix: the original had a no-op `else: image = image` branch.
    if image is None:
        image = T.ftensor4('spectrogram')

    conv_list = []
    for layer in range(self.layers):
        # layer_param layout assumed to be
        # (filter_size, num_filters, num_channels, pooling_size)
        # -- inferred from usage; TODO(review) confirm against the
        # producer of self.params.
        layer_param = self.params[layer]
        conv_layer = Convolutional(layer_param[0], layer_param[1],
                                   layer_param[2])
        pool_layer = MaxPooling(layer_param[3])
        conv_layer.name = "convolution" + str(layer)
        pool_layer.name = "maxpooling" + str(layer)
        conv_list.append(conv_layer)
        conv_list.append(pool_layer)
        conv_list.append(Rectifier())

    conv_seq = ConvolutionalSequence(
        conv_list,
        self.params[0][2],
        image_size=self.image_size,
        weights_init=IsotropicGaussian(std=0.5, mean=0),
        biases_init=Constant(0))
    # NOTE(review): initialize() pushes the allocation config itself;
    # the explicit private call is kept for compatibility with the
    # Blocks version this was written against.
    conv_seq._push_allocation_config()
    conv_seq.initialize()

    out = conv_seq.apply(image)
    return out, conv_seq.get_dim('output')
def test_batch_normalization_inside_convolutional_sequence():
    """Test that BN bricks work in ConvolutionalSequences."""
    def build_sequence(with_bn):
        # Same brick stack, with the two BN bricks optionally inserted.
        bricks = [Convolutional(filter_size=(3, 3), num_filters=4)]
        if with_bn:
            bricks.append(
                BatchNormalization(broadcastable=(False, True, True)))
        bricks.append(AveragePooling(pooling_size=(2, 2)))
        if with_bn:
            bricks.append(
                BatchNormalization(broadcastable=(False, False, False)))
        bricks.append(MaxPooling(pooling_size=(2, 2), step=(1, 1)))
        return ConvolutionalSequence(bricks,
                                     weights_init=Constant(1.),
                                     biases_init=Constant(2.),
                                     image_size=(10, 8), num_channels=9)

    conv_seq = build_sequence(True)
    conv_seq_no_bn = build_sequence(False)
    conv_seq.initialize()
    conv_seq_no_bn.initialize()

    rng = numpy.random.RandomState((2015, 12, 17))
    input_ = random_unif(rng, (2, 9, 10, 8))

    x = theano.tensor.tensor4()
    ybn = conv_seq.apply(x)
    y = conv_seq_no_bn.apply(x)
    # Freshly-initialized BN acts as the identity, so both graphs agree.
    yield (assert_equal, ybn.eval({x: input_}), y.eval({x: input_}))

    # Tripling the stored population stdev must divide the output by 3.
    std = conv_seq.children[-2].population_stdev
    std.set_value(3 * std.get_value(borrow=True))
    yield (assert_equal, ybn.eval({x: input_}), y.eval({x: input_}) / 3.)
def build_conv_layers(self, image=None):
    """Assemble convolution/pooling/ReLU bricks from ``self.params``.

    Parameters
    ----------
    image : theano 4D tensor, optional
        Input batch; when ``None`` a fresh 'spectrogram' ``ftensor4``
        is created.

    Returns
    -------
    tuple
        ``(output, dims)`` — the symbolic sequence output and the
        value of ``get_dim('output')``.
    """
    # Fix: dropped the original's redundant `else: image = image`;
    # also normalized the non-PEP8 `) :` spacing.
    if image is None:
        image = T.ftensor4('spectrogram')

    conv_list = []
    for layer in range(self.layers):
        # Assumed layer_param layout:
        # (filter_size, num_filters, num_channels, pooling_size).
        # TODO(review): confirm against the code filling self.params.
        layer_param = self.params[layer]
        conv_layer = Convolutional(layer_param[0], layer_param[1],
                                   layer_param[2])
        pool_layer = MaxPooling(layer_param[3])
        conv_layer.name = "convolution" + str(layer)
        pool_layer.name = "maxpooling" + str(layer)
        conv_list.append(conv_layer)
        conv_list.append(pool_layer)
        conv_list.append(Rectifier())

    conv_seq = ConvolutionalSequence(
        conv_list,
        self.params[0][2],
        image_size=self.image_size,
        weights_init=IsotropicGaussian(std=0.5, mean=0),
        biases_init=Constant(0))
    # initialize() pushes the allocation config itself; the private call
    # is retained to match the Blocks version this targets.
    conv_seq._push_allocation_config()
    conv_seq.initialize()
    out = conv_seq.apply(image)
    return out, conv_seq.get_dim('output')
def test_fully_layer(): batch_size=2 x = T.tensor4(); y = T.ivector() V = 200 layer_conv = Convolutional(filter_size=(5,5),num_filters=V, name="toto", weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0)) # try with no bias activation = Rectifier() pool = MaxPooling(pooling_size=(2,2)) convnet = ConvolutionalSequence([layer_conv, activation, pool], num_channels=15, image_size=(10,10), name="conv_section") convnet.push_allocation_config() convnet.initialize() output=convnet.apply(x) batch_size=output.shape[0] output_dim=np.prod(convnet.get_dim('output')) result_conv = output.reshape((batch_size, output_dim)) mlp=MLP(activations=[Rectifier().apply], dims=[output_dim, 10], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0)) mlp.initialize() output=mlp.apply(result_conv) cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output)) cg = ComputationGraph(cost) W = VariableFilter(roles=[WEIGHT])(cg.variables) B = VariableFilter(roles=[BIAS])(cg.variables) W = W[0]; b = B[0] inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg) outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg) var_input=inputs_fully[0] var_output=outputs_fully[0] [d_W,d_S,d_b] = T.grad(cost, [W, var_output, b]) d_b = d_b.dimshuffle(('x',0)) d_p = T.concatenate([d_W, d_b], axis=0) x_value = 1e3*np.random.ranf((2,15, 10, 10)) f = theano.function([x,y], [var_input, d_S, d_p], allow_input_downcast=True, on_unused_input='ignore') A, B, C= f(x_value, [5, 0]) A = np.concatenate([A, np.ones((2,1))], axis=1) print 'A', A.shape print 'B', B.shape print 'C', C.shape print lin.norm(C - np.dot(np.transpose(A), B), 'fro') return """
def conv_block(input_img, n_filter, filter_size, input_featuremap_size,
               ordering=''):
    """Two same-size convs plus a 1x1 conv, each with BN and LeakyReLU.

    ``input_featuremap_size`` is (channels, height, width); ``ordering``
    is a suffix used to keep brick names unique across blocks.
    """
    # Init widths as found in torch SpatialConvolution.
    std0 = 2. / (filter_size[0] * filter_size[1]
                 * input_featuremap_size[0]) ** .5
    std1 = 2. / (input_featuremap_size[0]) ** .5

    def conv_bn_relu(conv_brick, bn_name):
        # One conv -> batch-norm -> LeakyReLU triple.
        return [conv_brick, BatchNormalization(name=bn_name), LeakyReLU()]

    layers = []
    layers += conv_bn_relu(
        Convolutional(filter_size=filter_size, num_filters=n_filter,
                      border_mode='half', name='conv%s_1' % (ordering, ),
                      use_bias=True, weights_init=Uniform(width=std0)),
        'bn%s_1' % (ordering, ))
    layers += conv_bn_relu(
        Convolutional(filter_size=filter_size, num_filters=n_filter,
                      border_mode='half', name='conv%s_2' % (ordering, ),
                      use_bias=True, weights_init=Uniform(width=std0)),
        'bn%s_2' % (ordering, ))
    layers += conv_bn_relu(
        Convolutional(filter_size=(1, 1), num_filters=n_filter,
                      border_mode='valid', name='conv%s_3b' % (ordering, ),
                      use_bias=True, weights_init=Uniform(width=std1)),
        'bn%s_3' % (ordering, ))

    conv_sequence = ConvolutionalSequence(
        layers,
        num_channels=input_featuremap_size[0],
        image_size=(input_featuremap_size[1], input_featuremap_size[2]),
        biases_init=Uniform(width=.1),
        name='convsequence%s' % (ordering, ))
    conv_sequence.initialize()
    return conv_sequence.apply(input_img)
def main(): initial = numpy.random.normal(0, 0.1, (1, 1, 200, 200)) x = theano.shared(initial) conv_layer = ConvolutionalLayer( Rectifier().apply, (16, 16), 9, (4, 4), 1 ) conv_layer2 = ConvolutionalLayer( Rectifier().apply, (7, 7), 9, (2, 2), 1 ) con_seq = ConvolutionalSequence([conv_layer], 1, image_size=(200, 200), weights_init=IsotropicGaussian(0.1), biases_init=Constant(0.) ) con_seq.initialize() out = con_seq.apply(x) target_out = out[0, 0, 1, 1] grad = theano.grad(target_out - .1 * (x ** 2).sum(), x) updates = {x: x + 5e-1 * grad} #x.set_value(numpy.ones((1, 1, 200, 200))) #print theano.function([], out)() make_step = theano.function([], target_out, updates=updates) for i in xrange(400): out_val = make_step() print i, out_val image = x.get_value()[0][0] image = (image - image.mean()) / image.std() image = numpy.array([image, image, image]).transpose(1, 2, 0) plt.imshow(numpy.cast['uint8'](image * 65. + 128.), interpolation='none') plt.show()
def inception(image_shape, num_input, conv1, conv2, conv3, conv4, conv5,
              conv6, out, i):
    """GoogLeNet-style inception block with four parallel branches.

    Branch 1: 1x1 conv.  Branch 2: 1x1 then 3x3 conv.  Branch 3: 1x1
    then 5x5 conv.  Branch 4: 3x3 max-pool then 1x1 conv.  Outputs are
    concatenated along the channel axis; ``i`` numbers the bricks and
    is advanced exactly as in the original call protocol.
    """
    layers1 = []
    layers2 = []
    layers3 = []
    layers4 = []

    # --- Branch 1: 1x1 convolution -----------------------------------
    layers1.append(Convolutional(filter_size=(1, 1),
                                 num_channels=num_input,
                                 num_filters=conv1,
                                 image_size=image_shape,
                                 border_mode='half',
                                 name='conv_{}'.format(i)))
    layers1.append(BatchNormalization(name='batch_{}'.format(i)))
    layers1.append(Rectifier())
    conv_sequence1 = ConvolutionalSequence(layers1,
                                           num_channels=num_input,
                                           image_size=image_shape,
                                           weights_init=Orthogonal(),
                                           use_bias=False,
                                           name='convSeq_{}'.format(i))
    conv_sequence1.initialize()
    out1 = conv_sequence1.apply(out)
    i = i + 1

    # --- Branch 2: 1x1 reduction, then 3x3 convolution ---------------
    layers2.append(Convolutional(filter_size=(1, 1),
                                 num_channels=num_input,
                                 num_filters=conv2,
                                 image_size=image_shape,
                                 border_mode='half',
                                 name='conv_{}'.format(i)))
    layers2.append(BatchNormalization(name='batch_{}'.format(i)))
    layers2.append(Rectifier())
    i = i + 1
    layers2.append(Convolutional(filter_size=(3, 3),
                                 num_channels=conv2,
                                 num_filters=conv3,
                                 image_size=image_shape,
                                 border_mode='half',
                                 name='conv_{}'.format(i)))
    layers2.append(BatchNormalization(name='batch_{}'.format(i)))
    layers2.append(Rectifier())
    conv_sequence2 = ConvolutionalSequence(layers2,
                                           num_channels=num_input,
                                           image_size=image_shape,
                                           weights_init=Orthogonal(),
                                           use_bias=False,
                                           name='convSeq_{}'.format(i))
    conv_sequence2.initialize()
    out2 = conv_sequence2.apply(out)
    i = i + 1

    # --- Branch 3: 1x1 reduction, then 5x5 convolution ---------------
    layers3.append(Convolutional(filter_size=(1, 1),
                                 num_channels=num_input,
                                 num_filters=conv4,
                                 image_size=image_shape,
                                 border_mode='half',
                                 name='conv_{}'.format(i)))
    layers3.append(BatchNormalization(name='batch_{}'.format(i)))
    layers3.append(Rectifier())
    i = i + 1
    layers3.append(Convolutional(filter_size=(5, 5),
                                 num_channels=conv4,
                                 num_filters=conv5,
                                 image_size=image_shape,
                                 border_mode='half',
                                 name='conv_{}'.format(i)))
    layers3.append(BatchNormalization(name='batch_{}'.format(i)))
    layers3.append(Rectifier())
    conv_sequence3 = ConvolutionalSequence(layers3,
                                           num_channels=num_input,
                                           image_size=image_shape,
                                           weights_init=Orthogonal(),
                                           use_bias=False,
                                           name='convSeq_{}'.format(i))
    conv_sequence3.initialize()
    out3 = conv_sequence3.apply(out)
    i = i + 1

    # --- Branch 4: 3x3 max-pool, then 1x1 convolution ----------------
    layers4.append(MaxPooling((3, 3), step=(1, 1), padding=(1, 1),
                              name='pool_{}'.format(i)))
    layers4.append(Convolutional(filter_size=(1, 1),
                                 num_channels=num_input,
                                 num_filters=conv6,
                                 image_size=image_shape,
                                 border_mode='half',
                                 name='conv_{}'.format(i)))
    layers4.append(BatchNormalization(name='batch_{}'.format(i)))
    layers4.append(Rectifier())
    i = i + 1
    conv_sequence4 = ConvolutionalSequence(layers4,
                                           num_channels=num_input,
                                           image_size=image_shape,
                                           weights_init=Orthogonal(),
                                           use_bias=False,
                                           name='convSeq_{}'.format(i))
    conv_sequence4.initialize()
    out4 = conv_sequence4.apply(out)

    # Merge the four branch outputs along the channel axis.
    return T.concatenate([out1, out2, out3, out4], axis=1)
def main():
    """Train a VGG-style convnet on CIFAR-10 with L2 regularization."""
    # --------------------- model building ---------------------------
    # ConvOp requires the input to be a 4D tensor.
    x = tensor.tensor4("features")
    y = tensor.ivector("targets")

    # "VGGNet"-style stack of conv/conv+pool pairs.
    conv_layers = [
        ConvolutionalActivation(Rectifier().apply, (3, 3), 64,
                                border_mode='full', name='l1'),
        ConvolutionalLayer(Rectifier().apply, (3, 3), 64, (2, 2),
                           border_mode='full', name='l2'),
        ConvolutionalActivation(Rectifier().apply, (3, 3), 128,
                                border_mode='full', name='l3'),
        ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2),
                           border_mode='full', name='l4'),
        ConvolutionalActivation(Rectifier().apply, (3, 3), 256,
                                border_mode='full', name='l5'),
        ConvolutionalLayer(Rectifier().apply, (3, 3), 256, (2, 2),
                           border_mode='full', name='l6'),
    ]
    convnet = ConvolutionalSequence(conv_layers,
                                    num_channels=3,
                                    image_size=(32, 32),
                                    weights_init=IsotropicGaussian(0.1),
                                    biases_init=Constant(0))
    convnet.initialize()
    output_dim = np.prod(convnet.get_dim('output'))

    # Fully connected layers on the flattened conv features.
    conv_features = convnet.apply(x)
    features = Flattener().apply(conv_features)
    mlp = MLP(activations=[Rectifier()] * 2 + [None],
              dims=[output_dim, 256, 256, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    y_hat = mlp.apply(features)

    # Numerically stable softmax cross-entropy.
    cost = Softmax().categorical_cross_entropy(y, y_hat)
    error_rate = MisclassificationRate().apply(y, y_hat)

    cg = ComputationGraph(cost)
    weights = VariableFilter(roles=[FILTER, WEIGHT])(cg.variables)
    l2_regularization = 0.005 * sum((W ** 2).sum() for W in weights)
    cost = cost + l2_regularization
    cost.name = 'cost_with_regularization'

    # Print representation sizes as a sanity check.
    print("Representation sizes:")
    for layer in convnet.layers:
        print(layer.get_dim('input_'))

    # --------------------- training ---------------------------------
    train = CIFAR10("train")
    test = CIFAR10("test")
    train_stream = DataStream.default_stream(
        dataset=train,
        iteration_scheme=SequentialScheme(train.num_examples,
                                          batch_size=128))
    test_stream = DataStream.default_stream(
        dataset=test,
        iteration_scheme=SequentialScheme(test.num_examples,
                                          batch_size=1024))

    algorithm = GradientDescent(cost=cost,
                                params=cg.parameters,
                                step_rule=Adam(learning_rate=0.0005))
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            TrainingDataMonitoring([cost, error_rate],
                                   prefix='train', after_epoch=True),
            DataStreamMonitoring([cost, error_rate], test_stream,
                                 prefix='test'),
            ExperimentSaver(dest_directory='...', src_directory='.'),
            Printing(),
            ProgressBar(),
        ])
    main_loop.run()
def test_convolutional_layer(): batch_size=2 x = T.tensor4(); y = T.ivector() V = 200 layer_conv = Convolutional(filter_size=(5,5),num_filters=V, name="toto", weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0)) # try with no bias activation = Rectifier() pool = MaxPooling(pooling_size=(2,2)) convnet = ConvolutionalSequence([layer_conv, activation, pool], num_channels=15, image_size=(10,10), name="conv_section") convnet.push_allocation_config() convnet.initialize() output=convnet.apply(x) batch_size=output.shape[0] output_dim=np.prod(convnet.get_dim('output')) result_conv = output.reshape((batch_size, output_dim)) mlp=MLP(activations=[Rectifier().apply], dims=[output_dim, 10], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0)) mlp.initialize() output=mlp.apply(result_conv) cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output)) cg = ComputationGraph(cost) W = VariableFilter(roles=[WEIGHT])(cg.variables) B = VariableFilter(roles=[BIAS])(cg.variables) W = W[-1]; b = B[-1] print W.shape.eval() print b.shape.eval() import pdb pdb.set_trace() inputs_conv = VariableFilter(roles=[INPUT], bricks=[Convolutional])(cg) outputs_conv = VariableFilter(roles=[OUTPUT], bricks=[Convolutional])(cg) var_input=inputs_conv[0] var_output=outputs_conv[0] [d_W,d_S,d_b] = T.grad(cost, [W, var_output, b]) import pdb pdb.set_trace() w_shape = W.shape.eval() d_W = d_W.reshape((w_shape[0], w_shape[1]*w_shape[2]*w_shape[3])) d_b = T.zeros((w_shape[0],6*6)) #d_b = d_b.reshape((w_shape[0], 8*8)) d_p = T.concatenate([d_W, d_b], axis=1) d_S = d_S.dimshuffle((1, 0, 2, 3)).reshape((w_shape[0], batch_size, 6*6)).reshape((w_shape[0], batch_size*6*6)) #d_S = d_S.reshape((2,200, 64)) #x_value=1e3*np.random.ranf((1,15,10,10)) x_value = 1e3*np.random.ranf((2,15, 10, 10)) f = theano.function([x,y], [var_input, d_S, d_W], allow_input_downcast=True, on_unused_input='ignore') A, B, C= f(x_value, [5, 5]) print np.mean(B) return E_A = expansion_op(A, (2, 15, 10, 10), 
(5,5)) print E_A.shape E_A = E_A.reshape((2*36, C.shape[1])) print E_A.shape tmp = C - np.dot(B, E_A) print lin.norm(tmp, 'fro')
def build_and_run(label, config):
    """Build the convnet + MLP described by ``config`` and train it.

    ``label`` names the checkpoint file and the Bokeh plot document.
    """
    # ----- hyper-parameters ------------------------------------------
    num_epochs = config['num_epochs']
    num_batches = config['num_batches']
    num_channels = config['num_channels']
    image_shape = config['image_shape']
    filter_size = config['filter_size']
    num_filter = config['num_filter']
    pooling_sizes = config['pooling_sizes']
    mlp_hiddens = config['mlp_hiddens']
    output_size = config['output_size']
    batch_size = config['batch_size']
    activation = config['activation']
    mlp_activation = config['mlp_activation']

    lambda_l1 = 0.000025
    lambda_l2 = 0.000025

    print("Building model")
    # Symbolic inputs.
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    # Interleave conv / activation / max-pool bricks per layer.
    conv_parameters = zip(filter_size, num_filter)
    conv_layers = list(interleave([
        (Convolutional(filter_size=filter_size,
                       num_filters=num_filter,
                       name='conv_{}'.format(i))
         for i, (filter_size, num_filter) in enumerate(conv_parameters)),
        (activation),
        (MaxPooling(size, name='pool_{}'.format(i))
         for i, size in enumerate(pooling_sizes))]))

    conv_sequence = ConvolutionalSequence(conv_layers, num_channels,
                                          image_size=image_shape,
                                          weights_init=Uniform(width=0.2),
                                          biases_init=Constant(0.))
    conv_sequence.initialize()

    # MLP on the flattened conv features.
    top_mlp_dims = ([np.prod(conv_sequence.get_dim('output'))]
                    + mlp_hiddens + [output_size])
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation, top_mlp_dims,
              weights_init=Uniform(0, 0.2), biases_init=Constant(0.))
    mlp.initialize()
    predict = mlp.apply(out)

    cost = CategoricalCrossEntropy().apply(
        y.flatten(), predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)
    # Two copies so the error rate can appear in two different plots.
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    # ----- regularization --------------------------------------------
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
    l2_penalty = T.sum([lambda_l2 * (W ** 2).sum()
                        for i, W in enumerate(weights + biases)])
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1 * T.abs_(z).sum()
                        for z in weights + biases])
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'

    # ----- algorithm -------------------------------------------------
    algorithm = GradientDescent(cost=costreg, parameters=cg.parameters,
                                step_rule=Adam())

    # ----- data ------------------------------------------------------
    istest = 'test' in config.keys()
    train_stream, valid_stream, test_stream = get_stream(
        batch_size, image_shape, test=istest)

    # ----- extensions ------------------------------------------------
    checkpoint = Checkpoint('models/best_' + label + '.tar')
    checkpoint.add_condition(
        ['after_epoch'],
        predicate=OnLogRecord('valid_error_rate_best_so_far'))

    # Live Bokeh plot.
    # NOTE(review): the original channel list here was garbled by lost
    # line breaks; the two unambiguous channel groups are kept and the
    # disabled alternatives (train_costreg/grad-norm groups) dropped.
    plot = Plot(label,
                channels=[['train_error_rate', 'valid_error_rate'],
                          ['valid_cost', 'valid_error_rate2']],
                server_url="http://hades.calculquebec.ca:5042")

    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs,
                    after_n_batches=num_batches),
        DataStreamMonitoring([cost, error_rate, error_rate2],
                             valid_stream, prefix="valid"),
        TrainingDataMonitoring([costreg, error_rate, error_rate2,
                                grad_norm, l2_penalty, l1_penalty],
                               prefix="train", after_epoch=True),
        plot,
        ProgressBar(),
        Printing(),
        TrackTheBest('valid_error_rate', min),  # keep best
        checkpoint,                             # save best
        FinishIfNoImprovementAfter('valid_error_rate_best_so_far',
                                   epochs=4)]   # early stopping

    model = Model(cost)
    main_loop = MainLoop(algorithm, data_stream=train_stream,
                         model=model, extensions=extensions)
    main_loop.run()
def run_experiment(): np.random.seed(42) X = tensor.tensor4('features') nbr_channels = 3 image_shape = (5, 5) conv_layers = [ ConvolutionalLayer( filter_size=(2,2), num_filters=10, activation=Rectifier().apply, border_mode='valid', pooling_size=(1,1), weights_init=Uniform(width=0.1), #biases_init=Uniform(width=0.01), biases_init=Constant(0.0), name='conv0')] conv_sequence = ConvolutionalSequence( conv_layers, num_channels=nbr_channels, image_size=image_shape) #conv_sequence.push_allocation_config() conv_sequence.initialize() flattener = Flattener() conv_output = conv_sequence.apply(X) y_hat = flattener.apply(conv_output) # Whatever. Not important since we're not going to actually train anything. cost = tensor.sqr(y_hat).sum() #L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[FILTER, BIAS])(ComputationGraph([y_hat]).variables)] L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[BIAS])(ComputationGraph([y_hat]).variables)] # works on the sum of the gradients in a mini-batch sum_square_norm_gradients_method_02 = sum([tensor.sqr(g).sum() for g in L_grads_method_02]) D_by_layer = get_conv_layers_transformation_roles(ComputationGraph(conv_output)) individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_conv_transformations(D_by_layer, cost) # why does this thing depend on N again ? # I don't think I've used a cost that divides by N. 
N = 2 Xtrain = np.random.randn(N, nbr_channels, image_shape[0], image_shape[1]).astype(np.float32) #Xtrain[1:,:,:,:] = 0.0 Xtrain[:,:,:,:] = 1.0 convolution_filter_variable = VariableFilter(roles=[FILTER])(ComputationGraph([y_hat]).variables)[0] convolution_filter_variable_value = convolution_filter_variable.get_value() convolution_filter_variable_value[:,:,:,:] = 1.0 #convolution_filter_variable_value[0,0,:,:] = 1.0 convolution_filter_variable.set_value(convolution_filter_variable_value) f = theano.function([X], [cost, individual_sum_square_norm_gradients_method_00, sum_square_norm_gradients_method_02]) [c, v0, gs2] = f(Xtrain) #print "[c, v0, gs2]" L_c, L_v0, L_gs2 = ([], [], []) for n in range(N): [nc, nv0, ngs2] = f(Xtrain[n,:, :, :].reshape((1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3]))) L_c.append(nc) L_v0.append(nv0) L_gs2.append(ngs2) print "Cost for whole mini-batch in single shot : %f." % c print "Cost for whole mini-batch accumulated : %f." % sum(L_c) print "" print "Square-norm of all gradients for each data point in single shot :" print v0.reshape((1,-1)) print "Square-norm of all gradients for each data point iteratively :" print np.array(L_gs2).reshape((1,-1)) print "" print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2))) print "" print "Ratios : " print np.array(L_gs2).reshape((1,-1)) / v0.reshape((1,-1))
def create_network(inputs=None, batch=batch_size):
    """Build the PixelCNN graph; return (nat-loss, bits-per-dim loss)."""
    if inputs is None:
        inputs = T.tensor4('features')
    x = T.cast(inputs, 'float32')
    # binarized_mnist pixels are already in [0, 1].
    x = x / 255. if dataset != 'binarized_mnist' else x

    # PixelCNN stack: one masked-'A' layer, then masked-'B' layers.
    conv_list = [ConvolutionalNoFlip(*first_layer, mask='A', name='0'),
                 Rectifier()]
    for i in range(n_layer):
        conv_list.extend([
            ConvolutionalNoFlip(*second_layer, mask='B', name=str(i + 1)),
            Rectifier()])
    conv_list.extend([
        ConvolutionalNoFlip((1, 1), h * n_channel, mask='B',
                            name=str(n_layer + 1)),
        Rectifier()])
    conv_list.extend([
        ConvolutionalNoFlip((1, 1), h * n_channel, mask='B',
                            name=str(n_layer + 2)),
        Rectifier()])
    conv_list.extend([
        ConvolutionalNoFlip(*third_layer, mask='B',
                            name=str(n_layer + 3))])

    sequence = ConvolutionalSequence(
        conv_list,
        num_channels=n_channel,
        batch_size=batch,
        image_size=(img_dim, img_dim),
        border_mode='half',
        weights_init=IsotropicGaussian(std=0.05, mean=0),
        biases_init=Constant(0.02),
        tied_biases=False)
    sequence.initialize()
    x = sequence.apply(x)

    if MODE == '256ary':
        # One 256-way softmax per sub-pixel.
        x = x.reshape(
            (-1, 256, n_channel, img_dim, img_dim)).dimshuffle(
            0, 2, 3, 4, 1)
        x = x.reshape((-1, 256))
        x_hat = Softmax().apply(x)
        inp = T.cast(inputs, 'int64').flatten()
        cost = (CategoricalCrossEntropy().apply(inp, x_hat)
                * img_dim * img_dim)
        cost_bits_dim = categorical_crossentropy(log_softmax(x), inp)
    else:
        # Bernoulli likelihood on binary pixels.
        x_hat = Logistic().apply(x)
        cost = (BinaryCrossEntropy().apply(inputs, x_hat)
                * img_dim * img_dim)
        cost_bits_dim = -(inputs * T.log2(x_hat)
                          + (1.0 - inputs) * T.log2(1.0 - x_hat)).mean()

    cost_bits_dim.name = "nnl_bits_dim"
    cost.name = 'loglikelihood_nat'
    return cost, cost_bits_dim
def build_and_run(label, config):
    """Construct the conv + MLP model from ``config`` and run training.

    Duplicate (auto-formatted) variant of ``build_and_run`` kept as in
    the original file; ``label`` names checkpoints and the Bokeh plot.
    """
    # ----- hyper-parameters ------------------------------------------
    (num_epochs, num_batches, num_channels, image_shape, filter_size,
     num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size,
     activation, mlp_activation) = (
        config['num_epochs'], config['num_batches'],
        config['num_channels'], config['image_shape'],
        config['filter_size'], config['num_filter'],
        config['pooling_sizes'], config['mlp_hiddens'],
        config['output_size'], config['batch_size'],
        config['activation'], config['mlp_activation'])

    lambda_l1 = 0.000025
    lambda_l2 = 0.000025

    print("Building model")
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    # Interleaved conv / activation / max-pool bricks.
    conv_parameters = zip(filter_size, num_filter)
    conv_layers = list(interleave([
        (Convolutional(filter_size=filter_size,
                       num_filters=num_filter,
                       name='conv_{}'.format(i))
         for i, (filter_size, num_filter) in enumerate(conv_parameters)),
        (activation),
        (MaxPooling(size, name='pool_{}'.format(i))
         for i, size in enumerate(pooling_sizes))]))

    conv_sequence = ConvolutionalSequence(conv_layers, num_channels,
                                          image_size=image_shape,
                                          weights_init=Uniform(width=0.2),
                                          biases_init=Constant(0.))
    conv_sequence.initialize()

    top_mlp_dims = ([np.prod(conv_sequence.get_dim('output'))]
                    + mlp_hiddens + [output_size])
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation, top_mlp_dims,
              weights_init=Uniform(0, 0.2), biases_init=Constant(0.))
    mlp.initialize()
    predict = mlp.apply(out)

    cost = CategoricalCrossEntropy().apply(
        y.flatten(), predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)
    # Two named copies so the same metric can feed two plots.
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    # ----- regularization --------------------------------------------
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
    l2_penalty = T.sum([lambda_l2 * (W ** 2).sum()
                        for i, W in enumerate(weights + biases)])
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1 * T.abs_(z).sum()
                        for z in weights + biases])
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'

    # ----- algorithm -------------------------------------------------
    algorithm = GradientDescent(cost=costreg,
                                parameters=cg.parameters,
                                step_rule=Adam())

    # ----- data ------------------------------------------------------
    istest = 'test' in config.keys()
    train_stream, valid_stream, test_stream = get_stream(
        batch_size, image_shape, test=istest)

    # ----- extensions ------------------------------------------------
    checkpoint = Checkpoint('models/best_' + label + '.tar')
    checkpoint.add_condition(
        ['after_epoch'],
        predicate=OnLogRecord('valid_error_rate_best_so_far'))

    # NOTE(review): the original Plot channel list was garbled by lost
    # line breaks; the two unambiguous groups are kept and the disabled
    # train_costreg/gradient-norm alternatives dropped.
    plot = Plot(label,
                channels=[['train_error_rate', 'valid_error_rate'],
                          ['valid_cost', 'valid_error_rate2']],
                server_url="http://hades.calculquebec.ca:5042")

    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs,
                    after_n_batches=num_batches),
        DataStreamMonitoring([cost, error_rate, error_rate2],
                             valid_stream, prefix="valid"),
        TrainingDataMonitoring([costreg, error_rate, error_rate2,
                                grad_norm, l2_penalty, l1_penalty],
                               prefix="train", after_epoch=True),
        plot,
        ProgressBar(),
        Printing(),
        TrackTheBest('valid_error_rate', min),  # keep best
        checkpoint,                             # save best
        FinishIfNoImprovementAfter('valid_error_rate_best_so_far',
                                   epochs=4)]   # early stopping

    model = Model(cost)
    main_loop = MainLoop(algorithm, data_stream=train_stream,
                         model=model, extensions=extensions)
    main_loop.run()
def build_submodel(input_shape,
                   output_dim,
                   L_dim_conv_layers,
                   L_filter_size,
                   L_pool_size,
                   L_activation_conv,
                   L_dim_full_layers,
                   L_activation_full,
                   L_exo_dropout_conv_layers,
                   L_exo_dropout_full_layers,
                   L_endo_dropout_conv_layers,
                   L_endo_dropout_full_layers,
                   L_border_mode=None,
                   L_filter_step=None,
                   L_pool_step=None):
    """Build a conv + fully-connected classifier graph from per-layer specs.

    Every ``L_*`` argument is a per-layer list; they must all have the same
    length as ``L_dim_conv_layers`` (or that length + 1 for the ``*_full``
    dropout lists, whose first entry describes the conv/full boundary).

    "exo" dropout shrinks the layer widths up front (filters/units are
    removed before construction); "endo" dropout is applied afterwards on
    the computation graph via ``apply_dropout``.

    Returns a tuple ``(cg, error_rate, cost, D_params, D_kind)`` where
    ``cg`` is the (possibly dropout-transformed) ComputationGraph.

    NOTE(review): Python 2 code (print statements, xrange).
    """

    # TO DO : target size and name of the features
    x = T.tensor4('features')
    y = T.imatrix('targets')

    assert len(input_shape) == 3, "input_shape must be a 3d tensor"

    num_channels = input_shape[0]
    image_size = tuple(input_shape[1:])
    print image_size
    print num_channels
    # The incoming `output_dim` is the number of classes; remember it before
    # the name is reused below for intermediate layer sizes.
    prediction = output_dim

    # CONVOLUTION
    output_conv = x
    output_dim = num_channels*np.prod(image_size)
    conv_layers = []
    # Validate that every per-layer list lines up, filling in defaults for
    # the optional step/border specs.
    assert len(L_dim_conv_layers) == len(L_filter_size)
    if L_filter_step is None:
        L_filter_step = [None] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_pool_size)
    if L_pool_step is None:
        L_pool_step = [None] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_pool_step)
    assert len(L_dim_conv_layers) == len(L_activation_conv)
    if L_border_mode is None:
        L_border_mode = ["valid"] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_border_mode)
    assert len(L_dim_conv_layers) == len(L_endo_dropout_conv_layers)
    assert len(L_dim_conv_layers) == len(L_exo_dropout_conv_layers)

    # regarding the batch dropout : the dropout is applied on the filter
    # which is equivalent to the output dimension
    # you have to look at the dropout_rate of the next layer
    # that is why we need to have the first dropout value of L_exo_dropout_full_layers
    # the first value has to be 0.0 in this context, and we'll
    # assume that it is, but let's have an assert
    assert L_exo_dropout_conv_layers[0] == 0.0, "L_exo_dropout_conv_layers[0] has to be 0.0 in this context. There are ways to make it work, of course, but we don't support this with this scripts."

    # here modifitication of L_exo_dropout_conv_layers
    # Shift left by one: each layer is shrunk by the dropout rate of the
    # layer that *consumes* its output; the conv/full boundary rate comes
    # from the first entry of L_exo_dropout_full_layers.
    L_exo_dropout_conv_layers = L_exo_dropout_conv_layers[1:] + [L_exo_dropout_full_layers[0]]

    if len(L_dim_conv_layers):
        for (num_filters, filter_size, filter_step,
             pool_size, pool_step, activation_str,
             border_mode, dropout, index) in zip(L_dim_conv_layers,
                                                 L_filter_size,
                                                 L_filter_step,
                                                 L_pool_size,
                                                 L_pool_step,
                                                 L_activation_conv,
                                                 L_border_mode,
                                                 L_exo_dropout_conv_layers,
                                                 xrange(len(L_dim_conv_layers))):

            # convert filter_size and pool_size in tuple
            filter_size = tuple(filter_size)

            if filter_step is None:
                filter_step = (1, 1)
            else:
                filter_step = tuple(filter_step)

            # pool_size (0, 0) is the sentinel for "no pooling layer".
            if pool_size is None:
                pool_size = (0,0)
            else:
                pool_size = tuple(pool_size)

            # TO DO : leaky relu
            if activation_str.lower() == 'rectifier':
                activation = Rectifier().apply
            elif activation_str.lower() == 'tanh':
                activation = Tanh().apply
            elif activation_str.lower() in ['sigmoid', 'logistic']:
                activation = Logistic().apply
            elif activation_str.lower() in ['id', 'identity']:
                activation = Identity().apply
            else:
                # NOTE(review): the message is never %-formatted; the raise
                # passes (template, value) as two Exception args.
                raise Exception("unknown activation function : %s", activation_str)

            # Exo dropout: pre-shrink the number of filters instead of
            # masking activations at run time.
            assert 0.0 <= dropout and dropout < 1.0
            num_filters = num_filters - int(num_filters*dropout)

            print "border_mode : %s" % border_mode

            # filter_step
            # http://blocks.readthedocs.org/en/latest/api/bricks.html#module-blocks.bricks.conv
            kwargs = {}
            if filter_step is None or filter_step == (1,1):
                pass
            else:
                # there's a bit of a mix of names because `Convolutional` takes
                # a "step" argument, but `ConvolutionActivation` takes "conv_step" argument
                kwargs['conv_step'] = filter_step

            if (pool_size[0] == 0 and pool_size[1] == 0):
                # No pooling requested for this layer: conv + activation only.
                layer_conv = ConvolutionalActivation(activation=activation,
                                                     filter_size=filter_size,
                                                     num_filters=num_filters,
                                                     border_mode=border_mode,
                                                     name="layer_%d" % index,
                                                     **kwargs)
            else:
                if pool_step is None:
                    pass
                else:
                    kwargs['pooling_step'] = tuple(pool_step)

                layer_conv = ConvolutionalLayer(activation=activation,
                                                filter_size=filter_size,
                                                num_filters=num_filters,
                                                border_mode=border_mode,
                                                pooling_size=pool_size,
                                                name="layer_%d" % index,
                                                **kwargs)

            conv_layers.append(layer_conv)

        convnet = ConvolutionalSequence(conv_layers, num_channels=num_channels,
                                        image_size=image_size,
                                        weights_init=Uniform(width=0.1),
                                        biases_init=Constant(0.0),
                                        name="conv_section")
        convnet.push_allocation_config()
        convnet.initialize()
        output_dim = np.prod(convnet.get_dim('output'))
        output_conv = convnet.apply(output_conv)

    # Flatten to (batch, features) for the fully-connected section.  With no
    # conv layers this flattens the raw input x.
    output_conv = Flattener().apply(output_conv)

    # FULLY CONNECTED
    output_mlp = output_conv
    full_layers = []
    assert len(L_dim_full_layers) == len(L_activation_full)
    assert len(L_dim_full_layers) + 1 == len(L_endo_dropout_full_layers)
    assert len(L_dim_full_layers) + 1 == len(L_exo_dropout_full_layers)

    # reguarding the batch dropout : the dropout is applied on the filter
    # which is equivalent to the output dimension
    # you have to look at the dropout_rate of the next layer
    # that is why we throw away the first value of L_exo_dropout_full_layers
    L_exo_dropout_full_layers = L_exo_dropout_full_layers[1:]

    pre_dim = output_dim
    print "When constructing the model, the output_dim of the conv section is %d." % output_dim
    if len(L_dim_full_layers):
        # Layer indices continue after the conv layers so that the global
        # "layer_%d" naming (used again by the endo-dropout regex below)
        # stays unique.
        for (dim, activation_str,
             dropout, index) in zip(L_dim_full_layers,
                                    L_activation_full,
                                    L_exo_dropout_full_layers,
                                    range(len(L_dim_conv_layers),
                                          len(L_dim_conv_layers)
                                          + len(L_dim_full_layers))):

            # TO DO : leaky relu
            if activation_str.lower() == 'rectifier':
                activation = Rectifier().apply
            elif activation_str.lower() == 'tanh':
                activation = Tanh().apply
            elif activation_str.lower() in ['sigmoid', 'logistic']:
                activation = Logistic().apply
            elif activation_str.lower() in ['id', 'identity']:
                activation = Identity().apply
            else:
                raise Exception("unknown activation function : %s", activation_str)

            assert 0.0 <= dropout and dropout < 1.0
            dim = dim - int(dim*dropout)
            print "When constructing the fully-connected section, we apply dropout %f to add an MLP going from pre_dim %d to dim %d." % (dropout, pre_dim, dim)

            layer_full = MLP(activations=[activation], dims=[pre_dim, dim],
                             weights_init=Uniform(width=0.1),
                             biases_init=Constant(0.0),
                             name="layer_%d" % index)
            layer_full.initialize()
            full_layers.append(layer_full)

            pre_dim = dim

        for layer in full_layers:
            output_mlp = layer.apply(output_mlp)

        output_dim = L_dim_full_layers[-1] - int(L_dim_full_layers[-1]*L_exo_dropout_full_layers[-1])

    # COST FUNCTION
    output_layer = Linear(output_dim, prediction,
                          weights_init=Uniform(width=0.1),
                          biases_init=Constant(0.0),
                          name="layer_"+str(len(L_dim_conv_layers)+
                                            len(L_dim_full_layers)))
    output_layer.initialize()
    full_layers.append(output_layer)
    y_pred = output_layer.apply(output_mlp)
    y_hat = Softmax().apply(y_pred)

    # SOFTMAX and log likelihood
    # NOTE(review): y_pred is itself replaced by a Softmax output here, and
    # then fed to CategoricalCrossEntropy below; the commented alternative
    # expects raw network outputs instead — confirm which convention the
    # training scripts rely on.
    y_pred = Softmax().apply(y_pred)
    # be careful. one version expects the output of a softmax; the other expects just the
    # output of the network
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_pred)
    #cost = Softmax().categorical_cross_entropy(y.flatten(), y_pred)
    cost.name = "cost"

    # Misclassification
    error_rate_brick = MisclassificationRate()
    error_rate = error_rate_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error_rate"

    # put names
    D_params, D_kind = build_params(x, T.matrix(), conv_layers, full_layers)

    # test computation graph
    cg = ComputationGraph(cost)

    # DROPOUT
    # Endo dropout: match each configured rate to the layer whose apply-input
    # variable is named "layer_<index>_apply..." and transform the graph.
    L_endo_dropout = L_endo_dropout_conv_layers + L_endo_dropout_full_layers

    cg_dropout = cg
    inputs = VariableFilter(roles=[INPUT])(cg.variables)

    for (index, drop_rate) in enumerate(L_endo_dropout):
        for input_ in inputs:
            m = re.match(r"layer_(\d+)_apply.*", input_.name)
            if m and index == int(m.group(1)):
                if drop_rate < 0.0001:
                    print "Skipped applying dropout on %s because the dropout rate was under 0.0001." % input_.name
                    break
                else:
                    # NOTE(review): each call transforms the *original* cg,
                    # not the accumulated cg_dropout — so only the last
                    # matched layer's dropout survives; verify intent.
                    cg_dropout = apply_dropout(cg, [input_], drop_rate)
                    print "Applied dropout %f on %s." % (drop_rate, input_.name)
                    break

    cg = cg_dropout

    return (cg, error_rate, cost, D_params, D_kind)
def create_model_bricks(z_dim, image_size, depth):
    """Build the encoder/decoder bricks of a convolutional autoencoder.

    Parameters
    ----------
    z_dim : int
        Dimensionality of the latent code (last dim of the encoder MLP).
    image_size : int
        Side length of the (square) input images.
    depth : int
        Number of conv/downsample stages (0-5); each extra stage adds a
        conv-conv-stride2 triple to the encoder and a mirrored
        conv-conv-transpose triple to the decoder.

    Returns
    -------
    (encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp)
        Initialized Blocks bricks.
    """
    g_image_size = image_size
    # Feature-map side length after each 2x downsampling stage.  Use floor
    # division so these stay ints: under Python 3, `/` would produce floats
    # and break `original_image_size`, which ConvolutionalTranspose expects
    # as integer sizes.  (Same values as before under Python 2; consistent
    # with `encoder_mlp.output_dim // 2` below.)
    g_image_size2 = g_image_size // 2
    g_image_size3 = g_image_size // 4
    g_image_size4 = g_image_size // 8
    g_image_size5 = g_image_size // 16

    encoder_layers = []
    if depth > 0:
        encoder_layers = encoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=32, name='conv1'),
            SpatialBatchNormalization(name='batch_norm1'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=32, name='conv2'),
            SpatialBatchNormalization(name='batch_norm2'),
            Rectifier(),
            # Strided conv performs the 2x downsampling.
            Convolutional(filter_size=(2, 2), step=(2, 2),
                          num_filters=32, name='conv3'),
            SpatialBatchNormalization(name='batch_norm3'),
            Rectifier()
        ]
    if depth > 1:
        encoder_layers = encoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=64, name='conv4'),
            SpatialBatchNormalization(name='batch_norm4'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=64, name='conv5'),
            SpatialBatchNormalization(name='batch_norm5'),
            Rectifier(),
            Convolutional(filter_size=(2, 2), step=(2, 2),
                          num_filters=64, name='conv6'),
            SpatialBatchNormalization(name='batch_norm6'),
            Rectifier()
        ]
    if depth > 2:
        encoder_layers = encoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=128, name='conv7'),
            SpatialBatchNormalization(name='batch_norm7'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=128, name='conv8'),
            SpatialBatchNormalization(name='batch_norm8'),
            Rectifier(),
            Convolutional(filter_size=(2, 2), step=(2, 2),
                          num_filters=128, name='conv9'),
            SpatialBatchNormalization(name='batch_norm9'),
            Rectifier()
        ]
    if depth > 3:
        encoder_layers = encoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=256, name='conv10'),
            SpatialBatchNormalization(name='batch_norm10'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=256, name='conv11'),
            SpatialBatchNormalization(name='batch_norm11'),
            Rectifier(),
            Convolutional(filter_size=(2, 2), step=(2, 2),
                          num_filters=256, name='conv12'),
            SpatialBatchNormalization(name='batch_norm12'),
            Rectifier(),
        ]
    if depth > 4:
        encoder_layers = encoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=512, name='conv13'),
            SpatialBatchNormalization(name='batch_norm13'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=512, name='conv14'),
            SpatialBatchNormalization(name='batch_norm14'),
            Rectifier(),
            Convolutional(filter_size=(2, 2), step=(2, 2),
                          num_filters=512, name='conv15'),
            SpatialBatchNormalization(name='batch_norm15'),
            Rectifier()
        ]

    # Decoder mirrors the encoder: deepest stage first, each stage ending in
    # a 2x ConvolutionalTranspose upsampling back to the matching size.
    decoder_layers = []
    if depth > 4:
        decoder_layers = decoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=512, name='conv_n3'),
            SpatialBatchNormalization(name='batch_norm_n3'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=512, name='conv_n2'),
            SpatialBatchNormalization(name='batch_norm_n2'),
            Rectifier(),
            ConvolutionalTranspose(
                filter_size=(2, 2), step=(2, 2),
                original_image_size=(g_image_size5, g_image_size5),
                num_filters=512, name='conv_n1'),
            SpatialBatchNormalization(name='batch_norm_n1'),
            Rectifier()
        ]
    if depth > 3:
        decoder_layers = decoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=256, name='conv1'),
            SpatialBatchNormalization(name='batch_norm1'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=256, name='conv2'),
            SpatialBatchNormalization(name='batch_norm2'),
            Rectifier(),
            ConvolutionalTranspose(
                filter_size=(2, 2), step=(2, 2),
                original_image_size=(g_image_size4, g_image_size4),
                num_filters=256, name='conv3'),
            SpatialBatchNormalization(name='batch_norm3'),
            Rectifier()
        ]
    if depth > 2:
        decoder_layers = decoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=128, name='conv4'),
            SpatialBatchNormalization(name='batch_norm4'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=128, name='conv5'),
            SpatialBatchNormalization(name='batch_norm5'),
            Rectifier(),
            ConvolutionalTranspose(
                filter_size=(2, 2), step=(2, 2),
                original_image_size=(g_image_size3, g_image_size3),
                num_filters=128, name='conv6'),
            SpatialBatchNormalization(name='batch_norm6'),
            Rectifier()
        ]
    if depth > 1:
        decoder_layers = decoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=64, name='conv7'),
            SpatialBatchNormalization(name='batch_norm7'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=64, name='conv8'),
            SpatialBatchNormalization(name='batch_norm8'),
            Rectifier(),
            ConvolutionalTranspose(
                filter_size=(2, 2), step=(2, 2),
                original_image_size=(g_image_size2, g_image_size2),
                num_filters=64, name='conv9'),
            SpatialBatchNormalization(name='batch_norm9'),
            Rectifier()
        ]
    if depth > 0:
        decoder_layers = decoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=32, name='conv10'),
            SpatialBatchNormalization(name='batch_norm10'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=32, name='conv11'),
            SpatialBatchNormalization(name='batch_norm11'),
            Rectifier(),
            ConvolutionalTranspose(
                filter_size=(2, 2), step=(2, 2),
                original_image_size=(g_image_size, g_image_size),
                num_filters=32, name='conv12'),
            SpatialBatchNormalization(name='batch_norm12'),
            Rectifier()
        ]

    # Final 1x1 conv maps back to 3 channels; Logistic squashes to [0, 1].
    decoder_layers = decoder_layers + [
        Convolutional(filter_size=(1, 1), num_filters=3, name='conv_out'),
        Logistic()
    ]

    print("creating model of depth {} with {} encoder and {} decoder layers".
          format(depth, len(encoder_layers), len(decoder_layers)))

    encoder_convnet = ConvolutionalSequence(
        layers=encoder_layers,
        num_channels=3,
        image_size=(g_image_size, g_image_size),
        use_bias=False,
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='encoder_convnet')
    encoder_convnet.initialize()

    encoder_filters = numpy.prod(encoder_convnet.get_dim('output'))

    encoder_mlp = MLP(
        dims=[encoder_filters, 1000, z_dim],
        activations=[
            Sequence([BatchNormalization(1000).apply,
                      Rectifier().apply], name='activation1'),
            Identity().apply
        ],
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='encoder_mlp')
    encoder_mlp.initialize()

    decoder_mlp = BatchNormalizedMLP(
        activations=[Rectifier(), Rectifier()],
        # Half of the encoder output: the other half parameterizes the
        # variance head, so only z_dim // 2 values feed the decoder.
        dims=[encoder_mlp.output_dim // 2, 1000, encoder_filters],
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='decoder_mlp')
    decoder_mlp.initialize()

    decoder_convnet = ConvolutionalSequence(
        layers=decoder_layers,
        num_channels=encoder_convnet.get_dim('output')[0],
        image_size=encoder_convnet.get_dim('output')[1:],
        use_bias=False,
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='decoder_convnet')
    decoder_convnet.initialize()

    return encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp
def run_experiment():
    """Sanity-check per-example squared gradient norms on a tiny convnet.

    Builds a one-layer ConvolutionalLayer model on 5x5 RGB inputs, forces
    all filter weights and inputs to 1.0, then compares:

    * method_00 - per-example squared gradient norms computed analytically
      by ``get_sum_square_norm_gradients_conv_transformations`` (project
      helper; defined elsewhere), and
    * method_02 - the squared norm of the minibatch-summed bias gradients,
      re-evaluated one example at a time.

    Prints the two quantities side by side plus their max-abs difference
    and ratio.  NOTE(review): Python 2 code (print statements).
    """

    np.random.seed(42)

    X = tensor.tensor4('features')
    nbr_channels = 3
    image_shape = (5, 5)

    conv_layers = [ConvolutionalLayer(filter_size=(2, 2),
                                      num_filters=10,
                                      activation=Rectifier().apply,
                                      border_mode='valid',
                                      pooling_size=(1, 1),
                                      weights_init=Uniform(width=0.1),
                                      #biases_init=Uniform(width=0.01),
                                      biases_init=Constant(0.0),
                                      name='conv0')]
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels=nbr_channels,
                                          image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()

    flattener = Flattener()
    conv_output = conv_sequence.apply(X)
    y_hat = flattener.apply(conv_output)
    # Whatever. Not important since we're not going to actually train anything.
    cost = tensor.sqr(y_hat).sum()

    #L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[FILTER, BIAS])(ComputationGraph([y_hat]).variables)]
    # Only bias gradients are compared here; the FILTER variant is kept
    # above for reference.
    L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[BIAS])(ComputationGraph([y_hat]).variables)]

    # works on the sum of the gradients in a mini-batch
    sum_square_norm_gradients_method_02 = sum([tensor.sqr(g).sum() for g in L_grads_method_02])

    D_by_layer = get_conv_layers_transformation_roles(ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_conv_transformations(D_by_layer, cost)

    # why does this thing depend on N again ?
    # I don't think I've used a cost that divides by N.

    N = 2
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0], image_shape[1]).astype(np.float32)
    #Xtrain[1:,:,:,:] = 0.0
    # Deterministic input: every entry set to 1.0 (the randn draw above is
    # overwritten on purpose).
    Xtrain[:, :, :, :] = 1.0

    # Force all convolution filter weights to 1.0 so gradients are easy to
    # reason about by hand.
    convolution_filter_variable = VariableFilter(roles=[FILTER])(ComputationGraph([y_hat]).variables)[0]
    convolution_filter_variable_value = convolution_filter_variable.get_value()
    convolution_filter_variable_value[:, :, :, :] = 1.0
    #convolution_filter_variable_value[0,0,:,:] = 1.0
    convolution_filter_variable.set_value(convolution_filter_variable_value)

    f = theano.function([X], [cost,
                              individual_sum_square_norm_gradients_method_00,
                              sum_square_norm_gradients_method_02])

    # Single shot over the whole minibatch ...
    [c, v0, gs2] = f(Xtrain)

    #print "[c, v0, gs2]"

    # ... versus one example at a time (batch of size 1 each call).
    L_c, L_v0, L_gs2 = ([], [], [])
    for n in range(N):
        [nc, nv0, ngs2] = f(Xtrain[n, :, :, :].reshape((1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1, -1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs2).reshape((1, -1)) / v0.reshape((1, -1))
def load_vgg_classifier():
    """Loads the VGG19 classifier into a brick.

    Relies on ``vgg19_normalized.pkl`` containing the model parameters.

    The brick mirrors the VGG19 feature extractor (16 conv layers in five
    groups), with AveragePooling in place of the usual max-pooling and
    ``image_size=(32, 32)``.  Weights are loaded from the pickle below;
    the Constant(0) initializers are placeholders that get overwritten.

    Returns
    -------
    convnet : :class:`blocks.bricks.conv.ConvolutionalSequence`
        VGG19 convolutional brick.
    """
    convnet = ConvolutionalSequence(
        layers=[
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=64,
                name='conv1_1'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=64,
                name='conv1_2'),
            Rectifier(),
            AveragePooling(
                pooling_size=(2, 2),
                name='pool1'),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=128,
                name='conv2_1'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=128,
                name='conv2_2'),
            Rectifier(),
            AveragePooling(
                pooling_size=(2, 2),
                name='pool2'),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=256,
                name='conv3_1'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=256,
                name='conv3_2'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=256,
                name='conv3_3'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=256,
                name='conv3_4'),
            Rectifier(),
            AveragePooling(
                pooling_size=(2, 2),
                name='pool3'),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=512,
                name='conv4_1'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=512,
                name='conv4_2'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=512,
                name='conv4_3'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=512,
                name='conv4_4'),
            Rectifier(),
            AveragePooling(
                pooling_size=(2, 2),
                name='pool4'),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=512,
                name='conv5_1'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=512,
                name='conv5_2'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=512,
                name='conv5_3'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                border_mode=(1, 1),
                num_filters=512,
                name='conv5_4'),
            Rectifier(),
            AveragePooling(
                pooling_size=(2, 2),
                name='pool5'),
        ],
        num_channels=3,
        image_size=(32, 32),
        tied_biases=True,
        weights_init=Constant(0),
        biases_init=Constant(0),
        name='convnet')
    convnet.initialize()

    # SECURITY NOTE(review): unpickling executes arbitrary code; only load
    # vgg19_normalized.pkl from a trusted source.
    with open('vgg19_normalized.pkl', 'rb') as f:
        if six.PY3:
            # latin1 encoding is needed to read a Python-2-era pickle of
            # numpy arrays under Python 3.
            data = cPickle.load(f, encoding='latin1')
        else:
            data = cPickle.load(f)
    # The pickle stores parameters as a flat [W0, b0, W1, b1, ...] list.
    parameter_values = data['param values']
    conv_weights = parameter_values[::2]
    conv_biases = parameter_values[1::2]
    # Positions of the 16 Convolutional bricks inside `layers` above
    # (skipping the interleaved Rectifier and AveragePooling entries);
    # order must match the pickle's parameter order.
    conv_indices = [0, 2, 5, 7, 10, 12, 14, 16, 19, 21, 23, 25, 28, 30,
                    32, 34]
    conv_layers = [convnet.layers[i] for i in conv_indices]
    for layer, W_val, b_val in zip(conv_layers, conv_weights, conv_biases):
        W, b = layer.parameters
        W.set_value(W_val)
        b.set_value(b_val)

    return convnet
def create_model_bricks(image_size, depth):
    """Build a variable-depth convolutional classifier.

    Each enabled resolution level contributes two conv/batch-norm/ReLU
    triples: a stride-1 convolution followed by a stride-2 (downsampling)
    3x3 convolution.  Levels are enabled one by one as ``depth`` grows
    (the original celebA64 configuration was depth=3, i.e. up to
    batch_norm6).

    Returns a tuple ``(convnet, mlp, n_layers)``.
    """
    # (num_filters, filter size of the level's first convolution) for each
    # of the up-to-eight resolution levels.
    level_specs = [
        (32, (4, 4)),
        (64, (4, 4)),
        (128, (3, 3)),
        (256, (3, 3)),
        (512, (3, 3)),
        (512, (3, 3)),
        (512, (3, 3)),
        (512, (3, 3)),
    ]

    layers = []
    for level, (n_filters, first_fs) in enumerate(level_specs):
        if depth <= level:
            break
        # Layer names continue the global numbering: level k owns
        # conv/batch_norm indices 2k+1 and 2k+2.
        first_idx = 2 * level + 1
        second_idx = first_idx + 1
        layers += [
            Convolutional(
                filter_size=first_fs,
                num_filters=n_filters,
                name='conv{}'.format(first_idx)),
            SpatialBatchNormalization(name='batch_norm{}'.format(first_idx)),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                step=(2, 2),
                num_filters=n_filters,
                name='conv{}'.format(second_idx)),
            SpatialBatchNormalization(name='batch_norm{}'.format(second_idx)),
            Rectifier(),
        ]

    print("creating model of depth {} with {} layers".format(depth, len(layers)))

    convnet = ConvolutionalSequence(
        layers=layers,
        num_channels=3,
        image_size=(image_size, image_size),
        use_bias=False,
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='convnet')
    convnet.initialize()

    mlp = BatchNormalizedMLP(
        activations=[Rectifier(), Logistic()],
        dims=[numpy.prod(convnet.get_dim('output')), 1000, 64],
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='mlp')
    mlp.initialize()

    return convnet, mlp, len(layers)
# Symbolic inputs: 4D image batch and integer class targets.
x = T.tensor4('features')
y = T.lmatrix('targets')

# Convolutional Layers
# Two conv+pool stages on 28x28 single-channel (MNIST-shaped) input:
# 3x3 convs with 16 then 32 filters, each followed by 2x2 pooling.
conv_layers = [
    ConvolutionalLayer(Rectifier().apply, (3, 3), 16, (2, 2), name='l1'),
    ConvolutionalLayer(Rectifier().apply, (3, 3), 32, (2, 2), name='l2')
]

convnet = ConvolutionalSequence(
    conv_layers, num_channels=1, image_size=(28, 28),
    weights_init=IsotropicGaussian(0.1), biases_init=Constant(0))
convnet.initialize()

# Flattened size of the conv section's output feature map.
output_dim = np.prod(convnet.get_dim('output'))
print(output_dim)

# Fully connected layers
features = Flattener().apply(convnet.apply(x))

# Hidden ReLU layer of 100 units, then a linear (no activation) 10-way
# output layer; `None` leaves the final layer un-activated.
mlp = MLP(activations=[Rectifier(), None], dims=[output_dim, 100, 10],
          weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
mlp.initialize()

y_hat = mlp.apply(features)
# Two conv+pool stages; `filter_sizes`, the `b` module alias, and the input
# variable `X` are defined earlier in the file (not visible here).
num_filters = [128, 256]
pooling_sizes = [(2, 2)] * 2
activation = Logistic().apply
conv_layers = [
    b.ConvolutionalLayer(activation, filter_size, num_filters_, pooling_size,
                         num_channels=3)
    for filter_size, num_filters_, pooling_size
    in zip(filter_sizes, num_filters, pooling_sizes)
]
convnet = ConvolutionalSequence(conv_layers, num_channels=3,
                                image_size=(32, 32),
                                weights_init=Uniform(0, 0.2),
                                biases_init=Constant(0.))
convnet.initialize()
conv_features = Flattener().apply(convnet.apply(X))

# MLP
# NOTE(review): dims[0] == 256 assumes the flattened conv output is 256-d —
# confirm against the conv configuration above.
mlp = MLP(activations=[Logistic(name='sigmoid_0'),
          Softmax(name='softmax_1')], dims=[256, 256, 256, 2],
          weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
# NOTE(review): the next two statements are no-op expression statements —
# they look like residue pasted from an interactive session demonstrating
# the child-brick names.
[child.name for child in mlp.children]
['linear_0', 'sigmoid_0', 'linear_1', 'softmax_1']
Y = mlp.apply(conv_features)
mlp.initialize()

# Setting up the cost function
def create_model_bricks():
    """Build a fixed-size (64x64 RGB) convolutional autoencoder.

    Encoder: four stages of conv-conv-stride2 (32/64/128/256 filters)
    followed by an MLP to a 1000-d code.  Decoder mirrors it with
    ConvolutionalTranspose upsampling back to 64x64 and a final 1x1
    conv + Logistic output in [0, 1].

    Returns ``(encoder_convnet, encoder_mlp, decoder_convnet,
    decoder_mlp)``, all initialized.
    """
    encoder_convnet = ConvolutionalSequence(
        layers=[
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=32, name='conv1'),
            SpatialBatchNormalization(name='batch_norm1'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=32, name='conv2'),
            SpatialBatchNormalization(name='batch_norm2'),
            Rectifier(),
            # Strided 2x2 conv performs the downsampling for this stage.
            Convolutional(filter_size=(2, 2), step=(2, 2),
                          num_filters=32, name='conv3'),
            SpatialBatchNormalization(name='batch_norm3'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=64, name='conv4'),
            SpatialBatchNormalization(name='batch_norm4'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=64, name='conv5'),
            SpatialBatchNormalization(name='batch_norm5'),
            Rectifier(),
            Convolutional(filter_size=(2, 2), step=(2, 2),
                          num_filters=64, name='conv6'),
            SpatialBatchNormalization(name='batch_norm6'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=128, name='conv7'),
            SpatialBatchNormalization(name='batch_norm7'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=128, name='conv8'),
            SpatialBatchNormalization(name='batch_norm8'),
            Rectifier(),
            Convolutional(filter_size=(2, 2), step=(2, 2),
                          num_filters=128, name='conv9'),
            SpatialBatchNormalization(name='batch_norm9'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=256, name='conv10'),
            SpatialBatchNormalization(name='batch_norm10'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=256, name='conv11'),
            SpatialBatchNormalization(name='batch_norm11'),
            Rectifier(),
            Convolutional(filter_size=(2, 2), step=(2, 2),
                          num_filters=256, name='conv12'),
            SpatialBatchNormalization(name='batch_norm12'),
            Rectifier(),
        ],
        num_channels=3,
        image_size=(64, 64),
        use_bias=False,
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='encoder_convnet')
    encoder_convnet.initialize()

    # Flattened size of the encoder's final feature map.
    encoder_filters = numpy.prod(encoder_convnet.get_dim('output'))

    encoder_mlp = MLP(
        dims=[encoder_filters, 1000, 1000],
        activations=[
            Sequence([BatchNormalization(1000).apply,
                      Rectifier().apply], name='activation1'),
            Identity().apply
        ],
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='encoder_mlp')
    encoder_mlp.initialize()

    decoder_mlp = BatchNormalizedMLP(
        activations=[Rectifier(), Rectifier()],
        # Half the encoder output feeds the decoder — presumably the other
        # half parameterizes a variance head (VAE-style); confirm in caller.
        dims=[encoder_mlp.output_dim // 2, 1000, encoder_filters],
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='decoder_mlp')
    decoder_mlp.initialize()

    decoder_convnet = ConvolutionalSequence(
        layers=[
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=256, name='conv1'),
            SpatialBatchNormalization(name='batch_norm1'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=256, name='conv2'),
            SpatialBatchNormalization(name='batch_norm2'),
            Rectifier(),
            # Each ConvolutionalTranspose doubles the spatial size
            # (8 -> 16 -> 32 -> 64).
            ConvolutionalTranspose(filter_size=(2, 2), step=(2, 2),
                                   original_image_size=(8, 8),
                                   num_filters=256, name='conv3'),
            SpatialBatchNormalization(name='batch_norm3'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=128, name='conv4'),
            SpatialBatchNormalization(name='batch_norm4'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=128, name='conv5'),
            SpatialBatchNormalization(name='batch_norm5'),
            Rectifier(),
            ConvolutionalTranspose(filter_size=(2, 2), step=(2, 2),
                                   original_image_size=(16, 16),
                                   num_filters=128, name='conv6'),
            SpatialBatchNormalization(name='batch_norm6'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=64, name='conv7'),
            SpatialBatchNormalization(name='batch_norm7'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=64, name='conv8'),
            SpatialBatchNormalization(name='batch_norm8'),
            Rectifier(),
            ConvolutionalTranspose(filter_size=(2, 2), step=(2, 2),
                                   original_image_size=(32, 32),
                                   num_filters=64, name='conv9'),
            SpatialBatchNormalization(name='batch_norm9'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=32, name='conv10'),
            SpatialBatchNormalization(name='batch_norm10'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=32, name='conv11'),
            SpatialBatchNormalization(name='batch_norm11'),
            Rectifier(),
            ConvolutionalTranspose(filter_size=(2, 2), step=(2, 2),
                                   original_image_size=(64, 64),
                                   num_filters=32, name='conv12'),
            SpatialBatchNormalization(name='batch_norm12'),
            Rectifier(),
            # 1x1 conv back to RGB; Logistic squashes outputs to [0, 1].
            Convolutional(filter_size=(1, 1), num_filters=3,
                          name='conv_out'),
            Logistic(),
        ],
        num_channels=encoder_convnet.get_dim('output')[0],
        image_size=encoder_convnet.get_dim('output')[1:],
        use_bias=False,
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='decoder_convnet')
    decoder_convnet.initialize()

    return encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp
def inception(image_shape, num_input, conv1, conv2, conv3, conv4, conv5,
              conv6, out, i):
    """Build a GoogLeNet-style Inception module and apply it to ``out``.

    Four parallel branches on the same ``num_input``-channel input:
      1. 1x1 conv (``conv1`` filters)
      2. 1x1 conv (``conv2``) -> 3x3 conv (``conv3``)
      3. 1x1 conv (``conv4``) -> 5x5 conv (``conv5``)
      4. 3x3 max-pool (stride 1, padded) -> 1x1 conv (``conv6``)
    Each conv is followed by BatchNormalization + Rectifier; 'half' border
    mode keeps the spatial size so the branch outputs can be concatenated
    on the channel axis.

    ``i`` is a running global index used only to generate unique brick
    names; it is advanced as bricks are created.  NOTE(review): the
    sequence names reuse whatever value ``i`` holds at that point, so the
    conv/batch/convSeq numbering is staggered rather than uniform —
    preserved as-is since renaming would change parameter paths.
    """
    layers1 = []
    layers2 = []
    layers3 = []
    layers4 = []

    # Branch 1: 1x1 convolution.
    layers1.append(Convolutional(filter_size=(1, 1),
                                 num_channels=num_input,
                                 num_filters=conv1,
                                 image_size=image_shape,
                                 border_mode='half',
                                 name='conv_{}'.format(i)))
    layers1.append(BatchNormalization(name='batch_{}'.format(i)))
    layers1.append(Rectifier())
    conv_sequence1 = ConvolutionalSequence(layers1,
                                           num_channels=num_input,
                                           image_size=image_shape,
                                           weights_init=Orthogonal(),
                                           use_bias=False,
                                           name='convSeq_{}'.format(i))
    conv_sequence1.initialize()
    out1 = conv_sequence1.apply(out)
    i = i + 1

    # Branch 2: 1x1 reduction then 3x3 convolution.
    layers2.append(Convolutional(filter_size=(1, 1),
                                 num_channels=num_input,
                                 num_filters=conv2,
                                 image_size=image_shape,
                                 border_mode='half',
                                 name='conv_{}'.format(i)))
    layers2.append(BatchNormalization(name='batch_{}'.format(i)))
    layers2.append(Rectifier())
    i = i + 1
    layers2.append(Convolutional(filter_size=(3, 3),
                                 num_channels=conv2,
                                 num_filters=conv3,
                                 image_size=image_shape,
                                 border_mode='half',
                                 name='conv_{}'.format(i)))
    layers2.append(BatchNormalization(name='batch_{}'.format(i)))
    layers2.append(Rectifier())
    conv_sequence2 = ConvolutionalSequence(layers2,
                                           num_channels=num_input,
                                           image_size=image_shape,
                                           weights_init=Orthogonal(),
                                           use_bias=False,
                                           name='convSeq_{}'.format(i))
    conv_sequence2.initialize()
    out2 = conv_sequence2.apply(out)
    i = i + 1

    # Branch 3: 1x1 reduction then 5x5 convolution.
    layers3.append(Convolutional(filter_size=(1, 1),
                                 num_channels=num_input,
                                 num_filters=conv4,
                                 image_size=image_shape,
                                 border_mode='half',
                                 name='conv_{}'.format(i)))
    layers3.append(BatchNormalization(name='batch_{}'.format(i)))
    layers3.append(Rectifier())
    i = i + 1
    layers3.append(Convolutional(filter_size=(5, 5),
                                 num_channels=conv4,
                                 num_filters=conv5,
                                 image_size=image_shape,
                                 border_mode='half',
                                 name='conv_{}'.format(i)))
    layers3.append(BatchNormalization(name='batch_{}'.format(i)))
    layers3.append(Rectifier())
    conv_sequence3 = ConvolutionalSequence(layers3,
                                           num_channels=num_input,
                                           image_size=image_shape,
                                           weights_init=Orthogonal(),
                                           use_bias=False,
                                           name='convSeq_{}'.format(i))
    conv_sequence3.initialize()
    out3 = conv_sequence3.apply(out)
    i = i + 1

    # Branch 4: stride-1 padded max-pool then 1x1 projection.
    layers4.append(MaxPooling((3, 3), step=(1, 1), padding=(1, 1),
                              name='pool_{}'.format(i)))
    layers4.append(Convolutional(filter_size=(1, 1),
                                 num_channels=num_input,
                                 num_filters=conv6,
                                 image_size=image_shape,
                                 border_mode='half',
                                 name='conv_{}'.format(i)))
    layers4.append(BatchNormalization(name='batch_{}'.format(i)))
    layers4.append(Rectifier())
    i = i + 1
    conv_sequence4 = ConvolutionalSequence(layers4,
                                           num_channels=num_input,
                                           image_size=image_shape,
                                           weights_init=Orthogonal(),
                                           use_bias=False,
                                           name='convSeq_{}'.format(i))
    conv_sequence4.initialize()
    out4 = conv_sequence4.apply(out)
    #Merge
    # Concatenate the four branch outputs along the channel axis.
    return T.concatenate([out1, out2, out3, out4], axis=1)
def create_model_bricks(z_dim, image_size, depth):
    """Build encoder/decoder bricks for a depth-configurable convolutional VAE.

    Parameters
    ----------
    z_dim : int
        Width of the encoder MLP output. The decoder MLP consumes
        ``z_dim // 2`` values (the output presumably holds mean and
        log-variance halves of the latent code).
    image_size : int
        Side length of the (square) input image.
    depth : int
        Number of downsampling stages (1-5). Each stage halves the
        spatial resolution in the encoder and is mirrored by an
        upsampling stage in the decoder.

    Returns
    -------
    tuple
        ``(encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp)``,
        all already initialized.
    """
    g_image_size = image_size
    # BUGFIX: use floor division. These sizes are forwarded to
    # ConvolutionalTranspose's original_image_size, which must be ints;
    # under Python 3 the old true division produced floats.
    g_image_size2 = g_image_size // 2
    g_image_size3 = g_image_size // 4
    g_image_size4 = g_image_size // 8
    g_image_size5 = g_image_size // 16

    encoder_layers = []
    if depth > 0:
        # Stage 1: two "same" 3x3 convs at 32 filters, then a 2x2 stride-2
        # downsampling conv. Deeper stages repeat the pattern with doubled
        # filter counts.
        encoder_layers = encoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=32, name="conv1"),
            SpatialBatchNormalization(name="batch_norm1"),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=32, name="conv2"),
            SpatialBatchNormalization(name="batch_norm2"),
            Rectifier(),
            Convolutional(filter_size=(2, 2), step=(2, 2), num_filters=32, name="conv3"),
            SpatialBatchNormalization(name="batch_norm3"),
            Rectifier(),
        ]
    if depth > 1:
        encoder_layers = encoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=64, name="conv4"),
            SpatialBatchNormalization(name="batch_norm4"),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=64, name="conv5"),
            SpatialBatchNormalization(name="batch_norm5"),
            Rectifier(),
            Convolutional(filter_size=(2, 2), step=(2, 2), num_filters=64, name="conv6"),
            SpatialBatchNormalization(name="batch_norm6"),
            Rectifier(),
        ]
    if depth > 2:
        encoder_layers = encoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=128, name="conv7"),
            SpatialBatchNormalization(name="batch_norm7"),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=128, name="conv8"),
            SpatialBatchNormalization(name="batch_norm8"),
            Rectifier(),
            Convolutional(filter_size=(2, 2), step=(2, 2), num_filters=128, name="conv9"),
            SpatialBatchNormalization(name="batch_norm9"),
            Rectifier(),
        ]
    if depth > 3:
        encoder_layers = encoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=256, name="conv10"),
            SpatialBatchNormalization(name="batch_norm10"),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=256, name="conv11"),
            SpatialBatchNormalization(name="batch_norm11"),
            Rectifier(),
            Convolutional(filter_size=(2, 2), step=(2, 2), num_filters=256, name="conv12"),
            SpatialBatchNormalization(name="batch_norm12"),
            Rectifier(),
        ]
    if depth > 4:
        encoder_layers = encoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=512, name="conv13"),
            SpatialBatchNormalization(name="batch_norm13"),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=512, name="conv14"),
            SpatialBatchNormalization(name="batch_norm14"),
            Rectifier(),
            Convolutional(filter_size=(2, 2), step=(2, 2), num_filters=512, name="conv15"),
            SpatialBatchNormalization(name="batch_norm15"),
            Rectifier(),
        ]

    decoder_layers = []
    if depth > 4:
        # Deepest decoder stage mirrors encoder stage 5; each stage ends in
        # a transposed 2x2 stride-2 conv that doubles the resolution back
        # toward the target size for this stage.
        decoder_layers = decoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=512, name="conv_n3"),
            SpatialBatchNormalization(name="batch_norm_n3"),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=512, name="conv_n2"),
            SpatialBatchNormalization(name="batch_norm_n2"),
            Rectifier(),
            ConvolutionalTranspose(
                filter_size=(2, 2),
                step=(2, 2),
                original_image_size=(g_image_size5, g_image_size5),
                num_filters=512,
                name="conv_n1",
            ),
            SpatialBatchNormalization(name="batch_norm_n1"),
            Rectifier(),
        ]
    if depth > 3:
        decoder_layers = decoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=256, name="conv1"),
            SpatialBatchNormalization(name="batch_norm1"),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=256, name="conv2"),
            SpatialBatchNormalization(name="batch_norm2"),
            Rectifier(),
            ConvolutionalTranspose(
                filter_size=(2, 2),
                step=(2, 2),
                original_image_size=(g_image_size4, g_image_size4),
                num_filters=256,
                name="conv3",
            ),
            SpatialBatchNormalization(name="batch_norm3"),
            Rectifier(),
        ]
    if depth > 2:
        decoder_layers = decoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=128, name="conv4"),
            SpatialBatchNormalization(name="batch_norm4"),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=128, name="conv5"),
            SpatialBatchNormalization(name="batch_norm5"),
            Rectifier(),
            ConvolutionalTranspose(
                filter_size=(2, 2),
                step=(2, 2),
                original_image_size=(g_image_size3, g_image_size3),
                num_filters=128,
                name="conv6",
            ),
            SpatialBatchNormalization(name="batch_norm6"),
            Rectifier(),
        ]
    if depth > 1:
        decoder_layers = decoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=64, name="conv7"),
            SpatialBatchNormalization(name="batch_norm7"),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=64, name="conv8"),
            SpatialBatchNormalization(name="batch_norm8"),
            Rectifier(),
            ConvolutionalTranspose(
                filter_size=(2, 2),
                step=(2, 2),
                original_image_size=(g_image_size2, g_image_size2),
                num_filters=64,
                name="conv9",
            ),
            SpatialBatchNormalization(name="batch_norm9"),
            Rectifier(),
        ]
    if depth > 0:
        decoder_layers = decoder_layers + [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=32, name="conv10"),
            SpatialBatchNormalization(name="batch_norm10"),
            Rectifier(),
            Convolutional(filter_size=(3, 3), border_mode=(1, 1), num_filters=32, name="conv11"),
            SpatialBatchNormalization(name="batch_norm11"),
            Rectifier(),
            ConvolutionalTranspose(
                filter_size=(2, 2),
                step=(2, 2),
                original_image_size=(g_image_size, g_image_size),
                num_filters=32,
                name="conv12",
            ),
            SpatialBatchNormalization(name="batch_norm12"),
            Rectifier(),
        ]
    # Final 1x1 projection back to RGB with a logistic squashing to [0, 1].
    decoder_layers = decoder_layers + [Convolutional(filter_size=(1, 1), num_filters=3, name="conv_out"),
                                       Logistic()]

    print(
        "creating model of depth {} with {} encoder and {} decoder layers".format(
            depth, len(encoder_layers), len(decoder_layers)
        )
    )

    encoder_convnet = ConvolutionalSequence(
        layers=encoder_layers,
        num_channels=3,
        image_size=(g_image_size, g_image_size),
        use_bias=False,
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name="encoder_convnet",
    )
    encoder_convnet.initialize()

    # Flattened size of the encoder's final feature map.
    encoder_filters = numpy.prod(encoder_convnet.get_dim("output"))

    encoder_mlp = MLP(
        dims=[encoder_filters, 1000, z_dim],
        activations=[
            Sequence([BatchNormalization(1000).apply, Rectifier().apply], name="activation1"),
            Identity().apply,
        ],
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name="encoder_mlp",
    )
    encoder_mlp.initialize()

    decoder_mlp = BatchNormalizedMLP(
        activations=[Rectifier(), Rectifier()],
        # Half of the encoder output feeds the decoder (the other half
        # presumably holds the log-variances).
        dims=[encoder_mlp.output_dim // 2, 1000, encoder_filters],
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name="decoder_mlp",
    )
    decoder_mlp.initialize()

    decoder_convnet = ConvolutionalSequence(
        layers=decoder_layers,
        num_channels=encoder_convnet.get_dim("output")[0],
        image_size=encoder_convnet.get_dim("output")[1:],
        use_bias=False,
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name="decoder_convnet",
    )
    decoder_convnet.initialize()

    return encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp
def __init__(self, rnn_dims, num_actions, data_X_np=None, data_y_np=None, width=32, height=32):
    """Build the conv + RNN reward/action network and its theano functions.

    rnn_dims     -- size used for the stored hidden-state array.
                    NOTE(review): the graph itself uses the hard-coded
                    RNN_DIMS = 100 -- confirm the mismatch is intended.
    num_actions  -- width of the softmax action output.
    data_X_np    -- optional feature array; random data is generated when
                    either data array is missing.
    data_y_np    -- optional target array.
    width/height -- spatial size of the input images.
    """
    ###############################################################
    #
    # Network and data setup
    #
    ##############################################################
    RNN_DIMS = 100
    NUM_ACTIONS = num_actions
    # NOTE(review): tensor5 is created but never used afterwards.
    tensor5 = T.TensorType('float32', [False, True, True, True, True])
    # Symbolic inputs: image batch, reward target, previous RNN state.
    self.x = T.tensor4('features')
    self.reward = T.tensor3('targets', dtype='float32')
    self.state = T.matrix('states', dtype='float32')
    self.hidden_states = []  # holds hidden states in np array form

    #data_X & data_Y supplied in init function now...
    if data_X_np is None or data_y_np is None:
        print 'you did not supply data at init'
        data_X_np = np.float32(np.random.normal(size=(1280, 1,1, width, height)))
        data_y_np = np.float32(np.random.normal(size=(1280, 1,1,1)))

    #data_states_np = np.float32(np.ones((1280, 1, 100)))
    # Stored RNN states start at zero: one row of rnn_dims per example.
    state_shape = (data_X_np.shape[0],rnn_dims)
    self.data_states_np = np.float32(np.zeros(state_shape))

    self.datastream = IterableDataset(dict(features=data_X_np,
                                           targets=data_y_np,
                                           states=self.data_states_np)).get_example_stream()
    self.datastream_test = IterableDataset(dict(features=data_X_np,
                                                targets=data_y_np,
                                                states=self.data_states_np)).get_example_stream()
    data_X = self.datastream

    # 2 conv inputs
    # we want to take our sequence of input images and convert them to
    # convolutional representations
    conv_layers = [ConvolutionalLayer(Rectifier().apply, (3, 3), 16, (2, 2), name='l1'),
                   ConvolutionalLayer(Rectifier().apply, (3, 3), 32, (2, 2), name='l2'),
                   ConvolutionalLayer(Rectifier().apply, (3, 3), 64, (2, 2), name='l3'),
                   ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l4'),
                   ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l5'),
                   ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l6')]
    # NOTE(review): num_channels=4 while the randomly generated fallback
    # data has a channel dimension of 1 -- confirm against the real data.
    convnet = ConvolutionalSequence(conv_layers, num_channels=4,
                                    image_size=(width, height),
                                    weights_init=init.Uniform(0, 0.01),
                                    biases_init=init.Constant(0.0),
                                    tied_biases=False,
                                    border_mode='full')
    convnet.initialize()
    output_dim = np.prod(convnet.get_dim('output'))
    conv_out = convnet.apply(self.x)
    # Flatten (batch, channels, rows, cols) into (batch, features).
    reshape_dims = (conv_out.shape[0], conv_out.shape[1]*conv_out.shape[2]*conv_out.shape[3])
    hidden_repr = conv_out.reshape(reshape_dims)

    # Project the flattened conv features down to the RNN input width.
    conv2rnn = Linear(input_dim=output_dim, output_dim=RNN_DIMS,
                      weights_init=init.Uniform(width=0.01),
                      biases_init=init.Constant(0.))
    conv2rnn.initialize()
    conv2rnn_output = conv2rnn.apply(hidden_repr)

    # RNN hidden layer
    # then we want to feed those conv representations into an RNN;
    # iterate=False means one recurrent step per call, with the previous
    # state supplied explicitly through self.state.
    rnn = SimpleRecurrent(dim=RNN_DIMS, activation=Rectifier(),
                          weights_init=init.Uniform(width=0.01))
    rnn.initialize()
    self.learned_state = rnn.apply(inputs=conv2rnn_output, states=self.state, iterate=False)

    # linear output from hidden layer
    # the RNN has two outputs, but only this one has a target. That is,
    # this is "expected return" which the network attempts to minimize
    # difference between expected return and actual return
    lin_output = Linear(input_dim=RNN_DIMS, output_dim=1,
                        weights_init=init.Uniform(width=0.01),
                        biases_init=init.Constant(0.))
    lin_output.initialize()
    self.exp_reward = lin_output.apply(self.learned_state)
    self.get_exp_reward = theano.function([self.x, self.state], self.exp_reward)

    # softmax output from hidden layer
    # this provides a softmax of action recommendations
    # the hypothesis is that adjusting the other outputs magically
    # influences this set of outputs to suggest smarter (or more
    # realistic?) moves
    action_output = Linear(input_dim=RNN_DIMS, output_dim=NUM_ACTIONS,
                           weights_init=init.Constant(.001),
                           biases_init=init.Constant(0.))
    action_output.initialize()
    self.suggested_actions = Softmax().apply(action_output.apply(self.learned_state[-1]))

    ######################
    # use this to get suggested actions... it requires the state of the
    # hidden units from the previous timestep
    #####################
    self.get_suggested_actions = theano.function([self.x, self.state],
                                                 [self.suggested_actions, self.learned_state])
def create_model_bricks():
    """Construct the bricks of a 64x64 convolutional VAE.

    Returns ``(encoder_convnet, encoder_mlp, decoder_convnet,
    decoder_mlp)``, each already initialized. The encoder stacks four
    stages of (3x3 conv, 3x3 conv, strided 2x2 conv) with 32/64/128/256
    filters; the decoder mirrors them with transposed convolutions and a
    final 1x1 projection to RGB.
    """
    def conv_bn_relu(idx, n_filters):
        # "Same" 3x3 convolution followed by batch norm and ReLU.
        return [
            Convolutional(filter_size=(3, 3), border_mode=(1, 1),
                          num_filters=n_filters, name='conv{}'.format(idx)),
            SpatialBatchNormalization(name='batch_norm{}'.format(idx)),
            Rectifier(),
        ]

    def downsample(idx, n_filters):
        # Strided 2x2 convolution halving the spatial resolution.
        return [
            Convolutional(filter_size=(2, 2), step=(2, 2),
                          num_filters=n_filters, name='conv{}'.format(idx)),
            SpatialBatchNormalization(name='batch_norm{}'.format(idx)),
            Rectifier(),
        ]

    def upsample(idx, n_filters, target_size):
        # Transposed 2x2 convolution doubling the spatial resolution.
        return [
            ConvolutionalTranspose(filter_size=(2, 2), step=(2, 2),
                                   original_image_size=target_size,
                                   num_filters=n_filters,
                                   name='conv{}'.format(idx)),
            SpatialBatchNormalization(name='batch_norm{}'.format(idx)),
            Rectifier(),
        ]

    encoder_layers = []
    for stage, n_filters in enumerate((32, 64, 128, 256)):
        base = 3 * stage + 1
        encoder_layers += conv_bn_relu(base, n_filters)
        encoder_layers += conv_bn_relu(base + 1, n_filters)
        encoder_layers += downsample(base + 2, n_filters)

    encoder_convnet = ConvolutionalSequence(
        layers=encoder_layers,
        num_channels=3,
        image_size=(64, 64),
        use_bias=False,
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='encoder_convnet')
    encoder_convnet.initialize()

    # Flattened size of the encoder's final feature map.
    encoder_filters = numpy.prod(encoder_convnet.get_dim('output'))

    encoder_mlp = MLP(
        dims=[encoder_filters, 1000, 1000],
        activations=[Sequence([BatchNormalization(1000).apply,
                               Rectifier().apply], name='activation1'),
                     Identity().apply],
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='encoder_mlp')
    encoder_mlp.initialize()

    decoder_mlp = BatchNormalizedMLP(
        activations=[Rectifier(), Rectifier()],
        # Only half of the encoder output feeds the decoder.
        dims=[encoder_mlp.output_dim // 2, 1000, encoder_filters],
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='decoder_mlp')
    decoder_mlp.initialize()

    decoder_layers = []
    for stage, (n_filters, size) in enumerate(((256, 8), (128, 16),
                                               (64, 32), (32, 64))):
        base = 3 * stage + 1
        decoder_layers += conv_bn_relu(base, n_filters)
        decoder_layers += conv_bn_relu(base + 1, n_filters)
        decoder_layers += upsample(base + 2, n_filters, (size, size))
    # Final 1x1 projection back to RGB, squashed to [0, 1].
    decoder_layers += [
        Convolutional(filter_size=(1, 1), num_filters=3, name='conv_out'),
        Logistic(),
    ]

    decoder_convnet = ConvolutionalSequence(
        layers=decoder_layers,
        num_channels=encoder_convnet.get_dim('output')[0],
        image_size=encoder_convnet.get_dim('output')[1:],
        use_bias=False,
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='decoder_convnet')
    decoder_convnet.initialize()

    return encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp
def create_network(inputs=None, batch=batch_size):
    """Assemble the Gated PixelCNN graph.

    Returns ``(cost, cost_bits_dim)``: the negative log-likelihood in
    nats and in bits per dimension.

    NOTE(review): reads the module-level globals `dataset`, `img_dim`,
    `h`, `n_channel`, `n_layer`, `third_layer` and `MODE`; confirm they
    are defined before this is called.
    """
    if inputs is None:
        inputs = T.tensor4('features')
    net_in = T.cast(inputs, 'float32')
    # Rescale intensities to [0, 1] unless the data is already binary.
    net_in = net_in / 255. if dataset != 'binarized_mnist' else net_in

    # First gated layer: 7x7 filters, the raw image fed to both stacks.
    first = GatedPixelCNN(name='gated_layer_0',
                          filter_size=7,
                          image_size=(img_dim, img_dim),
                          num_filters=h * n_channel,
                          num_channels=n_channel,
                          batch_size=batch,
                          weights_init=IsotropicGaussian(std=0.02, mean=0),
                          biases_init=Constant(0.02),
                          res=False)
    first.initialize()
    v_stack, h_stack = first.apply(net_in, net_in)

    # Remaining gated layers: 3x3 filters with residual connections.
    for idx in range(n_layer):
        block = GatedPixelCNN(name='gated_layer_{}'.format(idx + 1),
                              filter_size=3,
                              image_size=(img_dim, img_dim),
                              num_channels=h * n_channel,
                              batch_size=batch,
                              weights_init=IsotropicGaussian(std=0.02, mean=0),
                              biases_init=Constant(0.02),
                              res=True)
        block.initialize()
        v_stack, h_stack = block.apply(v_stack, h_stack)

    # Output head on the horizontal stack: two masked convolutions, each
    # preceded by a ReLU.
    head_layers = [Rectifier(),
                   ConvolutionalNoFlip((1, 1), h * n_channel, mask_type='B',
                                       name='1x1_conv_1'),
                   Rectifier(),
                   ConvolutionalNoFlip(*third_layer, mask_type='B',
                                       name='output_layer')]
    head = ConvolutionalSequence(head_layers,
                                 num_channels=h * n_channel,
                                 batch_size=batch,
                                 image_size=(img_dim, img_dim),
                                 border_mode='half',
                                 weights_init=IsotropicGaussian(std=0.02, mean=0),
                                 biases_init=Constant(0.02),
                                 tied_biases=False)
    head.initialize()
    logits = head.apply(h_stack)

    if MODE == '256ary':
        # One 256-way softmax per channel per pixel.
        logits = logits.reshape((-1, 256, n_channel, img_dim,
                                 img_dim)).dimshuffle(0, 2, 3, 4, 1)
        logits = logits.reshape((-1, 256))
        probs = Softmax().apply(logits)
        targets = T.cast(inputs, 'int64').flatten()
        cost = CategoricalCrossEntropy().apply(targets, probs) * img_dim * img_dim
        cost_bits_dim = categorical_crossentropy(log_softmax(logits), targets)
    else:
        probs = Logistic().apply(logits)
        cost = BinaryCrossEntropy().apply(inputs, probs) * img_dim * img_dim
        # Bernoulli negative log-likelihood expressed in bits.
        cost_bits_dim = -(inputs * T.log2(probs) +
                          (1.0 - inputs) * T.log2(1.0 - probs)).mean()

    cost_bits_dim.name = "nnl_bits_dim"
    cost.name = 'loglikelihood_nat'
    return cost, cost_bits_dim
num_filters=num_filter, step=conv_step,
                  border_mode=border_mode, name='conv_{}_1'.format(i)))
conv_layers2.append(conv_activation[i])
conv_layers2.append(MaxPooling(pooling_size, name='pool_{}_1'.format(i)))

# ---------------------------------------------------------------
# Building both sequences and merge them by tensor.concatenate
# ---------------------------------------------------------------
conv_sequence = ConvolutionalSequence(conv_layers, num_channels,
                                      image_size=image_size,
                                      weights_init=Uniform(width=0.2),
                                      biases_init=Constant(0.),
                                      name='conv_sequence_0')
conv_sequence2 = ConvolutionalSequence(conv_layers2, num_channels,
                                       image_size=image_size,
                                       weights_init=Uniform(width=0.2),
                                       biases_init=Constant(0.),
                                       name='conv_sequence_1')
conv_sequence.initialize()
# NOTE(review): conv_sequence2 is never initialize()d here -- confirm its
# parameters get initialized elsewhere before training.
conv_out1 = Flattener(name='flattener_0').apply(conv_sequence.apply(x))
conv_out2 = Flattener(name='flattener_1').apply(conv_sequence2.apply(x2))
# Concatenate the two flattened streams into one feature vector.
conv_out = tensor.concatenate([conv_out1,conv_out2],axis=1)
# MLP input is twice one sequence's output size (two identical streams).
top_mlp_dims = [2*numpy.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size]
top_mlp = MLP(mlp_activation, top_mlp_dims,
              weights_init=GlorotInitialization(), biases_init=Constant(0.))
top_mlp.initialize()
predict = top_mlp.apply(conv_out)

# ---------------------------------------------------------------
# Building computational graph
# ---------------------------------------------------------------
cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost')
#Create the symbolics variable x = tensor.tensor4('image_features') y = tensor.lmatrix('targets') num_epochs = 1000 layers = [] ###############FIRST STAGE####################### #Create the convolutions layers layers.append(Convolutional(filter_size=(7,7), step=(2,2), num_filters=96, border_mode='half', name='conv_0')) layers.append(BatchNormalization(name='batch_0')) layers.append(Rectifier()) layers.append(MaxPooling((3,3), step=(2,2), padding=(1,1), name='pool_0')) convSeq = ConvolutionalSequence(layers, num_channels=3, image_size=(220,220), weights_init=Orthogonal(), use_bias=False, name='ConvSeq') convSeq.initialize() out = convSeq.apply(x) #FIRE MODULES out1 = Fire((55,55), 96, 16, 16, 16, out, 10) out2 = Fire((55,55), 128, 16, 16, 16, out1, 25) out3 = Fire((55,55), 128, 32, 32, 32, out2, 300) out31 = MaxPooling((3,3), step=(2,2), padding=(1,1), name='poolLow').apply(out3) out4 = Fire((28,28), 256, 32, 32, 32, out31, 45) out5 = Fire((28,28), 256, 48, 48, 48, out4, 500) out6 = Fire((28,28), 384, 48, 48, 48, out5, 65) out7 = Fire((28,28), 384, 64, 64, 64, out6, 700) out71 = MaxPooling((3,3), step=(2,2), padding=(1,1), name='poolLow2').apply(out7) out8 = Fire((14,14), 512, 64, 64, 64, out71, 85) #LAST LAYERS
i = i + 1 #Sequence conv_layers1.append( Convolutional( filter_size=filter_size[j+3], num_filters=num_filter[j+3], step=conv_step, border_mode=border_mode, name='conv_{}'.format(i))) conv_layers1.append(BatchNormalization(name='BNconv_{}'.format(i))) conv_layers1.append(conv_activation[0]) conv_layers1.append(MaxPooling(pooling_size[j+2], name='pool_{}'.format(i))) conv_sequence1 = ConvolutionalSequence(conv_layers1, num_channels=num_channels, image_size=image_size, weights_init=Uniform(width=0.2), biases_init=Constant(0.), name='ConvSeq1_{}'.format(i)) conv_sequence1.initialize() out1 = conv_sequence1.apply(x) ################# Convolutional Sequence 2 ################# # conv_layers2 parameters i = i+1 #Sequence j = 0 #Sub Layer filter_size = [(7,7), (5,5), (2,2), (5,5)] num_filter = [16, 32, 48, 64] num_channels = 3 pooling_size = [(3,3), (2,2), (2,2)] conv_step = (1,1) border_mode = 'valid' conv_layers2 = []
def _fire_branch(image_shape, in_channels, filter_size, n_filters, inp, i):
    """Build a conv -> batch-norm -> ReLU branch in its own sequence.

    The bricks are named ``conv_{i}``, ``batch_{i}`` and ``convSeq_{i}``
    so every branch in the network keeps a unique, increasing index.
    Returns the branch applied to ``inp``.
    """
    layers = [
        Convolutional(filter_size=filter_size, num_channels=in_channels,
                      num_filters=n_filters, image_size=image_shape,
                      border_mode='half', name='conv_{}'.format(i)),
        BatchNormalization(name='batch_{}'.format(i)),
        Rectifier(),
    ]
    seq = ConvolutionalSequence(layers, num_channels=in_channels,
                                image_size=image_shape,
                                weights_init=Orthogonal(), use_bias=False,
                                name='convSeq_{}'.format(i))
    seq.initialize()
    return seq.apply(inp)


def Fire(image_shape, num_input, conv1, conv2, conv3, out, i):
    """SqueezeNet-style Fire module built from twelve parallel branches.

    Parameters
    ----------
    image_shape : tuple
        Spatial size of the incoming feature map.
    num_input : int
        Number of input channels.
    conv1 : int
        Filters of each of the four 1x1 squeeze branches.
    conv2 : int
        Filters of each of the four 1x1 expand branches.
    conv3 : int
        Filters of each of the four 3x3 expand branches.
    out : theano variable
        Input feature map.
    i : int
        Starting index for brick names; each branch consumes one index.

    Returns
    -------
    theano variable
        The eight expand-branch outputs concatenated along the channel
        axis.
    """
    # Squeeze: four parallel 1x1 convolutions over the input, then
    # concatenated along the channel axis.
    squeeze = []
    for _ in range(4):
        squeeze.append(_fire_branch(image_shape, num_input, (1, 1), conv1, out, i))
        i = i + 1
    squeezed = T.concatenate(squeeze, axis=1)

    # Expand: four 1x1 branches followed by four 3x3 branches, all fed
    # the squeezed map.
    num_input2 = conv1 * 4
    expand = []
    for filter_size, n_filters in (((1, 1), conv2), ((3, 3), conv3)):
        for _ in range(4):
            expand.append(_fire_branch(image_shape, num_input2, filter_size,
                                       n_filters, squeezed, i))
            i = i + 1

    # Merge all expand outputs along the channel axis.
    return T.concatenate(expand, axis=1)
num_filters=192, border_mode='half', name='conv_2'))
layers.append(BatchNormalization(name='batch_2'))
layers.append(Rectifier())
layers.append(MaxPooling((3, 3), step=(2, 2), padding=(1, 1), name='pool_2'))
#Create the sequence
conv_sequence = ConvolutionalSequence(layers, num_channels=3,
                                      image_size=(160, 160),
                                      weights_init=Orthogonal(),
                                      use_bias=False, name='convSeq')
#Initialize the convnet
conv_sequence.initialize()
#Output the first result
out = conv_sequence.apply(x)

###############SECOND STAGE#####################
# GoogLeNet-style inception stack; the last argument of each call is the
# index used to name that module's bricks.
out2 = inception((20, 20), 192, 64, 96, 128, 16, 32, 32, out, 10)
out3 = inception((20, 20), 256, 128, 128, 192, 32, 96, 64, out2, 20)
out31 = MaxPooling((2, 2), name='poolLow').apply(out3)
out4 = inception((10, 10), 480, 192, 96, 208, 16, 48, 64, out31, 30)
out5 = inception((10, 10), 512, 160, 112, 224, 24, 64, 64, out4, 40)
out6 = inception((10, 10), 512, 128, 128, 256, 24, 64, 64, out5, 50)
out7 = inception((10, 10), 512, 112, 144, 288, 32, 64, 64, out6, 60)
out8 = inception((10, 10), 528, 256, 160, 320, 32, 128, 128, out7, 70)
out81 = MaxPooling((2, 2), name='poolLow1').apply(out8)
def create_network(inputs=None, batch=batch_size):
    """Build the Gated PixelCNN graph; return (cost in nats, cost in bits/dim).

    NOTE(review): depends on the module-level globals `dataset`,
    `img_dim`, `h`, `n_channel`, `n_layer`, `third_layer` and `MODE`.
    """
    if inputs is None:
        inputs = T.tensor4('features')
    data = T.cast(inputs, 'float32')
    # Normalize to [0, 1] unless the dataset is already binarized.
    data = data / 255. if dataset != 'binarized_mnist' else data

    # Layer 0 uses 7x7 filters and no residual connection; the remaining
    # n_layer blocks use 3x3 filters with residual connections.
    stacks = (data, data)
    specs = [dict(name='gated_layer_0', filter_size=7,
                  num_filters=h * n_channel, num_channels=n_channel,
                  res=False)]
    specs += [dict(name='gated_layer_{}'.format(k + 1), filter_size=3,
                   num_channels=h * n_channel, res=True)
              for k in range(n_layer)]
    for spec in specs:
        brick = GatedPixelCNN(image_size=(img_dim, img_dim),
                              batch_size=batch,
                              weights_init=IsotropicGaussian(std=0.02,
                                                             mean=0),
                              biases_init=Constant(0.02),
                              **spec)
        brick.initialize()
        stacks = brick.apply(*stacks)
    _, horizontal = stacks

    # Output head on the horizontal stack: ReLU + masked conv, twice.
    output_bricks = [Rectifier(),
                     ConvolutionalNoFlip((1, 1), h * n_channel,
                                         mask_type='B', name='1x1_conv_1'),
                     Rectifier(),
                     ConvolutionalNoFlip(*third_layer, mask_type='B',
                                         name='output_layer')]
    output_seq = ConvolutionalSequence(output_bricks,
                                       num_channels=h * n_channel,
                                       batch_size=batch,
                                       image_size=(img_dim, img_dim),
                                       border_mode='half',
                                       weights_init=IsotropicGaussian(
                                           std=0.02, mean=0),
                                       biases_init=Constant(0.02),
                                       tied_biases=False)
    output_seq.initialize()
    activations = output_seq.apply(horizontal)

    if MODE == '256ary':
        # One 256-way softmax per channel per pixel.
        activations = activations.reshape(
            (-1, 256, n_channel, img_dim, img_dim)).dimshuffle(0, 2, 3, 4, 1)
        activations = activations.reshape((-1, 256))
        predictions = Softmax().apply(activations)
        labels = T.cast(inputs, 'int64').flatten()
        cost = CategoricalCrossEntropy().apply(labels, predictions) \
            * img_dim * img_dim
        cost_bits_dim = categorical_crossentropy(log_softmax(activations),
                                                 labels)
    else:
        predictions = Logistic().apply(activations)
        cost = BinaryCrossEntropy().apply(inputs, predictions) \
            * img_dim * img_dim
        # Bernoulli negative log-likelihood expressed in bits.
        cost_bits_dim = -(inputs * T.log2(predictions)
                          + (1.0 - inputs) * T.log2(1.0 - predictions)).mean()

    cost_bits_dim.name = "nnl_bits_dim"
    cost.name = 'loglikelihood_nat'
    return cost, cost_bits_dim
def run_experiment(): np.random.seed(42) #X = tensor.matrix('features') X = tensor.tensor4('features') y = tensor.matrix('targets') nbr_channels = 3 image_shape = (30, 30) conv_layers = [ ConvolutionalLayer( filter_size=(4,4), num_filters=10, activation=Rectifier().apply, border_mode='full', pooling_size=(1,1), weights_init=Uniform(width=0.1), biases_init=Constant(0.0), name='conv0'), ConvolutionalLayer( filter_size=(3,3), num_filters=14, activation=Rectifier().apply, border_mode='full', pooling_size=(1,1), weights_init=Uniform(width=0.1), biases_init=Constant(0.0), name='conv1')] conv_sequence = ConvolutionalSequence( conv_layers, num_channels=nbr_channels, image_size=image_shape) #conv_sequence.push_allocation_config() conv_sequence.initialize() conv_output_dim = np.prod(conv_sequence.get_dim('output')) #conv_output_dim = 25*25 flattener = Flattener() mlp = MLP( activations=[Rectifier(), Rectifier(), Softmax()], dims=[conv_output_dim, 50, 50, 10], weights_init=IsotropicGaussian(std=0.1), biases_init=IsotropicGaussian(std=0.01)) mlp.initialize() conv_output = conv_sequence.apply(X) y_hat = mlp.apply(flattener.apply(conv_output)) cost = CategoricalCrossEntropy().apply(y, y_hat) #cost = CategoricalCrossEntropy().apply(y_hat, y) #cost = BinaryCrossEntropy().apply(y.flatten(), y_hat.flatten()) cg = ComputationGraph([y_hat]) """ print "--- INPUT ---" for v in VariableFilter(bricks=mlp.linear_transformations, roles=[INPUT])(cg.variables): print v.tag.annotations[0].name print "--- OUTPUT ---" #print(VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables)) for v in VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables): print v.tag.annotations[0].name print "--- WEIGHT ---" #print(VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables)) for v in VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables): print v.tag.annotations[0].name print "--- BIAS ---" 
#print(VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables)) for v in VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables): print v.tag.annotations[0].name """ # check out .tag on the variables to see which layer they belong to print "----------------------------" D_by_layer = get_linear_transformation_roles(mlp, cg) # returns a vector with one entry for each in the mini-batch individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_linear_transformations(D_by_layer, cost) #import pprint #pp = pprint.PrettyPrinter(indent=4) #pp.pprint(get_conv_layers_transformation_roles(ComputationGraph(conv_output)).items()) D_by_layer = get_conv_layers_transformation_roles(ComputationGraph(conv_output)) individual_sum_square_norm_gradients_method_00 += get_sum_square_norm_gradients_conv_transformations(D_by_layer, cost) print "There are %d entries in cg.parameters." % len(cg.parameters) L_grads_method_01 = [tensor.grad(cost, p) for p in cg.parameters] L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[WEIGHT, BIAS])(cg.variables)] # works on the sum of the gradients in a mini-batch sum_square_norm_gradients_method_01 = sum([tensor.sqr(g).sum() for g in L_grads_method_01]) sum_square_norm_gradients_method_02 = sum([tensor.sqr(g).sum() for g in L_grads_method_02]) N = 8 Xtrain = np.random.randn(N, nbr_channels, image_shape[0], image_shape[1]).astype(np.float32) # Option 1. ytrain = np.zeros((N, 10), dtype=np.float32) for n in range(N): label = np.random.randint(low=0, high=10) ytrain[n, label] = 1.0 # Option 2, just to debug situations with NaN. 
#ytrain = np.random.rand(N, 10).astype(np.float32) #for n in range(N): # ytrain[n,:] = ytrain[n,:] / ytrain[n,:].sum() f = theano.function([X,y], [cost, individual_sum_square_norm_gradients_method_00, sum_square_norm_gradients_method_01, sum_square_norm_gradients_method_02]) [c, v0, gs1, gs2] = f(Xtrain, ytrain) #print "[c, v0, gs1, gs2]" L_c, L_v0, L_gs1, L_gs2 = ([], [], [], []) for n in range(N): [nc, nv0, ngs1, ngs2] = f(Xtrain[n,:].reshape((1,Xtrain.shape[1],Xtrain.shape[2], Xtrain.shape[3])), ytrain[n,:].reshape((1,10))) L_c.append(nc) L_v0.append(nv0) L_gs1.append(ngs1) L_gs2.append(ngs2) print "Cost for whole mini-batch in single shot : %f." % c print "Cost for whole mini-batch accumulated : %f." % sum(L_c) print "" print "Square-norm of all gradients for each data point in single shot :" print v0.reshape((1,-1)) print "Square-norm of all gradients for each data point iteratively :" print np.array(L_gs1).reshape((1,-1)) print "Square-norm of all gradients for each data point iteratively :" print np.array(L_gs2).reshape((1,-1)) print "" print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs1))) print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2))) print "" print "Ratios : " print np.array(L_gs1).reshape((1,-1)) / v0.reshape((1,-1))
def build_submodel(image_size, num_channels, L_dim_conv_layers, L_filter_size, L_pool_size, L_activation_conv, L_dim_full_layers, L_activation_full, dropout, prediction, allow_comment=False, sub_dropout=0, L_pool_step=[], L_pool_padding=[]): # CONVOLUTION params_channels = [10**(-i) for i in range(len(L_dim_conv_layers) + 1)] index_params = 0 params_channels.reverse() output_dim = num_channels * np.prod(image_size) conv_layers = [] assert len(L_dim_conv_layers) == len(L_filter_size) assert len(L_dim_conv_layers) == len(L_pool_size) assert len(L_dim_conv_layers) == len(L_activation_conv) if len(L_pool_step) == 0: L_pool_step = [(1, 1) for i in range(len(L_dim_conv_layers))] L_pool_padding = [(0, 0) for i in range(len(L_dim_conv_layers))] assert len(L_dim_conv_layers) == len(L_pool_step) assert len(L_dim_conv_layers) == len(L_pool_padding) L_conv_dropout = [dropout] * len( L_dim_conv_layers) # unique value of dropout for now convnet = None mlp = None if len(L_dim_conv_layers): for (num_filters, filter_size, pool_size, activation_str, dropout, index, step, padding) in zip(L_dim_conv_layers, L_filter_size, L_pool_size, L_activation_conv, L_conv_dropout, xrange(len(L_dim_conv_layers)), L_pool_step, L_pool_padding): # convert filter_size and pool_size in tuple filter_size = tuple(filter_size) if pool_size is None: pool_size = (0, 0) else: pool_size = tuple(pool_size) # TO DO : leaky relu if activation_str.lower() == 'rectifier': activation = Rectifier() elif activation_str.lower() == 'tanh': activation = Tanh() elif activation_str.lower() in ['sigmoid', 'logistic']: activation = Logistic() elif activation_str.lower() in ['id', 'identity']: activation = Identity() else: raise Exception("unknown activation function : %s", activation_str) assert 0.0 <= dropout and dropout < 1.0 num_filters = num_filters - int(num_filters * dropout) layer_conv = Convolutional(filter_size=filter_size, num_filters=num_filters, name="layer_%d" % index, weights_init=IsotropicGaussian(0.01), 
biases_init=Constant(0.0)) conv_layers.append(layer_conv) conv_layers.append(activation) index_params += 1 if not (pool_size[0] == 0 and pool_size[1] == 0): #pool = MaxPooling(pooling_size=pool_size, step=step, padding=padding) pool = MaxPooling(pooling_size=pool_size) conv_layers.append(pool) convnet = ConvolutionalSequence(conv_layers, num_channels=num_channels, image_size=image_size, name="conv_section") convnet.push_allocation_config() convnet.initialize() output_dim = np.prod(convnet.get_dim('output')) # MLP assert len(L_dim_full_layers) == len(L_activation_full) L_full_dropout = [dropout] * len( L_dim_full_layers) # unique value of dropout for now # reguarding the batch dropout : the dropout is applied on the filter # which is equivalent to the output dimension # you have to look at the dropout_rate of the next layer # that is why we throw away the first value of L_exo_dropout_full_layers pre_dim = output_dim if allow_comment: print "When constructing the model, the output_dim of the conv section is %d." % output_dim activations = [] dims = [pre_dim] if len(L_dim_full_layers): for (dim, activation_str, dropout, index) in zip( L_dim_full_layers, L_activation_full, L_full_dropout, range(len(L_dim_conv_layers), len(L_dim_conv_layers) + len(L_dim_full_layers))): # TO DO : leaky relu if activation_str.lower() == 'rectifier': activation = Rectifier().apply elif activation_str.lower() == 'tanh': activation = Tanh().apply elif activation_str.lower() in ['sigmoid', 'logistic']: activation = Logistic().apply elif activation_str.lower() in ['id', 'identity']: activation = Identity().apply else: raise Exception("unknown activation function : %s", activation_str) activations.append(activation) assert 0.0 <= dropout and dropout < 1.0 dim = dim - int(dim * dropout) if allow_comment: print "When constructing the fully-connected section, we apply dropout %f to add an MLP going from pre_dim %d to dim %d." 
% ( dropout, pre_dim, dim) dims.append(dim) #now construct the full MLP in one pass: activations.append(Identity()) #params_channels[index_params] dims.append(prediction) mlp = MLP(activations=activations, dims=dims, weights_init=IsotropicGaussian(0.1), biases_init=Constant(0.0), name="layer_%d" % index) mlp.push_allocation_config() mlp.initialize() return (convnet, mlp)
num_filters=num_filter, activation=activation, name='conv_{}'.format(i)) for i, (activation, filter_size, num_filter) in enumerate(conv_parameters)), (MaxPooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))])) #Create the sequence conv_sequence = ConvolutionalSequence(conv_layers, num_channels, image_size=image_shape, weights_init=Uniform(width=0.2), biases_init=Constant(0.)) #Initialize the convnet conv_sequence.initialize() #Add the MLP top_mlp_dims = [np.prod(conv_sequence.get_dim('output')) ] + mlp_hiddens + [output_size] out = Flattener().apply(conv_sequence.apply(x)) mlp = MLP(mlp_activation, top_mlp_dims, weights_init=Uniform(0, 0.2), biases_init=Constant(0.)) #Initialisze the MLP mlp.initialize() #Get the output predict = mlp.apply(out) cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost') error = MisclassificationRate().apply(y.flatten(), predict)
def run_experiment(): np.random.seed(42) #X = tensor.matrix('features') X = tensor.tensor4('features') y = tensor.matrix('targets') nbr_channels = 3 image_shape = (30, 30) conv_layers = [ ConvolutionalLayer(filter_size=(4, 4), num_filters=10, activation=Rectifier().apply, border_mode='full', pooling_size=(1, 1), weights_init=Uniform(width=0.1), biases_init=Constant(0.0), name='conv0'), ConvolutionalLayer(filter_size=(3, 3), num_filters=14, activation=Rectifier().apply, border_mode='full', pooling_size=(1, 1), weights_init=Uniform(width=0.1), biases_init=Constant(0.0), name='conv1') ] conv_sequence = ConvolutionalSequence(conv_layers, num_channels=nbr_channels, image_size=image_shape) #conv_sequence.push_allocation_config() conv_sequence.initialize() conv_output_dim = np.prod(conv_sequence.get_dim('output')) #conv_output_dim = 25*25 flattener = Flattener() mlp = MLP(activations=[Rectifier(), Rectifier(), Softmax()], dims=[conv_output_dim, 50, 50, 10], weights_init=IsotropicGaussian(std=0.1), biases_init=IsotropicGaussian(std=0.01)) mlp.initialize() conv_output = conv_sequence.apply(X) y_hat = mlp.apply(flattener.apply(conv_output)) cost = CategoricalCrossEntropy().apply(y, y_hat) #cost = CategoricalCrossEntropy().apply(y_hat, y) #cost = BinaryCrossEntropy().apply(y.flatten(), y_hat.flatten()) cg = ComputationGraph([y_hat]) """ print "--- INPUT ---" for v in VariableFilter(bricks=mlp.linear_transformations, roles=[INPUT])(cg.variables): print v.tag.annotations[0].name print "--- OUTPUT ---" #print(VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables)) for v in VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables): print v.tag.annotations[0].name print "--- WEIGHT ---" #print(VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables)) for v in VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables): print v.tag.annotations[0].name print "--- BIAS ---" 
#print(VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables)) for v in VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables): print v.tag.annotations[0].name """ # check out .tag on the variables to see which layer they belong to print "----------------------------" D_by_layer = get_linear_transformation_roles(mlp, cg) # returns a vector with one entry for each in the mini-batch individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_linear_transformations( D_by_layer, cost) #import pprint #pp = pprint.PrettyPrinter(indent=4) #pp.pprint(get_conv_layers_transformation_roles(ComputationGraph(conv_output)).items()) D_by_layer = get_conv_layers_transformation_roles( ComputationGraph(conv_output)) individual_sum_square_norm_gradients_method_00 += get_sum_square_norm_gradients_conv_transformations( D_by_layer, cost) print "There are %d entries in cg.parameters." % len(cg.parameters) L_grads_method_01 = [tensor.grad(cost, p) for p in cg.parameters] L_grads_method_02 = [ tensor.grad(cost, v) for v in VariableFilter(roles=[WEIGHT, BIAS])(cg.variables) ] # works on the sum of the gradients in a mini-batch sum_square_norm_gradients_method_01 = sum( [tensor.sqr(g).sum() for g in L_grads_method_01]) sum_square_norm_gradients_method_02 = sum( [tensor.sqr(g).sum() for g in L_grads_method_02]) N = 8 Xtrain = np.random.randn(N, nbr_channels, image_shape[0], image_shape[1]).astype(np.float32) # Option 1. ytrain = np.zeros((N, 10), dtype=np.float32) for n in range(N): label = np.random.randint(low=0, high=10) ytrain[n, label] = 1.0 # Option 2, just to debug situations with NaN. 
#ytrain = np.random.rand(N, 10).astype(np.float32) #for n in range(N): # ytrain[n,:] = ytrain[n,:] / ytrain[n,:].sum() f = theano.function([X, y], [ cost, individual_sum_square_norm_gradients_method_00, sum_square_norm_gradients_method_01, sum_square_norm_gradients_method_02 ]) [c, v0, gs1, gs2] = f(Xtrain, ytrain) #print "[c, v0, gs1, gs2]" L_c, L_v0, L_gs1, L_gs2 = ([], [], [], []) for n in range(N): [nc, nv0, ngs1, ngs2] = f( Xtrain[n, :].reshape( (1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])), ytrain[n, :].reshape((1, 10))) L_c.append(nc) L_v0.append(nv0) L_gs1.append(ngs1) L_gs2.append(ngs2) print "Cost for whole mini-batch in single shot : %f." % c print "Cost for whole mini-batch accumulated : %f." % sum(L_c) print "" print "Square-norm of all gradients for each data point in single shot :" print v0.reshape((1, -1)) print "Square-norm of all gradients for each data point iteratively :" print np.array(L_gs1).reshape((1, -1)) print "Square-norm of all gradients for each data point iteratively :" print np.array(L_gs2).reshape((1, -1)) print "" print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs1))) print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2))) print "" print "Ratios : " print np.array(L_gs1).reshape((1, -1)) / v0.reshape((1, -1))