def __init__(self, **kwargs):
    conv_layers = [
        Convolutional(filter_size=(3, 3), num_filters=64, border_mode=(1, 1), name='conv_1'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=64, border_mode=(1, 1), name='conv_2'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_2'),
        Convolutional(filter_size=(3, 3), num_filters=128, border_mode=(1, 1), name='conv_3'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=128, border_mode=(1, 1), name='conv_4'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_4'),
        Convolutional(filter_size=(3, 3), num_filters=256, border_mode=(1, 1), name='conv_5'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=256, border_mode=(1, 1), name='conv_6'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=256, border_mode=(1, 1), name='conv_7'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_7'),
        Convolutional(filter_size=(3, 3), num_filters=512, border_mode=(1, 1), name='conv_8'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=512, border_mode=(1, 1), name='conv_9'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=512, border_mode=(1, 1), name='conv_10'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_10'),
        Convolutional(filter_size=(3, 3), num_filters=512, border_mode=(1, 1), name='conv_11'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=512, border_mode=(1, 1), name='conv_12'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=512, border_mode=(1, 1), name='conv_13'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_13'),
    ]
    mlp = MLP([Rectifier(name='fc_14'), Rectifier(name='fc_15'), Softmax()],
              [25088, 4096, 4096, 1000])
    conv_sequence = ConvolutionalSequence(conv_layers, 3,
                                          image_size=(224, 224))
    super(VGGNet, self).__init__(
        [conv_sequence.apply, Flattener().apply, mlp.apply], **kwargs)
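# A minimal usage sketch for the VGGNet brick above; illustrative only, and
# assumes the Blocks/Theano imports used throughout this file.
from theano import tensor
from blocks.initialization import IsotropicGaussian, Constant

x = tensor.tensor4('images')
net = VGGNet(weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
net.push_initialization_config()
net.initialize()
probs = net.apply(x)  # (batch, 1000) class probabilities from the softmax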
def build_model(images, labels):
    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3, 3), 16, (160, 160))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    #top_mlp = MLP([Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')], [conv_out_dim, 1024, 10], weights_init=IsotropicGaussian(), biases_init=Constant(0))
    top_mlp = BatchNormalizedMLP(
        [Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
        [conv_out_dim, 1024, 10],
        weights_init=IsotropicGaussian(), biases_init=Constant(0))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([bottom_conv_sequence.apply,
                                  flattener.apply, top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # add regularization
    selector = Selector([top_mlp])
    Ws = selector.get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    W0 = Ws['/%s/linear_0.W' % mlp_brick_name]
    W1 = Ws['/%s/linear_1.W' % mlp_brick_name]
    cost = cost_noreg + .01 * (W0 ** 2).mean() + .01 * (W1 ** 2).mean()

    return cost
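# Hedged sketch of driving build_model above; it assumes the
# convolutional_sequence helper and the imports used elsewhere in this module.
import theano
from theano import tensor

images = tensor.tensor4('image_features')
labels = tensor.lmatrix('targets')
cost = build_model(images, labels)
cost.name = 'cost'
cost_fn = theano.function([images, labels], cost)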
def __init__(self, conv_activations, num_channels, image_shape,
             filter_sizes, feature_maps, pooling_sizes,
             top_mlp_activations, top_mlp_dims,
             conv_step=None, border_mode='valid', **kwargs):
    if conv_step is None:
        self.conv_step = (1, 1)
    else:
        self.conv_step = conv_step
    self.num_channels = num_channels
    self.image_shape = image_shape
    self.top_mlp_activations = top_mlp_activations
    self.top_mlp_dims = top_mlp_dims
    self.border_mode = border_mode

    # list() so the pairs can be iterated over more than once below
    conv_parameters = list(zip(filter_sizes, feature_maps))

    # Construct convolutional, batch-norm, activation, and pooling layers
    # with corresponding parameters
    self.convolution_layer = (
        Convolutional(filter_size=filter_size, num_filters=num_filter,
                      step=self.conv_step, border_mode=self.border_mode,
                      name='conv_{}'.format(i))
        for i, (filter_size, num_filter) in enumerate(conv_parameters))
    self.BN_layer = (BatchNormalization(name='bn_conv_{}'.format(i))
                     for i, _ in enumerate(conv_parameters))
    self.pooling_layer = (MaxPooling(size, name='pool_{}'.format(i))
                          for i, size in enumerate(pooling_sizes))

    self.layers = list(interleave([self.convolution_layer, self.BN_layer,
                                   conv_activations, self.pooling_layer]))

    self.conv_sequence = ConvolutionalSequence(self.layers, num_channels,
                                               image_size=image_shape)

    # Construct a top MLP
    self.top_mlp = MLP(top_mlp_activations, top_mlp_dims)

    # We need to flatten the output of the last convolutional layer.
    # This brick accepts a tensor of dimension (batch_size, ...) and
    # returns a matrix (batch_size, features)
    self.flattener = Flattener()

    application_methods = [self.conv_sequence.apply, self.flattener.apply,
                           self.top_mlp.apply]
    super(LeNet, self).__init__(application_methods, **kwargs)
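# A sketch of instantiating the brick above; all sizes are illustrative and
# assume a Blocks version that accepts BatchNormalization inside a
# ConvolutionalSequence. For a 28x28 single-channel input, two 5x5 'valid'
# convolutions with 2x2 pooling give a 50 * 4 * 4 = 800-dim flattened output.
from blocks.bricks import Rectifier, Softmax
from blocks.initialization import IsotropicGaussian, Constant

lenet = LeNet([Rectifier(), Rectifier()], 1, (28, 28),
              filter_sizes=[(5, 5), (5, 5)], feature_maps=[20, 50],
              pooling_sizes=[(2, 2), (2, 2)],
              top_mlp_activations=[Rectifier(), Softmax()],
              top_mlp_dims=[800, 500, 10],
              weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
lenet.initialize()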
def build_model(images, labels):
    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3, 3), 64, (150, 150))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    top_mlp = MLP([LeakyRectifier(name='non_linear_9'),
                   LeakyRectifier(name='non_linear_10'),
                   Softmax(name='non_linear_11')],
                  [conv_out_dim, 2048, 612, 10],
                  weights_init=IsotropicGaussian(), biases_init=Constant(1))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([bottom_conv_sequence.apply,
                                  flattener.apply, top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    return cost
def create_OLD_kim_cnn(layer0_input, embedding_size, input_len, config, pref):
    '''One convolution layer per filter width, all with the same number of
    filters; the max-pooled outputs are concatenated along the feature axis
    (Kim-style CNN).'''
    filter_width_list = [int(fw)
                         for fw in config[pref + '_filterwidth'].split()]
    print filter_width_list
    num_filters = int(config[pref + '_num_filters'])
    totfilters = 0
    for i, fw in enumerate(filter_width_list):
        num_feature_map = input_len - fw + 1  #39
        conv = Convolutional(filter_size=(fw, embedding_size),
                             num_filters=num_filters, num_channels=1,
                             image_size=(input_len, embedding_size),
                             name="conv" + str(fw))
        pooling = MaxPooling((num_feature_map, 1), name="pool" + str(fw))
        initialize([conv])
        totfilters += num_filters
        outpool = Flattener(name="flat" + str(fw)).apply(
            Rectifier(name=pref + 'act_' + str(fw)).apply(
                pooling.apply(conv.apply(layer0_input))))
        if i == 0:
            outpools = outpool
        else:
            outpools = T.concatenate([outpools, outpool], axis=1)
    name_rep_len = totfilters
    return outpools, name_rep_len
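# Hypothetical call sketch for create_OLD_kim_cnn; the config keys mirror the
# ones read inside the function, the values are illustrative, and the
# initialize() helper is assumed to be defined elsewhere in this module.
import theano.tensor as T

config = {'cnn_filterwidth': '3 4 5', 'cnn_num_filters': '100'}
layer0_input = T.tensor4('embeddings')  # (batch, 1, input_len, embedding_size)
outpools, rep_len = create_OLD_kim_cnn(layer0_input, embedding_size=300,
                                       input_len=40, config=config,
                                       pref='cnn')
# rep_len == 300: three filter widths times 100 feature maps each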
def __init__(self, image_dimension, **kwargs):
    layers = []

    #############################################
    # a first block with 2 convolutions of 32 (3, 3) filters
    layers.append(Convolutional((3, 3), 32, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 32, border_mode='half'))
    layers.append(Rectifier())

    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    #############################################
    # a 2nd block with 3 convolutions of 64 (3, 3) filters
    layers.append(Convolutional((3, 3), 64, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 64, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 64, border_mode='half'))
    layers.append(Rectifier())

    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    #############################################
    # a 3rd block with 4 convolutions of 128 (3, 3) filters
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())

    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    self.conv_sequence = ConvolutionalSequence(layers, 3,
                                               image_size=image_dimension)

    # the first MLP dim is the flattened conv output
    # (two activations need three entries in dims)
    self.conv_sequence._push_allocation_config()
    conv_out_dim = numpy.prod(self.conv_sequence.get_dim('output'))

    flattener = Flattener()
    self.top_mlp = MLP(activations=[Rectifier(), Logistic()],
                       dims=[conv_out_dim, 500, 1])

    application_methods = [self.conv_sequence.apply, flattener.apply,
                           self.top_mlp.apply]
    super(VGGNet, self).__init__(application_methods,
                                 biases_init=Constant(0),
                                 weights_init=Uniform(width=.1), **kwargs)
def __init__(self, conv_seq=None, mlp=None, **kwargs):
    self.conv_seq = conv_seq
    self.mlp = mlp
    self.flatten = Flattener()
    self.T = 1.
    application_methods = [self.conv_seq.apply, self.flatten.apply,
                           self.mlp.apply]
    super(CompositeSequence, self).__init__(
        application_methods=application_methods, **kwargs)
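# Illustrative construction of the CompositeSequence brick above, with a tiny
# conv sequence and MLP; every size here is hypothetical.
import numpy
from blocks.bricks import MLP, Rectifier, Softmax
from blocks.bricks.conv import Convolutional, ConvolutionalSequence
from blocks.initialization import IsotropicGaussian, Constant

tiny_conv = ConvolutionalSequence(
    [Convolutional((3, 3), 8, name='c0'), Rectifier()],
    num_channels=1, image_size=(8, 8))
tiny_conv._push_allocation_config()
out_dim = int(numpy.prod(tiny_conv.get_dim('output')))  # 8 * 6 * 6 = 288
tiny_mlp = MLP([Rectifier(), Softmax()], [out_dim, 32, 2])
composite = CompositeSequence(conv_seq=tiny_conv, mlp=tiny_mlp,
                              weights_init=IsotropicGaussian(0.01),
                              biases_init=Constant(0))
composite.push_initialization_config()
composite.initialize()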
def net_dvc(image_size=(32, 32)):
    convos = [5, 5, 5]
    pools = [2, 2, 2]
    filters = [100, 200, 300]

    tuplify = lambda x: (x, x)
    convos = list(map(tuplify, convos))
    conv_layers = [Convolutional(filter_size=s, num_filters=o,
                                 num_channels=i, name="Conv" + str(n))
                   for s, o, i, n in zip(convos, filters,
                                         [3] + filters, range(1000))]
    pool_layers = [MaxPooling(p) for p in map(tuplify, pools)]
    activations = [Rectifier() for i in convos]

    layers = [i for l in zip(conv_layers, activations, pool_layers) for i in l]

    cnn = ConvolutionalSequence(layers, 3, image_size=image_size, name="cnn",
                                weights_init=Uniform(width=.1),
                                biases_init=Constant(0))
    cnn._push_allocation_config()
    cnn_output = np.prod(cnn.get_dim('output'))

    mlp_size = [cnn_output, 500, 2]
    mlp = MLP([Rectifier(), Softmax()], mlp_size, name="mlp",
              weights_init=Uniform(width=.1), biases_init=Constant(0))

    seq = FeedforwardSequence([net.apply for net in [cnn, Flattener(), mlp]])
    seq.push_initialization_config()
    seq.initialize()
    return seq
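# Sketch: compile a forward pass through the network returned by net_dvc
# (assumes the Theano import conventions used in this file).
import theano
from theano import tensor

net = net_dvc(image_size=(32, 32))
x = tensor.tensor4('features')
y_hat = net.apply(x)  # (batch, 2) probabilities from the final Softmax
predict = theano.function([x], y_hat)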
def __init__(self, image_dimension, **kwargs):
    layers = []

    #############################################
    # a first block with 2 convolutions of 64 (3, 3) filters
    layers.append(Convolutional((3, 3), 64, border_mode='half', name='conv_1_1'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 64, border_mode='half', name='conv_1_2'))
    layers.append(Rectifier())

    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    #############################################
    # a 2nd block with 2 convolutions of 128 (3, 3) filters
    layers.append(Convolutional((3, 3), 128, border_mode='half', name='conv_2_1'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 128, border_mode='half', name='conv_2_2'))
    layers.append(Rectifier())

    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    #############################################
    # a 3rd block with 4 convolutions of 256 (3, 3) filters
    layers.append(Convolutional((3, 3), 256, border_mode='half', name='conv_3_1'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 256, border_mode='half', name='conv_3_2'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 256, border_mode='half', name='conv_3_3'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 256, border_mode='half', name='conv_3_4'))
    layers.append(Rectifier())

    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    #############################################
    # a 4th block with 4 convolutions of 512 (3, 3) filters
    layers.append(Convolutional((3, 3), 512, border_mode='half', name='conv_4_1'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 512, border_mode='half', name='conv_4_2'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 512, border_mode='half', name='conv_4_3'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 512, border_mode='half', name='conv_4_4'))
    layers.append(Rectifier())

    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    #############################################
    # a 5th block with 4 convolutions of 512 (3, 3) filters
    layers.append(Convolutional((3, 3), 512, border_mode='half', name='conv_5_1'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 512, border_mode='half', name='conv_5_2'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 512, border_mode='half', name='conv_5_3'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 512, border_mode='half', name='conv_5_4'))
    layers.append(Rectifier())

    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    self.conv_sequence = ConvolutionalSequence(layers, 3,
                                               image_size=image_dimension)

    # the first MLP dim is the flattened conv output
    # (four activations need five entries in dims)
    self.conv_sequence._push_allocation_config()
    conv_out_dim = numpy.prod(self.conv_sequence.get_dim('output'))

    flattener = Flattener()
    self.top_mlp = BatchNormalizedMLP(
        activations=[Rectifier(), Rectifier(), Rectifier(), Logistic()],
        dims=[conv_out_dim, 4096, 4096, 1000, 1])

    application_methods = [self.conv_sequence.apply, flattener.apply,
                           self.top_mlp.apply]
    super(VGGNet, self).__init__(application_methods,
                                 biases_init=Constant(0),
                                 weights_init=Uniform(width=.1), **kwargs)
def convert_model(self, L):
    """Converts a Matlab model into Blocks.

    The method expects an argument L specifying the layers of the Matlab
    model, e.g. as returned by load_model. It returns a list of bricks.
    This list may be longer than the list of layers in L, because
    additional padding bricks are introduced to work around limitations
    in the Blocks pooling bricks."""
    layers = []
    image_size = self.imagesize
    for i in range(37):  # 37 layers in the Matlab model
        l = L[0][i][0][0]
        tp = l["type"][0]
        name = l["name"][0]
        if tp == "conv":
            wt = l["weights"][0, 0]
            bias = l["weights"][0, 1]
            pad = l["pad"][0]
            stride = l["stride"][0]
            # WORK-AROUND to get to 7x7 output after last convolution
            if name == 'conv5_3':
                pad = [0, 1, 0, 1]
            if sum(pad) > 0:
                pad = [int(d) for d in pad]
                layer = Padding(pad)
                layer.image_size = image_size
                image_size = layer.get_dim("output")[1:3]
                layers.append(layer)
            layer, outdim = self.conv_layer(name, wt, bias, image_size)
            layers.append(layer)
            image_size = outdim
        elif tp == "pool":
            method = l["method"][0]
            pool = l["pool"][0]
            stride = l["stride"][0]
            pad = l["pad"][0]
            stride = [int(d) for d in stride]
            pool = [int(d) for d in pool]
            pad = [int(d) for d in pad]
            layer, outdim = self.pool_layer(name, method, pool, pad,
                                            stride, image_size)
            layers.append(layer)
            image_size = outdim
        elif tp == "relu":
            layers.append(self.relu_layer(name))
        elif tp == "softmax":
            layers.append(Flattener(name='flatten'))
            layers.append(self.softmax_layer(name))
    print(len(layers), 'layers created')
    return layers
    Rectifier(),
    MaxPooling((2, 2), name='MaxPol1'),
    Convolutional(filter_size=(1, 1), num_filters=1024, name='Convx3'),
    Rectifier(),
    MaxPooling((2, 2), name='MaxPol2'),
    Convolutional(filter_size=(1, 1), num_filters=2, name='Convx4'),
    Rectifier(),
])
conv_sequence1 = ConvolutionalSequence(conv_layers1, num_channels=512,
                                       image_size=(10, 10),
                                       weights_init=Orthogonal(),
                                       use_bias=False, name='ConvSeq3')
conv_sequence1.initialize()
out_soft1 = Flattener(name='Flatt1').apply(conv_sequence1.apply(out5))
predict1 = NDimensionalSoftmax(name='Soft1').apply(out_soft1)
cost1 = CategoricalCrossEntropy(name='Cross1').apply(
    y.flatten(), predict1).copy(name='cost1')

#SECOND SOFTMAX
conv_layers2 = list([
    MaxPooling((2, 2), name='MaxPol2'),
    Convolutional(filter_size=(1, 1), num_filters=128, name='Convx21'),
    Rectifier(),
    MaxPooling((2, 2), name='MaxPol11'),
    Convolutional(filter_size=(1, 1), num_filters=1024, name='Convx31'),
    Rectifier(),
    MaxPooling((2, 2), name='MaxPol21'),
    Convolutional(filter_size=(1, 1), num_filters=2, name='Convx41'),
    Rectifier(),
              biases_init=IsotropicGaussian(std=0.01),
              use_bias=True, border_mode="valid", step=(1, 1))
l.initialize()
o = l.apply(o)

l = BatchNormalizationConv(input_shape=l.get_dim("output"),
                           B_init=IsotropicGaussian(std=0.01),
                           Y_init=IsotropicGaussian(std=0.01))
l.initialize()
o = l.apply(o)

shape = np.prod(l.get_dim("output"))
o = Flattener().apply(o)

l = Linear(input_dim=shape, output_dim=200,
           weights_init=IsotropicGaussian(std=0.01),
           biases_init=IsotropicGaussian(std=0.01))
l.initialize()
o = l.apply(o)

l = BatchNormalization(input_dim=l.get_dim("output"),
                       B_init=IsotropicGaussian(std=0.01),
                       Y_init=IsotropicGaussian(std=0.01))
l.initialize()
o = l.apply(o)

o = Rectifier().apply(o)
def build_and_run(label, config):
    ############## CREATE THE NETWORK ###############
    # Define the parameters
    num_epochs = config['num_epochs']
    num_batches = config['num_batches']
    num_channels = config['num_channels']
    image_shape = config['image_shape']
    filter_size = config['filter_size']
    num_filter = config['num_filter']
    pooling_sizes = config['pooling_sizes']
    mlp_hiddens = config['mlp_hiddens']
    output_size = config['output_size']
    batch_size = config['batch_size']
    activation = config['activation']
    mlp_activation = config['mlp_activation']
    # print(num_epochs, num_channels, image_shape, filter_size, num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size, activation, mlp_activation)

    lambda_l1 = 0.000025
    lambda_l2 = 0.000025

    print("Building model")
    # Create the symbolic variables
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    # Get the parameters
    conv_parameters = zip(filter_size, num_filter)

    # Create the convolutional layers
    conv_layers = list(interleave([
        (Convolutional(filter_size=filter_size, num_filters=num_filter,
                       name='conv_{}'.format(i))
         for i, (filter_size, num_filter) in enumerate(conv_parameters)),
        (activation),
        (MaxPooling(size, name='pool_{}'.format(i))
         for i, size in enumerate(pooling_sizes))]))
        # (AveragePooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))

    # Create the sequence
    conv_sequence = ConvolutionalSequence(conv_layers, num_channels,
                                          image_size=image_shape,
                                          weights_init=Uniform(width=0.2),
                                          biases_init=Constant(0.))
    # Initialize the convnet
    conv_sequence.initialize()

    # Add the MLP
    top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))] + \
        mlp_hiddens + [output_size]
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation, top_mlp_dims,
              weights_init=Uniform(0, 0.2), biases_init=Constant(0.))
    # Initialize the MLP
    mlp.initialize()

    # Get the output
    predict = mlp.apply(out)

    cost = CategoricalCrossEntropy().apply(y.flatten(),
                                           predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)
    # Little trick to plot the error rate in two different plots (we can't
    # use the same data twice in one plot, for an unknown reason)
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    ########### REGULARIZATION ##################
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
    # l2_penalty_weights = T.sum([i*lambda_l2/len(weights) * (W ** 2).sum() for i,W in enumerate(weights)])  # Gradually increase penalty for layer
    l2_penalty = T.sum([lambda_l2 * (W ** 2).sum()
                        for i, W in enumerate(weights + biases)])
    # l2_penalty_bias = T.sum([lambda_l2*(B **2).sum() for B in biases])
    # l2_penalty = l2_penalty_weights + l2_penalty_bias
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1 * T.abs_(z).sum()
                        for z in weights + biases])
    # l1_penalty_weights = T.sum([i*lambda_l1/len(weights) * T.abs_(W).sum() for i,W in enumerate(weights)])  # Gradually increase penalty for layer
    # l1_penalty_biases = T.sum([lambda_l1 * T.abs_(B).sum() for B in biases])
    # l1_penalty = l1_penalty_biases + l1_penalty_weights
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'

    ########### DEFINE THE ALGORITHM #############
    # algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Momentum())
    algorithm = GradientDescent(cost=costreg, parameters=cg.parameters,
                                step_rule=Adam())

    ########### GET THE DATA #####################
    istest = 'test' in config.keys()
    train_stream, valid_stream, test_stream = get_stream(batch_size,
                                                         image_shape,
                                                         test=istest)

    ########### INITIALIZING EXTENSIONS ##########
    checkpoint = Checkpoint('models/best_' + label + '.tar')
    checkpoint.add_condition(
        ['after_epoch'],
        predicate=OnLogRecord('valid_error_rate_best_so_far'))
    # Adding a live plot with the bokeh server
    plot = Plot(
        label,
        channels=[['train_error_rate', 'valid_error_rate'],
                  ['valid_cost', 'valid_error_rate2'],
                  # ['train_costreg','train_grad_norm']],
                  ['train_costreg', 'train_total_gradient_norm',
                   'train_l2_penalty', 'train_l1_penalty']],
        server_url="http://hades.calculquebec.ca:5042")

    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        DataStreamMonitoring([cost, error_rate, error_rate2],
                             valid_stream, prefix="valid"),
        TrainingDataMonitoring([costreg, error_rate, error_rate2,
                                grad_norm, l2_penalty, l1_penalty],
                               prefix="train", after_epoch=True),
        plot,
        ProgressBar(),
        Printing(),
        TrackTheBest('valid_error_rate', min),  # Keep best
        checkpoint,  # Save best
        FinishIfNoImprovementAfter('valid_error_rate_best_so_far',
                                   epochs=4)]  # Early-stopping

    model = Model(cost)
    main_loop = MainLoop(algorithm, data_stream=train_stream, model=model,
                         extensions=extensions)
    main_loop.run()
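# An illustrative config for build_and_run above. Every value is hypothetical,
# and the get_stream helper plus the Bokeh plot server are assumed to be
# available as used in the function body.
config = {
    'num_epochs': 50, 'num_batches': None, 'num_channels': 3,
    'image_shape': (32, 32),
    'filter_size': [(3, 3), (3, 3)],
    'num_filter': [32, 64],
    'pooling_sizes': [(2, 2), (2, 2)],
    'mlp_hiddens': [500],
    'output_size': 2,
    'batch_size': 64,
    'activation': [Rectifier(), Rectifier()],
    'mlp_activation': [Rectifier(), Softmax()],
}
build_and_run('baseline', config)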
    b.ConvolutionalLayer(activation, filter_size, num_filters_, pooling_size,
                         num_channels=3)
    for filter_size, num_filters_, pooling_size
    in zip(filter_sizes, num_filters, pooling_sizes)]

convnet = ConvolutionalSequence(conv_layers, num_channels=3,
                                image_size=(32, 32),
                                weights_init=Uniform(0, 0.2),
                                biases_init=Constant(0.))
convnet.initialize()
conv_features = Flattener().apply(convnet.apply(X))

# MLP
mlp = MLP(activations=[Logistic(name='sigmoid_0'),
                       Softmax(name='softmax_1')],
          dims=[256, 256, 2],
          weights_init=IsotropicGaussian(0.01),
          biases_init=Constant(0))
# >>> [child.name for child in mlp.children]
# ['linear_0', 'sigmoid_0', 'linear_1', 'softmax_1']
Y = mlp.apply(conv_features)
mlp.initialize()

# Setting up the cost function
from blocks.bricks.cost import CategoricalCrossEntropy
def run_experiment():
    np.random.seed(42)

    X = tensor.tensor4('features')
    nbr_channels = 3
    image_shape = (5, 5)

    conv_layers = [
        ConvolutionalLayer(filter_size=(2, 2), num_filters=10,
                           activation=Rectifier().apply,
                           border_mode='valid', pooling_size=(1, 1),
                           weights_init=Uniform(width=0.1),
                           #biases_init=Uniform(width=0.01),
                           biases_init=Constant(0.0),
                           name='conv0')]
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels=nbr_channels,
                                          image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()

    flattener = Flattener()
    conv_output = conv_sequence.apply(X)
    y_hat = flattener.apply(conv_output)
    # Whatever. Not important since we're not going to actually train anything.
    cost = tensor.sqr(y_hat).sum()

    #L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[FILTER, BIAS])(ComputationGraph([y_hat]).variables)]
    L_grads_method_02 = [
        tensor.grad(cost, v)
        for v in VariableFilter(roles=[BIAS])(
            ComputationGraph([y_hat]).variables)]

    # works on the sum of the gradients in a mini-batch
    sum_square_norm_gradients_method_02 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_02])

    D_by_layer = get_conv_layers_transformation_roles(
        ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 = \
        get_sum_square_norm_gradients_conv_transformations(D_by_layer, cost)

    # why does this thing depend on N again ?
    # I don't think I've used a cost that divides by N.
    N = 2
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0],
                             image_shape[1]).astype(np.float32)
    #Xtrain[1:,:,:,:] = 0.0
    Xtrain[:, :, :, :] = 1.0

    convolution_filter_variable = VariableFilter(roles=[FILTER])(
        ComputationGraph([y_hat]).variables)[0]
    convolution_filter_variable_value = convolution_filter_variable.get_value()
    convolution_filter_variable_value[:, :, :, :] = 1.0
    #convolution_filter_variable_value[0,0,:,:] = 1.0
    convolution_filter_variable.set_value(convolution_filter_variable_value)

    f = theano.function([X],
                        [cost,
                         individual_sum_square_norm_gradients_method_00,
                         sum_square_norm_gradients_method_02])

    [c, v0, gs2] = f(Xtrain)
    #print "[c, v0, gs2]"

    L_c, L_v0, L_gs2 = ([], [], [])
    for n in range(N):
        [nc, nv0, ngs2] = f(Xtrain[n, :, :, :].reshape(
            (1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1, -1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs2).reshape((1, -1)) / v0.reshape((1, -1))
                                     num_filters=num_filter[j + 1],
                                     step=conv_step,
                                     border_mode=border_mode,
                                     name='conv_{}'.format(i)))
conv_layers4.append(BatchNormalization(name='BNconv_{}'.format(i)))
conv_layers4.append(conv_activation[0])
conv_layers4.append(MaxPooling(pooling_size[j + 1],
                               name='pool_{}'.format(i)))

conv_sequence4 = ConvolutionalSequence(conv_layers4,
                                       num_channels=num_channels,
                                       image_size=image_size,
                                       weights_init=Uniform(width=0.2),
                                       biases_init=Constant(0.),
                                       name='ConvSeq_{}'.format(i))
conv_sequence4.initialize()
out = Flattener().apply(conv_sequence4.apply(out))

################# Final MLP layers #################
# MLP parameters
mlp_hiddens = 1000
top_mlp_dims = [numpy.prod(conv_sequence4.get_dim('output'))] + \
    [mlp_hiddens] + [output_size]
top_mlp = MLP(mlp_activation, top_mlp_dims,
              weights_init=Uniform(width=0.2), biases_init=Constant(0.))
top_mlp.initialize()
probs = top_mlp.apply(out)

cost = CategoricalCrossEntropy(name='Cross1').apply(y.flatten(), probs)
def build_model(images, labels):
    vgg = VGG(layer='conv3_4')
    vgg.push_initialization_config()
    vgg.initialize()

    sb = SubstractBatch()

    # Construct a bottom convolutional sequence
    layers = [
        Convolutional(filter_size=(3, 3), num_filters=100, use_bias=True,
                      tied_biases=True, name='final_conv0'),
        BatchNormalization(name='batchnorm_1'),
        Rectifier(name='final_conv0_act'),
        Convolutional(filter_size=(3, 3), num_filters=100, use_bias=True,
                      tied_biases=True, name='final_conv1'),
        BatchNormalization(name='batchnorm_2'),
        Rectifier(name='final_conv1_act'),
        MaxPooling(pooling_size=(2, 2), name='maxpool_final')]
    bottom_conv_sequence = ConvolutionalSequence(
        layers, num_channels=256, image_size=(40, 40),
        biases_init=Constant(0.), weights_init=IsotropicGaussian(0.01))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    print 'dim output conv:', bottom_conv_sequence.get_dim('output')
    # conv_out_dim = 20 * 40 * 40
    top_mlp = BatchNormalizedMLP(
        [Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
        [conv_out_dim, 1024, 10],
        weights_init=IsotropicGaussian(), biases_init=Constant(0))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([vgg.apply, bottom_conv_sequence.apply,
                                  flattener.apply, top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # add regularization
    selector = Selector([top_mlp])
    Ws = selector.get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    W0 = Ws['/%s/linear_0.W' % mlp_brick_name]
    W1 = Ws['/%s/linear_1.W' % mlp_brick_name]
    cost = cost_noreg + .0001 * (W0 ** 2).sum() + .001 * (W1 ** 2).sum()

    # define learned parameters
    selector = Selector([ss_seq])
    Ws = selector.get_parameters('W')
    bs = selector.get_parameters('b')
    BNSCs = selector.get_parameters('batch_norm_scale')
    BNSHs = selector.get_parameters('batch_norm_shift')

    parameters_top = []
    parameters_top += [v for k, v in Ws.items()]
    parameters_top += [v for k, v in bs.items()]
    parameters_top += [v for k, v in BNSCs.items()]
    parameters_top += [v for k, v in BNSHs.items()]

    selector = Selector([vgg])
    convs = selector.get_parameters()

    parameters_all = []
    parameters_all += parameters_top
    parameters_all += [v for k, v in convs.items()]

    return cost, [parameters_top, parameters_all]
    ConvolutionalLayer(Rectifier().apply, (3, 3), 32, (2, 2), name='l2')]

convnet = ConvolutionalSequence(conv_layers, num_channels=1,
                                image_size=(28, 28),
                                weights_init=IsotropicGaussian(0.1),
                                biases_init=Constant(0))
convnet.initialize()
output_dim = np.prod(convnet.get_dim('output'))
print(output_dim)

# Fully connected layers
features = Flattener().apply(convnet.apply(x))

mlp = MLP(activations=[Rectifier(), None], dims=[output_dim, 100, 10],
          weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
mlp.initialize()
y_hat = mlp.apply(features)

# numerically stable softmax
cost = Softmax().categorical_cross_entropy(y.flatten(), y_hat)
cost.name = 'nll'
error_rate = MisclassificationRate().apply(y.flatten(), y_hat)
#cost = MisclassificationRate().apply(y, y_hat)
#cost.name = 'error_rate'
# Theano variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

### setting up "clean" and "dirty" images with data_preprocessing
if mode == "GPU_run":
    x1 = data_preprocessing1(x).copy(name='x_clean')
    x2 = data_preprocessing2(x).copy(name='x_dirty')
    out1 = conv_sequence2.apply(x1)
    out2 = conv_sequence2.apply(x2)

    ### Flattening data
    conv_out1 = Flattener(name='flattener1').apply(out1)
    conv_out2 = Flattener(name='flattener2').apply(out2)
    conv_out = tensor.concatenate([conv_out1, conv_out2], axis=1)

    ### MLP
    mlp_hiddens = 1000
    top_mlp_dims = [numpy.prod(conv_sequence1.get_dim('output')) +
                    numpy.prod(conv_sequence1.get_dim('output'))] + \
        [mlp_hiddens] + [output_size]
    top_mlp = MLP(mlp_activation, top_mlp_dims,
                  weights_init=Uniform(width=0.2), biases_init=Constant(0.))
    top_mlp.initialize()

### Getting the data
from fuel.datasets.dogs_vs_cats import DogsVsCats
from fuel.streams import DataStream, ServerDataStream
out2 = Fire((55, 55), 128, 16, 16, 16, out1, 25)
out3 = Fire((55, 55), 128, 32, 32, 32, out2, 300)
out31 = MaxPooling((3, 3), step=(2, 2), padding=(1, 1),
                   name='poolLow').apply(out3)
out4 = Fire((28, 28), 256, 32, 32, 32, out31, 45)
out5 = Fire((28, 28), 256, 48, 48, 48, out4, 500)
out6 = Fire((28, 28), 384, 48, 48, 48, out5, 65)
out7 = Fire((28, 28), 384, 64, 64, 64, out6, 700)
out71 = MaxPooling((3, 3), step=(2, 2), padding=(1, 1),
                   name='poolLow2').apply(out7)
out8 = Fire((14, 14), 512, 64, 64, 64, out71, 85)

#LAST LAYERS
conv_layers1 = list([
    Convolutional(filter_size=(1, 1), num_filters=2, name='Convx2'),
    BatchNormalization(name='batch_vx2'),
    Rectifier(),
    AveragePooling((14, 14), name='MaxPol1')])
conv_sequence1 = ConvolutionalSequence(conv_layers1, num_channels=512,
                                       image_size=(14, 14),
                                       weights_init=Orthogonal(),
                                       use_bias=False, name='ConvSeq3')
conv_sequence1.initialize()
out_soft1 = Flattener(name='Flatt1').apply(conv_sequence1.apply(out8))
predict1 = NDimensionalSoftmax(name='Soft1').apply(out_soft1)

cost = CategoricalCrossEntropy(name='Cross1').apply(
    y.flatten(), predict1).copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), predict1)
# Little trick to plot the error rate in two different plots (we can't use
# the same data twice in one plot, for an unknown reason)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
cg = ComputationGraph([cost, error_rate])

########### GET THE DATA #####################
stream_train = ServerDataStream(('image_features', 'targets'), False,
                                port=5512, hwm=40)
stream_valid = ServerDataStream(('image_features', 'targets'), False,
                                port=5513, hwm=40)

########### DEFINE THE ALGORITHM #############
algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=Momentum(learning_rate=0.01,
                                               momentum=0.9))
                num_filter) in enumerate(conv_parameters)),
    (MaxPooling(size, name='pool_{}'.format(i))
     for i, size in enumerate(pooling_sizes))]))

#Create the sequence
conv_sequence = ConvolutionalSequence(conv_layers, num_channels,
                                      image_size=image_shape,
                                      weights_init=Uniform(width=0.2),
                                      biases_init=Constant(0.))
#Initialize the convnet
conv_sequence.initialize()

#Add the MLP
top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))] + \
    mlp_hiddens + [output_size]
out = Flattener().apply(conv_sequence.apply(x))
mlp = MLP(mlp_activation, top_mlp_dims,
          weights_init=Uniform(0, 0.2), biases_init=Constant(0.))
#Initialize the MLP
mlp.initialize()

#Get the output
predict = mlp.apply(out)

cost = CategoricalCrossEntropy().apply(y.flatten(),
                                       predict).copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), predict)
#Little trick to plot the error rate in two different plots (we can't use
#the same data twice in one plot, for an unknown reason)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
cg = ComputationGraph([cost, error_rate])
def run_experiment():
    np.random.seed(42)

    #X = tensor.matrix('features')
    X = tensor.tensor4('features')
    y = tensor.matrix('targets')

    nbr_channels = 3
    image_shape = (30, 30)

    conv_layers = [
        ConvolutionalLayer(filter_size=(4, 4), num_filters=10,
                           activation=Rectifier().apply, border_mode='full',
                           pooling_size=(1, 1),
                           weights_init=Uniform(width=0.1),
                           biases_init=Constant(0.0), name='conv0'),
        ConvolutionalLayer(filter_size=(3, 3), num_filters=14,
                           activation=Rectifier().apply, border_mode='full',
                           pooling_size=(1, 1),
                           weights_init=Uniform(width=0.1),
                           biases_init=Constant(0.0), name='conv1')]
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels=nbr_channels,
                                          image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()

    conv_output_dim = np.prod(conv_sequence.get_dim('output'))
    #conv_output_dim = 25*25

    flattener = Flattener()

    mlp = MLP(activations=[Rectifier(), Rectifier(), Softmax()],
              dims=[conv_output_dim, 50, 50, 10],
              weights_init=IsotropicGaussian(std=0.1),
              biases_init=IsotropicGaussian(std=0.01))
    mlp.initialize()

    conv_output = conv_sequence.apply(X)
    y_hat = mlp.apply(flattener.apply(conv_output))

    cost = CategoricalCrossEntropy().apply(y, y_hat)
    #cost = CategoricalCrossEntropy().apply(y_hat, y)
    #cost = BinaryCrossEntropy().apply(y.flatten(), y_hat.flatten())

    cg = ComputationGraph([y_hat])

    """
    print "--- INPUT ---"
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[INPUT])(cg.variables):
        print v.tag.annotations[0].name

    print "--- OUTPUT ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables):
        print v.tag.annotations[0].name

    print "--- WEIGHT ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables):
        print v.tag.annotations[0].name

    print "--- BIAS ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables):
        print v.tag.annotations[0].name
    """

    # check out .tag on the variables to see which layer they belong to
    print "----------------------------"

    D_by_layer = get_linear_transformation_roles(mlp, cg)
    # returns a vector with one entry for each example in the mini-batch
    individual_sum_square_norm_gradients_method_00 = \
        get_sum_square_norm_gradients_linear_transformations(D_by_layer, cost)

    #import pprint
    #pp = pprint.PrettyPrinter(indent=4)
    #pp.pprint(get_conv_layers_transformation_roles(ComputationGraph(conv_output)).items())

    D_by_layer = get_conv_layers_transformation_roles(
        ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 += \
        get_sum_square_norm_gradients_conv_transformations(D_by_layer, cost)

    print "There are %d entries in cg.parameters." % len(cg.parameters)
    L_grads_method_01 = [tensor.grad(cost, p) for p in cg.parameters]
    L_grads_method_02 = [
        tensor.grad(cost, v)
        for v in VariableFilter(roles=[WEIGHT, BIAS])(cg.variables)]

    # works on the sum of the gradients in a mini-batch
    sum_square_norm_gradients_method_01 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_01])
    sum_square_norm_gradients_method_02 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_02])

    N = 8
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0],
                             image_shape[1]).astype(np.float32)

    # Option 1.
    ytrain = np.zeros((N, 10), dtype=np.float32)
    for n in range(N):
        label = np.random.randint(low=0, high=10)
        ytrain[n, label] = 1.0

    # Option 2, just to debug situations with NaN.
    #ytrain = np.random.rand(N, 10).astype(np.float32)
    #for n in range(N):
    #    ytrain[n,:] = ytrain[n,:] / ytrain[n,:].sum()

    f = theano.function([X, y],
                        [cost,
                         individual_sum_square_norm_gradients_method_00,
                         sum_square_norm_gradients_method_01,
                         sum_square_norm_gradients_method_02])

    [c, v0, gs1, gs2] = f(Xtrain, ytrain)
    #print "[c, v0, gs1, gs2]"

    L_c, L_v0, L_gs1, L_gs2 = ([], [], [], [])
    for n in range(N):
        [nc, nv0, ngs1, ngs2] = f(
            Xtrain[n, :].reshape((1, Xtrain.shape[1],
                                  Xtrain.shape[2], Xtrain.shape[3])),
            ytrain[n, :].reshape((1, 10)))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs1.append(ngs1)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs1).reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1, -1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs1)))
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs1).reshape((1, -1)) / v0.reshape((1, -1))
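# A hypothetical entry point for the experiment above (Python 2, matching the
# print statements in the snippet):
if __name__ == "__main__":
    run_experiment()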