def __init__(self, rng, X_data, y_data, batch_size, training_enabled,
             layer_ndo_p, L2_reg, lrelu_alpha):
    # Stack of 18 batch-normalized conv layers, all with leaky-ReLU
    # activations (slope lrelu_alpha) and bordermode='valid'. X_data carries
    # the heavily augmented 126x126 input images. Per-layer spec:
    # (filter_shape, input spatial size, stride, dropout keep-probability p).
    layer_specs = [
        ((320, 3, 2, 2), 126, 1, 1.0),
        ((320, 320, 2, 2), 125, 1, 1.0),
        ((320, 320, 2, 2), 124, 2, 1.0),
        ((640, 320, 2, 2), 62, 1, 0.9),
        ((640, 640, 2, 2), 61, 1, 0.9),
        ((640, 640, 2, 2), 60, 2, 1.0),
        ((960, 640, 2, 2), 30, 1, 0.8),
        ((960, 960, 2, 2), 29, 1, 0.8),
        ((960, 960, 2, 2), 28, 2, 1.0),
        ((1280, 960, 2, 2), 14, 1, 0.7),
        ((1280, 1280, 2, 2), 13, 1, 0.7),
        ((1280, 1280, 2, 2), 12, 2, 1.0),
        ((1600, 1280, 2, 2), 6, 1, 0.6),
        ((1600, 1600, 2, 2), 5, 1, 0.6),
        ((1600, 1600, 2, 2), 4, 2, 1.0),
        ((1920, 1600, 2, 2), 2, 1, 0.5),
        ((1920, 1920, 1, 1), 1, 1, 0.5),
        ((10, 1920, 1, 1), 1, 1, 1.0),
    ]

    layers = []
    layer_input = X_data
    for filter_shape, in_size, stride, p in layer_specs:
        layer = myConvLayerBN(
            rng,
            is_train=training_enabled,
            input_data=layer_input,
            filter_shape=filter_shape,
            image_shape=(batch_size, filter_shape[1], in_size, in_size),
            ssample=(stride, stride),
            bordermode='valid',
            p=p,
            alpha=lrelu_alpha)  # leaky ReLU
        layers.append(layer)
        layer_input = layer.output

    # the last layer already emits a (batch_size, 10, 1, 1) tensor, so no
    # global averaging is required in this case
    softmax_input = layers[-1].output.flatten(2)
    softmax_layer = SoftmaxWrapper(input_data=softmax_input, n_in=10, n_out=10)
    self.errors = softmax_layer.errors(y_data)

    # L2 penalty over all convolutional weights
    L2_sqr = sum((layer.W ** 2).sum() for layer in layers)

    # the cost we minimize during training is the NLL of the model
    # plus the L2 regularization term
    self.cost = (softmax_layer.negative_log_likelihood(y_data)
                 + L2_reg * L2_sqr)

    # parameters collected from the last layer back to the first
    self.params = sum((layer.params for layer in reversed(layers)), [])
    self.input = X_data
    self.y = y_data
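
# Sanity check for the image_shape values above (a standalone sketch; the
# helper name below is ours, not part of the model code). With
# bordermode='valid', each conv layer produces spatial size
#     out = (in - filter) // stride + 1,
# which is how the 126x126 input shrinks to 1x1 over the 18 layers:
# 126 -> 125 -> 124 -> 62 -> 61 -> 60 -> 30 -> 29 -> 28 -> 14 -> 13 -> 12
#     -> 6 -> 5 -> 4 -> 2 -> 1 -> 1 -> 1
def _valid_conv_out_size(in_size, filter_size, stride):
    return (in_size - filter_size) // stride + 1

assert _valid_conv_out_size(126, 2, 1) == 125  # layer0
assert _valid_conv_out_size(124, 2, 2) == 62   # layer2 (stride 2)
assert _valid_conv_out_size(2, 2, 1) == 1      # layer15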
def test_ModelC_AllCNN(learning_rate=0.05, n_epochs=350, batch_size=200,
                       L2_reg=0.001, input_ndo_p=0.8, layer_ndo_p=0.5,
                       save_model=True, save_freq=50):
    """
    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: the number of training examples per batch

    :type L2_reg: float
    :param L2_reg: weight of the L2 regularization term in the cost

    :type input_ndo_p: float
    :param input_ndo_p: keep-probability for dropout on the input images

    :type layer_ndo_p: float
    :param layer_ndo_p: keep-probability for dropout after the strided
                        (pooling) convolution layers

    :type save_model: bool
    :param save_model: whether to pickle the parameters during training

    :type save_freq: int
    :param save_freq: save the parameters every save_freq epochs
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data2()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    print('n_train_batches: %i' % n_train_batches)
    print('n_valid_batches: %i' % n_valid_batches)
    print('n_test_batches: %i' % n_test_batches)

    learning_rate = numpy.asarray(learning_rate, dtype=numpy.float32)
    print('learning_rate: %f' % learning_rate)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    lr = T.fscalar()     # learning rate, passed in so it can be decayed
    training_enabled = T.iscalar('training_enabled')

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # drop the input only while training; at test time, scale the input by
    # the keep-probability instead of dropping
    dropout_input = T.switch(T.neq(training_enabled, 0),
                             drop(layer0_input, p=input_ndo_p),
                             input_ndo_p * layer0_input)

    layer0 = myConvLayer(rng, is_train=training_enabled,
                         input_data=dropout_input,
                         filter_shape=(96, 3, 3, 3),
                         image_shape=(batch_size, 3, 32, 32),
                         ssample=(1, 1), bordermode='half', p=1.0)

    layer1 = myConvLayer(rng, is_train=training_enabled,
                         input_data=layer0.output,
                         filter_shape=(96, 96, 3, 3),
                         image_shape=(batch_size, 96, 32, 32),
                         ssample=(1, 1), bordermode='half', p=1.0)

    layer2 = myConvLayer(rng, is_train=training_enabled,
                         input_data=layer1.output,
                         filter_shape=(96, 96, 3, 3),
                         image_shape=(batch_size, 96, 32, 32),
                         ssample=(2, 2), bordermode='half', p=layer_ndo_p)

    layer3 = myConvLayer(rng, is_train=training_enabled,
                         input_data=layer2.output,
                         filter_shape=(192, 96, 3, 3),
                         image_shape=(batch_size, 96, 16, 16),
                         ssample=(1, 1), bordermode='half', p=1.0)

    layer4 = myConvLayer(rng, is_train=training_enabled,
                         input_data=layer3.output,
                         filter_shape=(192, 192, 3, 3),
                         image_shape=(batch_size, 192, 16, 16),
                         ssample=(1, 1), bordermode='half', p=1.0)

    layer5 = myConvLayer(rng, is_train=training_enabled,
                         input_data=layer4.output,
                         filter_shape=(192, 192, 3, 3),
                         image_shape=(batch_size, 192, 16, 16),
                         ssample=(2, 2), bordermode='half', p=layer_ndo_p)

    layer6 = myConvLayer(rng, is_train=training_enabled,
                         input_data=layer5.output,
                         filter_shape=(192, 192, 3, 3),
                         image_shape=(batch_size, 192, 8, 8),
                         ssample=(1, 1), bordermode='half', p=1.0)

    layer7 = myConvLayer(rng, is_train=training_enabled,
                         input_data=layer6.output,
                         filter_shape=(192, 192, 1, 1),
                         image_shape=(batch_size, 192, 8, 8),
                         ssample=(1, 1), bordermode='half', p=1.0)

    layer8 = myConvLayer(rng, is_train=training_enabled,
                         input_data=layer7.output,
                         filter_shape=(10, 192, 1, 1),
                         image_shape=(batch_size, 192, 8, 8),
                         ssample=(1, 1), bordermode='half', p=1.0)

    # global average pooling: the mean over the two spatial axes turns the
    # (batch_size, 10, 8, 8) feature maps into a (batch_size, 10) tensor
    global_average = layer8.output.mean(axis=(2, 3))

    softmax_layer = SoftmaxWrapper(input_data=global_average,
                                   n_in=10, n_out=10)

    L2_sqr = ((layer0.W**2).sum() + (layer1.W**2).sum() + (layer2.W**2).sum()
              + (layer3.W**2).sum() + (layer4.W**2).sum() + (layer5.W**2).sum()
              + (layer6.W**2).sum() + (layer7.W**2).sum() + (layer8.W**2).sum())

    # the cost we minimize during training is the NLL of the model
    cost = softmax_layer.negative_log_likelihood(y) + L2_reg * L2_sqr

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        softmax_layer.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    validate_model = theano.function(
        [index],
        softmax_layer.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    # create a list of all model parameters to be fit by gradient descent
    params = (layer8.params + layer7.params + layer6.params + layer5.params
              + layer4.params + layer3.params + layer2.params + layer1.params
              + layer0.params)

    # train_model updates the model parameters by momentum SGD. Since this
    # model has many parameters, it would be tedious to manually create an
    # update rule for each one, so the updates list is built by looping over
    # all parameters. For each parameter we keep a shared "velocity" that is
    # an exponential moving average of its gradient.
    momentum = theano.shared(numpy.cast[theano.config.floatX](0.9),
                             name='momentum')
    updates = []
    for param in params:
        # velocity, initialized to zeros of the same shape as the parameter
        param_update = theano.shared(
            param.get_value() * numpy.cast[theano.config.floatX](0.))
        updates.append((param, param - lr * param_update))
        updates.append((param_update,
                        momentum * param_update
                        + (numpy.cast[theano.config.floatX](1.) - momentum)
                        * T.grad(cost, param)))

    train_model = theano.function(
        [index, lr],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](1)
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters (disabled; kept for reference)
    # patience = 10000  # look at this many examples regardless
    # patience_increase = 2  # wait this much longer when a new best is found
    # improvement_threshold = 0.995  # a relative improvement of this much is
    #                                # considered significant
    # validation_frequency = min(n_train_batches, patience // 2)
    validation_frequency = n_train_batches // 2

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    updateLRAfter = 200

    while (epoch < n_epochs) and (not done_looping):
        # NOTE: no shuffling is performed here; minibatches are visited in a
        # fixed order every epoch
        epoch = epoch + 1

        # after epoch 200, decay the learning rate by 10x every 50 epochs
        if epoch > updateLRAfter:
            learning_rate *= 0.1
            updateLRAfter += 50

        print('epoch: %i' % epoch)
        print('updateLRAfter: %i' % updateLRAfter)
        print('learning_rate: %f' % learning_rate)

        for minibatch_index in range(n_train_batches):
            # print('epoch: {0}, minibatch: {1}'.format(epoch, minibatch_index))
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 50 == 0:
                print('training @ iter = ', iter)

            cost_ij = train_model(minibatch_index, learning_rate)

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    # (early stopping disabled; kept for reference)
                    # if this_validation_loss < best_validation_loss * \
                    #    improvement_threshold:
                    #     patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # if patience <= iter:
            #     done_looping = True
            #     break

        if save_model and epoch % save_freq == 0:
            # add the model name to the file name to differentiate
            # different models
            with gzip.open('parameters_epoch_{0}.pklz'.format(epoch),
                           'wb') as fp:
                cPickle.dump([param.get_value() for param in params], fp,
                             protocol=2)

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm\n' % ((end_time - start_time) / 60.))
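
# drop() used in test_ModelC_AllCNN above is defined elsewhere in this repo.
# For reference, a minimal sketch consistent with the two T.switch branches
# (training: zero each unit with probability 1-p, no rescaling; testing: the
# caller multiplies activations by p instead) could look like the function
# below. The name _drop_sketch and the MRG seed are ours, not the repo's.
from theano.sandbox.rng_mrg import MRG_RandomStreams

def _drop_sketch(input_data, p=0.5, seed=1234):
    srng = MRG_RandomStreams(seed)
    # binary mask: 1 with probability p (keep), 0 with probability 1-p (drop)
    mask = srng.binomial(n=1, p=p, size=input_data.shape,
                         dtype=theano.config.floatX)
    return input_data * mask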
def __init__(self, rng, X_data, y_data, batch_size, training_enabled,
             layer_ndo_p, L2_reg):
    # Batch-normalized variant of the All-CNN Model C stack on 32x32 inputs;
    # dropout (keep-probability layer_ndo_p) follows the two strided layers.
    layer0 = myConvLayerBN(rng, is_train=training_enabled, input_data=X_data,
                           filter_shape=(96, 3, 3, 3),
                           image_shape=(batch_size, 3, 32, 32),
                           ssample=(1, 1), bordermode='half', p=1.0)

    layer1 = myConvLayerBN(rng, is_train=training_enabled,
                           input_data=layer0.output,
                           filter_shape=(96, 96, 3, 3),
                           image_shape=(batch_size, 96, 32, 32),
                           ssample=(1, 1), bordermode='half', p=1.0)

    layer2 = myConvLayerBN(rng, is_train=training_enabled,
                           input_data=layer1.output,
                           filter_shape=(96, 96, 3, 3),
                           image_shape=(batch_size, 96, 32, 32),
                           ssample=(2, 2), bordermode='half', p=layer_ndo_p)

    layer3 = myConvLayerBN(rng, is_train=training_enabled,
                           input_data=layer2.output,
                           filter_shape=(192, 96, 3, 3),
                           image_shape=(batch_size, 96, 16, 16),
                           ssample=(1, 1), bordermode='half', p=1.0)

    layer4 = myConvLayerBN(rng, is_train=training_enabled,
                           input_data=layer3.output,
                           filter_shape=(192, 192, 3, 3),
                           image_shape=(batch_size, 192, 16, 16),
                           ssample=(1, 1), bordermode='half', p=1.0)

    layer5 = myConvLayerBN(rng, is_train=training_enabled,
                           input_data=layer4.output,
                           filter_shape=(192, 192, 3, 3),
                           image_shape=(batch_size, 192, 16, 16),
                           ssample=(2, 2), bordermode='half', p=layer_ndo_p)

    layer6 = myConvLayerBN(rng, is_train=training_enabled,
                           input_data=layer5.output,
                           filter_shape=(192, 192, 3, 3),
                           image_shape=(batch_size, 192, 8, 8),
                           ssample=(1, 1), bordermode='half', p=1.0)

    layer7 = myConvLayerBN(rng, is_train=training_enabled,
                           input_data=layer6.output,
                           filter_shape=(192, 192, 1, 1),
                           image_shape=(batch_size, 192, 8, 8),
                           ssample=(1, 1), bordermode='half', p=1.0)

    layer8 = myConvLayerBN(rng, is_train=training_enabled,
                           input_data=layer7.output,
                           filter_shape=(10, 192, 1, 1),
                           image_shape=(batch_size, 192, 8, 8),
                           ssample=(1, 1), bordermode='half', p=1.0)

    # global average pooling: the mean over the two spatial axes turns the
    # (batch_size, 10, 8, 8) feature maps into a (batch_size, 10) tensor
    global_average = layer8.output.mean(axis=(2, 3))

    softmax_layer = SoftmaxWrapper(input_data=global_average,
                                   n_in=10, n_out=10)
    self.errors = softmax_layer.errors(y_data)

    L2_sqr = ((layer0.W**2).sum() + (layer1.W**2).sum() + (layer2.W**2).sum()
              + (layer3.W**2).sum() + (layer4.W**2).sum() + (layer5.W**2).sum()
              + (layer6.W**2).sum() + (layer7.W**2).sum() + (layer8.W**2).sum())

    # the cost we minimize during training is the NLL of the model
    # plus the L2 regularization term
    self.cost = (softmax_layer.negative_log_likelihood(y_data)
                 + L2_reg * L2_sqr)

    self.params = (layer8.params + layer7.params + layer6.params
                   + layer5.params + layer4.params + layer3.params
                   + layer2.params + layer1.params + layer0.params)
    self.input = X_data
    self.y = y_data
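
# Hypothetical usage sketch for the class this __init__ belongs to (the class
# name is not shown in this section; 'AllCNN_BN' is our placeholder). The
# model exposes .cost, .errors and .params, so a momentum-SGD training
# function can be compiled around it exactly as in test_ModelC_AllCNN above:
#
#   x = T.matrix('x')
#   y = T.ivector('y')
#   training_enabled = T.iscalar('training_enabled')
#   model = AllCNN_BN(rng, x.reshape((batch_size, 3, 32, 32)), y,
#                     batch_size, training_enabled,
#                     layer_ndo_p=0.5, L2_reg=0.001)
#   grads = [T.grad(model.cost, p) for p in model.params]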