import pickle

import numpy
import pandas as pd
import theano
import theano.tensor as T
import lasagne

# HighwayNetwork, RMSprop, train_nn, load_data, all_CNN_C, and errors are
# assumed to be defined elsewhere in this repository.


def test_Highway(datasets, learning_rate=0.1, rho=0.9, n_epochs=200,
                 n_hidden=10, n_hiddenLayers=1, n_highwayLayers=5,
                 activation_hidden=T.nnet.relu,
                 activation_highway=T.nnet.sigmoid,
                 b_T=-5, L1_reg=0, L2_reg=0, batch_size=500, verbose=False):
    rng = numpy.random.RandomState(23455)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_in = train_set_x.get_value(borrow=True).shape[1]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    itr = T.fscalar()    # index to an iteration

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as a 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    highway_net = HighwayNetwork(rng=rng, input=x, n_in=n_in,
                                 n_hidden=n_hidden, n_out=10,
                                 n_hiddenLayers=n_hiddenLayers,
                                 n_highwayLayers=n_highwayLayers,
                                 activation_hidden=activation_hidden,
                                 activation_highway=activation_highway,
                                 b_T=b_T)

    print('... building the model')

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        highway_net.logRegressionLayer.negative_log_likelihood(y)
        # + L1_reg * L1
        # + L2_reg * L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=highway_net.logRegressionLayer.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=highway_net.logRegressionLayer.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    updates = RMSprop(cost, highway_net.params, lr=learning_rate, rho=rho)

    # compiling a Theano function `train_model` that returns the cost and,
    # at the same time, updates the parameters of the model based on the
    # rules defined in `updates`
    train_model = theano.function(
        inputs=[index, itr],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        on_unused_input='ignore')

    result = train_nn(train_model, validate_model, test_model,
                      n_train_batches, n_valid_batches, n_test_batches,
                      n_epochs, verbose)

    res = pd.DataFrame([
        result.RunningTime, result.BestXEntropy, result.TestPerformance,
        result.BestValidationScore, n_epochs, result.N_Epochs,
        activation_hidden, activation_highway, L2_reg, L1_reg, batch_size,
        result.N_Iterations, n_hidden, n_hiddenLayers, n_highwayLayers,
        learning_rate, rho, result.Patience
    ], index=[
        'Running time', 'XEntropy', 'Test performance',
        'Best Validation score', 'Max epochs', 'N epochs',
        'Activation function - hidden', 'Activation function - highway',
        'L2_reg parameter', 'L1_reg parameter', 'Batch size', 'Iterations',
        'Hidden neurons per layer', 'Hidden Layers', 'Highway Layers',
        'Learning rate', 'Rho', 'Patience'
    ]).transpose()
    # append the run summary to Results.csv and pickle the cross-entropy
    # trace under an index derived from the row just written
    res.to_csv('Results.csv', mode='a', index=False, header=False)
    idx = pd.read_csv('Results.csv').index.values[-1]
    pickle.dump(result.XEntropy, open("cross_entropy" + str(idx) + ".p", "wb"))
    print('Cross entropy is stored in cross_entropy' + str(idx) + '.p')
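# `RMSprop` is referenced above but not defined in this file. Below is a
# minimal sketch of what such a helper could look like, assuming the
# signature RMSprop(cost, params, lr, rho) used above; the repository's own
# implementation may differ (e.g. in its epsilon or accumulator setup).


def RMSprop_sketch(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    """Hypothetical RMSprop update rule: scale each gradient by a running
    root-mean-square of its recent magnitudes."""
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        # accumulator holding an exponential moving average of g**2
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g / T.sqrt(acc_new + epsilon)))
    return updates


# Example invocation of test_Highway (assumes `datasets` is a
# (train, valid, test) triple of Theano shared-variable pairs produced by a
# loader such as load_data; the exact loader call is hypothetical):
# datasets = load_data(...)
# test_Highway(datasets, n_epochs=100, verbose=True)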
def run_experiment(lr=0.01, num_epochs=128, nkerns=[96, 192, 10],
                   lambda_decay=1e-3, conv_arch=all_CNN_C, n_class=10,
                   batch_size=128, verbose=False, filter_size=(3, 3)):
    """
    Wrapper function for testing the all-convolutional networks
    implemented here.

    :type lr: float
    :param lr: learning rate used (factor for the stochastic gradient)

    :type num_epochs: int
    :param num_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type lambda_decay: float
    :param lambda_decay: L2 regularization parameter

    :type conv_arch: function
    :param conv_arch: convolutional network to run

    :type n_class: int
    :param n_class: number of classes/output units of the final layer
                    (10 vs. 100)

    :type batch_size: int
    :param batch_size: number of examples in a minibatch

    :type verbose: boolean
    :param verbose: whether to print out an epoch summary

    :type filter_size: tuple(int)
    :param filter_size: size of the filters
    """
    datasets = load_data(simple=(n_class != 100))
    X_train, y_train = datasets[0]
    X_val, y_val = datasets[1]
    X_test, y_test = datasets[2]

    n_train_batches = X_train.get_value(borrow=True).shape[0]
    n_valid_batches = X_val.get_value(borrow=True).shape[0]
    n_test_batches = X_test.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    index = T.lscalar()  # index to a [mini]batch
    x = T.tensor4('x')
    y = T.ivector('y')

    # reshape the flattened images to (batch, channel, height, width)
    channel = 3
    imsize = 32
    data_size = X_train.get_value(borrow=True).shape[0]
    tdata_size = X_test.get_value(borrow=True).shape[0]
    vdata_size = X_val.get_value(borrow=True).shape[0]
    X_train = X_train.reshape((data_size, channel, imsize, imsize))
    X_test = X_test.reshape((tdata_size, channel, imsize, imsize))
    X_val = X_val.reshape((vdata_size, channel, imsize, imsize))

    # Building the all-conv network
    network = conv_arch(x, filter_size=filter_size, n_class=n_class)

    # Loss and prediction calculation.
    # The training loss is the categorical cross-entropy between
    # predictions and targets.
    train_prediction = lasagne.layers.get_output(network)
    train_loss = lasagne.objectives.categorical_crossentropy(
        train_prediction, y)
    train_loss = train_loss.mean()

    # Regularization
    l2_penalty = lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2)
    train_loss += lambda_decay * l2_penalty

    params = lasagne.layers.get_all_params(network, trainable=True)

    # Updates to the parameters are defined here
    updates = lasagne.updates.nesterov_momentum(
        train_loss, params, learning_rate=lr, momentum=0.9)

    # deterministic=True disables stochastic layers (e.g. dropout) at
    # validation and test time
    val_prediction = lasagne.layers.get_output(network, deterministic=True)
    val_loss = errors(val_prediction, y)

    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = errors(test_prediction, y)

    # Training, validation and test models are defined here
    train_fn = theano.function(
        [index], train_loss, updates=updates,
        givens={
            x: X_train[index * batch_size: (index + 1) * batch_size],
            y: y_train[index * batch_size: (index + 1) * batch_size]
        })

    val_fn = theano.function(
        [index], val_loss,
        givens={
            x: X_val[index * batch_size: (index + 1) * batch_size],
            y: y_val[index * batch_size: (index + 1) * batch_size]
        })

    test_fn = theano.function(
        [index], test_loss,
        givens={
            x: X_test[index * batch_size: (index + 1) * batch_size],
            y: y_test[index * batch_size: (index + 1) * batch_size]
        })

    train_nn(train_fn, val_fn, test_fn,
             n_train_batches, n_valid_batches, n_test_batches,
             num_epochs, verbose=verbose)
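# `errors` is used above but not defined in this file. A minimal sketch,
# assuming `prediction` holds class probabilities and `y` holds integer
# labels, is shown below; the repository's own helper may differ.


def errors_sketch(prediction, y):
    """Hypothetical misclassification rate: the fraction of minibatch
    examples whose argmax prediction disagrees with the label."""
    return T.mean(T.neq(T.argmax(prediction, axis=1), y),
                  dtype=theano.config.floatX)


# Example invocation (assumes the data files expected by load_data are
# available locally):
if __name__ == '__main__':
    run_experiment(lr=0.01, num_epochs=128, n_class=10, verbose=True)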