def test_gradient_updates_Adam():
    """Check that Adam updates reduce the cost on two scalar toy problems.

    Each problem has a single shared scalar weight. The compiled step function
    is called a fixed number of times on a 1x1 matrix of ones and the cost
    returned by the last call must be below a threshold.
    """
    # Problem 1: find the minimum of a parabola.
    parabola_in = T.matrix('x')
    parabola_w = theano.shared(100.0, borrow=True)
    parabola_out = T.dot(parabola_in, parabola_w)
    parabola_cost = T.mean(parabola_out ** 2)
    parabola_updates = cnn.gradient_updates_Adam(
        parabola_cost, [parabola_w], 10)
    parabola_step = theano.function(
        [parabola_in], parabola_cost, updates=parabola_updates)

    for _ in range(100):
        last_parabola_cost = parabola_step(np.ones((1, 1)))
    assert last_parabola_cost < 0.06

    # Problem 2: a parabola with a squared sine added on top.
    wavy_in = T.matrix('x2')
    wavy_w = theano.shared(10.0, borrow=True)
    wavy_out = T.dot(wavy_in, wavy_w)
    wavy_cost = T.mean(T.sin(wavy_out) ** 2 + 0.1 * wavy_out ** 2)
    wavy_updates = cnn.gradient_updates_Adam(wavy_cost, [wavy_w], 0.1)
    wavy_step = theano.function([wavy_in], wavy_cost, updates=wavy_updates)

    for _ in range(200):
        last_wavy_cost = wavy_step(np.ones((1, 1)))
    assert last_wavy_cost < 1
def test_fullyConnectedLayer():
    '''
    Test that the fully connected layer works.

    Trains a fully connected network with one hidden layer of 4 units on
    sin(x), sampled at 300 evenly spaced points in [0, 7], for 2000 Adam
    steps, and asserts that the cost returned by the last step is below
    0.015. For visualization check test.py.

    NOTE: Activations are done out of FC layer, since for atomic
    calculations linear activation is used.
    '''
    # Inputs as a (300, 1) column and the matching sine targets.
    Xtrain = np.linspace(0, 7, 300)[:, np.newaxis]
    Ytrain = np.sin(Xtrain)

    rng = np.random.RandomState(23455)
    x = T.matrix('x')
    y = T.matrix('y')

    # Hidden layer (1 -> 4); tanh is applied outside the layer.
    [hout, params_1] = cnn.fullyConnectedLayer(
        rng=rng,
        data_input=x,
        num_in=1,
        num_out=4)
    # Output layer (4 -> 1), again followed by an external tanh.
    [y_pred_lin, params_2] = cnn.fullyConnectedLayer(
        rng=rng,
        data_input=T.tanh(hout),
        num_in=4,
        num_out=1)
    y_pred = T.tanh(y_pred_lin)

    cost = cnn.MSE(y, y_pred)
    params = params_1 + params_2
    updates = cnn.gradient_updates_Adam(cost, params, 0.05)
    train = theano.function(
        inputs=[x, y],
        outputs=[cost],
        updates=updates)

    for _ in range(2000):
        cost_i = train(Xtrain, Ytrain)
    # `train` returns a one-element list because outputs=[cost].
    assert cost_i[0] < 0.015
def TrainCNN():
    """Build, train and log the CNN configured in `hyppar` / `datapar`.

    Loads the training, validation and test sets, builds the network with
    `CNNStructure`, and minimises MSE plus an L2 penalty using the updates
    from `cnn.gradient_updates_Adam`. During training it

    * stores parameters every 10th iteration and activations every 2nd
      iteration through the `statistics` module,
    * evaluates the (regularised) cost on all validation mini-batches after
      every training step and prints it,
    * writes validation-set predictions to ``output/E_pred_<iter>.txt``
      every 20th iteration.

    The function returns nothing; results are written through `statistics`
    and `np.savetxt`.
    """
    import os

    # Training, validation and test data
    valid_set_x, valid_set_y, _ = load_data.shared_dataset(
        datapar.Xval, datapar.Yval, sample_size=hyppar.Nval)
    train_set_x, train_set_y, _ = load_data.shared_dataset(
        datapar.Xtrain, datapar.Ytrain, sample_size=hyppar.Ntrain)
    test_set_x = load_data.shared_testset(datapar.Xtest)

    # Hyperparameters
    learning_rate = hyppar.learning_rate
    num_epochs = hyppar.Nepoch
    mini_batch_size = hyppar.mbs
    reg = hyppar.reg

    n_train_samples = train_set_x.get_value(borrow=True).shape[0]
    n_valid_samples = valid_set_x.get_value(borrow=True).shape[0]
    n_test_samples = test_set_x.get_value(borrow=True).shape[0]

    # Random validation samples whose activations are followed in training
    rset = rd.sample(range(n_valid_samples), mini_batch_size)
    print(rset)

    # Seeding the random number generator
    rng = np.random.RandomState(23455)

    # Number of complete mini-batches (a trailing partial batch is dropped)
    n_train_batches = n_train_samples // mini_batch_size
    n_valid_batches = n_valid_samples // mini_batch_size
    n_test_batches = n_test_samples // mini_batch_size
    print('train: %d batches, validation: %d batches'
          % (n_train_batches, n_valid_batches))

    # mini-batch index
    mb_index = T.lscalar()
    # Coulomb matrices ( mini_batch_size x 80 x 80 matrix)
    x = T.matrix('x')
    # Target energies (1 x mini_batch_size)
    y = T.matrix('y')

    print('***** Constructing model ***** ')

    # Reshaping tensor of mini_batch_size set of images into a
    # 4-D tensor of dimensions: mini_batch_size x 1 x 80 x 80
    layer0_input = x.reshape((mini_batch_size, 1, 80, 80))

    # Define the CNN function
    E_pred, cn_output, params = CNNStructure(layer0_input, mini_batch_size,
                                             rng)

    # Cost that is minimised during training: MSE plus an L2 penalty built
    # from element 0 of every entry in `params`.
    # NOTE(review): presumably element 0 is the weight tensor of each
    # layer; confirm against CNNStructure.
    cost = cnn.MSE(y, E_pred)
    L2_reg = sum(T.mean(T.sqr(param[0])) for param in params)
    cost = cost + reg * L2_reg

    def batch_of(shared_data):
        """Return the symbolic slice of `shared_data` for batch mb_index."""
        start = mb_index * mini_batch_size
        stop = (mb_index + 1) * mini_batch_size
        return shared_data[start:stop]

    # The `givens` argument substitutes the symbolic inputs x and y with a
    # mini-batch slice of the shared datasets, so each compiled function
    # only needs the mini-batch index.
    valid_model = theano.function(
        [mb_index],
        cost,
        givens={x: batch_of(valid_set_x), y: batch_of(valid_set_y)})

    predict = theano.function(
        [mb_index],
        E_pred,
        givens={x: batch_of(valid_set_x)})

    test_model = theano.function(
        [mb_index],
        E_pred,
        givens={x: batch_of(test_set_x)})

    get_activations = theano.function(
        [],
        cn_output,
        givens={x: valid_set_x[rset]})

    # Parameter updates are produced by the Adam helper.
    updates = cnn.gradient_updates_Adam(cost, params, learning_rate)

    # Theano function performing one training step on one mini-batch.
    train_model = theano.function(
        [mb_index],
        cost,
        updates=updates,
        givens={x: batch_of(train_set_x), y: batch_of(train_set_y)})

    def collect_predictions(model, n_batches):
        """Run `model` on batches 0..n_batches-1 into one column vector."""
        collected = np.zeros((n_batches * mini_batch_size, 1))
        row = 0
        for batch in range(n_batches):
            buf = model(batch)
            for j in range(mini_batch_size):
                collected[row, 0] = buf[j]
                row += 1
        return collected

    # NOTE(review): these histories are filled during training but are not
    # written out or returned anywhere in this function.
    train_error = []
    valid_error = []

    statistics.saveParameters(params)

    # np.savetxt does not create missing directories, so make sure the
    # prediction dumps have somewhere to go before training starts.
    if not os.path.isdir('output'):
        os.makedirs('output')

    print('***** Training model *****')
    # NOTE(review): the nesting below was reconstructed from a file whose
    # indentation was lost; everything that reads `iteration` is assumed to
    # run once per mini-batch. Confirm against the original.
    for epoch in range(num_epochs):
        for minibatch_index in range(n_train_batches):
            # Total number of mini-batches executed before this one.
            iteration = epoch * n_train_batches + minibatch_index

            # One training step; returns the cost of this mini-batch.
            cost_ij = train_model(minibatch_index)

            if iteration % 10 == 0:
                statistics.saveParameters(params)
            if iteration % 2 == 0:
                statistics.saveActivations(get_activations())

            # Save training error
            train_error.append(float(cost_ij))

            # Mean cost across all validation mini-batches.
            valid_losses = [valid_model(i) for i in range(n_valid_batches)]
            valid_score = np.mean(valid_losses)
            # Save validation error
            valid_error.append(valid_score)

            print("Iteration: " + str(iteration + 1) + "/"
                  + str(num_epochs * n_train_batches)
                  + ", training error: " + str(cost_ij)
                  + ", validation error: " + str(valid_score))

            if iteration % 20 == 0:
                # Get predicted energies from validation set
                E = collect_predictions(predict, n_valid_batches)
                np.savetxt('output/E_pred_' + str(iteration) + '.txt', E)

    # Predict energies for test set
    # NOTE(review): E_test is computed but never saved or returned.
    E_test = collect_predictions(test_model, n_test_batches)

    statistics.writeActivations()
    # Store the final parameters and flush them out.
    statistics.saveParameters(params)
    statistics.writeParameters()
def visualize_MISTtraining():
    '''
    A function to demonstrate how convolutional and fully connected layers
    are used to train CNN to learn to label MNIST digits. Same function is
    used in testing, without any output.

    Downloads the data if the MNIST archive is not present in the working
    directory. Prints progress to the terminal and, once training is done,
    plots the per-iteration training cost and the validation scores.
    More plotting features and such should be included.

    Benchmark error on test set with current settings: 0.0445
    '''
    dataset = 'mnist.pkl.gz'
    rd.seed(23455)

    # Download the file from MILA if not present
    if not os.path.isfile(dataset):
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz')
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    print('***** Loading data *****')
    # NOTE(security): pickle.load can execute arbitrary code and the archive
    # is fetched over plain HTTP; only use this with a trusted copy.
    with gzip.open(dataset, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f,
                                                         encoding='latin1')
        except TypeError:
            # Python 2: pickle.load() does not accept `encoding`.
            train_set, valid_set, test_set = pickle.load(f)

    train_x, train_y = train_set
    valid_x, valid_y = valid_set
    test_x, test_y = test_set

    # Load data into tensors
    train_size = 6000
    test_set_x, test_set_y_float, _ = load_data.shared_dataset(
        test_x, test_y, sample_size=train_size // 3)
    valid_set_x, valid_set_y_float, _ = load_data.shared_dataset(
        valid_x, valid_y, sample_size=train_size // 3)
    train_set_x, train_set_y_float, train_set = load_data.shared_dataset(
        train_x, train_y, sample_size=train_size)
    # Labels are used as integer class indices by the cost and error
    # expressions.
    train_set_y = T.cast(train_set_y_float, 'int32')
    valid_set_y = T.cast(valid_set_y_float, 'int32')
    test_set_y = T.cast(test_set_y_float, 'int32')

    n_train_samples = train_set_x.get_value(borrow=True).shape[0]
    n_test_samples = test_set_x.get_value(borrow=True).shape[0]
    n_valid_samples = valid_set_x.get_value(borrow=True).shape[0]

    # Training set dimension: 6000 x 784
    print('Training set: %d samples' % (n_train_samples))
    # Test set dimension: 2000 x 784
    print('Test set: %d samples' % (n_test_samples))
    # Validation set dimension: 2000 x 784
    print('Validation set: %d samples' % (n_valid_samples))
    print('The training set looks like this: ')
    print(train_set[0])
    print('The labels looks like this:')
    print(train_set[1])

    # learning rate handed to the Adam update rule
    learning_rate = 0.005
    # number of training epochs
    num_epochs = 4
    # number of kernels for each convolution layer
    # for e.g. 2 layers - [20, 50]. layer1 = 20, layer2 = 50
    num_filters = [9]
    # mini-batch size to be used
    mini_batch_size = 50

    # Seeding the random number generator
    rng = np.random.RandomState(23455)

    # Number of complete mini-batches (a trailing partial batch is dropped)
    n_train_batches = n_train_samples // mini_batch_size
    n_valid_batches = n_valid_samples // mini_batch_size
    n_test_batches = n_test_samples // mini_batch_size
    print('train: %d batches, test: %d batches, validation: %d batches'
          % (n_train_batches, n_test_batches, n_valid_batches))

    # mini-batch index
    mb_index = T.lscalar()
    # rasterised images
    x = T.matrix('x')
    # image labels
    y = T.ivector('y')

    print('***** Constructing model ***** ')

    # Reshaping matrix of mini_batch_size set of images into a
    # 4-D tensor of dimensions: mini_batch_size x 1 x 28 x 28
    layer0_input = x.reshape((mini_batch_size, 1, 28, 28))

    # First convolution and pooling layer
    # 4D output tensor is of shape:
    # mini_batch_size x 9 x 12 x 12
    [layer0_output, layer0_params] = cnn.convLayer(
        rng,
        data_input=layer0_input,
        image_spec=(mini_batch_size, 1, 28, 28),
        filter_spec=(num_filters[0], 1, 5, 5),
        pool_size=(2, 2),
        activation=T.tanh)

    # Flatten the output into dimensions:
    # mini_batch_size x 1296
    fc_layer_input = layer0_output.flatten(2)

    # The fully connected layer operates on a matrix of
    # dimensions: mini_batch_size x 1296
    # Its linear output is classified with the softmax function below.
    [y_lin, fc_layer_params] = cnn.fullyConnectedLayer(
        rng,
        data_input=fc_layer_input,
        num_in=num_filters[0] * 12 * 12,
        num_out=10)

    # The likelihood of the categories
    p_y_given_x = T.nnet.softmax(y_lin)
    # Predictions
    y_pred = T.argmax(p_y_given_x, axis=1)
    # Cost that is minimised during training.
    cost = negative_log_lik(y=y, p_y_given_x=p_y_given_x)

    def batch_of(data):
        """Return the symbolic slice of `data` for mini-batch mb_index."""
        start = mb_index * mini_batch_size
        stop = (mb_index + 1) * mini_batch_size
        return data[start:stop]

    # The `givens` argument substitutes the symbolic inputs x and y with a
    # mini-batch slice of the datasets, so each compiled function only needs
    # the mini-batch index.

    # Theano function that evaluates errors(y, y_pred) on a validation batch.
    valid_model = theano.function(
        [mb_index],
        errors(y, y_pred),
        givens={x: batch_of(valid_set_x), y: batch_of(valid_set_y)})

    # Theano function that evaluates errors(y, y_pred) on a test batch.
    test_model = theano.function(
        [mb_index],
        errors(y, y_pred),
        givens={x: batch_of(test_set_x), y: batch_of(test_set_y)})

    # List of parameters to be fit during training
    params = fc_layer_params + layer0_params

    # Parameter updates are produced by the Adam helper.
    updates = cnn.gradient_updates_Adam(cost, params, learning_rate)

    # Theano function performing one training step on one mini-batch.
    train_model = theano.function(
        [mb_index],
        cost,
        updates=updates,
        givens={x: batch_of(train_set_x), y: batch_of(train_set_y)})

    # Histories used for the plots at the end.
    train_costs = []
    valid_accuracy = []

    print('***** Training model *****')
    # NOTE(review): the nesting below was reconstructed from a file whose
    # indentation was lost; validation is assumed to run after every
    # training step. Confirm against the original.
    for epoch in range(num_epochs):
        print('epoch: ' + str(epoch))
        for minibatch_index in range(n_train_batches):
            # Total number of mini-batches executed before this one.
            iteration = epoch * n_train_batches + minibatch_index

            # One training step; returns the cost of this mini-batch.
            cost_ij = train_model(minibatch_index)
            print('iter: ' + str(iteration) + ', cost_ij: ' + str(cost_ij))
            train_costs.append(cost_ij)

            # Mean of errors(y, y_pred) across all validation mini-batches.
            valid_losses = [valid_model(i) for i in range(n_valid_batches)]
            valid_score = np.mean(valid_losses)
            valid_accuracy.append(valid_score)

    print('***** Training Complete *****')
    test_losses = [test_model(i) for i in range(n_test_batches)]
    # Mean of errors(y, y_pred) across all test mini-batches.
    test_score = np.mean(test_losses)
    # NOTE(review): the value printed here is the mean of `errors(...)`,
    # which looks like an error rate rather than an accuracy - confirm and
    # adjust the label if so.
    print('Accuracy on the test set: ' + str(test_score))

    fig, (ax1, ax2) = plt.subplots(2)
    ax1.plot(train_costs)
    ax2.plot(valid_accuracy)
    plt.show()
def visualize_sinetraining(Npoints, Nnodes, Nsteps, learning_rate):
    """Train a one-hidden-layer network on sin(x) and plot progress live.

    Parameters
    ----------
    Npoints : int
        Number of evenly spaced training points taken from [0, 7].
    Nnodes : int
        Number of units in the hidden layer.
    Nsteps : int
        Number of training steps; the figure is refreshed every 20th step.
    learning_rate : float
        Learning rate passed to `cnn.gradient_updates_Adam`.

    The figure has three panels: prediction against training data, tanh of
    the hidden layer output, and the cost recorded at each refresh. The
    final cost is printed when training ends.
    """
    xtrain = np.linspace(0, 7, Npoints)
    ytrain = np.sin(xtrain)
    # The network takes column vectors of shape (Npoints, 1).
    Xtrain = xtrain[:, np.newaxis]
    Ytrain = np.sin(Xtrain)

    rng = np.random.RandomState(23455)
    x = T.matrix('x')
    y = T.matrix('y')

    # Hidden layer (1 -> Nnodes); tanh is applied outside the layer.
    [hout, params_1] = cnn.fullyConnectedLayer(
        rng=rng,
        data_input=x,
        num_in=1,
        num_out=Nnodes)
    # Output layer (Nnodes -> 1), again followed by an external tanh.
    [y_pred_lin, params_2] = cnn.fullyConnectedLayer(
        rng=rng,
        data_input=T.tanh(hout),
        num_in=Nnodes,
        num_out=1)
    y_pred = T.tanh(y_pred_lin)

    cost = cnn.MSE(y, y_pred)
    params = params_1 + params_2
    updates = cnn.gradient_updates_Adam(cost, params, learning_rate)
    train = theano.function(
        inputs=[x, y],
        outputs=[y_pred, cost, hout],
        updates=updates)

    plt.ion()
    fig = plt.figure()
    ax1 = fig.add_subplot(311)
    ax2 = fig.add_subplot(312)
    ax3 = fig.add_subplot(313)
    ax1.plot(xtrain, ytrain, 'b-')

    error_history = []
    for i in range(Nsteps):
        # `hidden_lin` is the numeric hidden-layer output for this step; it
        # gets its own name so the symbolic `hout` is not rebound.
        pred, cost_i, hidden_lin = train(Xtrain, Ytrain)

        if i % 20 == 0:
            ax1.clear()
            ax1.plot(xtrain, ytrain, 'b--', label='Training data')
            ax1.plot(Xtrain, pred, 'r-', label='prediction')
            ax1.legend()
            ax1.set_title('Prediction')

            error_history.append(cost_i)
            ax3.clear()
            ax3.plot(error_history, 'r-+', label=str(cost_i))
            ax3.set_title('Error')
            ax3.legend()

            ax2.clear()
            # Numeric tanh: the compiled function returns arrays, so the
            # symbolic T.tanh would hand matplotlib a Theano expression
            # instead of values.
            ax2.plot(Xtrain, np.tanh(hidden_lin))
            ax2.set_title('Activations')

            fig.canvas.draw()
            plt.pause(0.05)

    fig.canvas.draw()
    print('Final error: ' + str(cost_i))
def test_convLayer():
    '''
    Constructs a CNN with one convolutional and one fully connected layer.
    Then the function trains the network to interpret MNIST digits.
    Same script with some output and plotting features is found from
    "test.py". Calls two functions for cost and accuracy from above.

    Test: digit labeling accuracy > 92%, checked as a mean `errors` value
    below 0.08 over the test mini-batches.

    NOTE: Valid set is not present here.
    '''
    dataset = 'mnist.pkl.gz'
    rd.seed(23455)

    # Download the file from MILA if not present
    if not os.path.isfile(dataset):
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        )
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    print('***** Loading data *****')
    # NOTE(security): pickle.load can execute arbitrary code and the archive
    # is fetched over plain HTTP; only use this with a trusted copy.
    with gzip.open(dataset, 'rb') as f:
        try:
            train_set, _, test_set = pickle.load(f, encoding='latin1')
        except TypeError:
            # Python 2: pickle.load() does not accept `encoding`.
            train_set, _, test_set = pickle.load(f)

    train_x, train_y = train_set
    test_x, test_y = test_set

    # Load data into tensors
    train_size = 6000
    test_set_x, test_set_y_float, _ = load_data.shared_dataset(
        test_x, test_y,
        sample_size=train_size // 3
    )
    train_set_x, train_set_y_float, _ = load_data.shared_dataset(
        train_x, train_y,
        sample_size=train_size
    )
    # Labels are used as integer class indices by the cost and error
    # expressions.
    train_set_y = T.cast(train_set_y_float, 'int32')
    test_set_y = T.cast(test_set_y_float, 'int32')

    # learning rate handed to the Adam update rule
    learning_rate = 0.005
    # number of training epochs
    num_epochs = 4
    # number of kernels for each convolution layer
    # for e.g. 2 layers - [20, 50]. layer1 = 20, layer2 = 50
    num_filters = [9]
    # mini-batch size to be used
    mini_batch_size = 50

    # Seeding the random number generator
    rng = np.random.RandomState(23455)

    # Number of complete mini-batches (a trailing partial batch is dropped)
    n_train_batches = (train_set_x.get_value(borrow=True).shape[0]
                       // mini_batch_size)
    n_test_batches = (test_set_x.get_value(borrow=True).shape[0]
                      // mini_batch_size)

    # mini-batch index
    mb_index = T.lscalar()
    # rasterised images
    x = T.matrix('x')
    # image labels
    y = T.ivector('y')

    # Reshaping matrix of mini_batch_size set of images into a
    # 4-D tensor of dimensions: mini_batch_size x 1 x 28 x 28
    layer0_input = x.reshape((mini_batch_size, 1, 28, 28))

    # First convolution and pooling layer
    # 4D output tensor is of shape:
    # mini_batch_size x 9 x 12 x 12
    [layer0_output, layer0_params] = cnn.convLayer(
        rng,
        data_input=layer0_input,
        image_spec=(mini_batch_size, 1, 28, 28),
        filter_spec=(num_filters[0], 1, 5, 5),
        pool_size=(2, 2),
        activation=T.tanh)

    # Flatten the output into dimensions:
    # mini_batch_size x 1296
    fc_layer_input = layer0_output.flatten(2)

    # The fully connected layer operates on a matrix of
    # dimensions: mini_batch_size x 1296
    # Its linear output is classified with the softmax function below.
    [y_lin, fc_layer_params] = cnn.fullyConnectedLayer(
        rng,
        data_input=fc_layer_input,
        num_in=num_filters[0] * 12 * 12,
        num_out=10)

    # The likelihood of the categories
    p_y_given_x = T.nnet.softmax(y_lin)
    # Predictions
    y_pred = T.argmax(p_y_given_x, axis=1)
    # Cost that is minimised during training.
    cost = negative_log_lik(y=y, p_y_given_x=p_y_given_x)

    def batch_of(data):
        """Return the symbolic slice of `data` for mini-batch mb_index."""
        start = mb_index * mini_batch_size
        stop = (mb_index + 1) * mini_batch_size
        return data[start:stop]

    # Theano function that evaluates errors(y, y_pred) on a test batch.
    test_model = theano.function(
        [mb_index],
        errors(y, y_pred),
        givens={x: batch_of(test_set_x), y: batch_of(test_set_y)})

    # List of parameters to be fit during training
    params = fc_layer_params + layer0_params
    updates = cnn.gradient_updates_Adam(cost, params, learning_rate)

    # Theano function performing one training step on one mini-batch.
    train_model = theano.function(
        [mb_index],
        cost,
        updates=updates,
        givens={x: batch_of(train_set_x), y: batch_of(train_set_y)})

    # Run every training mini-batch once per epoch.
    for _ in range(num_epochs):
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)

    test_losses = [test_model(i) for i in range(n_test_batches)]
    test_score = np.mean(test_losses)
    assert test_score < 0.08