def test_mlp(initial_learning_rate, learning_rate_decay,
             squared_filter_length_limit, n_epochs, batch_size, mom_params,
             activations, dropout, dropout_rates, results_file_name,
             layer_sizes, dataset, use_bias, random_seed=1234):
    """
    The dataset is the one from the MLP demo on deeplearning.net.  This
    training function is lifted from there almost exactly.

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                    http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
    """
    assert len(layer_sizes) - 1 == len(dropout_rates)

    # extract the params for momentum
    mom_start = mom_params["start"]
    mom_end = mom_params["end"]
    mom_epoch_interval = mom_params["interval"]

    from utils import load_vc
    #datasets = load_mnist(dataset)
    print '... loading the data'
    dataset = 'c2s.npy'
    datasets, x_mean, y_mean, x_std, y_std = load_vc(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################

    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    epoch = T.scalar()
    x = T.matrix('x')      # the data is presented as rasterized feature vectors
    y = T.matrix('y')      # the targets are presented as a matrix of real values

    learning_rate = theano.shared(
        np.asarray(initial_learning_rate, dtype=theano.config.floatX))

    rng = np.random.RandomState(random_seed)

    # construct the MLP class
    if 1:  # load a pretrained model from disk
        f = open('c2s_pre.npy.dnn.pkl', 'r')
        pretrained = cPickle.load(f)
        f.close()
    else:
        pretrained = None

    classifier = MLP(rng=rng, input=x,
                     layer_sizes=layer_sizes,
                     dropout_rates=dropout_rates,
                     activations=activations,
                     use_bias=use_bias,
                     pretrained=pretrained)

    # Build the expression for the cost function.
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)

    # Compile theano function for testing.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]})
    #theano.printing.pydotprint(test_model, outfile="test_file.png",
    #                           var_with_name_simple=True)

    # Compile theano function for validation.
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # Forward-propagate the whole test set through the network.
    test_fprop = theano.function(
        inputs=[],
        outputs=classifier.layers[-1].output,
        givens={x: test_set_x})
    #theano.printing.pydotprint(validate_model, outfile="validate_file.png",
    #                           var_with_name_simple=True)

    # Compute gradients of the model wrt parameters
    gparams = []
    for param in classifier.params:
        # Use the right cost function here to train with or without dropout.
        gparam = T.grad(dropout_cost if dropout else cost, param)
        gparams.append(gparam)

    # ... and allocate memory for momentum'd versions of the gradient
    gparams_mom = []
    for param in classifier.params:
        gparam_mom = theano.shared(
            np.zeros(param.get_value(borrow=True).shape,
                     dtype=theano.config.floatX))
        gparams_mom.append(gparam_mom)

    # Compute momentum for the current epoch
    mom = ifelse(
        epoch < mom_epoch_interval,
        mom_start * (1.0 - epoch / mom_epoch_interval) +
        mom_end * (epoch / mom_epoch_interval),
        mom_end)

    # Update the step direction using momentum
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        # Misha Denil's original version
        #updates[gparam_mom] = mom * gparam_mom + (1. - mom) * gparam

        # change the update rule to match Hinton's dropout paper
        updates[gparam_mom] = mom * gparam_mom - (1. - mom) * learning_rate * gparam

    # ... and take a step along that direction
    for param, gparam_mom in zip(classifier.params, gparams_mom):
        # Misha Denil's original version
        #stepped_param = param - learning_rate * updates[gparam_mom]

        # since we have included learning_rate in gparam_mom, we don't need it
        # here
        stepped_param = param + updates[gparam_mom]

        # This is a silly hack to constrain the norms of the rows of the weight
        # matrices.  This just checks if there are two dimensions to the
        # parameter and constrains it if so... maybe this is a bit silly but it
        # should work for now.
        if param.get_value(borrow=True).ndim == 2:
            #squared_norms = T.sum(stepped_param**2, axis=1).reshape((stepped_param.shape[0], 1))
            #scale = T.clip(T.sqrt(squared_filter_length_limit / squared_norms), 0., 1.)
            #updates[param] = stepped_param * scale

            # constrain the norms of the COLUMNs of the weight, according to
            # https://github.com/BVLC/caffe/issues/109
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0,
                                   T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param

    # Compile theano function for training.  This returns the training cost and
    # updates the model parameters.
    output = dropout_cost if dropout else cost
    train_model = theano.function(
        inputs=[epoch, index],
        outputs=output,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]})
    #theano.printing.pydotprint(train_model, outfile="train_file.png",
    #                           var_with_name_simple=True)

    # Theano function to decay the learning rate.  This is separate from the
    # training function because we only want to do this once each epoch instead
    # of after each minibatch.
    decay_learning_rate = theano.function(
        inputs=[],
        outputs=learning_rate,
        updates={learning_rate: learning_rate * learning_rate_decay})

    ###############
    # TRAIN MODEL #
    ###############

    print '... training'

    best_params = None
    best_validation_errors = np.inf
    best_iter = 0
    test_score = 0.
    epoch_counter = 0
    start_time = time.clock()

    results_file = open(results_file_name, 'wb')

    # De-normalize the test targets and inputs once, for reporting.
    X2 = test_set_y.eval()
    X2 *= y_std
    X2 += y_mean
    X1 = test_set_x.eval()
    X1 *= x_std
    X1 += x_mean

    last_reg = 10000.0
    while epoch_counter < n_epochs:
        # Train this epoch
        epoch_counter = epoch_counter + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(epoch_counter, minibatch_index)

        # Compute loss on validation set
        validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
        this_validation_errors = np.mean(validation_losses)

        # Report and save progress.
        print "epoch {}, test error {}, learning_rate={}{}".format(
            epoch_counter, this_validation_errors,
            learning_rate.get_value(borrow=True),
            " **" if this_validation_errors < best_validation_errors else "")

        best_validation_errors = min(best_validation_errors,
                                     this_validation_errors)
        results_file.write("{0}\n".format(this_validation_errors))
        results_file.flush()

        new_learning_rate = decay_learning_rate()

        # De-normalize the network output and compare the regression error
        # against the baseline of simply copying the (de-normalized) input.
        YH = test_fprop()
        YH *= y_std
        YH += y_mean
        print 'Regression ', np.mean(np.mean((YH - X2)**2, 1))
        print 'Baseline! ', np.mean(np.mean((X1 - X2)**2, 1))

        if np.mean(np.mean((YH - X2)**2, 1)) < last_reg:
            print 'This is better. Saving the model to ' + dataset + '.dnn.pkl'
            f = open(dataset + '.dnn.pkl', 'w+')
            cPickle.dump(classifier, f)
            f.flush()
            f.close()
            last_reg = np.mean(np.mean((YH - X2)**2, 1))

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_errors * 100., best_iter, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
def test_dA(learning_rate=0.01, training_epochs=15000,
            dataset='mnist.pkl.gz', batch_size=5, output_folder='dA_plots'):
    """
    This demo is tested on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used for training the denoising
                          autoencoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the pickled dataset
    """
    ##datasets = load_data(dataset)
    #from SdA_mapping import load_data_half
    #datasets = load_data_half(dataset)
    print 'loading data'
    datasets, x_mean, y_mean, x_std, y_std = load_vc()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    print 'loaded data'

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()      # index to a [mini]batch
    x1 = T.matrix('x1')      # view 1: source features
    x2 = T.matrix('x2')      # view 2: target features
    cor_reg = T.scalar('cor_reg')

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    #da = dA_joint(
    #    numpy_rng=rng,
    #    theano_rng=theano_rng,
    #    input1=x1,
    #    input2=x2,
    #    n_visible1=28 * 28 / 2,
    #    n_visible2=28 * 28 / 2,
    #    n_hidden=500
    #)
    print 'initialize functions'

    da = dA_joint(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input1=x1,
        input2=x2,
        cor_reg=cor_reg,
        #n_visible1=28 * 28 / 2,
        #n_visible2=28 * 28 / 2,
        n_visible1=24,
        n_visible2=24,
        n_hidden=50)

    cost, updates = da.get_cost_updates(corruption_level=0.3,
                                        learning_rate=learning_rate)
    cor_reg_val = numpy.float32(5.0)

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x1: train_set_x[index * batch_size:(index + 1) * batch_size],
            x2: train_set_y[index * batch_size:(index + 1) * batch_size]})

    # Hidden codes and reconstructions for the test and train sets.
    fprop_x1 = theano.function([], outputs=da.output1,
                               givens={x1: test_set_x}, name='fprop_x1')
    fprop_x2 = theano.function([], outputs=da.output2,
                               givens={x2: test_set_y}, name='fprop_x2')
    fprop_x1t = theano.function([], outputs=da.output1,
                                givens={x1: train_set_x}, name='fprop_x1t')
    fprop_x2t = theano.function([], outputs=da.output2,
                                givens={x2: train_set_y}, name='fprop_x2t')
    rec_x1 = theano.function([], outputs=da.rec1,
                             givens={x1: test_set_x}, name='rec_x1')
    rec_x2 = theano.function([], outputs=da.rec2,
                             givens={x2: test_set_y}, name='rec_x2')
    fprop_x1_to_x2 = theano.function([], outputs=da.reg,
                                     givens={x1: test_set_x},
                                     name='fprop_x12x2')

    # Theano function to increase the correlation regularizer weight by 0.1.
    updates_reg = [(da.cor_reg,
                    da.cor_reg + theano.shared(numpy.float32(0.1)))]
    update_reg = theano.function([], updates=updates_reg)

    print 'initialize functions ended'

    start_time = time.clock()

    ############
    # TRAINING #
    ############
    print 'training started'

    # De-normalize the test data once, for reporting.
    X1 = test_set_x.eval()
    X1 *= x_std
    X1 += x_mean
    X2 = test_set_y.eval()
    X2 *= y_std
    X2 += y_mean

    from dcca_numpy import cor_cost

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))
        #cor_reg_val += 1
        #da.cor_reg = theano.shared(cor_reg_val)
        update_reg()

        X1H = rec_x1()
        X2H = rec_x2()
        X1H *= x_std
        X1H += x_mean
        X2H *= y_std
        X2H += y_mean

        H1 = fprop_x1()
        H2 = fprop_x2()

        print 'Training epoch'
        print 'Reconstruction ', numpy.mean(numpy.mean((X1H - X1)**2, 1)), \
            numpy.mean(numpy.mean((X2H - X2)**2, 1))

        if epoch % 5 == 2:  # pretrain middle layer
            print '... pre-training MIDDLE layer'

            H1t = fprop_x1t()
            H2t = fprop_x2t()
            h1 = T.matrix('x')   # hidden code for view 1
            h2 = T.matrix('y')   # hidden code for view 2
            from mlp import HiddenLayer
            numpy_rng = numpy.random.RandomState(89677)
            log_reg = HiddenLayer(numpy_rng, h1, 50, 50, activation=T.tanh)

            if 1:  # train the middle layer on its MSE
                learning_rate = 0.1
                #H1 = theano.shared(H1)
                #H2 = theano.shared(H2)
                # compute the gradients with respect to the model parameters
                logreg_cost = log_reg.mse(h2)

                gparams = T.grad(logreg_cost, log_reg.params)

                # compute list of fine-tuning updates
                updates = [(param, param - gparam * learning_rate)
                           for param, gparam in zip(log_reg.params, gparams)]

                train_fn_middle = theano.function(
                    inputs=[],
                    outputs=logreg_cost,
                    updates=updates,
                    givens={h1: theano.shared(H1t),
                            h2: theano.shared(H2t)},
                    name='train_middle')

            # note: this reuses (and resets) the outer `epoch` loop variable
            epoch = 0
            while epoch < 100:
                print epoch, train_fn_middle()
                epoch += 1

            ##X2H = fprop_x1_to_x2()
            X2H = numpy.tanh(H1.dot(log_reg.W.eval()) + log_reg.b.eval())
            X2H = numpy.tanh(X2H.dot(da.W2_prime.eval()) + da.b2_prime.eval())
            X2H *= y_std
            X2H += y_mean
            print 'Regression ', numpy.mean(numpy.mean((X2H - X2)**2, 1))

        print 'Correlation ', cor_cost(H1, H2)

    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> sys.stderr, ('The no corruption code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % (training_time / 60.))
    # note: img_shape/tile_shape are left over from the MNIST version of this demo
    image = Image.fromarray(
        tile_raster_images(X=da.W1.get_value(borrow=True).T,
                           img_shape=(28, 14), tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_0.png')

    from matplotlib import pyplot as pp
    pp.plot(H1[:10, :2], 'b')
    pp.plot(H2[:10, :2], 'r')
    pp.show()

    print 'Correlation ', cor_cost(H1, H2)
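

# A minimal, illustrative driver, assuming this module is run as a script.
# Every hyperparameter value below is an assumption chosen for a quick smoke
# test, not a setting taken from the original experiments.  test_mlp() also
# expects the pretrained model 'c2s_pre.npy.dnn.pkl' and the voice-conversion
# data loaded by utils.load_vc() to be present, and it overrides its
# `dataset` argument with 'c2s.npy' internally; test_dA() likewise ignores
# its `dataset` argument and chdir()s into `output_folder`.
if __name__ == '__main__':
    # Momentum schedule for test_mlp: ramp from `start` to `end` over the
    # first `interval` epochs.
    mom_params = {"start": 0.5, "end": 0.99, "interval": 500}

    # Assumed architecture: 24-dimensional input and output (matching the
    # n_visible1=24 / n_visible2=24 used by the dA demo) with two hidden
    # layers; one dropout rate per weight layer and (assumed) one activation
    # per hidden layer.
    layer_sizes = [24, 200, 200, 24]
    dropout_rates = [0.0, 0.5, 0.5]
    activations = [T.tanh, T.tanh]

    test_mlp(initial_learning_rate=0.02,
             learning_rate_decay=0.998,
             squared_filter_length_limit=15.0,
             n_epochs=300,
             batch_size=100,
             mom_params=mom_params,
             activations=activations,
             dropout=True,
             dropout_rates=dropout_rates,
             results_file_name='results.txt',
             layer_sizes=layer_sizes,
             dataset='c2s.npy',
             use_bias=True,
             random_seed=1234)

    # The joint denoising autoencoder demo could be run the same way, e.g.:
    #test_dA(learning_rate=0.01, training_epochs=50, batch_size=5,
    #        output_folder='dA_plots')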