def svm_cva(learning_rate=3e-4, n_epochs=10000, dirs=None, batch_size=500): print learning_rate, batch_size datasets = datapy.load_data_svhn_features(dirs, have_matrix=True) train_set_x, train_set_y, train_y_matrix = datasets[0] test_set_x, test_set_y, test_y_matrix = datasets[1] valid_set_x, valid_set_y, valid_y_matrix = datasets[2] #datasets = datapy.load_data_svhn(dataset, have_matrix=False) #train_set_x, train_set_y = datasets[0] #test_set_x, test_set_y = datasets[1] #valid_set_x, valid_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # generate symbolic variables for input (x and y represent a # minibatch) x = T.matrix('x') # data, presented as rasterized images y = T.ivector('y') # labels, presented as 1D vector of [int] labels ''' Differences ''' y_matrix = T.imatrix( 'y_matrix') # labels, presented as 2D matrix of int labels # construct the logistic regression class # Each MNIST image has size 28*28 rng = np.random.RandomState(0) classifier = Pegasos.Pegasos(input=x, rng=rng, n_in=4 * 4 * 96, n_out=10, weight_decay=2e-6, loss=10) # the cost we minimize during training is the negative log likelihood of # the model in symbolic format cost = classifier.objective(10, y, y_matrix) # compiling a Theano function that computes the mistakes that are made by # the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size], #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size] }) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size], #y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size] }) # compute the gradient of cost with respect to theta = (W,b) g_W = T.grad(cost=cost, wrt=classifier.W) g_b = T.grad(cost=cost, wrt=classifier.b) params = [classifier.W, classifier.b] grads = [g_W, g_b] # start-snippet-3 # specify how to update the parameters of the model as a list of # (variable, update expression) pairs. l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32)) #get_optimizer = optimizer.get_simple_optimizer(learning_rate=learning_rate) get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r, decay1=0.1, decay2=0.001) updates = get_optimizer(params, grads) # compiling a Theano function `train_model` that returns the cost, but in # the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=[cost], updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size], y_matrix: train_y_matrix[index * batch_size:(index + 1) * batch_size] }) # end-snippet-3 ############### # TRAIN MODEL # ############### print '... 
training the model' # early-stopping parameters patience = 50000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = 200 # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = np.inf best_test_score = np.inf test_score = 0. start_time = time.clock() done_looping = False epoch = 0 while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) #print minibatch_avg_cost # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] this_validation_loss = np.mean(validation_losses) this_test_losses = [ test_model(i) for i in xrange(n_test_batches) ] this_test_score = np.mean(this_test_losses) if this_test_score < best_test_score: best_test_score = this_test_score print( 'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100, this_test_score * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss # test it on the test set test_losses = [ test_model(i) for i in xrange(n_test_batches) ] test_score = np.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of' ' best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print 'The code run for %d epochs, with %f epochs/sec' % ( epoch, 1. * epoch / (end_time - start_time)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time))) print best_test_score
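# -----------------------------------------------------------------------------
# svm_cva (MNIST-feature variant). Unlike the SVHN version above, which works on
# 4*4*96-dimensional CVA features, this function trains a linear Pegasos SVM
# (weight_decay=1e-4, loss=1) on features extracted beforehand:
# datapy.load_feature_gpu loads them from `dir`, apparently keeping columns
# [start, end), while the labels still come from the raw MNIST pickle.
# Training uses optimizer.get_adam_optimizer_min with early stopping on the
# validation error, and progress is appended to <dir><learning_rate>_c-hook.txt.
# The cost minimized is presumably the Pegasos SVM objective (multiclass hinge
# loss plus weight decay) rather than a negative log-likelihood.
# -----------------------------------------------------------------------------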
def svm_cva(dir, start=0, end=500, learning_rate=3e-4, n_epochs=10000, dataset='./data/mnist.pkl.gz', batch_size=500): """ Demonstrate stochastic gradient descent optimization of a log-linear model This is demonstrated on MNIST. :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: the path of the MNIST dataset file from http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz """ ''' Difference ''' print start, end, learning_rate, batch_size datasets = datapy.load_data_gpu(dataset, have_matrix=True) _, train_set_y, train_y_matrix = datasets[0] _, valid_set_y, valid_y_matrix = datasets[1] _, test_set_y, test_y_matrix = datasets[2] train_set_x, valid_set_x, test_set_x = datapy.load_feature_gpu(dir=dir, start=start,end=end) print train_set_x.get_value().shape print valid_set_x.get_value().shape print test_set_x.get_value().shape # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # generate symbolic variables for input (x and y represent a # minibatch) x = T.matrix('x') # data, presented as rasterized images y = T.ivector('y') # labels, presented as 1D vector of [int] labels ''' Differences ''' y_matrix = T.imatrix('y_matrix') # labels, presented as 2D matrix of int labels # construct the logistic regression class # Each MNIST image has size 28*28 rng = np.random.RandomState(0) n_in=end-start classifier = Pegasos.Pegasos(input=x, rng=rng, n_in=n_in, n_out=10, weight_decay=1e-4, loss=1) # the cost we minimize during training is the negative log likelihood of # the model in symbolic format cost = classifier.objective(10, y, y_matrix) # compiling a Theano function that computes the mistakes that are made by # the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size], #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size], #y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size] } ) # compute the gradient of cost with respect to theta = (W,b) g_W = T.grad(cost=cost, wrt=classifier.W) g_b = T.grad(cost=cost, wrt=classifier.b) params = [classifier.W, classifier.b] grads = [g_W, g_b] # start-snippet-3 # specify how to update the parameters of the model as a list of # (variable, update expression) pairs. 
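    # l_r is kept in a shared variable so the learning rate can be changed while
    # training (the CNN scripts later in this file divide it by 3 every 10
    # epochs once a decay threshold is reached). get_adam_optimizer_min is
    # assumed to build Adam-style Theano updates that minimize the cost;
    # decay1/decay2 look like the (1 - beta1)/(1 - beta2) moment-decay constants
    # of Adam, but that reading of the local optimizer module is an assumption,
    # not documented behaviour.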
l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32)) #get_optimizer = optimizer.get_simple_optimizer(learning_rate=learning_rate) get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r, decay1 = 0.1, decay2 = 0.001) updates = get_optimizer(params,grads) # compiling a Theano function `train_model` that returns the cost, but in # the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=[cost], updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size], y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size] } ) # end-snippet-3 ############### # TRAIN MODEL # ############### print '... training the model' # early-stopping parameters patience = 5000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = np.inf best_test_score = np.inf test_score = 0. start_time = time.clock() logdir = dir + str(learning_rate)+'_c-' done_looping = False epoch = 0 while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) #print minibatch_avg_cost # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] this_validation_loss = np.mean(validation_losses) this_test_losses = [test_model(i) for i in xrange(n_test_batches)] this_test_score = np.mean(this_test_losses) if this_test_score < best_test_score: best_test_score = this_test_score with open(logdir+'hook.txt', 'a') as f: print >>f, ( 'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%' % ( epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100, this_test_score *100. ) ) print( 'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%' % ( epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100, this_test_score *100. ) ) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss # test it on the test set test_losses = [test_model(i) for i in xrange(n_test_batches)] test_score = np.mean(test_losses) with open(logdir+'hook.txt', 'a') as f: print >>f,( ( ' epoch %i, minibatch %i/%i, test error of' ' best model %f %%' ) % ( epoch, minibatch_index + 1, n_train_batches, test_score * 100. ) ) print( ( ' epoch %i, minibatch %i/%i, test error of' ' best model %f %%' ) % ( epoch, minibatch_index + 1, n_train_batches, test_score * 100. ) ) if patience <= iter: done_looping = True break end_time = time.clock() with open(logdir+'hook.txt', 'a') as f: print>>f,( ( 'Optimization complete with best validation score of %f %%,' 'with test performance %f %%' ) % (best_validation_loss * 100., test_score * 100.) 
) print>>f, 'The code run for %d epochs, with %f epochs/sec' % ( epoch, 1. * epoch / (end_time - start_time)) print>>f, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time))) print>>f, best_test_score print( ( 'Optimization complete with best validation score of %f %%,' 'with test performance %f %%' ) % (best_validation_loss * 100., test_score * 100.) ) print 'The code run for %d epochs, with %f epochs/sec' % ( epoch, 1. * epoch / (end_time - start_time)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time))) print best_test_score
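# -----------------------------------------------------------------------------
# deep_cnn_6layer_mnist_50000: supervised 6-layer network for MNIST trained on
# the 50,000-example split. Five ConvMaxPool layers (feature maps
# 28->12->12->5->5, channel counts given by nkerns) are followed by a 500-unit
# fully connected layer; the output layer is a Pegasos classifier trained with
# a multiclass hinge loss. Dropout is applied per layer according to `drops`
# and only takes effect when dropout_flag == 1. Updates come from the same
# Adam-style optimizer with weight decay 1/n_train_batches; after decay_epochs
# (150) the learning rate is divided by 3 every 10 epochs, and the parameters
# are written to <logdir>model-<epoch>.npz every 50 epochs.
# -----------------------------------------------------------------------------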
def deep_cnn_6layer_mnist_50000(learning_rate=3e-4, n_epochs=250, dataset='mnist.pkl.gz', batch_size=500, dropout_flag=0, seed=0, activation=None): #cp->cd->cpd->cd->c nkerns=[32, 32, 64, 64, 64] drops=[1, 0, 1, 0, 0] #skerns=[5, 3, 3, 3, 3] #pools=[2, 1, 1, 2, 1] #modes=['same']*5 n_hidden=[500] logdir = 'results/supervised/cnn/mnist/deep_cnn_6layer_50000_'+str(nkerns)+str(drops)+str(n_hidden)+'_'+str(learning_rate)+'_'+str(int(time.time()))+'/' if dropout_flag==1: logdir = 'results/supervised/cnn/mnist/deep_cnn_6layer_50000_'+str(nkerns)+str(drops)+str(n_hidden)+'_'+str(learning_rate)+'_dropout_'+str(int(time.time()))+'/' if not os.path.exists(logdir): os.makedirs(logdir) print 'logdir:', logdir print 'deep_cnn_6layer_mnist_50000_', nkerns, n_hidden, drops, seed, dropout_flag with open(logdir+'hook.txt', 'a') as f: print >>f, 'logdir:', logdir print >>f, 'deep_cnn_6layer_mnist_50000_', nkerns, n_hidden, drops, seed, dropout_flag rng = np.random.RandomState(0) rng_share = theano.tensor.shared_randomstreams.RandomStreams(0) ''' ''' datasets = datapy.load_data_gpu_60000(dataset, have_matrix=True) train_set_x, train_set_y, train_y_matrix = datasets[0] valid_set_x, valid_set_y, valid_y_matrix = datasets[1] test_set_x, test_set_y, test_y_matrix = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ''' dropout ''' drop = T.iscalar('drop') y_matrix = T.imatrix('y_matrix') # labels, presented as 2D matrix of int labels print '... 
building the model' layer0_input = x.reshape((batch_size, 1, 28, 28)) if activation =='nonlinearity.relu': activation = nonlinearity.relu elif activation =='nonlinearity.tanh': activation = nonlinearity.tanh elif activation =='nonlinearity.softplus': activation = nonlinearity.softplus recg_layer = [] cnn_output = [] #1 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2), border_mode='valid', activation=activation )) if drops[0]==1: cnn_output.append(recg_layer[-1].drop_output(layer0_input, drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(layer0_input)) #2 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) if drops[1]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #3 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, nkerns[1], 12, 12), filter_shape=(nkerns[2], nkerns[1], 3, 3), poolsize=(2, 2), border_mode='valid', activation=activation )) if drops[2]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #4 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, nkerns[2], 5, 5), filter_shape=(nkerns[3], nkerns[2], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) if drops[3]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #5 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, nkerns[3], 5, 5), filter_shape=(nkerns[4], nkerns[3], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) if drops[4]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) mlp_input = cnn_output[-1].flatten(2) recg_layer.append(FullyConnected.FullyConnected( rng=rng, n_in=nkerns[4] * 5 * 5, n_out=500, activation=activation )) feature = recg_layer[-1].drop_output(mlp_input, drop=drop, rng=rng_share) # classify the values of the fully-connected sigmoidal layer classifier = Pegasos.Pegasos(input=feature, rng=rng, n_in=500, n_out=10, weight_decay=0, loss=1) # the cost we minimize during training is the NLL of the model cost = classifier.hinge_loss(10, y, y_matrix) * batch_size weight_decay=1.0/n_train_batches # create a list of all model parameters to be fit by gradient descent params=[] for r in recg_layer: params+=r.params params += classifier.params # create a list of gradients for all model parameters grads = T.grad(cost, params) l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32)) get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r, decay1 = 0.1, decay2 = 0.001, weight_decay=weight_decay) updates = get_optimizer(params,grads) ''' Save parameters and activations ''' parameters = theano.function( inputs=[], outputs=params, ) # create a function to compute the mistakes that are made by the model test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0) } ) 
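    # Compiled Theano functions: `parameters` returns the current parameter
    # values (used for the periodic model-<epoch>.npz snapshots); `test_model`
    # and `validate_model` report the 0-1 error with dropout switched off
    # (drop = 0); `train_model` applies the Adam updates with drop set to
    # dropout_flag; `train_model_average` re-evaluates the hinge loss and
    # training error over the training set without updating the parameters.
    # All of them index shared GPU tensors by minibatch via `givens`.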
validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0) } ) train_model_average = theano.function( inputs=[index], outputs=[cost, classifier.errors(y)], givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size], y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](dropout_flag) } ) train_model = theano.function( inputs=[index], outputs=[cost, classifier.errors(y)], updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size], y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](dropout_flag) } ) print '... training' # early-stopping parameters patience = n_train_batches * 100 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = np.inf best_test_score = np.inf test_score = 0. start_time = time.clock() epoch = 0 decay_epochs = 150 while (epoch < n_epochs): epoch = epoch + 1 tmp1 = time.clock() minibatch_avg_cost = 0 train_error = 0 for minibatch_index in xrange(n_train_batches): co, te = train_model(minibatch_index) minibatch_avg_cost+=co train_error+=te #print minibatch_avg_cost # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: test_epoch = epoch - decay_epochs if test_epoch > 0 and test_epoch % 10 == 0: print l_r.get_value() with open(logdir+'hook.txt', 'a') as f: print >>f,l_r.get_value() l_r.set_value(np.cast['float32'](l_r.get_value()/3.0)) # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] this_validation_loss = np.mean(validation_losses) this_test_losses = [test_model(i) for i in xrange(n_test_batches)] this_test_score = np.mean(this_test_losses) train_thing = [train_model_average(i) for i in xrange(n_train_batches)] train_thing = np.mean(train_thing, axis=0) print epoch, 'hinge loss and training error', train_thing with open(logdir+'hook.txt', 'a') as f: print >>f, epoch, 'hinge loss and training error', train_thing if this_test_score < best_test_score: best_test_score = this_test_score print( 'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%' % ( epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100, this_test_score *100. ) ) with open(logdir+'hook.txt', 'a') as f: print >>f, ( 'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%' % ( epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100, this_test_score *100. 
) ) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss # test it on the test set test_losses = [test_model(i) for i in xrange(n_test_batches)] test_score = np.mean(test_losses) print( ( ' epoch %i, minibatch %i/%i, test error of' ' best model %f %%' ) % ( epoch, minibatch_index + 1, n_train_batches, test_score * 100. ) ) with open(logdir+'hook.txt', 'a') as f: print >>f, ( ( ' epoch %i, minibatch %i/%i, test error of' ' best model %f %%' ) % ( epoch, minibatch_index + 1, n_train_batches, test_score * 100. ) ) if epoch%50==0: model = parameters() for i in xrange(len(model)): model[i] = np.asarray(model[i]).astype(np.float32) np.savez(logdir+'model-'+str(epoch), model=model) print 'hinge loss and training error', minibatch_avg_cost / float(n_train_batches), train_error / float(n_train_batches) print 'time', time.clock() - tmp1 with open(logdir+'hook.txt', 'a') as f: print >>f,'hinge loss and training error', minibatch_avg_cost / float(n_train_batches), train_error / float(n_train_batches) print >>f,'time', time.clock() - tmp1 end_time = time.clock() print 'The code run for %d epochs, with %f epochs/sec' % ( epoch, 1. * epoch / (end_time - start_time)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time)))
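# -----------------------------------------------------------------------------
# c_6layer_mnist_imputation: evaluation-only script for the missing-data
# imputation experiments. It rebuilds the same 5-conv + 1-FC recognition
# network with a Pegasos output layer, loads pretrained parameters from
# `predir` (model-<model_epoch>.npz, or best-model.npz when model_epoch == -1;
# the epoch can be overridden through the model_epoch environment variable),
# and reports the classification error on the validation set, the clean test
# set, and the perturbed test set produced by datapy.load_pertub_data. No
# training is performed, and the function exits if no pretrained directory is
# given.
# -----------------------------------------------------------------------------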
def c_6layer_mnist_imputation(seed=0, pertub_type=3, pertub_prob=6, pertub_prob1=14, predir=None, n_batch=144, dataset='mnist.pkl.gz', batch_size=500): """ Missing data imputation """ #cp->cd->cpd->cd->c nkerns = [32, 32, 64, 64, 64] drops = [0, 0, 0, 0, 0, 1] #skerns=[5, 3, 3, 3, 3] #pools=[2, 1, 1, 2, 1] #modes=['same']*5 n_hidden = [500, 50] drop_inverses = [ 1, ] # 28->12->12->5->5/5*5*64->500->50->500->5*5*64/5->5->12->12->28 if dataset == 'mnist.pkl.gz': dim_input = (28, 28) colorImg = False train_set_x, test_set_x, test_set_x_pertub, pertub_label, pertub_number = datapy.load_pertub_data( dirs='data_imputation/', pertub_type=pertub_type, pertub_prob=pertub_prob, pertub_prob1=pertub_prob1) datasets = datapy.load_data_gpu(dataset, have_matrix=True) _, train_set_y, train_y_matrix = datasets[0] valid_set_x, valid_set_y, valid_y_matrix = datasets[1] _, test_set_y, test_y_matrix = datasets[2] # compute number of minibatches for training, validation and testing n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') #x_pertub = T.matrix('x_pertub') # the data is presented as rasterized images #p_label = T.matrix('p_label') y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels y_matrix = T.imatrix('y_matrix') drop = T.iscalar('drop') drop_inverse = T.iscalar('drop_inverse') activation = nonlinearity.relu rng = np.random.RandomState(seed) rng_share = theano.tensor.shared_randomstreams.RandomStreams(0) input_x = x.reshape((batch_size, 1, 28, 28)) recg_layer = [] cnn_output = [] #1 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2), border_mode='valid', activation=activation)) if drops[0] == 1: cnn_output.append(recg_layer[-1].drop_output(input=input_x, drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(input=input_x)) #2 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) if drops[1] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #3 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, nkerns[1], 12, 12), filter_shape=(nkerns[2], nkerns[1], 3, 3), poolsize=(2, 2), border_mode='valid', activation=activation)) if drops[2] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #4 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, nkerns[2], 5, 5), filter_shape=(nkerns[3], nkerns[2], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) if drops[3] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #5 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, nkerns[3], 5, 5), filter_shape=(nkerns[4], nkerns[3], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) if 
drops[4] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) mlp_input = cnn_output[-1].flatten(2) recg_layer.append( FullyConnected.FullyConnected(rng=rng, n_in=nkerns[4] * 5 * 5, n_out=500, activation=activation)) feature = recg_layer[-1].drop_output(mlp_input, drop=drop, rng=rng_share) # classify the values of the fully-connected sigmoidal layer classifier = Pegasos.Pegasos(input=feature, rng=rng, n_in=500, n_out=10, weight_decay=0, loss=1) # the cost we minimize during training is the NLL of the model cost = classifier.hinge_loss(10, y, y_matrix) * batch_size weight_decay = 1.0 / n_train_batches # create a list of all model parameters to be fit by gradient descent params = [] for r in recg_layer: params += r.params params += classifier.params # create a list of gradients for all model parameters grads = T.grad(cost, params) learning_rate = 3e-4 l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32)) get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r, decay1=0.1, decay2=0.001, weight_decay=weight_decay) updates = get_optimizer(params, grads) ''' Save parameters and activations ''' parameters = theano.function( inputs=[], outputs=params, ) # create a function to compute the mistakes that are made by the model test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size], #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0) }) test_pertub_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x_pertub[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size], #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0) }) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], #y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size], drop: np.cast['int32'](0) }) ################## # Pretrain MODEL # ################## model_epoch = 250 if os.environ.has_key('model_epoch'): model_epoch = int(os.environ['model_epoch']) if predir is not None: color.printBlue('... setting parameters') color.printBlue(predir) if model_epoch == -1: pre_train = np.load(predir + 'best-model.npz') else: pre_train = np.load(predir + 'model-' + str(model_epoch) + '.npz') pre_train = pre_train['model'] for (para, pre) in zip(params, pre_train): para.set_value(pre) else: exit() ############### # TRAIN MODEL # ############### valid_losses = [validate_model(i) for i in xrange(n_valid_batches)] valid_score = np.mean(valid_losses) test_losses = [test_model(i) for i in xrange(n_test_batches)] test_score = np.mean(test_losses) test_losses_pertub = [test_pertub_model(i) for i in xrange(n_test_batches)] test_score_pertub = np.mean(test_losses_pertub) print valid_score, test_score, test_score_pertub
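# -----------------------------------------------------------------------------
# svm_cva (variant with `predir`): the same linear Pegasos SVM on pre-extracted
# features as above, except that after training it can additionally load the
# last two arrays (presumably the final-layer W and b) of a jointly trained
# model from <predir>model-600.npz into the classifier and report that
# classifier's test error for comparison ('Jointly trained classifier').
# -----------------------------------------------------------------------------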
def svm_cva(dir, predir, start=0, end=500, learning_rate=3e-4, n_epochs=10000, dataset='./data/mnist.pkl.gz', batch_size=500): """ Demonstrate stochastic gradient descent optimization of a log-linear model This is demonstrated on MNIST. :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: the path of the MNIST dataset file from http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz """ ''' Difference ''' print start, end, learning_rate, batch_size datasets = datapy.load_data_gpu(dataset, have_matrix=True) _, train_set_y, train_y_matrix = datasets[0] _, valid_set_y, valid_y_matrix = datasets[1] _, test_set_y, test_y_matrix = datasets[2] train_set_x, valid_set_x, test_set_x = datapy.load_feature_gpu(dir=dir, start=start, end=end) print train_set_x.get_value().shape print valid_set_x.get_value().shape print test_set_x.get_value().shape # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # generate symbolic variables for input (x and y represent a # minibatch) x = T.matrix('x') # data, presented as rasterized images y = T.ivector('y') # labels, presented as 1D vector of [int] labels ''' Differences ''' y_matrix = T.imatrix( 'y_matrix') # labels, presented as 2D matrix of int labels # construct the logistic regression class # Each MNIST image has size 28*28 rng = np.random.RandomState(0) n_in = end - start classifier = Pegasos.Pegasos(input=x, rng=rng, n_in=n_in, n_out=10, weight_decay=1e-4, loss=1) # the cost we minimize during training is the negative log likelihood of # the model in symbolic format cost = classifier.objective(10, y, y_matrix) # compiling a Theano function that computes the mistakes that are made by # the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size], #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size] }) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size], #y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size] }) # compute the gradient of cost with respect to theta = (W,b) g_W = T.grad(cost=cost, wrt=classifier.W) g_b = T.grad(cost=cost, wrt=classifier.b) params = [classifier.W, classifier.b] grads = [g_W, g_b] # start-snippet-3 # specify how to update the parameters of the model as a list of # (variable, update expression) pairs. 
l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32)) #get_optimizer = optimizer.get_simple_optimizer(learning_rate=learning_rate) get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r, decay1=0.1, decay2=0.001) updates = get_optimizer(params, grads) # compiling a Theano function `train_model` that returns the cost, but in # the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=[cost], updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size], y_matrix: train_y_matrix[index * batch_size:(index + 1) * batch_size] }) # end-snippet-3 ############### # TRAIN MODEL # ############### print '... training the model' # early-stopping parameters patience = 5000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = np.inf best_test_score = np.inf test_score = 0. start_time = time.clock() logdir = dir + str(learning_rate) + '_c-' done_looping = False epoch = 0 while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) #print minibatch_avg_cost # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] this_validation_loss = np.mean(validation_losses) this_test_losses = [ test_model(i) for i in xrange(n_test_batches) ] this_test_score = np.mean(this_test_losses) if this_test_score < best_test_score: best_test_score = this_test_score with open(logdir + 'hook.txt', 'a') as f: print >> f, ( 'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100, this_test_score * 100.)) print( 'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100, this_test_score * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss # test it on the test set test_losses = [ test_model(i) for i in xrange(n_test_batches) ] test_score = np.mean(test_losses) with open(logdir + 'hook.txt', 'a') as f: print >> f, ( (' epoch %i, minibatch %i/%i, test error of' ' best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) print((' epoch %i, minibatch %i/%i, test error of' ' best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() with open(logdir + 'hook.txt', 'a') as f: print >> f, ( ('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print >> f, 'The 
code run for %d epochs, with %f epochs/sec' % ( epoch, 1. * epoch / (end_time - start_time)) print >> f, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time))) print >> f, best_test_score print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print 'The code run for %d epochs, with %f epochs/sec' % ( epoch, 1. * epoch / (end_time - start_time)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time))) print best_test_score if predir is not None: # report the result of the jointly trained classifier pre_train = np.load(predir + 'model-600.npz') pre_train = pre_train['model'] pw = pre_train[-2] pb = pre_train[-1] params[0].set_value(pw) params[1].set_value(pb) ptest_losses = [test_model(i) for i in xrange(n_test_batches)] ptest_score = np.mean(ptest_losses) with open(logdir + 'hook.txt', 'a') as f: print >> f, 'Jointly trained classifier', ptest_score print 'Jointly trained classifier', ptest_score
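# A minimal, hypothetical entry point, assuming this file is executed directly.
# The feature directory and column range below are illustrative placeholders,
# not values taken from the original experiments; `svm_cva` here resolves to
# the last definition above (the variant taking `predir`).
if __name__ == '__main__':
    # Train the linear Pegasos SVM on pre-extracted features stored under
    # './features/' (columns 0..500) and log progress to a hook.txt file.
    svm_cva(dir='./features/', predir=None, start=0, end=500,
            learning_rate=3e-4, n_epochs=10000, batch_size=500)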