def train_model(
    train_set_x_org=None,
    train_set_y_org=None,
    valid_set_x_org=None,
    valid_set_y_org=None,
    learning_rate=0.1,
    alpha=0.01,
    lambda1=0.001,
    lambda2=1.0,
    alpha1=0.001,
    alpha2=0.0,
    n_hidden=[256, 128, 16],
    n_epochs=1000,
    batch_size=100,
    activation_func="tanh",
    rng=numpy.random.RandomState(100),
):
    """
    Train a deep feature selection (DFS) model with minibatch updates and
    validation-based early stopping.

    INPUTS:
    train_set_x_org: numpy 2d array, each row is a training sample.

    train_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of
    training samples. The number of classes is taken as the number of
    distinct values in this vector.

    valid_set_x_org: numpy 2d array, each row is a validation sample.
    This set is used to monitor the convergence of the optimization.

    valid_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of
    validation samples.

    learning_rate: float scalar, the initial learning rate. It is stored in a
    shared variable and multiplied by 0.8 on a schedule (see below).

    alpha: float, momentum trade-off; forwarded to
    classifier.build_train_function.

    lambda1, lambda2: float scalars forwarded to DFS. Per the original
    documentation the input-weight penalty is
    lambda1 * ((1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1),
    i.e. lambda1 is the overall strength and lambda2 mixes the L2 and L1
    terms (lambda2=1 is pure L1). The penalty itself is implemented in DFS,
    not in this function.

    alpha1, alpha2: float scalars forwarded to DFS. Per the original
    documentation the penalty on the remaining weight matrices is
    alpha1 * ((1-alpha2)/2 * sum_i ||W_i||_2^2 + alpha2 * sum_i ||W_i||_1).

    n_hidden: vector of int, n_hidden[i] is the number of hidden units of the
    i-th layer. The default list is shared between calls; it is not modified
    here.

    n_epochs: int scalar, the maximal number of epochs.

    batch_size: int scalar, minibatch size.

    activation_func: string, one of {"tanh" (default), "sigmoid"}.

    rng: numpy random number state, forwarded to DFS. Note that the default
    object is created once at definition time and therefore shared by all
    calls that rely on the default.

    OUTPUTS:
    classifier: the DFS object, holding the parameters of the best
    validation score seen (if any validation improved on infinity).

    training_time: float, time measured with time.clock(), in seconds.

    RAISES:
    KeyError if activation_func is not a known activation name.
    ValueError if the training set yields no minibatch.
    """
    float_type = theano.config.floatX
    train_set_x = theano.shared(numpy.asarray(train_set_x_org, dtype=float_type), borrow=True)
    train_set_y = T.cast(
        theano.shared(numpy.asarray(train_set_y_org, dtype=float_type), borrow=True), "int32"
    )
    valid_set_x = theano.shared(numpy.asarray(valid_set_x_org, dtype=float_type), borrow=True)
    valid_set_y = T.cast(
        theano.shared(numpy.asarray(valid_set_y_org, dtype=float_type), borrow=True), "int32"
    )

    # number of minibatches for training
    # NOTE(review): without true division in effect (Python 2 and no
    # `from __future__ import division`), the quotient is already floored and
    # math.ceil is a no-op, so a trailing partial batch is skipped -- confirm
    # against the module header.
    n_train_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size))
    if n_train_batches < 1:
        # Previously this situation surfaced later as a NameError.
        raise ValueError("the training set does not yield any minibatch; check batch_size")

    # shared variable so that the learning rate can be reduced during training
    learning_rate_shared = theano.shared(learning_rate, name="learn_rate_shared")
    decay_rate = T.scalar(name="decay_rate", dtype=float_type)
    reduce_learning_rate = theano.function(
        [decay_rate],
        learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)],
    )

    ## define the model below
    num_feat = train_set_x.get_value(borrow=True).shape[1]  # number of features
    n_cl = len(numpy.unique(train_set_y_org))  # number of classes

    activations = {"tanh": T.tanh, "sigmoid": T.nnet.sigmoid}
    activation = activations[activation_func]

    # build a DFS object
    classifier = DFS(
        rng=rng,
        n_in=num_feat,
        n_hidden=n_hidden,
        n_out=n_cl,
        lambda1=lambda1,
        lambda2=lambda2,
        alpha1=alpha1,
        alpha2=alpha2,
        activation=activation,
    )

    train_model_one_iteration = classifier.build_train_function(
        train_set_x, train_set_y, batch_size, alpha, learning_rate_shared
    )
    validate_model = classifier.build_valid_function(valid_set_x, valid_set_y, batch_size)

    print("... training")
    # early-stopping parameters
    patience = 5000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    # validate after this many minibatches; with fewer than patience // 2
    # batches per epoch this means once per epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    # Initialised up front so that a NaN loss, n_epochs=0 or an early exit
    # cannot leave them unbound.
    best_model_params = None
    this_validation_loss = numpy.inf
    num_epoch_not_improve = 0

    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    learning_rate_decay_rate = 0.8
    epoch_change_count = 0
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while epoch < n_epochs and not done_looping:
        epoch += 1
        epoch_change_count += 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            # scheduled decay: shrink the learning rate and let the helper in
            # `cl` pick the length of the next interval
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate = cl.change_max_num_epoch_change_learning_rate(
                max_num_epoch_change_learning_rate, max_num_epoch_change_rate
            )
            max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
            epoch_change_count = 0

        for minibatch_index in xrange(n_train_batches):
            train_model_one_iteration(minibatch_index)
            # global minibatch counter (0-based)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # mean of whatever validate_model() returns
                this_validation_loss = numpy.mean(validate_model())
                print(
                    "epoch %i, minibatch %i/%i, validation error %f %%"
                    % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.0)
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve = 0
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)
                    best_validation_loss = this_validation_loss
                    # save a copy of the currently best model parameters
                    best_model_params = classifier.get_params()

            if patience <= iteration:
                done_looping = True
                break

        # NOTE: after an improving epoch best_validation_loss equals
        # this_validation_loss, so this test is true as well and the counter
        # effectively restarts at 1 (original behaviour, kept as is).
        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve += 1

        if num_epoch_not_improve >= max_num_epoch_not_improve:
            done_looping = True
            break

    # restore the best model parameters, if any were recorded
    if best_model_params is not None:
        classifier.set_params(best_model_params)
    end_time = time.clock()
    training_time = end_time - start_time
    print("Training time: %f" % (training_time / 60))
    print("Optimization complete with best validation score of %f," % (best_validation_loss * 100.0))
    return classifier, training_time
def train_model(train_set_x_org=None, train_set_y_org=None,
                valid_set_x_org=None, valid_set_y_org=None,
                learning_rate=0.1, alpha=0.01,
                lambda1=0.001, lambda2=1.0, alpha1=0.001, alpha2=0.0,
                n_hidden=[256,128,16], n_epochs=1000, batch_size=100,
                activation_func="tanh", rng=numpy.random.RandomState(100)):
    """
    Train a deep feature selection (DFS) model with minibatch updates and
    validation-based early stopping.

    INPUTS:
    train_set_x_org: numpy 2d array, each row is a training sample.

    train_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of
    training samples. The number of classes is the number of distinct values.

    valid_set_x_org: numpy 2d array, each row is a validation sample.
    This set is used to monitor the convergence of the optimization.

    valid_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of
    validation samples.

    learning_rate: float scalar, the initial learning rate (decayed by a
    factor 0.8 on a schedule during training).

    alpha: float, momentum trade-off; forwarded to
    classifier.build_train_function.

    lambda1, lambda2: float scalars forwarded to DFS. Per the original
    documentation the input-weight penalty is
    lambda1 * ((1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1);
    lambda1 scales the whole term and lambda2 mixes L2 against L1. The
    penalty is implemented in DFS, not here.

    alpha1, alpha2: float scalars forwarded to DFS. Per the original
    documentation the penalty on the other weight matrices is
    alpha1 * ((1-alpha2)/2 * sum_i ||W_i||_2^2 + alpha2 * sum_i ||W_i||_1).

    n_hidden: vector of int, n_hidden[i] is the number of hidden units of the
    i-th layer (the default list is shared between calls and not modified
    here).

    n_epochs: int scalar, the maximal number of epochs.

    batch_size: int scalar, minibatch size.

    activation_func: string, one of {"tanh" (default), "sigmoid"}.

    rng: numpy random number state, forwarded to DFS. The default object is
    created once at definition time and shared by calls using the default.

    OUTPUTS:
    classifier: the DFS object, carrying the best parameters recorded during
    validation (when any were recorded).

    training_time: float, time measured with time.clock(), in seconds.

    RAISES:
    KeyError if activation_func is not a known activation name.
    ValueError if the training set yields no minibatch.
    """
    dtype = theano.config.floatX

    def _as_shared(values):
        # copy the data into a shared variable of the configured float type
        return theano.shared(numpy.asarray(values, dtype=dtype), borrow=True)

    train_set_x = _as_shared(train_set_x_org)
    train_set_y = T.cast(_as_shared(train_set_y_org), 'int32')
    valid_set_x = _as_shared(valid_set_x_org)
    valid_set_y = T.cast(_as_shared(valid_set_y_org), 'int32')

    n_train, num_feat = train_set_x.get_value(borrow=True).shape[:2]

    # number of minibatches for training
    # NOTE(review): if true division is not in effect for this module
    # (Python 2 without `from __future__ import division`) the quotient is
    # floored before math.ceil sees it and a trailing partial batch is
    # dropped -- confirm against the module header.
    n_train_batches = int(math.ceil(n_train / batch_size))
    if n_train_batches < 1:
        # This used to fail later with a NameError.
        raise ValueError("the training set does not yield any minibatch; check batch_size")

    # shared variable so that the learning rate can be reduced during training
    learning_rate_shared = theano.shared(learning_rate, name='learn_rate_shared')
    decay_rate = T.scalar(name='decay_rate', dtype=dtype)
    reduce_learning_rate = theano.function(
        [decay_rate], learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    ## define the model below
    n_cl = len(numpy.unique(train_set_y_org))  # number of classes
    activations = {"tanh": T.tanh, "sigmoid": T.nnet.sigmoid}
    activation = activations[activation_func]

    # build a DFS object
    classifier = DFS(rng=rng, n_in=num_feat, n_hidden=n_hidden, n_out=n_cl,
                     lambda1=lambda1, lambda2=lambda2,
                     alpha1=alpha1, alpha2=alpha2,
                     activation=activation)

    train_model_one_iteration = classifier.build_train_function(
        train_set_x, train_set_y, batch_size, alpha, learning_rate_shared)
    validate_model = classifier.build_valid_function(valid_set_x, valid_set_y, batch_size)

    print('... training')

    # early-stopping parameters
    patience = 5000                  # look at this many minibatches regardless
    patience_increase = 2            # wait this much longer when a new best is found
    improvement_threshold = 0.995    # relative improvement considered significant
    # validate every `validation_frequency` minibatches (once per epoch when
    # an epoch has fewer than patience // 2 minibatches)
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    # Bound before the loop so that NaN losses, n_epochs=0 or an early exit
    # cannot trigger a NameError further down.
    best_model_params = None
    this_validation_loss = numpy.inf
    num_epoch_not_improve = 0

    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    learning_rate_decay_rate = 0.8
    epoch_change_count = 0

    start_time = time.clock()
    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        epoch_change_count = epoch_change_count + 1

        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            # scheduled decay of the learning rate; the helper in `cl`
            # returns the length of the next interval
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate = \
                cl.change_max_num_epoch_change_learning_rate(
                    max_num_epoch_change_learning_rate, max_num_epoch_change_rate)
            max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
            epoch_change_count = 0

        for minibatch_index in xrange(n_train_batches):
            train_model_one_iteration(minibatch_index)
            # 0-based count of minibatches processed so far
            n_iter = (epoch - 1) * n_train_batches + minibatch_index

            if (n_iter + 1) % validation_frequency == 0:
                # mean of whatever validate_model() returns
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve = 0
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, n_iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    # save a copy of the currently best model parameters
                    best_model_params = classifier.get_params()

            if patience <= n_iter:
                done_looping = True
                break

        # NOTE: right after an improvement the two losses are equal, so this
        # branch also runs and the counter restarts at 1 (original behaviour,
        # deliberately preserved).
        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve = num_epoch_not_improve + 1

        if num_epoch_not_improve >= max_num_epoch_not_improve:
            done_looping = True
            break

    # restore the best model parameters, if any were recorded
    if best_model_params is not None:
        classifier.set_params(best_model_params)

    end_time = time.clock()
    training_time = end_time - start_time
    print('Training time: %f' % (training_time / 60))
    print('Optimization complete with best validation score of %f,' % (best_validation_loss * 100.))
    return classifier, training_time
def finetune_model(classifier=None,
                   train_set_x=None,
                   train_set_y=None,
                   valid_set_x=None,
                   valid_set_y=None,
                   learning_rate=0.1,
                   alpha=0.01,
                   n_hidden=[256, 128, 16],
                   n_cl=2,
                   n_epochs=1000,
                   batch_size=100,
                   rng=numpy.random.RandomState(100)):
    """
    Finetune an existing model on training data, with early stopping
    monitored on validation data.

    INPUTS:
    classifier: model object providing build_finetune_functions(),
    get_params() and set_params(). It is updated in place.

    train_set_x, valid_set_x: Theano shared variables holding the samples
    (get_value() is called on train_set_x to obtain the number of rows).

    train_set_y, valid_set_y: label variables, passed through to
    classifier.build_finetune_functions.

    learning_rate: float scalar, the initial learning rate (decayed by a
    factor 0.8 on a schedule during finetuning).

    n_epochs: int scalar, the maximal number of epochs.

    batch_size: int scalar, minibatch size.

    alpha, n_hidden, n_cl, rng: accepted for interface compatibility; they are
    not used by this function.

    OUTPUTS:
    None. The best parameters recorded during validation (if any) are written
    back into `classifier`.

    RAISES:
    ValueError if the training set yields no minibatch.
    """
    # number of minibatches for training
    # NOTE(review): unless true division is in effect for this module the
    # quotient is floored before math.ceil is applied, dropping a trailing
    # partial batch -- confirm against the module header.
    n_train_batches = int(
        math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size))
    if n_train_batches < 1:
        # This used to fail later with a NameError.
        raise ValueError("the training set does not yield any minibatch; check batch_size")

    # shared variable so that the learning rate can be reduced during training
    learning_rate_shared = theano.shared(learning_rate,
                                         name='learn_rate_shared')
    decay_rate = T.scalar(name='decay_rate', dtype=theano.config.floatX)
    reduce_learning_rate = theano.function(
        [decay_rate],
        learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    train_model_one_iteration, validate_model = classifier.build_finetune_functions(
        train_set_x, train_set_y, valid_set_x, valid_set_y, batch_size,
        learning_rate_shared)

    print('... finetuning')
    # early-stopping parameters
    patience = 5000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # relative improvement considered significant
    # validate every `validation_frequency` minibatches (once per epoch when
    # an epoch has fewer than patience // 2 minibatches)
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    # Bound before the loop so that NaN losses, n_epochs=0 or an early exit
    # cannot trigger a NameError further down.
    best_model_params = None
    this_validation_loss = numpy.inf
    num_epoch_not_improve = 0

    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    learning_rate_decay_rate = 0.8
    epoch_change_count = 0
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            # scheduled decay of the learning rate; the helper in `cl`
            # returns the length of the next interval
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate = \
                cl.change_max_num_epoch_change_learning_rate(
                    max_num_epoch_change_learning_rate, max_num_epoch_change_rate)
            max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
            epoch_change_count = 0

        for minibatch_index in xrange(n_train_batches):
            train_model_one_iteration(minibatch_index)
            # 0-based count of minibatches processed so far
            step = (epoch - 1) * n_train_batches + minibatch_index

            if (step + 1) % validation_frequency == 0:
                # mean of whatever validate_model() returns
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve = 0
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, step * patience_increase)
                    best_validation_loss = this_validation_loss
                    # save a copy of the currently best model parameters
                    best_model_params = classifier.get_params()

            if patience <= step:
                done_looping = True
                break

        # NOTE: right after an improvement the two losses are equal, so this
        # branch also runs and the counter restarts at 1 (original behaviour,
        # deliberately preserved).
        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve = num_epoch_not_improve + 1

        if num_epoch_not_improve >= max_num_epoch_not_improve:
            done_looping = True
            break

    # restore the best model parameters, if any were recorded
    if best_model_params is not None:
        classifier.set_params(best_model_params)
    end_time = time.clock()
    training_time = end_time - start_time
    print('Training time: %f' % (training_time / 60))
    print('Optimization complete with best validation score of %f,' %
          (best_validation_loss * 100.))
def train_model(learning_rate=0.1, n_epochs=1000,
                train_set_x_org=None, train_set_y_org=None,
                valid_set_x_org=None, valid_set_y_org=None,
                batch_size=100):
    """
    Train a logistic regression model with minibatch gradient descent and
    validation-based early stopping.

    INPUTS:
    learning_rate: float scalar, the initial learning rate. It is kept in a
    shared variable that is multiplied by 0.8 on a schedule; the parameter
    updates read that shared variable, so the decay affects training.

    n_epochs: int scalar, the maximal number of epochs.

    train_set_x_org: numpy 2d array, each row is a training sample.

    train_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of
    training samples. The number of classes is the number of distinct values.

    valid_set_x_org: numpy 2d array, each row is a validation sample.
    This set is used to monitor the convergence of the optimization.

    valid_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of
    validation samples.

    batch_size: int scalar, minibatch size.

    OUTPUTS:
    classifier: object of LogisticRegression, carrying the best parameters
    recorded during validation (when any were recorded).

    training_time: float, time measured with time.clock(), in seconds.

    RAISES:
    ValueError if the training set is empty.
    """
    float_type = theano.config.floatX
    train_set_x = theano.shared(numpy.asarray(train_set_x_org, dtype=float_type), borrow=True)
    train_set_y = T.cast(theano.shared(numpy.asarray(train_set_y_org, dtype=float_type), borrow=True), 'int32')
    valid_set_x = theano.shared(numpy.asarray(valid_set_x_org, dtype=float_type), borrow=True)
    valid_set_y = T.cast(theano.shared(numpy.asarray(valid_set_y_org, dtype=float_type), borrow=True), 'int32')

    n_train, num_feat = train_set_x.get_value(borrow=True).shape[:2]
    n_valid = valid_set_x.get_value(borrow=True).shape[0]

    # Number of minibatches, rounding up so that a trailing partial batch is
    # used. float() makes the division a true division regardless of whether
    # `from __future__ import division` is active; with integer division the
    # ceil would be a no-op and small sets would give zero batches.
    n_train_batches = int(math.ceil(n_train / float(batch_size)))
    n_valid_batches = int(math.ceil(n_valid / float(batch_size)))
    if n_train_batches < 1:
        # This used to fail later with a NameError.
        raise ValueError("the training set is empty")

    # shared variable so that the learning rate can be reduced during training
    learning_rate_shared = theano.shared(learning_rate, name='learn_rate_shared')
    decay_rate = T.scalar(name='decay_rate', dtype=float_type)
    reduce_learning_rate = theano.function(
        [decay_rate], learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    # define the model
    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # each row is a sample
    y = T.ivector('y')   # the labels are presented as 1D vector of [int] labels

    n_cl = len(numpy.unique(train_set_y_org))  # number of classes
    classifier = LogisticRegression(input=x, n_in=num_feat, n_out=n_cl)

    # the cost we minimize during training, in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # Theano function returning classifier.errors(y) on one validation
    # minibatch. (Two further validation functions compiled by the previous
    # version were never called and have been dropped.)
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # gradient of the cost with respect to theta = (W, b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # Use the *shared* learning rate in the update rule; the previous version
    # used the Python float, so reduce_learning_rate() had no effect on the
    # updates. The cast keeps the update expressions in floatX, since a shared
    # variable built from a Python float defaults to float64.
    current_lr = T.cast(learning_rate_shared, float_type)
    updates = [(classifier.W, classifier.W - current_lr * g_W),
               (classifier.b, classifier.b - current_lr * g_b)]

    # Theano function that returns the cost on one training minibatch and, at
    # the same time, applies `updates` to the model parameters
    train_model_one_iteration = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    # training the model below
    # early-stopping parameters
    patience = 5000                # look at this many minibatches regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # relative improvement considered significant
    # validate every `validation_frequency` minibatches (once per epoch when
    # an epoch has fewer than patience // 2 minibatches)
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    # Bound before the loop so that NaN losses, n_epochs=0 or an early exit
    # cannot trigger a NameError further down.
    best_model_params = None
    this_validation_loss = numpy.inf
    num_epoch_not_improve = 0

    max_num_epoch_change_learning_rate = 100  # initial number of epochs between learning-rate changes
    max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate  # epochs without improvement before stopping
    max_num_epoch_change_rate = 0.8  # passed to the helper that adapts the interval above
    learning_rate_decay_rate = 0.8
    epoch_change_count = 0

    start_time = time.clock()
    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate = \
                cl.change_max_num_epoch_change_learning_rate(
                    max_num_epoch_change_learning_rate, max_num_epoch_change_rate)
            max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
            epoch_change_count = 0

        for minibatch_index in xrange(n_train_batches):
            train_model_one_iteration(minibatch_index)
            # 0-based count of minibatches processed so far
            step = (epoch - 1) * n_train_batches + minibatch_index

            if (step + 1) % validation_frequency == 0:
                # mean over validation minibatches of classifier.errors(y)
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve = 0
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, step * patience_increase)
                    best_validation_loss = this_validation_loss
                    # save a copy of the currently best model parameters
                    best_model_params = classifier.get_params()

            if patience <= step:
                done_looping = True
                break

        # NOTE: right after an improvement the two losses are equal, so this
        # branch also runs and the counter restarts at 1 (original behaviour,
        # deliberately preserved).
        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve = num_epoch_not_improve + 1

        if num_epoch_not_improve >= max_num_epoch_not_improve:
            done_looping = True
            break

    # restore the best model parameters, if any were recorded
    if best_model_params is not None:
        classifier.set_params(best_model_params)

    end_time = time.clock()
    training_time = end_time - start_time
    print('Training time: %f' % (training_time / 60))
    print('Optimization complete with best validation score of %f,' % (best_validation_loss * 100.))
    return classifier, training_time
def train_model(rng=numpy.random.RandomState(100),
                train_set_x_org=None,
                n_hidden=100,
                learning_rate=0.1,
                training_epochs=100,
                batch_size=100,
                persistent_chain_k=15):
    """
    Train a RBM model given training data.

    INPUTS:
    rng: numpy random number state. It seeds the Theano random stream and is
    passed to the RBM. The default object is created once at definition time
    and shared by calls using the default.

    train_set_x_org: numpy 2d array, each row is a training sample.

    n_hidden: int, number of hidden units.

    learning_rate: float scalar, the initial learning rate. It is kept in a
    shared variable that is halved on a schedule; that shared variable is what
    is handed to rbm.get_cost_updates, so the decay affects training.

    training_epochs: int scalar, the maximal number of epochs.

    batch_size: int scalar, minibatch size.

    persistent_chain_k: passed as `k` to rbm.get_cost_updates together with
    the persistent chain (per the original documentation: length of the
    persistent chain from the last sampling to the new sampling).

    OUTPUTS:
    rbm: object of RBM. The model learned.

    mean_hidden: second element of rbm.propup(train_set_x) evaluated on the
    whole training set (per the original documentation: numpy 2d array, each
    row is a reduced training sample).

    training_time: float, time measured with time.clock(), in seconds.
    """
    float_type = theano.config.floatX
    train_set_x = theano.shared(numpy.asarray(train_set_x_org, dtype=float_type),
                                borrow=True)
    # NOTE(review): unless true division is in effect for this module the
    # quotient is floored before math.ceil is applied -- confirm against the
    # module header. Left unchanged here.
    n_train_batches = int(
        math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size))

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # each row is a sample

    # shared variable so that the learning rate can be reduced during training
    learning_rate_shared = theano.shared(learning_rate,
                                         name='learn_rate_shared')
    decay_rate = T.scalar(name='decay_rate', dtype=float_type)
    reduce_learning_rate = theano.function(
        [decay_rate],
        learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    n_visible = train_set_x_org.shape[1]  # number of input features
    theano_rng = RandomStreams(rng.randint(2**30))

    # storage for the persistent chain, initialised to zeros
    persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),
                                                 dtype=float_type),
                                     borrow=True)

    # construct the RBM object
    rbm = RBM(input=x,
              n_visible=n_visible,
              n_hidden=n_hidden,
              numpy_rng=rng,
              theano_rng=theano_rng)

    # Cost and parameter updates. The previous version passed the Python
    # float `learning_rate`, so reduce_learning_rate() never influenced the
    # updates. The cast keeps the value in floatX, since a shared variable
    # built from a Python float defaults to float64.
    # Assumes get_cost_updates accepts a symbolic scalar for `lr` -- TODO confirm.
    cost, updates = rbm.get_cost_updates(
        lr=T.cast(learning_rate_shared, float_type),
        persistent=persistent_chain,
        k=persistent_chain_k)

    # one training step on a single minibatch; returns the cost and applies
    # the parameter updates
    train_rbm_one_iteration = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]},
        name='train_rbm')

    # optimization schedule
    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 2 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    epoch_change_count = 0
    best_cost = numpy.inf
    # Bound before the loop: a non-finite cost in the first epoch used to
    # leave this counter undefined and raise a NameError.
    num_epoch_not_improve = 0

    # train the model using the training set
    start_time = time.clock()
    for epoch in xrange(training_epochs):
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(0.5)
            max_num_epoch_change_learning_rate = \
                cl.change_max_num_epoch_change_learning_rate(
                    max_num_epoch_change_learning_rate, max_num_epoch_change_rate)
            max_num_epoch_not_improve = 2 * max_num_epoch_change_learning_rate
            epoch_change_count = 0

        # costs of all minibatches of this epoch
        batch_costs = [train_rbm_one_iteration(batch_index)
                       for batch_index in xrange(n_train_batches)]
        this_cost = numpy.mean(batch_costs)
        print('Training eopch: %d, cost: %f' % (epoch, this_cost))

        if this_cost < best_cost:
            best_cost = this_cost
            num_epoch_not_improve = 0
        # NOTE: right after an improvement best_cost equals this_cost, so this
        # branch also runs and the counter restarts at 1 (original behaviour,
        # deliberately preserved).
        if this_cost >= best_cost:
            num_epoch_not_improve = num_epoch_not_improve + 1
        if num_epoch_not_improve >= max_num_epoch_not_improve:
            break

    end_time = time.clock()
    training_time = end_time - start_time
    print('Training time: %f' % (training_time / 60))

    # return the trained model and the hidden representation of the training set
    extracted = rbm.propup(train_set_x)
    get_extracted = theano.function([], extracted)
    pre_activation, mean_hidden = get_extracted()
    return rbm, mean_hidden, training_time
def train_model(train_set_x_org=None, training_epochs=1000, batch_size=100,
                n_hidden=10, learning_rate=0.1, contraction_level=0.1,
                cost_measure="cross_entropy", rng=numpy.random.RandomState(100)):
    """
    Train a contractive autoencoder.

    INPUTS:
    train_set_x_org: numpy 2d array, each row is a training sample.

    training_epochs: int scalar, the maximal number of epochs.

    batch_size: int scalar, minibatch size. Only complete minibatches are
    used (the number of batches is the floor of n_samples / batch_size), and
    batch_size is also passed to cA as `n_batchsize`.

    n_hidden: int scalar, number of hidden units.

    learning_rate: float scalar, the initial learning rate. It is kept in a
    shared variable that is multiplied by 0.8 on a schedule; that shared
    variable is what is handed to ca.get_cost_updates, so the decay affects
    training.

    contraction_level: float, passed to ca.get_cost_updates (presumably the
    weight of the contractive penalty -- confirm against cA).

    cost_measure: string from {"cross_entropy", "euclidean"}, measure used for
    the reconstruction cost; passed to ca.get_cost_updates.

    rng: numpy random number state, passed to cA. The default object is
    created once at definition time and shared by calls using the default.

    OUTPUTS:
    ca: object of cA, the model learned, returned for testing.

    train_set_x_extracted: ca.get_hidden_values evaluated on the whole
    training set (the reduced training set).

    training_time: float, time measured with time.clock(), in seconds.
    """
    float_type = theano.config.floatX
    train_set_x = theano.shared(numpy.asarray(train_set_x_org, dtype=float_type), borrow=True)
    # floor division: a trailing partial minibatch is deliberately left out
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # shared variable so that the learning rate can be reduced during training
    learning_rate_shared = theano.shared(learning_rate, name='learn_rate_shared')
    decay_rate = T.scalar(name='decay_rate', dtype=float_type)
    reduce_learning_rate = theano.function(
        [decay_rate], learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    n_visible = train_set_x_org.shape[1]  # number of input features

    # define the model
    x = T.matrix(name='x', dtype=float_type)  # symbol for the input data
    ca = cA(numpy_rng=rng, input=x, n_visible=n_visible, n_hidden=n_hidden,
            n_batchsize=batch_size)

    # Cost and parameter updates. The previous version passed the Python
    # float `learning_rate`, so reduce_learning_rate() never influenced the
    # updates. The cast keeps the value in floatX, since a shared variable
    # built from a Python float defaults to float64.
    # Assumes get_cost_updates accepts a symbolic scalar -- TODO confirm.
    cost, updates = ca.get_cost_updates(
        contraction_level=contraction_level,
        learning_rate=T.cast(learning_rate_shared, float_type),
        cost_measure=cost_measure)

    index = T.lscalar()  # symbol for the minibatch index
    # one training step on a single minibatch; returns ca.L_rec and
    # ca.L_jacob and applies the parameter updates
    train_ca_one_iteration = theano.function(
        [index], [ca.L_rec, ca.L_jacob],
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    learning_rate_decay_rate = 0.8
    epoch_change_count = 0
    best_cost = numpy.inf
    # Bound before the loop: a non-finite cost in the first epoch used to
    # leave this counter undefined and raise a NameError.
    num_epoch_not_improve = 0

    # train the model using the training set
    start_time = time.clock()
    for epoch in xrange(training_epochs):
        rec_costs = []  # first output (ca.L_rec) of every minibatch of this epoch
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate = \
                cl.change_max_num_epoch_change_learning_rate(
                    max_num_epoch_change_learning_rate, max_num_epoch_change_rate)
            max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
            epoch_change_count = 0

        for batch_index in xrange(n_train_batches):
            # the second output (ca.L_jacob) is not used for monitoring
            c_batch, _ = train_ca_one_iteration(batch_index)
            rec_costs.append(c_batch)
        this_cost = numpy.mean(rec_costs)
        print('Training eopch: %d, cost: %f' % (epoch, this_cost))

        if this_cost < best_cost:
            best_cost = this_cost
            num_epoch_not_improve = 0
        # NOTE: right after an improvement best_cost equals this_cost, so this
        # branch also runs and the counter restarts at 1 (original behaviour,
        # deliberately preserved).
        if this_cost >= best_cost:
            num_epoch_not_improve = num_epoch_not_improve + 1
        if num_epoch_not_improve >= max_num_epoch_not_improve:
            break

    end_time = time.clock()
    training_time = end_time - start_time
    print('Training time: %f' % (training_time / 60))

    # return the trained model and the reduced training set
    extracted = ca.get_hidden_values(train_set_x)
    get_extracted = theano.function([], extracted)
    train_set_x_extracted = get_extracted()
    return ca, train_set_x_extracted, training_time
def finetune_model(classifier=None, train_set_x=None, train_set_y=None,
                   valid_set_x=None, valid_set_y=None,
                   n_row_each_sample=1,
                   learning_rate=0.1, alpha=0.1, n_epochs=1000,
                   rng=numpy.random.RandomState(1000),
                   nkerns=[4, 4, 4], batch_size=500,
                   receptive_fields=((1, 8), (1, 8), (1, 8)),
                   poolsizes=((1, 8), (1, 8), (1, 4)),
                   full_hidden_sub=[16], full_hidden_all=[16],
                   max_num_epoch_change_learning_rate=80,
                   max_num_epoch_change_rate=0.8,
                   learning_rate_decay_rate=0.8):
    """
    Finetune the model using training and validation data.

    Trains minibatch by minibatch with the functions built by `classifier`,
    validates at the end of every epoch, decays the learning rate on a
    shrinking schedule, and finally writes the best parameters seen on the
    validation set back with classifier.set_params. Nothing is returned.

    INPUTS:
    classifier: model object providing build_train_function,
        build_valid_function, get_params and set_params.
    train_set_x: training samples; must support get_value(borrow=True)
        (presumably a theano shared matrix, one sample per row -- confirm
        against the caller).
    train_set_y: training labels, passed to classifier.build_train_function.
    valid_set_x, valid_set_y: validation data, passed to
        classifier.build_valid_function.
    learning_rate: float, the initial learning rate.
    alpha: float, passed to classifier.build_train_function (presumably the
        momentum trade-off -- confirm).
    n_epochs: int, maximal number of epochs allowed.
    batch_size: int, minibatch size. Only full minibatches are used
        (n_train // batch_size of them).
    max_num_epoch_change_learning_rate: int, number of epochs between two
        learning-rate decays; updated after each decay through
        cl.change_max_num_epoch_change_learning_rate.
    max_num_epoch_change_rate: float, passed to
        cl.change_max_num_epoch_change_learning_rate.
    learning_rate_decay_rate: float, factor the learning rate is multiplied
        by at each decay.
    n_row_each_sample, rng, nkerns, receptive_fields, poolsizes,
    full_hidden_sub, full_hidden_all: not used by this function; kept so
        existing callers keep working.
    """
    n_train = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches = n_train // batch_size

    # Shared variable so the learning rate can be reduced during training.
    learning_rate_shared = theano.shared(learning_rate,
                                         name='learn_rate_shared')
    decay_rate = T.scalar(name='decay_rate', dtype=theano.config.floatX)
    reduce_learning_rate = theano.function(
        [decay_rate], learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    train_model_one_iteration = classifier.build_train_function(
        train_set_x, train_set_y, batch_size, alpha, learning_rate_shared)
    validate_model = classifier.build_valid_function(
        valid_set_x, valid_set_y, batch_size)

    ###############
    # TRAIN MODEL #
    ###############
    print('... finetuning')

    # Early-stopping parameters.
    # NOTE(review): the limit starts at 5x the decay interval but is reset to
    # 3x after every decay below; kept as in the original -- confirm intended.
    max_num_epoch_not_improve = 5 * max_num_epoch_change_learning_rate
    epoch_change_count = 0
    patience = 1000000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    # a relative improvement of this much is considered significant
    improvement_threshold = 0.995
    # validate after this many minibatches, i.e. once per epoch
    validation_frequency = n_train_batches

    best_validation_loss = numpy.inf
    # Bound up front so a NaN/inf first validation loss (which never compares
    # as an improvement) or an epoch without validation cannot raise
    # NameError further down.
    this_validation_loss = numpy.inf
    num_epoch_not_improve = 0
    best_model_params = None

    start_time = time.clock()
    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):  # for every epoch
        epoch = epoch + 1
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate = (
                cl.change_max_num_epoch_change_learning_rate(
                    max_num_epoch_change_learning_rate,
                    max_num_epoch_change_rate))
            max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
            epoch_change_count = 0
        # Single-argument print call: same output as the two-item Python 2
        # print statement it replaces, and also valid under Python 3.
        print(' '.join(('The current learning rate is ',
                        str(learning_rate_shared.get_value()))))

        for minibatch_index in xrange(n_train_batches):  # every minibatch
            # number of minibatches processed before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            train_model_one_iteration(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:
                # compute the loss on the validation set
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %0.4f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve = 0
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < (best_validation_loss *
                                               improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)
                    # save the best validation score and a copy of the
                    # currently best model parameters
                    best_validation_loss = this_validation_loss
                    best_model_params = classifier.get_params()

            if patience <= iteration:
                done_looping = True
                break

        # NOTE(review): right after an improvement best == this, so the
        # counter goes straight back to 1; kept to preserve the original
        # stopping point.
        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve = num_epoch_not_improve + 1
        if num_epoch_not_improve >= max_num_epoch_not_improve:
            done_looping = True
            break

    # Restore the best model parameters, if any validation ever improved.
    if best_model_params is not None:
        classifier.set_params(best_model_params)
    end_time = time.clock()
    training_time = end_time - start_time
    print('Finetuning time: %f' % (training_time / 60))
    print('Optimization complete with best validation score of %f,' %
          (best_validation_loss * 100.))
def finetune_model(classifier=None,
                   train_set_x=None, train_set_y=None,
                   valid_set_x=None, valid_set_y=None,
                   learning_rate=0.1, alpha=0.01,
                   n_hidden=[256, 128, 16], n_cl=2,
                   n_epochs=1000, batch_size=100,
                   rng=numpy.random.RandomState(100)):
    """
    Finetune the model by training and validation sets.

    Trains with the functions returned by
    classifier.build_finetune_functions, validates every
    `validation_frequency` minibatches, decays the learning rate on a
    shrinking schedule, and finally writes the best parameters seen on the
    validation set back with classifier.set_params. Nothing is returned.

    INPUTS:
    classifier: model object providing build_finetune_functions, get_params
        and set_params.
    train_set_x: training samples; must support get_value(borrow=True)
        (presumably a theano shared matrix, one sample per row -- confirm
        against the caller).
    train_set_y, valid_set_x, valid_set_y: passed unchanged to
        classifier.build_finetune_functions.
    learning_rate: float, the initial learning rate.
    n_epochs: int, maximal number of epochs allowed.
    batch_size: int, minibatch size.
    alpha, n_hidden, n_cl, rng: not used by this function; kept so existing
        callers keep working.
    """
    # Number of training minibatches. float() forces true division so that
    # math.ceil really rounds up under Python 2 as well; with integer
    # division the trailing partial minibatch was silently dropped.
    n_train = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches = int(math.ceil(n_train / float(batch_size)))

    # Shared variable so the learning rate can be reduced during training.
    learning_rate_shared = theano.shared(learning_rate,
                                         name='learn_rate_shared')
    decay_rate = T.scalar(name='decay_rate', dtype=theano.config.floatX)
    reduce_learning_rate = theano.function(
        [decay_rate], learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    train_model_one_iteration, validate_model = (
        classifier.build_finetune_functions(
            train_set_x, train_set_y, valid_set_x, valid_set_y,
            batch_size, learning_rate_shared))

    print('... finetuning')

    # Early-stopping parameters.
    patience = 5000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    # a relative improvement of this much is considered significant
    improvement_threshold = 0.995
    # validate after this many minibatches (at least once per epoch)
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    # Bound up front so a NaN/inf first validation loss (which never compares
    # as an improvement) or an epoch without validation cannot raise
    # NameError further down.
    this_validation_loss = numpy.inf
    num_epoch_not_improve = 0
    best_model_params = None

    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    learning_rate_decay_rate = 0.8
    epoch_change_count = 0

    start_time = time.clock()
    done_looping = False
    epoch = 0

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate = (
                cl.change_max_num_epoch_change_learning_rate(
                    max_num_epoch_change_learning_rate,
                    max_num_epoch_change_rate))
            max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
            epoch_change_count = 0

        for minibatch_index in xrange(n_train_batches):
            train_model_one_iteration(minibatch_index)
            # number of minibatches processed before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute the loss on the validation set
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve = 0
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < (best_validation_loss *
                                               improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)
                    best_validation_loss = this_validation_loss
                    # save a copy of the currently best model parameters
                    best_model_params = classifier.get_params()

            if patience <= iteration:
                done_looping = True
                break

        # NOTE(review): right after an improvement best == this, so the
        # counter goes straight back to 1; kept to preserve the original
        # stopping point.
        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve = num_epoch_not_improve + 1
        if num_epoch_not_improve >= max_num_epoch_not_improve:
            done_looping = True
            break

    # Restore the best model parameters, if any validation ever improved.
    if best_model_params is not None:
        classifier.set_params(best_model_params)
    end_time = time.clock()
    training_time = end_time - start_time
    print('Training time: %f' % (training_time / 60))
    print('Optimization complete with best validation score of %f,' %
          (best_validation_loss * 100.))
def train_model_old(train_set_x_org=None, train_set_y_org=None,
                    valid_set_x_org=None, valid_set_y_org=None,
                    learning_rate=0.1, alpha=0.01,
                    L1_reg=0.00, L2_reg=0.0001,
                    n_hidden=[256, 128, 16], n_epochs=1000, batch_size=100,
                    rng=numpy.random.RandomState(100)):
    """
    Train the model by training and validation sets.

    Builds an MLP, minimises its negative log likelihood plus L1/L2
    penalties with minibatch gradient descent and a momentum term, and keeps
    the parameters that scored best on the validation set.

    INPUTS:
    train_set_x_org: numpy 2d array, each row is a training sample.
    train_set_y_org: numpy vector of class labels of the training samples;
        the number of distinct values is used as the number of classes.
    valid_set_x_org: numpy 2d array, each row is a validation sample.
    valid_set_y_org: numpy vector of class labels of the validation samples.
    learning_rate: float, the initial learning rate.
    alpha: float, weight of the previous update in the momentum term.
    L1_reg: float, weight of classifier.L1 in the cost.
    L2_reg: float, weight of classifier.L2_sqr in the cost.
    n_hidden: passed to MLP as n_hidden.
    n_epochs: int, maximal number of epochs allowed.
    batch_size: int, minibatch size.
    rng: numpy random number state, passed to MLP.

    OUTPUTS:
    classifier: object of MLP, the model learned.
    """
    train_set_x = theano.shared(
        numpy.asarray(train_set_x_org, dtype=theano.config.floatX),
        borrow=True)
    train_set_y = T.cast(
        theano.shared(
            numpy.asarray(train_set_y_org, dtype=theano.config.floatX),
            borrow=True),
        'int32')
    valid_set_x = theano.shared(
        numpy.asarray(valid_set_x_org, dtype=theano.config.floatX),
        borrow=True)
    valid_set_y = T.cast(
        theano.shared(
            numpy.asarray(valid_set_y_org, dtype=theano.config.floatX),
            borrow=True),
        'int32')

    # Number of minibatches for training and validation. float() forces true
    # division so that math.ceil really rounds up under Python 2 as well;
    # with integer division the trailing partial minibatch was dropped and a
    # set smaller than batch_size produced zero minibatches.
    n_train = train_set_x.get_value(borrow=True).shape[0]
    n_valid = valid_set_x.get_value(borrow=True).shape[0]
    n_train_batches = int(math.ceil(n_train / float(batch_size)))
    n_valid_batches = int(math.ceil(n_valid / float(batch_size)))

    # Shared variable so the learning rate can be reduced during training.
    learning_rate_shared = theano.shared(learning_rate,
                                         name='learn_rate_shared')
    decay_rate = T.scalar(name='decay_rate', dtype=theano.config.floatX)
    reduce_learning_rate = theano.function(
        [decay_rate], learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    # Define the model below.
    # Symbolic variables for the data.
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # matrix, each row is a sample
    y = T.ivector('y')  # vector, integers from {0,1,2,...,C-1}

    num_feat = train_set_x.get_value(borrow=True).shape[1]  # num. features
    n_cl = len(numpy.unique(train_set_y_org))  # number of classes

    # build an MLP object
    classifier = MLP(rng=rng, x=x, y=y, n_in=num_feat, n_hidden=n_hidden,
                     n_out=n_cl)

    # The cost minimised during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2), expressed
    # symbolically.
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # Theano function that computes the mistakes made by the model on one
    # validation minibatch.
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # Gradient of the cost with respect to every parameter.
    grads = [T.grad(cost, param) for param in classifier.params]

    # One shared buffer per parameter holding the previous update (momentum).
    delta_before = [
        theano.shared(value=numpy.zeros(param_i.get_value().shape))
        for param_i in classifier.params]

    # (variable, update expression) pairs: gradient step plus alpha times
    # the previous step, and the step itself stored for the next iteration.
    updates = []
    for param_i, grad_i, delta_before_i in zip(classifier.params, grads,
                                               delta_before):
        delta_i = -learning_rate_shared * grad_i + alpha * delta_before_i
        updates.append((param_i, param_i + delta_i))
        updates.append((delta_before_i, delta_i))

    # Theano function that returns the cost and at the same time updates the
    # parameters of the model according to `updates`.
    train_model_one_iteration = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    print('... training')

    # Early-stopping parameters.
    patience = 5000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    # a relative improvement of this much is considered significant
    improvement_threshold = 0.995
    # validate after this many minibatches (at least once per epoch);
    # floor division keeps this an int on every Python version
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    # Bound up front so a NaN/inf first validation loss (which never compares
    # as an improvement) or an epoch without validation cannot raise
    # NameError further down.
    this_validation_loss = numpy.inf
    num_epoch_not_improve = 0
    best_model_params = None

    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 100
    max_num_epoch_change_rate = 0.9
    epoch_change_count = 0

    start_time = time.clock()
    done_looping = False
    epoch = 0

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(0.9)
            max_num_epoch_change_learning_rate = (
                cl.change_max_num_epoch_change_learning_rate(
                    max_num_epoch_change_learning_rate,
                    max_num_epoch_change_rate))
            max_num_epoch_not_improve = 100
            epoch_change_count = 0

        for minibatch_index in xrange(n_train_batches):
            train_model_one_iteration(minibatch_index)
            # number of minibatches processed before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve = 0
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < (best_validation_loss *
                                               improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)
                    best_validation_loss = this_validation_loss
                    # save a copy of the currently best model parameters
                    best_model_params = classifier.get_params()

            if patience <= iteration:
                done_looping = True
                break

        # NOTE(review): right after an improvement best == this, so the
        # counter goes straight back to 1; kept to preserve the original
        # stopping point.
        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve = num_epoch_not_improve + 1
        if num_epoch_not_improve >= max_num_epoch_not_improve:
            done_looping = True
            break

    # Restore the best model parameters, if any validation ever improved.
    if best_model_params is not None:
        classifier.set_params(best_model_params)
    end_time = time.clock()
    training_time = end_time - start_time
    print('Training time: %f' % (training_time / 60))
    print('Optimization complete with best validation score of %f,' %
          (best_validation_loss * 100.))
    return classifier
def train_model(train_set_x_org=None, train_set_y_org=None,
                valid_set_x_org=None, valid_set_y_org=None,
                n_row_each_sample=1,
                learning_rate=0.1, alpha=0.1, n_epochs=1000,
                rng=numpy.random.RandomState(1000),
                nkerns=[4, 4, 4], batch_size=500,
                receptive_fields=((2, 8), (2, 8), (2, 8)),
                poolsizes=((1, 8), (1, 8), (1, 4)),
                full_hidden=16):
    """
    Train the model using training and validation data.

    INPUTS:
    train_set_x_org: numpy 2d array, each row is a training sample.

    train_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of
    training samples.

    valid_set_x_org: numpy 2d array, each row is a validation sample.
    This set is to monitor the convergence of optimization.

    valid_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of
    validation samples.

    n_row_each_sample: int, for each vectorized sample, the number of rows
    when matricizing it. The vectorized sample is in the form of
    [row_0,row_1,...,row_{n_row_each_sample-1}].

    learning_rate: float, the initial learning rate.

    alpha: float, parameter to trade off the momentum term.

    n_epochs: int, maximal number of epochs allowed.

    rng: numpy random number state.

    nkerns: list, tuple, or vector, nkerns[i] is the number of feature maps
    in the i-th convolutional layer.

    batch_size: int, minibatch size. Only full minibatches are used
    (n_train // batch_size of them).

    receptive_fields: list or tuple of the same length as nkerns,
    receptive_fields[i] is a list or tuple of length 2, the size of the
    receptive field in the i-th convolutional layer.
    receptive_fields[i]= (#rows of the receptive field, #columns of the
    receptive field).

    poolsizes: list or tuple of the same length as nkerns, the size to
    reduce to scalar. poolsizes[i]=(#rows, #columns).

    full_hidden: the number of hidden units fully connected to the units in
    the previous layer.

    OUTPUTS:
    classifier: object of CNN class, the model trained.

    training_time: training time, the difference of two time.clock()
    readings.
    """
    train_set_x = theano.shared(
        numpy.asarray(train_set_x_org, dtype=theano.config.floatX),
        borrow=True)
    train_set_y = T.cast(
        theano.shared(
            numpy.asarray(train_set_y_org, dtype=theano.config.floatX),
            borrow=True),
        'int32')
    valid_set_x = theano.shared(
        numpy.asarray(valid_set_x_org, dtype=theano.config.floatX),
        borrow=True)
    valid_set_y = T.cast(
        theano.shared(
            numpy.asarray(valid_set_y_org, dtype=theano.config.floatX),
            borrow=True),
        'int32')

    n_train = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches = n_train // batch_size

    # Shared variable so the learning rate can be reduced during training.
    learning_rate_shared = theano.shared(learning_rate,
                                         name='learn_rate_shared')
    decay_rate = T.scalar(name='decay_rate', dtype=theano.config.floatX)
    reduce_learning_rate = theano.function(
        [decay_rate], learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    num_feat = train_set_x.get_value(borrow=True).shape[1]
    input_size_row = n_row_each_sample  # how many rows for each sample
    input_size_col = num_feat // n_row_each_sample
    input_size = (input_size_row, input_size_col)
    n_out = len(numpy.unique(train_set_y_org))  # number of classes
    classifier = cnn(rng=rng, batch_size=batch_size, input_size=input_size,
                     nkerns=nkerns, receptive_fields=receptive_fields,
                     poolsizes=poolsizes, full_hidden=full_hidden,
                     n_out=n_out)

    train_model_one_iteration = classifier.build_train_function(
        train_set_x, train_set_y, batch_size, alpha, learning_rate_shared)
    validate_model = classifier.build_valid_function(
        valid_set_x, valid_set_y, batch_size)

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # Early-stopping parameters.
    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    learning_rate_decay_rate = 0.8
    epoch_change_count = 0
    patience = 1000000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    # a relative improvement of this much is considered significant
    improvement_threshold = 0.995
    # validate after this many minibatches, i.e. once per epoch
    validation_frequency = n_train_batches

    best_validation_loss = numpy.inf
    # Bound up front so a NaN/inf first validation loss (which never compares
    # as an improvement) or an epoch without validation cannot raise
    # NameError further down.
    this_validation_loss = numpy.inf
    num_epoch_not_improve = 0
    best_model_params = None

    start_time = time.clock()
    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):  # for every epoch
        epoch = epoch + 1
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate = (
                cl.change_max_num_epoch_change_learning_rate(
                    max_num_epoch_change_learning_rate,
                    max_num_epoch_change_rate))
            max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
            epoch_change_count = 0
        # Single-argument print calls: same output as the two-item Python 2
        # print statements they replace, and also valid under Python 3.
        print(' '.join(('The current learning rate is ',
                        str(learning_rate_shared.get_value()))))

        for minibatch_index in xrange(n_train_batches):  # every minibatch
            # number of minibatches processed before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if iteration % 100 == 0:
                print(' '.join(('training @ iter = ', str(iteration))))
            train_model_one_iteration(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:
                # compute the loss on the validation set
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %0.4f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve = 0
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < (best_validation_loss *
                                               improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)
                    # save the best validation score and a copy of the
                    # currently best model parameters
                    best_validation_loss = this_validation_loss
                    best_model_params = classifier.get_params()

            if patience <= iteration:
                done_looping = True
                break

        # NOTE(review): right after an improvement best == this, so the
        # counter goes straight back to 1; kept to preserve the original
        # stopping point.
        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve = num_epoch_not_improve + 1
        if num_epoch_not_improve >= max_num_epoch_not_improve:
            done_looping = True
            break

    # Restore the best model parameters, if any validation ever improved.
    if best_model_params is not None:
        classifier.set_params(best_model_params)
    end_time = time.clock()
    training_time = end_time - start_time
    print('Training time: %f' % (training_time / 60))
    print('Optimization complete with best validation score of %f,' %
          (best_validation_loss * 100.))
    return classifier, training_time
def train_model(train_set_x_org=None,
                training_epochs=1000,
                batch_size=100,
                n_hidden=10,
                learning_rate=0.1,
                corruption_level=0.1,
                W=None,
                bhid=None,
                bvis=None,
                cost_measure="cross_entropy",
                rng=numpy.random.RandomState(100)):
    """
    Train a denoising autoencoder.

    INPUTS:
    train_set_x_org: numpy 2d array, each row is a training sample.

    training_epochs: int scalar, the maximal number of epochs.

    batch_size: int scalar, minibatch size.

    n_hidden: int scalar, number of hidden units.

    learning_rate: float scalar, the initial learning rate.

    corruption_level: float from interval [0,1), corruption level.

    W, bhid, bvis: passed unchanged to dA_finetuning (presumably initial
    weights and hidden/visible biases -- confirm against that class).

    cost_measure: string from {"cross_entropy", "euclidean"}, measure used to
    compute the reconstruction cost.

    rng: numpy random number state.

    OUTPUTS:
    da: object of dA_finetuning, the model learned, returned for testing.

    train_set_x_extracted: reduced training set.

    training_time: float, training time in seconds.
    """
    train_set_x = theano.shared(
        numpy.asarray(train_set_x_org, dtype=theano.config.floatX),
        borrow=True)

    # Number of training minibatches. float() forces true division so that
    # math.ceil really rounds up under Python 2 as well; with integer
    # division the trailing partial minibatch was silently dropped.
    n_train = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches = int(math.ceil(n_train / float(batch_size)))

    # Shared variable so the learning rate can be reduced during training.
    learning_rate_shared = theano.shared(learning_rate,
                                         name='learn_rate_shared')
    decay_rate = T.scalar(name='decay_rate', dtype=theano.config.floatX)
    reduce_learning_rate = theano.function(
        [decay_rate], learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    n_visible = train_set_x_org.shape[1]  # number of input features
    theano_rng = RandomStreams(rng.randint(2**30))  # random symbol

    # Define the model; x is the symbol for the input data.
    x = T.matrix(name='x', dtype=theano.config.floatX)
    da = dA_finetuning(numpy_rng=rng,
                       theano_rng=theano_rng,
                       input=x,
                       n_visible=n_visible,
                       n_hidden=n_hidden,
                       W=W,
                       bhid=bhid,
                       bvis=bvis)

    # Get the formula of the cost and updates.
    # NOTE(review): the plain float `learning_rate` is passed here, not
    # `learning_rate_shared`, so reduce_learning_rate() below does not appear
    # to influence these updates -- confirm against get_cost_updates before
    # switching to the shared variable (dtype of the updates may change).
    cost, updates = da.get_cost_updates(
        corruption_level=corruption_level,
        learning_rate=learning_rate,
        cost_measure=cost_measure)

    index = T.lscalar()  # symbol for the minibatch index
    # Function that returns the cost of one minibatch and applies the
    # parameter updates defined above.
    train_da_one_iteration = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    learning_rate_decay_rate = 0.8
    epoch_change_count = 0
    best_cost = numpy.inf
    # Bound up front: a NaN/inf cost in the first epoch never compares as an
    # improvement and previously left this counter undefined (NameError).
    num_epoch_not_improve = 0

    # Train the model using the training set.
    start_time = time.clock()
    for epoch in range(training_epochs):
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate = (
                cl.change_max_num_epoch_change_learning_rate(
                    max_num_epoch_change_learning_rate,
                    max_num_epoch_change_rate))
            max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
            epoch_change_count = 0

        # costs of all minibatches of this epoch
        c = [train_da_one_iteration(batch_index)
             for batch_index in range(n_train_batches)]
        this_cost = numpy.mean(c)
        print('Training eopch: %d, cost: %f' % (epoch, this_cost))

        if this_cost < best_cost:
            best_cost = this_cost
            num_epoch_not_improve = 0
        # NOTE(review): right after an improvement best_cost == this_cost, so
        # the counter goes straight back to 1; kept to preserve the original
        # stopping point.
        if this_cost >= best_cost:
            num_epoch_not_improve = num_epoch_not_improve + 1
        if num_epoch_not_improve >= max_num_epoch_not_improve:
            break

    end_time = time.clock()
    training_time = end_time - start_time
    print('Training time: %f' % (training_time / 60))

    # Return the trained model and the reduced training set.
    extracted = da.get_hidden_values(train_set_x)
    get_extracted = theano.function([], extracted)
    train_set_x_extracted = get_extracted()
    return da, train_set_x_extracted, training_time
def train_model(rng=numpy.random.RandomState(100), train_set_x_org=None,
                n_hidden=100, learning_rate=0.1, training_epochs=100,
                batch_size=100, persistent_chain_k=15):
    """
    Train a RBM model given training data.

    INPUTS:
    rng: numpy random number state.

    train_set_x_org: numpy 2d array, each row is a training sample.

    n_hidden: int, number of hidden units.

    learning_rate: float scalar, the initial learning rate.

    training_epochs: int scalar, the maximal number of epochs.

    batch_size: int scalar, minibatch size.

    persistent_chain_k: length of persistent chain from the last sampling to
    new sampling.

    OUTPUTS:
    rbm: object of RBM. The model learned.

    mean_hidden: numpy 2d array, each row is a reduced training sample.

    training_time: training time, the difference of two time.clock()
    readings.
    """
    train_set_x = theano.shared(
        numpy.asarray(train_set_x_org, dtype=theano.config.floatX),
        borrow=True)

    # Number of training minibatches. float() forces true division so that
    # math.ceil really rounds up under Python 2 as well; with integer
    # division the trailing partial minibatch was silently dropped.
    n_train = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches = int(math.ceil(n_train / float(batch_size)))

    # Symbolic variables for the data.
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # matrix, each row is a sample

    # Shared variable so the learning rate can be reduced during training.
    learning_rate_shared = theano.shared(learning_rate,
                                         name='learn_rate_shared')
    decay_rate = T.scalar(name='decay_rate', dtype=theano.config.floatX)
    reduce_learning_rate = theano.function(
        [decay_rate], learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    n_visible = train_set_x_org.shape[1]  # number of input features
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # Storage for the persistent chain (state = hidden layer of chain).
    persistent_chain = theano.shared(
        numpy.zeros((batch_size, n_hidden), dtype=theano.config.floatX),
        borrow=True)

    # construct the RBM class
    rbm = RBM(input=x, n_visible=n_visible, n_hidden=n_hidden,
              numpy_rng=rng, theano_rng=theano_rng)

    # Cost and updates for persistent_chain_k sampling steps.
    # NOTE(review): the plain float `learning_rate` is passed here, not
    # `learning_rate_shared`, so reduce_learning_rate() below does not appear
    # to influence these updates -- confirm against RBM.get_cost_updates
    # before switching to the shared variable.
    cost, updates = rbm.get_cost_updates(lr=learning_rate,
                                         persistent=persistent_chain,
                                         k=persistent_chain_k)

    # Function that returns the cost of one minibatch and applies the RBM
    # parameter updates.
    train_rbm_one_iteration = theano.function(
        [index], cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]},
        name='train_rbm')

    # Optimization, gradient descent.
    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 2 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    epoch_change_count = 0
    best_cost = numpy.inf
    # Bound up front: a NaN/inf cost in the first epoch never compares as an
    # improvement and previously left this counter undefined (NameError).
    num_epoch_not_improve = 0

    # Train the model using the training set.
    start_time = time.clock()
    for epoch in xrange(training_epochs):
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(0.5)
            max_num_epoch_change_learning_rate = (
                cl.change_max_num_epoch_change_learning_rate(
                    max_num_epoch_change_learning_rate,
                    max_num_epoch_change_rate))
            max_num_epoch_not_improve = 2 * max_num_epoch_change_learning_rate
            epoch_change_count = 0

        # costs of all minibatches of this epoch
        c = [train_rbm_one_iteration(batch_index)
             for batch_index in xrange(n_train_batches)]
        this_cost = numpy.mean(c)
        print('Training eopch: %d, cost: %f' % (epoch, this_cost))

        if this_cost < best_cost:
            best_cost = this_cost
            num_epoch_not_improve = 0
        # NOTE(review): right after an improvement best_cost == this_cost, so
        # the counter goes straight back to 1; kept to preserve the original
        # stopping point.
        if this_cost >= best_cost:
            num_epoch_not_improve = num_epoch_not_improve + 1
        if num_epoch_not_improve >= max_num_epoch_not_improve:
            break

    end_time = time.clock()
    training_time = end_time - start_time
    print('Training time: %f' % (training_time / 60))

    # Return the trained model and the reduced training set.
    extracted = rbm.propup(train_set_x)
    get_extracted = theano.function([], extracted)
    pre_activation, mean_hidden = get_extracted()
    return rbm, mean_hidden, training_time
def train_model(train_set_x_org=None, train_set_y_org=None, valid_set_x_org=None, valid_set_y_org=None,
                n_row_each_sample=1,
                learning_rate=0.1, alpha=0.1, n_epochs=1000, rng=numpy.random.RandomState(1000),
                nkerns=[4, 4, 4], batch_size=500,
                receptive_fields=((2, 8), (2, 8), (2, 8)), poolsizes=((1, 8), (1, 8), (1, 4)), full_hidden=16):
    """
    Train a convolutional neural network with minibatch updates and early stopping.

    The validation loss is evaluated once per epoch (after the last minibatch).
    The parameters that gave the lowest validation loss are restored into the
    classifier before it is returned.

    INPUTS:
    train_set_x_org: numpy 2d array, each row is a training sample.

    train_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of training samples.

    valid_set_x_org: numpy 2d array, each row is a validation sample.
    This set is used to monitor the convergence of the optimization.

    valid_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of validation samples.

    n_row_each_sample: int, the number of rows each vectorized sample has once it is
    reshaped into a matrix. The vectorized sample is in the form of
    [row_0,row_1,...,row_{n_row_each_sample-1}]; the number of columns is
    (number of features) // n_row_each_sample.

    learning_rate: float, the initial learning rate. It is multiplied by 0.8 every time
    the learning-rate epoch counter reaches its current period (initially 100 epochs).

    alpha: float, parameter to trade off the momentum term.

    n_epochs: int, maximal number of epochs allowed.

    rng: numpy random number state.

    nkerns: list, tuple, or vector, nkerns[i] is the number of feature maps in the i-th convolutional layer.

    batch_size: int, minibatch size. Only n_train // batch_size full minibatches are
    iterated per epoch.

    receptive_fields: list or tuple of the same length as nkerns,
    receptive_fields[i] is a list or tuple of length 2, the size of the receptive field in the
    i-th convolutional layer: (#rows of the receptive field, #columns of the receptive field).

    poolsizes: list or tuple of the same length as nkerns, the size of the pooling region that is
    reduced to a scalar. poolsizes[i]=(#rows, #columns).

    full_hidden: the number of hidden units fully connected to the units in the previous layer.

    OUTPUTS:
    classifier: object of the cnn class, the trained model.

    training_time: float, difference of time.clock() readings taken around the training loop
    (seconds). Note that the progress message prints this value divided by 60.

    RAISES:
    ValueError: if the training set holds fewer than batch_size samples, so that
    not a single full minibatch can be formed.
    """
    train_set_x = theano.shared(numpy.asarray(train_set_x_org, dtype=theano.config.floatX), borrow=True)
    train_set_y = T.cast(
        theano.shared(numpy.asarray(train_set_y_org, dtype=theano.config.floatX), borrow=True), 'int32')
    valid_set_x = theano.shared(numpy.asarray(valid_set_x_org, dtype=theano.config.floatX), borrow=True)
    valid_set_y = T.cast(
        theano.shared(numpy.asarray(valid_set_y_org, dtype=theano.config.floatX), borrow=True), 'int32')

    n_train = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches = n_train // batch_size
    if n_train_batches == 0:
        # Without this guard the epoch loop would spin without training or
        # validating anything and then fail on an undefined "best" model.
        raise ValueError('batch_size (%d) is larger than the number of training samples (%d).'
                         % (batch_size, n_train))

    # shared variable holding the learning rate, so it can be decayed in place
    learning_rate_shared = theano.shared(learning_rate, name='learn_rate_shared')
    decay_rate = T.scalar(name='decay_rate', dtype=theano.config.floatX)
    reduce_learning_rate = theano.function(
        [decay_rate], learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    num_feat = train_set_x.get_value(borrow=True).shape[1]
    # each vectorized sample is viewed as a (rows, columns) matrix
    input_size = (n_row_each_sample, num_feat // n_row_each_sample)
    n_out = len(numpy.unique(train_set_y_org))  # number of classes
    classifier = cnn(rng=rng, batch_size=batch_size, input_size=input_size,
                     nkerns=nkerns, receptive_fields=receptive_fields, poolsizes=poolsizes,
                     full_hidden=full_hidden, n_out=n_out)

    train_model_one_iteration = classifier.build_train_function(
        train_set_x, train_set_y, batch_size, alpha, learning_rate_shared)
    validate_model = classifier.build_valid_function(valid_set_x, valid_set_y, batch_size)

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # learning-rate schedule and early-stopping parameters
    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    learning_rate_decay_rate = 0.8
    epoch_change_count = 0
    patience = 1000000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = n_train_batches  # validate once per epoch

    best_validation_loss = numpy.inf
    best_model_params = None  # stays None until a validation pass improves on the best loss
    num_epoch_not_improve = 0  # consecutive validation passes without improvement

    start_time = time.clock()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        epoch_change_count += 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate = cl.change_max_num_epoch_change_learning_rate(
                max_num_epoch_change_learning_rate, max_num_epoch_change_rate)
            max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
            epoch_change_count = 0
        print('The current learning rate is  %s' % learning_rate_shared.get_value())

        for minibatch_index in range(n_train_batches):
            # number of minibatches processed before this one, over all epochs
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if iteration % 100 == 0:
                print('training @ iter =  %d' % iteration)

            train_model_one_iteration(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:
                # compute the mean loss on the validation set
                this_validation_loss = numpy.mean(validate_model())
                print('epoch %i, minibatch %i/%i, validation error %0.4f %%' %
                      (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve = 0
                    # extend patience if the improvement is large enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)
                    best_validation_loss = this_validation_loss
                    # keep a copy of the currently best model parameters
                    best_model_params = classifier.get_params()
                else:
                    # Count a stalled validation pass only when the loss did not
                    # improve; the counter must not be bumped right after a reset.
                    num_epoch_not_improve += 1

            if patience <= iteration or num_epoch_not_improve >= max_num_epoch_not_improve:
                done_looping = True
                break

    # restore the best parameters seen (none exist if validation never improved)
    if best_model_params is not None:
        classifier.set_params(best_model_params)

    end_time = time.clock()
    training_time = end_time - start_time
    print('Training time: %f' % (training_time / 60))
    print('Optimization complete with best validation score of %f,' % (best_validation_loss * 100.))
    return classifier, training_time