# Imports assumed by the functions below; the MLP, dA and
# LogisticRegression classes and the load_bionlp_data / dm.load_bionlp_data
# helpers are defined elsewhere in this repository.
import os
import sys
import time
from collections import OrderedDict

import numpy
import numpy as np
import theano
import theano.tensor as T
from theano.ifelse import ifelse
from theano.tensor.shared_randomstreams import RandomStreams


def test_mlp(learning_rate=0.05, L1_reg=0.00, L2_reg=0.001, n_epochs=1000,
             dataset='data/dataset_BioNlp2009_GE_95_all.pkl.gz',
             batch_size=32, n_hidden=2000):
    """
    MLP parameters

    :type learning_rate: float
    :param learning_rate: learning rate

    :type L1_reg: float
    :param L1_reg: weight of the L1 regularization term

    :type L2_reg: float
    :param L2_reg: weight of the L2 regularization term

    :type n_epochs: int
    :param n_epochs: maximum number of training epochs

    :type dataset: string
    :param dataset: path to the dataset file
    """
    datasets = load_bionlp_data(dataset)

    # shared variables
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute the number of minibatches
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    y = T.ivector('y')   # the labels are presented as a 1D vector of
                         # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=2577,
        n_hidden=n_hidden,
        n_out=10
    )

    # the cost function: negative log-likelihood plus regularization terms
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compute the error rate on the test set
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the error rate on the validation set
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # start-snippet-5
    # compute the gradient of the cost with respect to theta (stored in
    # params); the resulting gradients (for W(1) and W(2)) are stored in
    # the list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of the same size, where
    # each element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost and,
    # at the same time, updates the parameters of the model based on the
    # rules defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                   # go through this many minibatches before
                                   # checking the network on the validation
                                   # set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on the validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if the loss improvement is good enough
                    if (
                        this_validation_loss <
                        best_validation_loss * improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
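
# A self-contained sketch (plain numpy, imported above) of the update rule
# that `updates` encodes in test_mlp: zip pairs each parameter with its
# gradient, and every parameter moves one step against that gradient. The
# names and values below are illustrative, not part of the repository.
def sgd_step(params, gparams, learning_rate):
    # mirrors: updates = [(p, p - learning_rate * g)
    #                     for p, g in zip(classifier.params, gparams)]
    return [p - learning_rate * g for p, g in zip(params, gparams)]

if __name__ == '__main__':
    W = numpy.ones((3, 2))
    b = numpy.zeros(2)
    gW = numpy.full((3, 2), 0.5)
    gb = numpy.full(2, 0.1)
    W, b = sgd_step([W, b], [gW, gb], learning_rate=0.05)
    print W  # every entry: 1.0 - 0.05 * 0.5 = 0.975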


def test_dA(learning_rate=0.2, training_epochs=10,
            dataset=['../train.txt', None, '../test.txt'], batch_size=1):
    datasets = load_bionlp_data(dataset)
    train_set_x, train_set_y = datasets[0]

    # compute the number of minibatches for training
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar('index')  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    ####################################
    # BUILDING THE MODEL, CORRUPTION 0% #
    ####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_hidden=4,
        n_visible=4
    )

    cost, updates = da.get_cost_updates(
        corruption_level=0.0,
        learning_rate=learning_rate
    )

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through the training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))
        # print 'Training epoch %d, cost %f' % (epoch, numpy.mean(c))
        print da.get_hidden_values(train_set_x).eval()
        print da.get_restructed_error(train_set_x).eval()

    end_time = time.clock()
    print 'Elapsed time: %f' % (end_time - start_time)
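
# A self-contained numpy sketch of what dA.get_cost_updates is assumed to
# compute for a denoising autoencoder: corrupt the input with masking
# noise, encode, decode with tied weights, and take the mean cross-entropy
# reconstruction cost. The real implementation lives in the dA class (not
# shown here), so treat this as an illustration, not the repository's
# exact code.
def _sigmoid(a):
    return 1.0 / (1.0 + numpy.exp(-a))

def da_cost(x, W, b, b_prime, corruption_level, rng):
    # binomial masking noise: each input unit is zeroed with
    # probability corruption_level
    mask = rng.binomial(n=1, p=1.0 - corruption_level, size=x.shape)
    tilde_x = x * mask                           # corrupted input
    y = _sigmoid(numpy.dot(tilde_x, W) + b)      # hidden representation
    z = _sigmoid(numpy.dot(y, W.T) + b_prime)    # reconstruction (tied W)
    # cross-entropy between the clean input and its reconstruction
    L = -numpy.sum(x * numpy.log(z) + (1 - x) * numpy.log(1 - z), axis=1)
    return numpy.mean(L)

if __name__ == '__main__':
    demo_rng = numpy.random.RandomState(123)
    demo_x = demo_rng.binomial(n=1, p=0.5, size=(5, 4)).astype('float64')
    demo_W = demo_rng.uniform(-0.1, 0.1, size=(4, 4))
    print da_cost(demo_x, demo_W, numpy.zeros(4), numpy.zeros(4), 0.3,
                  demo_rng)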


def test_mlp(initial_learning_rate,       # learning rate
             learning_rate_decay,
             squared_filter_length_limit,
             n_epochs,
             batch_size,
             mom_params,                  # momentum: {'start': ..., 'end': ..., 'interval': ...}
             activations,
             dropout,
             dropout_rates,
             results_file_name,
             layer_sizes,                 # network structure
             dataset,                     # [train, valid, test]
             use_bias,
             random_seed=1234):

    assert len(layer_sizes) - 1 == len(dropout_rates)

    # extract the params for momentum
    mom_start = mom_params["start"]
    mom_end = mom_params["end"]
    mom_epoch_interval = mom_params["interval"]

    datasets = dm.load_bionlp_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute the number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    epoch = T.scalar()
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of
                         # [int] labels
    learning_rate = theano.shared(np.asarray(initial_learning_rate,
                                             dtype=theano.config.floatX))

    rng = np.random.RandomState(random_seed)

    # construct the MLP class
    classifier = MLP(rng=rng, input=x,
                     layer_sizes=layer_sizes,
                     dropout_rates=dropout_rates,
                     activations=activations,
                     use_bias=use_bias)

    # Build the expression for the cost function.
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)

    # Compile a Theano function for testing.
    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size:(index + 1) * batch_size]})
    # theano.printing.pydotprint(test_model, outfile="test_file.png",
    #                            var_with_name_simple=True)

    # Compile a Theano function for validation.
    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})
    # theano.printing.pydotprint(validate_model, outfile="validate_file.png",
    #                            var_with_name_simple=True)

    # Compute gradients of the model wrt parameters
    gparams = []
    for param in classifier.params:
        # Use the right cost function here to train with or without dropout.
        gparam = T.grad(dropout_cost if dropout else cost, param)
        gparams.append(gparam)

    # ... and allocate memory for momentum'd versions of the gradients
    gparams_mom = []
    for param in classifier.params:
        gparam_mom = theano.shared(np.zeros(param.get_value(borrow=True).shape,
                                            dtype=theano.config.floatX))
        gparams_mom.append(gparam_mom)

    # Compute the momentum for the current epoch: ramp linearly from
    # mom_start to mom_end over mom_epoch_interval epochs, then hold.
    mom = ifelse(epoch < mom_epoch_interval,
                 mom_start * (1.0 - epoch / mom_epoch_interval)
                 + mom_end * (epoch / mom_epoch_interval),
                 mom_end)

    # Update the step direction using momentum
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        # Misha Denil's original version:
        # updates[gparam_mom] = mom * gparam_mom + (1. - mom) * gparam

        # change the update rule to match Hinton's dropout paper
        updates[gparam_mom] = mom * gparam_mom \
            - (1. - mom) * learning_rate * gparam
    # ... and take a step along that direction
    for param, gparam_mom in zip(classifier.params, gparams_mom):
        # Misha Denil's original version:
        # stepped_param = param - learning_rate * updates[gparam_mom]

        # since we have included learning_rate in gparam_mom, we don't need
        # it here
        stepped_param = param + updates[gparam_mom]

        # This is a silly hack to constrain the norms of the rows of the
        # weight matrices. This just checks if there are two dimensions to
        # the parameter and constrains it if so... maybe this is a bit silly
        # but it should work for now.
        if param.get_value(borrow=True).ndim == 2:
            # squared_norms = T.sum(stepped_param**2, axis=1).reshape((stepped_param.shape[0], 1))
            # scale = T.clip(T.sqrt(squared_filter_length_limit / squared_norms), 0., 1.)
            # updates[param] = stepped_param * scale

            # constrain the norms of the COLUMNs of the weights, according to
            # https://github.com/BVLC/caffe/issues/109
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0,
                                   T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param

    # Compile a Theano function for training. It returns the training cost
    # and updates the model parameters.
    output = dropout_cost if dropout else cost
    train_model = theano.function(inputs=[epoch, index], outputs=output,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})
    # theano.printing.pydotprint(train_model, outfile="train_file.png",
    #                            var_with_name_simple=True)

    # Theano function to decay the learning rate. This is separate from the
    # training function because we only want to do this once per epoch
    # instead of after each minibatch.
    decay_learning_rate = theano.function(inputs=[], outputs=learning_rate,
            updates={learning_rate: learning_rate * learning_rate_decay})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    best_params = None
    best_validation_errors = np.inf
    best_iter = 0
    test_score = 0.
    epoch_counter = 0
    start_time = time.clock()

    results_file = open(results_file_name, 'wb')

    while epoch_counter < n_epochs:
        # Train this epoch
        epoch_counter = epoch_counter + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(epoch_counter, minibatch_index)

        # Compute loss on validation set
        validation_losses = [validate_model(i)
                             for i in xrange(n_valid_batches)]
        this_validation_errors = np.sum(validation_losses)

        # Report and save progress.
        print "epoch {}, validation error {}, learning_rate={}{}".format(
                epoch_counter, this_validation_errors,
                learning_rate.get_value(borrow=True),
                " **" if this_validation_errors < best_validation_errors
                      else "")

        best_validation_errors = min(best_validation_errors,
                                     this_validation_errors)
        results_file.write("{0}\n".format(this_validation_errors))
        results_file.flush()

        new_learning_rate = decay_learning_rate()

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_errors * 100., best_iter, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
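
# A self-contained numpy sketch of the momentum schedule and update rule
# used above: momentum ramps linearly from mom_start to mom_end over
# mom_epoch_interval epochs and then holds, and each step blends the
# previous velocity with the learning-rate-scaled gradient, following the
# rule from Hinton's dropout paper. All names and values below are
# illustrative, not part of the repository.
def momentum_for_epoch(epoch, mom_start, mom_end, mom_epoch_interval):
    # mirrors the ifelse expression above
    if epoch < mom_epoch_interval:
        frac = float(epoch) / mom_epoch_interval
        return mom_start * (1.0 - frac) + mom_end * frac
    return mom_end

def momentum_step(param, velocity, gradient, mom, learning_rate):
    # velocity update: mom * v - (1 - mom) * lr * g
    velocity = mom * velocity - (1.0 - mom) * learning_rate * gradient
    # stepped parameter: param + velocity
    return param + velocity, velocity

if __name__ == '__main__':
    w = numpy.zeros(3)
    v = numpy.zeros(3)
    g = numpy.ones(3)
    for demo_epoch in range(1, 4):
        m = momentum_for_epoch(demo_epoch, mom_start=0.5, mom_end=0.99,
                               mom_epoch_interval=500)
        w, v = momentum_step(w, v, g, m, learning_rate=1.0)
    print w, v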


def sgd_optimization(learning_rate=0.13, n_epochs=1000,
                     dataset=['../train.txt', None, '../test.txt'],
                     batch_size=1):
    datasets = dm.load_bionlp_data(dataset)

    train_set_x, train_set_y = datasets[0]
    if datasets[1] is not None:
        valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute the number of minibatches; the parameters are updated once
    # per minibatch
    # NOTE: the code below assumes a validation set is available
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data;
    # minibatches are fetched by index
    index = T.lscalar()  # index to a [mini]batch

    # x, y: one minibatch
    x = T.matrix('x')    # data
    y = T.ivector('y')   # labels, a 1D vector

    # construct the logistic regression class
    # (n_in = 28 * 28 is carried over from the MNIST tutorial)
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the loss function: negative log-likelihood
    cost = classifier.negative_log_likelihood(y)

    # compile Theano functions that compute the mistakes the model makes
    # on a minibatch of the test and validation sets
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradients
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # update the parameters
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost and,
    # at the same time, updates the parameters of the model based on the
    # rules defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'

    patience = 5000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        # each epoch makes one pass over the training set
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            # average loss over one minibatch
            minibatch_avg_cost = train_model(minibatch_index)

            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute the average loss on the validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(
                        (
                            '     epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % (end_time - start_time))
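
# A self-contained sketch of the early-stopping rule used in the training
# loops above: patience grows whenever the validation loss improves by a
# significant relative margin (improvement_threshold), and training halts
# once the iteration index reaches the patience. The loss sequence and the
# small starting patience are made up for illustration.
if __name__ == '__main__':
    patience = 4
    patience_increase = 2
    improvement_threshold = 0.995
    best_validation_loss = float('inf')

    for it, loss in enumerate([0.30, 0.20, 0.21, 0.21, 0.21, 0.21]):
        if loss < best_validation_loss:
            if loss < best_validation_loss * improvement_threshold:
                patience = max(patience, it * patience_increase)
            best_validation_loss = loss
        if patience <= it:
            print 'stopped at iteration %d, best loss %f' % (
                it, best_validation_loss)
            break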