def _copy_mlp_params(source, target):
    """Make ``target`` use the current layer parameters of ``source``.

    Every layer stores its own ``W`` and ``b``: the output layer's live on
    ``output_layer`` and each hidden layer's live on the matching entry of
    ``hidden_layer_list``.  The parameters are assigned by reference; no
    array is copied.
    """
    target.output_layer.W = source.output_layer.W
    target.output_layer.b = source.output_layer.b
    for idx, src_layer in enumerate(source.hidden_layer_list):
        dst_layer = target.hidden_layer_list[idx]
        dst_layer.W = src_layer.W
        dst_layer.b = src_layer.b


def _mlp_mean_error(model, set_x, set_y, n_batches, batch_size):
    """Return the mean of ``model.errors()`` over ``n_batches`` minibatches.

    Minibatch ``i`` is rows ``i * batch_size`` to ``(i + 1) * batch_size``
    of ``set_x`` / ``set_y``.
    """
    losses = []
    for i in xrange(n_batches):
        rows = slice(i * batch_size, (i + 1) * batch_size)
        model.input = numpy.array(set_x[rows])
        model.y = numpy.array(set_y[rows])
        # Original author's note: feedforward should be called before
        # errors() so that every layer's output, up to the output layer,
        # has been computed for this minibatch.
        # TODO (from the original): feedforward already computes
        # p_y_given_x, but LG_MNIST computes it a second time.
        model.feedforward(model.input)
        losses.append(model.errors())
    return numpy.mean(numpy.array(losses))


def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    This is demonstrated on MNIST.  Training uses early stopping driven by
    the validation error; progress is printed to stdout and the total run
    time to stderr.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient); passed to ``MLP.backpropagation``

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
                   regularization); currently unused, only L2 is applied

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
                   regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type n_hidden: int
    :param n_hidden: ``n_hidden`` argument forwarded to the ``MLP``
                     constructor
    """
    datasets = load_data(dataset)

    # Shapes noted by the original author for MNIST:
    # train (50000, 784) / (50000,), valid and test (10000, 784) / (10000,).
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing;
    # `//` keeps the counts integral whatever division semantics are active
    n_train_batches = train_set_x.shape[0] // batch_size
    n_valid_batches = valid_set_x.shape[0] // batch_size
    n_test_batches = test_set_x.shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    rng = numpy.random.RandomState(1234)

    # The model being trained.  n_hidden is the number of hidden units;
    # the original author notes there is presumably a single hidden layer.
    classifier = MLP(rng=rng, n_in=28 * 28, n_hidden=n_hidden, n_out=10)

    # Separate models used only for evaluation; their parameters are
    # replaced by the trained model's before every evaluation pass.
    classifier_validation = MLP(rng=rng, n_in=28 * 28, n_hidden=n_hidden,
                                n_out=10)
    classifier_test = MLP(rng=rng, n_in=28 * 28, n_hidden=n_hidden, n_out=10)

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            rows = slice(minibatch_index * batch_size,
                         (minibatch_index + 1) * batch_size)
            classifier.input = train_set_x[rows]
            classifier.y = train_set_y[rows]

            # Update W and b: run the forward pass first to obtain every
            # layer's output, then back-propagate.  Only the L2 penalty is
            # used for now; L1 is not applied yet.
            classifier.feedforward(classifier.input)
            classifier.backpropagation(classifier.input, classifier.y,
                                       learning_rate, L2_reg=L2_reg)

            # NOTE(review): the cost is computed but not used or printed
            # here; kept in case negative_log_likelihood() has side effects.
            minibatch_avg_cost = (classifier.negative_log_likelihood() +
                                  L2_reg * classifier.L2_sqr)

            # iteration number
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                _copy_mlp_params(classifier, classifier_validation)
                this_validation_loss = _mlp_mean_error(
                    classifier_validation, valid_set_x, valid_set_y,
                    n_valid_batches, batch_size)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set; the parameters are the same
                    # for every test minibatch, so copy them only once
                    _copy_mlp_params(classifier, classifier_test)
                    test_score = _mlp_mean_error(
                        classifier_test, test_set_x, test_set_y,
                        n_test_batches, batch_size)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
def _copy_cnn_params(source, target):
    """Make ``target`` use the current ``W`` and ``b`` of ``source``.

    The parameters of ``layer3`` down to ``layer0`` are assigned by
    reference; no array is copied.
    """
    layer_pairs = (
        (source.layer3, target.layer3),
        (source.layer2, target.layer2),
        (source.layer1, target.layer1),
        (source.layer0, target.layer0),
    )
    for src_layer, dst_layer in layer_pairs:
        dst_layer.W = src_layer.W
        dst_layer.b = src_layer.b


def _cnn_mean_error(model, set_x, set_y, n_batches, batch_size):
    """Return the mean of ``model.errors(y)`` over ``n_batches`` minibatches.

    Minibatch ``i`` is rows ``i * batch_size`` to ``(i + 1) * batch_size``
    of ``set_x`` / ``set_y``; the inputs are reshaped to
    ``(batch_size, 1, 28, 28)`` before the forward pass.
    """
    losses = []
    for i in xrange(n_batches):
        rows = slice(i * batch_size, (i + 1) * batch_size)
        batch_input = set_x[rows].reshape(batch_size, 1, 28, 28)
        batch_y = set_y[rows]
        # Original author's note: feedforward should be called before
        # errors() so that every layer's output, up to the output layer,
        # has been computed for this minibatch.
        model.feedforward(batch_input)
        losses.append(model.errors(batch_y))
    return numpy.mean(numpy.array(losses))


def test_cnn(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=500, n_hidden=500):
    """
    Train a convolutional network with minibatch gradient descent and
    early stopping driven by the validation error.

    Progress, the per-minibatch cost and the per-epoch wall time are
    printed to stdout; the total run time is printed to stderr.

    :type learning_rate: float
    :param learning_rate: learning rate passed to ``cnn.back_propogation``

    :type L1_reg: float
    :param L1_reg: currently unused; only the L2 penalty is applied

    :type L2_reg: float
    :param L2_reg: L2-norm's weight, passed to ``back_propogation`` and
                   used for the reported minibatch cost

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path of the dataset file handed to ``load_data``

    :type batch_size: int
    :param batch_size: number of examples per minibatch; also passed to
                       the ``cnn`` constructor

    :type n_hidden: int
    :param n_hidden: currently unused
    """
    datasets = load_data(dataset)

    # Shapes noted by the original author for MNIST:
    # train (50000, 784) / (50000,), valid and test (10000, 784) / (10000,).
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing;
    # `//` keeps the counts integral whatever division semantics are active
    n_train_batches = train_set_x.shape[0] // batch_size
    n_valid_batches = valid_set_x.shape[0] // batch_size
    n_test_batches = test_set_x.shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    rng = numpy.random.RandomState(1234)
    nkerns = [20, 50]

    # The model being trained, plus two models used only for evaluation;
    # the latter receive the trained parameters before every evaluation.
    classifier = cnn(rng, nkerns, batch_size)
    classifier_validation = cnn(rng, nkerns, batch_size)
    classifier_test = cnn(rng, nkerns, batch_size)

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        start_time_epoch = timeit.default_timer()
        print("-----------------------------------------------------")
        # NOTE: printed before the increment, so this banner is 0-based
        # while the validation/test messages below are 1-based.
        print("epoch :" + str(epoch))
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            rows = slice(minibatch_index * batch_size,
                         (minibatch_index + 1) * batch_size)
            train_subset_y = train_set_y[rows]
            # reshaped to (batch_size, 1, 28, 28)
            classifier_input = train_set_x[rows].reshape(batch_size, 1,
                                                         28, 28)

            # Update W and b: run the forward pass first to obtain every
            # layer's output, then back-propagate.  Only the L2 penalty is
            # used for now; L1 is not applied yet.
            classifier.feedforward(classifier_input)
            classifier.back_propogation(classifier_input, train_subset_y,
                                        learning_rate, L2_reg=L2_reg)

            minibatch_avg_cost = (
                classifier.negative_log_likelihood(train_subset_y) +
                L2_reg * classifier.layer3.L2_sqr)
            print("minibatch_avg_cost" + str(minibatch_avg_cost))

            # iteration number
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                _copy_cnn_params(classifier, classifier_validation)
                this_validation_loss = _cnn_mean_error(
                    classifier_validation, valid_set_x, valid_set_y,
                    n_valid_batches, batch_size)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set; the parameters are the same
                    # for every test minibatch, so copy them only once
                    _copy_cnn_params(classifier, classifier_test)
                    test_score = _cnn_mean_error(
                        classifier_test, test_set_x, test_set_y,
                        n_test_batches, batch_size)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

        end_time_epoch = timeit.default_timer()
        print("Time: " + str(end_time_epoch - start_time_epoch))
        print("#############################################################")

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))