def loadData(self, data):
    if isinstance(data, dict):
        # 'data' is a dict of per-user preferences for training
        self.prefsOnUser = data
    elif isinstance(data, str):
        # 'data' is a file path to the training data
        self.prefsOnUser = tool.loadData(data)
    self.prefs = tool.transposePrefs(self.prefsOnUser)
    self.itemList = self.prefs.keys()
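# The loadData variant above relies on tool.transposePrefs to flip a
# user->item preference mapping into an item->user mapping (which is why
# self.prefs.keys() then yields the item list). A minimal sketch of what such
# a helper could look like -- hypothetical, the real implementation lives in
# the project's tool module:
def transposePrefs(prefsOnUser):
    """Turn {user: {item: rating}} into {item: {user: rating}}."""
    prefs = {}
    for user, ratings in prefsOnUser.items():
        for item, rating in ratings.items():
            prefs.setdefault(item, {})[user] = rating
    return prefs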
def loadData(self, data):
    if isinstance(data, dict):
        # 'data' is a dict of per-user preferences for training
        self.prefs = data
    elif isinstance(data, str):
        # 'data' is a file path to the training data
        self.prefs = tool.loadData(data)
    # Collect every item seen in any user's preferences
    # (a dict with None values is used as a set here).
    self.itemList = {}
    for user in self.prefs:
        for item in self.prefs[user]:
            self.itemList[item] = None
#!/usr/bin/env python
# coding=utf-8
from tool import loadData
import numpy as np
from sklearn import decomposition, svm

train, valid, test = loadData("newIndian60percentN.mat")
X_train, Y_train = train
X_valid, Y_valid = valid
X_test, Y_test = test

# Reduce the spectral bands to 100 whitened principal components.
pca = decomposition.RandomizedPCA(n_components=100, whiten=True)
pca.fit(X_train)
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

clf = svm.SVC(C=1.2, kernel='linear', gamma=0.0008, probability=True,
              tol=0.000000001, verbose=True, max_iter=-1)
# Fit and score on the PCA-projected features (the original code fitted on
# the raw X_train, leaving the PCA step unused).
clf.fit(X_train_pca, Y_train)
print "Mean accuracy on the test set:"
print clf.score(X_test_pca, Y_test)
#result = clf.predict(X_train_pca)
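# Note: sklearn's RandomizedPCA was deprecated and later removed. On a modern
# sklearn, an equivalent setup (a sketch, assuming the same 100-component
# whitened projection is wanted) chains PCA and the SVM in a Pipeline so the
# projection is fitted and applied consistently:
#
#     from sklearn.pipeline import Pipeline
#     from sklearn.decomposition import PCA
#     from sklearn.svm import SVC
#
#     clf_pipe = Pipeline([
#         ('pca', PCA(n_components=100, whiten=True, svd_solver='randomized')),
#         ('svm', SVC(C=1.2, kernel='linear', probability=True)),
#     ])
#     clf_pipe.fit(X_train, Y_train)
#     print(clf_pipe.score(X_test, Y_test))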
def train(bcontinue_train=False, params_fileName='params.mat',
          structure_fileName='structure.txt', learning_rate=0.01,
          batch_size=9, n_epochs=1000000):
    # 8*11*17 = 1496, 3*12*41 = 1476
    # Other datasets tried: 'U_PaviaReducedData1960_10.mat',
    # 'VegetationReducedData1500_10.mat', 'PaviaU_ReducedData.mat',
    # 'Indian_pines_ReducedData_200_9.mat', 'Salinas_ReducedData_200_16.mat',
    # 'Salinas_ReducedData_600_16.mat'
    datasets = tool.loadData('newKSC1N4.mat', -1, False)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print '... building the model'
    if bcontinue_train:
        previous_params = tool.loadParams(params_fileName)
        (convDims, convNodes, convKernels, convFilters, convPools,
         convLayers, fullNodes, fullLayers) = tool.loadStructure(
            structure_fileName)
    else:
        previous_params = None
        convDims = len(train_set_x.get_value(borrow=True).shape) - 1
        convNodes = train_set_x.get_value(borrow=True).shape[1]
        convKernels = [30]
        convFilters = [21]
        convPools = [5]
        convLayers = [layer.ConvPoolLayer]
        fullNodes = [100, train_set_y.get_value(borrow=True).max() + 1]
        fullLayers = [layer.FullLayer, layer.SoftmaxLayer]

    '''convKernels = [30]
    convFilters = [14]
    convPools = [3]
    convLayers = [layer.ConvPoolLayer]
    fullNodes = [100, train_set_y.get_value(borrow=True).max()+1]
    fullLayers = [layer.FullLayer, layer.SoftmaxLayer]'''

    # ok NN
    # deepCNN
    # ok class accuracy in column figure
    # dataRateDistribution all in Pavia
    # area cluster image
    # testing time

    '''convKernels = []
    convFilters = []
    convPools = []
    convLayers = []
    fullNodes = [100, 50, train_set_y.get_value(borrow=True).max()+1]
    fullLayers = [layer.FullLayer, layer.FullLayer, layer.SoftmaxLayer]'''

    '''convKernels = [20, 30]
    convFilters = [9, 5]
    convPools = [2, 2]
    convLayers = [layer.ConvPoolLayer, layer.ConvPoolLayer]
    fullNodes = [100, train_set_y.get_value(borrow=True).max()+1]
    fullLayers = [layer.FullLayer, layer.SoftmaxLayer]'''

    train_model, validate_model, test_model, params = buildMode(
        train_set_x, train_set_y, valid_set_x, valid_set_y,
        test_set_x, test_set_y, batch_size, bcontinue_train, previous_params,
        convDims, convNodes, convKernels, convFilters, convPools, convLayers,
        fullNodes, fullLayers)

    print '... training the model'
    times = [0.0]
    accus = [0.0]
    costs = [0.0]
    epoch = 0
    cost = 1.0
    test_score = 0.0
    best_validation_loss = numpy.inf
    b_save_best_params = False
    best_params = None
    show_valid = False
    start_time = time.clock()
    Keyboard.StartKeyboardListener()
    while (epoch < n_epochs) and (cost > 0.0001):
        epoch = epoch + 1
        show_valid = False
        key = Keyboard.KeyDown()
        if key == 'q':
            break
        elif key == '+':
            learning_rate = learning_rate * 1.2
        elif key == '-':
            learning_rate = learning_rate / 1.2
        elif key == 'c':
            b_save_best_params = not b_save_best_params
            print 'b_save_best_params: ', b_save_best_params
        elif key == 'x':
            tool.saveParams(params_fileName, params)
            tool.saveStructure(structure_fileName, convDims, convNodes,
                               convKernels, convFilters, convPools,
                               convLayers, fullNodes, fullLayers)
            print 'save_cur_params'
        elif key == 's':
            tool.saveParams(params_fileName, best_params)
            tool.saveStructure(structure_fileName, convDims, convNodes,
                               convKernels, convFilters, convPools,
                               convLayers, fullNodes, fullLayers)
            print('test best error: %f %%' % (test_score * 100.))
            show_valid = True
        cost = 0
        for minibatch_index in xrange(n_train_batches):
            cost += train_model(minibatch_index, learning_rate)
        cost /= n_train_batches
        if epoch % 100 != 1 and not show_valid:
            print('epoch %i, batches %i, rate %.4f, cost %.4f'
                  % (epoch, n_train_batches, learning_rate, cost))
            continue
        validation_losses = [validate_model(i)
                             for i in xrange(n_valid_batches)]
        this_validation_loss = numpy.mean(validation_losses)
        print('epoch %i, batches %i, rate %.4f, cost %.4f, '
              'validation error %.4f %%'
              % (epoch, n_train_batches, learning_rate, cost,
                 this_validation_loss * 100.))
        if this_validation_loss < best_validation_loss:
            best_validation_loss = this_validation_loss
            #test_losses = [test_model(i)
            #               for i in xrange(n_test_batches)]
            #test_score = numpy.mean(test_losses)
            test_losses = validation_losses
            test_score = this_validation_loss
            best_params = copy.deepcopy(params)
            times.append((time.clock() - start_time) / 60.0)
            accus.append(100.0 * (1.0 - test_score))
            costs.append(cost)
            if b_save_best_params:
                tool.saveParams(params_fileName, best_params)
                tool.saveStructure(structure_fileName, convDims, convNodes,
                                   convKernels, convFilters, convPools,
                                   convLayers, fullNodes, fullLayers)
            print((' epoch %i, batches %i, time %.2f, test error of best'
                   ' model %.4f %%')
                  % (epoch, n_train_batches, time.clock() - start_time,
                     test_score * 100.))
    Keyboard.StopKeyboardListener()
    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%')
          % (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % (end_time - start_time))
    tool.saveParams(params_fileName, best_params)
    tool.saveStructure(structure_fileName, convDims, convNodes, convKernels,
                       convFilters, convPools, convLayers, fullNodes,
                       fullLayers)
    tool.saveList('./times.txt', times)
    tool.saveList('./accus.txt', accus)
    tool.saveList('./costs.txt', costs)
    times = tool.loadList('./times.txt')
    accus = tool.loadList('./accus.txt')
    costs = tool.loadList('./costs.txt')
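# tool.saveList / tool.loadList above are project helpers whose on-disk
# format is not shown. A minimal stand-in that matches how they are used here
# (an assumption: one float per line in a plain text file):
def saveList(path, values):
    with open(path, 'w') as f:
        for v in values:
            f.write('%f\n' % v)

def loadList(path):
    with open(path) as f:
        return [float(line) for line in f]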
def InitCNNModel(fileName, neighbor_strategy):
    batch_size = 9
    rng = numpy.random.RandomState(23455)

    datasets = tool.loadData(fileName)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Three symbolic variables driving the feature vectors:
    # data, labels and minibatch index.
    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar()

    # Learning rate: the step size taken toward the optimum on each update.
    # Too large easily traps the search in a poor local optimum; too small
    # makes training very slow.
    learning_rate = 0.01

    #__todo__ = 'The CNN structure is fixed for now; make it more flexible and configurable later.'

    # Input layer width equals the dimensionality of the feature vectors.
    input_nodes = train_set_x.get_value(borrow=True).shape[1]

    # Build the first convolutional layer: the input is stretched into an
    # input_nodes x 1 format.
    layer0_input = x.reshape((batch_size, 1, input_nodes, 1))
    layer0_conv_kernel_number = 20
    # Convolution stride; its value equals the neighborhood-strategy value + 1.
    kernel_jump_step = neighbor_strategy + 1
    layer0_conv_kernel_size = int(math.ceil(
        input_nodes / kernel_jump_step / 9. * kernel_jump_step))
    n2_nodes_number = ((input_nodes - layer0_conv_kernel_size) /
                       kernel_jump_step + 1)
    n3_nodes_number = 40
    layer1_max_pool_kernel_size = math.ceil(
        float(n2_nodes_number / n3_nodes_number))
    # max_pool_kernel_size = math.ceil()
    layer0 = ConvolutionalLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, input_nodes, 1),
        filter_shape=(layer0_conv_kernel_number, 1,
                      layer0_conv_kernel_size, 1),
        poolsize=(int(layer1_max_pool_kernel_size), 1)
    )

    # Fully connected hidden layer sitting between the conv/max-pooling
    # layer and the output layer.
    layer1_input = layer0.output.flatten(2)
    layer1 = HiddenLayer(
        rng,
        input=layer1_input,
        n_in=layer0_conv_kernel_number * n3_nodes_number * 1,
        n_out=100,
        activation=T.tanh
    )

    # Output layer connecting the hidden layer to the class predictions;
    # its operation is effectively a logistic regression.
    n_out_nodes = train_set_y.get_value(borrow=True).max() + 1
    layer2 = LogisticRegressionLayer(
        input=layer1.output,
        n_in=100,
        n_out=n_out_nodes
    )

    cost = layer2.negative_log_likelihood(y)
    test_model = theano.function(
        [index],
        layer2.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    validate_model = theano.function(
        [index],
        layer2.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    params = layer2.params + layer1.params + layer0.params
    # Gradients of the cost w.r.t. every weight parameter, so the network can
    # be trained by gradient descent.
    grads = T.grad(cost, params)
    # Weight-update rule.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
    # Training function based on backpropagation gradient descent, built from
    # the update rule and the parameters above.
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()
    epoch = 0
    # n_epochs: the maximum number of optimization passes over the network.
    n_epochs = 200
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
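# A quick sanity check of the early-stopping arithmetic above (a sketch; the
# 1476 training samples are taken from the 3*12*41 = 1476 note in train()):
n_train = 1476
batch_size = 9
n_train_batches = n_train // batch_size  # 164 minibatches per epoch
patience = 10000
validation_frequency = min(n_train_batches, patience // 2)  # min(164, 5000)
assert validation_frequency == n_train_batches  # validate once per epoch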
        temp = numpy.ndarray(CTest[i])
        for j in xrange(CTest[i]):
            if typeId == -1:
                temp[j] = i
            elif typeId == i:
                temp[j] = 0
            else:
                temp[j] = 1
        test_set_y = numpy.hstack((test_set_y, temp))
    # Convert the accumulated label vector to int32 (the original assigned
    # numpy.asarray(CTest, ...) here, which discarded the labels just built).
    test_set_y = numpy.asarray(test_set_y, dtype='int32')
    # test_set_x.setflags(align=1)
    # test_set_y.setflags(align=1)
    return test_set_x, test_set_y


if __name__ == '__main__':
    test_set_x, test_set_y = tool.loadData('newKSC1N4.mat', -1)[2]
    test_set_x = test_set_x.get_value()
    test_set_y = test_set_y.get_value()
    inputNodes = test_set_x.shape[1]
    outputNodes = test_set_y.max() + 1
    Detect_model = buildMode(inputNodes, outputNodes)
    ok = 0
    sum = 0
    typeOk = numpy.zeros((outputNodes, outputNodes), dtype='int32')
    typeSum = numpy.zeros(outputNodes, dtype='int32')
    datas = [[] for i in xrange(outputNodes)]
    for i in xrange(outputNodes):
        for j in xrange(test_set_x.shape[0]):
            if test_set_y[j] == i:
                [label, prob] = Detect_model(test_set_x[j:j+1])
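# The per-class loop above appears to accumulate confusion counts in
# typeOk / typeSum. A compact numpy sketch of the same bookkeeping
# (hypothetical names; y_true / y_pred are the collected ground-truth and
# predicted labels):
import numpy

def confusion_counts(y_true, y_pred, n_classes):
    # cm[i, j] counts samples of true class i predicted as class j;
    # per-class accuracy is then cm.diagonal() / cm.sum(axis=1).
    cm = numpy.zeros((n_classes, n_classes), dtype='int32')
    for t, p in zip(y_true, y_pred):
        cm[t, p] += 1
    return cm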
def evaluate_lenet5(learning_rate=0.1, n_epochs=200, dataset='newKSC1N4.mat',
                    nkerns=[20, 50], batch_size=9):
    """ Demonstrates lenet on a hyperspectral dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = tool.loadData(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    print(train_set_x)
    print(train_set_y)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as spectral feature vectors
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape the matrix of spectral vectors of shape
    # (batch_size, spectralBands) to a 4D tensor of shape
    # (batch_size, 1, spectralBands, 1), compatible with LeNetConvPoolLayer.
    spectralBands = train_set_x.get_value(borrow=True).shape[1]
    n1 = spectralBands
    # neighbor = 5
    step = 5
    layer0_input = x.reshape((batch_size, 1, spectralBands, 1))
    # kernelSize = math.ceil(spectralBands / 9) * 5
    k1 = (math.ceil(spectralBands / step / batch_size)) * step
    n3 = 40
    n2 = (n1 - k1) / step + 1
    # k2 = math.ceil((spectralBands - k1 + 1) / n3)
    k2 = math.ceil(n2 / n3)
    classNumber = train_set_y.get_value(borrow=True).max() + 1
    n5 = classNumber
    n4 = 100

    # Construct the first convolutional pooling layer:
    # convolving with a (k1, 1) filter reduces the spectral width from n1 to
    # n2 = (n1 - k1)/step + 1; max-pooling with a (k2, 1) window reduces it
    # further toward n3. The 4D output tensor is of shape
    # (batch_size, 20, n3, 1).
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, spectralBands, 1),
        filter_shape=(20, 1, int(k1), 1),
        poolsize=(int(k2), 1)
    )

    # A second convolutional pooling layer (from the original LeNet tutorial)
    # is kept here commented out:
    # layer1 = LeNetConvPoolLayer(
    #     rng,
    #     input=layer0.output,
    #     image_shape=(batch_size, nkerns[0], 12, 12),
    #     filter_shape=(nkerns[1], nkerns[0], 5, 5),
    #     poolsize=(2, 2)
    # )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_features). This will generate a matrix of shape
    # (batch_size, 20 * 40 * 1) = (batch_size, 800).
    layer1_input = layer0.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer1 = HiddenLayer(
        rng,
        input=layer1_input,
        n_in=20 * 40 * 1,
        n_out=100,
        activation=T.tanh
    )

    # classify the values of the fully-connected sigmoidal layer
    layer2 = LogisticRegression(input=layer1.output, n_in=100,
                                n_out=classNumber)

    # the cost we minimize during training is the NLL of the model
    cost = layer2.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer2.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer2.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i])
    # pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()
    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
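# A minimal entry point so the file can be run directly (a sketch following
# the original Theano LeNet tutorial's convention):
if __name__ == '__main__':
    evaluate_lenet5()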