beta = 3                 # weight of the sparsity penalty term
gradientCheck = False    # whether to run gradient checking

##======================================================================
## STEP 1: Load images
images = loadMnist.loadMnistImages(".\\dataset\\mnist\\train-images-idx3-ubyte")
patches = images[:, 0:100]

##======================================================================
## STEP 2: Initialize parameters
theta = sparseAutoencoder.initialize(hiddenSize, visibleSize)

##======================================================================
## STEP 3: Gradient checking
if gradientCheck:
    cost, grad = sparseAutoencoder.sparseAutoencoderCost(theta, visibleSize, hiddenSize,
                                                         lambda_, sparsityParam, beta, patches)
    J = lambda x: sparseAutoencoder.sparseAutoencoderCost(x, visibleSize, hiddenSize,
                                                          lambda_, sparsityParam, beta, patches)
    numGrad = gradient.computeNumericGradient(J, theta)
    gradient.checkGradient(grad, numGrad)

##======================================================================
## STEP 4: Once the implementation has been verified, train the sparse autoencoder
J = lambda x: sparseAutoencoder.sparseAutoencoderCost(x, visibleSize, hiddenSize,
                                                      lambda_, sparsityParam, beta, patches)
options_ = {'maxiter': 400, 'disp': True}
result = scipy.optimize.minimize(J, theta, method='L-BFGS-B', jac=True, options=options_)
opt_theta = result.x
print(result)
##======================================================================
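The script above defers the numerical gradient to gradient.computeNumericGradient, which is not shown in this listing. For reference, here is a minimal central-difference sketch of what such a helper typically looks like; the epsilon value and the (cost, grad) return convention of J are assumptions inferred from how J is called above, not the repository's actual implementation:

import numpy as np

def computeNumericGradient(J, theta, epsilon=1e-4):
    """Approximate the gradient of J at theta with central differences.

    J is assumed to return (cost, grad); only the cost is used here.
    """
    numGrad = np.zeros_like(theta)
    perturb = np.zeros_like(theta)
    for i in range(theta.size):
        perturb[i] = epsilon
        costPlus, _ = J(theta + perturb)    # cost at theta + epsilon * e_i
        costMinus, _ = J(theta - perturb)   # cost at theta - epsilon * e_i
        numGrad[i] = (costPlus - costMinus) / (2.0 * epsilon)
        perturb[i] = 0.0
    return numGrad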
if __name__ == '__main__':
    # (assumes the usual imports above: import scipy.optimize as sop,
    #  import sparseAutoencoder, import softmax)

    # step 1: initialize parameters
    inputSize = 28 * 28
    numClasses = 5
    hiddenSize = 200
    sparsityParam = 0.1
    la = 3e-3
    beta = 3

    # step 2: load the unlabeled set, the labeled training set, and the labeled test set
    unlabeledData, trainData, trainLabels, testData, testLabels = GetDivideSets()

    # step 3: train the autoencoder features on the unlabeled data
    theta = sparseAutoencoder.initiallize(hiddenSize, inputSize)
    X, cost, d = sop.fmin_l_bfgs_b(
        lambda x: sparseAutoencoder.sparseAutoencoderCost(x, inputSize, hiddenSize, la,
                                                          sparsityParam, beta, unlabeledData),
        x0=theta, maxiter=400, disp=1)
    W1 = X[0:hiddenSize * inputSize].reshape(hiddenSize, inputSize)
    W1 = W1.T
    opttheta = X

    # step 4: compute the hidden activations of the labeled training set
    # and use them to train the softmax classifier weights
    trainImages = feedforward(opttheta, hiddenSize, inputSize, trainData)
    thetaSoftmax = softmax.initiallize(numClasses, hiddenSize)
    la = 1e-4
    X, cost, d = sop.fmin_l_bfgs_b(
        lambda x: softmax.SoftmaxCost(trainImages, trainLabels, x, la, hiddenSize, numClasses),
        x0=thetaSoftmax, maxiter=100, disp=1)

    # step 5: compute the hidden activations of the labeled test set and report
    # the softmax accuracy; on this 5-class task it should be around 98%
    testImages = feedforward(opttheta, hiddenSize, inputSize, testData)
    optthetaSoftmax = X
    accuracy = softmax.predict(optthetaSoftmax, testImages, testLabels, hiddenSize, numClasses)
    print(accuracy)
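Steps 4 and 5 both depend on a feedforward helper that maps raw inputs to hidden-layer activations under the learned encoder. It is not shown in this listing; a minimal sketch follows, assuming theta uses the standard [W1, W2, b1, b2] packing implied by the W1 slice taken in step 3:

import numpy as np

def feedforward(theta, hiddenSize, visibleSize, data):
    """Return the hidden-layer activations a2 = sigmoid(W1 @ data + b1).

    Assumes theta is packed as [W1, W2, b1, b2], the usual
    sparse-autoencoder layout (an assumption, not confirmed by the source).
    """
    W1 = theta[0:hiddenSize * visibleSize].reshape(hiddenSize, visibleSize)
    b1 = theta[2 * hiddenSize * visibleSize:
               2 * hiddenSize * visibleSize + hiddenSize].reshape(hiddenSize, 1)
    return 1.0 / (1.0 + np.exp(-(W1.dot(data) + b1)))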
def run_training(FLAGS, patches):
    ##======================================================================
    ## STEP 1: Here we provide the relevant parameter values that will
    #  allow your sparse autoencoder to get good filters; you do not need to
    #  change the parameters below.
    visibleSize = FLAGS.visibleSize    # number of input units
    hiddenSize = FLAGS.hiddenSize      # number of hidden units
    sparsityParam = FLAGS.rho          # desired average activation \rho of the hidden units
    decay = FLAGS.decay                # weight decay parameter
    beta = FLAGS.beta                  # weight of the sparsity penalty term

    # Obtain random parameters theta
    theta = initializeParameters(hiddenSize, visibleSize)

    ##======================================================================
    ## STEP 2: Implement sparseAutoencoderCost
    #
    #  You can implement all of the components (squared error cost, weight decay term,
    #  sparsity penalty) in the cost function at once, but it may be easier to do
    #  it step by step and run gradient checking (see STEP 3) after each step.  We
    #  suggest implementing the sparseAutoencoderCost function using the following steps:
    #
    #  (a) Implement forward propagation in your neural network, and implement the
    #      squared error term of the cost function.  Implement backpropagation to
    #      compute the derivatives.  Then (using lambda = beta = 0), run gradient
    #      checking to verify that the calculations corresponding to the squared
    #      error cost term are correct.
    #
    #  (b) Add in the weight decay term (in both the cost function and the derivative
    #      calculations), then re-run gradient checking to verify correctness.
    #
    #  (c) Add in the sparsity penalty term, then re-run gradient checking to
    #      verify correctness.
    #
    #  Feel free to change the training settings when debugging your code.
    #  (For example, reducing the training set size or number of hidden units
    #  may make your code run faster, and setting beta and/or lambda to zero
    #  may be helpful for debugging.)  However, in your final submission of the
    #  visualized weights, please use the parameters given in STEP 1 above.
    cost, grad = sparseAutoencoderCost(theta, visibleSize, hiddenSize, decay,
                                       sparsityParam, beta, patches)

    ##======================================================================
    ## STEP 3: Gradient checking
    #
    #  Hint: if you are debugging your code, performing gradient checking on
    #  smaller models and smaller training sets (e.g., using only 10 training
    #  examples and 1-2 hidden units) may speed things up.
    if FLAGS.debug:
        # Use the numerical gradient to check your cost function and
        # derivative calculations for the sparse autoencoder.
        cost, grad = sparseAutoencoderCost(theta, visibleSize, hiddenSize, decay,
                                           sparsityParam, beta, patches)
        numGrad = computeNumericalGradient(
            lambda x: sparseAutoencoderCost(x, visibleSize, hiddenSize, decay,
                                            sparsityParam, beta, patches), theta)

        # Use this to visually compare the gradients side by side
        print(np.stack((numGrad, grad)).T)

        # Compare numerically computed gradients with the ones obtained from backpropagation
        diff = norm(numGrad - grad) / norm(numGrad + grad)
        print(diff)  # Should be small; in our implementation these values are
                     # usually less than 1e-9.
        sys.exit(1)  # Once this works, congratulations!

    ##======================================================================
    ## STEP 4: After verifying that your implementation of sparseAutoencoderCost
    #  is correct, you can start training your sparse autoencoder with L-BFGS
    #  (here, scipy's fmin_l_bfgs_b).

    # Randomly initialize the parameters.
    theta = initializeParameters(hiddenSize, visibleSize)

    # Use L-BFGS to minimize the function.
    theta, _, _ = fmin_l_bfgs_b(sparseAutoencoderCost, theta,
                                args=(visibleSize, hiddenSize, decay,
                                      sparsityParam, beta, patches),
                                maxiter=400, disp=1)

    # Save the learned parameters to an external file.
    with open(FLAGS.log_dir + '/' + FLAGS.params_file, 'wb') as f:
        pickle.dump(theta, f)

    ##======================================================================
    ## STEP 5: Visualization

    # Fold the W1 parameters into a matrix format.
    W1 = np.reshape(theta[:hiddenSize * visibleSize], (hiddenSize, visibleSize))

    # Save the visualization to a file.
    displayNetwork(W1.T, file_name='weights_digits.jpg')

    return theta
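For completeness, run_training could be driven as follows. The argparse flag names mirror the FLAGS attributes the function reads; the default values and the way patches is obtained are illustrative assumptions, not the repository's actual entry point:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--visibleSize', type=int, default=28 * 28)  # assumed input dimension
parser.add_argument('--hiddenSize', type=int, default=196)       # assumed default
parser.add_argument('--rho', type=float, default=0.1)
parser.add_argument('--decay', type=float, default=3e-3)
parser.add_argument('--beta', type=float, default=3.0)
parser.add_argument('--debug', action='store_true')
parser.add_argument('--log_dir', default='logs')
parser.add_argument('--params_file', default='params.pkl')
FLAGS = parser.parse_args()

# patches must be a (visibleSize, numExamples) array of training examples,
# loaded however the rest of the pipeline provides it.
opt_theta = run_training(FLAGS, patches)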
numClasses = 10
hiddenSizeL1 = 200
hiddenSizeL2 = 200
sparsityParam = 0.1
la = 3e-3
beta = 3

# step 2: load the MNIST data set; keep only 1/3 of it to avoid running out of memory
trainData, trainLabels, testData, testLabels = ReadMNIST()
length = trainData.shape[1] // 3
trainData = trainData[:, 0:length]
trainLabels = trainLabels[0:length]
sae1Theta = sparseAutoencoder.initiallize(hiddenSizeL1, inputSize)

# step 3: train the weights of the first autoencoder layer
sae1OptTheta, cost, d = sop.fmin_l_bfgs_b(
    lambda x: sparseAutoencoder.sparseAutoencoderCost(x, inputSize, hiddenSizeL1, la,
                                                      sparsityParam, beta, trainData),
    x0=sae1Theta, maxiter=400, disp=1)
sae1Features = feedforward(sae1OptTheta, hiddenSizeL1, inputSize, trainData)

# step 4: train the weights of the second autoencoder layer
sae2Theta = sparseAutoencoder.initiallize(hiddenSizeL2, hiddenSizeL1)
sae2OptTheta, cost, d = sop.fmin_l_bfgs_b(
    lambda x: sparseAutoencoder.sparseAutoencoderCost(x, hiddenSizeL1, hiddenSizeL2, la,
                                                      sparsityParam, beta, sae1Features),
    x0=sae2Theta, maxiter=400, disp=1)
sae2Features = feedforward(sae2OptTheta, hiddenSizeL2, hiddenSizeL1, sae1Features)

# step 5: train the softmax weights
saeSoftmaxTheta = softmax.initiallize(numClasses, hiddenSizeL2)
laSoftmax = 1e-4
saeSoftmaxOptTheta, cost, d = sop.fmin_l_bfgs_b(
    lambda x: softmax.SoftmaxCost(sae2Features, trainLabels, x, laSoftmax,
                                  hiddenSizeL2, numClasses),
    x0=saeSoftmaxTheta, maxiter=100, disp=1)

# step 6: fine-tune all the weights of the stacked network
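Step 6 typically starts from an initial parameter vector that concatenates the trained softmax weights with the encoder halves of the two pretrained autoencoders. A sketch of that assembly, assuming the standard UFLDL [W1, W2, b1, b2] packing; the helper name and the exact stack ordering are assumptions, and the repository's fine-tuning code may pack parameters differently:

import numpy as np

def extract_encoder(theta, hiddenSize, visibleSize):
    """Pull (W1, b1) out of a trained autoencoder's flat parameter vector,
    assuming the usual [W1, W2, b1, b2] packing (an assumption here)."""
    W1 = theta[0:hiddenSize * visibleSize].reshape(hiddenSize, visibleSize)
    b1 = theta[2 * hiddenSize * visibleSize:
               2 * hiddenSize * visibleSize + hiddenSize]
    return W1, b1

W1_1, b1_1 = extract_encoder(sae1OptTheta, hiddenSizeL1, inputSize)
W1_2, b1_2 = extract_encoder(sae2OptTheta, hiddenSizeL2, hiddenSizeL1)

# Softmax weights first, then the two encoder layers, all flattened into one
# vector; this becomes the starting point for the fine-tuning optimizer.
stackedAETheta = np.concatenate([saeSoftmaxOptTheta,
                                 W1_1.ravel(), b1_1,
                                 W1_2.ravel(), b1_2])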