if __name__ == '__main__':
    #step1: initialize the parameters
    inputSize = 28 * 28
    numClasses = 5
    hiddenSize = 200
    sparsityParam = 0.1
    la = 3e-3
    beta = 3

    #step2: get the unlabeled dataset, the labeled training set, and the labeled test set
    unlabeledData, trainData, trainLabels, testData, testLabels = GetDivideSets()

    #step3: train the autoencoder features on the unlabeled dataset
    theta = sparseAutoencoder.initiallize(hiddenSize, inputSize)
    [X, cost, d] = sop.fmin_l_bfgs_b(lambda x: sparseAutoencoder.sparseAutoencoderCost(
        x, inputSize, hiddenSize, la, sparsityParam, beta, unlabeledData),
        x0=theta, maxiter=400, disp=1)
    W1 = X[0:hiddenSize * inputSize].reshape(hiddenSize, inputSize)
    W1 = W1.T
    opttheta = X

    #step4: compute activations for the labeled training set and use them to train the softmax classifier weights
    trainImages = feedforward(opttheta, hiddenSize, inputSize, trainData)
    thetaSoftmax = softmax.initiallize(numClasses, hiddenSize)
    la = 1e-4
    [X, cost, d] = sop.fmin_l_bfgs_b(lambda x: softmax.SoftmaxCost(
        trainImages, trainLabels, x, la, hiddenSize, numClasses),
        x0=thetaSoftmax, maxiter=100, disp=1)

    #step5: compute activations for the labeled test set and report the softmax accuracy; for 5 classes it should be around 98%
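    # (Sketch of the body of step 5, mirroring the complete version of this
    # example that appears later in this listing.)
    testImages = feedforward(opttheta, hiddenSize, inputSize, testData)
    optthetaSoftmax = X
    accuracy = softmax.predict(optthetaSoftmax, testImages, testLabels, hiddenSize, numClasses)
    print(accuracy)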
Example #2
beta = 3  # weight of the sparsity penalty term
gradientCheck = False  # whether to run gradient checking

##======================================================================
## STEP 1: Load the images
images = loadMnist.loadMnistImages(".\\dataset\\mnist\\train-images-idx3-ubyte")
patches = images[:, 0 : 100]

##======================================================================
## STEP 2: Initialize the parameters
theta = sparseAutoencoder.initialize(hiddenSize, visibleSize)

##======================================================================
## STEP 3: Gradient checking
if gradientCheck:
    cost, grad = sparseAutoencoder.sparseAutoencoderCost(theta, visibleSize, hiddenSize, lambda_, sparsityParam, beta, patches)
    J = lambda x: sparseAutoencoder.sparseAutoencoderCost(x, visibleSize, hiddenSize, lambda_, sparsityParam, beta, patches)
    numGrad = gradient.computeNumericGradient(J, theta)
    gradient.checkGradient(grad, numGrad)
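
# For reference, a minimal sketch of the kind of central-difference check that
# gradient.computeNumericGradient presumably performs; the helper name, epsilon
# value, and loop below are illustrative, not taken from the gradient module.
import numpy as np

def numericGradientSketch(J, theta, epsilon=1e-4):
    numGrad = np.zeros_like(theta)
    for i in range(theta.size):
        e = np.zeros_like(theta)
        e[i] = epsilon
        # J returns (cost, grad); only the cost enters the finite difference
        numGrad[i] = (J(theta + e)[0] - J(theta - e)[0]) / (2.0 * epsilon)
    return numGrad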
     
##======================================================================
## STEP 4: After the implementation has passed gradient checking, train the sparse autoencoder
J = lambda x: sparseAutoencoder.sparseAutoencoderCost(x, visibleSize, hiddenSize,
                                                         lambda_, sparsityParam,
                                                         beta, patches)
options_ = {'maxiter': 400, 'disp': True}
result = scipy.optimize.minimize(J, theta, method='L-BFGS-B', jac=True, options=options_)
opt_theta = result.x
print(result)
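
# For inspection, the learned encoder weights can be pulled out of result.x.
# The reshape below assumes the usual packing in which W1 occupies the first
# hiddenSize * visibleSize entries of the parameter vector (the same layout the
# other examples in this listing rely on); it is a sketch, not part of the
# original snippet. W1.T can then be handed to a visualization helper such as
# the displayNetwork call used in the later FLAGS-based example.
W1 = opt_theta[0:hiddenSize * visibleSize].reshape(hiddenSize, visibleSize)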

##======================================================================
if __name__ == '__main__':
    #step1: initialize the parameters
    inputSize = 28*28
    numClasses = 5
    hiddenSize = 200
    sparsityParam = 0.1
    la = 3e-3
    beta = 3

    #step2: get the unlabeled dataset, the labeled training set, and the labeled test set
    unlabeledData, trainData, trainLabels, testData, testLabels = GetDivideSets()

    #step3: train the autoencoder features on the unlabeled dataset
    theta = sparseAutoencoder.initiallize(hiddenSize, inputSize)
    [X, cost, d] = sop.fmin_l_bfgs_b(lambda x: sparseAutoencoder.sparseAutoencoderCost(
        x, inputSize, hiddenSize, la, sparsityParam, beta, unlabeledData),
        x0=theta, maxiter=400, disp=1)
    W1 = X[0:hiddenSize*inputSize].reshape(hiddenSize, inputSize)
    W1 = W1.T
    opttheta = X

    #step4: compute activations for the labeled training set and use them to train the softmax classifier weights
    trainImages = feedforward(opttheta, hiddenSize, inputSize, trainData)
    thetaSoftmax = softmax.initiallize(numClasses, hiddenSize)
    la = 1e-4
    [X, cost, d] = sop.fmin_l_bfgs_b(lambda x: softmax.SoftmaxCost(
        trainImages, trainLabels, x, la, hiddenSize, numClasses),
        x0=thetaSoftmax, maxiter=100, disp=1)

    #step5: compute activations for the labeled test set and report the softmax accuracy; for 5 classes it should be around 98%
    testImages = feedforward(opttheta, hiddenSize, inputSize, testData)
    optthetaSoftmax = X
    accuracy = softmax.predict(optthetaSoftmax, testImages, testLabels, hiddenSize, numClasses)
    print(accuracy)
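
# For reference, a minimal sketch of what the feedforward helper used above is
# assumed to do: unpack W1 and b1 from the flat parameter vector and return the
# hidden-layer activations. The [W1, W2, b1, b2] packing and the sigmoid are
# assumptions based on the usual sparse-autoencoder layout, not code from the
# original repository.
import numpy as np

def feedforwardSketch(theta, hiddenSize, visibleSize, data):
    W1 = theta[0:hiddenSize * visibleSize].reshape(hiddenSize, visibleSize)
    b1 = theta[2 * hiddenSize * visibleSize:
               2 * hiddenSize * visibleSize + hiddenSize].reshape(hiddenSize, 1)
    return 1.0 / (1.0 + np.exp(-(W1.dot(data) + b1)))  # sigmoid activations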
Example #4
    numClasses = 10
    hiddenSizeL1 = 200
    hiddenSizeL2 = 200
    sparsityParam = 0.1
    la = 3e-3
    beta = 3

    #step2: read the MNIST dataset; use only 1/3 of it to avoid running out of memory
    trainData, trainLabels, testData, testLabels = ReadMNIST()
    length = trainData.shape[1] // 3
    trainData = trainData[:, 0:length]
    trainLabels = trainLabels[0:length]
    sae1Theta = sparseAutoencoder.initiallize(hiddenSizeL1, inputSize)

    #step3: train the weights of the first autoencoder layer
    [sae1OptTheta, cost, d] = sop.fmin_l_bfgs_b(lambda x: sparseAutoencoder.sparseAutoencoderCost(
        x, inputSize, hiddenSizeL1, la, sparsityParam, beta, trainData),
        x0=sae1Theta, maxiter=400, disp=1)
    sae1Features = feedforward(sae1OptTheta, hiddenSizeL1, inputSize, trainData)

    #step4: train the weights of the second autoencoder layer
    sae2Theta = sparseAutoencoder.initiallize(hiddenSizeL2, hiddenSizeL1)
    [sae2OptTheta, cost, d] = sop.fmin_l_bfgs_b(lambda x: sparseAutoencoder.sparseAutoencoderCost(
        x, hiddenSizeL1, hiddenSizeL2, la, sparsityParam, beta, sae1Features),
        x0=sae2Theta, maxiter=400, disp=1)
    sae2Features = feedforward(sae2OptTheta, hiddenSizeL2, hiddenSizeL1, sae1Features)

    #step5: train the softmax weights
    saeSoftmaxTheta = softmax.initiallize(numClasses, hiddenSizeL2)
    laSoftmax = 1e-4
    [saeSoftmaxOptTheta, cost, d] = sop.fmin_l_bfgs_b(lambda x: softmax.SoftmaxCost(
        sae2Features, trainLabels, x, laSoftmax, hiddenSizeL2, numClasses),
        x0=saeSoftmaxTheta, maxiter=100, disp=1)
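
    # A sketch (not part of the original snippet) of how the stack would typically
    # be evaluated before fine-tuning, mirroring step 5 of the earlier 5-class
    # example: push the test data through both encoders, then score the softmax.
    testFeaturesL1 = feedforward(sae1OptTheta, hiddenSizeL1, inputSize, testData)
    testFeaturesL2 = feedforward(sae2OptTheta, hiddenSizeL2, hiddenSizeL1, testFeaturesL1)
    accuracy = softmax.predict(saeSoftmaxOptTheta, testFeaturesL2, testLabels, hiddenSizeL2, numClasses)
    print(accuracy)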

#  Obtain random parameters theta
theta = initializeParameters(hiddenSize, visibleSize)

##======================================================================
## Gradient Checking
#
# Hint: If you are debugging your code, performing gradient checking on smaller models
# and smaller training sets (e.g., using only 10 training examples and 1-2 hidden
# units) may speed things up.

if FLAGS.debug:
    # Now we can use it to check your cost function and derivative calculations
    # for the sparse autoencoder.
    cost, grad = sparseAutoencoderCost(theta, visibleSize, hiddenSize, decay,
                                       sparsityParam, beta, patches)
    numGrad = computeNumericalGradient(
        lambda x: sparseAutoencoderCost(x, visibleSize, hiddenSize, decay,
                                        sparsityParam, beta, patches), theta)

    # Use this to visually compare the gradients side by side
    print(np.stack((numGrad, grad)).T)

    # Compare numerically computed gradients with the ones obtained from backpropagation
    diff = norm(numGrad - grad) / norm(numGrad + grad)
    print(diff)  # Should be small. In our implementation, these values are
    # usually less than 1e-9.
    sys.exit(1)  # When you've got this working, congratulations!

##======================================================================
## After verifying that your implementation of sparseAutoencoderCost is
# correct, you can start training your sparse autoencoder with minFunc (L-BFGS).
def run_training(FLAGS, patches):
  ##======================================================================
  ## STEP 1: Here we provide the relevant parameters values that will
  #  allow your sparse autoencoder to get good filters; you do not need to 
  #  change the parameters below.
  
  visibleSize = FLAGS.visibleSize  # number of input units 
  hiddenSize = FLAGS.hiddenSize    # number of hidden units 
  sparsityParam = FLAGS.rho        # desired average activation \rho of the hidden units.
  decay = FLAGS.decay              # weight decay parameter       
  beta = FLAGS.beta                # weight of sparsity penalty term
  
  #  Obtain random parameters theta
  theta = initializeParameters(hiddenSize, visibleSize)
  
  ##======================================================================
  ## STEP 2: Implement sparseAutoencoderCost
  #
  #  You can implement all of the components (squared error cost, weight decay term,
  #  sparsity penalty) in the cost function at once, but it may be easier to do 
  #  it step-by-step and run gradient checking (see STEP 3) after each step.  We 
  #  suggest implementing the sparseAutoencoderCost function using the following steps:
  #
  #  (a) Implement forward propagation in your neural network, and implement the 
  #      squared error term of the cost function.  Implement backpropagation to 
  #      compute the derivatives.   Then (using lambda=beta=0), run Gradient Checking 
  #      to verify that the calculations corresponding to the squared error cost 
  #      term are correct.
  #
  #  (b) Add in the weight decay term (in both the cost function and the derivative
  #      calculations), then re-run Gradient Checking to verify correctness. 
  #
  #  (c) Add in the sparsity penalty term, then re-run Gradient Checking to 
  #      verify correctness.
  #
  #  Feel free to change the training settings when debugging your
  #  code.  (For example, reducing the training set size or 
  #  number of hidden units may make your code run faster; and setting beta 
  #  and/or lambda to zero may be helpful for debugging.)  However, in your 
  #  final submission of the visualized weights, please use parameters we 
  #  gave in Step 0 above.
  
  cost, grad = sparseAutoencoderCost(theta, visibleSize, hiddenSize, decay,
                                     sparsityParam, beta, patches)
  
  ##======================================================================
  ## STEP 3: Gradient Checking
  #
  # Hint: If you are debugging your code, performing gradient checking on smaller models 
  # and smaller training sets (e.g., using only 10 training examples and 1-2 hidden 
  # units) may speed things up.
  
  
  if FLAGS.debug:
    # Now we can use it to check your cost function and derivative calculations
    # for the sparse autoencoder.
    cost, grad = sparseAutoencoderCost(theta, visibleSize, hiddenSize, decay, \
                                       sparsityParam, beta, patches)
    numGrad = computeNumericalGradient(lambda x: sparseAutoencoderCost(x, visibleSize, hiddenSize, decay, sparsityParam, beta, patches), theta)
  
    # Use this to visually compare the gradients side by side
    print(np.stack((numGrad, grad)).T)
  
    # Compare numerically computed gradients with the ones obtained from backpropagation
    diff = norm(numGrad - grad) / norm(numGrad + grad)
    print(diff) # Should be small. In our implementation, these values are
                # usually less than 1e-9.
    sys.exit(1) # When you've got this working, congratulations!
    
  
  ##======================================================================
  ## STEP 4: After verifying that your implementation of
  #  sparseAutoencoderCost is correct, you can start training your sparse
  #  autoencoder with minFunc (L-BFGS).
  
  #  Randomly initialize the parameters.
  theta = initializeParameters(hiddenSize, visibleSize)
  
  #  Use L-BFGS to minimize the function.
  theta, _, _ = fmin_l_bfgs_b(sparseAutoencoderCost, theta,
                              args = (visibleSize, hiddenSize, decay, sparsityParam, beta, patches),
                              maxiter = 400, disp = 1)

  # save the learned parameters to external file
  pickle.dump(theta, open(FLAGS.log_dir + '/' + FLAGS.params_file, 'wb'))
  
  ##======================================================================
  ## STEP 5: Visualization 
  
  # Fold W1 parameters into a matrix format.
  W1 = np.reshape(theta[:hiddenSize * visibleSize], (hiddenSize, visibleSize))
  
  # Save the visualization to a file.
  displayNetwork(W1.T, file_name = 'weights_digits.jpg')

  return theta
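
# The sketch below illustrates the cost described in STEP 2 of run_training above:
# squared reconstruction error + weight decay + KL sparsity penalty for a single
# sigmoid hidden layer. It is a minimal illustration, not the repository's
# sparseAutoencoderCost: the gradient is omitted and the [W1, W2, b1, b2] packing
# of theta is an assumption.
import numpy as np

def sparseAutoencoderCostSketch(theta, visibleSize, hiddenSize, decay,
                                sparsityParam, beta, data):
    hv = hiddenSize * visibleSize
    W1 = theta[0:hv].reshape(hiddenSize, visibleSize)
    W2 = theta[hv:2 * hv].reshape(visibleSize, hiddenSize)
    b1 = theta[2 * hv:2 * hv + hiddenSize].reshape(hiddenSize, 1)
    b2 = theta[2 * hv + hiddenSize:].reshape(visibleSize, 1)

    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    m = data.shape[1]                  # training examples are columns of data

    a2 = sigmoid(W1.dot(data) + b1)    # hidden activations
    a3 = sigmoid(W2.dot(a2) + b2)      # reconstruction of the input

    rho = sparsityParam
    rhoHat = a2.mean(axis=1)           # average activation of each hidden unit
    kl = np.sum(rho * np.log(rho / rhoHat) +
                (1 - rho) * np.log((1 - rho) / (1 - rhoHat)))

    cost = (0.5 / m) * np.sum((a3 - data) ** 2) \
        + (decay / 2.0) * (np.sum(W1 ** 2) + np.sum(W2 ** 2)) \
        + beta * kl
    return cost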

#  Obtain random parameters theta
theta = initializeParameters(hiddenSize, visibleSize)

##======================================================================
## Gradient Checking
#
# Hint: If you are debugging your code, performing gradient checking on smaller models 
# and smaller training sets (e.g., using only 10 training examples and 1-2 hidden 
# units) may speed things up.


if FLAGS.debug:
  # Check your cost function and derivative calculations for the sparse autoencoder.
  cost, grad = sparseAutoencoderCost(theta, visibleSize, hiddenSize, decay, \
                                     sparsityParam, beta, patches)
  numGrad = computeNumericalGradient(lambda x: sparseAutoencoderCost(x, visibleSize, hiddenSize, decay, sparsityParam, beta, patches), theta)

  # Use this to visually compare the gradients side by side
  print(np.stack((numGrad, grad)).T)

  # Compare numerically computed gradients with the ones obtained from backpropagation
  diff = norm(numGrad - grad) / norm(numGrad + grad)
  print(diff) # Should be small. In our implementation, these values are
              # usually less than 1e-9.
  sys.exit(1) # When you've got this working, congratulations!
  

##======================================================================
## After verifying that your implementation of sparseAutoencoderCost is 
# correct, you can start training your sparse autoencoder with minFunc (L-BFGS).
    numClasses = 10
    hiddenSizeL1 = 200
    hiddenSizeL2 = 200
    sparsityParam = 0.1
    la = 3e-3
    beta = 3

    #step2: read the MNIST dataset; use only 1/3 of it to avoid running out of memory
    trainData, trainLabels, testData, testLabels = ReadMNIST()
    length = trainData.shape[1] // 3
    trainData = trainData[:, 0:length]
    trainLabels = trainLabels[0:length]
    sae1Theta = sparseAutoencoder.initiallize(hiddenSizeL1, inputSize)

    #step3: train the weights of the first autoencoder layer
    [sae1OptTheta, cost, d] = sop.fmin_l_bfgs_b(lambda x: sparseAutoencoder.sparseAutoencoderCost(
        x, inputSize, hiddenSizeL1, la, sparsityParam, beta, trainData),
        x0=sae1Theta, maxiter=400, disp=1)
    sae1Features = feedforward(sae1OptTheta, hiddenSizeL1, inputSize, trainData)

    #step4: train the weights of the second autoencoder layer
    sae2Theta = sparseAutoencoder.initiallize(hiddenSizeL2, hiddenSizeL1)
    [sae2OptTheta, cost, d] = sop.fmin_l_bfgs_b(lambda x: sparseAutoencoder.sparseAutoencoderCost(
        x, hiddenSizeL1, hiddenSizeL2, la, sparsityParam, beta, sae1Features),
        x0=sae2Theta, maxiter=400, disp=1)
    sae2Features = feedforward(sae2OptTheta, hiddenSizeL2, hiddenSizeL1, sae1Features)

    #step5: train the softmax weights
    saeSoftmaxTheta = softmax.initiallize(numClasses, hiddenSizeL2)
    laSoftmax = 1e-4
    [saeSoftmaxOptTheta, cost, d] = sop.fmin_l_bfgs_b(lambda x: softmax.SoftmaxCost(
        sae2Features, trainLabels, x, laSoftmax, hiddenSizeL2, numClasses),
        x0=saeSoftmaxTheta, maxiter=100, disp=1)

    #step6: fine-tune all weights of the stacked network