n, m = data.shape
    mean = np.mean(data, axis = 1) #分别为每个像素块计算像素强度均值
    data = data - np.tile(mean,(m, 1)).T#将数据零均值化
    cov = (data).dot(data.T) / m #计算协方差,因为均值已经为0,所以可以这样求
    u, s, v = np.linalg.svd(cov) #对协方差矩阵进行特征值分解, u为特征向量, s为排好序由大到小的特征值
    dataRot = (u.T).dot(data)
    #将旋转后的数据进行PCA白化
    pcaWhiteningData = np.diag(1.0 / np.sqrt(s + epsilon)).dot(dataRot)
     #将数据ZCA白化
    zcaWhiteningData = u.dot(pcaWhiteningData)
    return zcaWhiteningData
    
if __name__ == '__main__':
    import loadSampleImages
    import displayNetwork

    show = displayNetwork.displayNetwork
    patches = loadSampleImages.loadIMAGES_RAW()

    # Draw 400 distinct column (patch) indices at random.
    random_sel = random.sample(range(patches.shape[1]), 400)

    # Raw patches first, as a visual reference.
    show(patches[:, random_sel], "./outputs/01rawdata.png")

    # PCA whitening at three settings of the second argument (presumably the
    # fraction of variance to retain, judging by the output names -- confirm
    # against pcaWhitening).  The last argument stays at 0.1 throughout.
    pca_runs = (
        (1.0, "./outputs/02pcaWhiteningdata1.0.png"),
        (0.99, "./outputs/02pcaVarianceRetain0.99.png"),
        (0.90, "./outputs/02pcaVarianceRetain0.9.png"),
    )
    for retain, out_path in pca_runs:
        # Index afresh for every call so each run receives its own copy.
        show(pcaWhitening(patches[:, random_sel], retain, 0.1), out_path)

    # ZCA whitening of the same selection.
    show(zcaWhitening(patches[:, random_sel], 0.1),
         "./outputs/03zcaWhiteningData.png")
    
    
    
    

Example #2
0
            p = p + 1

    return patches


##================================================================
## Step 0a: Load data
#  Load the raw natural-image patches into x.  Per the exercise notes, x is a
#  144 x 10000 matrix whose k-th column is the k-th 12x12 patch (the loader
#  is defined elsewhere, so this is not verified here).

x = sampleIMAGESRAW('../../data/IMAGES_RAW.mat')

# Pick 196 random column indices and render those patches for inspection.
randsel = randint(x.shape[1], size=196)
preview = x[:, randsel]
displayNetwork(preview, file_name='figure7.jpg', opt_normalize=True)

##================================================================
## Step 0b: Zero-mean the data

# -------------------- YOUR CODE HERE --------------------

# np.mean(..., axis=0) yields one mean per column; broadcasting then removes
# each column's own mean from every entry of that column.
column_means = np.mean(x, axis=0)
x0 = x - column_means

# --------------------------------------------------------

##================================================================
## Step 1a: Implement PCA to obtain xRot
#  Implement PCA to obtain xRot, the matrix in which the data is expressed
#  with respect to the eigenbasis of sigma, which is the matrix U.
Example #3
0
# Work on the first 100 columns of `images` only.
patches = images[:, 0:100]

##======================================================================
## STEP 2: Initialise the parameter vector
theta = sparseAutoencoder.initialize(hiddenSize, visibleSize)


def J(x):
    """Evaluate the sparse-autoencoder objective at parameter vector x.

    Returns whatever sparseAutoencoderCost returns; it is unpacked as
    (cost, grad) below and passed to the optimiser with jac=True.
    """
    return sparseAutoencoder.sparseAutoencoderCost(
        x, visibleSize, hiddenSize, lambda_, sparsityParam, beta, patches)


##======================================================================
## STEP 3: Gradient check
if gradientCheck:
    cost, grad = sparseAutoencoder.sparseAutoencoderCost(
        theta, visibleSize, hiddenSize, lambda_, sparsityParam, beta, patches)
    numGrad = gradient.computeNumericGradient(J, theta)
    gradient.checkGradient(grad, numGrad)

##======================================================================
## STEP 4: With the implementation checked, train the sparse autoencoder
options_ = {'maxiter': 400, 'disp': True}
result = scipy.optimize.minimize(J, theta, method='L-BFGS-B', jac=True,
                                 options=options_)
opt_theta = result.x
# Parenthesised form works on both Python 2 and Python 3.
print(result)

##======================================================================
## STEP 5: Visualise the learned features
# The first hiddenSize * visibleSize entries are reshaped into the
# (hiddenSize, visibleSize) first-layer weight matrix.
W1 = opt_theta[0:hiddenSize * visibleSize].reshape(hiddenSize, visibleSize)
displayNetwork.displayNetwork(W1.T, "weights.png")


Example #4
0
      patches[:, p] = sample.ravel()
      p = p + 1

  return patches


##================================================================
## Step 0a: Load data
#  Load the raw natural-image patches into x.  Per the exercise notes, x is a
#  144 x 10000 matrix whose k-th column is the k-th 12x12 patch (the loader
#  is defined elsewhere, so this is not verified here).

x = sampleIMAGESRAW('IMAGES_RAW.mat')

# Pick 196 random column indices and render those patches for inspection.
randsel = randint(x.shape[1], size=196)
preview = x[:, randsel]
displayNetwork(preview, file_name='figure7.jpg', opt_normalize=True)

##================================================================
## Step 0b: Zero-mean the data (by row)

# -------------------- YOUR CODE HERE --------------------

# One mean per row, turned into a column so it broadcasts across each row.
x_mean = x.mean(axis=1)
row_means_column = x_mean.reshape(-1, 1)
x0 = x - row_means_column

# --------------------------------------------------------

##================================================================
## Step 1a: Implement PCA to obtain xRot
#  Implement PCA to obtain xRot, the matrix in which the data is expressed
#  with respect to the eigenbasis of sigma, which is the matrix U.
smOptB = smModel["b"]
# Persist the trained softmax model.
with open('smModel.pickle', 'wb') as f:
    cPickle.dump(smModel, f)
print('Saved successfully')
##======================================================================
## STEP 5: Fine-tune the whole network
# Each optimised autoencoder vector is sliced as: hidden-layer weights in the
# first hidden * input entries, hidden-layer biases starting at offset
# 2 * hidden * input.
# NOTE(review): presumably the layout is [W1, W2, b1, b2] -- confirm against
# sparseAutoencoder.
layer1 = stackedAutoencoder.Layer(1)
layer1.W = sae1OptTheta[0:sae1HiddenSize * sae1InputSize].reshape(
    sae1HiddenSize, sae1InputSize)
layer1.b = sae1OptTheta[2 * sae1HiddenSize * sae1InputSize:
                        2 * sae1HiddenSize * sae1InputSize + sae1HiddenSize]
layer2 = stackedAutoencoder.Layer(2)
layer2.W = sae2OptTheta[0:sae2HiddenSize * sae2InputSize].reshape(
    sae2HiddenSize, sae2InputSize)
layer2.b = sae2OptTheta[2 * sae2HiddenSize * sae2InputSize:
                        2 * sae2HiddenSize * sae2InputSize + sae2HiddenSize]
stack = [layer1, layer2]
# Visualise the features learned by the first and the second autoencoder;
# the second layer is shown composed with the first (W2 . W1).
displayNetwork.displayNetwork(layer1.W.T, "sae1.png")
displayNetwork.displayNetwork(layer2.W.dot(layer1.W).T, "sae2.png")

# Pack softmax parameters first, then the stacked-autoencoder parameters.
smOptTheta = np.concatenate((smOptW.flatten(), smOptB))

saeTheta = np.concatenate((smOptTheta, stackedAutoencoder.stack2Params(stack)))

saeOptTheta = stackedAutoencoder.fineTuning(saeTheta, numClasses, netConfig, lambda4SM, trainImages, trainLabels, options4SAE )

##======================================================================
## STEP 6: Evaluate on the test set
testImages = loadMnist.loadMnistImages(".\\dataset\\mnist\\t10k-images-idx3-ubyte")
testLabels = loadMnist.loadMnistLabels(".\\dataset\\mnist\\t10k-labels-idx1-ubyte")

# Accuracy with the parameters as they were before fine-tuning (saeTheta).
predLabels = stackedAutoencoder.classify(saeTheta, numClasses, netConfig, testImages)
accuracy = np.sum(predLabels == testLabels) / float(testLabels.shape[0])
# A single formatted string prints the same text on Python 2 and Python 3
# (the two spaces reproduce what the old print statement emitted).
print("before fine tuning's accuracy:  %s" % accuracy)
def run_training():
  """Train the sparse autoencoder through a queue-based input pipeline.

  Configuration is read from the module-level ``FLAGS``:

  * ``input_type == 'natural'`` samples 10000 patches from ``IMAGES.mat`` and
    trains for ``4 * FLAGS.num_epochs`` epochs; any other value samples
    20000 digit patches from ``FLAGS.train_dir`` and trains for
    ``FLAGS.num_epochs`` epochs.
  * ``visibleSize``, ``hiddenSize``, ``decay``, ``rho``, ``beta``,
    ``learning_rate`` and ``batch_size`` are forwarded to the model and the
    batching op.

  Every 100 steps the loss is printed and a summary is written; a checkpoint
  is saved whenever ``(step + 1) % 5000 == 0`` and once more when the input
  queue is exhausted.  Finally the trainable variable whose name contains
  ``'sparseAE/weights1'`` is rendered to ``weights-<input_type>.jpg``.
  """

  # Get the set of patches used for training.
  numPatches = 10000
  if FLAGS.input_type == 'natural':
    from sampleNaturalImages import sampleNaturalImages
    patches = sampleNaturalImages('IMAGES.mat', numPatches)
    epochs = 4 * FLAGS.num_epochs
  else:
    from sampleDigitImages import sampleDigitImages
    patches = sampleDigitImages(FLAGS.train_dir, 2 * numPatches)
    epochs = FLAGS.num_epochs

  # Tell TensorFlow that the model will be built into the default Graph.
  with tf.Graph().as_default():
    with tf.name_scope('input'):
      # The data is fed once through placeholders into non-trainable
      # variables; both "images" and "labels" are initialised from the
      # transposed patch matrix further below.
      images_initializer = tf.placeholder(
          dtype=patches.dtype,
          shape=patches.T.shape)
      labels_initializer = tf.placeholder(
          dtype=patches.dtype,
          shape=patches.T.shape)
      input_images = tf.Variable(
          images_initializer, trainable=False, collections=[])
      input_labels = tf.Variable(
          labels_initializer, trainable=False, collections=[])

      # Slice single examples for `epochs` passes, then batch them.
      image, label = tf.train.slice_input_producer(
          [input_images, input_labels], num_epochs=epochs)
      image = tf.cast(image, tf.float32)
      label = tf.cast(label, tf.float32)
      images, labels = tf.train.batch(
          [image, label], batch_size=FLAGS.batch_size)

    # Build a Graph that computes the loss for the sparse AutoEncoder.
    loss = sparseAutoencoder.loss(images, FLAGS.visibleSize, FLAGS.hiddenSize, FLAGS.decay, FLAGS.rho, FLAGS.beta)

    # Add to the Graph the Ops that calculate and apply gradients.
    train_op = sparseAutoencoder.training(loss, FLAGS.learning_rate)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.summary.merge_all()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Create the op for initializing variables.
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Run the Op to initialize the variables, then load the data variables.
    sess.run(init_op)
    sess.run(input_images.initializer,
             feed_dict={images_initializer: patches.T})
    sess.run(input_labels.initializer,
             feed_dict={labels_initializer: patches.T})

    # Instantiate a SummaryWriter to output summaries and the Graph.
    summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

    # Start input enqueue threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # And then after everything is built, start the training loop.
    try:
      step = 0
      while not coord.should_stop():
        start_time = time.time()

        # Run one step of the model.
        _, loss_value = sess.run([train_op, loss])

        duration = time.time() - start_time

        # Write the summaries and print an overview fairly often.
        if step % 100 == 0:
          # Print status to stdout.
          print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
          # Update the events file.
          summary_str = sess.run(summary_op)
          summary_writer.add_summary(summary_str, step)
          # `step` is deliberately not advanced here: it is incremented
          # exactly once per iteration at the bottom of the loop, so the
          # counter matches the number of training steps actually run.

        # Save a checkpoint periodically.
        if (step + 1) % 5000 == 0:
          print('Saving')
          saver.save(sess, FLAGS.train_dir, global_step=step)

        step += 1
    except tf.errors.OutOfRangeError:
      # Raised once the input producer has served all epochs.
      print('Saving')
      saver.save(sess, FLAGS.train_dir, global_step=step)
      print('Done training for %d epochs, %d steps.' % (epochs, step))
    finally:
      # When done, ask the threads to stop.
      coord.request_stop()

    # Wait for threads to finish.
    coord.join(threads)

    # Display learned filters.
    weights = [v for v in tf.trainable_variables() if 'sparseAE/weights1' in v.name][0]
    weights = weights.eval(session=sess)
    displayNetwork(weights, file_name = 'weights-' + FLAGS.input_type + '.jpg')

    sess.close()
elif FLAGS.input_type == 'natural':
    numPatches = 10000
else:
    numPatches = 20000

# Sample training patches from the data set selected by FLAGS.input_type.
if FLAGS.input_type != 'natural':
    from sampleDigitImages import sampleDigitImages
    patches = sampleDigitImages(FLAGS.input_data_dir + 'mnist', numPatches)
else:
    from sampleNaturalImages import sampleNaturalImages
    patches = sampleNaturalImages(FLAGS.input_data_dir + 'images.mat',
                                  numPatches)

# Render 200 randomly chosen patch columns as a quick sanity check.
preview = patches[:, randint(0, patches.shape[1], 200)]
preview_name = 'patches-' + FLAGS.input_type + '.jpg'
displayNetwork(preview, 8, preview_name)

# Start from randomly initialised parameters.
theta = initializeParameters(hiddenSize, visibleSize)

##======================================================================
## Gradient Checking
#
# Hint: If you are debugging your code, performing gradient checking on smaller models
# and smaller training sets (e.g., using only 10 training examples and 1-2 hidden
# units) may speed things up.

if FLAGS.debug:
    # Now we can use it to check your cost function and derivative calculations
    # for the sparse autoencoder.
    cost, grad = sparseAutoencoderCost(theta, visibleSize, hiddenSize, decay,
加载mnist图像标签
@param fileName
 读取的图像标签的文件名
@return 标签矩阵
"""
def loadMnistLabels(fileName):
    """Read the label bytes from an IDX-style label file.

    The file is opened in binary mode.  Two big-endian 32-bit integers (the
    magic number and the label count) are consumed from the start of the
    file and discarded; every remaining byte is returned as one label.

    @param fileName  path of the label file to read
    @return 1-D numpy array of dtype uint8 holding the labels
    """
    header_dtype = np.dtype('>i4')
    # The context manager releases the handle even if a read raises.
    with open(fileName, "rb") as f:
        # Skip the header (magic number, label count); the values are unused.
        np.fromfile(f, dtype=header_dtype, count=2)
        labels = np.fromfile(f, dtype=np.uint8)
    return labels
    
if __name__ == '__main__':
    # Smoke test: load the MNIST training set and render a few digits.
    images = loadMnistImages(".\\dataset\\mnist\\train-images-idx3-ubyte")
    labels = loadMnistLabels(".\\dataset\\mnist\\train-labels-idx1-ubyte")
    # Parenthesised print works on both Python 2 and Python 3.
    print(images.shape)
    print(labels.shape)
    # Draw one image (column 2, reshaped to 28x28).
    import matplotlib.pyplot as plt
    import matplotlib
    # gray: 0 is black, 1 is white; gray_r: 0 is white, 1 is black.
    plt.imshow(images[:, 2].reshape((28, 28)), cmap=matplotlib.cm.gray_r)
    # Tile the first 400 images into a single picture.
    import displayNetwork
    displayNetwork.displayNetwork(images[:, 0:400], "./outputs/mnist.png")


@return 标签矩阵
"""


def loadMnistLabels(fileName):
    """Read the label bytes from an IDX-style label file.

    The file is opened in binary mode.  Two big-endian 32-bit integers (the
    magic number and the label count) are consumed from the start of the
    file and discarded; every remaining byte is returned as one label.

    @param fileName  path of the label file to read
    @return 1-D numpy array of dtype uint8 holding the labels
    """
    header_dtype = np.dtype('>i4')
    # The context manager releases the handle even if a read raises.
    with open(fileName, "rb") as f:
        # Skip the header (magic number, label count); the values are unused.
        np.fromfile(f, dtype=header_dtype, count=2)
        labels = np.fromfile(f, dtype=np.uint8)
    return labels


if __name__ == '__main__':
    # Smoke test: load the MNIST training set and render a few digits.
    images = loadMnistImages(".\\dataset\\mnist\\train-images-idx3-ubyte")
    labels = loadMnistLabels(".\\dataset\\mnist\\train-labels-idx1-ubyte")
    # Parenthesised print works on both Python 2 and Python 3.
    print(images.shape)
    print(labels.shape)
    # Draw one image (column 2, reshaped to 28x28).
    import matplotlib.pyplot as plt
    import matplotlib
    # gray: 0 is black, 1 is white; gray_r: 0 is white, 1 is black.
    plt.imshow(
        images[:, 2].reshape((28, 28)),
        cmap=matplotlib.cm.gray_r)
    # Tile the first 400 images into a single picture.
    import displayNetwork
    displayNetwork.displayNetwork(images[:, 0:400], "./outputs/mnist.png")
Example #10
0
    cov = (data).dot(data.T) / m  #计算协方差,因为均值已经为0,所以可以这样求
    u, s, v = np.linalg.svd(cov)  #对协方差矩阵进行特征值分解, u为特征向量, s为排好序由大到小的特征值
    dataRot = (u.T).dot(data)
    #将旋转后的数据进行PCA白化
    pcaWhiteningData = np.diag(1.0 / np.sqrt(s + epsilon)).dot(dataRot)
    #将数据ZCA白化
    zcaWhiteningData = u.dot(pcaWhiteningData)
    return zcaWhiteningData


if __name__ == '__main__':
    import loadSampleImages
    import displayNetwork

    show = displayNetwork.displayNetwork
    patches = loadSampleImages.loadIMAGES_RAW()

    # Draw 400 distinct column (patch) indices at random.
    random_sel = random.sample(range(patches.shape[1]), 400)

    # Raw patches first, as a visual reference.
    show(patches[:, random_sel], "./outputs/01rawdata.png")

    # PCA whitening at three settings of the second argument (presumably the
    # fraction of variance to retain, judging by the output names -- confirm
    # against pcaWhitening).  The last argument stays at 0.1 throughout.
    pca_runs = (
        (1.0, "./outputs/02pcaWhiteningdata1.0.png"),
        (0.99, "./outputs/02pcaVarianceRetain0.99.png"),
        (0.90, "./outputs/02pcaVarianceRetain0.9.png"),
    )
    for retain, out_path in pca_runs:
        # Index afresh for every call so each run receives its own copy.
        show(pcaWhitening(patches[:, random_sel], retain, 0.1), out_path)

    # ZCA whitening of the same selection.
    show(zcaWhitening(patches[:, random_sel], 0.1),
         "./outputs/03zcaWhiteningData.png")
Example #11
0
def run_training(FLAGS, patches):
  """Train a sparse autoencoder with L-BFGS and save/visualise the result.

  Args:
    FLAGS: namespace providing ``visibleSize``, ``hiddenSize``, ``rho``,
      ``decay``, ``beta``, ``debug``, ``log_dir`` and ``params_file``.
    patches: training data, passed unchanged to ``sparseAutoencoderCost``.

  Returns:
    The optimised parameter vector returned by ``fmin_l_bfgs_b``.

  Side effects:
    Pickles the learned parameters to ``FLAGS.log_dir + '/' +
    FLAGS.params_file`` and writes the first-layer weights to
    ``weights_digits.jpg``.  When ``FLAGS.debug`` is set, the function runs a
    numerical gradient check, prints the comparison and terminates the
    process via ``sys.exit(1)`` without training.
  """
  ##======================================================================
  ## STEP 1: Here we provide the relevant parameters values that will
  #  allow your sparse autoencoder to get good filters; you do not need to
  #  change the parameters below.

  visibleSize = FLAGS.visibleSize  # number of input units
  hiddenSize = FLAGS.hiddenSize    # number of hidden units
  sparsityParam = FLAGS.rho        # desired average activation \rho of the hidden units.
  decay = FLAGS.decay              # weight decay parameter
  beta = FLAGS.beta                # weight of sparsity penalty term

  #  Obtain random parameters theta
  theta = initializeParameters(hiddenSize, visibleSize)

  ##======================================================================
  ## STEP 2: Implement sparseAutoencoderCost
  #
  #  You can implement all of the components (squared error cost, weight decay term,
  #  sparsity penalty) in the cost function at once, but it may be easier to do
  #  it step-by-step and run gradient checking (see STEP 3) after each step.  We
  #  suggest implementing the sparseAutoencoderCost function using the following steps:
  #
  #  (a) Implement forward propagation in your neural network, and implement the
  #      squared error term of the cost function.  Implement backpropagation to
  #      compute the derivatives.   Then (using lambda=beta=0), run Gradient Checking
  #      to verify that the calculations corresponding to the squared error cost
  #      term are correct.
  #
  #  (b) Add in the weight decay term (in both the cost function and the derivative
  #      calculations), then re-run Gradient Checking to verify correctness.
  #
  #  (c) Add in the sparsity penalty term, then re-run Gradient Checking to
  #      verify correctness.
  #
  #  Feel free to change the training settings when debugging your
  #  code.  (For example, reducing the training set size or
  #  number of hidden units may make your code run faster; and setting beta
  #  and/or lambda to zero may be helpful for debugging.)  However, in your
  #  final submission of the visualized weights, please use parameters we
  #  gave in Step 0 above.

  # One evaluation up front; the result is only consumed by the debug branch.
  cost, grad = sparseAutoencoderCost(theta, visibleSize, hiddenSize, decay,
                                     sparsityParam, beta, patches)

  ##======================================================================
  ## STEP 3: Gradient Checking
  #
  # Hint: If you are debugging your code, performing gradient checking on smaller models
  # and smaller training sets (e.g., using only 10 training examples and 1-2 hidden
  # units) may speed things up.

  if FLAGS.debug:
    # Now we can use it to check your cost function and derivative calculations
    # for the sparse autoencoder.
    cost, grad = sparseAutoencoderCost(theta, visibleSize, hiddenSize, decay,
                                       sparsityParam, beta, patches)
    numGrad = computeNumericalGradient(
        lambda x: sparseAutoencoderCost(x, visibleSize, hiddenSize, decay,
                                        sparsityParam, beta, patches),
        theta)

    # Use this to visually compare the gradients side by side
    print(np.stack((numGrad, grad)).T)

    # Compare numerically computed gradients with the ones obtained from backpropagation
    diff = norm(numGrad - grad) / norm(numGrad + grad)
    print(diff) # Should be small. In our implementation, these values are
                # usually less than 1e-9.
    sys.exit(1) # When you got this working, Congratulations!!!

  ##======================================================================
  ## STEP 4: After verifying that your implementation of
  #  sparseAutoencoderCost is correct, You can start training your sparse
  #  autoencoder with minFunc (L-BFGS).

  #  Randomly initialize the parameters.
  theta = initializeParameters(hiddenSize, visibleSize)

  #  Use L-BFGS to minimize the function.
  theta, _, _ = fmin_l_bfgs_b(sparseAutoencoderCost, theta,
                              args = (visibleSize, hiddenSize, decay, sparsityParam, beta, patches),
                              maxiter = 400, disp = 1)

  # Save the learned parameters to an external file.  The context manager
  # closes (and therefore flushes) the file deterministically.
  with open(FLAGS.log_dir + '/' + FLAGS.params_file, 'wb') as params_out:
    pickle.dump(theta, params_out)

  ##======================================================================
  ## STEP 5: Visualization

  # Fold W1 parameters into a matrix format.
  W1 = np.reshape(theta[:hiddenSize * visibleSize], (hiddenSize, visibleSize))

  # Save the visualization to a file.
  displayNetwork(W1.T, file_name = 'weights_digits.jpg')

  return theta
      default='../../data/',
      help='Directory to put the training data.'
  )
  FLAGS, unparsed = parser.parse_known_args()

  # Register the model hyper-parameters after the first parse, then fill them
  # in by parsing synthetic argument lists straight into the FLAGS namespace.
  hyper_params = (
      ('--visibleSize', int),
      ('--hiddenSize', int),
      ('--rho', float),
      ('--decay', float),
      ('--beta', float),
  )
  for flag, flag_type in hyper_params:
    parser.add_argument(flag, type=flag_type)

  # Preset values depend on the chosen data set.
  if FLAGS.input_type == 'natural':
    presets = (
        ('--visibleSize', str(8 * 8)),
        ('--hiddenSize', '25'),
        ('--rho', '0.01'),
        ('--decay', '0.0001'),
        ('--beta', '3'),
    )
  else:
    presets = (
        ('--visibleSize', str(28 * 28)),
        ('--hiddenSize', '196'),
        ('--rho', '0.1'),
        ('--decay', '3e-3'),
        ('--beta', '3'),
    )
  # One parse per flag, in the same order as before.
  for flag, value in presets:
    parser.parse_args(args=[flag, value], namespace=FLAGS)

  # Fixed seed for NumPy's global random generator.
  np.random.seed(1)

  # Train filters.
  weights = train()

  # Display learned filters.
  filters = weights.data.numpy().T
  displayNetwork(filters, file_name = 'weights-' + FLAGS.input_type + '.jpg')
Example #13
0
# Floor division keeps the slice bounds integral on Python 3 as well
# (identical to `/` for ints on Python 2).
trainLabel = labels[labeledIndex[0:totalLabeledSamples // 2]]
# The other half is used for testing.
testData = images[:, labeledIndex[totalLabeledSamples // 2:]]
testLabel = labels[labeledIndex[totalLabeledSamples // 2:]]

## ======================================================================
#  STEP 2: Train a sparse autoencoder on the unlabeled samples and
#  visualise the learned features.
theta = sparseAutoencoder.initialize(hiddenSize, visibleSize)  # initial parameters
options = {'maxiter': 400, 'disp': True}  # optimiser settings
optTheta = sparseAutoencoder.train(theta, visibleSize, hiddenSize, lambda_,
                                   sparsityParam, beta, unlabeledData, options)
# The first hiddenSize * visibleSize entries are reshaped into the
# first-layer weights; they are transposed before being displayed.
W1 = optTheta[0:hiddenSize * visibleSize].reshape(hiddenSize,
                                                  visibleSize).transpose()
displayNetwork.displayNetwork(W1, "stlFeats.png")

## ======================================================================
#  STEP 3: Encode the labeled training and test samples with the learned
#  features.
encodedTrainData = sparseAutoencoder.sparseAutoencoder(optTheta, visibleSize,
                                                       hiddenSize, trainData)
encodedTestData = sparseAutoencoder.sparseAutoencoder(optTheta, visibleSize,
                                                      hiddenSize, testData)

## ======================================================================
#  STEP 4: Train the softmax classifier
# W and b are drawn together from a standard normal distribution.
theta = np.random.randn(numClasses * (hiddenSize + 1))
lambda_ = 1e-4
options_ = {'maxiter': 400, 'disp': True}
model = softmax.buildClassifier(theta, lambda_, numClasses, encodedTrainData,
                                trainLabel, options_)
# Split the labeled samples in half.  Floor division keeps the slice bound
# integral on Python 3 as well (identical to `/` for ints on Python 2).
numTrainSamples = totalLabeledSamples // 2
# First half: training samples.
trainData = images[:, labeledIndex[0:numTrainSamples]]
trainLabel = labels[labeledIndex[0:numTrainSamples]]
# Second half: test samples.
testData = images[:, labeledIndex[numTrainSamples:]]
testLabel = labels[labeledIndex[numTrainSamples:]]

## ======================================================================
#  STEP 2: Train a sparse autoencoder on the unlabeled samples and
#  visualise the learned features.
theta = sparseAutoencoder.initialize(hiddenSize, visibleSize)  # initial parameters
options = {'maxiter': 400, 'disp': True}  # optimiser settings
optTheta = sparseAutoencoder.train(theta, visibleSize, hiddenSize, lambda_, sparsityParam, beta, unlabeledData, options)
# The first hiddenSize * visibleSize entries are reshaped into the
# first-layer weights; they are transposed before being displayed.
W1 = optTheta[0:hiddenSize * visibleSize].reshape(hiddenSize, visibleSize).transpose()
displayNetwork.displayNetwork(W1, "stlFeats.png")

## ======================================================================
#  STEP 3: Encode the labeled training and test samples with the learned
#  features.
encodedTrainData = sparseAutoencoder.sparseAutoencoder(optTheta, visibleSize, hiddenSize, trainData)
encodedTestData = sparseAutoencoder.sparseAutoencoder(optTheta, visibleSize, hiddenSize, testData)

## ======================================================================
#  STEP 4: Train the softmax classifier
# W and b are drawn together from a standard normal distribution.
theta = np.random.randn(numClasses * (hiddenSize + 1))
lambda_ = 1e-4
options_ = {'maxiter': 400, 'disp': True}
model = softmax.buildClassifier(theta, lambda_, numClasses, encodedTrainData, trainLabel, options_)

## ======================================================================
#  STEP 5: Test the trained softmax classifier
# Choose the training-set size; debug mode also shrinks the hidden layer.
if not FLAGS.debug:
  # Digit inputs are larger, so more samples are drawn for them.
  numPatches = 10000 if FLAGS.input_type == 'natural' else 20000
else:
  numPatches = 10
  hiddenSize = 2

# Sample training patches from the data set selected by FLAGS.input_type.
if FLAGS.input_type != 'natural':
  from sampleDigitImages import sampleDigitImages
  patches = sampleDigitImages(FLAGS.input_data_dir, numPatches)
else:
  from sampleNaturalImages import sampleNaturalImages
  patches = sampleNaturalImages('IMAGES.mat', numPatches)

# Render 200 randomly chosen patch columns as a quick sanity check.
preview = patches[:, randint(0, patches.shape[1], 200)]
preview_name = 'patches-' + FLAGS.input_type + '.jpg'
displayNetwork(preview, 8, preview_name)


# Start from randomly initialised parameters.
theta = initializeParameters(hiddenSize, visibleSize)

##======================================================================
## Gradient Checking
#
# Hint: If you are debugging your code, performing gradient checking on smaller models 
# and smaller training sets (e.g., using only 10 training examples and 1-2 hidden 
# units) may speed things up.


if FLAGS.debug:
  # Check your cost function and derivative calculations for the sparse autoencoder.