def sgd_optimization_mnist(learning_rate=0.1, n_epochs=1000,
                           batch_size=600):
    """
    This is the stochastic gradient descent training optimization part.
    :param learning_rate: learning rate used for stochastic gradient.
    :param n_epochs: maximal nnumber of epochs to run the optimizer
    :param batch_size:
    :return:
    """

    trainset, validset, testset = load_mnistdata()
    trainset_x, trainset_y = shared_dataset(trainset)
    validset_x, validset_y = shared_dataset(validset)
    testset_x, testset_y = shared_dataset(testset)

    # borrow=True means the shared variable's underlying value is returned
    # without being deep copied, which is cheaper and safe here because we
    # only read the shape. batch_size is how many samples are processed at a
    # time, so the number of minibatches is the sample count divided
    # (integer division) by batch_size.

    n_train_batches = trainset_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = validset_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = testset_x.get_value(borrow=True).shape[0] // batch_size
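
    # For example, assuming the standard MNIST split of 50000/10000/10000
    # samples (an assumption, not something this code checks) and the default
    # batch_size=600, n_train_batches == 83; any trailing partial minibatch
    # is dropped by the integer division above.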

    print("building model starts....")

    # index is a symbolic (long) integer scalar; it selects which minibatch a
    # compiled function will operate on.
    index = T.lscalar()

    x, y, classifier = initializeClass()

    cost = classifier.negative_log_likelihood(y)

    # Build the training, validation and test functions.
    # batch_size is the number of samples in one minibatch and index selects
    # which minibatch to use; the givens dictionary replaces the symbolic
    # inputs x and y with the corresponding slice of the dataset.

    valid_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: validset_x[index * batch_size: (index + 1) * batch_size],
            y: validset_y[index * batch_size: (index + 1) * batch_size]
        }
    )
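
    # A minimal usage sketch: valid_model(0) returns the error rate on
    # validation samples 0 .. batch_size - 1 (assuming classifier.errors(y)
    # computes the mean zero-one loss over the minibatch, as in the
    # tutorial-style classifier classes), valid_model(1) on the next batch, etc.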

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: testset_x[index * batch_size: (index + 1) * batch_size],
            y: testset_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # The training model is slightly different: every call must also update
    # the model parameters (W and b) using the learning rate.
    # So we first compute the gradients of the cost and then build the update
    # rules from them.

    # gradients of the cost w.r.t. the parameters:
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]
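
    # These pairs implement the plain SGD rule, e.g.
    # W <- W - learning_rate * d(cost)/dW, applied once per minibatch.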

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: trainset_x[index * batch_size: (index + 1) * batch_size],
            y: trainset_y[index * batch_size: (index + 1) * batch_size]
        }
    )
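
    # Each call train_model(i) performs one SGD step on minibatch i and returns
    # that minibatch's negative log-likelihood (computed from the parameter
    # values before the update is applied).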

    print("training the model.")

    # Training phase with early stopping.

    patience = 5000  # look at at least this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found

    improvement_threshold = 0.995  # a relative improvement of this much
                                   # is considered significant

    validation_frequency = min(n_train_batches, patience // 2)
    # in this case we check the validation set at least once per epoch.
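
    # For example, with the assumed 50000 training samples and batch_size=600,
    # n_train_batches == 83 and patience // 2 == 2500, so validation_frequency
    # is 83 and we validate once per epoch.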

    best_validation_loss = np.inf

    testscore = 0

    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0

    # loop over epochs until we run out of epochs or patience
    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        # for each minibatch in the training set
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index
            # every validation_frequency iterations, evaluate on the validation set
            if (iter + 1) % validation_frequency == 0:
                # evaluate the model on every minibatch of the validation set
                validation_losses = [valid_model(i) for i in range(n_valid_batches)]
                this_validation_losses = np.mean(validation_losses)

                print(
                    "epoch %i, minibatches %i / %i, validation error: %f %%" %
                    (epoch, minibatch_index + 1, n_train_batches, this_validation_losses * 100.)
                )
                if this_validation_losses < best_validation_loss:
                    if this_validation_losses < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
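                        # e.g. a significant improvement found at iteration 3000
                        # raises patience to max(5000, 3000 * 2) = 6000 iterations.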
                    best_validation_loss = this_validation_losses

                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    testscore = np.mean(test_losses)

                    print("epoch %i, minibatch %i / %i, test error of best model %f %%",
                          (epoch, minibatch_index + 1, n_train_batches, testscore * 100.)
                    )
                    datadir = HomeDir.GetDataDir()
                    with open(os.path.join(datadir, "logRegressBestModel.pkl"), "wb") as f:
                        pickle.dump(classifier, f)

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()

    print("optimization complete with best validation score of %f %%, with test performance %f %%" %
         (best_validation_loss * 100., testscore * 100.))

    print("the code run for %d epochs, with %f epochs/sec" & (epoch, 1. * epoch / (end_time - start_time)))
def test_mlp(learning_rate=0.01, L1_reg=0.01, L2_reg=0.0001, n_epochs=1000,
             batch_size=20, n_hidden=200):
    """
    This function trains and evaluates the multi-layer perceptron model.

    :param learning_rate: this is the learning rate for stochastic gradient
    :param L1_reg: this is the L1-norm weight when added to the cost
    :param L2_reg: this is the L2-norm weight when added to the cost
    :param n_epochs: this is the number of epochs to run the optimizer
    :param batch_size: this is the number of examples we will use for one iteration
    :param n_hidden: number of neurons in the hidden layer
    :return: None
    """

    trainset, validset, testset = load_mnistdata()
    trainset_x, trainset_y = shared_dataset(trainset)
    validset_x, validset_y = shared_dataset(validset)
    testset_x, testset_y = shared_dataset(testset)

    n_train_batches = trainset_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = validset_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = testset_x.get_value(borrow=True).shape[0] // batch_size

    print("building model starts....")

    index = T.lscalar()
    x, y, MLPclassifier = initializeClass(
            rng=np.random.RandomState(6666),
            n_in=28 * 28,
            n_hidden=n_hidden,
            n_out=10
    )
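
    # initializeClass is assumed to build the MLP with 28 * 28 = 784 input
    # units (one per flattened MNIST pixel), n_hidden hidden units and 10
    # output classes, seeded with the given RandomState.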

    # cost function
    cost = (MLPclassifier.negative_log_likelihood(y) + L1_reg * MLPclassifier.L1_norm +
            L2_reg * MLPclassifier.L2_norm)
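
    # i.e. cost = NLL + L1_reg * L1_norm + L2_reg * L2_norm, where L1_norm and
    # L2_norm are assumed to be the summed absolute values and summed squares
    # of the layer weights (the usual tutorial-style definition).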

    test_model = theano.function(
        inputs=[index],
        outputs=MLPclassifier.errors(y),
        givens={
            x: testset_x[index * batch_size: (index + 1) * batch_size],
            y: testset_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    valid_model = theano.function(
        inputs=[index],
        outputs=MLPclassifier.errors(y),
        givens={
            x: validset_x[index * batch_size: (index + 1) * batch_size],
            y: validset_y[index * batch_size: (index + 1) * batch_size],
        }
    )

    # gradients of the cost w.r.t. every MLP parameter (two W and two b)
    Gparams = [T.grad(cost, param) for param in MLPclassifier.params]
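
    # MLPclassifier.params is assumed to list the parameters in a fixed order
    # (hidden-layer W and b, then output-layer W and b), so Gparams holds the
    # matching gradients in the same order.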

    # update rules: pair every parameter with its SGD-updated value
    updates = [
        (param, param - learning_rate * Gparam)
        for param, Gparam in zip(MLPclassifier.params, Gparams)
    ]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: trainset_x[index * batch_size: (index + 1) * batch_size],
            y: trainset_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    print("now the training begins...")

    patience = 5000  # look at at least this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found

    improvement_threshold = 0.995  # a relative improvement of this much
                                   # is considered significant

    validation_frequency = min(n_train_batches, patience // 2)
    # in this case we check the validation set at least once per epoch.
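
    # For example, with the assumed 50000 training samples and batch_size=20,
    # n_train_batches == 2500 and patience // 2 == 2500, so we again validate
    # once per epoch.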

    best_validation_loss = np.inf

    best_iter = 0

    testscore = 0

    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0

    # loop over epochs until we run out of epochs or patience
    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        # for each minibatch in the training set
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index
            # every validation_frequency iterations, evaluate on the validation set
            if (iter + 1) % validation_frequency == 0:
                # evaluate the model on every minibatch of the validation set
                validation_losses = [valid_model(i) for i in range(n_valid_batches)]
                this_validation_losses = np.mean(validation_losses)

                print(
                    "epoch %i, minibatches %i / %i, validation error: %f %%" %
                    (epoch, minibatch_index + 1, n_train_batches, this_validation_losses * 100.)
                )
                if this_validation_losses < best_validation_loss:
                    if this_validation_losses < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_losses

                    best_iter = iter

                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    testscore = np.mean(test_losses)

                    print("epoch %i, minibatch %i / %i, test error of best model %f %%",
                          (epoch, minibatch_index + 1, n_train_batches, testscore * 100.)
                    )
                    datadir = HomeDir.GetDataDir()
                    with open(os.path.join(datadir, "logRegressBestModel.pkl"), "wb") as f:
                        pickle.dump(MLPclassifier, f)

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()

    print("optimization complete with best validation score of %f %%, obtained in iteration %d, with test performance %f %%" %
          (best_validation_loss * 100., best_iter + 1, testscore * 100.))

    print("the code run for %d epochs, with %f epochs/sec" % (epoch, 1. * epoch / (end_time - start_time)))