# Imports assumed by the snippets below (the original listing omits them).
# cnn_lenet and seq2mat are local support modules, not PyPI packages, and
# get_lenet() is assumed to be defined alongside these functions.
import copy
import pickle
import random
import time

import matplotlib.pyplot as plt
import numpy as np

import cnn_lenet
import seq2mat


def main():
    # define lenet
    layers = get_lenet()

    # load data
    # set fullset to False to load only a small subset of the dataset
    fullset = True
    print("Loading MNIST Dataset...")
    xtrain, ytrain, xval, yval, xtest, ytest = cnn_lenet.load_mnist(fullset)
    print("MNIST Dataset Loading Complete!\n")

    # fold the validation split into the training set
    xtrain = np.hstack([xtrain, xval])
    ytrain = np.hstack([ytrain, yval])
    m_train = xtrain.shape[1]

    # cnn hyperparameters
    batch_size = 64
    mu = 0.9               # momentum
    epsilon = 0.01         # base learning rate
    gamma = 0.0001         # learning-rate decay factor
    power = 0.75           # learning-rate decay exponent
    weight_decay = 0.0005  # weight-decay strength
    w_lr = 1               # learning-rate multiplier for weights
    b_lr = 2               # learning-rate multiplier for biases

    test_interval = 15
    display_interval = 15
    snapshot = 5000
    max_iter = 30

    # initialize parameters
    print("Initializing Parameters...")
    params = cnn_lenet.init_convnet(layers)
    param_winc = copy.deepcopy(params)
    print("Initilization Complete!\n")

    for l_idx in range(1, len(layers)):
        param_winc[l_idx]['w'] = np.zeros(param_winc[l_idx]['w'].shape)
        param_winc[l_idx]['b'] = np.zeros(param_winc[l_idx]['b'].shape)

    # learning iterations
    random.seed(100000)
    indices = list(range(m_train))
    random.shuffle(indices)

    print("Training Started. Printing report on training data every " +
          str(display_interval) + " steps.")
    print("Printing report on test data every " + str(test_interval) +
          " steps.\n")
    train_acc_100 = []
    test_acc_100 = []
    train_cost_100 = []
    test_cost_100 = []

    program_starts = time.time()
    for step in range(max_iter):
        # get mini-batch and setup the cnn with the mini-batch
        start_idx = step * batch_size % m_train
        end_idx = (step + 1) * batch_size % m_train
        if start_idx > end_idx:
            random.shuffle(indices)
            continue
        idx = indices[start_idx:end_idx]
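        # Wraparound example for the check above: with the full MNIST training
        # set (m_train = 60000) and batch_size = 64, step 937 gives start_idx =
        # 59968 but end_idx = (938 * 64) % 60000 = 32, so start_idx > end_idx:
        # the epoch boundary was crossed, and we reshuffle and skip this step.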

        layers[2]['iteration'] = step
        cp, param_grad = cnn_lenet.conv_net(params, layers, xtrain[:, idx],
                                            ytrain[idx], True)

        # we have different epsilons for w and b (see the get_lr sketch after
        # this function)
        w_rate = cnn_lenet.get_lr(step, epsilon * w_lr, gamma, power)
        b_rate = cnn_lenet.get_lr(step, epsilon * b_lr, gamma, power)
        params, param_winc = cnn_lenet.sgd_momentum(w_rate, b_rate, mu,
                                                    weight_decay, params,
                                                    param_winc, param_grad)

        # display training loss
        if (step + 1) % display_interval == 0:
            train_cost_100.append(cp['cost'])
            train_acc_100.append(cp['percent'])
            print('training_cost = %f training_accuracy = %f current_step = %d'
                  % (cp['cost'], cp['percent'], step + 1))
            now = time.time()
            print("It has been {0} seconds since the training started".format(
                now - program_starts))

        # display test accuracy
        if (step + 1) % test_interval == 0:
            # temporarily widen the batch to cover the whole test set
            layers[1]['batch_size'] = xtest.shape[1]
            cptest, _ = cnn_lenet.conv_net(params, layers, xtest, ytest, False)
            layers[1]['batch_size'] = 64  # restore the training batch size
            test_cost_100.append(cptest['cost'])
            test_acc_100.append(cptest['percent'])
            print('test_cost = %f test_accuracy = %f current_step = %d\n'
                  % (cptest['cost'], cptest['percent'], step + 1))

        # save params periodically to recover from any crashes
        if (step + 1) % snapshot == 0:
            pickle_path = 'lenet.mat'
            pickle_file = open(pickle_path, 'wb')
            pickle.dump(params, pickle_file)
            pickle_file.close()
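
# A minimal sketch of the learning-rate schedule used above. cnn_lenet.get_lr
# is not shown in this listing; its arguments (step, base rate, gamma, power)
# match Caffe's "inv" policy, so a plausible implementation is:
def get_lr_sketch(step, base_lr, gamma, power):
    # the rate decays smoothly from base_lr as training progresses
    return base_lr * (1.0 + gamma * step) ** (-power)
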
def main():
    # define lenet
    layers = get_lenet()

    # load data
    # change the following value to true to load the entire dataset
    fullset = False
    xtrain, ytrain, xval, yval, xtest, ytest = cnn_lenet.load_mnist(fullset)

    xtrain = np.hstack([xtrain, xval])
    ytrain = np.hstack([ytrain, yval])
    m_train = xtrain.shape[1]

    # cnn parameters
    batch_size = 64
    mu = 0.9
    epsilon = 0.01
    gamma = 0.0001
    power = 0.75
    weight_decay = 0.0005
    w_lr = 1
    b_lr = 2

    test_interval = 200
    display_interval = 50
    snapshot = 500
    max_iter = 10000

    # initialize parameters
    params = cnn_lenet.init_convnet(layers)
    param_winc = copy.deepcopy(params)

    for l_idx in range(1, len(layers)):
        param_winc[l_idx]['w'] = np.zeros(param_winc[l_idx]['w'].shape)
        param_winc[l_idx]['b'] = np.zeros(param_winc[l_idx]['b'].shape)

    # learning iterations
    indices = list(range(m_train))
    random.shuffle(indices)
    for step in range(max_iter):
        # get mini-batch and setup the cnn with the mini-batch
        start_idx = step * batch_size % m_train
        end_idx = (step+1) * batch_size % m_train
        if start_idx > end_idx:
            random.shuffle(indices)
            continue
        idx = indices[start_idx: end_idx]

        cp, param_grad = cnn_lenet.conv_net(params, layers, xtrain[:, idx],
                                            ytrain[idx])

        # we have different epsilons for w and b
        w_rate = cnn_lenet.get_lr(step, epsilon*w_lr, gamma, power)
        b_rate = cnn_lenet.get_lr(step, epsilon*b_lr, gamma, power)
        # (the momentum update itself is sketched after this function)
        params, param_winc = cnn_lenet.sgd_momentum(w_rate, b_rate, mu,
                                                    weight_decay, params,
                                                    param_winc, param_grad)

        # display training loss
        if (step+1) % display_interval == 0:
            print('cost = %f training_percent = %f' % (cp['cost'], cp['percent']))

        # display test accuracy
        if (step+1) % test_interval == 0:
            layers[1]['batch_size'] = xtest.shape[1]
            cptest, _ = cnn_lenet.conv_net(params, layers, xtest, ytest)
            layers[1]['batch_size'] = 64
            print('\ntest accuracy: %f\n' % cptest['percent'])

        # save params periodically to recover from any crashes
        if (step+1) % snapshot == 0:
            pickle_path = 'lenet.mat'
            pickle_file = open(pickle_path, 'wb')
            pickle.dump(params, pickle_file)
            pickle_file.close()
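
# A minimal sketch of the momentum update the training loops above rely on.
# cnn_lenet.sgd_momentum is not shown in this listing; judging from its
# arguments (separate w/b rates, momentum mu, weight decay, and a running
# "winc" buffer), a plausible implementation is the standard rule
# winc = mu*winc + lr*(grad + weight_decay*param); param -= winc:
def sgd_momentum_sketch(w_rate, b_rate, mu, decay, params, param_winc, param_grad):
    for l_idx in range(1, len(params)):
        for key, rate in (('w', w_rate), ('b', b_rate)):
            grad = param_grad[l_idx][key] + decay * params[l_idx][key]
            param_winc[l_idx][key] = mu * param_winc[l_idx][key] + rate * grad
            params[l_idx][key] = params[l_idx][key] - param_winc[l_idx][key]
    return params, param_winc
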
def trainNet():
    # define lenet
    layers = get_lenet()

    # load data
    # set fullset to False to load only a small subset of the dataset
    fullset = True
    print("Loading MNIST Dataset...")
    xtrain, ytrain, xval, yval, xtest, ytest = cnn_lenet.load_mnist(fullset)
    print("MNIST Dataset Loading Complete!\n")

    xtrain = np.hstack([xtrain, xval])
    ytrain = np.hstack([ytrain, yval])
    m_train = xtrain.shape[1]

    # cnn parameters
    batch_size = 64
    mu = 0.9
    epsilon = 0.01
    gamma = 0.0001
    power = 0.75
    weight_decay = 0.0005
    w_lr = 1
    b_lr = 2

    test_interval = 100
    display_interval = 100
    snapshot = 5000
    max_iter = 10000  # lets training run the entire way

    # initialize parameters
    print("Initializing Parameters...")
    # To visualize results from a trained network, replace these freshly
    # initialized params with your own saved ones.
    params = cnn_lenet.init_convnet(layers)
    param_winc = copy.deepcopy(params)
    print("Initilization Complete!\n")

    for l_idx in range(1, len(layers)):
        param_winc[l_idx]['w'] = np.zeros(param_winc[l_idx]['w'].shape)
        param_winc[l_idx]['b'] = np.zeros(param_winc[l_idx]['b'].shape)

    # learning iterations
    random.seed(100000)
    indices = list(range(m_train))
    random.shuffle(indices)

    train_cost = np.array([])
    train_accuracy = np.array([])
    test_cost = np.array([])
    test_accuracy = np.array([])

    print("Training Started. Printing report on training data every " +
          str(display_interval) + " steps.")
    print("Printing report on test data every " + str(test_interval) +
          " steps.\n")
    for step in range(max_iter):
        # get mini-batch and setup the cnn with the mini-batch
        start_idx = step * batch_size % m_train
        end_idx = (step + 1) * batch_size % m_train
        if start_idx > end_idx:
            random.shuffle(indices)
            continue
        idx = indices[start_idx:end_idx]

        cp, param_grad = cnn_lenet.conv_net(params, layers, xtrain[:, idx],
                                            ytrain[idx], True)
        # The final True enables backpropagation; pass False to run only the
        # forward pass (e.g., for visualization). To inspect intermediate
        # activations, conv_net would have to return its layer outputs so they
        # can be reshaped into image matrices for display.

        # we have different epsilons for w and b
        w_rate = cnn_lenet.get_lr(step, epsilon * w_lr, gamma, power)
        b_rate = cnn_lenet.get_lr(step, epsilon * b_lr, gamma, power)
        params, param_winc = cnn_lenet.sgd_momentum(w_rate, b_rate, mu,
                                                    weight_decay, params,
                                                    param_winc, param_grad)

        # display training loss
        if (step + 1) % display_interval == 0:
            print('training_cost = %f training_accuracy = %f current_step = %d'
                  % (cp['cost'], cp['percent'], step + 1))
            train_cost = np.append(train_cost, cp['cost'])
            train_accuracy = np.append(train_accuracy, cp['percent'])

        # display test accuracy
        if (step + 1) % test_interval == 0:
            layers[1]['batch_size'] = xtest.shape[1]
            cptest, _ = cnn_lenet.conv_net(params, layers, xtest, ytest, False)
            layers[1]['batch_size'] = 64
            print('test_cost = %f test_accuracy = %f current_step = %d\n'
                  % (cptest['cost'], cptest['percent'], step + 1))
            test_cost = np.append(test_cost, cptest['cost'])
            test_accuracy = np.append(test_accuracy, cptest['percent'])

        # save params periodically to recover from any crashes (restoring is
        # sketched after this function)
        if (step + 1) % snapshot == 0:
            pickle_path = 'lenet.mat'
            pickle_file = open(pickle_path, 'wb')
            pickle.dump(params, pickle_file)
            pickle_file.close()

        # Save params at iteration 30 (for Question 4)
        if (step + 1) == 30:
            pickle_path = 'lenetAt30Iterations.mat'
            pickle_file = open(pickle_path, 'wb')
            pickle.dump(params, pickle_file)
            pickle_file.close()

        if (step + 1) == max_iter:
            np.savetxt('trainCost.txt', train_cost)
            np.savetxt('trainAccuracy.txt', train_accuracy)
            np.savetxt('testCost.txt', test_cost)
            np.savetxt('testAccuracy.txt', test_accuracy)
            # np.savetxt('costsStacked.txt', np.column_stack((train_cost, test_cost)))
            # np.savetxt('accuracyStacked.txt', np.column_stack((train_accuracy, test_accuracy)))
            pickle_path = 'lenetAt10000Iterations.mat'
            pickle_file = open(pickle_path, 'wb')
            pickle.dump(params, pickle_file)
            pickle_file.close()

        # NOTE: unreachable as written; step ranges over [0, max_iter), so
        # step == max_iter never occurs (cf. the (step + 1) check above).
        if step == max_iter:
            np.savetxt('trainCost1.txt', train_cost)
            np.savetxt('trainAccuracy1.txt', train_accuracy)
            np.savetxt('testCost1.txt', test_cost)
            np.savetxt('testAccuracy1.txt', test_accuracy)
            # np.savetxt('costsStacked1.txt', np.column_stack((train_cost, test_cost)))
            # np.savetxt('accuracyStacked1.txt', np.column_stack((train_accuracy, test_accuracy)))
            pickle_path = 'lenetAtMAXPLUSONEIterations.mat'
            pickle_file = open(pickle_path, 'wb')
            pickle.dump(params, pickle_file)
            pickle_file.close()
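
# Restoring from a snapshot (a sketch; the last example in this listing does
# the same thing interactively). The snapshots above are plain pickles despite
# the .mat extension, so they load with pickle rather than scipy.io:
def load_snapshot_sketch(pickle_path='lenet.mat'):
    with open(pickle_path, 'rb') as pickle_file:
        return pickle.load(pickle_file)
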
def main():
    # define lenet
    layers = get_lenet()

    # load data
    # set fullset to False to load only a small subset of the dataset
    fullset = True
    print("Loading MNIST Dataset...")
    xtrain, ytrain, xval, yval, xtest, ytest = cnn_lenet.load_mnist(fullset)
    print("MNIST Dataset Loading Complete!\n")

    xtrain = np.hstack([xtrain, xval])
    ytrain = np.hstack([ytrain, yval])
    m_train = xtrain.shape[1]

    # cnn parameters
    batch_size = 64
    mu = 0.9
    epsilon = 0.01
    gamma = 0.0001
    power = 0.75
    weight_decay = 0.0005
    w_lr = 1
    b_lr = 2

    test_interval = 100
    display_interval = 100
    snapshot = 5000
    max_iter = 10000

    # initialize parameters
    print("Initializing Parameters...")
    params = cnn_lenet.init_convnet(layers)
    param_winc = copy.deepcopy(params)
    print("Initilization Complete!\n")

    for l_idx in range(1, len(layers)):
        param_winc[l_idx]['w'] = np.zeros(param_winc[l_idx]['w'].shape)
        param_winc[l_idx]['b'] = np.zeros(param_winc[l_idx]['b'].shape)

    # learning iterations
    random.seed(100000)
    indices = list(range(m_train))
    random.shuffle(indices)

    print("Training Started. Printing report on training data every " +
          str(display_interval) + " steps.")
    print("Printing report on test data every " + str(test_interval) +
          " steps.\n")

    startTime = time.time()
    numSamples = int(max_iter / display_interval)
    costResults = np.zeros((2, numSamples))
    accuracyResults = np.zeros((2, numSamples))

    displayIndex = 0
    for step in range(max_iter):
        print("current step is: {}".format(step))
        # get mini-batch and setup the cnn with the mini-batch
        start_idx = step * batch_size % m_train
        end_idx = (step + 1) * batch_size % m_train
        if start_idx > end_idx:
            random.shuffle(indices)
            continue
        idx = indices[start_idx:end_idx]

        output, cp, param_grad = cnn_lenet.conv_net(params, layers,
                                                    xtrain[:, idx],
                                                    ytrain[idx], True)

        # we have different epsilons for w and b
        w_rate = cnn_lenet.get_lr(step, epsilon * w_lr, gamma, power)
        b_rate = cnn_lenet.get_lr(step, epsilon * b_lr, gamma, power)
        params, param_winc = cnn_lenet.sgd_momentum(w_rate, b_rate, mu,
                                                    weight_decay, params,
                                                    param_winc, param_grad)

        # display training loss
        if (step + 1) % display_interval == 0:
            print('training_cost = %f training_accuracy = %f current_step = %d'
                  % (cp['cost'], cp['percent'], step + 1))
            costResults[0, displayIndex] = cp['cost']
            accuracyResults[0, displayIndex] = cp['percent']

        # display test accuracy
        if (step + 1) % test_interval == 0:
            layers[1]['batch_size'] = xtest.shape[1]
            _, cptest, _ = cnn_lenet.conv_net(params, layers, xtest, ytest,
                                              False)
            layers[1]['batch_size'] = 64
            print('test_cost = %f test_accuracy = %f current_step = %d\n'
                  % (cptest['cost'], cptest['percent'], step + 1))
            costResults[1, displayIndex] = cptest['cost']
            accuracyResults[1, displayIndex] = cptest['percent']
            displayIndex += 1

        # save params periodically to recover from any crashes
        if (step + 1) % snapshot == 0:
            pickle_path = 'lenet.mat'
            pickle_file = open(pickle_path, 'wb')
            pickle.dump(params, pickle_file)
            pickle_file.close()

        # print(output[1]['data'].shape, output[2]['data'].shape, output[3]['data'].shape)
        if step == 29:
            layers[1]['batch_size'] = 1
            output, _, _ = cnn_lenet.conv_net(params, layers, xtest, ytest,
                                              False)
            layers[1]['batch_size'] = 64
            printableData = output[2]['data'][:, 0]
            printableData.shape = [24, 24, 20]
            plt.figure()
            for num in range(20):
                plt.subplot(4, 5, num + 1)
                plt.imshow(printableData[:, :, num])
            plt.show()

    endTime = time.time()

    print("training took {0} seconds".format(endTime - startTime))
    pickle.dump(accuracyResults, open('accuracyResults.p', 'wb'))
    pickle.dump(costResults, open('costResults.p', 'wb'))

    layers[1]['batch_size'] = 1
    output, _, _ = cnn_lenet.conv_net(params, layers, xtest, ytest, False)
    layers[1]['batch_size'] = 64
    pickle.dump(output, open('output.p', 'wb'))

    # print(output[1]['data'].shape, output[2]['data'].shape,)
    print("printing original image")
    printableData = output[1]['data'][:, 0]
    printableData.shape = [28, 28]
    plt.figure()
    plt.imshow(printableData)
    plt.show()

    print("printing output 2")
    printableData = output[2]['data'][:, 0]
    printableData.shape = [24, 24, 20]
    plt.figure()
    for num in range(20):
        plt.subplot(4, 5, num + 1)
        plt.imshow(printableData[:, :, num])
    plt.show()
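
    # The reshapes above and below follow LeNet's geometry (assuming the usual
    # configuration): 20 conv filters of size 5x5 over a 28x28 input yield 20
    # feature maps of (28 - 5 + 1) = 24 x 24, and the 2x2 max-pool that follows
    # halves them to 12x12, hence the [12, 12, 20] reshape below.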

    print("printing output 3")
    printableData = output[3]['data'][:, 0]
    printableData.shape = [12, 12, 20]

    plt.figure()
    for num in range(20):
        plt.subplot(4, 5, num + 1)
        plt.imshow(printableData[:, :, num])
    plt.show()

    plt.figure()
    plt.plot(np.linspace(1, numSamples, numSamples),
             costResults[0, :],
             color='k',
             linewidth=3,
             label='train cost')
    plt.plot(np.linspace(1, numSamples, numSamples),
             costResults[1, :],
             color='r',
             linewidth=3,
             linestyle=':',
             label='test cost')
    plt.title('cost comparison')
    plt.legend()
    plt.xlabel('iteration/100')
    plt.ylabel('cost')
    plt.grid()
    plt.show()

    plt.figure()
    plt.plot(np.linspace(1, numSamples, numSamples),
             accuracyResults[0, :],
             color='k',
             linewidth=3,
             label='train accuracy')
    plt.plot(np.linspace(1, numSamples, numSamples),
             accuracyResults[1, :],
             color='r',
             linewidth=3,
             linestyle=':',
             label='test accuracy')
    plt.title('accuracy comparison')
    plt.legend()
    plt.xlabel('iteration/100')
    plt.ylabel('accuracy')
    plt.grid()
    plt.show()
def main():
  # define lenet
  layers = get_lenet()

  # buffers for saved weight snapshots and for the training/test logs
  mat1 = np.zeros((1, 1))
  mat2 = np.zeros((1, 1))
  mat1traic = np.zeros((1, 1))   # training cost
  mat1testc = np.zeros((1, 1))   # test cost
  mat1traiac = np.zeros((1, 1))  # training accuracy
  mat1testac = np.zeros((1, 1))  # test accuracy

  # load data
  # set fullset to False to load only a small subset of the dataset
  fullset = True
  print("Loading MNIST Dataset...")
  xtrain, ytrain, xval, yval, xtest, ytest = cnn_lenet.load_mnist(fullset)
  print("MNIST Dataset Loading Complete!\n")

  xtrain = np.hstack([xtrain, xval])
  ytrain = np.hstack([ytrain, yval])
  m_train = xtrain.shape[1]

  # cnn parameters
  batch_size = 64
  mu = 0.9
  epsilon = 0.01
  gamma = 0.0001
  power = 0.75
  weight_decay = 0.0005
  w_lr = 1
  b_lr = 2

  test_interval = 100
  display_interval = 100
  snapshot = 5000
  max_iter = 10000

  # initialize parameters
  print("Initializing Parameters...")
  params = cnn_lenet.init_convnet(layers)
  param_winc = copy.deepcopy(params)
  print("Initilization Complete!\n")

  for l_idx in range(1, len(layers)):
    param_winc[l_idx]['w'] = np.zeros(param_winc[l_idx]['w'].shape)
    param_winc[l_idx]['b'] = np.zeros(param_winc[l_idx]['b'].shape)

  # learning iterations
  random.seed(100000)
  indices = list(range(m_train))
  random.shuffle(indices)

  print("Training Started. Printing report on training data every " + str(display_interval) + " steps.")
  print("Printing report on test data every " + str(test_interval) + " steps.\n")
  for step in range(max_iter):
    # get mini-batch and setup the cnn with the mini-batch
    start_idx = step * batch_size % m_train
    end_idx = (step+1) * batch_size % m_train
    if start_idx > end_idx:
      random.shuffle(indices)
      continue
    idx = indices[start_idx: end_idx]

    cp, param_grad = cnn_lenet.conv_net(params, layers, xtrain[:, idx],
                                        ytrain[idx], True)

    # we have different epsilons for w and b
    w_rate = cnn_lenet.get_lr(step, epsilon*w_lr, gamma, power)
    b_rate = cnn_lenet.get_lr(step, epsilon*b_lr, gamma, power)
    params, param_winc = cnn_lenet.sgd_momentum(w_rate, b_rate, mu,
                                                weight_decay, params,
                                                param_winc, param_grad)

    # display training loss
    if (step+1) % display_interval == 0:
      print('training_cost = %f training_accuracy = %f current_step = %d'
            % (cp['cost'], cp['percent'], step + 1))

    # TODO: show the input to the max-pooling layer

    # display test accuracy
    if (step+1) % test_interval == 0:
      layers[1]['batch_size'] = xtest.shape[1]
      cptest, _ = cnn_lenet.conv_net(params, layers, xtest, ytest, False)
      layers[1]['batch_size'] = 64
      print('test_cost = %f test_accuracy = %f current_step = %d\n'
            % (cptest['cost'], cptest['percent'], step + 1))

    # save params periodically to recover from any crashes
    if (step+1) % snapshot == 0:
      pickle_path = 'lenet.mat'
      pickle_file = open(pickle_path, 'wb')
      pickle.dump(params, pickle_file)
      pickle_file.close()

    if (step+1) % test_interval == 0:
      # prepend the latest values; the logs end up in reverse chronological order
      mat1traic = np.insert(mat1traic, 0, cp['cost'])
      mat1testc = np.insert(mat1testc, 0, cptest['cost'])
      mat1traiac = np.insert(mat1traiac, 0, cp['percent'])
      mat1testac = np.insert(mat1testac, 0, cptest['percent'])
      print(mat1traic)

      
    if (step+1) == 30:
      pickle_path = 'lay2it30w.mat'
      pickle_file = open(pickle_path, 'wb')
      mat1 = params[1]['w']
      pickle.dump(mat1, pickle_file)
      pickle_file.close()
      pickle_path = 'lay2it30b.mat'
      pickle_file = open(pickle_path, 'wb')
      mat1 = params[1]['b']
      pickle.dump(mat1, pickle_file)
      pickle_file.close()
      pickle_path = 'inputdata30it.mat'
      pickle_file = open(pickle_path, 'wb')
      mat1 = xtrain[:, idx]
      pickle.dump(mat1, pickle_file)
      pickle_file.close()      

    if (step+1) == 10000:  # matches max_iter and the filenames below
      pickle_path = 'lay2it10000w.mat'
      pickle_file = open(pickle_path, 'wb')
      mat2 = params[1]['w']
      pickle.dump(mat2, pickle_file)
      pickle_file.close()
      pickle_path = 'lay2it10000b.mat'
      pickle_file = open(pickle_path, 'wb')
      mat2 = params[1]['b']
      pickle.dump(mat2, pickle_file)
      pickle_file.close()
      pickle_path = 'inputdata10000it.mat'
      pickle_file = open(pickle_path, 'wb')
      mat2 = xtrain[:, idx]
      pickle.dump(mat2, pickle_file)
      pickle_file.close()

      # final save of the cost/accuracy logs (read back in the sketch after
      # this function)
      pickle_path = 'trainingcost.mat'
      pickle_file = open(pickle_path, 'wb')
      pickle.dump(mat1traic, pickle_file)
      pickle_file.close()

      pickle_path = 'testcost.mat'
      pickle_file = open(pickle_path, 'wb')
      pickle.dump(mat1testc, pickle_file)
      pickle_file.close()

      pickle_path = 'trainingaccu.mat'
      pickle_file = open(pickle_path, 'wb')
      pickle.dump(mat1traiac, pickle_file)
      pickle_file.close()

      pickle_path = 'testaccu.mat'
      pickle_file = open(pickle_path, 'wb')
      pickle.dump(mat1testac, pickle_file)
      pickle_file.close()
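
# Reading the logs back for plotting (a sketch; the .mat files written above
# are plain pickles, and the arrays were built newest-first, so reverse them
# before plotting):
def plot_cost_logs_sketch():
    with open('trainingcost.mat', 'rb') as f:
        train_cost = pickle.load(f)[::-1]
    with open('testcost.mat', 'rb') as f:
        test_cost = pickle.load(f)[::-1]
    plt.figure()
    plt.plot(train_cost, label='train cost')
    plt.plot(test_cost, label='test cost')
    plt.xlabel('checkpoint')
    plt.ylabel('cost')
    plt.legend()
    plt.show()
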
def main():
    # define lenet
    layers = get_lenet()

    # load data
    # fullset is unused here; the MNIST loader below is commented out
    fullset = False
    # xtrain, ytrain, xval, yval, xtest, ytest = cnn_lenet.load_mnist(fullset)
    negPath = "/home/lui/CMU/Clinical-feature-learning/dataset/negative.txt"
    posPath = "/home/lui/CMU/Clinical-feature-learning/dataset/positive.txt"
    XTrain, yTrain = seq2mat.genTrain(posPath, negPath)
    print(XTrain.shape, yTrain.shape)
    # hold out the last tenth of the samples as a test set
    threshold = XTrain.shape[1] // 10 * 9
    print("threshold: ", threshold)
    XTrainTrue = XTrain[:, :threshold]
    Xtest = XTrain[:, threshold:]
    yTrainTrue = yTrain[:threshold]
    ytest = yTrain[threshold:]
    print("finished loading data")
    print("shape: xTrain, yTrain, xTest, yTest")
    print(XTrainTrue.shape, yTrainTrue.shape, Xtest.shape, ytest.shape)
    # xtrain = np.hstack([xtrain, xval])
    # ytrain = np.hstack([ytrain, yval])
    m_train = XTrainTrue.shape[1]

    # cnn parameters
    batch_size = 64
    mu = 0.9
    epsilon = 0.01
    gamma = 0.0001
    power = 0.75
    weight_decay = 0.0005
    w_lr = 1
    b_lr = 2

    test_interval = 500
    display_interval = 10
    snapshot = 100
    max_iter = 10000

    # compare the reply text: bool() of any non-empty string, even "False", is True
    loadPara = input("load parameters, True or False: ").strip().lower() == "true"
    if loadPara:
        pickle_path = 'lenet.mat'
        pickle_file = open(pickle_path, 'rb')
        params = pickle.load(pickle_file)
        param_winc = copy.deepcopy(params)
    else:
        # initialize parameters
        params = cnn_lenet.init_convnet(layers)
        param_winc = copy.deepcopy(params)

    for l_idx in range(1, len(layers)):
        param_winc[l_idx]['w'] = np.zeros(param_winc[l_idx]['w'].shape)
        param_winc[l_idx]['b'] = np.zeros(param_winc[l_idx]['b'].shape)

    # learning iterations
    indices = list(range(m_train))
    random.shuffle(indices)
    # start from iteration 1500 (presumably to resume a previously saved run)
    for step in range(1500, max_iter):
        # get mini-batch and setup the cnn with the mini-batch
        start_idx = step * batch_size % m_train
        end_idx = (step + 1) * batch_size % m_train
        if start_idx > end_idx:
            random.shuffle(indices)
            continue
        idx = indices[start_idx:end_idx]

        cp, param_grad = cnn_lenet.conv_net(params, layers, XTrainTrue[:, idx],
                                            yTrainTrue[idx])

        # we have different epsilons for w and b
        w_rate = cnn_lenet.get_lr(step, epsilon * w_lr, gamma, power)
        b_rate = cnn_lenet.get_lr(step, epsilon * b_lr, gamma, power)
        params, param_winc = cnn_lenet.sgd_momentum(w_rate, b_rate, mu,
                                                    weight_decay, params,
                                                    param_winc, param_grad)

        # display training loss
        if (step + 1) % display_interval == 0:
            print('cost = %f training_percent = %f'
                  % (cp['cost'], cp['percent']))

        # display test accuracy
        if (step + 1) % test_interval == 0:
            layers[1]['batch_size'] = Xtest.shape[1]
            cptest, _ = cnn_lenet.conv_net(params, layers, Xtest, ytest)
            layers[1]['batch_size'] = 64
            print('\ntest accuracy: %f\n' % cptest['percent'])

        # save params periodically to recover from any crashes
        if (step + 1) % snapshot == 0:
            pickle_path = 'lenet.mat'
            pickle_file = open(pickle_path, 'wb')
            pickle.dump(params, pickle_file)
            pickle_file.close()
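
# Entry point (assumed; the original listing only defines the functions, and
# with several definitions of main() in one file, the last one wins):
if __name__ == '__main__':
    main()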