コード例 #1
0
def miniBatchStochasticGradientDescent(model, momentum, _lambda, _alpha,
                                       _learning_rate):
    '''
        Input:
            model: Dictionary containing all parameters of the model
            momentum: Check add_momentum() function in utils.py to understand this parameter
            _lambda: Regularization constant
            _alpha: Momentum hyperparameter
            _learning_rate: Learning rate for the update

        Note: You can learn more about momentum here: https://blog.paperspace.com/intro-to-optimization-momentum-rmsprop-adam/

        Returns: Updated model
    '''

    for module_name, module in model.items():

        # check if a module has learnable parameters
        if hasattr(module, 'params'):
            for key, _ in module.params.items():
                g = module.gradient[key] + _lambda * module.params[key]

                if _alpha > 0.0:

                    #################################################################################
                    # TODO: Update momentun using the formula:
                    # m = alpha * m - learning_rate * g (Check add_momentum() function in utils file)
                    # And update model parameter
                    #################################################################################
                    momentum = add_momentum(model)
                    momentum[module_name + '_' +
                             key] = _alpha * momentum[module_name + '_' +
                                                      key] - _learning_rate * g
                    module.params[key] += momentum[module_name + '_' + key]

                else:

                    #################################################################################
                    # TODO: update model parameter without momentum
                    #################################################################################

                    module.params[key] -= _learning_rate * g

    return model
コード例 #2
0
def main(main_params, optimization_type="minibatch_sgd"):

    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(
        dataset=main_params['input_file'])
    N_train, d = Xtrain.shape
    N_val, _ = Xval.shape

    index = np.arange(10)
    unique, counts = np.unique(Ytrain, return_counts=True)
    counts = dict(zip(unique, counts)).values()

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    ### building/defining MLP ###
    """
    In this script, we are going to build a MLP for a 10-class classification problem on MNIST.
    The network structure is input --> linear --> relu --> dropout --> linear --> softmax_cross_entropy loss
    the hidden_layer size (num_L1) is 1000
    the output_layer size (num_L2) is 10
    """
    model = dict()
    num_L1 = 1000
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 10
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])
    _activation = main_params['activation']

    if _activation == 'relu':
        act = relu
    else:
        act = tanh

    # create objects (modules) from the module classes
    model['L1'] = linear_layer(input_D=d, output_D=num_L1)
    model['nonlinear1'] = act()
    model['drop1'] = dropout(r=_dropout_rate)
    model['L2'] = linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = softmax_cross_entropy()

    # Momentum
    if _alpha > 0.0:
        momentum = add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    train_loss_record = []
    val_loss_record = []

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0
        val_loss = 0.0

        for i in range(int(np.floor(N_train / minibatch_size))):

            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size:(i + 1) *
                                                  minibatch_size])

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=True)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)

            ### backward ###
            grad_a2 = model['loss'].backward(a2, y)
            ######################################################################################
            # TODO: Call the backward methods of every layer in the model in reverse order
            # We have given the first and last backward calls
            # Do not modify them.
            ######################################################################################
            grad_d1 = model['L2'].backward(d1, grad_a2)
            grad_h1 = model['drop1'].backward(h1, grad_d1)
            grad_a1 = model['nonlinear1'].backward(a1, grad_h1)
            #raise NotImplementedError("Not Implemented BACKWARD PASS in main()")

            ######################################################################################
            # NOTE: DO NOT MODIFY CODE BELOW THIS, until next TODO
            ######################################################################################
            grad_x = model['L1'].backward(x, grad_a1)

            ### gradient_update ###
            model = miniBatchGradientDescent(model, momentum, _lambda, _alpha,
                                             _learning_rate)

        ### Computing training accuracy and obj ###
        for i in range(int(np.floor(N_train / minibatch_size))):

            x, y = trainSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            ######################################################################################
            # TODO: Call the forward methods of every layer in the model in order
            # Check above forward code
            # Make sure to keep train as False
            ######################################################################################

            #raise NotImplementedError("Not Implemented COMPUTING TRAINING ACCURACY in main()")
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)

            ######################################################################################
            # NOTE: DO NOT MODIFY CODE BELOW THIS, until next TODO
            ######################################################################################

            loss = model['loss'].forward(a2, y)
            train_loss += loss
            train_acc += np.sum(predict_label(a2) == y)
            train_count += len(y)

        train_loss = train_loss
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)
        train_loss_record.append(train_loss)

        print('Training loss at epoch ' + str(t + 1) + ' is ' +
              str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' +
              str(train_acc))

        ### Computing validation accuracy ###
        for i in range(int(np.floor(N_val / minibatch_size))):

            x, y = valSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            ######################################################################################
            # TODO: Call the forward methods of every layer in the model in order
            # Check above forward code
            # Make sure to keep train as False
            ######################################################################################

            #raise NotImplementedError("Not Implemented COMPUTING VALIDATION ACCURACY in main()")
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)

            ######################################################################################
            # NOTE: DO NOT MODIFY CODE BELOW THIS, until next TODO
            ######################################################################################

            loss = model['loss'].forward(a2, y)
            val_loss += loss
            val_acc += np.sum(predict_label(a2) == y)
            val_count += len(y)

        val_loss_record.append(val_loss)
        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' +
              str(val_acc))

    # save file
    json.dump(
        {
            'train': train_acc_record,
            'val': val_acc_record
        },
        open(
            'MLP_lr' + str(main_params['learning_rate']) + '_m' +
            str(main_params['alpha']) + '_w' + str(main_params['lambda']) +
            '_d' + str(main_params['dropout_rate']) + '_a' +
            str(main_params['activation']) + '.json', 'w'))

    print('Finish running!')
    return train_loss_record, val_loss_record
def main(main_params, optimization_type="minibatch_sgd"):
    # data processing
    data = pd.read_csv("dataset/london_merged.csv")
    data.drop(['timestamp'], inplace=True, axis=1)

    bins = [-1, 1000, 2000, 3000, 4000, 100000]
    dpy = data['cnt'].to_numpy()
    r = pd.cut(dpy, bins)
    data_y = r.codes
    data.drop(['cnt'], inplace=True, axis=1)
    # Normalization
    data = (data - data.min()) / (data.max() - data.min())
    data = data.to_numpy()
    x_train, x_val, y_train, y_val = train_test_split(data, data_y, test_size=0.33,
                                                  random_state=int(main_params['random_seed']))

    N_train, d = x_train.shape
    N_val, _ = x_val.shape

    trainSet = DataSplit(x_train, y_train)
    valSet = DataSplit(x_val, y_val)

    # building/defining model

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 10
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])
    _activation = main_params['activation']

    if _activation == 'relu':
        act = relu
    else:
        act = tanh

    # The network structure is input --> linear --> relu --> dropout --> linear --> softmax_cross_entropy loss
    # num_L1:the hidden_layer1 size
    # num_L2:the hidden_layer2 size
    # num_L3:the output_layer size
    model = dict()
    num_L1 = 100
    num_L2 = 50
    num_L3 = 5

    # create objects (modules) from the module classes
    model['L1'] = linear_layer(input_D=d, output_D=num_L1)
    model['nonlinear1'] = act()
    model['drop1'] = dropout(r=_dropout_rate)
    model['L2'] = linear_layer(input_D=num_L1, output_D=num_L2)
    model['nonlinear2'] = act()
    model['L3'] = linear_layer(input_D=num_L2, output_D=num_L3)
    model['loss'] = softmax_cross_entropy()

    # Momentum
    if _alpha > 0.0:
        momentum = add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []
    train_loss_record = []
    val_loss_record = []

    # training & validation
    for t in range(num_epoch):

        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0
        val_loss = 0.0

        for i in range(int(np.floor(N_train / minibatch_size))):
            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size: (i + 1) * minibatch_size])

            # forward
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=True)
            a2 = model['L2'].forward(d1)
            h2 = model['nonlinear2'].forward(a2)
            a3 = model['L3'].forward(h2)
            loss = model['loss'].forward(a3, y)

            # backward
            grad_a3 = model['loss'].backward(a3, y)
            grad_h2 = model['L3'].backward(h2, grad_a3)
            grad_a2 = model['nonlinear2'].backward(a2, grad_h2)
            grad_d1 = model['L2'].backward(d1, grad_a2)
            grad_h1 = model['drop1'].backward(h1, grad_d1)
            grad_a1 = model['nonlinear1'].backward(a1, grad_h1)
            grad_x = model['L1'].backward(x, grad_a1)
            # update gradient
            model = miniBatchGradientDescent(model, momentum, _lambda, _alpha, _learning_rate)

        # training accuracy
        for i in range(int(np.floor(N_train / minibatch_size))):
            x, y = trainSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            # forward
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            h2 = model['nonlinear2'].forward(a2)
            a3 = model['L3'].forward(h2)
            loss = model['loss'].forward(a3, y)

            train_loss += loss
            train_acc += np.sum(predict_label(a3) == y)
            train_count += len(y)

        train_loss = train_loss
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)
        train_loss_record.append(train_loss)

        # validation accuracy
        for i in range(int(np.floor(N_val / minibatch_size))):
            x, y = valSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            # forward
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            h2 = model['nonlinear2'].forward(a2)
            a3 = model['L3'].forward(h2)

            loss = model['loss'].forward(a3, y)
            val_loss += loss
            val_acc += np.sum(predict_label(a3) == y)
            val_count += len(y)

        val_loss_record.append(val_loss)
        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('At epoch ' + str(t + 1))
        print('Training loss at epoch ' + str(t + 1) + ' is ' + str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' + str(train_acc))
        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' + str(val_acc))

    index = [int(i+1) for i in range(num_epoch)]
    plt.figure()
    plt.grid()
    plt.plot(index, train_acc_record, color='b', label='train_acc')
    plt.plot(index, val_acc_record, color='darkorange', label='valadation_acc')
    plt.xlabel('training epoch')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()

    return train_acc_record, val_acc_record
コード例 #4
0
def main(main_params, optimization_type="minibatch_sgd"):

    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval , _, _ = data_loader_mnist(dataset = main_params['input_file'])
    N_train, d = Xtrain.shape
    N_val, _ = Xval.shape

    index = np.arange(10)
    unique, counts = np.unique(Ytrain, return_counts=True)
    counts = dict(zip(unique, counts)).values()

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)


    model = dict()
    num_L1 = 1000
    num_L2 = 10

    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 10
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])
    _activation = main_params['activation']


    if _activation == 'relu':
        act = relu
    else:
        act = tanh

    # create objects (modules) from the module classes
    model['L1'] = linear_layer(input_D = d, output_D = num_L1)
    model['nonlinear1'] = act()
    model['drop1'] = dropout(r = _dropout_rate)
    model['L2'] = linear_layer(input_D = num_L1, output_D = num_L2)
    model['loss'] = softmax_cross_entropy()

    # Momentum
    if _alpha > 0.0:
        momentum = add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    train_loss_record = []
    val_loss_record = []

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0
        val_loss = 0.0

        for i in range(int(np.floor(N_train / minibatch_size))):

            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size : (i + 1) * minibatch_size])

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train = True)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)


            ### backward ###
            grad_d1 = model['L2'].backward(d1, grad_a2)
            grad_h1 = model['drop1'].backward(h1, grad_d1)
            grad_a1 = model['nonlinear1'].backward(a1, grad_h1)
            ######################################################################################
            grad_x = model['L1'].backward(x, grad_a1)

            ### gradient_update ###
            model = miniBatchGradientDescent(model, momentum, _lambda, _alpha, _learning_rate)
            
        ### Computing training accuracy and obj ###
        for i in range(int(np.floor(N_train / minibatch_size))):

           	a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train = False)
            a2 = model['L2'].forward(d1)
            
            loss = model['loss'].forward(a2, y)
            train_loss += loss
            train_acc += np.sum(predict_label(a2) == y)
            train_count += len(y)

        train_loss = train_loss
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)
        train_loss_record.append(train_loss)

        print('Training loss at epoch ' + str(t + 1) + ' is ' + str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' + str(train_acc))

        ### Computing validation accuracy ###
        for i in range(int(np.floor(N_val / minibatch_size))):

            x, y = valSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train = False)
            a2 = model['L2'].forward(d1)           
            ######################################################################################

            loss = model['loss'].forward(a2, y)
            val_loss += loss
            val_acc += np.sum(predict_label(a2) == y)
            val_count += len(y)

        val_loss_record.append(val_loss)
        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' + str(val_acc))
コード例 #5
0
def main(main_params, optimization_type="minibatch_sgd"):
    np.random.seed(int(main_params['random_seed']))

    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(
        dataset=main_params['input_file'])
    N_train, d = Xtrain.shape
    N_val, _ = Xval.shape

    index = np.arange(10)
    unique, counts = np.unique(Ytrain, return_counts=True)
    counts = dict(zip(unique, counts)).values()

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    model = dict()
    num_L1 = 1000
    num_L2 = 10

    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    _learning_rate = float(main_params['learning_rate'])
    _step = 10
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])
    _activation = main_params['activation']

    if _activation == 'relu':
        act = relu
    else:
        act = tanh

    model['L1'] = linear_layer(input_D=d, output_D=num_L1)
    model['nonlinear1'] = act()
    model['drop1'] = dropout(r=_dropout_rate)
    model['L2'] = linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = softmax_cross_entropy()

    # Momentum
    if _alpha > 0.0:
        momentum = add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    train_loss_record = []
    val_loss_record = []

    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0
        val_loss = 0.0

        for i in range(int(np.floor(N_train / minibatch_size))):
            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size:(i + 1) *
                                                  minibatch_size])

            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=True)
            a2 = model['L2'].forward(d1)
            a2 = model['L2'].forward(h1)
            loss = model['loss'].forward(a2, y)

            grad_a2 = model['loss'].backward(a2, y)
            l2_backward = model['L2'].backward(d1, grad_a2)
            drop1_backward = model['drop1'].backward(h1, l2_backward)
            nonlinear1_backward = model['nonlinear1'].backward(
                a1, drop1_backward)
            grad_x = model['L1'].backward(x, nonlinear1_backward)
            model = miniBatchGradientDescent(model, momentum, _lambda, _alpha,
                                             _learning_rate)

        for i in range(int(np.floor(N_train / minibatch_size))):
            x, y = trainSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)
            loss = model['loss'].forward(a2, y)
            train_loss += loss
            train_acc += np.sum(predict_label(a2) == y)
            train_count += len(y)

        train_loss = train_loss
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)
        train_loss_record.append(train_loss)

        print('Training loss at epoch ' + str(t + 1) + ' is ' +
              str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' +
              str(train_acc))

        for i in range(int(np.floor(N_val / minibatch_size))):
            x, y = valSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)
            loss = model['loss'].forward(a2, y)
            val_loss += loss
            val_acc += np.sum(predict_label(a2) == y)
            val_count += len(y)

        val_loss_record.append(val_loss)
        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' +
              str(val_acc))

    json.dump(
        {
            'train': train_acc_record,
            'val': val_acc_record
        },
        open(
            'MLP_lr' + str(main_params['learning_rate']) + '_m' +
            str(main_params['alpha']) + '_w' + str(main_params['lambda']) +
            '_d' + str(main_params['dropout_rate']) + '_a' +
            str(main_params['activation']) + '.json', 'w'))

    print('Finish running!')
    return train_loss_record, val_loss_record
コード例 #6
0
def main(main_params, optimization_type="minibatch_sgd"):

    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    USAGE = 'neuralNetworkMT.py <number of neurons in hidden layer>' \
            ' <max number of Iterations>  <train data> <test data>'
    # if len(sys.argv) != 5:
    #     print(USAGE)
    # else:
    #     num_neurons = int(sys.argv[1])
    #     num_iteration = int(sys.argv[2])
    #     train_data = sys.argv[3]
    #     test_data = sys.argv[4]

    train_data = 'downgesture_train.list'
    test_data = 'downgesture_test.list'
    num_neurons = 100
    num_iteration = 1000

    # read input data and separate to 'down' and 'not_down' gestures files
    print("start reading input text files")
    train_files_label1, train_files_label0 = parse_file_list(train_data)
    test_files_label1, test_files_label0 = parse_file_list(test_data)

    print("start reading input pictures files")
    data_one_train = read_pgm_image(train_files_label1, 1)
    data_zero_train = read_pgm_image(train_files_label0, 0)
    data_one_test = read_pgm_image(test_files_label1, 1)
    data_zero_test = read_pgm_image(test_files_label0, 0)

    # combine pgm data with corresponding labels in train and test
    data_train = np.concatenate((data_one_train, data_zero_train), axis=0)
    data_test = np.concatenate((data_one_test, data_zero_test), axis=0)

    # shuffle and prepare data
    print("start shuffling and splitting data")
    np.random.seed(7)
    np.random.shuffle(data_train)

    # prepare data
    Xtrain = data_train[:, :-1]
    Ytrain = data_train[:, -1]
    Xval = data_test[:, :-1]
    Yval = data_test[:, -1]

    ### data processing ###
    # Xtrain, Ytrain, Xval, Yval , _, _ = data_loader_mnist(dataset = main_params['input_file'])
    N_train, d = Xtrain.shape
    N_val, _ = Xval.shape

    index = np.arange(10)
    unique, counts = np.unique(Ytrain, return_counts=True)
    counts = dict(zip(unique, counts)).values()

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    ### building/defining MLP ###
    """
    In this script, we are going to build a MLP for a 10-class classification problem on MNIST.
    The network structure is input --> linear --> relu --> dropout --> linear --> softmax_cross_entropy loss
    the hidden_layer size (num_L1) is 100
    the output_layer size (num_L2) is 10
    """
    model = dict()
    num_L1 = 100
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 10
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])
    _activation = main_params['activation']


    if _activation == 'relu':
        act = relu
    else:
        act = tanh

    # create objects (modules) from the module classes
    model['L1'] = linear_layer(input_D = d, output_D = num_L1)
    model['nonlinear1'] = act()
    model['drop1'] = dropout(r = _dropout_rate)
    model['L2'] = linear_layer(input_D = num_L1, output_D = num_L2)
    model['loss'] = softmax_cross_entropy()

    # Momentum
    if _alpha > 0.0:
        momentum = add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    train_loss_record = []
    val_loss_record = []

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0
        val_loss = 0.0

        for i in range(int(np.floor(N_train / minibatch_size))):

            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size : (i + 1) * minibatch_size])

            ### forward ###  x -L1> a1 -NL> h1 -Dr> d1 -L2> a2 -loss> y
            a1 = model['L1'].forward(x)   # a1 or u
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train = True)  # d1 or h after dropout
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)


            ### backward ###
            grad_a2 = model['loss'].backward(a2, y)
            ######################################################################################
            # TODO: Call the backward methods of every layer in the model in reverse order
            # We have given the first and last backward calls
            # Do not modify them.
            ######################################################################################
            
            # raise NotImplementedError("Not Implemented BACKWARD PASS in main()")
            grad_a2 = model['loss'].backward(a2, y)
            grad_d1 = model['L2'].backward(d1, grad_a2)
            grad_h1 = model['drop1'].backward(h1, grad_d1)
            grad_a1 = model['nonlinear1'].backward(a1,grad_h1)

            ######################################################################################
            # NOTE: DO NOT MODIFY CODE BELOW THIS, until next TODO
            ######################################################################################
            grad_x = model['L1'].backward(x, grad_a1)

            ### gradient_update ###
            model = miniBatchGradientDescent(model, momentum, _lambda, _alpha, _learning_rate)
            
        ### Computing training accuracy and obj ###
        for i in range(int(np.floor(N_train / minibatch_size))):

            x, y = trainSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            ######################################################################################
            # TODO: Call the forward methods of every layer in the model in order
            # Check above forward code
            ######################################################################################
            
            # raise NotImplementedError("Not Implemented COMPUTING TRAINING ACCURACY in main()")
            a1 = model['L1'].forward(x)   # a1 or u
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train = True)  # d1 or h after dropout
            a2 = model['L2'].forward(d1)

            ######################################################################################
            # NOTE: DO NOT MODIFY CODE BELOW THIS, until next TODO
            ######################################################################################

            loss = model['loss'].forward(a2, y)
            train_loss += loss
            train_acc += np.sum(predict_label(a2) == y)
            train_count += len(y)

        train_loss = train_loss
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)
        train_loss_record.append(train_loss)

        print('Training loss at epoch ' + str(t + 1) + ' is ' + str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' + str(train_acc))

        ### Computing validation accuracy ###
        for i in range(int(np.floor(N_val / minibatch_size))):

            x, y = valSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            ######################################################################################
            # TODO: Call the forward methods of every layer in the model in order
            # Check above forward code
            ######################################################################################
            a1 = model['L1'].forward(x)   # a1 or u
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train = True)  # d1 or h after dropout
            a2 = model['L2'].forward(d1)

            # raise NotImplementedError("Not Implemented COMPUTING VALIDATION ACCURACY in main()")

            
            ######################################################################################
            # NOTE: DO NOT MODIFY CODE BELOW THIS, until next TODO
            ######################################################################################

            loss = model['loss'].forward(a2, y)
            val_loss += loss
            val_acc += np.sum(predict_label(a2) == y)
            val_count += len(y)

        val_loss_record.append(val_loss)
        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' + str(val_acc))

    # save file
    json.dump({'train': train_acc_record, 'val': val_acc_record},
              open('MLP_lr' + str(main_params['learning_rate']) +
                   '_m' + str(main_params['alpha']) +
                   '_w' + str(main_params['lambda']) +
                   '_d' + str(main_params['dropout_rate']) +
                   '_a' + str(main_params['activation']) +
                   '.json', 'w'))

    print('Finish running!')
    return train_loss_record, val_loss_record