Example #1
0
def main(main_params):
    """Train a two-convolution CNN on the MNIST subset and save accuracy curves.

    The network structure is
    input --> convolution --> relu --> max pooling --> convolution --> relu
    --> max pooling --> flatten --> dropout --> linear
    --> softmax_cross_entropy loss.

    Args:
        main_params: mapping providing 'random_seed', 'num_epoch',
            'minibatch_size', 'learning_rate', 'alpha' (momentum),
            'lambda' (weight decay) and 'dropout_rate'. Each value is passed
            through int() or float() before use.

    Side effects:
        Prints the training loss/accuracy and validation accuracy after every
        epoch, and writes the per-epoch accuracies to
        'CNN2_lr<learning_rate>_m<alpha>_w<lambda>_d<dropout_rate>.json'
        in the current working directory.
    """

    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(
        dataset='mnist_subset.json')
    # The inputs are 4-D. The second axis is handed to the first convolution
    # as num_input (presumably the channel count -- confirm with the loader).
    N_train, d, _, _ = Xtrain.shape
    N_val, _, _, _ = Xval.shape

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    ### building/defining CNN ###
    model = dict()
    # Input width of the final linear layer, i.e. the flattened size after the
    # second pooling stage (presumably 25 channels * 7 * 7 -- confirm against
    # the padding used by dnn_misc.conv_layer).
    num_L1 = 1225
    # Number of classes.
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 30  # the learning rate is divided by 10 every _step epochs
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])

    # create objects (modules) from the module classes
    model['C1'] = dnn_misc.conv_layer(num_input=d,
                                      num_output=25,
                                      filter_len=5,
                                      stride=1)
    model['nonlinear1'] = dnn_misc.relu()
    model['M1'] = dnn_misc.max_pool(max_len=2, stride=2)

    model['C2'] = dnn_misc.conv_layer(num_input=25,
                                      num_output=25,
                                      filter_len=3,
                                      stride=1)
    model['nonlinear2'] = dnn_misc.relu()
    model['M2'] = dnn_misc.max_pool(max_len=2, stride=2)

    model['F1'] = dnn_misc.flatten_layer()
    model['drop1'] = dnn_misc.dropout(r=_dropout_rate)
    model['L1'] = dnn_misc.linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = dnn_misc.softmax_cross_entropy()

    # create variables for momentum
    if _alpha > 0.0:
        momentum = dnn_misc.add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    # NOTE: only full minibatches are processed, so examples past the last
    # full minibatch are ignored for both training and evaluation.
    num_train_batches = N_train // minibatch_size
    num_val_batches = N_val // minibatch_size

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        # learning rate decay
        if t != 0 and t % _step == 0:
            _learning_rate = _learning_rate * 0.1

        # visit the training examples in a fresh random order every epoch
        idx_order = np.random.permutation(N_train)

        for i in range(num_train_batches):

            # get a mini-batch of data
            batch_idx = idx_order[i * minibatch_size:(i + 1) * minibatch_size]
            x, y = trainSet.get_example(batch_idx)

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=True)
            a1 = model['L1'].forward(d1)
            # The loss value is not needed while training; the call is kept
            # in case the loss module caches state in forward().
            model['loss'].forward(a1, y)

            ### backward ###
            grad_a1 = model['loss'].backward(a1, y)
            grad_d1 = model['L1'].backward(d1, grad_a1)
            grad_f1 = model['drop1'].backward(f1, grad_d1)
            grad_m2 = model['F1'].backward(m2, grad_f1)

            grad_h2 = model['M2'].backward(h2, grad_m2)
            grad_c2 = model['nonlinear2'].backward(c2, grad_h2)
            grad_m1 = model['C2'].backward(m1, grad_c2)

            grad_h1 = model['M1'].backward(h1, grad_m1)
            grad_c1 = model['nonlinear1'].backward(c1, grad_h1)
            # The gradient w.r.t. the input is discarded; the call is still
            # required because the update below reads model['C1'].gradient.
            model['C1'].backward(x, grad_c1)

            ### gradient_update ###
            for module_name, module in model.items():

                # only modules with learnable parameters expose 'params'
                if not hasattr(module, 'params'):
                    continue

                for key in module.params:
                    # backward-pass gradient plus the weight-decay term
                    g = module.gradient[key] + _lambda * module.params[key]

                    if _alpha > 0.0:
                        momentum_key = module_name + '_' + key
                        momentum[momentum_key] = (
                            _alpha * momentum[momentum_key] -
                            _learning_rate * g)
                        module.params[key] += momentum[momentum_key]
                    else:
                        module.params[key] -= _learning_rate * g

        ### Computing training accuracy and obj ###
        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        for i in range(num_train_batches):

            x, y = trainSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward (dropout disabled) ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=False)
            a1 = model['L1'].forward(d1)
            loss = model['loss'].forward(a1, y)
            # weight the batch loss by the batch size so the final division
            # yields a per-example average
            train_loss += len(y) * loss
            train_acc += np.sum(predict_label(a1) == y)
            train_count += len(y)

        train_loss = train_loss / train_count
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)

        print('Training loss at epoch ' + str(t + 1) + ' is ' +
              str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' +
              str(train_acc))

        ### Computing validation accuracy ###
        val_acc = 0.0
        val_count = 0

        for i in range(num_val_batches):

            x, y = valSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward (dropout disabled) ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=False)
            a1 = model['L1'].forward(d1)
            val_acc += np.sum(predict_label(a1) == y)
            val_count += len(y)

        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' +
              str(val_acc))

    # save file
    out_path = ('CNN2_lr' + str(main_params['learning_rate']) + '_m' +
                str(main_params['alpha']) + '_w' + str(main_params['lambda']) +
                '_d' + str(main_params['dropout_rate']) + '.json')
    # Use a context manager so the file is flushed and closed deterministically.
    with open(out_path, 'w') as out_file:
        json.dump(
            {
                'train': train_acc_record,
                'val': val_acc_record
            }, out_file)

    print('Finish running!')
Example #2
0
def main(main_params):
    """Train a one-hidden-layer MLP with dropout on the MNIST subset.

    The network structure is
    input --> linear --> relu --> dropout --> linear
    --> softmax_cross_entropy loss.

    Args:
        main_params: mapping providing 'random_seed', 'num_epoch',
            'minibatch_size', 'learning_rate', 'alpha' (momentum),
            'lambda' (weight decay) and 'dropout_rate'. Each value is passed
            through int() or float() before use.

    Side effects:
        Prints the training loss/accuracy and validation accuracy after every
        epoch, and writes the per-epoch accuracies to
        'MLP_lr<learning_rate>_m<alpha>_w<lambda>_d<dropout_rate>.json'
        in the current working directory.
    """

    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(
        dataset='mnist_subset.json')
    # The inputs are 2-D: one row per example, d features per row.
    N_train, d = Xtrain.shape
    N_val, _ = Xval.shape

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    ### building/defining MLP ###
    model = dict()
    num_L1 = 1000  # hidden layer size
    num_L2 = 10  # output layer size (number of classes)

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 10  # the learning rate is divided by 10 every _step epochs
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])

    # create objects (modules) from the module classes
    model['L1'] = dnn_misc.linear_layer(input_D=d, output_D=num_L1)
    model['nonlinear1'] = dnn_misc.relu()
    model['drop1'] = dnn_misc.dropout(r=_dropout_rate)
    model['L2'] = dnn_misc.linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = dnn_misc.softmax_cross_entropy()

    # create variables for momentum
    if _alpha > 0.0:
        momentum = dnn_misc.add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    # NOTE: only full minibatches are processed, so examples past the last
    # full minibatch are ignored for both training and evaluation.
    num_train_batches = N_train // minibatch_size
    num_val_batches = N_val // minibatch_size

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        # learning rate decay
        if t != 0 and t % _step == 0:
            _learning_rate = _learning_rate * 0.1

        # visit the training examples in a fresh random order every epoch
        idx_order = np.random.permutation(N_train)

        for i in range(num_train_batches):

            # get a mini-batch of data
            batch_idx = idx_order[i * minibatch_size:(i + 1) * minibatch_size]
            x, y = trainSet.get_example(batch_idx)

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=True)
            a2 = model['L2'].forward(d1)
            # The loss value is not needed while training; the call is kept
            # in case the loss module caches state in forward().
            model['loss'].forward(a2, y)

            ### backward ###
            grad_a2 = model['loss'].backward(a2, y)
            grad_d1 = model['L2'].backward(d1, grad_a2)
            grad_h1 = model['drop1'].backward(h1, grad_d1)
            grad_a1 = model['nonlinear1'].backward(a1, grad_h1)
            # The gradient w.r.t. the input is discarded; the call is still
            # required because the update below reads model['L1'].gradient.
            model['L1'].backward(x, grad_a1)

            ### gradient_update ###
            for module_name, module in model.items():

                # only modules with learnable parameters expose 'params'
                if not hasattr(module, 'params'):
                    continue

                for key in module.params:
                    # backward-pass gradient plus the weight-decay term
                    g = module.gradient[key] + _lambda * module.params[key]

                    if _alpha > 0.0:
                        momentum_key = module_name + '_' + key
                        momentum[momentum_key] = (
                            _alpha * momentum[momentum_key] -
                            _learning_rate * g)
                        module.params[key] += momentum[momentum_key]
                    else:
                        module.params[key] -= _learning_rate * g

        ### Computing training accuracy and obj ###
        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        for i in range(num_train_batches):

            x, y = trainSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward (dropout disabled) ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)
            # weight the batch loss by the batch size so the final division
            # yields a per-example average
            train_loss += len(y) * loss
            train_acc += np.sum(predict_label(a2) == y)
            train_count += len(y)

        train_loss = train_loss / train_count
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)

        print('Training loss at epoch ' + str(t + 1) + ' is ' +
              str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' +
              str(train_acc))

        ### Computing validation accuracy ###
        val_acc = 0.0
        val_count = 0

        for i in range(num_val_batches):

            x, y = valSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward (dropout disabled) ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            val_acc += np.sum(predict_label(a2) == y)
            val_count += len(y)

        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' +
              str(val_acc))

    # save file
    out_path = ('MLP_lr' + str(main_params['learning_rate']) + '_m' +
                str(main_params['alpha']) + '_w' + str(main_params['lambda']) +
                '_d' + str(main_params['dropout_rate']) + '.json')
    # Use a context manager so the file is flushed and closed deterministically.
    with open(out_path, 'w') as out_file:
        json.dump(
            {
                'train': train_acc_record,
                'val': val_acc_record
            }, out_file)

    print('Finish running!')
Example #3
0
def main(main_params):
    """Train an MLP on the MNIST subset with a selectable optimizer and report results.

    The network structure is
    input --> linear --> relu --> linear --> softmax_cross_entropy loss.

    Args:
        main_params: mapping providing 'random_seed', 'num_epoch',
            'minibatch_size', 'learning_rate', 'lambda' (weight decay) and
            'optim'. 'optim' must be either "Minibatch_Gradient_Descent" or
            "Gradient_Descent_Momentum"; with the latter the momentum
            coefficient is fixed at 0.9.

    Raises:
        ValueError: if main_params['optim'] is not one of the two supported
            optimizer names (previously such a value silently skipped every
            parameter update).

    Side effects:
        Prints per-epoch training/validation loss and accuracy and the final
        test loss and accuracy, calls plot_metrics and plot_tSNE, and writes
        all recorded metrics to
        '<optim>_lr<learning_rate>_m<alpha>_w<lambda>.json' in the current
        working directory.
    """
    sgd_name = "Minibatch_Gradient_Descent"
    momentum_name = "Gradient_Descent_Momentum"

    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, Xtest, Ytest = data_loader_mnist(
        dataset='mnist_subset.json')
    # The inputs are 2-D: one row per example, d features per row.
    N_train, d = Xtrain.shape
    N_val, _ = Xval.shape
    N_test, _ = Xtest.shape
    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)
    testSet = DataSplit(Xtest, Ytest)

    ### building/defining MLP ###
    model = dict()
    num_L1 = 1000  # hidden layer size
    num_L2 = 10  # output layer size (number of classes)

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 10  # the learning rate is divided by 10 every _step epochs
    _alpha = 0.0
    _lambda = float(main_params['lambda'])
    _optimizer = main_params['optim']
    if _optimizer not in (sgd_name, momentum_name):
        # Without this check an unknown name matches neither update branch
        # below, so the reported metrics would describe an untrained model.
        raise ValueError('Unsupported optimizer: ' + repr(_optimizer))

    # create objects (modules) from the module classes
    model['L1'] = dnn_misc.linear_layer(input_D=d, output_D=num_L1)
    model['nonlinear1'] = dnn_misc.relu()
    model['L2'] = dnn_misc.linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = dnn_misc.softmax_cross_entropy()

    # create variables for momentum
    if _optimizer == momentum_name:
        # dictionary holding the momentum value of every learnable parameter
        momentum = dnn_misc.add_momentum(model)
        _alpha = 0.9
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []
    train_loss_record = []
    val_loss_record = []

    # NOTE: only full minibatches are processed, so examples past the last
    # full minibatch are ignored for both training and evaluation.
    num_train_batches = N_train // minibatch_size
    num_val_batches = N_val // minibatch_size
    num_test_batches = N_test // minibatch_size

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if t != 0 and t % _step == 0:
            # learning_rate decay
            _learning_rate = _learning_rate * 0.1

        # shuffle the train data
        idx_order = np.random.permutation(N_train)

        for i in range(num_train_batches):

            # get a mini-batch of data
            batch_idx = idx_order[i * minibatch_size:(i + 1) * minibatch_size]
            x, y = trainSet.get_example(batch_idx)

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            a2 = model['L2'].forward(h1)
            # The loss value is not needed while training; the call is kept
            # in case the loss module caches state in forward().
            model['loss'].forward(a2, y)

            ### backward ###
            grad_a2 = model['loss'].backward(a2, y)
            grad_h1 = model['L2'].backward(h1, grad_a2)
            grad_a1 = model['nonlinear1'].backward(a1, grad_h1)
            # The gradient w.r.t. the input is discarded; the call is still
            # required because the update below reads model['L1'].gradient.
            model['L1'].backward(x, grad_a1)

            ### gradient_update ###
            for module_name, module in model.items():
                # Not every module has learnable parameters; those that do
                # expose them through a 'params' attribute.
                if not hasattr(module, 'params'):
                    continue

                for key in module.params:
                    # gradient computed during the backward pass plus the
                    # L2 regularization term (_lambda is its strength)
                    g = module.gradient[key] + _lambda * module.params[key]

                    if _optimizer == sgd_name:
                        # plain update: w = w - learning_rate * dl/dw
                        module.params[key] -= _learning_rate * g

                    elif _optimizer == momentum_name:
                        # momentum(w) = _alpha * previous momentum(w)
                        #               + _learning_rate * g
                        # followed by: w = w - momentum(w)
                        parameter = module_name + '_' + key
                        momentum[parameter] = (_alpha * momentum[parameter] +
                                               _learning_rate * g)
                        module.params[key] -= momentum[parameter]

        ### Compute train accuracy ###
        train_acc = 0.0
        train_loss = 0.0
        train_count = 0
        for i in range(num_train_batches):
            x, y = trainSet.get_example(
                np.arange(minibatch_size * i, minibatch_size * (i + 1)))

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            a2 = model['L2'].forward(h1)
            loss = model['loss'].forward(a2, y)
            # weight the batch loss by the batch size so the final division
            # yields a per-example average
            train_loss += len(y) * loss
            train_acc += np.sum(predict_label(a2) == y)
            train_count += len(y)

        train_loss = train_loss / train_count
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)
        train_loss_record.append(train_loss)
        print('Training loss at epoch ' + str(t + 1) + ' is ' +
              str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' +
              str(train_acc))

        ### Compute validation accuracy ###
        val_acc = 0.0
        val_loss = 0.0
        val_count = 0
        for i in range(num_val_batches):
            x, y = valSet.get_example(
                np.arange(minibatch_size * i, minibatch_size * (i + 1)))

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            a2 = model['L2'].forward(h1)
            loss = model['loss'].forward(a2, y)
            val_loss += len(y) * loss
            val_acc += np.sum(predict_label(a2) == y)
            val_count += len(y)

        val_loss = val_loss / val_count
        val_acc = val_acc / val_count

        val_acc_record.append(val_acc)
        val_loss_record.append(val_loss)

        print('Validation loss at epoch ' + str(t + 1) + ' is ' +
              str(val_loss))
        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' +
              str(val_acc))

    ### Compute test accuracy ###
    test_loss = 0.0
    test_acc = 0.0
    test_count = 0
    for i in range(num_test_batches):
        x, y = testSet.get_example(
            np.arange(minibatch_size * i, minibatch_size * (i + 1)))

        ### forward ###
        a1 = model['L1'].forward(x)
        h1 = model['nonlinear1'].forward(a1)
        a2 = model['L2'].forward(h1)
        loss = model['loss'].forward(a2, y)
        test_loss += len(y) * loss
        test_acc += np.sum(predict_label(a2) == y)
        test_count += len(y)

    test_loss = test_loss / test_count
    test_acc = test_acc / test_count

    # Report against num_epoch rather than the loop variable: it is the same
    # number after a completed run and stays defined when num_epoch is 0.
    print('Test loss at epoch ' + str(num_epoch) + ' is ' + str(test_loss))
    print('Test accuracy at epoch ' + str(num_epoch) + ' is ' + str(test_acc))

    plot_metrics(num_epoch, train_acc_record, val_acc_record,
                 train_loss_record, val_loss_record, _optimizer)

    # save file
    out_path = (_optimizer + '_lr' + str(main_params['learning_rate']) + '_m' +
                str(_alpha) + '_w' + str(main_params['lambda']) + '.json')
    # Use a context manager so the file is flushed and closed deterministically.
    with open(out_path, 'w') as out_file:
        json.dump(
            {
                'train_accuracy': train_acc_record,
                'train_loss': train_loss_record,
                'val_accuracy': val_acc_record,
                'val_loss': val_loss_record,
                'test_accuracy': test_acc,
                'test_loss': test_loss
            }, out_file)

    # plotting to understand what the network is trying to do

    # plot raw mnist data
    plot_tSNE(Xtest, Ytest, 't-SNE raw MNIST.png')

    # Visualize the outputs of the first and second linear layers on the
    # whole test split using t-SNE.
    first_layer_out = model['L1'].forward(testSet.X)
    h1 = model['nonlinear1'].forward(first_layer_out)
    second_layer_out = model['L2'].forward(h1)

    plot_tSNE(first_layer_out, Ytest, _optimizer + '_t-SNE_1.png')
    plot_tSNE(second_layer_out, Ytest, _optimizer + '_t-SNE_2.png')
    print('Finish running!')