예제 #1
0
def main(main_params):
    """
        Search TODO for those parts you need to complete.
        Please follow the step indicated in TODO (step) to complete this script from step = 1 to step = 5.
    """

    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(
        dataset='mnist_subset.json')
    N_train, d, _, _ = Xtrain.shape
    N_val, _, _, _ = Xval.shape

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    ### building/defining CNN ###
    """
        In this script, we are going to build a CNN for a 10-class classification problem on MNIST.
        The network structure is input --> convolution --> relu --> max pooling --> convolution --> relu --> max pooling --> flatten --> dropout --> linear --> softmax_cross_entropy loss
        the hidden_layer size (num_L1) is 1225
        the output_layer size (num_L2) is 10
    """
    model = dict()
    num_L1 = 1225
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 30
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])

    # create objects (modules) from the module classes
    model['C1'] = dnn_misc.conv_layer(num_input=d,
                                      num_output=25,
                                      filter_len=5,
                                      stride=1)
    model['nonlinear1'] = dnn_misc.relu()
    model['M1'] = dnn_misc.max_pool(max_len=2, stride=2)

    ################################################################################
    # TODO (1): Understand the new modules to be included (compared to dnn_cnn.py) #
    # You do not need to modify any thing here.                                    #
    ################################################################################

    model['C2'] = dnn_misc.conv_layer(num_input=25,
                                      num_output=25,
                                      filter_len=3,
                                      stride=1)
    model['nonlinear2'] = dnn_misc.relu()
    model['M2'] = dnn_misc.max_pool(max_len=2, stride=2)

    ################################################################################
    #                            End of TODO (1)                                   #
    ################################################################################

    model['F1'] = dnn_misc.flatten_layer()
    model['drop1'] = dnn_misc.dropout(r=_dropout_rate)
    model['L1'] = dnn_misc.linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = dnn_misc.softmax_cross_entropy()

    # create variables for momentum
    if _alpha > 0.0:
        momentum = dnn_misc.add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0

        for i in range(int(np.floor(N_train / minibatch_size))):

            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size:(i + 1) *
                                                  minibatch_size])

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            ################################################################################
            # TODO (2): Connect the three modules for the forward pass                     #
            # model['C2']                                                                  #
            # model['nonlinear2']                                                          #
            # model['M2']                                                                  #
            # into the forward pass.                                                       #
            # Please make sure to connect them with m1 and m2, the input and output of the #
            # previous and the next modules, respectively                                  #
            ################################################################################

            # TODO (2)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)

            ################################################################################
            #                            End of TODO (2)                                   #
            ################################################################################

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=True)
            a1 = model['L1'].forward(d1)
            loss = model['loss'].forward(a1, y)

            ### backward ###
            grad_a1 = model['loss'].backward(a1, y)
            grad_d1 = model['L1'].backward(d1, grad_a1)
            grad_f1 = model['drop1'].backward(f1, grad_d1)
            grad_m2 = model['F1'].backward(m2, grad_f1)

            ################################################################################
            # TODO (3): Connect the three modules for the backward pass                    #
            # model['C2']                                                                  #
            # model['nonlinear2']                                                          #
            # model['M2']                                                                  #
            # into the backward pass.                                                      #
            # Please make sure to connect them with grad_m2 and grad_m1, the input and     #
            # output of the previous and the next modules, respectively                    #
            # Please pay attention to the number of arguments in the backward pass.        #
            ################################################################################

            # TODO (3)
            grad_h2 = model['M2'].backward(h2, grad_m2)
            grad_c2 = model['nonlinear2'].backward(c2, grad_h2)
            grad_m1 = model['C2'].backward(m1, grad_c2)

            ################################################################################
            #                            End of TODO (3)                                   #
            ################################################################################

            grad_h1 = model['M1'].backward(h1, grad_m1)
            grad_c1 = model['nonlinear1'].backward(c1, grad_h1)
            grad_x = model['C1'].backward(x, grad_c1)

            ### gradient_update ###
            for module_name, module in model.items():

                # check if a module has learnable parameters
                if hasattr(module, 'params'):
                    for key, _ in module.params.items():
                        g = module.gradient[key] + _lambda * module.params[key]

                        if _alpha > 0.0:
                            momentum[module_name + '_' +
                                     key] = _alpha * momentum[
                                         module_name + '_' +
                                         key] - _learning_rate * g
                            module.params[key] += momentum[module_name + '_' +
                                                           key]

                        else:
                            module.params[key] -= _learning_rate * g

        ### Computing training accuracy and obj ###
        for i in range(int(np.floor(N_train / minibatch_size))):

            x, y = trainSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            ################################################################################
            # TODO (4): Connect the three modules for the forward pass                     #
            # model['C2']                                                                  #
            # model['nonlinear2']                                                          #
            # model['M2']                                                                  #
            # into the forward pass.                                                       #
            # Please make sure to connect them with m1 and m2, the input and output of the #
            # previous and the next modules, respectively                                  #
            ################################################################################

            # TODO (4)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)

            ################################################################################
            #                            End of TODO (4)                                   #
            ################################################################################

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=False)
            a1 = model['L1'].forward(d1)
            loss = model['loss'].forward(a1, y)
            train_loss += len(y) * loss
            train_acc += np.sum(predict_label(a1) == y)
            train_count += len(y)

        train_loss = train_loss / train_count
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)

        print('Training loss at epoch ' + str(t + 1) + ' is ' +
              str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' +
              str(train_acc))

        ### Computing validation accuracy ###
        for i in range(int(np.floor(N_val / minibatch_size))):

            x, y = valSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            ################################################################################
            # TODO (5): Connect the three modules for the forward pass                     #
            # model['C2']                                                                  #
            # model['nonlinear2']                                                          #
            # model['M2']                                                                  #
            # into the forward pass.                                                       #
            # Please make sure to connect them with m1 and m2, the input and output of the #
            # previous and the next modules, respectively                                  #
            ################################################################################

            # TODO (5)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)

            ################################################################################
            #                            End of TODO (5)                                   #
            ################################################################################

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=False)
            a1 = model['L1'].forward(d1)
            val_acc += np.sum(predict_label(a1) == y)
            val_count += len(y)

        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' +
              str(val_acc))

    # save file
    json.dump(
        {
            'train': train_acc_record,
            'val': val_acc_record
        },
        open(
            'CNN2_lr' + str(main_params['learning_rate']) + '_m' +
            str(main_params['alpha']) + '_w' + str(main_params['lambda']) +
            '_d' + str(main_params['dropout_rate']) + '.json', 'w'))

    print('Finish running!')
    return
예제 #2
0
def main(main_params):
    # set the random seed
    np.random.seed(int(main_params['random_seed']))

    # data processing
    Xtrain, Ytrain, Xval, Yval , _, _ = data_loader_mnist(dataset = 'mnist_subset.json')
    N_train, d, _, _ = Xtrain.shape
    N_val, _, _, _ = Xval.shape

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    # building a CNN for 10-class classification problem on MNIST
    """
        The network structure is input --> convolution --> relu --> max pooling --> convolution --> relu --> max pooling --> flatten --> dropout --> linear --> softmax_cross_entropy loss
        the hidden_layer size (num_L1) is 1225
        the output_layer size (num_L2) is 10
    """
    model = dict()
    num_L1 = 1225
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 30
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])

    # create objects from the module classes
    model['C1'] = dnn_misc.conv_layer(num_input = d, num_output = 25, filter_len = 5, stride = 1)
    model['nonlinear1'] = dnn_misc.relu()
    model['M1'] = dnn_misc.max_pool(max_len = 2, stride = 2)
    model['C2'] = dnn_misc.conv_layer(num_input = 25, num_output=25, filter_len=3, stride=1)
    model['nonlinear2'] = dnn_misc.relu()
    model['M2'] = dnn_misc.max_pool(max_len=2, stride=2)
    model['F1'] = dnn_misc.flatten_layer()
    model['drop1'] = dnn_misc.dropout(r = _dropout_rate)
    model['L1'] = dnn_misc.linear_layer(input_D = num_L1, output_D = num_L2)
    model['loss'] = dnn_misc.softmax_cross_entropy()

    # create variables for momentum
    if _alpha > 0.0:
        momentum = dnn_misc.add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    # run training and validation
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1
        idx_order = np.random.permutation(N_train)
        train_acc = 0.0
        train_loss = 0.0
        train_count = 0
        val_acc = 0.0
        val_count = 0

        for i in range(int(np.floor(N_train / minibatch_size))):
            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size : (i + 1) * minibatch_size])

            # forward
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)           
            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train = True)
            a1 = model['L1'].forward(d1)
            loss = model['loss'].forward(a1, y)

            # backward
            grad_a1 = model['loss'].backward(a1, y)
            grad_d1 = model['L1'].backward(d1, grad_a1)
            grad_f1 = model['drop1'].backward(f1, grad_d1)
            grad_m2 = model['F1'].backward(m2, grad_f1)
            grad_h2 = model['M2'].backward(h2, grad_m2)
            grad_c2 = model['nonlinear2'].backward(c2, grad_h2)
            grad_m1 = model['C2'].backward(m1, grad_c2)
            grad_h1 = model['M1'].backward(h1, grad_m1)
            grad_c1 = model['nonlinear1'].backward(c1, grad_h1)
            grad_x = model['C1'].backward(x, grad_c1)

            # gradient_update
            for module_name, module in model.items():
                # check if a module has learnable parameters
                if hasattr(module, 'params'):
                    for key, _ in module.params.items():
                        g = module.gradient[key] + _lambda * module.params[key]
                        if _alpha > 0.0:
                            momentum[module_name + '_' + key] = _alpha * momentum[module_name + '_' + key] - _learning_rate * g
                            module.params[key] += momentum[module_name + '_' + key]
                        else:
                            module.params[key] -= _learning_rate * g

        # Computing training accuracy and obj
        for i in range(int(np.floor(N_train / minibatch_size))):
            x, y = trainSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            # forward 
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2) 
            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train = False)
            a1 = model['L1'].forward(d1)
            loss = model['loss'].forward(a1, y)
            train_loss += len(y) * loss
            train_acc += np.sum(predict_label(a1) == y)
            train_count += len(y)

        train_loss = train_loss / train_count
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)

        print('Training loss at epoch ' + str(t + 1) + ' is ' + str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' + str(train_acc))

        # Computing validation accuracy
        for i in range(int(np.floor(N_val / minibatch_size))):
            x, y = valSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            # forward 
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2) 
            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train = False)
            a1 = model['L1'].forward(d1)
            val_acc += np.sum(predict_label(a1) == y)
            val_count += len(y)

        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' + str(val_acc))

    # save file
    json.dump({'train': train_acc_record, 'val': val_acc_record},
              open('CNN2_lr' + str(main_params['learning_rate']) +
                   '_m' + str(main_params['alpha']) +
                   '_w' + str(main_params['lambda']) +
                   '_d' + str(main_params['dropout_rate']) +
                   '.json', 'w'))
    print('Finish running!')
    return