def main(main_params):
    """Train and evaluate a one-hidden-layer MLP on the MNIST subset.

    Network structure:
        input --> linear --> relu --> dropout --> linear
              --> softmax_cross_entropy loss
    The hidden layer has 1000 units (num_L1) and the output layer has 10
    units (num_L2).

    After every epoch the training loss, training accuracy and validation
    accuracy are printed. When training ends, the per-epoch accuracies are
    written to a JSON file whose name encodes the learning rate, momentum,
    weight decay and dropout rate taken from ``main_params``.

    Args:
        main_params: mapping that provides 'random_seed', 'num_epoch',
            'minibatch_size', 'learning_rate', 'alpha' (momentum),
            'lambda' (weight decay) and 'dropout_rate'. Values are converted
            with int()/float() before use.

    Returns:
        None.
    """
    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(
        dataset='mnist_subset.json')
    N_train, d = Xtrain.shape
    N_val, _ = Xval.shape

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    ### building/defining MLP ###
    model = dict()
    num_L1 = 1000
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 10  # the learning rate is multiplied by 0.1 every _step epochs
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])

    # create objects (modules) from the module classes.
    # The insertion order is kept as-is: it is the order in which the modules
    # are constructed and later visited by the parameter update.
    model['L1'] = dnn_misc.linear_layer(input_D=d, output_D=num_L1)
    model['nonlinear1'] = dnn_misc.relu()
    model['drop1'] = dnn_misc.dropout(r=_dropout_rate)
    model['L2'] = dnn_misc.linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = dnn_misc.softmax_cross_entropy()

    # create variables for momentum
    if _alpha > 0.0:
        momentum = dnn_misc.add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    # Only full mini-batches are used; a trailing partial batch is dropped.
    num_train_batches = N_train // minibatch_size
    num_val_batches = N_val // minibatch_size

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0

        for i in range(num_train_batches):
            # get a mini-batch of data
            x, y = trainSet.get_example(
                idx_order[i * minibatch_size:(i + 1) * minibatch_size])

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=True)
            a2 = model['L2'].forward(d1)
            # The loss value is not needed while training.
            # NOTE(review): the call is kept in case the loss module caches
            # state in forward() -- confirm before removing.
            model['loss'].forward(a2, y)

            ### backward ###
            grad_a2 = model['loss'].backward(a2, y)
            grad_d1 = model['L2'].backward(d1, grad_a2)
            grad_h1 = model['drop1'].backward(h1, grad_d1)
            grad_a1 = model['nonlinear1'].backward(a1, grad_h1)
            # The gradient w.r.t. the input is unused; the call is kept
            # because module.gradient is read below (presumably filled in by
            # backward()).
            model['L1'].backward(x, grad_a1)

            ### gradient_update ###
            for module_name, module in model.items():
                # skip modules without learnable parameters
                if not hasattr(module, 'params'):
                    continue
                for key in module.params:
                    # gradient plus the weight-decay term
                    g = module.gradient[key] + _lambda * module.params[key]
                    if _alpha > 0.0:
                        m_key = module_name + '_' + key
                        momentum[m_key] = (_alpha * momentum[m_key]
                                           - _learning_rate * g)
                        module.params[key] += momentum[m_key]
                    else:
                        module.params[key] -= _learning_rate * g

        ### Computing training accuracy and obj ###
        for i in range(num_train_batches):
            x, y = trainSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)

            train_loss += len(y) * loss
            train_acc += np.sum(predict_label(a2) == y)
            train_count += len(y)

        train_loss = train_loss / train_count
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)

        print('Training loss at epoch ' + str(t + 1) + ' is ' +
              str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' +
              str(train_acc))

        ### Computing validation accuracy ###
        for i in range(num_val_batches):
            x, y = valSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)

            val_acc += np.sum(predict_label(a2) == y)
            val_count += len(y)

        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' +
              str(val_acc))

    # save file
    out_name = ('MLP_lr' + str(main_params['learning_rate']) +
                '_m' + str(main_params['alpha']) +
                '_w' + str(main_params['lambda']) +
                '_d' + str(main_params['dropout_rate']) +
                '.json')
    # A context manager guarantees the file is flushed and closed, even if
    # serialization raises.
    with open(out_name, 'w') as out_file:
        json.dump({'train': train_acc_record, 'val': val_acc_record},
                  out_file)

    print('Finish running!')
    return
def main(main_params):
    """Train and evaluate a two-convolution-stage CNN on the MNIST subset.

    Network structure:
        input --> convolution --> relu --> max pooling
              --> convolution --> relu --> max pooling
              --> flatten --> dropout --> linear
              --> softmax_cross_entropy loss
    The flattened feature size (num_L1) is 1225 and the output layer size
    (num_L2) is 10.

    This script originated as an exercise with five TODO steps (creating the
    second convolution stage and wiring it into the training, training-eval
    and validation passes). All five steps are filled in below; the
    corresponding places are marked "step (k)".

    After every epoch the training loss, training accuracy and validation
    accuracy are printed. When training ends, the per-epoch accuracies are
    written to a JSON file whose name encodes the learning rate, momentum,
    weight decay and dropout rate taken from ``main_params``.

    Args:
        main_params: mapping that provides 'random_seed', 'num_epoch',
            'minibatch_size', 'learning_rate', 'alpha' (momentum),
            'lambda' (weight decay) and 'dropout_rate'. Values are converted
            with int()/float() before use.

    Returns:
        None.
    """
    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(
        dataset='mnist_subset.json')
    # The inputs are 4-D here; the second axis is passed to the first
    # convolution as its number of input channels.
    N_train, d, _, _ = Xtrain.shape
    N_val, _, _, _ = Xval.shape

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    ### building/defining CNN ###
    model = dict()
    num_L1 = 1225
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 30  # the learning rate is multiplied by 0.1 every _step epochs
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])

    # create objects (modules) from the module classes.
    # The insertion order is kept as-is: it is the order in which the modules
    # are constructed and later visited by the parameter update.
    model['C1'] = dnn_misc.conv_layer(num_input=d, num_output=25,
                                      filter_len=5, stride=1)
    model['nonlinear1'] = dnn_misc.relu()
    model['M1'] = dnn_misc.max_pool(max_len=2, stride=2)

    # step (1): the second convolution stage (the modules that are new
    # compared to dnn_cnn.py, per the original exercise note).
    model['C2'] = dnn_misc.conv_layer(num_input=25, num_output=25,
                                      filter_len=3, stride=1)
    model['nonlinear2'] = dnn_misc.relu()
    model['M2'] = dnn_misc.max_pool(max_len=2, stride=2)

    model['F1'] = dnn_misc.flatten_layer()
    model['drop1'] = dnn_misc.dropout(r=_dropout_rate)
    model['L1'] = dnn_misc.linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = dnn_misc.softmax_cross_entropy()

    # create variables for momentum
    if _alpha > 0.0:
        momentum = dnn_misc.add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    # Only full mini-batches are used; a trailing partial batch is dropped.
    num_train_batches = N_train // minibatch_size
    num_val_batches = N_val // minibatch_size

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0

        for i in range(num_train_batches):
            # get a mini-batch of data
            x, y = trainSet.get_example(
                idx_order[i * minibatch_size:(i + 1) * minibatch_size])

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            # step (2): second convolution stage, m1 --> m2
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=True)
            a1 = model['L1'].forward(d1)
            # The loss value is not needed while training.
            # NOTE(review): the call is kept in case the loss module caches
            # state in forward() -- confirm before removing.
            model['loss'].forward(a1, y)

            ### backward ###
            grad_a1 = model['loss'].backward(a1, y)
            grad_d1 = model['L1'].backward(d1, grad_a1)
            grad_f1 = model['drop1'].backward(f1, grad_d1)
            grad_m2 = model['F1'].backward(m2, grad_f1)

            # step (3): second convolution stage, grad_m2 --> grad_m1.
            # Each backward() takes the module's forward input and the
            # gradient w.r.t. its forward output.
            grad_h2 = model['M2'].backward(h2, grad_m2)
            grad_c2 = model['nonlinear2'].backward(c2, grad_h2)
            grad_m1 = model['C2'].backward(m1, grad_c2)

            grad_h1 = model['M1'].backward(h1, grad_m1)
            grad_c1 = model['nonlinear1'].backward(c1, grad_h1)
            # The gradient w.r.t. the input is unused; the call is kept
            # because module.gradient is read below (presumably filled in by
            # backward()).
            model['C1'].backward(x, grad_c1)

            ### gradient_update ###
            for module_name, module in model.items():
                # skip modules without learnable parameters
                if not hasattr(module, 'params'):
                    continue
                for key in module.params:
                    # gradient plus the weight-decay term
                    g = module.gradient[key] + _lambda * module.params[key]
                    if _alpha > 0.0:
                        m_key = module_name + '_' + key
                        momentum[m_key] = (_alpha * momentum[m_key]
                                           - _learning_rate * g)
                        module.params[key] += momentum[m_key]
                    else:
                        module.params[key] -= _learning_rate * g

        ### Computing training accuracy and obj ###
        for i in range(num_train_batches):
            x, y = trainSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            # step (4): second convolution stage, m1 --> m2
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=False)
            a1 = model['L1'].forward(d1)
            loss = model['loss'].forward(a1, y)

            train_loss += len(y) * loss
            train_acc += np.sum(predict_label(a1) == y)
            train_count += len(y)

        train_loss = train_loss / train_count
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)

        print('Training loss at epoch ' + str(t + 1) + ' is ' +
              str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' +
              str(train_acc))

        ### Computing validation accuracy ###
        for i in range(num_val_batches):
            x, y = valSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            # step (5): second convolution stage, m1 --> m2
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=False)
            a1 = model['L1'].forward(d1)

            val_acc += np.sum(predict_label(a1) == y)
            val_count += len(y)

        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' +
              str(val_acc))

    # save file
    out_name = ('CNN2_lr' + str(main_params['learning_rate']) +
                '_m' + str(main_params['alpha']) +
                '_w' + str(main_params['lambda']) +
                '_d' + str(main_params['dropout_rate']) +
                '.json')
    # A context manager guarantees the file is flushed and closed, even if
    # serialization raises.
    with open(out_name, 'w') as out_file:
        json.dump({'train': train_acc_record, 'val': val_acc_record},
                  out_file)

    print('Finish running!')
    return
import numpy as np
import dnn_misc

# Sanity check for dnn_misc.linear_layer.forward: with a fixed seed, the
# output for a fixed random input is compared against stored reference values.
np.random.seed(123)

# example data
X = np.random.normal(0, 1, (5, 3))

# example modules
check_linear = dnn_misc.linear_layer(input_D=3, output_D=2)
# NOTE(review): the next two modules are not used by this check; they are
# kept because their constructors may have side effects (e.g. on the RNG
# state) -- confirm before removing.
check_relu = dnn_misc.relu()
check_dropout = dnn_misc.dropout(r=0.5)

# check_linear.forward
hat_X = check_linear.forward(X)
ground_hat_X = np.array([[0.42525407, -0.2120611],
                         [0.15174804, -0.36218431],
                         [0.20957104, -0.57861084],
                         [0.03460477, -0.35992763],
                         [-0.07256568, 0.1385197]])

if (hat_X.shape[0] != 5) or (hat_X.shape[1] != 2):
    print('Wrong output dimension of linear.forward')
else:
    # Relative error must be normalized by the magnitude of the reference
    # value. Dividing by the signed value makes the ratio negative wherever
    # the reference is negative, so np.amax would never select those entries
    # and errors there would go unnoticed.
    max_relative_diff = np.amax(
        np.abs(ground_hat_X - hat_X) / (np.abs(ground_hat_X) + 1e-8))
    print('max_diff_output: ' + str(max_relative_diff))
    if max_relative_diff >= 1e-7:
        print('linear.forward might be wrong')
def test_linear(self):
    """Check dnn_misc.linear_layer forward and backward against references.

    With the NumPy seed fixed to 123, a (5, 3) random input is pushed through
    a 3 -> 2 linear layer and the output is compared with stored reference
    values. A (5, 2) random upstream gradient is then back-propagated and the
    returned input gradient plus the layer's 'W' and 'b' gradients are
    compared with their references. The test fails if a shape is wrong or if
    any maximum relative difference reaches 1e-7.
    """

    def max_relative_diff(expected, actual):
        # Normalize by the magnitude of the reference value. Dividing by the
        # signed value yields negative ratios for negative references, which
        # np.amax would never pick, hiding errors in those entries.
        return np.amax(np.abs(expected - actual) / (np.abs(expected) + 1e-8))

    np.random.seed(123)

    # example data
    X = np.random.normal(0, 1, (5, 3))

    # example modules
    check_linear = dnn_misc.linear_layer(input_D=3, output_D=2)
    # NOTE(review): these two modules are not used below; they are kept
    # because they are constructed before grad_hat_X is drawn and their
    # constructors might touch the RNG state -- confirm before removing.
    check_relu = dnn_misc.relu()
    check_dropout = dnn_misc.dropout(r=0.5)

    # check_linear.forward
    hat_X = check_linear.forward(X)
    ground_hat_X = np.array([[0.42525407, -0.2120611],
                             [0.15174804, -0.36218431],
                             [0.20957104, -0.57861084],
                             [0.03460477, -0.35992763],
                             [-0.07256568, 0.1385197]])

    if (hat_X.shape[0] != 5) or (hat_X.shape[1] != 2):
        print('Wrong output dimension of linear.forward')
        self.fail()
    else:
        diff_output = max_relative_diff(ground_hat_X, hat_X)
        print('max_diff_output: ' + str(diff_output))
        if diff_output >= 1e-7:
            print('linear.forward might be wrong')
            self.fail()
        else:
            print('linear.forward should be correct')

    # check_linear.backward
    grad_hat_X = np.random.normal(0, 1, (5, 2))
    grad_X = check_linear.backward(X, grad_hat_X)

    ground_grad_X = np.array([[-0.32766959, 0.13123228, -0.0470483],
                              [0.22780188, -0.04838436, 0.04225799],
                              [0.03115675, -0.32648556, -0.06550193],
                              [-0.01895741, -0.21411292, -0.05212837],
                              [-0.26923074, -0.78986304, -0.23870499]])

    ground_grad_W = np.array([[-0.27579345, -2.08570514],
                              [4.52754775, -0.40995374],
                              [-1.2049515, 1.77662551]])

    ground_grad_b = np.array([[-4.55094716, -2.51399667]])

    if (grad_X.shape[0] != 5) or (grad_X.shape[1] != 3):
        print('Wrong output dimension of linear.backward')
        self.fail()
    else:
        diff_X = max_relative_diff(ground_grad_X, grad_X)
        print('max_diff_grad_X: ' + str(diff_X))
        diff_W = max_relative_diff(ground_grad_W, check_linear.gradient['W'])
        print('max_diff_grad_W: ' + str(diff_W))
        diff_b = max_relative_diff(ground_grad_b, check_linear.gradient['b'])
        print('max_diff_grad_b: ' + str(diff_b))

        if (diff_X >= 1e-7) or (diff_W >= 1e-7) or (diff_b >= 1e-7):
            print('linear.backward might be wrong')
            self.fail()
        else:
            print('linear.backward should be correct')