def main(main_params):
    """
    Search for TODO to find the parts you need to complete.
    Please follow the steps indicated by TODO (step), from step = 1 to step = 5.
    """

    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(dataset='mnist_subset.json')
    N_train, d, _, _ = Xtrain.shape
    N_val, _, _, _ = Xval.shape

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    ### building/defining CNN ###
    """
    In this script, we are going to build a CNN for a 10-class classification
    problem on MNIST. The network structure is

        input --> convolution --> relu --> max pooling -->
        convolution --> relu --> max pooling -->
        flatten --> dropout --> linear --> softmax_cross_entropy loss

    The hidden_layer size (num_L1) is 1225.
    The output_layer size (num_L2) is 10.
    """
    model = dict()
    num_L1 = 1225
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 30
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])

    # create objects (modules) from the module classes
    model['C1'] = dnn_misc.conv_layer(num_input=d, num_output=25, filter_len=5, stride=1)
    model['nonlinear1'] = dnn_misc.relu()
    model['M1'] = dnn_misc.max_pool(max_len=2, stride=2)

    ################################################################################
    # TODO (1): Understand the new modules to be included (compared to dnn_cnn.py). #
    #           You do not need to modify anything here.                            #
    ################################################################################
    model['C2'] = dnn_misc.conv_layer(num_input=25, num_output=25, filter_len=3, stride=1)
    model['nonlinear2'] = dnn_misc.relu()
    model['M2'] = dnn_misc.max_pool(max_len=2, stride=2)
    ################################################################################
    # End of TODO (1)                                                               #
    ################################################################################

    model['F1'] = dnn_misc.flatten_layer()
    model['drop1'] = dnn_misc.dropout(r=_dropout_rate)
    model['L1'] = dnn_misc.linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = dnn_misc.softmax_cross_entropy()
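    # Sanity check on num_L1 (assumption: the dnn_misc conv_layer zero-pads so the
    # spatial size is preserved, and max_pool with max_len=2, stride=2 halves it).
    # Under that assumption, a 28x28 MNIST image flows as
    #   28x28 --C1--> 28x28x25 --M1--> 14x14x25 --C2--> 14x14x25 --M2--> 7x7x25,
    # so the flattened feature vector has 7 * 7 * 25 = 1225 entries, matching num_L1.
    assert num_L1 == 7 * 7 * 25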
    # create variables for momentum
    if _alpha > 0.0:
        momentum = dnn_misc.add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0
        val_acc = 0.0
        val_count = 0

        for i in range(int(np.floor(N_train / minibatch_size))):
            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size:(i + 1) * minibatch_size])

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            ################################################################################
            # TODO (2): Connect the three modules                                          #
            #     model['C2'], model['nonlinear2'], model['M2']                            #
            # into the forward pass.                                                       #
            # Please connect them to m1 and m2, the output of the previous module and the  #
            # input of the next module, respectively.                                      #
            ################################################################################
            # TODO (2)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)
            ################################################################################
            # End of TODO (2)                                                              #
            ################################################################################

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=True)
            a1 = model['L1'].forward(d1)
            loss = model['loss'].forward(a1, y)

            ### backward ###
            grad_a1 = model['loss'].backward(a1, y)
            grad_d1 = model['L1'].backward(d1, grad_a1)
            grad_f1 = model['drop1'].backward(f1, grad_d1)
            grad_m2 = model['F1'].backward(m2, grad_f1)

            ################################################################################
            # TODO (3): Connect the three modules                                          #
            #     model['C2'], model['nonlinear2'], model['M2']                            #
            # into the backward pass.                                                      #
            # Please make sure they take grad_m2 as the incoming gradient and produce      #
            # grad_m1 as the outgoing gradient.                                            #
            # Please pay attention to the number of arguments in the backward pass.        #
            ################################################################################
            # TODO (3)
            grad_h2 = model['M2'].backward(h2, grad_m2)
            grad_c2 = model['nonlinear2'].backward(c2, grad_h2)
            grad_m1 = model['C2'].backward(m1, grad_c2)
            ################################################################################
            # End of TODO (3)                                                              #
            ################################################################################

            grad_h1 = model['M1'].backward(h1, grad_m1)
            grad_c1 = model['nonlinear1'].backward(c1, grad_h1)
            grad_x = model['C1'].backward(x, grad_c1)
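            # The parameter update below is plain SGD with L2 weight decay and
            # (optionally) heavy-ball momentum: for each parameter W with gradient dW,
            #     g = dW + _lambda * W
            #     v = _alpha * v - _learning_rate * g,   W = W + v      (when _alpha > 0)
            #     W = W - _learning_rate * g                            (when _alpha == 0)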
            ### gradient_update ###
            for module_name, module in model.items():
                # check if a module has learnable parameters
                if hasattr(module, 'params'):
                    for key, _ in module.params.items():
                        g = module.gradient[key] + _lambda * module.params[key]
                        if _alpha > 0.0:
                            momentum[module_name + '_' + key] = _alpha * momentum[module_name + '_' + key] - _learning_rate * g
                            module.params[key] += momentum[module_name + '_' + key]
                        else:
                            module.params[key] -= _learning_rate * g

        ### Computing training accuracy and obj ###
        for i in range(int(np.floor(N_train / minibatch_size))):
            x, y = trainSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            ################################################################################
            # TODO (4): Connect the three modules                                          #
            #     model['C2'], model['nonlinear2'], model['M2']                            #
            # into the forward pass.                                                       #
            # Please connect them to m1 and m2, the output of the previous module and the  #
            # input of the next module, respectively.                                      #
            ################################################################################
            # TODO (4)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)
            ################################################################################
            # End of TODO (4)                                                              #
            ################################################################################

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=False)
            a1 = model['L1'].forward(d1)
            loss = model['loss'].forward(a1, y)

            train_loss += len(y) * loss
            train_acc += np.sum(predict_label(a1) == y)
            train_count += len(y)

        train_loss = train_loss / train_count
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)

        print('Training loss at epoch ' + str(t + 1) + ' is ' + str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' + str(train_acc))

        ### Computing validation accuracy ###
        for i in range(int(np.floor(N_val / minibatch_size))):
            x, y = valSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            ################################################################################
            # TODO (5): Connect the three modules                                          #
            #     model['C2'], model['nonlinear2'], model['M2']                            #
            # into the forward pass.                                                       #
            # Please connect them to m1 and m2, the output of the previous module and the  #
            # input of the next module, respectively.                                      #
            ################################################################################
            # TODO (5)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)
            ################################################################################
            # End of TODO (5)                                                              #
            ################################################################################

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=False)
            a1 = model['L1'].forward(d1)

            val_acc += np.sum(predict_label(a1) == y)
            val_count += len(y)

        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' + str(val_acc))

    # save file
    json.dump({'train': train_acc_record, 'val': val_acc_record},
              open('CNN2_lr' + str(main_params['learning_rate']) +
                   '_m' + str(main_params['alpha']) +
                   '_w' + str(main_params['lambda']) +
                   '_d' + str(main_params['dropout_rate']) + '.json', 'w'))

    print('Finish running!')
    return
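

# Hypothetical driver: this section only defines main(), and the real argument
# parsing (if any) lives elsewhere in the file. The dictionary below is an
# illustrative sketch with made-up hyperparameter values, not required settings.
if __name__ == '__main__':
    example_params = {
        'random_seed': 42,       # illustrative value
        'learning_rate': 0.01,   # illustrative value
        'alpha': 0.9,            # momentum coefficient (illustrative)
        'lambda': 0.001,         # weight decay (illustrative)
        'dropout_rate': 0.5,     # illustrative value
        'num_epoch': 30,         # illustrative value
        'minibatch_size': 5,     # illustrative value
    }
    main(example_params)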