# NOTE: this script assumes numpy (as np), json, dnn_misc, and the assignment's
# helpers data_loader_mnist, DataSplit, and predict_label are imported at module level.
def main(main_params):
    """
    Search TODO for those parts you need to complete.
    Please follow the steps indicated in TODO (step) to complete this script
    from step = 1 to step = 5.
    """

    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(dataset='mnist_subset.json')
    N_train, d, _, _ = Xtrain.shape
    N_val, _, _, _ = Xval.shape

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    ### building/defining CNN ###
    """
    In this script, we are going to build a CNN for a 10-class classification
    problem on MNIST. The network structure is

    input --> convolution --> relu --> max pooling --> convolution --> relu
          --> max pooling --> flatten --> dropout --> linear
          --> softmax_cross_entropy loss

    the hidden_layer size (num_L1) is 1225
    the output_layer size (num_L2) is 10
    """
    model = dict()
    num_L1 = 1225
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 30
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])

    # create objects (modules) from the module classes
    model['C1'] = dnn_misc.conv_layer(num_input=d, num_output=25, filter_len=5, stride=1)
    model['nonlinear1'] = dnn_misc.relu()
    model['M1'] = dnn_misc.max_pool(max_len=2, stride=2)

    ################################################################################
    # TODO (1): Understand the new modules to be included (compared to dnn_cnn.py).
    # You do not need to modify anything here.
    ################################################################################
    model['C2'] = dnn_misc.conv_layer(num_input=25, num_output=25, filter_len=3, stride=1)
    model['nonlinear2'] = dnn_misc.relu()
    model['M2'] = dnn_misc.max_pool(max_len=2, stride=2)
    ################################################################################
    # End of TODO (1)
    ################################################################################

    model['F1'] = dnn_misc.flatten_layer()
    model['drop1'] = dnn_misc.dropout(r=_dropout_rate)
    model['L1'] = dnn_misc.linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = dnn_misc.softmax_cross_entropy()

    # create variables for momentum
    if _alpha > 0.0:
        momentum = dnn_misc.add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0
        val_acc = 0.0
        val_count = 0

        for i in range(int(np.floor(N_train / minibatch_size))):
            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size:(i + 1) * minibatch_size])

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            ################################################################################
            # TODO (2): Connect the three modules
            #     model['C2'], model['nonlinear2'], model['M2']
            # into the forward pass.
            # Please make sure to connect them with m1 and m2, the input and output of
            # the previous and the next modules, respectively.
            ################################################################################
            # TODO (2)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)
            ################################################################################
            # End of TODO (2)
            ################################################################################

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=True)
            a1 = model['L1'].forward(d1)
            loss = model['loss'].forward(a1, y)

            ### backward ###
            grad_a1 = model['loss'].backward(a1, y)
            grad_d1 = model['L1'].backward(d1, grad_a1)
            grad_f1 = model['drop1'].backward(f1, grad_d1)
            grad_m2 = model['F1'].backward(m2, grad_f1)

            ################################################################################
            # TODO (3): Connect the three modules
            #     model['C2'], model['nonlinear2'], model['M2']
            # into the backward pass.
            # Please make sure to connect them with grad_m2 and grad_m1, the input and
            # output of the previous and the next modules, respectively.
            # Please pay attention to the number of arguments in the backward pass.
            ################################################################################
            # TODO (3)
            grad_h2 = model['M2'].backward(h2, grad_m2)
            grad_c2 = model['nonlinear2'].backward(c2, grad_h2)
            grad_m1 = model['C2'].backward(m1, grad_c2)
            ################################################################################
            # End of TODO (3)
            ################################################################################

            grad_h1 = model['M1'].backward(h1, grad_m1)
            grad_c1 = model['nonlinear1'].backward(c1, grad_h1)
            grad_x = model['C1'].backward(x, grad_c1)

            ### gradient_update ###
            for module_name, module in model.items():
                # check if a module has learnable parameters
                if hasattr(module, 'params'):
                    for key, _ in module.params.items():
                        g = module.gradient[key] + _lambda * module.params[key]
                        if _alpha > 0.0:
                            momentum[module_name + '_' + key] = (
                                _alpha * momentum[module_name + '_' + key] - _learning_rate * g)
                            module.params[key] += momentum[module_name + '_' + key]
                        else:
                            module.params[key] -= _learning_rate * g

        ### Computing training accuracy and obj ###
        for i in range(int(np.floor(N_train / minibatch_size))):
            x, y = trainSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            ################################################################################
            # TODO (4): Connect the three modules
            #     model['C2'], model['nonlinear2'], model['M2']
            # into the forward pass.
            # Please make sure to connect them with m1 and m2, the input and output of
            # the previous and the next modules, respectively.
            ################################################################################
            # TODO (4)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)
            ################################################################################
            # End of TODO (4)
            ################################################################################

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=False)
            a1 = model['L1'].forward(d1)
            loss = model['loss'].forward(a1, y)
            train_loss += len(y) * loss
            train_acc += np.sum(predict_label(a1) == y)
            train_count += len(y)

        train_loss = train_loss / train_count
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)

        print('Training loss at epoch ' + str(t + 1) + ' is ' + str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' + str(train_acc))

        ### Computing validation accuracy ###
        for i in range(int(np.floor(N_val / minibatch_size))):
            x, y = valSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            ################################################################################
            # TODO (5): Connect the three modules
            #     model['C2'], model['nonlinear2'], model['M2']
            # into the forward pass.
            # Please make sure to connect them with m1 and m2, the input and output of
            # the previous and the next modules, respectively.
            ################################################################################
            # TODO (5)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)
            ################################################################################
            # End of TODO (5)
            ################################################################################

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=False)
            a1 = model['L1'].forward(d1)
            val_acc += np.sum(predict_label(a1) == y)
            val_count += len(y)

        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' + str(val_acc))

    # save file
    json.dump(
        {'train': train_acc_record, 'val': val_acc_record},
        open('CNN2_lr' + str(main_params['learning_rate']) +
             '_m' + str(main_params['alpha']) +
             '_w' + str(main_params['lambda']) +
             '_d' + str(main_params['dropout_rate']) + '.json', 'w'))

    print('Finish running!')
    return
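# --- Illustrative usage sketch (not part of the assignment code) --------------
# A minimal example of how the CNN main() above could be driven. The key names
# match exactly what main() reads from main_params; the numeric values and the
# helper name run_cnn_example are hypothetical choices, not prescribed settings.
def run_cnn_example():
    example_params = {
        'random_seed': 42,
        'num_epoch': 30,
        'minibatch_size': 5,
        'learning_rate': 0.01,
        'alpha': 0.9,         # momentum coefficient; 0.0 disables momentum
        'lambda': 0.0,        # weight-decay (L2) strength
        'dropout_rate': 0.5,  # passed to dnn_misc.dropout(r=...)
    }
    main(example_params)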
def main(main_params):
    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(dataset='mnist_subset.json')
    N_train, d = Xtrain.shape
    N_val, _ = Xval.shape

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    ### building/defining MLP ###
    """
    MLP for a 10-class classification problem on MNIST. The network structure is

    input --> linear --> relu --> dropout --> linear --> softmax_cross_entropy loss

    the hidden_layer size (num_L1) is 1000
    the output_layer size (num_L2) is 10
    """
    model = dict()
    num_L1 = 1000
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 10
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])

    # create objects (modules) from the module classes
    model['L1'] = dnn_misc.linear_layer(input_D=d, output_D=num_L1)
    model['nonlinear1'] = dnn_misc.relu()
    model['drop1'] = dnn_misc.dropout(r=_dropout_rate)
    model['L2'] = dnn_misc.linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = dnn_misc.softmax_cross_entropy()

    # create variables for momentum
    if _alpha > 0.0:
        momentum = dnn_misc.add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0
        val_acc = 0.0
        val_count = 0

        for i in range(int(np.floor(N_train / minibatch_size))):
            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size:(i + 1) * minibatch_size])

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=True)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)

            ### backward ###
            grad_a2 = model['loss'].backward(a2, y)
            grad_d1 = model['L2'].backward(d1, grad_a2)
            grad_h1 = model['drop1'].backward(h1, grad_d1)
            grad_a1 = model['nonlinear1'].backward(a1, grad_h1)
            grad_x = model['L1'].backward(x, grad_a1)

            ### gradient_update ###
            for module_name, module in model.items():
                # check if a module has learnable parameters
                if hasattr(module, 'params'):
                    for key, _ in module.params.items():
                        g = module.gradient[key] + _lambda * module.params[key]
                        if _alpha > 0.0:
                            momentum[module_name + '_' + key] = (
                                _alpha * momentum[module_name + '_' + key] - _learning_rate * g)
                            module.params[key] += momentum[module_name + '_' + key]
                        else:
                            module.params[key] -= _learning_rate * g

        ### Computing training accuracy and obj ###
        for i in range(int(np.floor(N_train / minibatch_size))):
            x, y = trainSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)
            train_loss += len(y) * loss
            train_acc += np.sum(predict_label(a2) == y)
            train_count += len(y)

        train_loss = train_loss / train_count
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)

        print('Training loss at epoch ' + str(t + 1) + ' is ' + str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' + str(train_acc))

        ### Computing validation accuracy ###
        for i in range(int(np.floor(N_val / minibatch_size))):
            x, y = valSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            val_acc += np.sum(predict_label(a2) == y)
            val_count += len(y)

        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' + str(val_acc))

    # save file
    json.dump(
        {'train': train_acc_record, 'val': val_acc_record},
        open('MLP_lr' + str(main_params['learning_rate']) +
             '_m' + str(main_params['alpha']) +
             '_w' + str(main_params['lambda']) +
             '_d' + str(main_params['dropout_rate']) + '.json', 'w'))

    print('Finish running!')
    return
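# --- Illustrative sketch (not part of the assignment code) --------------------
# The gradient_update loops in the two scripts above apply the same rule to every
# learnable parameter: add the weight-decay term, then take either a plain
# gradient step or a momentum step. The standalone function below restates that
# rule for a single NumPy array (or scalar); the name sgd_update and its
# signature are hypothetical, but the arithmetic mirrors the loops above.
def sgd_update(w, grad, velocity, learning_rate, alpha, lam):
    g = grad + lam * w                                   # gradient + L2 weight-decay term
    if alpha > 0.0:
        velocity = alpha * velocity - learning_rate * g  # accumulate momentum
        w = w + velocity
    else:
        w = w - learning_rate * g                        # plain mini-batch gradient step
    return w, velocity

# Example: starting from w = 1.0, velocity = 0.0, with grad = 0.2, learning_rate
# = 0.01, alpha = 0.9, lam = 0.0, one call returns w = 0.998, velocity = -0.002.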
def main(main_params):
    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, Xtest, Ytest = data_loader_mnist(dataset='mnist_subset.json')
    N_train, d = Xtrain.shape
    N_val, _ = Xval.shape
    N_test, _ = Xtest.shape

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)
    testSet = DataSplit(Xtest, Ytest)

    ### building/defining MLP ###
    """
    In this script, we are going to build an MLP for a 10-class classification
    problem on MNIST. The network structure is

    input --> linear --> relu --> dropout --> linear --> softmax_cross_entropy loss

    the hidden_layer size (num_L1) is 1000
    the output_layer size (num_L2) is 10
    """
    model = dict()
    num_L1 = 1000
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 10
    _alpha = 0.0
    _lambda = float(main_params['lambda'])
    _optimizer = main_params['optim']
    _epsilon = main_params['epsilon']

    # create objects (modules) from the module classes
    model['L1'] = dnn_misc.linear_layer(input_D=d, output_D=num_L1)
    model['nonlinear1'] = dnn_misc.relu()
    model['L2'] = dnn_misc.linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = dnn_misc.softmax_cross_entropy()

    # create variables for momentum
    if _optimizer == "Gradient_Descent_Momentum":
        # creates a dictionary that holds the momentum value for each learnable parameter
        momentum = dnn_misc.add_momentum(model)
        _alpha = 0.9
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []
    train_loss_record = []
    val_loss_record = []

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            # learning_rate decay
            _learning_rate = _learning_rate * 0.1

        # shuffle the train data
        idx_order = np.random.permutation(N_train)

        for i in range(int(np.floor(N_train / minibatch_size))):
            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size:(i + 1) * minibatch_size])

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            a2 = model['L2'].forward(h1)
            loss = model['loss'].forward(a2, y)

            ### backward ###
            grad_a2 = model['loss'].backward(a2, y)
            grad_h1 = model['L2'].backward(h1, grad_a2)
            grad_a1 = model['nonlinear1'].backward(a1, grad_h1)
            grad_x = model['L1'].backward(x, grad_a1)

            ### gradient_update ###
            for module_name, module in model.items():
                # model is a dictionary with 'L1', 'L2', 'nonlinear1' and 'loss' as keys.
                # The values for these keys are the corresponding objects created above
                # using the classes defined in dnn_misc.py.
                # Check whether the module has learnable parameters; not all modules do.
                # If it does, the module object has an attribute called 'params'.
                # See the linear layer in dnn_misc.py for more details.
                if hasattr(module, 'params'):
                    for key, _ in module.params.items():
                        # gradient computed during the backward pass + L2 regularization term
                        # (_lambda is the regularization hyperparameter)
                        g = module.gradient[key] + _lambda * module.params[key]

                        if _optimizer == "Minibatch_Gradient_Descent":
                            ################################################################################
                            # TODO: Write the gradient update for the module parameter.
                            # module.params[key] has to be updated with the new value.
                            # The parameter update is of the form: w = w - learning_rate * dl/dw
                            ################################################################################
                            module.params[key] -= _learning_rate * g

                        elif _optimizer == "Gradient_Descent_Momentum":
                            ################################################################################
                            # TODO: Understand how the update differs when we use momentum.
                            # module.params[key] has to be updated with the new value.
                            # momentum(w) = _alpha * momentum(w) at previous step + _learning_rate * g
                            # The parameter update is of the form: w = w - momentum(w)
                            ################################################################################
                            parameter = module_name + '_' + key
                            momentum[parameter] = _alpha * momentum[parameter] + _learning_rate * g
                            module.params[key] -= momentum[parameter]

        ### Compute train accuracy ###
        train_acc = 0.0
        train_loss = 0.0
        train_count = 0
        for i in range(int(np.floor(N_train / minibatch_size))):
            x, y = trainSet.get_example(np.arange(minibatch_size * i, minibatch_size * (i + 1)))

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            a2 = model['L2'].forward(h1)
            loss = model['loss'].forward(a2, y)
            train_loss += len(y) * loss
            train_acc += np.sum(predict_label(a2) == y)
            train_count += len(y)

        train_loss = train_loss / train_count
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)
        train_loss_record.append(train_loss)

        print('Training loss at epoch ' + str(t + 1) + ' is ' + str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' + str(train_acc))

        ### Compute validation accuracy ###
        val_acc = 0.0
        val_loss = 0.0
        val_count = 0
        for i in range(int(np.floor(N_val / minibatch_size))):
            x, y = valSet.get_example(np.arange(minibatch_size * i, minibatch_size * (i + 1)))

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            a2 = model['L2'].forward(h1)
            loss = model['loss'].forward(a2, y)
            val_loss += len(y) * loss
            val_acc += np.sum(predict_label(a2) == y)
            val_count += len(y)

        val_loss = val_loss / val_count
        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)
        val_loss_record.append(val_loss)

        print('Validation loss at epoch ' + str(t + 1) + ' is ' + str(val_loss))
        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' + str(val_acc))

        ### Compute test accuracy ###
        ################################################################################
        # TODO: Do a forward pass on test data and compute test accuracy and loss.
        # Populate them in test_loss and test_acc.
        ################################################################################
        test_loss = 0.0
        test_acc = 0.0
        test_count = 0
        for i in range(int(np.floor(N_test / minibatch_size))):
            x, y = testSet.get_example(np.arange(minibatch_size * i, minibatch_size * (i + 1)))

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            a2 = model['L2'].forward(h1)
            loss = model['loss'].forward(a2, y)
            test_loss += len(y) * loss
            test_acc += np.sum(predict_label(a2) == y)
            test_count += len(y)

        test_loss = test_loss / test_count
        test_acc = test_acc / test_count

        print('Test loss at epoch ' + str(t + 1) + ' is ' + str(test_loss))
        print('Test accuracy at epoch ' + str(t + 1) + ' is ' + str(test_acc))

    plot_metrics(num_epoch, train_acc_record, val_acc_record,
                 train_loss_record, val_loss_record, _optimizer)

    # save file
    json.dump(
        {
            'train_accuracy': train_acc_record,
            'train_loss': train_loss_record,
            'val_accuracy': val_acc_record,
            'val_loss': val_loss_record,
            'test_accuracy': test_acc,
            'test_loss': test_loss
        },
        open(_optimizer + '_lr' + str(main_params['learning_rate']) +
             '_m' + str(_alpha) +
             '_w' + str(main_params['lambda']) + '.json', 'w'))

    # plotting to understand what the network is trying to do
    # plot raw mnist data
    plot_tSNE(Xtest, Ytest, 't-SNE raw MNIST.png')

    ################################################################################
    # TODO: Visualize the output of the first and second layer of the neural
    # network on test data using t-SNE. Populate the arrays 'first_layer_out'
    # and 'second_layer_out'.
    ################################################################################
    # first_layer_out = output of the neural network's first layer on test data
    first_layer_out = np.zeros((N_test, num_L1), dtype=float)
    # second_layer_out = output of the neural network's second layer on test data
    second_layer_out = np.zeros((N_test, num_L2), dtype=float)

    # Add your code here
    first_layer_out = model['L1'].forward(testSet.X)
    h1 = model['nonlinear1'].forward(first_layer_out)
    second_layer_out = model['L2'].forward(h1)

    ###########################################################################
    # Please DO NOT change the following parts of the script                  #
    ###########################################################################
    plot_tSNE(first_layer_out, Ytest, _optimizer + '_t-SNE_1.png')
    plot_tSNE(second_layer_out, Ytest, _optimizer + '_t-SNE_2.png')

    print('Finish running!')
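# --- Illustrative usage sketch (not part of the assignment code) --------------
# A minimal example of how the optimizer-comparison main() above could be run
# for both optimizer strings it recognizes. The numeric values, the loop, and
# the helper name run_optimizer_comparison are hypothetical; 'epsilon' is read
# by main() but unused in the code shown.
def run_optimizer_comparison():
    for optimizer in ['Minibatch_Gradient_Descent', 'Gradient_Descent_Momentum']:
        params = {
            'random_seed': 42,
            'num_epoch': 20,
            'minibatch_size': 5,
            'learning_rate': 0.01,
            'lambda': 0.0,
            'optim': optimizer,
            'epsilon': 1e-8,
        }
        main(params)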