def miniBatchStochasticGradientDescent(model, momentum, _lambda, _alpha, _learning_rate):
    '''
    Input:
        model: Dictionary containing all parameters of the model
        momentum: Check add_momentum() function in utils.py to understand this parameter
        _lambda: Regularization constant
        _alpha: Momentum hyperparameter
        _learning_rate: Learning rate for the update

    Note: You can learn more about momentum here:
    https://blog.paperspace.com/intro-to-optimization-momentum-rmsprop-adam/

    Returns: Updated model
    '''
    for module_name, module in model.items():
        # check if a module has learnable parameters
        if hasattr(module, 'params'):
            for key, _ in module.params.items():
                # gradient of the regularized objective (weight decay folded in)
                g = module.gradient[key] + _lambda * module.params[key]
                if _alpha > 0.0:
                    # Momentum update: m = alpha * m - learning_rate * g, then move the
                    # parameter along the accumulated velocity. The momentum dictionary is
                    # created once via add_momentum() and passed in; re-initializing it here
                    # would reset the velocity on every update.
                    momentum[module_name + '_' + key] = _alpha * momentum[module_name + '_' + key] - _learning_rate * g
                    module.params[key] += momentum[module_name + '_' + key]
                else:
                    # plain SGD update without momentum
                    module.params[key] -= _learning_rate * g
    return model
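# --- Illustrative sketch, not part of the original code ---
# The update above is m = alpha * m - learning_rate * g applied per parameter, with
# weight decay folded into the gradient. The toy names below (toy_momentum_step, p, v)
# are assumptions made only for this example.
import numpy as np

def toy_momentum_step(param, grad, velocity, alpha=0.9, lr=0.01, weight_decay=0.0):
    g = grad + weight_decay * param        # regularized gradient, as computed above
    velocity = alpha * velocity - lr * g   # m = alpha * m - learning_rate * g
    return param + velocity, velocity      # parameter moves along the velocity

# usage: repeated steps accumulate velocity along the descent direction
p = np.array([1.0, -2.0])
v = np.zeros_like(p)
for _ in range(3):
    p, v = toy_momentum_step(p, grad=2.0 * p, velocity=v)  # gradient of ||p||^2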
def main(main_params, optimization_type="minibatch_sgd"):
    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(
        dataset=main_params['input_file'])
    N_train, d = Xtrain.shape
    N_val, _ = Xval.shape

    index = np.arange(10)
    unique, counts = np.unique(Ytrain, return_counts=True)
    counts = dict(zip(unique, counts)).values()

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    ### building/defining MLP ###
    """
    In this script, we are going to build an MLP for a 10-class classification problem on MNIST.
    The network structure is input --> linear --> relu --> dropout --> linear --> softmax_cross_entropy loss
    the hidden_layer size (num_L1) is 1000
    the output_layer size (num_L2) is 10
    """
    model = dict()
    num_L1 = 1000
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 10
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])
    _activation = main_params['activation']

    if _activation == 'relu':
        act = relu
    else:
        act = tanh

    # create objects (modules) from the module classes
    model['L1'] = linear_layer(input_D=d, output_D=num_L1)
    model['nonlinear1'] = act()
    model['drop1'] = dropout(r=_dropout_rate)
    model['L2'] = linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = softmax_cross_entropy()

    # Momentum
    if _alpha > 0.0:
        momentum = add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []
    train_loss_record = []
    val_loss_record = []

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0
        val_loss = 0.0

        for i in range(int(np.floor(N_train / minibatch_size))):
            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size:(i + 1) * minibatch_size])

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=True)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)

            ### backward ###
            grad_a2 = model['loss'].backward(a2, y)
            ######################################################################################
            # TODO: Call the backward methods of every layer in the model in reverse order
            # We have given the first and last backward calls
            # Do not modify them.
            ######################################################################################
            grad_d1 = model['L2'].backward(d1, grad_a2)
            grad_h1 = model['drop1'].backward(h1, grad_d1)
            grad_a1 = model['nonlinear1'].backward(a1, grad_h1)
            ######################################################################################
            # NOTE: DO NOT MODIFY CODE BELOW THIS, until next TODO
            ######################################################################################
            grad_x = model['L1'].backward(x, grad_a1)

            ### gradient_update ###
            model = miniBatchGradientDescent(model, momentum, _lambda, _alpha, _learning_rate)

        ### Computing training accuracy and obj ###
        for i in range(int(np.floor(N_train / minibatch_size))):
            x, y = trainSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            ######################################################################################
            # TODO: Call the forward methods of every layer in the model in order
            # Check above forward code
            # Make sure to keep train as False
            ######################################################################################
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            ######################################################################################
            # NOTE: DO NOT MODIFY CODE BELOW THIS, until next TODO
            ######################################################################################
            loss = model['loss'].forward(a2, y)
            train_loss += loss
            train_acc += np.sum(predict_label(a2) == y)
            train_count += len(y)

        train_loss = train_loss
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)
        train_loss_record.append(train_loss)

        print('Training loss at epoch ' + str(t + 1) + ' is ' + str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' + str(train_acc))

        ### Computing validation accuracy ###
        for i in range(int(np.floor(N_val / minibatch_size))):
            x, y = valSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            ######################################################################################
            # TODO: Call the forward methods of every layer in the model in order
            # Check above forward code
            # Make sure to keep train as False
            ######################################################################################
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            ######################################################################################
            # NOTE: DO NOT MODIFY CODE BELOW THIS, until next TODO
            ######################################################################################
            loss = model['loss'].forward(a2, y)
            val_loss += loss
            val_acc += np.sum(predict_label(a2) == y)
            val_count += len(y)

        val_loss_record.append(val_loss)
        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' + str(val_acc))

    # save file
    json.dump({'train': train_acc_record, 'val': val_acc_record},
              open('MLP_lr' + str(main_params['learning_rate']) +
                   '_m' + str(main_params['alpha']) +
                   '_w' + str(main_params['lambda']) +
                   '_d' + str(main_params['dropout_rate']) +
                   '_a' + str(main_params['activation']) +
                   '.json', 'w'))
    print('Finish running!')
    return train_loss_record, val_loss_record
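# --- Illustrative usage sketch (assumed values, not from the original script) ---
# main() reads its hyperparameters from the main_params dictionary. The keys below are
# exactly the ones the function accesses; the concrete values and the 'input_file' path
# are placeholders chosen only for illustration.
example_params = {
    'random_seed': 42,
    'input_file': 'mnist_subset.json',  # placeholder path passed to data_loader_mnist()
    'num_epoch': 10,
    'minibatch_size': 5,
    'learning_rate': 0.01,
    'alpha': 0.9,          # momentum coefficient; 0.0 disables momentum
    'lambda': 0.0,         # weight decay strength
    'dropout_rate': 0.5,
    'activation': 'relu',  # 'relu' or 'tanh'
}
# example call:
# train_losses, val_losses = main(example_params)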
def main(main_params, optimization_type="minibatch_sgd"):
    # data processing
    data = pd.read_csv("dataset/london_merged.csv")
    data.drop(['timestamp'], inplace=True, axis=1)

    # bin the ride counts into 5 classes for classification
    bins = [-1, 1000, 2000, 3000, 4000, 100000]
    dpy = data['cnt'].to_numpy()
    r = pd.cut(dpy, bins)
    data_y = r.codes
    data.drop(['cnt'], inplace=True, axis=1)

    # min-max normalization of the features
    data = (data - data.min()) / (data.max() - data.min())
    data = data.to_numpy()

    x_train, x_val, y_train, y_val = train_test_split(
        data, data_y, test_size=0.33, random_state=int(main_params['random_seed']))
    N_train, d = x_train.shape
    N_val, _ = x_val.shape

    trainSet = DataSplit(x_train, y_train)
    valSet = DataSplit(x_val, y_val)

    # building/defining model
    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 10
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])
    _activation = main_params['activation']

    if _activation == 'relu':
        act = relu
    else:
        act = tanh

    # The network structure is
    # input --> linear --> relu --> dropout --> linear --> relu --> linear --> softmax_cross_entropy loss
    # num_L1: the hidden_layer1 size
    # num_L2: the hidden_layer2 size
    # num_L3: the output_layer size
    model = dict()
    num_L1 = 100
    num_L2 = 50
    num_L3 = 5

    # create objects (modules) from the module classes
    model['L1'] = linear_layer(input_D=d, output_D=num_L1)
    model['nonlinear1'] = act()
    model['drop1'] = dropout(r=_dropout_rate)
    model['L2'] = linear_layer(input_D=num_L1, output_D=num_L2)
    model['nonlinear2'] = act()
    model['L3'] = linear_layer(input_D=num_L2, output_D=num_L3)
    model['loss'] = softmax_cross_entropy()

    # Momentum
    if _alpha > 0.0:
        momentum = add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []
    train_loss_record = []
    val_loss_record = []

    # training & validation
    for t in range(num_epoch):
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0
        val_loss = 0.0

        for i in range(int(np.floor(N_train / minibatch_size))):
            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size:(i + 1) * minibatch_size])

            # forward
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=True)
            a2 = model['L2'].forward(d1)
            h2 = model['nonlinear2'].forward(a2)
            a3 = model['L3'].forward(h2)
            loss = model['loss'].forward(a3, y)

            # backward
            grad_a3 = model['loss'].backward(a3, y)
            grad_h2 = model['L3'].backward(h2, grad_a3)
            grad_a2 = model['nonlinear2'].backward(a2, grad_h2)
            grad_d1 = model['L2'].backward(d1, grad_a2)
            grad_h1 = model['drop1'].backward(h1, grad_d1)
            grad_a1 = model['nonlinear1'].backward(a1, grad_h1)
            grad_x = model['L1'].backward(x, grad_a1)

            # update gradient
            model = miniBatchGradientDescent(model, momentum, _lambda, _alpha, _learning_rate)

        # training accuracy
        for i in range(int(np.floor(N_train / minibatch_size))):
            x, y = trainSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            # forward (dropout disabled for evaluation)
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            h2 = model['nonlinear2'].forward(a2)
            a3 = model['L3'].forward(h2)
            loss = model['loss'].forward(a3, y)
            train_loss += loss
            train_acc += np.sum(predict_label(a3) == y)
            train_count += len(y)

        train_loss = train_loss
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)
        train_loss_record.append(train_loss)

        # validation accuracy
        for i in range(int(np.floor(N_val / minibatch_size))):
            x, y = valSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            # forward (dropout disabled for evaluation)
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            h2 = model['nonlinear2'].forward(a2)
            a3 = model['L3'].forward(h2)
            loss = model['loss'].forward(a3, y)
            val_loss += loss
            val_acc += np.sum(predict_label(a3) == y)
            val_count += len(y)

        val_loss_record.append(val_loss)
        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('At epoch ' + str(t + 1))
        print('Training loss at epoch ' + str(t + 1) + ' is ' + str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' + str(train_acc))
        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' + str(val_acc))

    index = [int(i + 1) for i in range(num_epoch)]
    plt.figure()
    plt.grid()
    plt.plot(index, train_acc_record, color='b', label='train_acc')
    plt.plot(index, val_acc_record, color='darkorange', label='validation_acc')
    plt.xlabel('training epoch')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()

    return train_acc_record, val_acc_record
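# --- Illustrative sketch of the binning step above (toy numbers, assumed names) ---
# pd.cut() maps the continuous ride counts onto the 5 intervals defined by `bins`,
# and .codes turns the resulting categories into integer class labels 0..4.
import numpy as np
import pandas as pd

toy_counts = np.array([250, 1500, 3999, 8000])
toy_bins = [-1, 1000, 2000, 3000, 4000, 100000]
toy_labels = pd.cut(toy_counts, toy_bins).codes
# toy_labels -> [0, 1, 3, 4]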
def main(main_params, optimization_type="minibatch_sgd"):
    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(dataset=main_params['input_file'])
    N_train, d = Xtrain.shape
    N_val, _ = Xval.shape

    index = np.arange(10)
    unique, counts = np.unique(Ytrain, return_counts=True)
    counts = dict(zip(unique, counts)).values()

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    model = dict()
    num_L1 = 1000
    num_L2 = 10

    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 10
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])
    _activation = main_params['activation']

    if _activation == 'relu':
        act = relu
    else:
        act = tanh

    # create objects (modules) from the module classes
    model['L1'] = linear_layer(input_D=d, output_D=num_L1)
    model['nonlinear1'] = act()
    model['drop1'] = dropout(r=_dropout_rate)
    model['L2'] = linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = softmax_cross_entropy()

    # Momentum
    if _alpha > 0.0:
        momentum = add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []
    train_loss_record = []
    val_loss_record = []

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0
        val_loss = 0.0

        for i in range(int(np.floor(N_train / minibatch_size))):
            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size:(i + 1) * minibatch_size])

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=True)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)

            ### backward ###
            grad_a2 = model['loss'].backward(a2, y)
            grad_d1 = model['L2'].backward(d1, grad_a2)
            grad_h1 = model['drop1'].backward(h1, grad_d1)
            grad_a1 = model['nonlinear1'].backward(a1, grad_h1)
            grad_x = model['L1'].backward(x, grad_a1)

            ### gradient_update ###
            model = miniBatchGradientDescent(model, momentum, _lambda, _alpha, _learning_rate)

        ### Computing training accuracy and obj ###
        for i in range(int(np.floor(N_train / minibatch_size))):
            x, y = trainSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            # forward (dropout disabled for evaluation)
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)
            train_loss += loss
            train_acc += np.sum(predict_label(a2) == y)
            train_count += len(y)

        train_loss = train_loss
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)
        train_loss_record.append(train_loss)

        print('Training loss at epoch ' + str(t + 1) + ' is ' + str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' + str(train_acc))

        ### Computing validation accuracy ###
        for i in range(int(np.floor(N_val / minibatch_size))):
            x, y = valSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            # forward (dropout disabled for evaluation)
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)
            val_loss += loss
            val_acc += np.sum(predict_label(a2) == y)
            val_count += len(y)

        val_loss_record.append(val_loss)
        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' + str(val_acc))
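# --- Assumed helper, for illustration only ---
# predict_label() comes from the course utils and is not shown in this file. A minimal
# sketch consistent with how it is used above (pick the highest-scoring class from the
# final linear output) is given below; the real utils version may differ, for example in
# the shape it returns.
import numpy as np

def predict_label_sketch(f):
    # f: (batch_size, num_classes) scores; returns one predicted label per row,
    # shaped as a column so it broadcasts against column-vector labels
    return np.argmax(f, axis=1).reshape(-1, 1)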
def main(main_params, optimization_type="minibatch_sgd"):
    np.random.seed(int(main_params['random_seed']))

    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(
        dataset=main_params['input_file'])
    N_train, d = Xtrain.shape
    N_val, _ = Xval.shape

    index = np.arange(10)
    unique, counts = np.unique(Ytrain, return_counts=True)
    counts = dict(zip(unique, counts)).values()

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    model = dict()
    num_L1 = 1000
    num_L2 = 10

    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    _learning_rate = float(main_params['learning_rate'])
    _step = 10
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])
    _activation = main_params['activation']

    if _activation == 'relu':
        act = relu
    else:
        act = tanh

    model['L1'] = linear_layer(input_D=d, output_D=num_L1)
    model['nonlinear1'] = act()
    model['drop1'] = dropout(r=_dropout_rate)
    model['L2'] = linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = softmax_cross_entropy()

    # Momentum
    if _alpha > 0.0:
        momentum = add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []
    train_loss_record = []
    val_loss_record = []

    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0
        val_loss = 0.0

        for i in range(int(np.floor(N_train / minibatch_size))):
            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size:(i + 1) * minibatch_size])

            # forward (L2 consumes the dropout output d1)
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=True)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)

            # backward
            grad_a2 = model['loss'].backward(a2, y)
            l2_backward = model['L2'].backward(d1, grad_a2)
            drop1_backward = model['drop1'].backward(h1, l2_backward)
            nonlinear1_backward = model['nonlinear1'].backward(a1, drop1_backward)
            grad_x = model['L1'].backward(x, nonlinear1_backward)

            model = miniBatchGradientDescent(model, momentum, _lambda, _alpha, _learning_rate)

        for i in range(int(np.floor(N_train / minibatch_size))):
            x, y = trainSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            # forward (dropout disabled for evaluation)
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)
            train_loss += loss
            train_acc += np.sum(predict_label(a2) == y)
            train_count += len(y)

        train_loss = train_loss
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)
        train_loss_record.append(train_loss)

        print('Training loss at epoch ' + str(t + 1) + ' is ' + str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' + str(train_acc))

        for i in range(int(np.floor(N_val / minibatch_size))):
            x, y = valSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            # forward (dropout disabled for evaluation)
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)
            val_loss += loss
            val_acc += np.sum(predict_label(a2) == y)
            val_count += len(y)

        val_loss_record.append(val_loss)
        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' + str(val_acc))

    json.dump({'train': train_acc_record, 'val': val_acc_record},
              open('MLP_lr' + str(main_params['learning_rate']) +
                   '_m' + str(main_params['alpha']) +
                   '_w' + str(main_params['lambda']) +
                   '_d' + str(main_params['dropout_rate']) +
                   '_a' + str(main_params['activation']) +
                   '.json', 'w'))
    print('Finish running!')
    return train_loss_record, val_loss_record
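# --- Illustrative sketch: reading back the record saved by main() above ---
# The json.dump call stores per-epoch training/validation accuracy under the keys
# 'train' and 'val'. The file name below is a placeholder; substitute whatever name
# main() produced for your hyperparameters.
import json

with open('MLP_lr0.01_m0.0_w0.0_d0.5_arelu.json') as fp:
    record = json.load(fp)
best_epoch = max(range(len(record['val'])), key=lambda t: record['val'][t])
print('best validation accuracy', record['val'][best_epoch], 'at epoch', best_epoch + 1)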
def main(main_params, optimization_type="minibatch_sgd"):
    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    USAGE = 'neuralNetworkMT.py <number of neurons in hidden layer>' \
            ' <max number of Iterations> <train data> <test data>'
    # if len(sys.argv) != 5:
    #     print(USAGE)
    # else:
    #     num_neurons = int(sys.argv[1])
    #     num_iteration = int(sys.argv[2])
    #     train_data = sys.argv[3]
    #     test_data = sys.argv[4]
    train_data = 'downgesture_train.list'
    test_data = 'downgesture_test.list'
    num_neurons = 100
    num_iteration = 1000

    # read input data and separate it into 'down' and 'not_down' gesture files
    print("start reading input text files")
    train_files_label1, train_files_label0 = parse_file_list(train_data)
    test_files_label1, test_files_label0 = parse_file_list(test_data)

    print("start reading input pictures files")
    data_one_train = read_pgm_image(train_files_label1, 1)
    data_zero_train = read_pgm_image(train_files_label0, 0)
    data_one_test = read_pgm_image(test_files_label1, 1)
    data_zero_test = read_pgm_image(test_files_label0, 0)

    # combine pgm data with corresponding labels in train and test
    data_train = np.concatenate((data_one_train, data_zero_train), axis=0)
    data_test = np.concatenate((data_one_test, data_zero_test), axis=0)

    # shuffle and prepare data
    print("start shuffling and splitting data")
    np.random.seed(7)  # fixed seed for shuffling (overrides the seed set above)
    np.random.shuffle(data_train)

    # prepare data
    Xtrain = data_train[:, :-1]
    Ytrain = data_train[:, -1]
    Xval = data_test[:, :-1]
    Yval = data_test[:, -1]

    ### data processing ###
    # Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(dataset=main_params['input_file'])
    N_train, d = Xtrain.shape
    N_val, _ = Xval.shape

    index = np.arange(10)
    unique, counts = np.unique(Ytrain, return_counts=True)
    counts = dict(zip(unique, counts)).values()

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    ### building/defining MLP ###
    """
    In this script, we build an MLP classifier for the down-gesture data, reusing the
    MNIST skeleton above.
    The network structure is input --> linear --> relu --> dropout --> linear --> softmax_cross_entropy loss
    the hidden_layer size (num_L1) is 100
    the output_layer size (num_L2) is 10
    """
    model = dict()
    num_L1 = 100
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 10
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])
    _activation = main_params['activation']

    if _activation == 'relu':
        act = relu
    else:
        act = tanh

    # create objects (modules) from the module classes
    model['L1'] = linear_layer(input_D=d, output_D=num_L1)
    model['nonlinear1'] = act()
    model['drop1'] = dropout(r=_dropout_rate)
    model['L2'] = linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = softmax_cross_entropy()

    # Momentum
    if _alpha > 0.0:
        momentum = add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []
    train_loss_record = []
    val_loss_record = []

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0
        val_loss = 0.0

        for i in range(int(np.floor(N_train / minibatch_size))):
            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size:(i + 1) * minibatch_size])

            ### forward: x -L1-> a1 -nonlinear1-> h1 -drop1-> d1 -L2-> a2 -loss-> y ###
            a1 = model['L1'].forward(x)  # a1 or u
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=True)  # d1 or h after dropout
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)

            ### backward ###
            grad_a2 = model['loss'].backward(a2, y)
            ######################################################################################
            # TODO: Call the backward methods of every layer in the model in reverse order
            # We have given the first and last backward calls
            # Do not modify them.
            ######################################################################################
            grad_d1 = model['L2'].backward(d1, grad_a2)
            grad_h1 = model['drop1'].backward(h1, grad_d1)
            grad_a1 = model['nonlinear1'].backward(a1, grad_h1)
            ######################################################################################
            # NOTE: DO NOT MODIFY CODE BELOW THIS, until next TODO
            ######################################################################################
            grad_x = model['L1'].backward(x, grad_a1)

            ### gradient_update ###
            model = miniBatchGradientDescent(model, momentum, _lambda, _alpha, _learning_rate)

        ### Computing training accuracy and obj ###
        for i in range(int(np.floor(N_train / minibatch_size))):
            x, y = trainSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            ######################################################################################
            # TODO: Call the forward methods of every layer in the model in order
            # Check above forward code
            ######################################################################################
            a1 = model['L1'].forward(x)  # a1 or u
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)  # dropout disabled for evaluation
            a2 = model['L2'].forward(d1)
            ######################################################################################
            # NOTE: DO NOT MODIFY CODE BELOW THIS, until next TODO
            ######################################################################################
            loss = model['loss'].forward(a2, y)
            train_loss += loss
            train_acc += np.sum(predict_label(a2) == y)
            train_count += len(y)

        train_loss = train_loss
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)
        train_loss_record.append(train_loss)

        print('Training loss at epoch ' + str(t + 1) + ' is ' + str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' + str(train_acc))

        ### Computing validation accuracy ###
        for i in range(int(np.floor(N_val / minibatch_size))):
            x, y = valSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            ######################################################################################
            # TODO: Call the forward methods of every layer in the model in order
            # Check above forward code
            ######################################################################################
            a1 = model['L1'].forward(x)  # a1 or u
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)  # dropout disabled for evaluation
            a2 = model['L2'].forward(d1)
            ######################################################################################
            # NOTE: DO NOT MODIFY CODE BELOW THIS, until next TODO
            ######################################################################################
            loss = model['loss'].forward(a2, y)
            val_loss += loss
            val_acc += np.sum(predict_label(a2) == y)
            val_count += len(y)

        val_loss_record.append(val_loss)
        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' + str(val_acc))

    # save file
    json.dump({'train': train_acc_record, 'val': val_acc_record},
              open('MLP_lr' + str(main_params['learning_rate']) +
                   '_m' + str(main_params['alpha']) +
                   '_w' + str(main_params['lambda']) +
                   '_d' + str(main_params['dropout_rate']) +
                   '_a' + str(main_params['activation']) +
                   '.json', 'w'))
    print('Finish running!')
    return train_loss_record, val_loss_record
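# --- Illustrative sketch of the learning-rate schedule used in the epoch loops above ---
# Every _step epochs the learning rate is multiplied by 0.1 (skipping epoch 0), which is
# what the `if (t % _step == 0) and (t != 0)` branch implements. The helper name below is
# an assumption made only for this example.
def stepped_learning_rate(initial_lr, epoch, step=10, decay=0.1):
    # number of completed decay periods before this epoch
    return initial_lr * (decay ** (epoch // step))

# e.g. with initial_lr=0.01 and step=10: epochs 0-9 use 0.01, epochs 10-19 use 0.001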