def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] dataset = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT) a, b, c = dataset['train'].images.shape[1:] n_classes = dataset['train'].labels.shape[1] n_inputs = a * b * c mlp = MLP(n_inputs, dnn_hidden_units, n_classes) crossentropy = CrossEntropyModule() test_input, test_labels = dataset['test'].images, dataset['test'].labels test_input = np.reshape(test_input, (test_input.shape[0], n_inputs)) for step in range(FLAGS.max_steps): input, labels = dataset['train'].next_batch(FLAGS.batch_size) input = np.reshape(input, (FLAGS.batch_size, n_inputs)) predictions = mlp.forward(input) dL = crossentropy.backward(predictions, labels) mlp.backward(dL) for layer in mlp.layers: if (layer.__class__ == LinearModule): layer.params[ 'weight'] -= FLAGS.learning_rate * layer.grads['weight'] layer.params[ 'bias'] -= FLAGS.learning_rate * layer.grads['bias'] loss = crossentropy.forward(predictions, labels) if (step % FLAGS.eval_freq == 0): test_prediction = mlp.forward(test_input) test_loss = crossentropy.forward(test_prediction, test_labels) test_accuracy = accuracy(test_prediction, test_labels) sys.stdout = open( str(FLAGS.dnn_hidden_units) + '_' + str(FLAGS.learning_rate) + '_' + str(FLAGS.max_steps) + '_' + str(FLAGS.batch_size) + '_' + str(FLAGS.batch_size) + '_mlp_numpy.csv', 'a') print("{},{:f},{:f}".format(step, test_loss, test_accuracy))
def train(): """ Performs training and evaluation of MLP model. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### # set up the data cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir) test_images, test_labels = cifar10['test'].images, cifar10['test'].labels test_vectors = reshape_images(test_images) # set up the model mlp_model = MLP(3072, dnn_hidden_units, 10) loss_module = CrossEntropyModule() accuracies = [] losses = [] for i in range(FLAGS.max_steps): images, labels = cifar10['train'].next_batch(FLAGS.batch_size) image_vectors = reshape_images(images) # forward pass model_pred = mlp_model.forward(image_vectors) # backward pass loss = loss_module.forward(model_pred, labels) loss_grad = loss_module.backward(model_pred, labels) mlp_model.backward(loss_grad) # update all weights and biases mlp_model.update(FLAGS.learning_rate) # evaluate the model on the data set every eval_freq steps if i % FLAGS.eval_freq == 0: test_pred = mlp_model.forward(test_vectors) test_accuracy = accuracy(test_pred, test_labels) accuracies.append(test_accuracy) losses.append(loss) plot_curve(accuracies, 'Accuracy') plot_curve(losses, 'Loss')
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### data = cifar10_utils.get_cifar10(FLAGS.data_dir) n_inputs = 3 * 32 * 32 n_classes = 10 model = MLP(n_inputs, dnn_hidden_units, n_classes) loss_fn = CrossEntropyModule() max_accuracy = 0.0 start_time = time.perf_counter() for step in range(1, FLAGS.max_steps + 1): x, targets = data['train'].next_batch(FLAGS.batch_size) input = x.reshape((FLAGS.batch_size, -1)) predictions = model.forward(input) gradient = loss_fn.backward(predictions, targets) model.backward(gradient) model.step(FLAGS.learning_rate) if step == 1 or step % FLAGS.eval_freq == 0: training_loss = loss_fn.forward(predictions, targets) test_predictions = model.forward(data['test'].images.reshape( data['test'].num_examples, -1)) test_loss = loss_fn.forward(test_predictions, data['test'].labels) test_acc = accuracy(test_predictions, data['test'].labels) if test_acc > max_accuracy: max_accuracy = test_acc print( "step %d/%d: training loss: %.3f test loss: %.3f accuracy: %.1f%%" % (step, FLAGS.max_steps, training_loss, test_loss, test_acc * 100)) time_taken = time.perf_counter() - start_time print("Done. Scored %.1f%% in %.1f seconds." % (max_accuracy * 100, time_taken))
def train(): """ Performs training and evaluation of MLP model. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] data = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir) train = data['train'] test = data['test'] n_inputs = train.images[0].flatten().shape[0] n_classes = train.labels[0].shape[0] mlp = MLP(n_inputs, dnn_hidden_units, n_classes) loss_mod = CrossEntropyModule() loss_history = [] acc_history = [] for step in range(FLAGS.max_steps): #FLAGS.max_steps x, y = train.next_batch(FLAGS.batch_size) x = x.reshape(x.shape[0], n_inputs) out = mlp.forward(x) loss = loss_mod.forward(out, y) loss_history.append(loss) dout = loss_mod.backward(out, y) mlp.backward(dout) mlp.update(FLAGS.learning_rate) if step == 0 or (step + 1) % FLAGS.eval_freq == 0: x, y = test.images, test.labels x = x.reshape(x.shape[0], n_inputs) test_out = mlp.forward(x) acc = accuracy(test_out, y) print('Accuracy:', acc) acc_history.append(acc) print('Final loss:', loss_history[-1]) print('Final acc:', acc_history[-1]) print(len(acc_history)) plt.plot(loss_history) plt.step(range(0, FLAGS.max_steps + 1, FLAGS.eval_freq), acc_history) plt.legend(['loss', 'accuracy']) plt.show()
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] # Get Images cifar10 = cifar10_utils.read_data_sets(FLAGS.data_dir) # Create MLP Instance trainDataSet = cifar10['train'] testDataSet = cifar10['test'] #size of squeezed images size_of_images = cifar10['train'].images[0].shape[0] * cifar10[ 'train'].images[0].shape[1] * cifar10['train'].images[0].shape[2] #MLP Object & loss mlp = MLP(size_of_images, dnn_hidden_units, np.shape(cifar10['test'].labels)[1]) loss = CrossEntropyModule() for i in range(FLAGS.max_steps): # np.random.shuffle(cifar10['train']) accuracies_train = [] loss_train = [] batch = trainDataSet.next_batch(BATCH_SIZE_DEFAULT) x = batch[0] x = x.reshape(x.shape[0], (x.shape[1] * x.shape[2] * x.shape[3])) y = batch[1] prob = mlp.forward(x) predictions = (prob == prob.max(axis=1)[:, None]).astype(int) current_accuracy = accuracy(predictions, y) accuracies_train.append(current_accuracy) current_loss = loss.forward(prob, y) loss_train.append(current_loss) out_loss_back = loss.backward(prob, y) mlp.backward(out_loss_back) if i % FLAGS.eval_freq == 0: test_dataset(mlp, testDataSet, loss, i) writer.add_scalar('Train/LossIteration', current_accuracy, i) writer.add_scalar('Train/AccuracyIteration', current_loss, i) print(i) test_dataset(mlp, testDataSet, loss, FLAGS.max_steps + 1)
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### if FLAGS.batch_size: batch_size = int(FLAGS.batch_size) data = cifar10_utils.get_cifar10() data_train = data['train'] x_dim = np.prod(data_train.images.shape[1:]) y_dim = np.prod(data_train.labels.shape[1:]) print(x_dim, y_dim) mlp = MLP(x_dim, dnn_hidden_units, y_dim) while data_train.epochs_completed <= 10: x, y = data_train.next_batch(batch_size) x = x.reshape(x_dim, batch_size) out = mlp.forward(x) loss = mlp.loss.forward(out, y) dout = mlp.loss.backward(out, y) mlp.backward(dout) print("LOSS:", loss)
def train(): """ Performs training and evaluation of MLP model. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### batch_size = FLAGS.batch_size lr = FLAGS.learning_rate cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir) x_test, y_test = cifar10['test'].images, cifar10['test'].labels (test_images, height, width, colors) = x_test.shape # (32, 32, 3) n_inputs = height * width * colors # 1024 # n_classes = 10 (_, n_classes) = y_test.shape x_test = x_test.reshape((test_images, n_inputs)) model = MLP(n_inputs, dnn_hidden_units, n_classes) ce = CrossEntropyModule() cols = ['train_acc', 'test_acc', 'train_loss', 'test_loss', 'secs'] # train results = [] name = f'mlp-numpy' with SummaryWriter(name) as w: for step in tqdm(range(FLAGS.max_steps)): x_train, y_train = cifar10['train'].next_batch(batch_size) x_train = x_train.reshape((batch_size, n_inputs)) # forward predictions = model.forward(x_train) train_loss = ce.forward(predictions, y_train) # backward grad = ce.backward(predictions, y_train) model.backward(grad) train_acc = accuracy(predictions, y_train) # update model for linear in model.linears: linear.params["weight"] -= lr * linear.grads["weight"] linear.params["bias"] -= lr * np.mean( linear.grads["bias"].T, axis=1, keepdims=True) # evaluate if step % FLAGS.eval_freq == 0: time = int(step / FLAGS.eval_freq) start = timer() predictions = model.forward(x_test) end = timer() secs = end - start test_loss = ce.forward(predictions, y_test) test_acc = accuracy(predictions, y_test) vals = [train_acc, test_acc, train_loss, test_loss, secs] stats = dict( zip(cols, [ np.asscalar(i) if isinstance(i, (np.ndarray, np.generic)) else i for i in vals ])) print( yaml.dump({ k: round(i, 3) if isinstance(i, float) else i for k, i in stats.items() })) w.add_scalars('metrics', stats, time) results.append(stats) # stop if loss has converged! check = 10 if len(results) >= 2 * check: threshold = 1e-6 losses = [item['train_loss'] for item in results] current = np.mean(losses[-check:]) prev = np.mean(losses[-2 * check:-check]) if (prev - current) < threshold: break df = pd.DataFrame(results, columns=cols) meta = { 'framework': 'numpy', 'algo': 'mlp', 'optimizer': 'sgd', 'batch_size': FLAGS.batch_size, 'learning_rate': FLAGS.learning_rate, 'dnn_hidden_units': FLAGS.dnn_hidden_units, 'weight_decay': 0, 'max_steps': FLAGS.max_steps, } for k, v in meta.items(): df[k] = v output_file = 'results/results.csv' # f'{name}.csv' if os.path.isfile(output_file): df.to_csv(f'{name}.csv', header=False, mode='a') else: df.to_csv(f'{name}.csv', header=True, mode='w') print('done!') return test_loss
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] # Get negative slope parameter for LeakyReLU neg_slope = FLAGS.neg_slope ######################## # PUT YOUR CODE HERE # ####################### """ Initialize data module """ cifar10 = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT) x, y = cifar10['train'].next_batch(1) x_test, y_test = cifar10['test'].next_batch(10000) x = x.reshape(x.shape[0], -1) x_test = x_test.reshape(x_test.shape[0], -1) """ initialize the network """ network = MLP(x.shape[1], dnn_hidden_units, y.shape[1], neg_slope) """ compute forward """ crossEntropy = CrossEntropyModule() """ batch gradient descent """ for i in range(FLAGS.max_steps): x, y = cifar10['train'].next_batch(FLAGS.batch_size) x = x.reshape(x.shape[0], -1) prediction = network.forward(x) loss = crossEntropy.forward(prediction, y) gloss = crossEntropy.backward(prediction, y) network.backward(gloss) for linearModule in network.linearModules: linearModule.params['weight'] = linearModule.params[ 'weight'] - FLAGS.learning_rate * 1 / FLAGS.batch_size * linearModule.grads[ 'weight'] linearModule.params['bias'] = linearModule.params[ 'bias'] - FLAGS.learning_rate * 1 / FLAGS.batch_size * linearModule.grads[ 'bias'] if i % FLAGS.eval_freq == 0: prediction = network.forward(x_test) print('Accuracy after ' + str(i) + ' steps ' + str(accuracy(prediction, y_test))) prediction = network.forward(x_test) print('Final accuracy') print(accuracy(prediction, y_test))
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] # Get negative slope parameter for LeakyReLU neg_slope = FLAGS.neg_slope cifar10 = cifar10_utils.get_cifar10("./cifar10/cifar-10-batches-py") training_set = cifar10['train'] test_set = cifar10['test'] f = vars(FLAGS) input_size = 3 * 32 * 32 number_of_classes = 10 batch_size = f['batch_size'] ### definition of architecture: layers = dnn_hidden_units + [number_of_classes] mlp = MLP(input_size, layers, number_of_classes, neg_slope, f['learning_rate']) lastEpochNum = 0 batchCounter = 0 epoch_acc = 0 epoch_loss = 0 ## preparing test data test_data, test_labels = test_set.images, test_set.labels test_data = np.reshape(test_data, (np.shape(test_data)[0], input_size)) ### normalize test_data = np.subtract(test_data, np.mean(test_data, 0)) test_data = np.divide(test_data, np.amax(test_data, 0)) training_accuracies = [] test_accuracies = [] training_losses = [] test_losses = [] while training_set.epochs_completed <= f['max_steps']: if lastEpochNum != training_set.epochs_completed: lastEpochNum = training_set.epochs_completed training_acc = epoch_acc / batchCounter tr_loss = epoch_loss / batchCounter training_losses.append(tr_loss) training_accuracies.append(training_acc) print("epoch " + str(lastEpochNum) + " avg accuracy on training data: " + str(training_acc)) batchCounter = 0 epoch_acc = 0 epoch_loss = 0 ## also calculate accuracy on the test data for better visualization test_output = mlp.forward(test_data) test_loss = mlp.loss.forward(test_output, test_labels) test_acc = accuracy(test_output, test_labels) test_accuracies.append(test_acc) test_losses.append(test_loss) ## testing after number of batches, given the parameter if batchCounter % f['eval_freq'] == 0: test_output = mlp.forward(test_data) test_acc = accuracy(test_output, test_labels) print("-----------------------") print("test accuracy: " + str(test_acc)) print("-----------------------") batch_data, batch_labels = training_set.next_batch(batch_size) batch_data_flat = np.reshape(batch_data, (batch_size, input_size)) ### normalize batch_data_flat = np.subtract(batch_data_flat, np.mean(batch_data_flat, 0)) batch_data_flat = np.divide(batch_data_flat, np.amax(batch_data_flat, 0)) ### forward pass output = mlp.forward(batch_data_flat) loss = mlp.loss.forward(output, batch_labels) ## backward loss_gradient = mlp.loss.backward(output, batch_labels) mlp.backward(loss_gradient) acc = accuracy(output, batch_labels) epoch_acc += acc epoch_loss += loss batchCounter += 1 drawPlot(training_accuracies, test_accuracies, './mlp-accuracies_numpy.png', 'MLP numpy - accuracies on training and test data', 5) drawPlot(training_losses, test_losses, './mlp-loss_numpy.png', 'MLP numpy - loss on training and test data', 6)
def train(): """ Performs training and evaluation of MLP model. TODONE: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### learning_rate = FLAGS.learning_rate max_steps = FLAGS.max_steps batch_size = FLAGS.batch_size eval_freq = FLAGS.eval_freq data_dir = FLAGS.data_dir # load the cifar10 data cifar10 = cifar10_utils.get_cifar10(data_dir) train = cifar10['train'] test = cifar10['test'] test_images, test_labels = test.images, test.labels # obtain the dimensions of the data n_test_images, depth, height, width = test_images.shape n_inputs = height * width * depth n_classes = test_labels.shape[1] # reshape the test images test_images = test_images.reshape((n_test_images, n_inputs)) # initialize the MLP mlp = MLP(n_inputs, dnn_hidden_units, n_classes) # initialize the loss function loss_function = CrossEntropyModule() # initialize empty results list results = [] output_dir = Path.cwd().parent / 'output' # train the MLP for step in range(max_steps): # obtain a new mini-batch and reshape the images train_images, train_labels = train.next_batch(batch_size) train_images = train_images.reshape((batch_size, n_inputs)) # forward pass the mini-batch predictions = mlp.forward(train_images) loss = loss_function.forward(predictions, train_labels) # backwards propogate the loss loss_grad = loss_function.backward(predictions, train_labels) mlp.backward(loss_grad) # update the weights and biases of the linear modules of the MLP for module in mlp.modules: if isinstance(module, LinearModule): # if it is a linear module module.params[ 'weight'] -= learning_rate * module.grads['weight'] module.params['bias'] -= learning_rate * module.grads['bias'] # evaluate the MLP if (step % eval_freq == 0) or (step == max_steps - 1): # compute train data metrics train_acc = accuracy(predictions, train_labels) train_loss = loss # evaluate the MLP on the test data test_predictions = mlp.forward(test_images) # compute the test data metrics test_acc = accuracy(test_predictions, test_labels) test_loss = loss_function.forward(test_predictions, test_labels) # append the results results.append( [step + 1, train_acc, train_loss, test_acc, test_loss]) print(f'Step {step + 1:0{len(str(max_steps))}}/{max_steps}:') print(f' Performance on the training data (mini-batch):') print(f' Accuracy: {train_acc}, Loss: {train_loss}') print(f' Performance on the testing data (mini-batch):') print(f' Accuracy: {test_acc}, Loss: {test_loss}') # break if train loss has converged # threshold = 1e-6 # if len(train_loss) > 20: # previous_losses = train_loss[-20:-10] # current_losses = train_loss[-10:] # if (previous_losses - current_losses) < threshold: # print(f'Loss has converged early in {step + 1} steps') # break # save the relevant metrics to disk print('Saving the results to disk...') output_path = Path.cwd().parent / 'output' / 'mlp_numpy.csv' output_path.parent.mkdir(parents=True, exist_ok=True) column_names = ['step', 'train_acc', 'train_loss', 'test_acc', 'test_loss'] with open(output_path, 'w') as csv_file: 
csv_file.write(';'.join(column_names) + '\n') for i in range(len(results)): csv_file.write( f'{results[i][0]};{results[i][1]};{results[i][2]};{results[i][3]};{results[i][4]}' + '\n')
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### # return {'train': train, 'validation': validation, 'test': test} dataset_dict = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT) train_loader = dataset_dict['train'] test_loader = dataset_dict['test'] model = MLP(n_inputs=32 * 32 * 3, n_hidden=dnn_hidden_units, n_classes=10) test_accs = [] train_accs = [] losses = [] for epoch in range(FLAGS.max_steps): batch_x, batch_y = train_loader.next_batch(FLAGS.batch_size) out = model.forward(batch_x.reshape(FLAGS.batch_size, -1)) cross_ent = CrossEntropyModule() loss = cross_ent.forward(out, batch_y) losses.append(round(loss, 3)) dout = cross_ent.backward(out, batch_y) model.backward(dout) for layer in model.layers: if type(layer) == modules.LinearModule: layer.params['weight'] = layer.params[ 'weight'] - FLAGS.learning_rate * layer.grads['weight'] layer.params['bias'] = layer.params[ 'bias'] - FLAGS.learning_rate * layer.grads['bias'] if epoch % FLAGS.eval_freq == 0: #print accuracy on test and train set train_acc = accuracy(out, batch_y) out = model.forward( test_loader.images.reshape(test_loader.images.shape[0], -1)) test_acc = accuracy(out, test_loader.labels) print( 'Train Epoch: {}/{}\tLoss: {:.6f}\tTrain accuracy: {:.6f}\tTest accuracy: {:.6f}' .format(epoch, FLAGS.max_steps, loss, train_acc, test_acc)) test_accs.append(test_acc) train_accs.append(train_acc) out = model.forward( test_loader.images.reshape(test_loader.images.shape[0], -1)) test_acc = accuracy(out, test_loader.labels) print('FINAL Test accuracy: {:.6f}'.format(test_acc)) import matplotlib.pyplot as plt plt.figure() plt.plot([i for i in range(0, MAX_STEPS_DEFAULT, EVAL_FREQ_DEFAULT)], train_accs) plt.plot([i for i in range(0, MAX_STEPS_DEFAULT, EVAL_FREQ_DEFAULT)], test_accs) plt.legend(["train", "test"]) plt.ylabel("accuracy") plt.xlabel("epoch") plt.savefig("accuracy") plt.figure() plt.plot([i for i in range(0, MAX_STEPS_DEFAULT, 1)], losses) plt.legend(["loss"]) plt.ylabel("loss") plt.xlabel("epoch") plt.savefig("loss")
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # # set all flag variables, except dor dnn_hidden_units LEARNING_RATE_DEFAULT = FLAGS.learning_rate MAX_STEPS_DEFAULT = FLAGS.max_steps BATCH_SIZE_DEFAULT = FLAGS.batch_size EVAL_FREQ_DEFAULT = FLAGS.eval_freq # get test data to initialize the model with cifar10 = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT) x_test, y_test = cifar10['test'].images, cifar10['test'].labels # input_size is a concatenation of the 3d image input_size = np.shape(x_test)[1] * np.shape(x_test)[2] * np.shape( x_test)[3] # this is the class size = output size class_size = np.shape(y_test)[1] # reshape the images to be 1-dimensional vectors x_test = x_test.reshape([np.shape(x_test)[0], input_size]) model = MLP(n_inputs=input_size, n_hidden=dnn_hidden_units, n_classes=class_size) calculate_loss = CrossEntropyModule() # will store are results accuracies_val = [] accuracies_train = [] loss_val = [] loss_train = [] # keep going until we reach the max steps for step in range(MAX_STEPS_DEFAULT): # get the next batch x, y = cifar10['train'].next_batch(BATCH_SIZE_DEFAULT) x = x.reshape([np.shape(x)[0], input_size]) forward_out = model.forward(x) loss = calculate_loss.forward(forward_out, y) loss_gradient = calculate_loss.backward(forward_out, y) model.backward(loss_gradient) for layer in model.layers: if hasattr(layer, 'params'): # this only holds for the linear layers # update weights and bias layer.params['weight'] = layer.params[ 'weight'] - LEARNING_RATE_DEFAULT * layer.grads['weight'] layer.params['bias'] = layer.params[ 'bias'] - LEARNING_RATE_DEFAULT * layer.grads['bias'] # evaluate every EVAL_FREQ_DEFAULT steps if step % EVAL_FREQ_DEFAULT == 0: test_forward = model.forward(x_test) accuracies_train.append(accuracy(forward_out, y)) accuracies_val.append(accuracy(test_forward, y_test)) loss_train.append(loss) loss_val.append(calculate_loss.forward(test_forward, y_test)) print("accuracies train") print(accuracies_train) print("accuracies val") print(accuracies_val) print("losses train") print(loss_train) print("losses val") print(loss_val)
def train(_run): """ Performs training and evaluation of MLP model. Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### datasets = cifar10_utils.read_data_sets(DATA_DIR_DEFAULT) train_data = datasets['train'] test_data = datasets['test'] model = MLP(n_inputs=3072, n_hidden=dnn_hidden_units, n_classes=10) loss_fn = CrossEntropyModule() log_every = 10 avg_loss = 0 avg_acc = 0 for step in range(FLAGS.max_steps): x, y = train_data.next_batch(FLAGS.batch_size) x = x.reshape(FLAGS.batch_size, -1) out = model.forward(x) # Forward and backward passes loss = loss_fn.forward(out, y) dout = loss_fn.backward(out, y) model.backward(dout) # Parameter updates for layer in model.layers: params = getattr(layer, 'params', None) if params is not None: grads = layer.grads layer.params = {name: params[name] - FLAGS.learning_rate * grads[name] for name in params} avg_loss += loss/log_every avg_acc += accuracy(out, y)/log_every if step % log_every == 0: print('\r[{}/{}] train loss: {:.6f} train acc: {:.6f}'.format(step + 1, FLAGS.max_steps, avg_loss, avg_acc), end='') _run.log_scalar('train-loss', avg_loss, step) _run.log_scalar('train-acc', avg_acc, step) avg_loss = 0 avg_acc = 0 # Evaluate if step % FLAGS.eval_freq == 0 or step == (FLAGS.max_steps - 1): x, y = test_data.next_batch(test_data.num_examples) x = x.reshape(test_data.num_examples, -1) out = model.forward(x) test_loss = loss_fn.forward(out, y) test_acc = accuracy(out, y) print(' test accuracy: {:6f}'.format(test_acc)) _run.log_scalar('test-loss', test_loss, step) _run.log_scalar('test-acc', test_acc, step)
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] # Get negative slope parameter for LeakyReLU neg_slope = FLAGS.neg_slope learning_rate = FLAGS.learning_rate batch_size = FLAGS.batch_size max_steps = FLAGS.max_steps results = open("results.dat", "w+") results.write( "#numpy_mlp \n#neg_slope : {}\n#learning_rate : {}\n#batch_size : {}\n#hidden_units : {}\ \n#max_steps : {}\n".format(neg_slope, learning_rate, batch_size, dnn_hidden_units, max_steps)) results.write("#epoch batch max_steps loss train_acc test_acc test_loss\n") cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir) x_test, t_test = cifar10["test"].images, cifar10["test"].labels x_test = x_test.reshape(np.size(x_test[:, 0, 0, 0]), N_INPUTS) mlp = MLP(N_INPUTS, dnn_hidden_units, N_CLASSES, neg_slope) crossEntropy = CrossEntropyModule() for batch in range(1, max_steps + 1): x, t = cifar10["train"].next_batch(batch_size) x = x.reshape(batch_size, N_INPUTS) y = mlp.forward(x) #y predictions, t targets loss = crossEntropy.forward(y, t) dout = crossEntropy.backward(y, t) mlp.backward(dout) #accuracy before updating if batch == 1: train_acc = accuracy(y, t) y_test = mlp.forward(x_test) test_loss = crossEntropy.forward(y_test, t_test) test_acc = accuracy(y_test, t_test) results.write("%d %d %d %.3f %.3f %.3f %.3f\n" % (cifar10["train"]._epochs_completed, 0, max_steps, loss, train_acc, test_acc, test_loss)) # print("Epoch: %d. Batch: %d/%d. Loss: %.3f. Train_acc: %.3f. Test_acc: %.3f" % # (cifar10["train"]._epochs_completed, 0, max_steps, loss, train_acc, test_acc)) #update weights for layer in mlp.linears: layer.params["weight"] = layer.params[ "weight"] - learning_rate * layer.grads["weight"] layer.params["bias"] = layer.params[ "bias"] - learning_rate * layer.grads["bias"] if batch % FLAGS.eval_freq == 0: train_acc = accuracy(y, t) y_test = mlp.forward(x_test) test_loss = crossEntropy.forward(y_test, t_test) test_acc = accuracy(y_test, t_test) results.write("%d %d %d %.3f %.3f %.3f %.3f\n" % (cifar10["train"]._epochs_completed, batch, max_steps, loss, train_acc, test_acc, test_loss)) # print("Epoch: %d. Batch: %d/%d. Loss: %.3f. Train_acc: %.3f. Test_acc: %.3f" % # (cifar10["train"]._epochs_completed, batch, max_steps, loss, train_acc, test_acc)) results.close()
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] # load dataset cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir) # get batches batches = [] # initializing loss and accuracy arrays accuracies = [] losses = [] for i in range(FLAGS.max_steps): x, y = cifar10['train'].next_batch( FLAGS.batch_size) # (batch_size, 3, 32, 32) (batch_size, 10) x = x.reshape(FLAGS.batch_size, -1) batches.append((x, y)) # get output size out_size = batches[-1][1].shape[1] # get intput size in_size = batches[-1][0].shape[1] # initialize network net = MLP(in_size, dnn_hidden_units, out_size) # intialize loss function criterion = CrossEntropyModule() # make steps for s in range(FLAGS.max_steps): x, t = batches[s] # forwardpass y = net.forward(x) # calculate loss loss = criterion.forward(y, t) losses.append(loss) # gradient for cross entropy dx = criterion.backward(y, t) # backward pass net.backward(dx) # update weights for m in net.modules: if isinstance(m, LinearModule): m.params['weight'] -= FLAGS.learning_rate * m.grads['weight'] m.params['bias'] -= FLAGS.learning_rate * m.grads['bias'] if s % FLAGS.eval_freq == 0: x, t = cifar10['test'].images, cifar10['test'].labels x = x.reshape(x.shape[0], -1) y = net.forward(x) acc = accuracy(y, t) print('accuracy at step', s, ': ', acc) accuracies.append(acc * 100)
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ # DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) # Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### def reshape_cifar10_mlp(x): batch_size = x.shape[0] x = x.transpose([2, 3, 1, 0]) x = x.reshape([-1, batch_size]) x = x.transpose() return x cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir) x_train, y_train = cifar10['train'].next_batch(FLAGS.batch_size) x_train = reshape_cifar10_mlp(x_train) softmax = SoftMaxModule() crossent = CrossEntropyModule() mlp = MLP(x_train.shape[1], dnn_hidden_units, y_train.shape[1]) train_accs = [] train_losses = [] eval_accs = [] eval_losses = [] for i in np.arange(FLAGS.max_steps): print('\nStep: {}\n'.format(i)) print('Training: ') logits = mlp.forward(x_train) softmax_logits = softmax.forward(logits) train_loss = crossent.forward(softmax_logits, y_train) train_acc = accuracy(softmax_logits, y_train) print('loss: {:.4f}, acc: {:.4f}\n'.format(train_loss, train_acc)) dL = crossent.backward(softmax_logits, y_train) dL = softmax.backward(dL) mlp.backward(dL) for layer in mlp.layers: if isinstance(layer, LinearModule): layer.params[ 'weight'] -= FLAGS.learning_rate * layer.grads['weight'] layer.params[ 'bias'] -= FLAGS.learning_rate * layer.grads['bias'] x_train, y_train = cifar10['train'].next_batch(FLAGS.batch_size) x_train = reshape_cifar10_mlp(x_train) if i % FLAGS.eval_freq == 0: print('Evaluation: ') x_eval, y_eval = cifar10['test'].images, cifar10['test'].labels x_eval = reshape_cifar10_mlp(x_eval) logits = mlp.forward(x_eval) softmax_logits = softmax.forward(logits) eval_loss = crossent.forward(softmax_logits, y_eval) eval_acc = accuracy(softmax_logits, y_eval) train_losses.append(train_loss) train_accs.append(train_acc) eval_losses.append(eval_loss) eval_accs.append(eval_acc) print('loss: {:.4f}, acc: {:.4f}'.format(eval_loss, eval_acc)) print('Evaluation: ') x_eval, y_eval = cifar10['test'].images, cifar10['test'].labels x_eval = reshape_cifar10_mlp(x_eval) logits = mlp.forward(x_eval) softmax_logits = softmax.forward(logits) eval_loss = crossent.forward(softmax_logits, y_eval) eval_acc = accuracy(softmax_logits, y_eval) train_losses.append(train_loss) train_accs.append(train_acc) eval_losses.append(eval_loss) eval_accs.append(eval_acc) print('loss: {:.4f}, acc: {:.4f}'.format(eval_loss, eval_acc)) print('Finished training.') plt.figure(figsize=(10, 5)) plt.plot(np.arange(len(train_losses)), train_losses, label='training loss') plt.plot(np.arange(len(eval_losses)), eval_losses, label='evaluation loss') plt.legend() plt.xlabel('Iterations [x{}]'.format(FLAGS.eval_freq)) plt.savefig('results/mlp_loss.png', bbox_inches='tight') plt.figure(figsize=(10, 5)) plt.plot(np.arange(len(train_accs)), train_accs, label='training accuracy') plt.plot(np.arange(len(eval_accs)), eval_accs, label='evaluation accuracy') plt.legend() plt.xlabel('Iterations [x{}]'.format(FLAGS.eval_freq)) plt.savefig('results/mlp_acc.png', bbox_inches='tight')
def train(): """ Performs training and evaluation of MLP model. """ # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] lr = FLAGS.learning_rate cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py') accuracies = {'train': [], 'test': []} loss_curve = {'train': [], 'test': []} x, y = cifar10['train'].next_batch(FLAGS.batch_size) num_inputs = np.prod(x.shape[1:]) num_layers = len(dnn_hidden_units) + 1 myMLP = MLP(num_inputs, dnn_hidden_units, 10) myCR = CrossEntropyModule() for j in range(FLAGS.max_steps): x = x.reshape(FLAGS.batch_size, -1) out = myMLP.forward(x) loss = myCR.forward(out, y) dout = myCR.backward(out, y) myMLP.backward(dout) for i in range(num_layers): myMLP.layers['linear' + str(i + 1)].params['weight'] = myMLP.layers[ 'linear' + str(i + 1)].params['weight'] - myMLP.layers[ 'linear' + str(i + 1)].grads['weight'] * lr myMLP.layers['linear' + str(i + 1)].params['bias'] = myMLP.layers[ 'linear' + str(i + 1)].params['bias'] - myMLP.layers[ 'linear' + str(i + 1)].grads['bias'] * lr if j % FLAGS.eval_freq == 0: ac = accuracy(out, y) accuracies['train'].append(ac) loss_curve['train'].append(loss) x, y = cifar10['test'].images, cifar10['test'].labels x = x.reshape(x.shape[0], -1) outputs = myMLP.forward(x) loss = myCR.forward(outputs, y) loss_curve['test'].append(loss) ac = accuracy(outputs, y) accuracies['test'].append(ac) print(ac) x, y = cifar10['train'].next_batch(FLAGS.batch_size)
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] # Get negative slope parameter for LeakyReLU neg_slope = FLAGS.neg_slope ######################## # PUT YOUR CODE HERE # ####################### cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir) train_data = cifar10['train'] # 60000 x 3 x 32 x32 -> 60000 x 3072, input vector 3072 n_inputs = train_data.images.reshape(train_data.images.shape[0], -1).shape[1] n_hidden = dnn_hidden_units n_classes = train_data.labels.shape[1] print(f"n_inputs {n_inputs}, n_classes {n_classes}") net = MLP(n_inputs, n_hidden, n_classes, neg_slope=neg_slope) loss = CrossEntropyModule() train_acc_plot = [] test_acc_plot = [] loss_train = [] loss_test = [] rloss = 0 print('[DEBUG] start training') for i in range(0, FLAGS.max_steps): x, y = cifar10['train'].next_batch(FLAGS.batch_size) x = x.reshape(x.shape[0], -1) out = net.forward(x) loss_forward = loss.forward(out, y) loss_grad = loss.backward(out, y) net.backward(loss_grad) for n in net.net: if hasattr(n, 'params'): n.params['weight'] = n.params[ 'weight'] - FLAGS.learning_rate * n.grads['weight'] n.params['bias'] = n.params[ 'bias'] - FLAGS.learning_rate * n.grads['bias'] rloss += loss_forward if i % FLAGS.eval_freq == 0: train_accuracy = accuracy(out, y) testX, testY = cifar10['test'].images, cifar10['test'].labels testX = testX.reshape(testX.shape[0], -1) testOut = net.forward(testX) testLoss = loss.forward(testOut, testY) test_accuracy = accuracy(testOut, testY) train_acc_plot.append(train_accuracy) test_acc_plot.append(test_accuracy) loss_train.append(rloss / (i + 1)) loss_test.append(testLoss) print( f'iter {i}, avg loss train {rloss/(i + 1)}, test loss {testLoss}, train acc {train_accuracy}, test acc {test_accuracy}' ) if FLAGS.plot: print('Start plotting...') fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) ax1.plot(np.arange(len(train_acc_plot)), train_acc_plot, label='training') ax1.plot(np.arange(len(test_acc_plot)), test_acc_plot, label='testing') ax1.set_title('Training evaluation with batch size ' + str(FLAGS.batch_size) + '\n learning rate ' + str(FLAGS.learning_rate)) ax1.set_ylabel('Accuracy') ax1.legend() ax2.plot(np.arange(len(loss_train)), loss_train, label='Train Loss') ax2.plot(np.arange(len(loss_test)), loss_test, label='Test Loss') ax2.set_title('Loss evaluation') ax2.set_ylabel('Loss') ax2.legend() plt.xlabel('Iteration') plt.savefig('numpy.png')
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] # Preparation for training print('- Init parameters') data = cifar10_utils.get_cifar10(FLAGS.data_dir) train_data = data['train'] test_data = data['test'] w, h, d = train_data.images[0].shape n_classes = train_data.labels[0].shape[0] criterion = CrossEntropyModule() model = MLP(w * h * d, dnn_hidden_units, n_classes) train_losses = [] test_losses = [] accuracies = [] print('- Start training') for step in range(FLAGS.max_steps): x_batch, x_labels = train_data.next_batch(FLAGS.batch_size) x = x_batch.reshape((FLAGS.batch_size, -1)) predictions = model.forward(x) gradient = criterion.backward(predictions, x_labels) model.backward(gradient) model.step(FLAGS.learning_rate) if step % FLAGS.eval_freq == 0 or step == FLAGS.max_steps - 1: print(' - Step: {}'.format(step)) loss = criterion.forward(predictions, x_labels) out_test = model.forward( test_data.images.reshape(test_data.num_examples, -1)) test_loss = criterion.forward(out_test, test_data.labels) acc = accuracy(out_test, test_data.labels) train_losses.append(loss) test_losses.append(test_loss) accuracies.append(acc) # Save stuff print(accuracies[-1])
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] # prepare input data cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir) _, width, height, channels = cifar10['train']._images.shape _, n_outputs = cifar10['train']._labels.shape n_inputs = width * height * channels # initialize network and loss network = MLP(n_inputs, dnn_hidden_units, n_outputs) cross_entropy = CrossEntropyModule() current_loss = 0.0 # for plotting steps, losses, accuracies = [], [], [] for step in range(FLAGS.max_steps): x, y = cifar10['train'].next_batch(FLAGS.batch_size) x = x.reshape(FLAGS.batch_size, -1) # compute forward and backward pass outputs = network.forward(x) loss = cross_entropy.forward(outputs, y) loss_gradients = cross_entropy.backward(outputs, y) network.backward(loss_gradients) # update parameters of the network for layer in network.layers: if hasattr(layer, 'params'): layer.params['weight'] = layer.params[ 'weight'] - FLAGS.learning_rate * layer.grads['weight'] layer.params['bias'] = layer.params[ 'bias'] - FLAGS.learning_rate * layer.grads['bias'] current_loss += loss.item() # evaluate every eval_freq times if (step + 1) % FLAGS.eval_freq == 0: x_test, y_test = cifar10['test'].next_batch(FLAGS.batch_size) x_test = x_test.reshape(FLAGS.batch_size, -1) test_outputs = network.forward(x_test) steps.append(step) losses.append(current_loss) # calculate accuracy acc = accuracy(test_outputs, y_test) accuracies.append(acc) print("Step: {}, Accuracy: {}".format(step, acc)) current_loss = 0.0 # compute accuracy over entire test set x, y = cifar10['test'].next_batch(FLAGS.batch_size) x = x.reshape(FLAGS.batch_size, -1) out = network.forward(x) result = accuracy(out, y) print(result) # plot graph of accuracies plt.subplot(121) plt.plot(steps, accuracies) plt.title('Accuracy') plt.subplot(122) plt.plot(steps, losses) plt.title('Cross-entropy loss') plt.show()
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### #Parameters max_steps = FLAGS.max_steps batch_size = FLAGS.batch_size learning_rate = FLAGS.learning_rate eval_freq = FLAGS.eval_freq data_dir = FLAGS.data_dir #Get data data = cifar10_utils.get_cifar10(data_dir) train_data = data['train'] validation_data = data['validation'] test_data = data['test'] #Get the shape of all data (_, channels, image_dim, _) = train_data.images.shape (_, mlp_classes) = train_data.labels.shape mlp_input_size = image_dim * image_dim * channels #Initialize the MLP NN = MLP(mlp_input_size, dnn_hidden_units, mlp_classes) for step in range(0,max_steps): x, y = train_data.next_batch(batch_size) x = np.reshape(x, [batch_size, mlp_input_size]) out = NN.forward(x) loss = NN.loss_function.forward(out, y) dx = NN.loss_function.backward(out, y) NN.backward(dx) #Update weights for layer in NN.layers: layer.params['weight'] = layer.params['weight'] - learning_rate * layer.grads['weight'] layer.params['bias'] = layer.params['bias'] - learning_rate * layer.grads['bias'] if (step%eval_freq == 0) or step == max_steps-1: test_data_x, test_data_y = test_data.images.reshape((test_data.images.shape[0], mlp_input_size)), test_data.labels test_out = NN.forward(test_data_x) test_loss = NN.loss_function.forward(test_out, test_data_y) test_accuracy = accuracy(test_out, test_data_y) print("Step:", step, "Train Loss:", loss, "Test Loss:", test_loss, "Test Accuracy:", test_accuracy)
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### #raise NotImplementedError #Load in data set cifar10_set = cifar10_utils.get_cifar10(FLAGS.data_dir) #Init tracking arrays test_acc = [] train_loss = [] train_acc = [] # pull in the first set and get the shapes x, t = cifar10_set['train'].next_batch(FLAGS.batch_size) x = x.reshape(FLAGS.batch_size, -1) out_dim = t.shape[1] in_dim = x.shape[1] mlp = MLP(in_dim, dnn_hidden_units, out_dim, FLAGS.neg_slope) loss_funct = CrossEntropyModule() # First pass of the data y = mlp.forward(x) loss = loss_funct.forward(y, t) mlp.backward(loss_funct.backward(y, t)) for mod in mlp.modules: if type(mod) == LinearModule: mod.params['weight'] -= FLAGS.learning_rate * mod.grads['weight'] mod.params['bias'] -= FLAGS.learning_rate * mod.grads['bias'] train_loss.append(loss) train_acc.append(accuracy(y, t)) x, t = cifar10_set['test'].images, cifar10_set['test'].labels x = x.reshape(x.shape[0], -1) y = mlp.forward(x) test_acc.append(accuracy(y, t)) print("The accuracy at step, " + str(0) + " is : " + str(test_acc[-1])) # loop through till steps are all done for i in range(1, FLAGS.max_steps + 1): x, t = cifar10_set['train'].next_batch(FLAGS.batch_size) x = x.reshape(FLAGS.batch_size, -1) y = mlp.forward(x) loss = loss_funct.forward(y, t) mlp.backward(loss_funct.backward(y, t)) #Update the weights for mod in mlp.modules: if type(mod) == LinearModule: mod.params[ 'weight'] -= FLAGS.learning_rate * mod.grads['weight'] mod.params['bias'] -= FLAGS.learning_rate * mod.grads['bias'] # Evaluation and acc/loss saving if i % FLAGS.eval_freq == 0: train_loss.append(loss) train_acc.append(accuracy(y, t)) x, t = cifar10_set['test'].images, cifar10_set['test'].labels x = x.reshape(x.shape[0], -1) y = mlp.forward(x) test_acc.append(accuracy(y, t)) print("The accuracy at step, " + str(i) + " is : " + str(test_acc[-1])) #Plotting the accuracy of test and train: plt.figure(0) plt.plot(np.arange(0, len(train_acc) * FLAGS.eval_freq * FLAGS.batch_size, FLAGS.eval_freq * FLAGS.batch_size) / cifar10_set['train'].num_examples, train_acc, label='Train') plt.plot(np.arange(0, len(train_acc) * FLAGS.eval_freq * FLAGS.batch_size, FLAGS.eval_freq * FLAGS.batch_size) / cifar10_set['train'].num_examples, test_acc, label='Test') plt.xlabel('Epoch') plt.ylabel('Accuracy') plt.title('Accuracy of Train and Test Set Through Training') plt.legend() plt.savefig('Accuracy_basic1.png') # plt.show() plt.figure(1) plt.plot(np.arange(0, len(train_loss) * FLAGS.eval_freq * FLAGS.batch_size, FLAGS.eval_freq * FLAGS.batch_size) / cifar10_set['train'].num_examples, train_loss, label='Train') plt.xlabel('Epoch') plt.ylabel('Loss') plt.title('Loss Through Training') plt.savefig('Loss_basic1.png')
def train(): """ Performs training and evaluation of MLP model. """ # Set the random seeds for reproducibility np.random.seed(42) # Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [DNN_HIDDEN_UNITS_DEFAULT] data = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT) n_inputs = np.prod(data['train'].images.shape[1:]) n_classes = data['train'].labels.shape[1] n_test = data['test'].images.shape[0] x_test, y_test = data['test'].next_batch(n_test) x_test = x_test.reshape((n_test, n_inputs)) net = MLP(n_inputs, dnn_hidden_units, n_classes) loss_func = CrossEntropyModule() losses = {'train': [], 'test': []} accuracies = {'train': [], 'test': []} eval_steps = [] for s in range(FLAGS.max_steps): x, y = data['train'].next_batch(FLAGS.batch_size) x = x.reshape((FLAGS.batch_size, n_inputs)) # FORWARD out = net.forward(x) # BACKWARD dloss = loss_func.backward(out, y) net.backward(dloss) # UPDATE for grad_layer in filter(lambda l: hasattr(l, 'grads'), net.layers): grad_layer.params[ 'weight'] -= FLAGS.learning_rate * grad_layer.grads['weight'] grad_layer.params[ 'bias'] -= FLAGS.learning_rate * grad_layer.grads['bias'] # Evaluation if s % FLAGS.eval_freq == 0 or s == FLAGS.max_steps - 1: eval_steps.append(s) losses['train'].append(loss_func.forward(out, y)) accuracies['train'].append(accuracy(out, y)) out = net.forward(x_test) losses['test'].append(loss_func.forward(out, y_test)) accuracies['test'].append(accuracy(out, y_test)) print('Iter {:04d}: Test: {:.2f} ({:f}), Train: {:.2f} ({:f})'. format(s, 100 * accuracies['test'][-1], losses['test'][-1], 100 * accuracies['train'][-1], losses['train'][-1])) # Plotting for d, n in [(accuracies, 'Accuracy'), (losses, 'Loss')]: plt.figure() plt.plot(eval_steps, d['train'], label='train') plt.plot(eval_steps, d['test'], label='test') plt.xlabel('Step') plt.ylabel(n) plt.legend() plt.tight_layout() plt.savefig('np_' + n.lower() + '.pdf') print('Best testing loss: {:.2f} accuracy: {:.2f}'.format( np.min(losses['test']), 100 * np.max(accuracies['test'])))
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### #raise NotImplementedError #FLAGS.learning_rate #FLAGS.batch_size #FLAGS.max_steps #FLAGS.eval_freq #FLAGS.data_dir cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py') #x_train_batch, y_train_batch = cifar10['train'].next_batch(5) train_data = cifar10['train'] x_test, y_test = cifar10['test'].images, cifar10['test'].labels nsamples_x, channels, y_size, x_size = np.shape(x_test) nsamples_y, n_classes = np.shape(y_test) input_per_image = y_size * x_size * channels MLP_classifier = MLP(input_per_image, dnn_hidden_units, n_classes) cross_entropy_loss = CrossEntropyModule() #Evaluation list_train_acc = [] list_test_acc = [] list_train_loss = [] list_test_loss = [] #Reshape here as we do multiple test while training x_test = x_test.reshape((nsamples_x, input_per_image)) for step in range(FLAGS.max_steps): #Get batch and reshape for input x_train_batch, y_train_batch = train_data.next_batch(FLAGS.batch_size) x_train_batch = x_train_batch.reshape( (FLAGS.batch_size, input_per_image)) #Feed forward, get loss and gradient of the loss function and backpropagate. output = MLP_classifier.forward(x_train_batch) train_loss = cross_entropy_loss.forward(output, y_train_batch) loss_gradient = cross_entropy_loss.backward(output, y_train_batch) MLP_classifier.backward(loss_gradient) #Gradients are defined in each layer now, update the weights with it for pre_layer, activation in MLP_classifier.layers: gradient_w = pre_layer.grads['weight'] gradient_b = pre_layer.grads['bias'] pre_layer.params['weight'] = pre_layer.params['weight'] - ( FLAGS.learning_rate * gradient_w) pre_layer.params['bias'] = pre_layer.params['bias'] - ( FLAGS.learning_rate * gradient_b) if (step % FLAGS.eval_freq) == 0 or (step == FLAGS.max_steps - 1): output_test = MLP_classifier.forward(x_test) test_acc = accuracy(output_test, y_test) test_loss = cross_entropy_loss.forward(output_test, y_test) list_test_acc.append(test_acc) list_test_loss.append(test_loss) train_acc = accuracy(output, y_train_batch) list_train_loss.append(train_loss) list_train_acc.append(train_acc) #Print and plot results print(list_test_acc) print(list_test_loss) steps_x = range(len(list_test_acc)) plt.plot(steps_x, list_test_acc, label="Test accuracy") plt.plot(steps_x, list_train_acc, label="Train accuracy") plt.xlabel("Step") plt.ylabel("Accuracy") plt.title("Train and test accuracies", fontsize=18, fontweight="bold") plt.legend() #plt.savefig('accuracies.png', bbox_inches='tight') plt.show() plt.plot(steps_x, list_test_loss, label="Test loss") plt.plot(steps_x, list_train_loss, label="Train loss") plt.xlabel("Step") plt.ylabel("Loss") plt.title("Train and test loss", fontsize=18, fontweight="bold") plt.legend() #plt.savefig('loss.png', bbox_inches='tight') plt.show()
def train():
    """
    Performs training and evaluation of MLP model.

    TODO:
    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    # Get negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    lr = FLAGS.learning_rate
    eval_freq = FLAGS.eval_freq
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    input_size = 32 * 32 * 3
    output_size = 10

    # Load dataset
    raw_data = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT)
    train_data = raw_data['train']
    validation_data = raw_data["validation"]
    test_data = raw_data['test']

    model = MLP(n_inputs=input_size, n_hidden=dnn_hidden_units, n_classes=output_size, neg_slope=neg_slope)
    loss_target = CrossEntropyModule()

    csv_data = [['step', 'train_loss', 'test_loss', 'train_accuracy', 'test_accuracy']]

    for step in range(max_steps):
        x, y = train_data.next_batch(batch_size)
        x = x.reshape(batch_size, input_size)

        # Train
        output = model.forward(x)
        loss_avg = loss_target.forward(output, y)
        dout = loss_target.backward(output, y)
        model.backward(dout)

        # Only the linear modules hold weights, so only they need an update each step
        for layer in model.layers:
            if isinstance(layer, LinearModule):
                layer.params['weight'] -= lr * layer.grads['weight']
                layer.params['bias'] -= lr * layer.grads['bias']

        train_acc = accuracy(output, y)
        # With the \r and end='' trick, we can print on the same line
        print('\r[{}/{}] train_loss: {} train_accuracy: {}'.format(
            step + 1, max_steps, round(loss_avg, 3), round(train_acc, 3)), end='')

        # Evaluate on the whole test set
        if step % eval_freq == 0 or step >= (max_steps - 1):
            x, y = test_data.next_batch(test_data.num_examples)
            x = x.reshape(test_data.num_examples, input_size)
            output = model.forward(x)
            test_loss = loss_target.forward(output, y)
            test_acc = accuracy(output, y)
            csv_data.append([step, loss_avg, test_loss, train_acc, test_acc])
            print(' test_loss: {}, test_accuracy: {}'.format(round(test_loss, 3), round(test_acc, 3)))

    os.makedirs('results', exist_ok=True)  # ensure the output dir exists (assumes os is imported at module level)
    with open('results/train_summary_np_{}.csv'.format(int(time.time())), 'w') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerows(csv_data)
def train():
    """
    Performs training and evaluation of MLP model.

    TODO:
    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    batch_size = int(FLAGS.batch_size)
    learning_rate = float(FLAGS.learning_rate)
    max_steps = int(FLAGS.max_steps)
    eval_freq = int(FLAGS.eval_freq)

    cifar10 = cifar10_utils.get_cifar10()
    # This MLP variant takes the learning rate itself, wraps the
    # cross-entropy loss, and applies the weight update inside backward()
    mlp = MLP(3 * 32 * 32, dnn_hidden_units, 10, learning_rate)

    cifar10_train = cifar10['train']
    # Get all test image labels and features:
    cifar10_test = cifar10['test']
    x_test, y_target = cifar10_test.images, cifar10_test.labels
    x_test = x_test.reshape((x_test.shape[0], -1))

    steps = 0
    accuracy_rates = []
    tmp_accuracy_rates = []
    loss_lists = []
    tmp_loss_lists = []

    while steps < max_steps:
        epoch_completed = cifar10_train.epochs_completed
        x, y = cifar10_train.next_batch(batch_size)
        x = x.reshape((batch_size, -1))

        out = mlp.forward(x)
        crossentropy_loss = mlp.loss_forward(out, y)
        dout = mlp.loss_backward(out, y)
        mlp.backward(dout)
        steps += 1

        if (steps % eval_freq) == 0:
            y_test = mlp.forward(x_test)
            rate = accuracy(y_test, y_target)
            print('---accuracy: ', rate, '---')
            tmp_accuracy_rates += [rate]
            tmp_loss_lists += [crossentropy_loss]

        # Average the collected evaluations once per completed epoch
        # (guard against an epoch finishing before any evaluation ran)
        if cifar10_train.epochs_completed != epoch_completed and tmp_accuracy_rates:
            print('===finish one epoch===')
            print('Average accuracy: ', sum(tmp_accuracy_rates) / len(tmp_accuracy_rates))
            accuracy_rates += [sum(tmp_accuracy_rates) / len(tmp_accuracy_rates)]
            loss_lists += [sum(tmp_loss_lists) / len(tmp_loss_lists)]
            tmp_accuracy_rates = []
            tmp_loss_lists = []

    print('finish!')
    # One point per completed epoch
    t = np.arange(1, len(accuracy_rates) + 1)
    a = np.asarray(accuracy_rates)
    l = np.asarray(loss_lists)

    plt.figure(1)
    plt.xticks(t)
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.plot(t, a, 'b')
    plt.show()

    plt.figure(2)
    plt.xticks(t)
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.plot(t, l, 'b')
    plt.show()

    print(a)
    print(l)
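
# The variant above never updates weights in its loop, so its MLP must apply
# SGD itself during backward(); it also owns the loss via loss_forward /
# loss_backward. A hypothetical sketch of such a wrapper, reusing the
# layer/params/grads conventions of the other variants (the class name and
# its composition over a plain MLP are assumptions, not the assignment's API):
class MLPWithInternalSGD(object):
    def __init__(self, n_inputs, n_hidden, n_classes, learning_rate):
        self.net = MLP(n_inputs, n_hidden, n_classes)
        self.learning_rate = learning_rate
        self.loss = CrossEntropyModule()

    def forward(self, x):
        return self.net.forward(x)

    def loss_forward(self, out, y):
        return self.loss.forward(out, y)

    def loss_backward(self, out, y):
        return self.loss.backward(out, y)

    def backward(self, dout):
        self.net.backward(dout)
        # Apply SGD right after backprop, so the training loop stays update-free
        for layer in self.net.layers:
            if hasattr(layer, 'grads'):
                layer.params['weight'] -= self.learning_rate * layer.grads['weight']
                layer.params['bias'] -= self.learning_rate * layer.grads['bias']
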
def train():
    """
    Performs training and evaluation of MLP model.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    # Import data
    cifar10_data = cifar10_utils.get_cifar10(FLAGS.data_dir)
    train_data = cifar10_data['train']
    test_data = cifar10_data['test']
    input_size = np.prod(np.array(train_data.images[0].shape))
    output_size = train_data.labels.shape[1]

    # Create model
    model = MLP(input_size, dnn_hidden_units, output_size)
    loss_module = CrossEntropyModule()

    # Train & evaluate
    eval_loss = 0.0
    full_loss = []
    lossv = []
    accv = []
    for step in range(1, FLAGS.max_steps + 1):
        x_train, y_train = train_data.next_batch(FLAGS.batch_size)
        x_train = x_train.reshape((FLAGS.batch_size, input_size))  # flatten images for the MLP

        y_pred = model.forward(x_train)
        loss = loss_module.forward(y_pred, y_train)
        dout = loss_module.backward(y_pred, y_train)
        model.backward(dout)
        full_loss.append(loss)

        # Update weights
        for layer in model.layers:
            if isinstance(layer, LinearModule):
                layer.params['weight'] -= FLAGS.learning_rate * layer.grads['weight']
                layer.params['bias'] -= FLAGS.learning_rate * layer.grads['bias']

        # Accuracy evaluation
        eval_loss += loss.item()
        if step % FLAGS.eval_freq == 0:
            test_x, test_y = test_data.images, test_data.labels
            test_x = test_x.reshape((test_x.shape[0], input_size))  # flatten test images too
            predicted_y = model.forward(test_x)
            accuracy_result = accuracy(predicted_y, test_y)
            lossv.append(eval_loss / FLAGS.eval_freq)
            accv.append(accuracy_result)
            print('Step %d - accuracy: %.4f - loss: %.3f' % (step, accuracy_result, eval_loss / FLAGS.eval_freq))
            eval_loss = 0.0

    print("Training Done")
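
# Each variant pairs the MLP with a CrossEntropyModule whose forward returns
# the mean batch loss and whose backward returns the gradient w.r.t. the
# predictions. A minimal sketch consistent with those call sites, assuming
# the network already outputs softmax probabilities and adding a small
# epsilon for numerical safety (both are assumptions, hence the suffixed name):
class CrossEntropyModuleSketch(object):
    def forward(self, x, y):
        # Mean negative log-likelihood of the true classes; y is one-hot
        return -np.mean(np.sum(y * np.log(x + 1e-12), axis=1))

    def backward(self, x, y):
        # Gradient of the mean NLL w.r.t. the probabilities x
        return -(y / (x + 1e-12)) / x.shape[0]
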
def train():
    """
    Performs training and evaluation of MLP model.

    TODO:
    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    output_dir = FLAGS.output_dir
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    learning_rate = FLAGS.learning_rate
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    data_dir = FLAGS.data_dir

    # Obtain dataset
    dataset = cifar10_utils.get_cifar10(data_dir)
    n_inputs = dataset['train'].images[0].reshape(-1).shape[0]
    n_classes = dataset['train'].labels[0].shape[0]
    n_test = dataset['test'].images.shape[0]

    # Initialise MLP
    net = MLP(n_inputs, dnn_hidden_units, n_classes)
    loss_module = CrossEntropyModule()
    print("Network architecture:\n\t{}\nLoss module:\n\t{}".format(str(net), str(loss_module)))

    # Evaluation vars
    train_loss = []
    gradient_norms = []
    train_acc = []
    test_acc = []
    iteration = 0

    # Training
    while iteration < max_steps:
        iteration += 1

        # Sample a mini-batch
        x, y = dataset['train'].next_batch(batch_size)
        x = x.reshape((batch_size, -1))

        # Forward propagation
        prediction = net.forward(x)
        loss = loss_module.forward(prediction, y)
        acc = accuracy(prediction, y)
        train_acc.append((iteration, acc))
        train_loss.append((iteration, loss))

        # Backprop
        dpred = loss_module.backward(prediction, y)
        net.backward(dpred)

        # Weight update in linear modules; also accumulate the squared
        # L2 norm of all gradients as a training diagnostic
        norm = 0.0
        for layer in net.linearModules:
            layer.params['weight'] -= learning_rate * layer.grads['weight']
            layer.params['bias'] -= learning_rate * layer.grads['bias']
            norm += np.einsum('ij,ij', layer.grads['weight'], layer.grads['weight']) \
                + np.sum(layer.grads['bias'] ** 2)
        gradient_norms.append((iteration, norm))

        # Evaluation
        if iteration % eval_freq == 0:
            x = dataset['test'].images.reshape((n_test, -1))
            y = dataset['test'].labels
            prediction = net.forward(x)
            acc = accuracy(prediction, y)
            test_acc.append((iteration, acc))
            print("Iteration: {}\t\tTest accuracy: {}".format(iteration, acc))

    # Save raw output
    now = datetime.datetime.now()
    time_stamp = "{}{}{}{}{}".format(now.year, now.month, now.day, now.hour, now.minute)
    net_name = "mlpNumpy"
    out_dir = os.path.join(output_dir, net_name, time_stamp)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    metrics = {"train_loss": train_loss, "gradient_norms": gradient_norms,
               "train_acc": train_acc, "test_acc": test_acc}
    raw_data = {"net": net, "metrics": metrics}
    pickle.dump(raw_data, open(os.path.join(out_dir, "raw_data.p"), "wb"))

    # Save plots
    # Loss
    fig, ax = plt.subplots()
    iters = [i for (i, q) in train_loss]
    loss = [q for (i, q) in train_loss]
    ax.plot(iters, loss)
    ax.set(xlabel='Iteration', ylabel='Loss (log)', title='Batch training loss')
    ax.set_yscale('log')
    ax.grid()
    fig.savefig(os.path.join(out_dir, "loss.png"))

    # Gradient norm
    fig, ax = plt.subplots()
    iters = [i for (i, q) in gradient_norms]
    norm = [q for (i, q) in gradient_norms]
    ax.plot(iters, norm)
    ax.set(xlabel='Iteration', ylabel='Norm', title='Gradient norm')
    ax.grid()
    fig.savefig(os.path.join(out_dir, "gradient_norm.png"))

    # Accuracies
    fig, ax = plt.subplots()
    iters = [i for (i, q) in train_acc]
    accu = [q for (i, q) in train_acc]
    ax.plot(iters, accu, label='Train')
    iters = [i for (i, q) in test_acc]
    accu = [q for (i, q) in test_acc]
    ax.plot(iters, accu, label='Test')
    ax.set(xlabel='Iteration', ylabel='Accuracy', title='Train and test accuracy')
    ax.legend()
    ax.grid()
    fig.savefig(os.path.join(out_dir, "accuracy.png"))
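
# Side note on the einsum used in the update loop above: np.einsum('ij,ij', A, A)
# contracts A with itself over both indices, i.e. it computes the squared
# Frobenius norm sum_ij A_ij^2. A quick illustrative check (_A is an arbitrary
# example matrix, not data from the assignment):
_A = np.arange(6, dtype=float).reshape(2, 3)
assert np.einsum('ij,ij', _A, _A) == np.sum(_A * _A) == 55.0
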
def train():
    """
    Performs training and evaluation of MLP model.

    TODO:
    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    class SGD(object):
        def __init__(self, layers, learning_rate):
            self.layers = layers
            self.learning_rate = learning_rate

        def step(self):
            # Only layers that carry gradients (the linear ones) get updated
            for layer in self.layers:
                if not hasattr(layer, 'grads'):
                    continue
                layer.params['weight'] -= self.learning_rate * layer.grads['weight']
                layer.params['bias'] -= self.learning_rate * layer.grads['bias']

    def evaluate(model, accuracies, losses):
        # Note: this evaluates on a 1000-sample batch rather than the whole test set
        x, y = test_data.next_batch(1000)
        preds = model.forward(np.reshape(x, (x.shape[0], -1)))
        loss = loss_module.forward(preds, y)
        print("Test Loss", loss)
        print("Test Accuracy: ", accuracy(preds, y))
        accuracies['val_scores'].append(accuracy(preds, y))
        losses['val_scores'].append(loss)

        x, y = train_data.next_batch(1000)
        preds = model.forward(np.reshape(x, (x.shape[0], -1)))
        loss = loss_module.forward(preds, y)
        accuracies['train_scores'].append(accuracy(preds, y))
        losses['train_scores'].append(loss)

    def plot_history(results, ylabel, title="Validation performance of ", model_name=""):
        import matplotlib.pyplot as plt
        import seaborn as sns
        sns.set()
        plt.plot([i for i in range(1, len(results["train_scores"]) + 1)],
                 results["train_scores"], label="Train")
        plt.plot([i for i in range(1, len(results["val_scores"]) + 1)],
                 results["val_scores"], label="Val")
        plt.xlabel("Evaluation step")
        plt.ylabel(ylabel)
        low = min(min(results["train_scores"]), min(results["val_scores"]))
        high = max(max(results["train_scores"]), max(results["val_scores"]))
        plt.ylim(low, high * 1.01)
        plt.title(title + model_name)
        plt.legend()
        plt.show()

    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    train_data = cifar10_utils.DataSet(cifar10['train'].images, cifar10['train'].labels)
    test_data = cifar10_utils.DataSet(cifar10['test'].images, cifar10['test'].labels)

    model = MLP(3 * 32 * 32, dnn_hidden_units, 10)
    loss_module = CrossEntropyModule()
    optimizer = SGD(model.layers, FLAGS.learning_rate)

    accuracies = dict(train_scores=list(), val_scores=list())
    losses = dict(train_scores=list(), val_scores=list())

    for i in range(FLAGS.max_steps):
        x, y = train_data.next_batch(FLAGS.batch_size)
        preds = model.forward(np.reshape(x, (FLAGS.batch_size, -1)))
        loss = loss_module.forward(preds, y)
        model.backward(loss_module.backward(preds, y))
        optimizer.step()

        if i % FLAGS.eval_freq == FLAGS.eval_freq - 1:
            print("Train loss: ", loss)
            evaluate(model, accuracies=accuracies, losses=losses)

    plot_history(accuracies, "Accuracies", model_name="MLP")
    plot_history(losses, "Losses", title="Train and Validation Losses of ", model_name="MLP")
def train():
    """
    Performs training and evaluation of MLP model.

    TODO:
    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    # Get negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    # Initialize required arrays for saving the results
    train_accuracies = []
    train_losses = []
    test_accuracies = []
    test_losses = []
    steps = []

    # Load data from the directory specified in the input
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

    # Load test images
    test_images = cifar10['test'].images
    test_targets = cifar10['test'].labels

    # Data dimensions:
    # test_images.shape  -> (10000, 3, 32, 32): n_images, channels, height, width
    # test_targets.shape -> (10000, 10): n_images, n_classes
    n_test = test_images.shape[0]

    # n_inputs is one vector for all channels of width and height:
    # n_inputs = n_channels * height * width
    n_inputs = test_images.shape[1] * test_images.shape[2] * test_images.shape[3]

    # Reshape to (n_samples, n_inputs)
    test_images = test_images.reshape((n_test, n_inputs))

    # Initialize MLP model
    MLP_model = MLP(n_inputs=n_inputs, n_hidden=dnn_hidden_units, n_classes=test_targets.shape[1], neg_slope=neg_slope)

    # Load the loss module
    loss_module = CrossEntropyModule()

    # Run one extra iteration so the final state is also evaluated
    for iteration in range(FLAGS.max_steps + 1):
        train_images, train_targets = cifar10['train'].next_batch(FLAGS.batch_size)

        # Input to MLP.forward is (batch_size, n_inputs)
        train_input = train_images.reshape((FLAGS.batch_size, n_inputs))

        # Predictions by forward pass
        train_predictions = MLP_model.forward(train_input)

        # Loss according to the loss module, predictions and targets
        loss = loss_module.forward(train_predictions, train_targets)

        # Apply backward pass: MLP.backward takes the gradient of the loss
        # (dout), which is the backward of the loss module
        dout = loss_module.backward(train_predictions, train_targets)
        MLP_model.backward(dout)

        # Save training statistics for evaluation afterwards
        train_accuracies.append(accuracy(train_predictions, train_targets))
        train_losses.append(loss)
        steps.append(iteration)

        # Parameter updates with stochastic gradient descent
        for layer in MLP_model.layers:
            # Modules without params (activations, softmax) are skipped
            if hasattr(layer, 'params'):
                layer.params['weight'] -= FLAGS.learning_rate * layer.grads['weight']
                layer.params['bias'] -= FLAGS.learning_rate * layer.grads['bias']

        # Evaluate the current MLP on the test data every eval_freq iterations
        if iteration % FLAGS.eval_freq == 0:
            test_predictions = MLP_model.forward(test_images)
            test_loss = loss_module.forward(test_predictions, test_targets)
            test_acc = accuracy(test_predictions, test_targets)
            test_accuracies.append(test_acc)
            test_losses.append(test_loss)
            print("iteration: " + str(iteration) + " train_acc: " + str(train_accuracies[-1]))
            print("iteration: " + str(iteration) + " test_acc: " + str(test_acc))

    print('Training is done')
    print('Plot Results')

    plt.subplot(2, 1, 1)
    plt.title("Results")
    plt.plot(np.arange(len(train_accuracies)), train_accuracies, label="train acc")
    plt.plot(np.arange(len(test_accuracies) * FLAGS.eval_freq, step=FLAGS.eval_freq), test_accuracies, label="test acc")
    plt.ylabel('Accuracy')
    plt.legend()

    # Loss
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(len(train_losses)), train_losses, label="train loss")
    plt.plot(np.arange(len(test_losses) * FLAGS.eval_freq, step=FLAGS.eval_freq), test_losses, label="test loss")
    plt.xlabel('Step')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig('numpy_results.png')
    plt.show()