def train(n_hidden_1, dropout, lr, wdecay, _run):
    """
    Performs training and evaluation of MLP model.

    Trains an MLP with hidden layers ``[n_hidden_1, 400]`` using Adam and
    cross-entropy loss, logging running training metrics every 50 steps and
    evaluating on the whole test set every ``FLAGS.eval_freq`` iterations
    (and after the final step).

    Args:
      n_hidden_1: number of units in the first hidden layer.
      dropout: dropout setting forwarded to the ``MLP`` constructor.
      lr: learning rate passed to Adam.
      wdecay: weight decay passed to Adam.
      _run: run object; only ``_run.log_scalar(name, value, step)`` is used.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    # NOTE(review): the parsed list is not used below; the layer sizes of this
    # variant come from ``n_hidden_1``.
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_xy_tensors(batch):
        """Flatten an (images, labels) batch to (N, 3072) and move it to ``device``."""
        x, y = batch
        x = torch.tensor(x.reshape(-1, 3072), dtype=torch.float32).to(device)
        y = torch.tensor(y, dtype=torch.long).to(device)
        return x, y

    datasets = cifar10_utils.read_data_sets(DATA_DIR_DEFAULT, one_hot=False)
    train_data = datasets['train']
    test_data = datasets['test']

    model = MLP(n_inputs=3072,
                n_hidden=[n_hidden_1, 400],
                n_classes=10,
                dropout=dropout).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wdecay)

    log_every = 50
    # Sums since the last log; divided by the number of steps actually
    # accumulated so the very first log (a single step) is not under-reported.
    running_loss = 0.0
    running_acc = 0.0
    n_accumulated = 0

    for step in range(FLAGS.max_steps):
        x, y = get_xy_tensors(train_data.next_batch(FLAGS.batch_size))

        # Forward and backward passes
        optimizer.zero_grad()
        out = model(x)
        loss = loss_fn(out, y)
        loss.backward()

        # Parameter updates
        optimizer.step()

        running_loss += loss.item()
        running_acc += accuracy(out, y)
        n_accumulated += 1

        if step % log_every == 0:
            avg_loss = running_loss / n_accumulated
            avg_acc = running_acc / n_accumulated
            print('[{}/{}] train loss: {:.6f} train acc: {:.6f}'.format(
                step, FLAGS.max_steps, avg_loss, avg_acc))
            _run.log_scalar('train-loss', avg_loss, step)
            _run.log_scalar('train-acc', avg_acc, step)
            running_loss = 0.0
            running_acc = 0.0
            n_accumulated = 0

        # Evaluate
        if step % FLAGS.eval_freq == 0 or step == (FLAGS.max_steps - 1):
            x, y = get_xy_tensors(test_data.next_batch(test_data.num_examples))
            model.eval()
            # No gradients are needed for evaluation; skipping the autograd
            # graph keeps the full-test-set forward pass cheap in memory.
            with torch.no_grad():
                out = model(x)
                test_loss = loss_fn(out, y).item()
                test_acc = accuracy(out, y)
            model.train()

            print('[{}/{}] test accuracy: {:6f}'.format(
                step, FLAGS.max_steps, test_acc))
            _run.log_scalar('test-loss', test_loss, step)
            _run.log_scalar('test-acc', test_acc, step)
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP whose hidden layer sizes come from
    ``FLAGS.dnn_hidden_units`` with the optimizer named by
    ``FLAGS.optimizer`` (``'SGD'`` or ``'AdamW'``), evaluates on the whole
    test set at step 0 and every ``FLAGS.eval_freq`` iterations, and finally
    plots the training loss and test accuracy histories.

    Raises:
      ValueError: if ``FLAGS.optimizer`` is not a supported optimizer name.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
        torch.cuda.manual_seed_all(42)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    data = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
    # Named *_set so the locals do not shadow this function's own name.
    train_set = data['train']
    print(train_set.images.shape)
    test_set = data['test']

    n_inputs = train_set.images[0].flatten().shape[0]
    n_classes = train_set.labels[0].shape[0]

    mlp = MLP(n_inputs, dnn_hidden_units, n_classes)
    # Move the model before building the optimizer so the optimizer is
    # created over the parameters on their final device.
    mlp.to(device)
    loss_mod = nn.CrossEntropyLoss()

    if FLAGS.optimizer == 'SGD':
        optimizer = torch.optim.SGD(mlp.parameters(), lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'AdamW':
        optimizer = torch.optim.AdamW(mlp.parameters(), lr=FLAGS.learning_rate)
    else:
        # Fail early with a clear message instead of a NameError in the loop.
        raise ValueError(
            "Unsupported optimizer: {!r} (expected 'SGD' or 'AdamW')".format(
                FLAGS.optimizer))

    loss_history = []
    acc_history = []

    for step in range(FLAGS.max_steps):
        mlp.train()
        x, y = train_set.next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x.reshape(x.shape[0], n_inputs)).to(device)
        y = torch.from_numpy(np.argmax(y, axis=1)).to(device)  # converts onehot to dense

        out = mlp(x)
        loss = loss_mod(out, y)
        # Store a plain float: keeping the tensor would retain every step's
        # autograd graph and cannot be plotted by matplotlib.
        loss_history.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step == 0 or (step + 1) % FLAGS.eval_freq == 0:
            mlp.eval()
            with torch.no_grad():
                x, y = test_set.images, test_set.labels
                x = torch.from_numpy(x.reshape(x.shape[0], n_inputs)).to(device)
                y = torch.from_numpy(y).to(device)
                test_out = mlp(x)
                acc = accuracy(test_out, y)
            print('Accuracy:', acc)
            # float() accepts Python floats, NumPy scalars and 0-dim tensors.
            acc_history.append(float(acc))

    print('Final loss:', loss_history[-1])
    print('Final acc:', acc_history[-1])

    plt.plot(loss_history)
    plt.step(range(0, FLAGS.max_steps + 1, FLAGS.eval_freq), acc_history)
    plt.legend(['loss', 'accuracy'])
    plt.show()
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP with Adam on cross-entropy plus ``alpha`` times the sum of
    the parameters' L2 norms, and evaluates on the whole test set every
    ``FLAGS.eval_freq`` iterations. Optionally appends log lines and saves
    plots under ``./out`` (both disabled by the ``SAVE_*`` switches below).
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    ############################## VARIABLES ##############################

    SAVE_PLOTS = False
    SAVE_LOGS = False

    img_size = 32
    n_classes = 10
    input_size = img_size * img_size * 3
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    n_iterations = FLAGS.max_steps
    lr_rate = FLAGS.learning_rate

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device:", device)

    ############################## METHODS ##############################

    def test():
        """Return (accuracy, loss) of ``net`` on the preloaded test tensors."""
        net.eval()
        # Evaluation needs no gradients; avoid building a graph over the
        # whole test set.
        with torch.no_grad():
            output_t = net(x_t)
            # .item() yields a plain float that matplotlib can plot even
            # when the model lives on the GPU.
            loss_t = criterion(output_t, y_t).item()
            acc_t = accuracy(output_t, y_t_onehot)
        return acc_t, loss_t

    def plot(iteration):
        """Save accuracy and loss curves for all steps up to ``iteration``."""
        idx_test = list(range(0, iteration + 1, eval_freq))
        idx = list(range(0, iteration + 1))

        plt.clf()
        plt.cla()
        plt.subplot(1, 2, 1)
        plt.plot(idx_test, test_accuracies, "k-", linewidth=1, label="test")
        plt.plot(idx, accuracies, "r-", linewidth=0.5, alpha=0.5, label="train")
        plt.xlabel('iteration')
        plt.ylabel('accuracy')
        plt.legend()

        plt.subplot(1, 2, 2)
        plt.plot(idx_test, test_losses, "k-", linewidth=1, label="test")
        plt.plot(idx, losses, "r-", linewidth=0.5, alpha=0.5, label="train")
        plt.xlabel('iteration')
        plt.ylabel('loss')
        plt.legend()
        plt.savefig("./out/plot/plot_pytorch_" + str(batch_size) + "_" + str(lr_rate) + ".png",
                    bbox_inches='tight')
        return

    def to_label(tensor):
        """Convert one-hot rows to class indices via argmax over dim 1."""
        _, tensor = tensor.max(1)
        return tensor

    ############################## MAIN ##############################

    cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py')

    net = MLP(input_size, dnn_hidden_units, n_classes)
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=lr_rate)

    losses = []
    accuracies = []
    test_accuracies = []
    test_losses = []

    # Weight of the norm penalty added to the training loss.
    alpha = 0.0001

    # The test set is converted and moved to the device once, up front.
    x_t = cifar10['test'].images
    y_t = cifar10['test'].labels
    x_t = torch.from_numpy(x_t.reshape(-1, input_size))
    y_t_onehot = torch.from_numpy(y_t).type(torch.LongTensor)
    y_t = to_label(y_t_onehot)
    x_t, y_t = x_t.to(device), y_t.to(device)
    y_t_onehot = y_t_onehot.to(device)

    plt.figure(figsize=(10, 4))

    for i in range(n_iterations):
        x, y = cifar10['train'].next_batch(batch_size)
        x = torch.from_numpy(x.reshape(-1, input_size))
        y_onehot = torch.from_numpy(y).type(torch.LongTensor)
        y = to_label(y_onehot)
        x, y = x.to(device), y.to(device)
        y_onehot = y_onehot.to(device)

        optimizer.zero_grad()
        output = net(x)
        train_loss = criterion(output, y)

        # Penalty is the sum of (unsquared) L2 norms of all parameters.
        reg_loss = 0
        for param in net.parameters():
            reg_loss += param.norm(2)

        loss = train_loss + alpha * reg_loss
        loss.backward()
        optimizer.step()

        losses.append(loss.item())
        accuracies.append(accuracy(output.detach(), y_onehot))

        if i % eval_freq == 0:
            acc_t, loss_t = test()
            test_accuracies.append(acc_t)
            test_losses.append(loss_t)

            log_string = "[{:5d}/{:5d}] Test Accuracy: {:.4f} | Batch Accuracy: {:.4f} | Batch Loss: {:.6f} | Train/Reg: {:.6f}/{:.6f}\n".format(
                i, n_iterations, test_accuracies[-1], accuracies[-1], loss, train_loss, reg_loss * alpha
            )
            print(log_string)

            if SAVE_LOGS:
                with open("./out/log/pytorch_log_" + str(batch_size) + "_" + str(lr_rate) + ".txt", "a") as myfile:
                    myfile.write(log_string)

            if SAVE_PLOTS:
                plot(i)

            # test() switched the network to eval mode; resume training mode.
            net.train()
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP with Adam, writes training/test scalars to a TensorBoard
    run under ``tensorboard/``, evaluates on the test set (batch by batch)
    every ``FLAGS.eval_freq`` steps and after the last step, and pickles the
    collected histories to ``results/<run_id>_results.pkl``.

    NOTE(review): relies on ``batchnorm``, ``dropout``, ``weight_decay``,
    ``device`` and ``dtype`` being defined outside this function.
    """

    # DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # initialize tensorboard
    run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_mlp")
    if batchnorm:
        run_id = run_id + '_batchnorm'
    log_dir = 'tensorboard/' + run_id
    writer = SummaryWriter(log_dir=log_dir)

    # try/finally guarantees the writer is flushed and closed even when
    # training aborts with an exception.
    try:
        # get the dataset
        data_set = cifar10_utils.get_cifar10(FLAGS.data_dir)

        # number of full test batches visited per evaluation
        n_test_batches = data_set['test']._num_examples // FLAGS.batch_size

        image_shape = data_set['train'].images[0].shape
        n_inputs = image_shape[0] * image_shape[1] * image_shape[2]
        n_classes = data_set['train'].labels[0].shape[0]

        # get the necessary components
        classifier = MLP(n_inputs, dnn_hidden_units, n_classes, dropout,
                         batchnorm).to(device)
        loss_function = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(classifier.parameters(),
                                     lr=FLAGS.learning_rate,
                                     weight_decay=weight_decay)

        # list of training accuracies and losses
        train_accuracies = []
        train_losses = []

        # list of test accuracies and losses
        test_accuracies = []
        test_losses = []

        # training loop
        for step in range(FLAGS.max_steps):
            # get current batch...
            images, labels = data_set['train'].next_batch(FLAGS.batch_size)
            images = images.reshape(FLAGS.batch_size, n_inputs)

            # ...in the gpu
            images = torch.from_numpy(images).type(dtype).to(device=device)
            labels = torch.from_numpy(labels).type(dtype).to(device=device)

            # forward pass
            classifier.train()
            predictions = classifier(images)

            # compute loss (labels are one-hot rows; the loss wants indices)
            class_labels = labels.argmax(dim=1)
            loss = loss_function(predictions, class_labels)

            # reset gradients before backwards pass
            optimizer.zero_grad()

            # backward pass
            loss.backward()

            # update weights
            optimizer.step()

            # get accuracy and loss for the batch
            train_accuracy = accuracy(predictions, labels)
            train_accuracies.append(train_accuracy)
            writer.add_scalar("Training accuracy vs steps", train_accuracy, step)

            train_losses.append(loss.item())
            writer.add_scalar("Training loss vs steps", loss.item(), step)

            if ((step + 1) % 100) == 0 or step == 0:
                print("\nStep", step + 1)
                print("\tTRAIN:", round(train_accuracy * 100, 1), "%")

            # run evaluation every eval_freq steps and after the last step
            if (step + 1) % FLAGS.eval_freq == 0 or (step + 1) == FLAGS.max_steps:
                # list of test batch accuracies and losses for this step
                step_test_accuracies = []
                step_test_losses = []

                # get accuracy on the test set
                classifier.eval()
                # Gradients are not needed while evaluating.
                with torch.no_grad():
                    for _ in range(n_test_batches):
                        # get current batch...
                        images, labels = data_set['test'].next_batch(FLAGS.batch_size)
                        images = images.reshape(FLAGS.batch_size, n_inputs)

                        # ...in the gpu
                        images = torch.from_numpy(images).type(dtype).to(device=device)
                        labels = torch.from_numpy(labels).type(dtype).to(device=device)

                        # forward pass
                        predictions = classifier(images)

                        # compute loss
                        class_labels = labels.argmax(dim=1)
                        test_loss = loss_function(predictions, class_labels)

                        # get accuracy and loss for the batch
                        step_test_accuracies.append(accuracy(predictions, labels))
                        step_test_losses.append(test_loss.item())

                # store accuracy and loss
                epoch_test_accuracy = np.mean(step_test_accuracies)
                test_accuracies.append(epoch_test_accuracy)

                epoch_test_loss = np.mean(step_test_losses)
                test_losses.append(epoch_test_loss)

                print("\tTEST:", round(epoch_test_accuracy * 100, 1), "%")

                writer.add_scalar("Test accuracy vs epochs", epoch_test_accuracy, step)
                writer.add_scalar("Test loss vs epochs", epoch_test_loss, step)

        # max() raises on an empty list (e.g. FLAGS.max_steps == 0).
        if test_accuracies:
            print("\nBest TEST:", round(max(test_accuracies) * 100, 1), "%")

        # save results
        results = {
            'train_accuracies': train_accuracies,
            'train_losses': train_losses,
            'test_accuracies': test_accuracies,
            'test_losses': test_losses,
            'eval_freq': FLAGS.eval_freq
        }

        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs("results/", exist_ok=True)
        with open("results/" + run_id + "_results.pkl", "wb") as file:
            pkl.dump(results, file)
    finally:
        writer.close()
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP with Adam and cross-entropy loss. Every
    ``FLAGS.eval_freq`` steps it records the mean training loss since the
    previous record, the current batch accuracy, and the loss/accuracy on
    one test mini-batch. After training it prints the accuracy on the whole
    test set and plots the recorded curves.

    NOTE(review): the periodic evaluation uses a single test mini-batch, not
    the whole test set; only the final evaluation covers the full set.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    # prepare input data
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    # Only the product of the three trailing image dimensions matters here.
    _, dim_a, dim_b, dim_c = cifar10['train']._images.shape
    _, n_outputs = cifar10['train']._labels.shape
    n_inputs = dim_a * dim_b * dim_c

    network = MLP(n_inputs, dnn_hidden_units, n_outputs)
    optimizer = torch.optim.Adam(network.parameters(), lr=FLAGS.learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    train_losses, train_acc, test_losses, test_acc = [], [], [], []
    current_loss = 0.0

    for step in range(FLAGS.max_steps):
        network.train()
        optimizer.zero_grad()

        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        # Inputs do not need gradients; only the network parameters do.
        x = torch.tensor(x).reshape(FLAGS.batch_size, -1)
        y = torch.tensor(y, dtype=torch.float)

        output = network(x)
        labels = torch.max(y, 1)[1]  # one-hot rows -> class indices
        loss = loss_fn(output, labels)
        loss.backward()
        optimizer.step()

        current_loss += loss.item()

        if (step + 1) % FLAGS.eval_freq == 0:
            train_acc.append(accuracy(output.detach(), y))
            # average training loss over the last eval_freq iterations
            train_losses.append(current_loss / float(FLAGS.eval_freq))
            current_loss = 0.0

            # Evaluate in eval mode and without autograd so that layers with
            # train-time behavior are disabled and no graph is built.
            network.eval()
            with torch.no_grad():
                x_test, y_test = cifar10['test'].next_batch(FLAGS.batch_size)
                x_test = torch.tensor(x_test).reshape(FLAGS.batch_size, -1)
                y_test = torch.tensor(y_test, dtype=torch.float)

                output_test = network(x_test)
                test_losses.append(
                    loss_fn(output_test, torch.max(y_test, 1)[1]).item())
                test_acc.append(accuracy(output_test, y_test))
            print("Step {}".format(step))

    # Final evaluation on the complete test set.
    size_test = cifar10['test']._num_examples
    network.eval()
    with torch.no_grad():
        x, y = cifar10['test'].next_batch(size_test)
        x = torch.tensor(x).reshape(size_test, -1)
        y = torch.tensor(y, dtype=torch.float)

        # Get network output for batch and get loss and accuracy
        out = network(x)
        print("Accuracy: {}".format(accuracy(out, y)))

    # plot graph of accuracies
    plt.subplot(211)
    plt.plot(test_acc, label="test accuracy")
    plt.plot(train_acc, label="training accuracy")
    plt.title('Accuracy')
    plt.legend()

    plt.subplot(212)
    plt.plot(test_losses, label = "test loss")
    plt.plot(train_losses, label = "training loss")
    plt.title('Cross-entropy loss')
    plt.legend()

    plt.show()
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP with SGD and cross-entropy loss, records the training loss
    and batch accuracy at every step, evaluates batch-wise over the test
    generator every ``FLAGS.eval_freq`` steps (and on the last step), and
    saves/shows loss and accuracy curves.

    NOTE(review): the history lists (``train_overall_loss``,
    ``train_overall_accuracy``, ``train_x_axis``, ``test_overall_loss``,
    ``test_overall_accuracy``, ``test_x_axis``) and
    ``cifar_test_generator`` are not defined in this function; they are
    expected to exist at module level.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # select which device to train the model on
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    # compute the input size of the MLP
    input_size, n_classes = 3 * 32 * 32, 10

    # init model, define the dataset, loss function and optimizer
    model = MLP(input_size, dnn_hidden_units, n_classes, FLAGS.b).to(device)
    dataset = cifar10_utils.get_cifar10(FLAGS.data_dir)
    loss_fn = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=FLAGS.learning_rate)

    for step in range(FLAGS.max_steps):
        X_train, y_train = dataset['train'].next_batch(FLAGS.batch_size)
        optimizer.zero_grad()

        # move to correct device and shape for MLP
        X_train = torch.tensor(X_train).reshape(
            FLAGS.batch_size, input_size).float().to(device)
        y_train = torch.tensor(y_train).float().to(device)

        predictions = model(X_train)
        train_loss = loss_fn(predictions, y_train.argmax(1).long())
        train_loss.backward()
        optimizer.step()

        # add the loss and accuracy to the lists for plotting
        train_overall_loss.append(train_loss.cpu().detach().sum())
        train_overall_accuracy.append(
            accuracy(predictions.cpu().detach(), y_train.cpu().detach()))
        train_x_axis.append(step)

        # test the model when eval freq is reached or if it is the last step
        if not step % FLAGS.eval_freq or step + 1 == FLAGS.max_steps:
            model.eval()
            test_accuracies, test_losses_list = [], []

            # test batchwise since the whole test set does not fit the gpu;
            # no gradients are needed, so skip building the autograd graph
            with torch.no_grad():
                for X_test, y_test in cifar_test_generator(dataset):
                    # -1 lets a smaller final batch through unchanged
                    X_test = torch.tensor(X_test).reshape(
                        -1, input_size).float().to(device)
                    y_test = torch.tensor(y_test).float().to(device)

                    predictions = model(X_test)
                    test_loss = loss_fn(predictions, y_test.argmax(1).long())
                    test_accuracy = accuracy(predictions, y_test)

                    # add the values to compute the average loss and accuracy
                    # for the entire testset
                    test_accuracies.append(test_accuracy.cpu().detach())
                    test_losses_list.append(test_loss.cpu().detach().sum())

            # Guard against an empty generator (division by zero below).
            if test_accuracies:
                avg_test_accuracy = sum(test_accuracies) / len(test_accuracies)
                avg_test_loss = sum(test_losses_list) / len(test_losses_list)

                # Report the test-set average, not just the last batch's loss.
                print(
                    "[{:5}/{:5}] Train loss {:.5f} Test loss {:.5f} Test accuracy {:.5f}"
                    .format(step, FLAGS.max_steps, train_loss.item(),
                            avg_test_loss, avg_test_accuracy))

                test_overall_accuracy.append(avg_test_accuracy)
                test_overall_loss.append(avg_test_loss)
                test_x_axis.append(step)
            model.train()

    plt.plot(train_x_axis, train_overall_loss, label="Avg Train loss")
    plt.plot(test_x_axis, test_overall_loss, label="Avg Test loss")
    plt.legend()
    plt.savefig("pytorch_loss_curve")
    plt.show()

    plt.plot(train_x_axis, train_overall_accuracy, label="Train batch accuracy")
    plt.plot(test_x_axis, test_overall_accuracy, label="Test set accuracy")
    plt.legend()
    plt.savefig("pytorch_accuracy_curve")
    plt.show()
def train():
    """
    Performs training and evaluation of MLP model.

    Pre-fetches ``FLAGS.max_steps`` training batches, trains an MLP with SGD
    and cross-entropy loss, evaluates on the whole test set every
    ``FLAGS.eval_freq`` steps, and draws the test accuracy and training loss
    curves in two stacked subplots.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

    loss_list = []
    batch_list = []
    accuracy_list = []

    # load the batches and reshape the samples
    # NOTE(review): this keeps every training batch in memory at once.
    for _ in range(FLAGS.max_steps):
        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        y = np.argmax(y, axis=1)  # one-hot rows -> class indices
        # transform sample into vector
        x = np.reshape(x, (FLAGS.batch_size, -1))
        batch_list.append((x, y))
    print('Batch list completed')

    in_features = batch_list[0][0].shape[1]
    out_features = 10  # num_classes

    x_test, y_test = cifar10['test'].images, cifar10['test'].labels
    x_test = np.reshape(x_test, (x_test.shape[0], -1))
    y_test = np.argmax(y_test, axis=1)
    print(y_test.shape)
    # Cast like the training inputs so both paths feed the same dtype.
    x_test = torch.from_numpy(x_test).float()
    y_test = torch.from_numpy(y_test).long()
    print(y_test)

    net = MLP(in_features, dnn_hidden_units, out_features)
    lossfunc = nn.CrossEntropyLoss()
    optimiser = optim.SGD(net.parameters(), lr=FLAGS.learning_rate)
    print(net)

    net.train()
    for i in range(FLAGS.max_steps):
        inputs, labels = batch_list[i]
        inputs = torch.tensor(inputs)
        labels = torch.from_numpy(labels).long()

        optimiser.zero_grad()
        outputs = net(inputs.float())
        loss = lossfunc(outputs, labels)
        # Store a float: a live loss tensor keeps its autograd graph alive
        # and cannot be converted to NumPy for plotting.
        loss_list.append(loss.item())
        loss.backward()
        optimiser.step()

        if (i + 1) % FLAGS.eval_freq == 0:
            net.eval()
            with torch.no_grad():
                predicted = net(x_test)
                accuracy_val = accuracy(predicted, y_test)
            accuracy_list.append(accuracy_val)
            print('Accuracy on test set at step {} is {}'.format(
                i, accuracy_val))
            print('Loss of training is {}'.format(loss.item()))
            # Switch back, otherwise all later steps train in eval mode.
            net.train()

    plt.subplot(2, 1, 1)
    plt.plot(
        np.arange(len(accuracy_list) * FLAGS.eval_freq, step=FLAGS.eval_freq),
        accuracy_list, 'o-')
    plt.xlabel('Step')
    plt.ylabel('Accuracy')

    # Second panel, so the loss curve and its axis labels do not overwrite
    # the accuracy panel above.
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(len(loss_list)), loss_list)
    plt.xlabel('Step')
    plt.ylabel('Loss')
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP (3072 inputs, 10 classes) with Adam and cross-entropy
    loss, evaluates on the whole test set every ``FLAGS.eval_freq`` steps,
    and passes the recorded test accuracies and training losses to
    ``plot_curve``.

    NOTE(review): ``device``, ``reshape_images`` and ``plot_curve`` are not
    defined in this function; they are expected to exist at module level.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # load the test data
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir, one_hot=False)
    test_images = torch.from_numpy(cifar10['test'].images).to(device)
    test_labels = torch.from_numpy(cifar10['test'].labels).to(device)

    # flatten the images for the MLP
    test_vectors = reshape_images(test_images)

    # set up the model
    mlp_model = MLP(3072, dnn_hidden_units, 10)
    mlp_model.to(device)
    loss_module = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(mlp_model.parameters(),
                                 lr=FLAGS.learning_rate)

    accuracies = []
    losses = []

    mlp_model.train()
    for i in range(FLAGS.max_steps):
        # load data
        images, labels = cifar10['train'].next_batch(FLAGS.batch_size)
        image_vectors = reshape_images(images)
        image_vectors = torch.from_numpy(image_vectors).to(device)
        labels = torch.from_numpy(labels).to(device)

        # forward pass
        model_pred = mlp_model(image_vectors)

        # calculate the loss
        loss = loss_module(model_pred, labels)

        # backward pass
        optimizer.zero_grad()
        loss.backward()

        # update the parameters
        optimizer.step()

        # evaluate the model on the data set every eval_freq steps
        if i % FLAGS.eval_freq == 0:
            # Only toggle eval/train mode when an evaluation actually runs.
            mlp_model.eval()
            with torch.no_grad():
                test_pred = mlp_model(test_vectors)
                test_accuracy = accuracy(test_pred, test_labels)
            accuracies.append(test_accuracy)
            # Record a float rather than the graph-carrying loss tensor.
            losses.append(loss.item())
            mlp_model.train()

    plot_curve(accuracies, 'Accuracy')
    plot_curve(losses, 'Loss')
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP with SGD (momentum 0.9, weight decay 0.003) on the
    training split, logs training and validation loss/accuracy every
    ``FLAGS.eval_freq`` steps, then reports loss and accuracy on the whole
    test set and plots the logged curves.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    n_inputs = 32 ** 2 * 3  # flattened 32x32 image with 3 channels
    model = MLP(n_inputs, dnn_hidden_units, 10)
    print(model)

    cv_size = 10000
    cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py',
                                        validation_size=cv_size)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=FLAGS.learning_rate,
                          momentum=0.9, weight_decay=0.003)

    log = defaultdict(list)

    for step in range(FLAGS.max_steps):
        optimizer.zero_grad()

        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x.reshape(FLAGS.batch_size, -1))
        y = torch.from_numpy(y)

        h = model(x)
        # Labels are one-hot rows; the loss expects class indices.
        loss = criterion(h, y.argmax(1))
        loss.backward()
        optimizer.step()

        if step % FLAGS.eval_freq == 0:
            log['train_loss'].append(loss.item())
            log['train_acc'].append(accuracy(h, y))

            model.eval()
            # Validation needs no gradients; skip the autograd graph for
            # the 10000-example forward pass.
            with torch.no_grad():
                x, y = cifar10['validation'].next_batch(cv_size)
                x = torch.from_numpy(x.reshape(-1, n_inputs))
                y = torch.from_numpy(y)

                h = model(x)
                cv_loss = criterion(h, y.argmax(1))
                log['cv_loss'].append(cv_loss.item())
                log['cv_acc'].append(accuracy(h, y))
            model.train()

            print(
                f"Step {step} | "
                f"Training loss: {log['train_loss'][-1]:.5f}, "
                f"accuracy: {100 * log['train_acc'][-1]:.1f}% | "
                f"CV loss: {log['cv_loss'][-1]:.5f}, "
                f"accuracy: {100 * log['cv_acc'][-1]:.1f}%")

    model.eval()
    with torch.no_grad():
        x, y = cifar10['test'].next_batch(cifar10['test'].num_examples)
        x = torch.from_numpy(x.reshape(-1, n_inputs))
        y = torch.from_numpy(y)

        h = model(x)
        loss = criterion(h, y.argmax(1))
        print(f"Test loss: {loss.item()}, accuracy: {100 * accuracy(h, y):.1f}%")

    # Plot loss and accuracy.
    plt.subplot(121)
    plt.title("Loss")
    plt.plot(log['train_loss'], label="Training")
    plt.plot(log['cv_loss'], label="Cross Validation")
    plt.xlabel("Step")
    plt.legend()

    plt.subplot(122)
    plt.title("Accuracy")
    plt.plot(log['train_acc'], label="Training")
    plt.plot(log['cv_acc'], label="Cross Validation")
    plt.xlabel("Step")
    plt.legend()

    plt.show()
def train():
    """
    Performs training and evaluation of MLP model.

    Pre-fetches ``FLAGS.max_steps`` training batches, trains an MLP with
    Adam and cross-entropy loss, optionally adds weight decay
    (``FLAGS.l2``) and an L1 penalty on the weights of ``nn.Linear`` layers
    (``FLAGS.l1``), and evaluates on the whole test set every
    ``FLAGS.eval_freq`` steps.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # Device configuration
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        print('Running in GPU model')

    device = torch.device('cuda' if use_cuda else 'cpu')
    # Tensor type used to cast inputs onto the selected device.
    dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

    # load dataset
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

    # get batches
    # NOTE(review): this keeps every training batch in memory at once.
    batches = []

    # initializing loss and accuracy arrays
    accuracies = []
    losses = []

    for _ in range(FLAGS.max_steps):
        x, y = cifar10['train'].next_batch(
            FLAGS.batch_size)  # (batch_size, 3, 32, 32) (batch_size, 10)
        x = x.reshape(FLAGS.batch_size, -1)
        batches.append((x, y))

    # get output size
    out_size = batches[-1][1].shape[1]

    # get input size
    in_size = batches[-1][0].shape[1]

    # initialize network
    net = MLP(in_size, dnn_hidden_units, out_size, FLAGS.batch_norm,
              FLAGS.dropout).to(device)

    # initialize l1 regularization
    reg_factor = 1e-6

    # initialize loss function
    criterion = nn.CrossEntropyLoss()

    if FLAGS.l2:
        optimizer = torch.optim.Adam(net.parameters(),
                                     lr=FLAGS.learning_rate,
                                     weight_decay=1e-5)
    else:
        optimizer = torch.optim.Adam(net.parameters(),
                                     lr=FLAGS.learning_rate)

    # make steps
    for s in range(FLAGS.max_steps):
        net.train()
        x, t = batches[s]

        # Forward pass
        y = net(torch.from_numpy(x).type(dtype))
        t = torch.from_numpy(t).type(dtype)
        t = torch.max(t, 1)[1]  # one-hot rows -> class indices
        loss = criterion(y, t)

        if FLAGS.l1:
            # Walk the modules, not the parameters: a parameter is never an
            # instance of nn.Linear, so a per-parameter isinstance check
            # would silently skip the penalty altogether.
            for module in net.modules():
                if isinstance(module, nn.Linear):
                    loss = loss + reg_factor * module.weight.abs().sum()

        losses.append(loss.cpu().detach().numpy())

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if s % FLAGS.eval_freq == 0:
            net.eval()
            x, t = cifar10['test'].images, cifar10['test'].labels
            x = x.reshape(x.shape[0], -1)
            # No gradients needed for the full-test-set forward pass.
            with torch.no_grad():
                y = net(torch.from_numpy(x).type(dtype))
            acc = accuracy(y.cpu().detach().numpy(), t)
            print('accuracy at step', s, ': ', acc)
            print('loss at step', s, ': ', loss.cpu().detach().numpy())
            accuracies.append(acc * 100)
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP with the optimizer selected by the local ``opt`` switch
    (``'Adam'`` or ``'SGD'``) and cross-entropy loss, and every
    ``FLAGS.eval_freq`` steps reports the accuracy on the whole test set
    together with the mean training loss since the previous report.

    Raises:
      ValueError: if ``opt`` names an unsupported optimizer.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    opt = 'Adam'
    decay = 0

    # Import data
    cifar10_data = cifar10_utils.get_cifar10(FLAGS.data_dir)
    train_data = cifar10_data['train']
    test_data = cifar10_data['test']

    input_size = int(np.prod(train_data.images[0].shape))
    output_size = train_data.labels.shape[1]

    # Create model and optimizer
    model = MLP(input_size, dnn_hidden_units, output_size)
    criterion = nn.CrossEntropyLoss()

    # Compare strings by value; `is` tests object identity and only works
    # by accident of string interning.
    if opt == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=FLAGS.learning_rate,
                               weight_decay=decay)
    elif opt == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=FLAGS.learning_rate,
                              weight_decay=decay)
    else:
        raise ValueError("Unsupported optimizer: {!r}".format(opt))

    model.train()

    # Train & evaluate
    eval_loss = 0.0
    full_loss = []
    lossv = []
    accv = []

    for step in range(1, FLAGS.max_steps + 1):
        data, target = train_data.next_batch(FLAGS.batch_size)
        # NOTE(review): inputs are passed without flattening — presumably
        # the MLP reshapes internally; verify against its forward().
        data = torch.from_numpy(data).float()
        target = torch.from_numpy(target)

        optimizer.zero_grad()
        prediction = model(data)
        # Targets are one-hot rows; the loss expects class indices.
        loss = criterion(prediction, torch.argmax(target, dim=1))
        loss.backward()
        optimizer.step()

        full_loss.append(loss.item())

        # Accuracy evaluation
        eval_loss += loss.item()
        if step % FLAGS.eval_freq == 0:
            model.eval()
            test_x, test_y = test_data.images, test_data.labels
            test_x = torch.from_numpy(test_x).float()
            # No gradients needed for the full-test-set forward pass.
            with torch.no_grad():
                predicted_y = model(test_x)
            accuracy_result = accuracy(predicted_y.detach().numpy(), test_y)

            lossv.append(eval_loss / FLAGS.eval_freq)
            accv.append(accuracy_result)
            print('Step %d - accuracy: %.4f - loss: %.3f' %
                  (step, accuracy_result, eval_loss / FLAGS.eval_freq))
            eval_loss = 0.0
            model.train()

    print("Training Done")