def fit(hyperparameter):
    """
    Train an MLP for one hyperparameter setting and report its test accuracy.

    The network takes 3 * 32 * 32 flattened inputs and has 10 outputs.

    Args:
      hyperparameter: dict that is read for the keys 'dnn_hidden_units',
        'optimizer' (called as optimizer(params, lr=...)), 'learning_rate'
        and 'n_steps'. The whole dict is also forwarded to the MLP
        constructor.
    Returns:
      The value of accuracy(preds, y) on a batch of 10000 test examples.

    Relies on the module-level names MLP, device, train_data, test_data,
    FLAGS, accuracy and plot_history.
    """
    model = MLP(3 * 32 * 32, hyperparameter['dnn_hidden_units'], 10,
                hyperparameter).to(device)
    loss_module = nn.CrossEntropyLoss()
    optimizer = hyperparameter['optimizer'](
        model.parameters(), lr=hyperparameter['learning_rate'])

    results = dict(train_scores=list(), val_scores=list())
    for i in range(hyperparameter['n_steps']):
        x, y = train_data.next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x).float().to(device)
        y = torch.from_numpy(y).long().to(device)

        preds = model(torch.flatten(x, start_dim=1))
        preds = preds.squeeze(dim=1)

        if i % FLAGS.eval_freq == FLAGS.eval_freq - 1:
            # Score the current training batch once and reuse the value for
            # both the history and the progress print.
            train_score = accuracy(preds, y)
            results['train_scores'].append(train_score.cpu())

            x_test, y_test = test_data.next_batch(300)
            x_test = torch.from_numpy(x_test).float().to(device)
            y_test = torch.from_numpy(y_test).long().to(device)

            # Evaluation must not record an autograd graph and must run the
            # network in inference mode (matters for dropout / batch norm).
            model.eval()
            with torch.no_grad():
                preds_test = model(torch.flatten(x_test, start_dim=1))
                preds_test = preds_test.squeeze(dim=1)
            model.train()

            results['val_scores'].append(
                accuracy(preds_test, y_test).cpu())
            print("current step: ", train_score)

        # Labels arrive one-hot; CrossEntropyLoss wants class indices.
        _, y = torch.max(y, dim=1)
        loss = loss_module(preds, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    plot_history(results)

    # Test
    x, y = test_data.next_batch(10000)
    x = torch.from_numpy(x).float().to(device)
    y = torch.from_numpy(y).long().to(device)
    model.eval()
    with torch.no_grad():
        preds = model(torch.flatten(x, start_dim=1))
        preds = preds.squeeze(dim=1)
    test_accuracy = accuracy(preds, y)
    print("Test Accuracy: ", test_accuracy)
    return test_accuracy
def train():
    """
    Performs training and evaluation of MLP model.

    In its current state this only smoke-tests the network: it builds an MLP
    from the first configured hidden-layer size, draws one training batch and
    prints the network output next to the label for every sample in it.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    hidden_spec = FLAGS.dnn_hidden_units
    if not hidden_spec:
        dnn_hidden_units = []
    else:
        dnn_hidden_units = [int(units) for units in hidden_spec.split(",")]

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    batch_size = 100
    image_dim, channels = 32, 3
    mlp_classes = 10
    mlp_input_size = image_dim * image_dim * channels

    data = cifar10_utils.get_cifar10()
    train_data = data['train']
    # The other two splits are looked up but not used yet.
    validation_data = data['validation']
    test_data = data['test']

    NN = MLP(mlp_input_size, dnn_hidden_units[0], mlp_classes)

    x, y = train_data.next_batch(batch_size)
    print(x.shape, y.shape)

    # Push the samples through the network one at a time.
    for image, label in zip(x, y):
        flat_image = np.reshape(image, (1, mlp_input_size))
        out = NN.forward(torch.tensor(flat_image))
        print(out, label)
def main():
    """
    Main function

    Prints the flags, makes sure the data directory exists, builds the MLP,
    an SGD optimizer and a cross-entropy loss, and hands them to train().
    """
    # Print all Flags to confirm parameter settings
    print_flags()

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(FLAGS.data_dir, exist_ok=True)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(dnn_hidden_unit_)
            for dnn_hidden_unit_ in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    data = cifar10_utils.get_cifar10(FLAGS.data_dir,
                                     one_hot=False,
                                     validation_size=0)

    # Flattened size of one image; converted to a plain int so the layer
    # size is not a NumPy scalar.
    img_shape = data["train"].images[0].shape
    n_inputs = int(np.prod(img_shape))

    mlp = MLP(n_inputs, dnn_hidden_units, N_CLASSES)
    print(mlp)

    optimizer = optim.SGD(mlp.parameters(), lr=FLAGS.learning_rate)
    loss_module = nn.CrossEntropyLoss()

    # run the training operation
    train(mlp, data, optimizer, loss_module)
def train():
    """
    Performs training and evaluation of MLP model.

    Trains for FLAGS.max_steps mini-batches with the optimizer named by
    FLAGS.optimizer ('SGD' or 'AdamW'), evaluates on the whole test set at
    step 0 and after every FLAGS.eval_freq steps, and finally plots the
    per-step training loss together with the test accuracy.

    Raises:
      ValueError: if FLAGS.optimizer is neither 'SGD' nor 'AdamW'.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
        torch.cuda.manual_seed_all(42)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    data = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
    train_set = data['train']
    print(train_set.images.shape)
    test_set = data['test']

    n_inputs = train_set.images[0].flatten().shape[0]
    n_classes = train_set.labels[0].shape[0]

    mlp = MLP(n_inputs, dnn_hidden_units, n_classes)
    loss_mod = nn.CrossEntropyLoss()

    if FLAGS.optimizer == 'SGD':
        optimizer = torch.optim.SGD(mlp.parameters(), lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'AdamW':
        optimizer = torch.optim.AdamW(mlp.parameters(), lr=FLAGS.learning_rate)
    else:
        # Fail here with a clear message instead of an unbound-variable
        # error on the first optimizer call.
        raise ValueError(
            "Unsupported optimizer: {!r}".format(FLAGS.optimizer))

    mlp.to(device)

    loss_history = []
    acc_history = []
    for step in range(FLAGS.max_steps):
        mlp.train()
        x, y = train_set.next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x.reshape(x.shape[0], n_inputs)).to(device)
        y = torch.from_numpy(np.argmax(y, axis=1)).to(device)  # converts onehot to dense

        out = mlp(x)
        loss = loss_mod(out, y)
        # Store a plain float: keeping the tensor would keep every step's
        # autograd graph alive and cannot be plotted directly.
        loss_history.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step == 0 or (step + 1) % FLAGS.eval_freq == 0:
            mlp.eval()
            with torch.no_grad():
                x, y = test_set.images, test_set.labels
                x = torch.from_numpy(x.reshape(x.shape[0], n_inputs)).to(device)
                y = torch.from_numpy(y).to(device)
                test_out = mlp.forward(x)
                acc = accuracy(test_out, y)
                print('Accuracy:', acc)
                acc_history.append(acc)

    print('Final loss:', loss_history[-1])
    print('Final acc:', acc_history[-1])

    plt.plot(loss_history)
    plt.step(range(0, FLAGS.max_steps + 1, FLAGS.eval_freq), acc_history)
    plt.legend(['loss', 'accuracy'])
    plt.show()
def train(n_hidden_1, dropout, lr, wdecay, _run):
    """
    Performs training and evaluation of MLP model.

    Args:
      n_hidden_1: width of the first hidden layer (the second is fixed at 400).
      dropout: forwarded to the MLP constructor as ``dropout``.
      lr: learning rate for Adam.
      wdecay: weight decay for Adam.
      _run: object whose ``log_scalar(name, value, step)`` receives the
        train/test loss and accuracy.

    The training metrics are averaged over the steps since the previous log
    line and reported every 50 steps; the whole test set is evaluated every
    FLAGS.eval_freq steps and on the last step.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    # NOTE: parsed as in the assignment template, but the layer sizes used
    # below come from n_hidden_1, not from this list.
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_xy_tensors(batch):
        """Flatten the images of a (x, y) batch and move both to the device."""
        x, y = batch
        x = torch.tensor(x.reshape(-1, 3072), dtype=torch.float32).to(device)
        y = torch.tensor(y, dtype=torch.long).to(device)
        return x, y

    datasets = cifar10_utils.read_data_sets(DATA_DIR_DEFAULT, one_hot=False)
    train_data = datasets['train']
    test_data = datasets['test']

    model = MLP(n_inputs=3072,
                n_hidden=[n_hidden_1, 400],
                n_classes=10,
                dropout=dropout).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wdecay)

    log_every = 50
    # Running sums since the last log line. Dividing by the number of steps
    # actually accumulated keeps the very first report (step 0, one sample)
    # from being scaled down by a factor of log_every.
    loss_sum = 0
    acc_sum = 0
    n_accumulated = 0

    for step in range(FLAGS.max_steps):
        x, y = get_xy_tensors(train_data.next_batch(FLAGS.batch_size))

        # Forward and backward passes
        optimizer.zero_grad()
        out = model.forward(x)
        loss = loss_fn(out, y)
        loss.backward()

        # Parameter updates
        optimizer.step()

        loss_sum += loss.item()
        acc_sum += accuracy(out, y)
        n_accumulated += 1

        if step % log_every == 0:
            avg_loss = loss_sum / n_accumulated
            avg_acc = acc_sum / n_accumulated
            print('[{}/{}] train loss: {:.6f} train acc: {:.6f}'.format(
                step, FLAGS.max_steps, avg_loss, avg_acc))
            _run.log_scalar('train-loss', avg_loss, step)
            _run.log_scalar('train-acc', avg_acc, step)
            loss_sum = 0
            acc_sum = 0
            n_accumulated = 0

        # Evaluate
        if step % FLAGS.eval_freq == 0 or step == (FLAGS.max_steps - 1):
            x, y = get_xy_tensors(
                test_data.next_batch(test_data.num_examples))
            model.eval()
            # No gradients are needed here; skipping the graph keeps the
            # full-test-set forward pass cheap in memory.
            with torch.no_grad():
                out = model.forward(x)
                test_loss = loss_fn(out, y).item()
            model.train()
            test_acc = accuracy(out, y)
            print('[{}/{}] test accuracy: {:6f}'.format(
                step, FLAGS.max_steps, test_acc))
            _run.log_scalar('test-loss', test_loss, step)
            _run.log_scalar('test-acc', test_acc, step)
def train():
    """
    Performs training and evaluation of MLP model.

    Trains with Adam for FLAGS.max_steps mini-batches, logs the training
    loss/accuracy of every step to tensorboard, evaluates on the test set
    (batch by batch) every FLAGS.eval_freq steps and on the last step, and
    pickles all collected curves to results/<run_id>_results.pkl.

    Relies on the module-level names batchnorm, dropout, weight_decay, dtype
    and device.
    """

    # DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # -------------------------- UNCHECKED -------------------
    # initialize tensorboard
    run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_mlp")
    if batchnorm:
        run_id = run_id + '_batchnorm'
    log_dir = 'tensorboard/' + run_id
    writer = SummaryWriter(log_dir=log_dir)

    # get the dataset
    data_set = cifar10_utils.get_cifar10(FLAGS.data_dir)

    # number of full batches in the test set (only this split is iterated
    # batch-wise below)
    n_test_batches = int(data_set['test']._num_examples / FLAGS.batch_size)

    # get dataset information
    image_shape = data_set['train'].images[0].shape
    n_inputs = image_shape[0] * image_shape[1] * image_shape[2]
    n_classes = data_set['train'].labels[0].shape[0]

    # get the necessary components
    classifier = MLP(n_inputs, dnn_hidden_units, n_classes, dropout,
                     batchnorm).to(device)
    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(classifier.parameters(),
                                 lr=FLAGS.learning_rate,
                                 weight_decay=weight_decay)

    # list of training accuracies and losses
    train_accuracies = []
    train_losses = []

    # list of test accuracies and losses
    test_accuracies = []
    test_losses = []

    # training loop
    for step in range(FLAGS.max_steps):
        # get current batch...
        images, labels = data_set['train'].next_batch(FLAGS.batch_size)
        images = images.reshape(FLAGS.batch_size, n_inputs)

        # ...in the gpu
        images = torch.from_numpy(images).type(dtype).to(device=device)
        labels = torch.from_numpy(labels).type(dtype).to(device=device)

        # forward pass
        classifier.train()
        predictions = classifier.forward(images)

        # compute loss (labels are one-hot; the loss wants class indices)
        class_labels = labels.argmax(dim=1)
        loss = loss_function(predictions, class_labels)

        # reset gradients before backwards pass
        optimizer.zero_grad()

        # backward pass
        loss.backward()

        # update weights
        optimizer.step()

        # get accuracy and loss for the batch
        train_accuracy = accuracy(predictions, labels)
        train_accuracies.append(train_accuracy)
        writer.add_scalar("Training accuracy vs steps", train_accuracy, step)

        train_losses.append(loss.item())
        writer.add_scalar("Training loss vs steps", loss.item(), step)

        if ((step + 1) % 100) == 0 or step == 0:
            print("\nStep", step + 1)
            print("\tTRAIN:", round(train_accuracy * 100, 1), "%")

        # run evaluation every eval_freq epochs
        if (step + 1) % FLAGS.eval_freq == 0 or (step + 1) == FLAGS.max_steps:
            # list of test batch accuracies and losses for this step
            step_test_accuracies = []
            step_test_losses = []

            # get accuracy on the test set
            classifier.eval()
            # evaluation needs no gradients; do not record autograd graphs
            with torch.no_grad():
                for _ in range(n_test_batches):
                    # get current batch...
                    images, labels = data_set['test'].next_batch(
                        FLAGS.batch_size)
                    images = images.reshape(FLAGS.batch_size, n_inputs)

                    # ...in the gpu
                    images = torch.from_numpy(images).type(dtype).to(
                        device=device)
                    labels = torch.from_numpy(labels).type(dtype).to(
                        device=device)

                    # forward pass
                    predictions = classifier(images)

                    # compute loss
                    class_labels = labels.argmax(dim=1)
                    loss = loss_function(predictions, class_labels)

                    # get accuracy and loss for the batch
                    step_test_accuracies.append(
                        accuracy(predictions, labels))
                    step_test_losses.append(loss.item())

            # store accuracy and loss
            epoch_test_accuracy = np.mean(step_test_accuracies)
            test_accuracies.append(epoch_test_accuracy)

            epoch_test_loss = np.mean(step_test_losses)
            test_losses.append(epoch_test_loss)

            print("\tTEST:", round(epoch_test_accuracy * 100, 1), "%")
            writer.add_scalar("Test accuracy vs epochs", epoch_test_accuracy,
                              step)
            writer.add_scalar("Test loss vs epochs", epoch_test_loss, step)

    print("\nBest TEST:", round(max(test_accuracies) * 100, 1), "%")

    # save results
    results = {
        'train_accuracies': train_accuracies,
        'train_losses': train_losses,
        'test_accuracies': test_accuracies,
        'test_losses': test_losses,
        'eval_freq': FLAGS.eval_freq
    }

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs("results/", exist_ok=True)
    with open("results/" + run_id + "_results.pkl", "wb") as results_file:
        pkl.dump(results, results_file)

    writer.close()
def train():
    """
    Performs training and evaluation of MLP model.

    Runs a grid search over learning rate and weight decay for a fixed
    architecture (a first hidden layer of 600 units followed by four
    narrowing layers). Every configuration is trained with Adam for roughly
    ten passes over the training set; the train/test accuracy and training
    loss recorded every FLAGS.eval_freq steps are plotted to figs/, and the
    best test accuracy of each configuration is collected and saved.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    # Get negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    acc_param_search = []
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    cifar10_set = cifar10_utils.get_cifar10(FLAGS.data_dir)
    n_train = cifar10_set['train'].num_examples

    # One batch is drawn up front only to read the input/output sizes.
    x, y = cifar10_set['train'].next_batch(FLAGS.batch_size)
    print("The size of the dataset is: " + str(n_train))
    x = x.reshape(FLAGS.batch_size, -1)
    out_dim = y.shape[1]
    in_dim = x.shape[1]

    hu = 4
    lr_list = [1e-2, 1.5e-3, 1.25e-3, 1e-3, 1e-4]
    wd_list = [1e-4, 5e-4, 1e-5, 5e-5]

    # Force the first hidden layer to 600 units; also works when no hidden
    # units were configured (the list is empty then).
    if dnn_hidden_units:
        dnn_hidden_units[0] = 600
    else:
        dnn_hidden_units.append(600)
    for i in range(0, hu):
        dnn_hidden_units.append(int(500 - (450 * (i / hu))))

    # The test set does not change between evaluations: build the tensor once.
    test_images = cifar10_set['test'].images
    test_labels = cifar10_set['test'].labels
    x_test = torch.tensor(test_images.reshape(test_images.shape[0], -1),
                          dtype=torch.float32).to(device)

    # Adding regularization
    reg_on = False
    reg_const = 0.00001

    for lr in lr_list:
        for wd in wd_list:
            loss_train = []
            acc_train = []
            acc_test = []
            run_tag = (str((hu * 2) + 3) + '_learning_rate_' + str(lr) +
                       '_weightdecay_' + str(wd))
            print('Testing Parameters layers ' + run_tag)

            mlp = MLP(in_dim, dnn_hidden_units, out_dim, neg_slope).to(device)
            print("Opt is Adam")
            optimizer = torch.optim.Adam(mlp.parameters(), lr=lr,
                                         weight_decay=wd)
            loss_funct = nn.CrossEntropyLoss()

            # dataset is size 50,000
            steps = int((n_train / FLAGS.batch_size) * 10)
            print(steps)

            for i in range(0, steps + 1):
                x, t = cifar10_set['train'].next_batch(FLAGS.batch_size)
                x = torch.tensor(x.reshape(FLAGS.batch_size, -1),
                                 dtype=torch.float32).to(device)
                y = mlp.forward(x)
                loss = loss_funct(
                    y, torch.LongTensor(np.argmax(t, 1)).to(device))

                if reg_on:
                    # L1 penalty on the linear weights. Added out of place:
                    # the previous "loss += loss + ..." doubled the loss for
                    # every layer.
                    for mod in mlp.modls:
                        if isinstance(mod, nn.Linear):
                            loss = loss + reg_const * torch.sum(
                                torch.abs(mod.weight))

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if i % FLAGS.eval_freq == 0:
                    # Plain float: a graph-attached tensor cannot be plotted
                    # and would keep the autograd graph alive.
                    loss_train.append(loss.item())
                    acc_train.append(accuracy(y.cpu().detach().numpy(), t))

                    # No gradients are needed for the test-set forward pass.
                    with torch.no_grad():
                        test_out = mlp.forward(x_test)
                    acc_test.append(
                        accuracy(test_out.cpu().numpy(), test_labels))

            max_acc = np.array(acc_test).max()
            print('The max found for these settings: layers ' + run_tag +
                  'was :' + str(max_acc))
            acc_param_search.append(max_acc)

            # x axis in epochs: samples seen at each evaluation / train size
            samples_per_eval = FLAGS.eval_freq * FLAGS.batch_size
            epochs = np.arange(0, len(acc_train) * samples_per_eval,
                               samples_per_eval) / n_train

            # Plotting the accuracy of test and train. The figure is cleared
            # first so curves of earlier configurations do not pile up.
            plt.figure(0)
            plt.clf()
            plt.plot(epochs, acc_train, label='Train')
            plt.plot(epochs, acc_test, label='Test')
            plt.xlabel('Epoch')
            plt.ylabel('Accuracy')
            plt.title('Accuracy of Train and Test Set Through Training')
            plt.legend()
            # Own file name for the accuracy plot; it used to share the loss
            # plot's name and was overwritten by it.
            acc_loc = 'figs/acc_adam_' + run_tag + '.png'
            plt.savefig(acc_loc)

            plt.figure(1)
            plt.clf()
            plt.plot(epochs, loss_train, label='Train')
            plt.xlabel('Epoch')
            plt.ylabel('Loss')
            plt.title('Loss Through Training')
            loss_loc = 'figs/loss_adam_' + run_tag + '.png'
            plt.savefig(loss_loc)

    ########################
    # END OF YOUR CODE    #
    #######################
    print(acc_param_search)
    # NOTE(review): acc_grid_srch_4 is not defined in this function;
    # presumably a module-level path, or meant to be the string
    # 'acc_grid_srch_4' - confirm.
    np.save(acc_grid_srch_4, acc_param_search)
def train():
    """
    Performs training and evaluation of MLP model.

    Trains for FLAGS.max_steps mini-batches with the optimizer named by
    FLAGS.optimizer ("Adam", "SGD" or "RMSprop"), evaluates on the test set
    at step 1 and every FLAGS.eval_freq steps, and appends one line with the
    settings, the best test accuracy and the elapsed time to results.csv.

    Raises:
      ValueError: if FLAGS.optimizer is not one of the supported names.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data = cifar10_utils.get_cifar10(FLAGS.data_dir)

    n_inputs = 3 * 32 * 32
    n_classes = 10
    # number of full batches in the test set
    batches_per_epoch = data['test'].images.shape[0] // FLAGS.batch_size

    model = MLP(n_inputs, dnn_hidden_units, n_classes).to(device)
    loss_fn = nn.CrossEntropyLoss()

    if FLAGS.optimizer == "Adam":
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=FLAGS.learning_rate,
                                     weight_decay=FLAGS.weight_decay)
    elif FLAGS.optimizer == "SGD":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=FLAGS.learning_rate,
                                    weight_decay=FLAGS.weight_decay,
                                    momentum=FLAGS.momentum)
    elif FLAGS.optimizer == "RMSprop":
        optimizer = torch.optim.RMSprop(model.parameters(),
                                        lr=FLAGS.learning_rate,
                                        weight_decay=FLAGS.weight_decay,
                                        momentum=FLAGS.momentum)
    else:
        # Fail early with a clear message instead of an AttributeError on
        # None at the first optimizer call.
        raise ValueError(
            "Unsupported optimizer: {!r}".format(FLAGS.optimizer))

    max_accuracy = 0.0
    start_time = time.perf_counter()

    for step in range(1, FLAGS.max_steps + 1):
        x, y = get_batch(data, 'train', FLAGS.batch_size, device)
        predictions = model.forward(x)
        # labels are one-hot; the loss wants class indices
        training_loss = loss_fn(predictions, y.argmax(dim=1))

        optimizer.zero_grad()
        training_loss.backward()
        optimizer.step()

        if step == 1 or step % FLAGS.eval_freq == 0:
            with torch.no_grad():
                test_loss = 0
                test_acc = 0
                for _ in range(batches_per_epoch):
                    x, y = get_batch(data, 'test', FLAGS.batch_size, device)
                    predictions = model(x)
                    test_loss += loss_fn(
                        predictions, y.argmax(dim=1)) / batches_per_epoch
                    test_acc += accuracy(predictions, y) / batches_per_epoch

                if test_acc > max_accuracy:
                    max_accuracy = test_acc

                print(
                    "step %d/%d: training loss: %.3f test loss: %.3f accuracy: %.1f%%"
                    % (step, FLAGS.max_steps, training_loss, test_loss,
                       test_acc * 100))

    time_taken = time.perf_counter() - start_time

    # "with" closes the file even if formatting or writing the row fails.
    with open("results.csv", "a+") as results_file:
        results_file.write(
            "%s;%s;%f;%f;%f;%d;%d;%d;%f;%.3f\n" %
            (FLAGS.dnn_hidden_units, FLAGS.optimizer, FLAGS.learning_rate,
             FLAGS.momentum, FLAGS.weight_decay, FLAGS.batch_size,
             FLAGS.max_steps, FLAGS.eval_freq, max_accuracy, time_taken))

    print("Done. Scored %.1f%% in %.1f seconds." %
          (max_accuracy * 100, time_taken))
def train():
    """
    Performs training and evaluation of MLP model.

    Trains for FLAGS.max_steps mini-batches with the optimizer named by
    FLAGS.optimizer (any other name falls back to SGD). Every
    FLAGS.eval_freq steps the model is scored on one pass over the test set,
    and the running average of the training loss, the test loss and both
    accuracies are recorded. If FLAGS.plot is set the curves are written to
    pytorch.png.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    # Get negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    cifar10 = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
    train_data = cifar10['train']

    # flatten every image to one vector to read off the input size
    n_inputs = train_data.images.reshape(train_data.images.shape[0], -1).shape[1]
    n_hidden = dnn_hidden_units
    n_classes = train_data.labels.shape[1]

    model = MLP(n_inputs, n_hidden, n_classes, neg_slope)
    model.to(device)

    # Name -> optimizer class; unknown names fall back to plain SGD.
    optimizer_classes = {
        'Adam': torch.optim.Adam,
        'Adamax': torch.optim.Adamax,
        'Adagrad': torch.optim.Adagrad,
        'Adadelta': torch.optim.Adadelta,
        'SparseAdam': torch.optim.SparseAdam,
    }
    optimizer_class = optimizer_classes.get(FLAGS.optimizer, torch.optim.SGD)
    optimizer = optimizer_class(model.parameters(), lr=FLAGS.learning_rate)

    criterion = torch.nn.CrossEntropyLoss()

    # One evaluation = one pass over the test set. The loop used to run
    # FLAGS.max_steps test batches per evaluation, i.e. many redundant
    # passes over the same data.
    n_test_batches = max(
        1, cifar10['test'].images.shape[0] // FLAGS.batch_size)

    train_acc_plot = []
    test_acc_plot = []
    loss_train = []
    loss_test = []
    rloss = 0
    best_accuracy = 0

    for i in range(0, FLAGS.max_steps):
        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x).float().to(device)
        y = torch.from_numpy(y).float().to(device)
        x = x.reshape(x.shape[0], -1)

        out = model.forward(x)
        # labels are one-hot; the loss wants class indices
        loss = criterion(out, y.argmax(1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        rloss += loss.item()

        if i % FLAGS.eval_freq == 0:
            train_accuracy = accuracy(out, y)
            with torch.no_grad():
                test_accuracys, test_losses = [], []
                for _ in range(n_test_batches):
                    test_x, test_y = cifar10['test'].next_batch(FLAGS.batch_size)
                    test_x = torch.from_numpy(test_x).float().to(device)
                    test_y = torch.from_numpy(test_y).float().to(device)
                    test_x = test_x.reshape(test_x.shape[0], -1)

                    test_out = model.forward(test_x)
                    test_loss = criterion(test_out, test_y.argmax(1))
                    test_accuracy = accuracy(test_out, test_y)

                    # .item() yields a Python float on any device, so no
                    # device-specific branch is needed.
                    test_losses.append(test_loss.item())
                    test_accuracys.append(test_accuracy)

            t_acc = np.array(test_accuracys).mean()
            t_loss = np.array(test_losses).mean()

            train_acc_plot.append(train_accuracy)
            test_acc_plot.append(t_acc)
            loss_train.append(rloss / (i + 1))
            loss_test.append(t_loss)

            print(f"iter {i}, train_loss_avg {rloss/(i + 1)}, test_loss_avg {t_loss}, train_acc {train_accuracy}, test_acc_avg {t_acc}")

            if t_acc > best_accuracy:
                best_accuracy = t_acc

    print(f"Best Accuracy {best_accuracy}", flush=True)

    if FLAGS.plot:
        print('Start plotting...')
        fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
        ax1.plot(np.arange(len(train_acc_plot)), train_acc_plot, label='training')
        ax1.plot(np.arange(len(test_acc_plot)), test_acc_plot, label='testing')
        ax1.set_title('Training evaluation batch size ' + str(FLAGS.batch_size) +
                      ' learning rate ' + str(FLAGS.learning_rate) +
                      '\n best accuracy ' + str(best_accuracy))
        ax1.set_ylabel('Accuracy')
        ax1.legend()

        ax2.plot(np.arange(len(loss_train)), loss_train, label='Train Loss')
        ax2.plot(np.arange(len(loss_test)), loss_test, label='Test Loss')
        ax2.set_title('Loss evaluation')
        ax2.set_ylabel('Loss')
        ax2.legend()
        plt.xlabel('Iteration')
        plt.savefig('pytorch.png')
def train():
    """
    Performs training and evaluation of MLP model.

    Trains for FLAGS.max_steps mini-batches with SGD or Adam (selected by the
    module-level OPTIMIZER), multiplying the learning rate by 0.8 every
    LR_FREQ iterations. The whole test set is evaluated every
    FLAGS.eval_freq iterations; the collected curves are handed to
    plot_results and the best test accuracy is printed.

    Relies on the module-level names OPTIMIZER, weight_decay and LR_FREQ.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    print(torch.cuda.is_available())
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # initialize required arrays for saving the results
    train_accuracies = []
    train_losses = []
    test_accuracies = []
    test_losses = []

    # load data from directory specified in the input
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

    # load test images and labels
    test_images = cifar10['test'].images
    test_targets = cifar10['test'].labels

    n_test = test_images.shape[0]
    # n_inputs is one vector for all channels of width and height
    n_inputs = test_images.shape[1] * test_images.shape[2] * test_images.shape[3]
    # reshape to (n_samples, n_inputs)
    test_images = test_images.reshape((n_test, n_inputs))
    n_classes = 10

    # use torch tensors instead of np arrays, no grad needed as model is not
    # trained on test images
    test_images = torch.tensor(test_images, requires_grad=False).to(device)
    test_targets = torch.tensor(test_targets, requires_grad=False).to(device)

    # initialize MLP model on the same device as the data; without this the
    # forward pass fails whenever the data lives on the GPU
    MLP_model = MLP(n_inputs=n_inputs,
                    n_hidden=dnn_hidden_units,
                    n_classes=n_classes,
                    neg_slope=FLAGS.neg_slope).to(device)
    print(MLP_model)

    loss_module = nn.CrossEntropyLoss()

    learning_rate = FLAGS.learning_rate
    if OPTIMIZER == "SGD":
        optimizer = torch.optim.SGD(MLP_model.parameters(),
                                    lr=learning_rate,
                                    weight_decay=weight_decay)
    else:
        optimizer = torch.optim.Adam(MLP_model.parameters(),
                                     lr=learning_rate,
                                     weight_decay=weight_decay)

    batch_size = FLAGS.batch_size

    # extract max accuracy while training on test set
    max_acc = 0
    max_iter = 0

    for iteration in range(FLAGS.max_steps):
        train_images, train_targets = cifar10['train'].next_batch(batch_size)
        # input to MLP.forward is (batch_size, n_inputs)
        train_images = train_images.reshape((batch_size, n_inputs))

        # switch from numpy version to tensor and to device
        train_images = torch.tensor(train_images).float().to(device)
        train_targets = torch.tensor(train_targets).long().to(device)

        if iteration % LR_FREQ == 0:
            # Decay the learning rate in place. Rebuilding the optimizer
            # here (always as SGD) silently replaced Adam from iteration 0
            # on and discarded its state.
            learning_rate = learning_rate * 0.8
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        # gradients zero initialized
        optimizer.zero_grad()

        # predictions by forward pass
        train_predictions = MLP_model.forward(train_images)

        # targets are one-hot; the loss module wants class indices
        loss = loss_module(train_predictions, train_targets.argmax(dim=1))

        # backward pass from loss, then parameter update
        loss.backward()
        optimizer.step()

        ## Save training statistics
        train_accuracies.append(accuracy(train_predictions, train_targets))
        # plain float: keeping the tensor would keep the graph alive
        train_losses.append(loss.item())

        if iteration % 100 == 0:
            print("iteration:" + str(iteration) + "train_acc:" +
                  str(np.mean(train_accuracies)))

        # Evaluate the current MLP on the test data every eval_freq iterations
        if iteration % FLAGS.eval_freq == 0:
            ## Test Statistics
            # no gradients needed for the full-test-set forward pass
            with torch.no_grad():
                test_predictions = MLP_model.forward(test_images)
                test_loss = loss_module(test_predictions,
                                        test_targets.argmax(dim=1)).item()
            test_acc = accuracy(test_predictions, test_targets)
            test_accuracies.append(test_acc)
            print("iteration:" + str(iteration) + "test_acc:" +
                  str(test_accuracies[-1]))
            test_losses.append(test_loss)

            if max_acc < test_acc:
                max_acc = test_acc
                max_iter = iteration

    print('Training is done')
    print('Save results in folder: .')
    print('Training is done')
    print('Plot Results')
    plot_results(train_accuracies, test_accuracies, train_losses, test_losses)
    print("max accuracy: " + str(max_acc) + " at iteration: " + str(max_iter))
def train():
    """
    Performs training and evaluation of MLP model.

    Trains with SGD for FLAGS.max_steps mini-batches, scores the current
    batch and the whole test set every FLAGS.eval_freq steps, prints the
    final test accuracy and saves the accuracy and loss curves to
    "mlp-pytorch-results".
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []
    print("arch: ", dnn_hidden_units)

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    # Fall back to the CPU when no GPU is present instead of failing.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    dataset = cifar10_utils.get_cifar10()
    training = dataset['train']
    test = dataset['test']

    # The test set is constant: flatten it and move it to the device once.
    test_images = torch.tensor(
        test.images.reshape(test.images.shape[0], -1)).to(device)
    test_labels = torch.tensor(test.labels).to(device)

    model = MLP(n_inputs=32 * 32 * 3,
                n_hidden=dnn_hidden_units,
                n_classes=10).to(device)
    opt = torch.optim.SGD(model.parameters(), lr=FLAGS.learning_rate)
    ce = nn.CrossEntropyLoss()

    test_accuracy = []
    train_accuracy = []
    loss_list = []

    for epoch in range(FLAGS.max_steps):
        x, y = training.next_batch(FLAGS.batch_size)
        x = torch.tensor(x).to(device)
        y = torch.tensor(y).to(device)

        opt.zero_grad()
        out = model.forward(x.reshape(FLAGS.batch_size, -1))
        # labels are one-hot; take the index of the maximum as class index
        loss = ce(out, y.max(1)[1])
        loss_list.append(float(loss))
        loss.backward()
        opt.step()

        if not epoch % FLAGS.eval_freq:
            train_accuracy.append(accuracy(out, y))
            # no gradients needed for the full-test-set forward pass
            with torch.no_grad():
                out = model.forward(test_images)
            test_accuracy.append(accuracy(out, test_labels))
            print('Epoch: ', epoch, 'Loss: ', loss, 'Accuracy: ',
                  train_accuracy[-1], 'Test ac.:', test_accuracy[-1])

    with torch.no_grad():
        out = model.forward(test_images)
    print('Test accuracy: ', accuracy(out, test_labels))

    import seaborn as sns
    import matplotlib.pyplot as plt

    # Build the x axes from what was actually recorded in this run; using
    # the *_DEFAULT constants breaks as soon as the flags differ from them.
    eval_steps = np.arange(len(train_accuracy)) * FLAGS.eval_freq
    loss_steps = np.arange(len(loss_list))

    f, axes = plt.subplots(1, 2)
    ax = sns.lineplot(x=eval_steps, y=train_accuracy, ax=axes[0])
    ax = sns.lineplot(x=eval_steps, y=test_accuracy, ax=axes[0])
    ax.set_title('Training and test accuracy')
    ax.legend(['training', 'test'])

    ax = sns.lineplot(x=loss_steps, y=loss_list, ax=axes[1])
    ax.set_title('Loss')

    figure = ax.get_figure()
    figure.savefig("mlp-pytorch-results")
def train():
    """
    Train the MLP on CIFAR-10 with SGD or Adam and print the recorded metrics.

    The optimizer is selected by ``FLAGS.optimizer`` (``'sgd'`` or ``'adam'``).
    Every ``FLAGS.eval_freq`` steps the accuracy and loss on the whole test
    set and on the current mini-batch are recorded. After training the
    recorded lists and the final test accuracy are printed.

    Raises:
      ValueError: if ``FLAGS.optimizer`` is neither ``'sgd'`` nor ``'adam'``.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # because I don't have a GPU and the training was quick enough on a CPU,
    # I don't save my tensor on a GPU
    learning_rate = FLAGS.learning_rate
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    optimizer_name = FLAGS.optimizer

    # self-added variables
    weight_decay = FLAGS.regularizer
    momentum = FLAGS.momentum

    # get test data to initialize the model with
    cifar10 = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT)
    x_test, y_test = cifar10['test'].images, cifar10['test'].labels
    # Number of values per image once every non-batch axis is flattened.
    input_size = int(np.prod(np.shape(x_test)[1:]))
    class_size = np.shape(y_test)[1]
    x_test = torch.from_numpy(x_test.reshape([np.shape(x_test)[0], input_size]))
    y_test = torch.from_numpy(y_test)
    # Class indices of the one-hot test labels; constant for the whole run.
    test_targets = y_test.argmax(dim=1)

    net = MLP(n_inputs=input_size,
              n_hidden=dnn_hidden_units,
              n_classes=class_size)
    criterion = torch.nn.CrossEntropyLoss()

    eval_accuracies = []
    train_accuracies = []
    eval_loss = []
    train_loss = []

    # choose between optimizer
    if optimizer_name == 'sgd':
        optimizer = optim.SGD(net.parameters(),
                              lr=learning_rate,
                              momentum=momentum,
                              weight_decay=weight_decay)
    elif optimizer_name == 'adam':
        optimizer = optim.Adam(net.parameters(),
                               lr=learning_rate,
                               weight_decay=weight_decay)
    else:
        # Previously this fell through and failed later with an unbound name.
        raise ValueError(
            "Unsupported optimizer {!r}; expected 'sgd' or 'adam'".format(
                optimizer_name))

    for step in range(max_steps):
        x, y = cifar10['train'].next_batch(batch_size)
        x = torch.from_numpy(x.reshape([np.shape(x)[0], input_size]))
        y = torch.from_numpy(y)

        optimizer.zero_grad()
        out = net(x)
        # CrossEntropyLoss expects class indices, the labels are one-hot.
        loss = criterion(out, y.argmax(dim=1))
        loss.backward()
        optimizer.step()

        if step % eval_freq == 0:
            # Evaluation needs no autograd graph.
            with torch.no_grad():
                test_out = net(x_test)
                test_loss_value = criterion(test_out, test_targets).item()
            eval_accuracies.append(accuracy(test_out, y_test))
            train_accuracies.append(accuracy(out, y))
            eval_loss.append(test_loss_value)
            # Same value the original recomputed from `out` and `y`.
            train_loss.append(loss.item())

    # final accuracy calculation
    with torch.no_grad():
        test_out = net(x_test)
    final_accuracy = accuracy(test_out, y_test)

    print("EVAL ACCURACY")
    print(eval_accuracies)
    print("train ACCURACY")
    print(train_accuracies)
    print("EVAL loss")
    print(eval_loss)
    print("train loss")
    print(train_loss)
    print("FINAL TEST ACCURACY")
    print(final_accuracy)
def train():
    """
    Train the MLP on CIFAR-10 with SGD and report the test-set accuracy.

    Every ``FLAGS.eval_freq`` steps the accuracy and loss of the current
    training mini-batch are printed. After the last step the accuracy on the
    whole test set is printed.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # loop through data
    cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py')
    # The first batch also determines the network input size. It uses the
    # same batch size as every later step.
    x, y = cifar10['train'].next_batch(FLAGS.batch_size)
    print(y.shape)
    print(x.shape)
    x = x.reshape(np.size(x, 0), -1)
    n_input = np.size(x, 1)

    # create model
    net = MLP(n_input, dnn_hidden_units, 10)

    # get loss function and optimizer
    crossEntropy = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=FLAGS.learning_rate)

    for i in range(FLAGS.max_steps):
        # Inputs do not need gradients; only the model parameters do.
        inputs = torch.from_numpy(x)
        out = net(inputs)
        out_numpy = out.detach().numpy()

        # apply cross entropy (labels are one-hot, the loss wants indices)
        label_index = torch.LongTensor(np.argmax(y, axis=1))
        loss = crossEntropy(out, label_index)

        if i % FLAGS.eval_freq == 0:
            print(accuracy(out_numpy, y))
            print(loss)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # insert data
        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        x = x.reshape(np.size(x, 0), -1)

    # test
    x, y = cifar10['test'].images, cifar10['test'].labels
    x = x.reshape(np.size(x, 0), -1)
    # No autograd graph is needed for the final evaluation.
    with torch.no_grad():
        out = net(torch.from_numpy(x))
    out_numpy = out.numpy()
    print("The accuracy on the test set is:")
    print(accuracy(out_numpy, y))
def train():
    """
    Train the MLP on CIFAR-10 with SGD (weight decay 0.001) and log metrics.

    The training loss of every step is recorded. Every ``FLAGS.eval_freq``
    steps the accuracy on the whole test set is computed, printed and
    recorded. After training both lists are appended to
    ``accuracy_torch.txt`` and ``loss_torch.txt``.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    ce_loss = nn.CrossEntropyLoss()

    n_inputs = 3 * 32 * 32
    n_classes = 10
    mlp = MLP(n_inputs, dnn_hidden_units, n_classes)
    optimizer = optim.SGD(mlp.parameters(),
                          lr=FLAGS.learning_rate,
                          weight_decay=0.001)

    c10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    # Flatten the test images once; they are reused at every evaluation.
    test_images = c10['test'].images
    test_images = torch.tensor(test_images.reshape(test_images.shape[0], -1))
    test_targets = c10['test'].labels

    acc_values = []
    loss_values = []

    for i in range(FLAGS.max_steps):
        x, y = c10['train'].next_batch(FLAGS.batch_size)
        x = torch.tensor(x.reshape(FLAGS.batch_size, -1))
        # One-hot labels -> class indices, as CrossEntropyLoss expects.
        y = torch.tensor(y.argmax(axis=1))

        optimizer.zero_grad()
        out = mlp(x)
        loss = ce_loss(out, y)
        loss.backward()
        optimizer.step()
        loss_values.append(loss.item())

        # evaluate
        if i % FLAGS.eval_freq == 0:
            # Skip building an autograd graph for the whole test set.
            with torch.no_grad():
                predictions = mlp(test_images).numpy()
            acc = accuracy(predictions, test_targets)
            print('acc', acc, 'loss', loss.item())
            acc_values.append(acc)

    # save loss and accuracy to file
    with open('accuracy_torch.txt', 'a') as f_acc:
        print(acc_values, file=f_acc)
    with open('loss_torch.txt', 'a') as f_loss:
        print(loss_values, file=f_loss)
def train():
    """
    Train the MLP on CIFAR-10 with Adam and plot loss and accuracy curves.

    Every ``FLAGS.eval_freq`` steps, and once more after the last step, the
    model is evaluated on the whole test set. The train and test metrics of
    those moments are recorded and finally plotted into the ``results``
    directory.

    NOTE(review): the Adam optimizer runs with its default learning rate and
    does not read ``FLAGS.learning_rate`` - confirm that this is intended.
    """

    # DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    import os

    def reshape_cifar10_mlp(x):
        """Flatten each image of the 4-D batch `x` into one row vector."""
        batch_size = x.shape[0]
        x = x.transpose([2, 3, 1, 0])
        x = x.reshape([-1, batch_size])
        x = x.transpose()
        return x

    def next_train_batch():
        """Fetch the next training batch as flattened tensors on `device`."""
        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        x = torch.from_numpy(reshape_cifar10_mlp(x)).to(device)
        y = torch.from_numpy(y).to(device)
        return x, y

    def evaluate():
        """Return (loss, accuracy) on the whole test set as Python floats."""
        # bn_flag=True presumably enables batch normalisation, so the model
        # is switched to eval mode to evaluate with its running statistics.
        mlp.eval()
        with torch.no_grad():
            logits = mlp(x_eval)
            loss = crossent_softmax(logits, y_eval.argmax(dim=-1))
            acc = accuracy(logits, y_eval)
        mlp.train()
        return loss.item(), float(acc)

    def record(train_loss, train_acc, eval_loss, eval_acc):
        """Store one set of metrics as plain floats so they can be plotted."""
        train_losses.append(train_loss.item())
        train_accs.append(float(train_acc))
        eval_losses.append(eval_loss)
        eval_accs.append(eval_acc)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    x_train, y_train = next_train_batch()

    # The test set never changes, so it is converted only once.
    x_eval = torch.from_numpy(
        reshape_cifar10_mlp(cifar10['test'].images)).to(device)
    y_eval = torch.from_numpy(cifar10['test'].labels).to(device)

    crossent_softmax = nn.CrossEntropyLoss()
    mlp = MLP(x_train.shape[1], dnn_hidden_units, y_train.shape[1], bn_flag=True)
    # optimizer = torch.optim.SGD(mlp.parameters(), lr=FLAGS.learning_rate)
    optimizer = torch.optim.Adam(mlp.parameters(), weight_decay=1e-3)
    mlp.to(device)

    train_accs = []
    train_losses = []
    eval_accs = []
    eval_losses = []

    for i in np.arange(FLAGS.max_steps):
        print('\nStep: {}\n'.format(i))
        print('Training: ')

        optimizer.zero_grad()
        logits = mlp(x_train)
        train_loss = crossent_softmax(logits, y_train.argmax(dim=-1))
        train_acc = accuracy(logits, y_train)
        print('loss: {:.4f}, acc: {:.4f}\n'.format(train_loss, train_acc))

        train_loss.backward()
        optimizer.step()

        x_train, y_train = next_train_batch()

        if i % FLAGS.eval_freq == 0:
            print('Evaluation: ')
            eval_loss, eval_acc = evaluate()
            record(train_loss, train_acc, eval_loss, eval_acc)
            print('loss: {:.4f}, acc: {:.4f}'.format(eval_loss, eval_acc))

    # Final evaluation after the last training step.
    print('Evaluation: ')
    eval_loss, eval_acc = evaluate()
    record(train_loss, train_acc, eval_loss, eval_acc)
    print('loss: {:.4f}, acc: {:.4f}'.format(eval_loss, eval_acc))

    print('Finished training.')

    # savefig does not create missing directories.
    os.makedirs('results', exist_ok=True)

    plt.figure(figsize=(10, 5))
    plt.plot(np.arange(len(train_losses)), train_losses, label='training loss')
    plt.plot(np.arange(len(eval_losses)), eval_losses, label='evaluation loss')
    plt.ylim(0, 3)
    plt.legend()
    plt.xlabel('Iterations [x{}]'.format(FLAGS.eval_freq))
    plt.savefig('results/mlp_loss_torch_adam_layers_maxstep_reg_batch.png', bbox_inches='tight')

    plt.figure(figsize=(10, 5))
    plt.plot(np.arange(len(train_accs)), train_accs, label='training accuracy')
    plt.plot(np.arange(len(eval_accs)), eval_accs, label='evaluation accuracy')
    plt.legend()
    plt.xlabel('Iterations [x{}]'.format(FLAGS.eval_freq))
    plt.savefig('results/mlp_acc_torch_adam_layers_maxstep_reg_batch.png', bbox_inches='tight')
def train():
    """
    Train the MLP on CIFAR-10 with SGD and plot loss and accuracy curves.

    The loss and accuracy of every training mini-batch are recorded. Every
    ``FLAGS.eval_freq`` steps, and at the last step, the model is evaluated
    batch-wise on the test data yielded by ``cifar_test_generator`` and the
    averaged test loss and accuracy are printed and recorded.

    NOTE(review): the metric lists (``train_overall_loss``,
    ``train_overall_accuracy``, ``train_x_axis``, ``test_overall_loss``,
    ``test_overall_accuracy``, ``test_x_axis``) are not created here and are
    assumed to be module-level lists - confirm.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # select which device to train the model on
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    # compute the input size of the MLP
    input_size, n_classes = 3 * 32 * 32, 10

    # init model, define the dataset, loss function and optimizer
    model = MLP(input_size, dnn_hidden_units, n_classes, FLAGS.b).to(device)
    dataset = cifar10_utils.get_cifar10(FLAGS.data_dir)
    loss_fn = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=FLAGS.learning_rate)

    for step in range(FLAGS.max_steps):
        X_train, y_train = dataset['train'].next_batch(FLAGS.batch_size)
        optimizer.zero_grad()

        # move to correct device and shape for MLP
        X_train = torch.tensor(X_train).reshape(
            FLAGS.batch_size, input_size).float().to(device)
        y_train = torch.tensor(y_train).float().to(device)

        predictions = model(X_train)
        train_loss = loss_fn(predictions, y_train.argmax(1).long())
        train_loss.backward()
        optimizer.step()

        # add the loss and accuracy to the lists for plotting
        train_overall_loss.append(train_loss.cpu().detach().sum())
        train_overall_accuracy.append(
            accuracy(predictions.cpu().detach(), y_train.cpu().detach()))
        train_x_axis.append(step)

        # test the model when eval freq is reached or if it is the last step
        if not step % FLAGS.eval_freq or step + 1 == FLAGS.max_steps:
            model.eval()
            test_accuracies, test_losses_list = [], []

            # test batchwise since it doesnot fit my gpu; no_grad keeps the
            # evaluation from allocating autograd buffers on top of that
            with torch.no_grad():
                for X_test, y_test in cifar_test_generator(dataset):
                    # -1 lets a shorter final test batch pass through.
                    X_test = torch.tensor(X_test).reshape(
                        -1, input_size).float().to(device)
                    y_test = torch.tensor(y_test).float().to(device)

                    test_predictions = model(X_test)
                    test_loss = loss_fn(test_predictions,
                                        y_test.argmax(1).long())
                    test_accuracy = accuracy(test_predictions, y_test)

                    # add the values to compute the average loss and accuracy for the entire testset
                    test_accuracies.append(test_accuracy.cpu().detach())
                    test_losses_list.append(test_loss.cpu().detach().sum())

            avg_test_accuracy = sum(test_accuracies) / len(test_accuracies)
            avg_test_loss = sum(test_losses_list) / len(test_losses_list)

            # Report the averaged test loss, not just the last test batch.
            print(
                "[{:5}/{:5}] Train loss {:.5f} Test loss {:.5f} Test accuracy {:.5f}"
                .format(step, FLAGS.max_steps, train_loss, avg_test_loss,
                        avg_test_accuracy))

            test_overall_accuracy.append(avg_test_accuracy)
            test_overall_loss.append(avg_test_loss)
            test_x_axis.append(step)
            model.train()

    plt.plot(train_x_axis, train_overall_loss, label="Avg Train loss")
    plt.plot(test_x_axis, test_overall_loss, label="Avg Test loss")
    plt.legend()
    plt.savefig("pytorch_loss_curve")
    plt.show()

    plt.plot(train_x_axis,
             train_overall_accuracy,
             label="Train batch accuracy")
    plt.plot(test_x_axis, test_overall_accuracy, label="Test set accuracy")
    plt.legend()
    plt.savefig("pytorch_accuracy_curve")
    plt.show()
def train():
    """
    Train the MLP on CIFAR-10 with Adam and plot loss and accuracy curves.

    The loss of every training step is printed and recorded together with a
    running mean over the most recent losses. Every ``FLAGS.eval_freq`` steps
    the result of ``evaluate(net, cifar10, FLAGS.batch_size)`` is recorded.
    After training the loss and accuracy curves are shown.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    net = MLP(3072, dnn_hidden_units, 10)
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=FLAGS.learning_rate)

    # Load cifar10
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

    print()
    print()
    print("----------------------------------------------")
    print("\t \t Training")
    print("----------------------------------------------\n")

    pl_loss = []
    average_loss = []
    acc = []

    for iter_ in np.arange(0, FLAGS.max_steps):
        # Load batches
        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        labels = np.argmax(y, axis=1)

        # reshape x into vectors; the row count follows the actual batch,
        # so any FLAGS.batch_size works (it used to be hard-coded to 200)
        x = np.reshape(x, (x.shape[0], 3072))

        inputs = torch.from_numpy(x).to(device)
        labels = torch.from_numpy(labels).long().to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        print("output: {}, labels:{}".format(outputs.size(), labels.size()))
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss = loss.item()
        pl_loss.append(running_loss)
        # Mean over the most recent losses (same window as before).
        average_loss.append(np.mean(pl_loss[:-100:-1]))
        print("iter: {} | training loss: {} ".format(iter_, "%.3f" % running_loss))

        if (iter_ + 1) % FLAGS.eval_freq == 0:
            net.eval()
            acc.append(evaluate(net, cifar10, FLAGS.batch_size))
            # Switch back, otherwise all later steps train in eval mode.
            net.train()
    #######################
    # END OF YOUR CODE    #
    #######################

    plt.plot(pl_loss, 'r-', label="Batch loss", alpha=0.5)
    plt.plot(average_loss, 'g-', label="Average loss", alpha=0.5)
    plt.legend()
    plt.xlabel("Iterations")
    plt.ylabel("Loss")
    plt.title("Training Loss")
    plt.grid(True)
    plt.show()
    plt.close()

    plt.plot(acc, 'g-', alpha=0.5)
    plt.xlabel("Iterations")
    plt.ylabel("Accuracy")
    plt.title("Test Accuracy")
    plt.grid(True)
    plt.show()
    plt.close()

    print()
    print("TRAINING COMPLETED")
def train():
    """
    Train the MLP on CIFAR-10 with Adam and plot accuracy and loss curves.

    Every ``FLAGS.eval_freq`` steps the accuracy of the current training
    batch, the mean training loss since the previous evaluation, and the loss
    and accuracy on one mini-batch of test data are recorded. After training
    the accuracy on the whole test set is printed and the curves are shown.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    # prepare input data
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    _, width, height, channels = cifar10['train']._images.shape
    _, n_outputs = cifar10['train']._labels.shape
    n_inputs = width * height * channels

    network = MLP(n_inputs, dnn_hidden_units, n_outputs)
    optimizer = torch.optim.Adam(network.parameters(), lr=FLAGS.learning_rate)  # or SGD?
    loss_fn = nn.CrossEntropyLoss()

    train_losses, train_acc, test_losses, test_acc = [], [], [], []
    current_loss = 0.0

    for step in range(FLAGS.max_steps):
        network.train()
        optimizer.zero_grad()

        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        # Only the parameters need gradients, not the input images.
        x = torch.tensor(x).reshape(x.shape[0], -1)
        y = torch.tensor(y, dtype=torch.float)

        output = network(x)
        # One-hot labels -> class indices for CrossEntropyLoss.
        labels = torch.max(y, 1)[1]
        loss = loss_fn(output, labels)
        loss.backward()
        optimizer.step()

        current_loss += loss.item()

        if (step + 1) % FLAGS.eval_freq == 0:
            train_acc.append(accuracy(output, y))
            # average training loss since the previous evaluation
            train_losses.append(current_loss / float(FLAGS.eval_freq))
            current_loss = 0.0

            x_test, y_test = cifar10['test'].next_batch(FLAGS.batch_size)
            x_test = torch.tensor(x_test).reshape(x_test.shape[0], -1)
            y_test = torch.tensor(y_test, dtype=torch.float)

            # Evaluate in eval mode and without an autograd graph; train
            # mode is restored at the top of the next step.
            network.eval()
            with torch.no_grad():
                output_test = network(x_test)
                test_losses.append(
                    loss_fn(output_test, torch.max(y_test, 1)[1]).item())
            test_acc.append(accuracy(output_test, y_test))
            print("Step {}".format(step))

    # Final accuracy on the complete test set.
    size_test = cifar10['test']._num_examples
    x, y = cifar10['test'].next_batch(size_test)
    x = torch.tensor(x).reshape(size_test, -1)
    y = torch.tensor(y, dtype=torch.float)

    # Get network output for batch and get loss and accuracy
    network.eval()
    with torch.no_grad():
        out = network(x)
    print("Accuracy: {}".format(accuracy(out, y)))

    # plot graph of accuracies
    plt.subplot(211)
    plt.plot(test_acc, label="test accuracy")
    plt.plot(train_acc, label="training accuracy")
    plt.title('Accuracy')
    plt.legend()

    plt.subplot(212)
    plt.plot(test_losses, label="test loss")
    plt.plot(train_losses, label="training loss")
    plt.title('Cross-entropy loss')
    plt.legend()

    plt.show()
def train():
    """
    Train the MLP on CIFAR-10 with Adam plus an explicit L2-norm penalty.

    The loss (cross-entropy + ``alpha`` * sum of parameter norms) and the
    accuracy of every training mini-batch are recorded. Every
    ``FLAGS.eval_freq`` steps the model is evaluated on the whole test set
    and a log line is printed. Logs and plots are additionally written to
    ``./out/log`` and ``./out/plot`` when ``SAVE_LOGS`` / ``SAVE_PLOTS`` are
    switched on.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    import os

    ############################## VARIABLES ##############################

    SAVE_PLOTS = False
    SAVE_LOGS = False

    img_size = 32
    n_classes = 10
    input_size = img_size * img_size * 3
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    n_iterations = FLAGS.max_steps
    lr_rate = FLAGS.learning_rate

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device:", device)

    ############################## METHODS ##############################

    # fp = open('memory_profiler_basic_mean.log', 'w+')
    # @profile(stream=fp)
    def test():
        """Return (accuracy, loss) of `net` on the whole test set."""
        net.eval()
        # No autograd graph for the test set; the loss is returned as a
        # plain float so it can be plotted regardless of the device.
        with torch.no_grad():
            output_t = net(x_t)
            loss_t = criterion(output_t, y_t).item()
        acc_t = accuracy(output_t, y_t_onehot)
        return acc_t, loss_t

    def plot(iteration):
        """Save the accuracy and loss curves recorded up to `iteration`."""
        idx_test = list(range(0, iteration + 1, eval_freq))
        idx = list(range(0, iteration + 1))

        plt.clf()
        plt.cla()
        plt.subplot(1, 2, 1)
        plt.plot(idx_test, test_accuracies, "k-", linewidth=1, label="test")
        plt.plot(idx, accuracies, "r-", linewidth=0.5, alpha=0.5, label="train")
        plt.xlabel('iteration')
        plt.ylabel('accuracy')
        plt.legend()

        plt.subplot(1, 2, 2)
        plt.plot(idx_test, test_losses, "k-", linewidth=1, label="test")
        plt.plot(idx, losses, "r-", linewidth=0.5, alpha=0.5, label="train")
        plt.xlabel('iteration')
        plt.ylabel('loss')
        plt.legend()
        # savefig does not create missing directories.
        os.makedirs("./out/plot", exist_ok=True)
        plt.savefig("./out/plot/plot_pytorch_" + str(batch_size) + "_" + str(lr_rate) + ".png",
                    bbox_inches='tight')
        return

    def to_label(tensor):
        """Convert one-hot rows to class indices."""
        _, tensor = tensor.max(1)
        return tensor

    ############################## MAIN ##############################

    cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py')

    net = MLP(input_size, dnn_hidden_units, n_classes)
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(net.parameters(), lr=lr_rate, momentum=0.8, nesterov=False)
    optimizer = optim.Adam(net.parameters(), lr=lr_rate)

    losses = []
    accuracies = []
    test_accuracies = []
    test_losses = []

    # Weight of the parameter-norm penalty added to the training loss.
    alpha = 0.0001

    x_t = cifar10['test'].images
    y_t = cifar10['test'].labels
    x_t = torch.from_numpy(x_t.reshape(-1, input_size))
    y_t_onehot = torch.from_numpy(y_t).type(torch.LongTensor)
    y_t = to_label(y_t_onehot)
    x_t, y_t = x_t.to(device), y_t.to(device)
    y_t_onehot = y_t_onehot.to(device)

    plt.figure(figsize=(10, 4))

    for i in range(n_iterations):
        x, y = cifar10['train'].next_batch(batch_size)
        x = torch.from_numpy(x.reshape(-1, input_size))
        y_onehot = torch.from_numpy(y).type(torch.LongTensor)
        y = to_label(y_onehot)
        x, y = x.to(device), y.to(device)
        y_onehot = y_onehot.to(device)

        optimizer.zero_grad()
        output = net(x)
        train_loss = criterion(output, y)

        # Sum of the 2-norms of all parameter tensors.
        reg_loss = 0
        for param in net.parameters():
            reg_loss += param.norm(2)

        loss = train_loss + alpha * reg_loss
        loss.backward()
        optimizer.step()

        losses.append(loss.item())
        accuracies.append(accuracy(output.detach().data, y_onehot.detach()))

        del x, y

        if i % eval_freq == 0:
            acc_t, loss_t = test()
            test_accuracies.append(acc_t)
            test_losses.append(loss_t)

            log_string = "[{:5d}/{:5d}] Test Accuracy: {:.4f} | Batch Accuracy: {:.4f} | Batch Loss: {:.6f} | Train/Reg: {:.6f}/{:.6f}\n".format(
                i, n_iterations, test_accuracies[-1], accuracies[-1], loss, train_loss, reg_loss * alpha
            )
            print(log_string)

            if SAVE_LOGS:
                os.makedirs("./out/log", exist_ok=True)
                with open("./out/log/pytorch_log_" + str(batch_size) + "_" + str(lr_rate) + ".txt", "a") as myfile:
                    myfile.write(log_string)

            if SAVE_PLOTS:
                plot(i)

            # test() switched the model to eval mode.
            net.train()
def train():
    """
    Performs training and evaluation of MLP model.

    The optimizer is selected by ``FLAGS.optimizer`` (``'SGD'``, ``'Adam'`` or
    ``'Adadelta'``). Every ``FLAGS.eval_freq`` steps, and at the last step,
    the loss and accuracy of the current mini-batch and of the whole test set
    are recorded. Training stops early once more than 20 evaluations have
    been recorded and the test loss changed by less than 1e-6 between the
    last two of them. All recorded rows are appended to
    ``../mlp_pytorch_results/mlp_pytorch.csv`` (``;``-separated).

    Returns:
      list of ``[step, train_acc, train_loss, test_acc, test_loss]`` rows,
      one per evaluation.

    Raises:
      ValueError: if ``FLAGS.optimizer`` is not one of the supported names.
    """
    print_flags()

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    # use GPU if available
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    lr = FLAGS.learning_rate
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    data_dir = FLAGS.data_dir
    optim_type = FLAGS.optimizer

    # if the test loss changes by less than this between two evaluations,
    # training stops
    stop_threshold = 1e-6

    # load input data
    cifar10 = cifar10_utils.get_cifar10(data_dir, one_hot=True)

    # get test data
    x_test = cifar10["test"].images
    y_test = cifar10["test"].labels
    train_data = cifar10["train"]

    # determine dimension of data
    x_dim = x_test.shape
    n_test_samples = x_dim[0]  # number of test samples
    # images of size 32 x 32 x 3
    n_inputs = x_dim[1] * x_dim[2] * x_dim[3]  # channels * height * width
    n_classes = y_test.shape[1]

    # reshape test images to fit MLP input and convert to tensors
    x_test = x_test.reshape((n_test_samples, n_inputs))
    x_test_torch = torch.tensor(x_test, dtype=torch.float, device=device)
    y_test_torch = torch.tensor(y_test, dtype=torch.float, device=device)
    test_targets = y_test_torch.argmax(dim=1)

    # initialize MLP model
    mlp_model = MLP(n_inputs=n_inputs,
                    n_hidden=dnn_hidden_units,
                    n_classes=n_classes).to(device)

    if optim_type == 'SGD':
        optimizer = torch.optim.SGD(mlp_model.parameters(), lr=lr)
    elif optim_type == 'Adam':
        optimizer = torch.optim.Adam(mlp_model.parameters(), lr=lr)
    elif optim_type == 'Adadelta':
        optimizer = torch.optim.Adadelta(mlp_model.parameters(), lr=lr)
    else:
        # Previously this fell through and failed with an unbound name.
        raise ValueError(
            "Unsupported optimizer {!r}; expected 'SGD', 'Adam' or "
            "'Adadelta'".format(optim_type))

    # define loss function
    loss_fn = nn.CrossEntropyLoss()

    # evaluation metrics
    acc_train = []
    acc_test = []
    loss_train = []
    loss_test = []
    best_acc = 0.0
    results = []

    # train the model
    print("Start training")
    for step in range(max_steps):
        # get mini-batch
        x_train, y_train = train_data.next_batch(batch_size)
        x_train = x_train.reshape((x_train.shape[0], n_inputs))

        # transform to tensor representation
        x_train_torch = torch.tensor(x_train, dtype=torch.float, device=device)
        # labels for mb training set
        y_train_torch = torch.tensor(y_train, dtype=torch.float, device=device)

        # set gradients to zero
        optimizer.zero_grad()

        # forward pass mb to get predictions as output
        out = mlp_model(x_train_torch)

        # compute loss (one-hot labels -> class indices)
        loss_mb = loss_fn(out, y_train_torch.argmax(dim=1))

        # backward pass
        loss_mb.backward()
        optimizer.step()

        # evaluate training and validation set (pretty much the same as with Numpy)
        if (step % eval_freq == 0) or (step == max_steps - 1):
            print(f"Step: {step}")

            # compute and store training metrics
            loss_train.append(loss_mb.item())
            acc_train.append(accuracy(out, y_train_torch))
            print("TRAIN acc: {0:.4f}  & loss: {1:.4f}".format(
                acc_train[-1], loss_train[-1]))

            # compute and store test metrics
            # Note that we use the test set as validation set!! Only as an exception :P
            # if test set is too big to fit into memory, use mini-batches as well and average results
            with torch.no_grad():
                out_test = mlp_model(x_test_torch)
                loss_val = loss_fn(out_test, test_targets)
            loss_test.append(loss_val.item())
            acc_test.append(accuracy(out_test, y_test_torch))
            print("TEST acc: {0:.4f}  & loss: {1:.4f}".format(
                acc_test[-1], loss_test[-1]))

            results.append([
                step, acc_train[-1], loss_train[-1], acc_test[-1],
                loss_test[-1]
            ])

            if acc_test[-1] > best_acc:
                best_acc = acc_test[-1]
                print("New BEST acc: {0:.4f}".format(best_acc))

            # Early stop when the test loss has stopped changing.
            if len(loss_train) > 20:
                prev_losses = loss_test[-2]
                cur_losses = loss_test[-1]
                if abs(prev_losses - cur_losses) < stop_threshold:
                    print("Training stopped early at step {0}".format(step + 1))
                    break

    print("Finished training")
    print("BEST acc: {0:.4f}".format(best_acc))

    res_path = Path.cwd().parent / 'mlp_pytorch_results'
    res_path.mkdir(parents=True, exist_ok=True)
    print("Saving results to {0}".format(res_path))

    res_path = res_path / 'mlp_pytorch.csv'
    # Write the header only when the file is created.
    mode = 'a' if res_path.exists() else 'w'

    col_names = [
        'step', 'train_acc', 'train_loss', 'test_acc', 'test_loss', 'lr',
        'max_steps', 'batch_size', 'dnn_hidden_units', 'optimizer'
    ]
    # Run settings repeated on every row. ';' is used as the delimiter for
    # header and rows alike because dnn_hidden_units prints with commas.
    run_config = [lr, max_steps, batch_size, dnn_hidden_units, optim_type]

    with open(res_path, mode) as csv_file:
        if mode == 'w':
            csv_file.write(';'.join(col_names) + '\n')
        for row in results:
            csv_file.write(
                ';'.join(str(value) for value in row + run_config) + '\n')

    return results
def train():
    """
    Train the MLP on CIFAR-10 with Adam and log metrics through ``writer``.

    Every input batch is first passed through a ``BatchNorm2d`` layer over
    its three channels and then flattened. Every ``FLAGS.eval_freq`` steps
    the accuracy on the whole test set and the mean training loss since the
    previous evaluation are printed, and test accuracy, train accuracy and
    train loss are logged with ``writer.add_scalar``.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

    MLP_net = MLP(n_inputs=1 * 3 * 32 * 32,
                  n_hidden=dnn_hidden_units,
                  n_classes=10)

    params = MLP_net.parameters()
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(params, lr=FLAGS.learning_rate)
    print(MLP_net)

    # Normalises the inputs only: its parameters are not handed to the
    # optimizer and its outputs are detached.
    # NOTE(review): the layer stays in train mode for the test set too, so
    # test images are normalised with their own batch statistics - confirm
    # this is intended.
    batch_norm = torch.nn.BatchNorm2d(3)

    loss_list = []
    for step in range(FLAGS.max_steps):
        # Get batch and reshape input to vector
        x_train, y_train = cifar10['train'].next_batch(FLAGS.batch_size)
        x_train = batch_norm(torch.from_numpy(x_train)).detach()
        x_train = x_train.reshape(FLAGS.batch_size, -1)

        net_output = MLP_net(x_train)
        batch_accuracy = accuracy(net_output.detach().numpy(), y_train)

        # One-hot labels -> class indices for CrossEntropyLoss.
        y_train = torch.from_numpy(y_train).type(torch.LongTensor)
        loss = criterion(net_output, torch.max(y_train, 1)[1])
        # Keep only the number, not the graph-attached tensor.
        loss_list.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (step + 1) % FLAGS.eval_freq == 0:
            x_test, y_test = cifar10['test'].images, cifar10['test'].labels
            # No autograd graph is needed for the test set.
            with torch.no_grad():
                x_test = batch_norm(torch.from_numpy(x_test))
                x_test = x_test.reshape(x_test.shape[0], -1)
                net_test_output = MLP_net(x_test)
            # Computed once and reused for printing and logging.
            test_accuracy = accuracy(net_test_output.numpy(), y_test)

            print("test set accuracy for step " + str(step + 1) + " : " +
                  str(test_accuracy))
            print("loss : ", sum(loss_list) / len(loss_list))
            loss_list = []

            writer.add_scalar('Test_accuracy', test_accuracy, step)
            writer.add_scalar('Train_accuracy', batch_accuracy, step)
            writer.add_scalar('Train_loss', loss.item(), step)
def train():
    """Train an MLP on CIFAR-10 and plot train/test accuracy and loss curves.

    Configuration is read from ``FLAGS`` (``dnn_hidden_units``, ``data_dir``,
    ``batch_size``, ``max_steps``, ``learning_rate``, ``eval_freq``). Every
    ``eval_freq`` steps the model is evaluated on the training set (in chunks
    of 500 samples) and on the full test set. The curves are saved to
    ``accuracy_mlp.jpg`` and ``loss_mlp.jpg``.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Data used only to measure accuracy/loss; loaded separately from the
    # copy used for drawing training batches below.
    batch_size_acc = 500
    data_accuracy_loss = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
    X_train_acc = data_accuracy_loss['train'].images
    y_train_acc = data_accuracy_loss['train'].labels
    X_train_acc = np.reshape(X_train_acc, (X_train_acc.shape[0], -1))
    # Samples beyond the last full chunk of batch_size_acc are not evaluated.
    steps_train = X_train_acc.shape[0] // batch_size_acc

    # Loading data for training
    data = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
    train_images = data['train'].images
    n_classes = data['train'].labels.shape[1]
    n_inputs = (train_images.shape[1] * train_images.shape[2] *
                train_images.shape[3])

    batch_size = FLAGS.batch_size
    m_steps = FLAGS.max_steps
    alpha = FLAGS.learning_rate

    mlp = MLP(n_inputs, dnn_hidden_units, n_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(mlp.parameters(), lr=alpha)

    X_test, y_test = data['test'].images, data['test'].labels
    X_test = torch.from_numpy(np.reshape(X_test, (X_test.shape[0], -1)))
    y_test = torch.LongTensor(y_test)

    x_ax = []
    acc_train = []
    acc_test = []
    loss_train = []
    loss_test = []

    for step in range(m_steps):
        x, y = data['train'].next_batch(batch_size)
        x = torch.from_numpy(x.reshape([x.shape[0], -1]))
        labels = torch.LongTensor(y)

        y_pred = mlp(x)
        # Labels are one-hot; CrossEntropyLoss expects class indices.
        loss = criterion(y_pred, torch.max(labels, 1)[1])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % FLAGS.eval_freq == 0:
            print('Iteration ', step)
            x_ax.append(step)
            acc_ = []
            loss_ = []
            # Evaluation needs no gradients; without no_grad every chunk
            # would build an autograd graph for nothing.
            with torch.no_grad():
                for i in range(steps_train):
                    lo = i * batch_size_acc
                    hi = lo + batch_size_acc
                    x_acc = torch.from_numpy(X_train_acc[lo:hi])
                    y_acc = torch.LongTensor(y_train_acc[lo:hi])
                    chunk_pred = mlp.forward(x_acc)
                    acc_.append(accuracy(chunk_pred, y_acc))
                    loss_.append(
                        float(criterion(chunk_pred,
                                        torch.max(y_acc, 1)[1])))
                acc_train.append(np.mean(acc_))
                loss_train.append(np.mean(loss_))

                predictions = mlp.forward(X_test)
                acc_test.append(accuracy(predictions, y_test))
                loss_te = criterion(predictions, torch.max(y_test, 1)[1])
                loss_test.append(float(loss_te))

    x_ax = np.array(x_ax)
    acc_test = np.array(acc_test)
    acc_train = np.array(acc_train)
    loss_test = np.array(loss_test)
    loss_train = np.array(loss_train)

    # Summary (the original printed this identical block twice).
    print('Max train accuracy ', max(acc_train))
    print('Max test accuracy ', max(acc_test))
    print('Min train loss ', min(loss_train))
    print('Min test loss ', min(loss_test))

    fig = plt.figure()
    ax = plt.axes()
    plt.title("MLP Pytorch. Accuracy curves")
    ax.plot(x_ax, acc_train, label='train')
    ax.plot(x_ax, acc_test, label='test')
    ax.set_xlabel('Step')
    ax.set_ylabel('Accuracy')
    plt.legend()
    plt.savefig('accuracy_mlp.jpg')

    fig = plt.figure()
    ax = plt.axes()
    plt.title("MLP Pytorch. Loss curves")
    ax.plot(x_ax, loss_train, label='train')
    ax.plot(x_ax, loss_test, label='test')
    ax.set_xlabel('Step')
    ax.set_ylabel('Loss')
    ax.set_ylim(top=10, bottom=1)
    plt.legend()
    plt.savefig('loss_mlp.jpg')
def train():
    """Train an MLP on CIFAR-10 with SGD and log metrics to ``results.dat``.

    Configuration is read from ``FLAGS`` (``dnn_hidden_units``, ``neg_slope``,
    ``learning_rate``, ``batch_size``, ``max_steps``, ``data_dir``,
    ``eval_freq``). One row of metrics is written before the first weight
    update (with batch index 0) and then every ``eval_freq`` batches.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Get negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope
    learning_rate = FLAGS.learning_rate
    batch_size = FLAGS.batch_size
    max_steps = FLAGS.max_steps

    device = torch.device(
        "cuda:0") if torch.cuda.is_available() else torch.device("cpu")

    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    x_test, t_test = cifar10["test"].images, cifar10["test"].labels
    x_test = torch.tensor(x_test.reshape(x_test.shape[0],
                                         N_INPUTS)).to(device)
    # One-hot labels -> class indices, as CrossEntropyLoss expects.
    t_test_indx = torch.tensor(np.where(t_test == 1)[1]).to(device)

    mlp = MLP(N_INPUTS, dnn_hidden_units, N_CLASSES, neg_slope).to(device)
    crossEntropy = nn.CrossEntropyLoss()
    optimizer = optim.SGD(mlp.parameters(), lr=learning_rate)

    if torch.cuda.device_count() > 1:
        # DataParallel returns a wrapper; the original discarded it, so the
        # model never actually ran on more than one GPU.
        mlp = nn.DataParallel(mlp)

    # The context manager closes the file even if training raises.
    with open("results.dat", "w") as results:

        def write_row(batch_idx, y, t, loss):
            """Evaluate on the test set and append one metrics row."""
            train_acc = accuracy(y, t)
            with torch.no_grad():
                y_test = mlp(x_test)
                test_loss = crossEntropy(y_test, t_test_indx)
            test_acc = accuracy(y_test, t_test)
            results.write("%d %d %d %.3f %.3f %.3f %.3f\n" %
                          (cifar10["train"]._epochs_completed, batch_idx,
                           max_steps, loss.item(), train_acc, test_acc,
                           test_loss.item()))

        results.write(
            "#torch_mlp \n#neg_slope : {}\n#learning_rate : {}\n"
            "#batch_size : {}\n#hidden_units : {}\n#max_steps : {}\n".format(
                neg_slope, learning_rate, batch_size, dnn_hidden_units,
                max_steps))
        results.write("#GPUs : {}\n".format(torch.cuda.device_count()))
        results.write(
            "#epoch batch max_steps loss train_acc test_acc test_loss\n")

        for batch in range(1, max_steps + 1):
            optimizer.zero_grad()

            x, t = cifar10["train"].next_batch(batch_size)
            # Gradients with respect to the inputs are never used, so the
            # input tensor does not need requires_grad.
            x = torch.tensor(x.reshape(x.shape[0], N_INPUTS)).to(device)
            t_indx = torch.tensor(np.where(t == 1)[1]).to(device)

            y = mlp(x)  # y predictions, t targets
            loss = crossEntropy(y, t_indx)  # includes softmax

            # Metrics before any weight update, recorded as batch 0.
            if batch == 1:
                write_row(0, y, t, loss)

            # Update weights
            loss.backward()
            optimizer.step()

            if batch % FLAGS.eval_freq == 0:
                write_row(batch, y, t, loss)
def train():
    """Train an MLP on CIFAR-10 with SGD and save the model and its curves.

    Configuration is read from ``FLAGS`` (``dnn_hidden_units``, ``data_dir``,
    ``learning_rate``, ``weight_decay``, ``momentum``, ``max_steps``,
    ``batch_size``, ``eval_freq``). The model is evaluated on the full test
    set every ``eval_freq`` steps and on the last step. The model and the
    pickled loss/accuracy lists are written below ``../models/``.
    """
    # DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Preparation for training
    print('- Init parameters')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    data = cifar10_utils.get_cifar10(FLAGS.data_dir)
    train_data = data['train']
    test_data = data['test']

    w, h, d = train_data.images[0].shape
    n_classes = train_data.labels[0].shape[0]

    criterion = nn.CrossEntropyLoss()
    model = MLP(w * h * d, dnn_hidden_units, n_classes).to(device)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=FLAGS.learning_rate,
                                weight_decay=FLAGS.weight_decay,
                                momentum=FLAGS.momentum)

    train_losses = []
    test_losses = []
    test_accuracies = []

    # Train
    print('- Start Training')
    for step in range(FLAGS.max_steps):
        x_batch, x_labels = next_batch_in_tensors(train_data,
                                                  FLAGS.batch_size, device)
        optimizer.zero_grad()
        out = model(x_batch)
        # Labels are one-hot; CrossEntropyLoss expects class indices.
        loss = criterion(out, x_labels.argmax(dim=1))
        loss.backward()
        optimizer.step()
        # The loss is a 0-dim tensor; indexing it with [0] raises in current
        # PyTorch, so read the scalar with .item() directly.
        train_losses.append(loss.item())

        is_last_step = step == FLAGS.max_steps - 1
        if step % FLAGS.eval_freq == 0 or is_last_step:
            # Test current model on the whole test set, without gradients.
            with torch.no_grad():
                test_x, test_labels = next_batch_in_tensors(
                    test_data, test_data.num_examples, device)
                out_test = model(test_x)
                loss_test = criterion(out_test, test_labels.argmax(dim=1))
                acc = accuracy(out_test, test_labels)
            test_losses.append(loss_test.item())
            test_accuracies.append(acc.item())

    # Save stuff
    filename = 'steps-{}_layers-{}_lr-{}_bs-{}'.format(
        FLAGS.max_steps, FLAGS.dnn_hidden_units, FLAGS.learning_rate,
        FLAGS.batch_size)
    # Non-default optimizer settings are encoded in the directory name.
    if FLAGS.momentum != SGD_MOMENTUM_DEFAULT:
        filename += '_SGDmomentum-{}'.format(FLAGS.momentum)
    if FLAGS.weight_decay != SGD_WEIGHT_DECAY_DEFAULT:
        filename += '_SGDweightDecay-{}'.format(FLAGS.weight_decay)

    filepath = '../models/{}'.format(filename)
    os.makedirs(filepath, exist_ok=True)

    torch.save(model, '{}/model.pt'.format(filepath))
    outputs = (('train_loss', train_losses), ('test_loss', test_losses),
               ('accuracies', test_accuracies))
    for name, values in outputs:
        with open('{}/{}'.format(filepath, name), 'wb') as f:
            pickle.dump(values, f)

    print(test_accuracies[-1])
def train():
    """Train an MLP on CIFAR-10 and plot loss and accuracy curves.

    Configuration is read from ``FLAGS`` (``dnn_hidden_units``, ``data_dir``,
    ``neg_slope``, ``batch_size``, ``learning_rate``, ``optimizer``,
    ``max_steps``, ``eval_freq``). The model is evaluated on the full test
    set every ``eval_freq`` iterations and on the last iteration. The figure
    is saved to ``../figures/pytorch_mlp.png`` and shown.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Use a separate local name. Assigning to DATA_DIR_DEFAULT here would make
    # it a local variable and raise UnboundLocalError when FLAGS.data_dir is
    # empty.
    data_dir = FLAGS.data_dir if FLAGS.data_dir else DATA_DIR_DEFAULT

    # Get negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope
    batch_size = FLAGS.batch_size
    learning_rate = FLAGS.learning_rate

    cifar_data = cifar10_utils.get_cifar10(data_dir)
    train_data = cifar_data['train']
    test_data = cifar_data['test']

    n_classes = train_data.labels.shape[1]
    n_inputs = np.prod(train_data.images.shape[1:])

    x_test, y_test = test_data.images, test_data.labels
    x_test = torch.from_numpy(np.reshape(x_test,
                                         (x_test.shape[0], n_inputs)))
    y_test = torch.from_numpy(np.argmax(y_test,
                                        axis=1)).type(torch.LongTensor)

    criterion = nn.CrossEntropyLoss()
    model = MLP(n_inputs, dnn_hidden_units, n_classes, neg_slope)

    if FLAGS.optimizer == 'ADAM':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif FLAGS.optimizer == 'ADAMwd':
        optimizer = optim.Adam(model.parameters(),
                               lr=learning_rate,
                               weight_decay=0.02)
    elif FLAGS.optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    elif FLAGS.optimizer == 'RMS':
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
    else:
        print("Optimizer: Used default option, SGD")
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    # Train and Test losses
    losses = [[], []]
    # Train and Test accuracies
    accuracies = [[], []]
    # True iteration for plotting
    iterations = []

    for iteration in range(FLAGS.max_steps):
        x, y = train_data.next_batch(batch_size)
        # Use the actual batch length instead of assuming batch_size rows.
        x = torch.from_numpy(np.reshape(x, (x.shape[0], n_inputs)))
        # argmax in order to align labels with the Cross entropy loss function
        y = torch.from_numpy(np.argmax(y, axis=1)).type(torch.LongTensor)

        train_output = model.forward(x)
        loss = criterion(train_output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        is_last = iteration == FLAGS.max_steps - 1
        if iteration % FLAGS.eval_freq == 0 or is_last:
            iterations.append(iteration)

            # Second forward pass for test set
            with torch.no_grad():
                test_output = model.forward(x_test)
                test_loss = criterion(test_output, y_test).item()

            # The train loss for this batch is already known; keep a plain
            # float so the plot does not receive graph-attached tensors.
            train_loss = loss.item()
            losses[0].append(train_loss)
            losses[1].append(test_loss)

            # Calculate accuracies
            train_acc = accuracy(train_output.detach(), y)
            test_acc = accuracy(test_output, y_test)
            accuracies[0].append(train_acc)
            accuracies[1].append(test_acc)

            print(
                "Iteration {}, Train loss: {}, Train accuracy: {}, Test accuracy: {}"
                .format(iteration, train_loss, train_acc, test_acc))

    fig = plt.figure(figsize=(25, 10), dpi=200)
    fig.suptitle('PyTorch MLP: Losses and Accuracies', fontsize=40)
    ax1 = fig.add_subplot(1, 2, 1)
    ax2 = fig.add_subplot(1, 2, 2)

    ax1.plot(iterations, losses[0], linewidth=4, color="g",
             label="Train loss")
    ax1.plot(iterations, losses[1], linewidth=4, color="c",
             label="Test loss")
    ax2.plot(iterations, accuracies[0], linewidth=4, color="g",
             label="Train accuracy")
    ax2.plot(iterations, accuracies[1], linewidth=4, color="c",
             label="Test accuracy")

    ax1.set_xlabel('$Iteration$', fontsize=28)
    ax1.set_ylabel('$Loss$', fontsize=28)
    ax2.set_xlabel('$Iteration$', fontsize=28)
    ax2.set_ylabel('$Accuracy$', fontsize=28)
    ax1.legend(fontsize=22)
    ax2.legend(fontsize=22)

    plt.savefig("../figures/pytorch_mlp.png")
    plt.show()
def train():
    """Train an MLP on CIFAR-10 with SGD and optionally plot the curves.

    Configuration is read from ``FLAGS`` (``dnn_hidden_units``, ``neg_slope``,
    ``data_dir``, ``learning_rate``, ``max_steps``, ``eval_freq``,
    ``batch_size``). The model is evaluated on the full test set every
    ``eval_freq`` steps. The ``PRINTS`` and ``PLOTS`` switches defined
    outside this function enable progress output and the final figure.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Get negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope

    import matplotlib.pyplot as plt

    data = cifar10_utils.get_cifar10(FLAGS.data_dir)
    # Named *_set so the locals do not shadow this function's own name.
    train_set = data['train']
    test_set = data['test']

    image_shape = train_set.images.shape
    dim_x = image_shape[1] * image_shape[2] * image_shape[3]
    n_classes = train_set.labels.shape[1]

    mlp = MLP(dim_x, dnn_hidden_units, n_classes, neg_slope)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(mlp.parameters(), FLAGS.learning_rate)

    # One slot per evaluation point.
    n_evals = FLAGS.max_steps // FLAGS.eval_freq
    loss_train = np.zeros((n_evals, ))
    loss_test = np.zeros((n_evals, ))
    accuracy_test = np.zeros((n_evals, ))

    images_test_np = np.reshape(test_set.images,
                                (test_set.images.shape[0], dim_x))
    images_test = torch.from_numpy(images_test_np)
    labels_test = torch.from_numpy(np.argmax(test_set.labels, axis=1))

    for i in range(0, FLAGS.max_steps):
        if PRINTS:
            print('iter', i + 1, end='\r')

        images_np, labels_np = train_set.next_batch(FLAGS.batch_size)
        images_np = np.reshape(images_np, (images_np.shape[0], dim_x))
        images = torch.from_numpy(images_np)
        labels = torch.from_numpy(np.argmax(labels_np, axis=1))

        optimizer.zero_grad()
        pred = mlp(images)
        loss = criterion(pred, labels.long())
        loss.backward()
        optimizer.step()

        if (i + 1) % FLAGS.eval_freq == 0:
            eval_idx = i // FLAGS.eval_freq
            loss_train[eval_idx] = loss.item()
            # Evaluation needs no gradients.
            with torch.no_grad():
                pred_test = mlp(images_test)
                # Fix the one-hot width explicitly; otherwise it is inferred
                # from the largest label present.
                accuracy_test[eval_idx] = accuracy(
                    pred_test, F.one_hot(labels_test, num_classes=n_classes))
                loss_test[eval_idx] = criterion(pred_test,
                                                labels_test.long()).item()
            if PRINTS:
                print()
                print('test_loss:', loss_test[eval_idx])
                print('test_accuracy:', accuracy_test[eval_idx])
                print('train_loss:', loss_train[eval_idx])

    if PLOTS:
        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        fig.suptitle('Training curves for Pytorch MLP')

        ax[0].set_title('Loss')
        ax[0].set_ylabel('Loss value')
        ax[0].set_xlabel('No of batches seen x{}'.format(FLAGS.eval_freq))
        ax[0].plot(loss_train, label='Train')
        ax[0].plot(loss_test, label='Test')
        ax[0].legend()

        ax[1].set_title('Accuracy')
        ax[1].set_ylabel('Accuracy value')
        ax[1].set_xlabel('No of batches seen x{}'.format(FLAGS.eval_freq))
        ax[1].plot(accuracy_test, label='Test')
        ax[1].legend()

        plt.show()
def train():
    """Train an MLP on CIFAR-10 with Adam and plot training loss and accuracy.

    Configuration is read from ``FLAGS`` (``dnn_hidden_units``,
    ``learning_rate``, ``max_steps``, ``batch_size``, ``eval_freq``). The
    model is evaluated on the full test set every ``eval_freq`` steps.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    learning_rate = FLAGS.learning_rate
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    n_inputs = 32 * 32 * 3

    # Use the GPU when present but fall back to CPU; the original called
    # .cuda() unconditionally and crashed on CPU-only machines.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py')
    mlp = MLP(n_inputs, dnn_hidden_units, 10).to(device)
    opt = optim.Adam(mlp.parameters(), lr=learning_rate)
    loss_function = nn.CrossEntropyLoss()

    train_losses = []
    accuracies = []
    steps = []

    for step in range(max_steps):
        x, y = cifar10['train'].next_batch(batch_size)
        x_tensor = torch.from_numpy(np.reshape(
            x, [x.shape[0], n_inputs])).to(device)
        y_tensor = torch.from_numpy(y).to(device)

        out = mlp(x_tensor)
        # Labels are one-hot; CrossEntropyLoss expects class indices.
        loss = loss_function(out, torch.max(y_tensor, 1)[1])
        opt.zero_grad()
        loss.backward()
        opt.step()

        # Keep a plain float: storing the tensor would retain each step's
        # autograd graph, and device tensors cannot be plotted directly.
        step_loss = loss.item()
        train_losses.append(step_loss)
        print('Step: {} Loss: {:.4f}'.format(step + 1, step_loss))

        if (step + 1) % eval_freq == 0:
            test_x = cifar10['test'].images
            test_y = cifar10['test'].labels
            # Evaluation needs no gradients.
            with torch.no_grad():
                test_x_tensor = torch.from_numpy(
                    np.reshape(test_x,
                               [test_x.shape[0], n_inputs])).to(device)
                test_y_tensor = torch.from_numpy(test_y).to(device)
                test_out = mlp(test_x_tensor)
                test_accuracy = float(accuracy(test_out, test_y_tensor))
            accuracies.append(test_accuracy)
            steps.append(step + 1)
            print('Step: {} Accuracy {:.2f}'.format(step + 1, test_accuracy))

    plt.plot(range(max_steps), train_losses)
    plt.xlabel("Step")
    plt.ylabel("Training loss")
    plt.show()

    plt.plot(steps, accuracies)
    plt.show()
def train():
    """Train an MLP on CIFAR-10 with Adam and write a CSV summary.

    Configuration is read from ``FLAGS`` (``dnn_hidden_units``, ``neg_slope``,
    ``learning_rate``, ``eval_freq``, ``max_steps``, ``batch_size``). The
    model is evaluated on the test set every ``eval_freq`` steps and on the
    last step. Results are written to
    ``results/train_summary_torch_<timestamp>.csv``.
    """
    import os

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Get negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope

    lr = FLAGS.learning_rate
    eval_freq = FLAGS.eval_freq
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    input_size = 32 * 32 * 3
    output_size = 10

    # Create the output directory up front so a missing folder cannot make
    # the run fail only after all training has finished.
    os.makedirs('results', exist_ok=True)

    # load dataset
    raw_data = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT)
    train_data = raw_data['train']
    test_data = raw_data['test']

    model = MLP(n_inputs=input_size,
                n_hidden=dnn_hidden_units,
                n_classes=output_size,
                neg_slope=neg_slope)
    print(model.layers)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_target = nn.CrossEntropyLoss()
    csv_data = [[
        'step', 'train_loss', 'test_loss', 'train_accuracy', 'test_accuracy'
    ]]

    for step in range(max_steps):
        x, y = train_data.next_batch(batch_size)
        # Use the actual batch length instead of assuming batch_size rows.
        x = torch.tensor(x.reshape(x.shape[0], input_size),
                         dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.long)

        # train
        output = model.forward(x)
        # Labels are one-hot; CrossEntropyLoss expects class indices.
        loss = loss_target.forward(output, y.argmax(dim=1))
        loss_avg = loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc = accuracy(output, y)
        # with the \r and end = '' trick, we can print on the same line
        print('\r[{}/{}] train_loss: {}  train_accuracy: {}'.format(
            step + 1, max_steps, round(loss_avg, 3), round(train_acc, 3)),
              end='')

        # evaluate
        if step % eval_freq == 0 or step >= (max_steps - 1):
            x, y = test_data.next_batch(test_data.num_examples)
            x = torch.tensor(x.reshape(x.shape[0], input_size),
                             dtype=torch.float32)
            y = torch.tensor(y, dtype=torch.long)
            # Evaluation needs no gradients.
            with torch.no_grad():
                output = model.forward(x)
                test_loss = loss_target.forward(output,
                                                y.argmax(dim=1)).item()
            test_acc = accuracy(output, y)
            csv_data.append([step, loss_avg, test_loss, train_acc, test_acc])
            print(' test_loss: {}, test_accuracy: {}'.format(
                round(test_loss, 3), round(test_acc, 3)))

    summary_path = 'results/train_summary_torch_{}.csv'.format(
        int(time.time()))
    # newline='' is what the csv module requires to avoid blank rows on
    # platforms that translate line endings.
    with open(summary_path, 'w', newline='') as csv_file:
        csv.writer(csv_file).writerows(csv_data)
def train():
    """Train an MLP on CIFAR-10 with Adam (AMSGrad) and print test accuracy.

    Configuration is read from ``FLAGS`` (``dnn_hidden_units``, ``neg_slope``,
    ``learning_rate``, ``max_steps``, ``batch_size``, ``eval_freq``).
    Accuracy on a 10000-sample test batch is printed every ``eval_freq``
    steps and once more after training.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Get negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope

    cifar10 = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT)

    # A single training sample is drawn only to read off the flattened input
    # size and the number of classes.
    x, y = cifar10['train'].next_batch(1)
    x = x.reshape(x.shape[0], -1)

    x_test, y_test = cifar10['test'].next_batch(10000)
    x_test = torch.tensor(x_test.reshape(x_test.shape[0], -1))
    y_test = torch.tensor(y_test)

    model = MLP(x.shape[1], dnn_hidden_units, y.shape[1], neg_slope)
    crossEntropy = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=FLAGS.learning_rate,
                                 amsgrad=True)

    # Mini-batch gradient descent
    for i in range(FLAGS.max_steps):
        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        x = torch.tensor(x.reshape(x.shape[0], -1))
        y = torch.LongTensor(y)

        prediction = model.forward(x)
        # Labels are one-hot; CrossEntropyLoss expects class indices.
        loss = crossEntropy.forward(prediction, torch.max(y, 1)[1])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % FLAGS.eval_freq == 0:
            # Evaluation needs no gradients.
            with torch.no_grad():
                prediction = model.forward(x_test)
                # dim=1 is what the deprecated implicit choice resolved to
                # for a 2-D input; stating it avoids the warning.
                prediction = nn.functional.softmax(prediction, dim=1)
            print('Accuracy after ' + str(i) + ' steps ' +
                  str(accuracy(prediction, y_test)))

    with torch.no_grad():
        prediction = model.forward(x_test)
    print('Final accuracy')
    print(accuracy(prediction, y_test))
def train(running_loss=0.0):
    """Train an MLP on CIFAR-10 with Adam and save sampled metric logs.

    Configuration is read from ``FLAGS`` (``dnn_hidden_units``, ``data_dir``,
    ``batch_size``, ``max_steps``, ``learning_rate``, ``eval_freq``). Tensors
    use the ``dtype`` and ``device`` objects defined outside this function.
    Every ``eval_freq`` steps, loss and accuracy are averaged over random
    64-sample mini-batches drawn from the start of the train and test sets.
    The four logs are saved as ``.npy`` files in ``./mlp_results_pytorch/``.

    Args:
        running_loss: Unused; kept so existing callers keep working.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Set path to data
    data = cifar10_utils.get_cifar10(FLAGS.data_dir)

    num_images_test, height, width, channels = data['test'].images.shape
    image_dims_ravel = height * width * channels

    def to_device_tensor(array):
        """Convert a numpy array to a tensor of the configured dtype/device."""
        return torch.tensor(array, requires_grad=False).type(dtype).to(device)

    # Make acceptable input for test
    X_test = to_device_tensor(data["test"].images.reshape(
        (num_images_test, image_dims_ravel)))
    Y_test = to_device_tensor(data["test"].labels)

    # Snapshot of the training set used only for evaluation. The original
    # rebuilt these tensors from the full dataset on every training step.
    train_images = data['train'].images
    X_train = to_device_tensor(
        train_images.reshape((train_images.shape[0], image_dims_ravel)))
    Y_train = to_device_tensor(data['train'].labels)

    # The inputs live on `device`, so the model has to be moved there too.
    model = MLP(n_inputs=image_dims_ravel,
                n_hidden=dnn_hidden_units,
                n_classes=10).to(device)

    accuracy_train_log = list()
    accuracy_test_log = list()
    loss_train_log = list()
    loss_test_log = list()

    batch_size = FLAGS.batch_size
    numb_iterations = FLAGS.max_steps
    learning_rate = FLAGS.learning_rate
    evaluation_freq = FLAGS.eval_freq
    # "%2d" truncated the (fractional) learning rate to an integer.
    logging.info("learning rate: %g ", learning_rate)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Sampling parameters carried over unchanged from the original code:
    # indices are drawn from range(1, limit), i.e. only the first `limit`
    # samples, with index 0 never selected.
    sample_size = 64
    train_rounds, train_index_limit = 70, 5000
    test_rounds, test_index_limit = 15, 1000

    def sampled_metrics(inputs, onehot_labels, rounds, index_limit):
        """Return mean loss and mean accuracy over random mini-batches."""
        list_loss = list()
        list_acc = list()
        with torch.no_grad():
            for _ in range(rounds):
                selection = random.sample(range(1, index_limit), sample_size)
                targets = onehot_labels[selection].argmax(dim=1)
                outputs = model(inputs[selection])
                list_loss.append(criterion(outputs, targets).item())
                list_acc.append(accuracy(outputs, onehot_labels[selection]))
        return np.mean(list_loss), np.mean(list_acc)

    for step in range(numb_iterations):
        X_batch, Y_batch = data['train'].next_batch(batch_size)
        X_batch = X_batch.reshape((X_batch.shape[0], image_dims_ravel))

        # Convert to tensors which are handled by the device
        X_batch = torch.from_numpy(X_batch).type(dtype).to(device)
        Y_batch = torch.from_numpy(Y_batch).type(dtype).to(device)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        targs = Y_batch.argmax(dim=1)
        outputs = model.forward(X_batch)
        loss_current = criterion(outputs, targs)
        loss_current.backward()
        optimizer.step()

        if step % evaluation_freq == 0:
            mean_loss, mean_acc = sampled_metrics(X_train, Y_train,
                                                  train_rounds,
                                                  train_index_limit)
            loss_train_log.append(mean_loss)
            accuracy_train_log.append(mean_acc)
            logging.info("train performance: loss = %4f, accuracy = %4f ",
                         loss_train_log[-1], accuracy_train_log[-1])

            mean_loss, mean_acc = sampled_metrics(X_test, Y_test,
                                                  test_rounds,
                                                  test_index_limit)
            loss_test_log.append(mean_loss)
            accuracy_test_log.append(mean_acc)
            logging.info("test performance: loss = %4f , accuracy = %4f\n",
                         loss_test_log[-1], accuracy_test_log[-1])

    path = "./mlp_results_pytorch/"
    # np.save does not create missing directories.
    os.makedirs(path, exist_ok=True)
    date_time = datetime.now().replace(
        second=0, microsecond=0).strftime(format="%Y-%m-%d-%H-%M")
    np.save(os.path.join(path, date_time + "accuracy_test"),
            accuracy_test_log)
    np.save(os.path.join(path, date_time + "loss_test"), loss_test_log)
    np.save(os.path.join(path, date_time + "loss_train"), loss_train_log)
    np.save(os.path.join(path, date_time + "accuracy_train"),
            accuracy_train_log)