def train(): """ Performs training and evaluation of MLP model. Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) torch.manual_seed(42) if torch.cuda.is_available(): torch.cuda.manual_seed(42) torch.cuda.manual_seed_all(42) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") # print("Device", device) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units] else: dnn_hidden_units = [] # DNN_HIDDEN_UNITS_DEFAULT = '100' # LEARNING_RATE_DEFAULT = 1e-3 # MAX_STEPS_DEFAULT = 1400 # BATCH_SIZE_DEFAULT = 200 # EVAL_FREQ_DEFAULT = 100 data = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir) train = data['train'] print(train.images.shape) test = data['test'] n_inputs = train.images[0].flatten().shape[0] n_classes = train.labels[0].shape[0] mlp = MLP(n_inputs, dnn_hidden_units, n_classes) loss_mod = nn.CrossEntropyLoss() if FLAGS.optimizer == 'SGD': optimizer = torch.optim.SGD(mlp.parameters(), lr=FLAGS.learning_rate) elif FLAGS.optimizer == 'AdamW': optimizer = torch.optim.AdamW(mlp.parameters(), lr=FLAGS.learning_rate) mlp.to(device) loss_history = [] acc_history = [] for step in range(FLAGS.max_steps): #FLAGS.max_steps mlp.train() x, y = train.next_batch(FLAGS.batch_size) x = torch.from_numpy(x.reshape(x.shape[0], n_inputs)).to(device) y = torch.from_numpy(np.argmax(y, axis=1)).to(device) # converts onehot to dense out = mlp(x) loss = loss_mod(out, y) loss_history.append(loss) optimizer.zero_grad() loss.backward() optimizer.step() if step == 0 or (step + 1) % FLAGS.eval_freq == 0: mlp.eval() 
with torch.no_grad(): x, y = test.images, test.labels x = torch.from_numpy(x.reshape(x.shape[0], n_inputs)).to(device) y = torch.from_numpy(y).to(device) test_out = mlp.forward(x) acc = accuracy(test_out, y) print('Accuracy:', acc) acc_history.append(acc) print('Final loss:', loss_history[-1]) print('Final acc:', acc_history[-1]) plt.plot(loss_history) plt.step(range(0, FLAGS.max_steps + 1, FLAGS.eval_freq), acc_history) # range(0, FLAGS.max_steps, FLAGS.eval_freq) plt.legend(['loss', 'accuracy']) plt.show()
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) torch.manual_seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### net = MLP(3072, dnn_hidden_units, 10) net.to(device) criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(net.parameters(), lr = FLAGS.learning_rate) #Load cifar10 cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir) print() print() print("----------------------------------------------") print("\t \t Training") print("----------------------------------------------\n") pl_loss =[] average_loss =[] moving_average=0.0 acc =[] count = 1 acc =[] check =0 for iter_ in np.arange(0, FLAGS.max_steps): #Load batches x , y = cifar10['train'].next_batch(FLAGS.batch_size) labels = np.argmax(y, axis=1) #reshape x into vectors x = np.reshape(x, (200, 3072)) inputs, labels = torch.from_numpy(x), torch.LongTensor(torch.from_numpy(labels)) inputs, labels = inputs.to(device), labels.to(device) # # labels = torch.LongTensor(labels) # # zero the parameter gradients optimizer.zero_grad() # # forward + backward + optimize outputs = net(inputs) print("output: {}, labels:{}".format(outputs.size(),labels.size())) loss = criterion(outputs, labels) loss.backward() optimizer.step() # # print statistics running_loss = loss.item() pl_loss.append(running_loss) moving_average+=running_loss average_loss.append(np.mean(np.mean(pl_loss[:-100:-1]))) print("iter: {} | training loss: {} ".format(iter_,"%.3f"%running_loss)) if 
(iter_+1)%FLAGS.eval_freq==0: net.eval() acc.append(evaluate(net, cifar10, FLAGS.batch_size)) ####################### # END OF YOUR CODE # ####################### plt.plot(pl_loss,'r-', label="Batch loss", alpha=0.5) plt.plot(average_loss,'g-', label="Average loss", alpha=0.5) plt.legend() plt.xlabel("Iterations") plt.ylabel("Loss") plt.title("Training Loss") plt.grid(True) plt.show() plt.close() plt.plot(acc,'g-', alpha=0.5) plt.xlabel("Iterations") plt.ylabel("Accuracy") plt.title("Test Accuracy") plt.grid(True) plt.show() plt.close() print() print("TRAINING COMPLETED")
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### ############################## VARIABLES ############################## SAVE_PLOTS = False SAVE_LOGS = False img_size = 32 n_classes = 10 input_size = img_size * img_size * 3 batch_size = FLAGS.batch_size eval_freq = FLAGS.eval_freq n_iterations = FLAGS.max_steps lr_rate = FLAGS.learning_rate device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print("Device:", device) ############################## METHODS ############################## # fp = open('memory_profiler_basic_mean.log', 'w+') # @profile(stream=fp) def test(): net.eval() output_t = net(x_t) loss_t = criterion(output_t, y_t).detach() acc_t = accuracy(output_t.detach(), y_t_onehot) return acc_t, loss_t def plot(iteration): idx_test = list(range(0, iteration + 1, eval_freq)) idx = list(range(0, iteration + 1)) plt.clf() plt.cla() plt.subplot(1, 2, 1) plt.plot(idx_test, test_accuracies, "k-", linewidth=1, label="test") plt.plot(idx, accuracies, "r-", linewidth=0.5, alpha=0.5, label="train") plt.xlabel('iteration') plt.ylabel('accuracy') plt.legend() plt.subplot(1, 2, 2) plt.plot(idx_test, test_losses, "k-", linewidth=1, label="test") plt.plot(idx, losses, "r-", linewidth=0.5, alpha=0.5, label="train") plt.xlabel('iteration') plt.ylabel('loss') plt.legend() plt.savefig("./out/plot/plot_pytorch_" + str(batch_size) + "_" + str(lr_rate) + ".png", 
bbox_inches='tight') return def to_label(tensor): _, tensor = tensor.max(1) return tensor ############################## MAIN ############################## cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py') net = MLP(input_size, dnn_hidden_units, n_classes) net.to(device) criterion = nn.CrossEntropyLoss() # optimizer = optim.SGD(net.parameters(), lr=lr_rate, momentum=0.8, nesterov=False) optimizer = optim.Adam(net.parameters(), lr=lr_rate) losses = [] accuracies = [] test_accuracies = [] test_losses = [] alpha = 0.0001 x_t = cifar10['test'].images y_t = cifar10['test'].labels x_t = torch.from_numpy(x_t.reshape(-1, input_size)) y_t_onehot = torch.from_numpy(y_t).type(torch.LongTensor) y_t = to_label(y_t_onehot) x_t, y_t = x_t.to(device), y_t.to(device) y_t_onehot = y_t_onehot.to(device) plt.figure(figsize=(10, 4)) for i in range(n_iterations): x, y = cifar10['train'].next_batch(batch_size) x = torch.from_numpy(x.reshape(-1, input_size)) y_onehot = torch.from_numpy(y).type(torch.LongTensor) y = to_label(y_onehot) x, y = x.to(device), y.to(device) y_onehot = y_onehot.to(device) optimizer.zero_grad() output = net(x) train_loss = criterion(output, y) reg_loss = 0 for param in net.parameters(): reg_loss += param.norm(2) loss = train_loss + alpha * reg_loss loss.backward() optimizer.step() losses.append(loss.item()) accuracies.append(accuracy(output.detach().data, y_onehot.detach())) del x, y if i % eval_freq == 0: acc_t, loss_t = test() test_accuracies.append(acc_t) test_losses.append(loss_t) log_string = "[{:5d}/{:5d}] Test Accuracy: {:.4f} | Batch Accuracy: {:.4f} | Batch Loss: {:.6f} | Train/Reg: {:.6f}/{:.6f}\n".format( i, n_iterations, test_accuracies[-1], accuracies[-1], loss, train_loss, reg_loss * alpha ) print(log_string) if SAVE_LOGS: with open("./out/log/pytorch_log_" + str(batch_size) + "_" + str(lr_rate) + ".txt", "a") as myfile: myfile.write(log_string) if SAVE_PLOTS: plot(i) net.train()
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ # DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) torch.manual_seed(42) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False # Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### def reshape_cifar10_mlp(x): batch_size = x.shape[0] x = x.transpose([2, 3, 1, 0]) x = x.reshape([-1, batch_size]) x = x.transpose() return x device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir) x_train, y_train = cifar10['train'].next_batch(FLAGS.batch_size) x_train = reshape_cifar10_mlp(x_train) x_train = torch.from_numpy(x_train).to(device) y_train = torch.from_numpy(y_train).to(device) crossent_softmax = nn.CrossEntropyLoss() mlp = MLP(x_train.shape[1], dnn_hidden_units, y_train.shape[1], bn_flag=True) # optimizer = torch.optim.SGD(mlp.parameters(), lr=FLAGS.learning_rate) optimizer = torch.optim.Adam(mlp.parameters(), weight_decay=1e-3) mlp.to(device) train_accs = [] train_losses = [] eval_accs = [] eval_losses = [] for i in np.arange(FLAGS.max_steps): print('\nStep: {}\n'.format(i)) print('Training: ') optimizer.zero_grad() logits = mlp(x_train) train_loss = crossent_softmax(logits, y_train.argmax(dim=-1)) train_acc = accuracy(logits, y_train) print('loss: {:.4f}, acc: {:.4f}\n'.format(train_loss, train_acc)) train_loss.backward() optimizer.step() x_train, y_train = cifar10['train'].next_batch(FLAGS.batch_size) x_train = reshape_cifar10_mlp(x_train) 
x_train = torch.from_numpy(x_train).to(device) y_train = torch.from_numpy(y_train).to(device) if i % FLAGS.eval_freq == 0: with torch.no_grad(): print('Evaluation: ') x_eval, y_eval = cifar10['test'].images, cifar10['test'].labels x_eval = reshape_cifar10_mlp(x_eval) x_eval = torch.from_numpy(x_eval).to(device) y_eval = torch.from_numpy(y_eval).to(device) logits = mlp(x_eval) eval_loss = crossent_softmax(logits, y_eval.argmax(dim=-1)) eval_acc = accuracy(logits, y_eval) train_losses.append(train_loss) train_accs.append(train_acc) eval_losses.append(eval_loss) eval_accs.append(eval_acc) print('loss: {:.4f}, acc: {:.4f}'.format(eval_loss, eval_acc)) print('Evaluation: ') x_eval, y_eval = cifar10['test'].images, cifar10['test'].labels x_eval = reshape_cifar10_mlp(x_eval) x_eval = torch.from_numpy(x_eval).to(device) y_eval = torch.from_numpy(y_eval).to(device) logits = mlp(x_eval) eval_loss = crossent_softmax(logits, y_eval.argmax(dim=-1)) eval_acc = accuracy(logits, y_eval) train_losses.append(train_loss) train_accs.append(train_acc) eval_losses.append(eval_loss) eval_accs.append(eval_acc) print('loss: {:.4f}, acc: {:.4f}'.format(eval_loss, eval_acc)) print('Finished training.') plt.figure(figsize=(10, 5)) plt.plot(np.arange(len(train_losses)), train_losses, label='training loss') plt.plot(np.arange(len(eval_losses)), eval_losses, label='evaluation loss') plt.ylim(0, 3) plt.legend() plt.xlabel('Iterations [x{}]'.format(FLAGS.eval_freq)) plt.savefig('results/mlp_loss_torch_adam_layers_maxstep_reg_batch.png', bbox_inches='tight') plt.figure(figsize=(10, 5)) plt.plot(np.arange(len(train_accs)), train_accs, label='training accuracy') plt.plot(np.arange(len(eval_accs)), eval_accs, label='evaluation accuracy') plt.legend() plt.xlabel('Iterations [x{}]'.format(FLAGS.eval_freq)) plt.savefig('results/mlp_acc_torch_adam_layers_maxstep_reg_batch.png', bbox_inches='tight')
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) torch.manual_seed(42) # torch.backends.cudnn.deterministic = True # torch.backends.cudnn.benchmark = False ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units] else: dnn_hidden_units = [] # Get negative slope parameter for LeakyReLU neg_slope = FLAGS.neg_slope device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # print("[DEBUG], Device ", device) ######################## # PUT YOUR CODE HERE # ####################### cifar10 = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir) train_data = cifar10['train'] # 60000 x 3 x 32 x32 -> 60000 x 3072, input vector 3072 n_inputs = train_data.images.reshape(train_data.images.shape[0], -1).shape[1] n_hidden = dnn_hidden_units n_classes = train_data.labels.shape[1] # print(f"[DEBUG] n_inputs {n_inputs}, n_classes {n_classes}") model = MLP(n_inputs, n_hidden, n_classes, FLAGS.neg_slope) model.to(device) params = model.parameters() if FLAGS.optimizer == 'Adam': optimizer = torch.optim.Adam(params, lr=FLAGS.learning_rate) elif FLAGS.optimizer == 'Adamax': optimizer = torch.optim.Adamax(params, lr=FLAGS.learning_rate) elif FLAGS.optimizer == 'Adagrad': optimizer = torch.optim.Adagrad(params, lr=FLAGS.learning_rate) elif FLAGS.optimizer == 'Adadelta': optimizer = torch.optim.Adadelta(params, lr=FLAGS.learning_rate) elif FLAGS.optimizer == 'SparseAdam': optimizer = torch.optim.SparseAdam(params, lr=FLAGS.learning_rate) else: optimizer = torch.optim.SGD(params,lr=FLAGS.learning_rate) criterion = torch.nn.CrossEntropyLoss() 
train_acc_plot = [] test_acc_plot = [] loss_train = [] loss_test = [] rloss = 0 best_accuracy = 0 # print('[DEBUG] start training') for i in range(0, FLAGS.max_steps): x, y = cifar10['train'].next_batch(FLAGS.batch_size) x, y = torch.from_numpy(x).float().to(device) , torch.from_numpy(y).float().to(device) x = x.reshape(x.shape[0], -1) out = model.forward(x) loss = criterion.forward(out, y.argmax(1)) optimizer.zero_grad() loss.backward() optimizer.step() rloss += loss.item() if i % FLAGS.eval_freq == 0: train_accuracy = accuracy(out, y) with torch.no_grad(): test_accuracys, test_losses = [] ,[] for j in range(0, FLAGS.max_steps): test_x, test_y = cifar10['test'].next_batch(FLAGS.batch_size) test_x, test_y = torch.from_numpy(test_x).float().to(device) , torch.from_numpy(test_y).float().to(device) test_x = test_x.reshape(test_x.shape[0], -1) test_out = model.forward(test_x) test_loss = criterion(test_out, test_y.argmax(1)) test_accuracy = accuracy(test_out, test_y) if device == 'cpu': test_losses.append(test_loss) else: test_losses.append(test_loss.cpu().data.numpy()) test_accuracys.append(test_accuracy) t_acc = np.array(test_accuracys).mean() t_loss = np.array(test_losses).mean() train_acc_plot.append(train_accuracy) test_acc_plot.append(t_acc) loss_train.append(rloss/(i + 1)) loss_test.append(t_loss) print(f"iter {i}, train_loss_avg {rloss/(i + 1)}, test_loss_avg {t_loss}, train_acc {train_accuracy}, test_acc_avg {t_acc}") if t_acc > best_accuracy: best_accuracy = t_acc print(f"Best Accuracy {best_accuracy}",flush=True) if FLAGS.plot: print('Start plotting...') fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) ax1.plot(np.arange(len(train_acc_plot)), train_acc_plot, label='training') ax1.plot(np.arange(len(test_acc_plot)), test_acc_plot, label='testing') ax1.set_title('Training evaluation batch size '+str(FLAGS.batch_size)+' learning rate '+str(FLAGS.learning_rate)+ '\n best accuracy '+str(best_accuracy) ) ax1.set_ylabel('Accuracy') ax1.legend() 
ax2.plot(np.arange(len(loss_train)), loss_train, label='Train Loss') ax2.plot(np.arange(len(loss_test)), loss_test, label='Test Loss') ax2.set_title('Loss evaluation') ax2.set_ylabel('Loss') ax2.legend() plt.xlabel('Iteration') plt.savefig('pytorch.png')
def train(): """ Performs training and evaluation of MLP model. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) torch.manual_seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### # load the test daa cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir, one_hot=False) test_images, test_labels = torch.from_numpy(cifar10['test'].images).to(device), \ torch.from_numpy(cifar10['test'].labels).to(device) # flatten the images for the MLP test_vectors = reshape_images(test_images) # set up the model mlp_model = MLP(3072, dnn_hidden_units, 10) mlp_model.to(device) loss_module = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(mlp_model.parameters(), lr=FLAGS.learning_rate) accuracies = [] losses = [] mlp_model.train() for i in range(FLAGS.max_steps): # load data images, labels = cifar10['train'].next_batch(FLAGS.batch_size) image_vectors = reshape_images(images) image_vectors, labels = torch.from_numpy( image_vectors), torch.from_numpy(labels) image_vectors, labels = image_vectors.to(device), labels.to(device) labels.to(device) # forward pass model_pred = mlp_model(image_vectors) # calculate the loss loss = loss_module(model_pred, labels) # backward pass optimizer.zero_grad() loss.backward() # update the parameters optimizer.step() # evaluate the model on the data set every eval_freq steps mlp_model.eval() if i % FLAGS.eval_freq == 0: with torch.no_grad(): test_pred = mlp_model(test_vectors) test_accuracy = accuracy(test_pred, test_labels) accuracies.append(test_accuracy) losses.append(loss) mlp_model.train() plot_curve(accuracies, 'Accuracy') plot_curve(losses, 'Loss')
def train(): """ Performs training and evaluation of MLP model. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print('device', device) # flags batch_size = FLAGS.batch_size optim = FLAGS.optimizer lr = FLAGS.learning_rate # cifar cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir) x_test_np, y_test_np = cifar10['test'].images, cifar10['test'].labels (test_images, height, width, colors) = x_test_np.shape n_inputs = height * width * colors (_, n_classes) = y_test_np.shape # torch crap x_test_flat = x_test_np.reshape((test_images, n_inputs)) x_test_torch = torch.from_numpy(x_test_flat).to(device) y_test_torch = torch.from_numpy(y_test_np).long().to(device) idx_test = torch.argmax(y_test_torch, dim=-1).long() # model ce = torch.nn.CrossEntropyLoss() model = MLP(n_inputs, dnn_hidden_units, n_classes) model.to(device) pars = model.parameters() # optimizer optim_pars = {'params': pars, 'lr': lr, 'weight_decay': FLAGS.weight_decay} if optim == 'adadelta': optimizer = torch.optim.Adadelta(**optim_pars) elif optim == 'adagrad': optimizer = torch.optim.Adagrad(**optim_pars) elif optim == 'rmsprop': optimizer = torch.optim.RMSprop(**optim_pars) elif optim == 'adam': optimizer = torch.optim.Adam(**optim_pars) else: # default is SGD, same as the numpy version optimizer = torch.optim.SGD(**optim_pars) cols = ['train_acc', 'test_acc', 'train_loss', 'test_loss', 'secs'] # train results = [] name = f'mlp-pytorch-{optim}' with SummaryWriter(name) as w: for step in tqdm(range(FLAGS.max_steps)): # 
print(step) optimizer.zero_grad() # batch x_train_np, y_train_np = cifar10['train'].next_batch(batch_size) x_train_flat = x_train_np.reshape((batch_size, n_inputs)) x_train_torch = torch.from_numpy(x_train_flat).to(device) y_train_torch = torch.from_numpy(y_train_np).long().to(device) idx_train = torch.argmax(y_train_torch, dim=-1).long() # results train_predictions = model.forward(x_train_torch) train_loss = ce(train_predictions, idx_train) train_acc = accuracy(train_predictions, idx_train) # evaluate if step % FLAGS.eval_freq == 0: time = int(step / FLAGS.eval_freq) start = timer() test_predictions = model.forward(x_test_torch) end = timer() secs = end - start test_loss = ce(test_predictions, idx_test) test_acc = accuracy(test_predictions, idx_test) vals = [train_acc, test_acc, train_loss, test_loss, secs] stats = dict( zip(cols, [ np.asscalar(i.detach().cpu().numpy().take(0)) if isinstance(i, torch.Tensor) else np.asscalar(i) if isinstance(i, (np.ndarray, np.generic)) else i for i in vals ])) # print(yaml.dump({k: round(i, 3) if isinstance(i, float) else i for k, i in stats.items()})) print(test_acc.item()) w.add_scalars('metrics', stats, time) results.append(stats) # stop if loss has converged! 
check = 10 if len(results) >= 2 * check: threshold = 1e-6 losses = [item['train_loss'] for item in results] current = np.mean(losses[-check:]) prev = np.mean(losses[-2 * check:-check]) if (prev - current) < threshold: break train_loss.backward() optimizer.step() # w.add_scalars('metrics', stats) df = pd.DataFrame(results, columns=cols) meta = { 'framework': 'pytorch', 'algo': 'mlp', 'optimizer': optim, 'batch_size': FLAGS.batch_size, 'learning_rate': FLAGS.learning_rate, 'dnn_hidden_units': FLAGS.dnn_hidden_units, 'weight_decay': FLAGS.weight_decay, 'max_steps': FLAGS.max_steps, } for k, v in meta.items(): df[k] = v csv_file = os.path.join( os.getcwd(), 'results', f'{name}-batch={FLAGS.batch_size}-lr={FLAGS.learning_rate}-hidden={FLAGS.dnn_hidden_units}-regularization={FLAGS.weight_decay}-steps={FLAGS.max_steps}.csv' ) df.to_csv(csv_file) csv_file = os.path.join(os.getcwd(), 'results', 'results.csv') if os.path.isfile(csv_file): df.to_csv(csv_file, header=False, mode='a') else: df.to_csv(csv_file, header=True, mode='w') torch_file = os.path.join(os.getcwd(), 'results', f'{name}.pth') torch.save(model.state_dict(), torch_file) print('done!') return test_loss
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility torch.manual_seed(42) np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units] else: dnn_hidden_units = [] device = torch.device("cuda") mlp = MLP(IMAGE_SIZE, dnn_hidden_units, 10) mlp.to(device) cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir) # get all train data x_train, y_train = cifar10["train"].images, cifar10["train"].labels x_train = x_train.reshape((x_train.shape[0], IMAGE_SIZE)) x_train = torch.torch.from_numpy(x_train).to(device) y_train = y_train.argmax(axis=1) y_train = torch.from_numpy(y_train).to(device).type(torch.long) # get test data x_test, y_test = cifar10["test"].images, cifar10["test"].labels x_test = x_test.reshape((x_test.shape[0], IMAGE_SIZE)) inputs_test = torch.torch.from_numpy(x_test).to(device) y_test = y_test.argmax(axis=1) targets_test = torch.from_numpy(y_test).to(device).type(torch.long) # define a loss function and an optimizer, as mentioned in the official framework's tutorial # https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py loss_function = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(mlp.parameters(), lr=5e-2, weight_decay=2e-3, momentum=0.8) losses_test = [] losses_train = [] accuracies_test = [] accuracies_train = [] for i in range(FLAGS.max_steps): # getting batch for forwarding x, y = cifar10['train'].next_batch(FLAGS.batch_size) x = x.reshape((FLAGS.batch_size, IMAGE_SIZE)) class_indexes_targets = y.argmax(axis=1) # pytorch does not support one hot encoded vectors as 
targests # making a tensor which pytorch can work with inputs = torch.from_numpy(x).to(device) targets = torch.from_numpy(class_indexes_targets).to(device).type(torch.long) # making gradients zero, framework tutorial states if we don't do so the gradients at each step will just add optimizer.zero_grad() predictions = mlp.forward(inputs) # forwarding batch into the network loss = loss_function.forward(predictions, targets) # calculating the Cross Entropy loss loss.backward() # backwards the loss into the net, updating gradients optimizer.step() # updating the weights if i % FLAGS.eval_freq == 0 or i == FLAGS.max_steps - 1: # evaluate on Test forward_test = mlp.forward(inputs_test) acc = accuracy(forward_test, targets_test) accuracies_test.append(acc.item()) loss_test = loss_function.forward(forward_test, targets_test) losses_test.append(loss_test.item()) print("TEST loss:" + str(round(losses_test[-1], 2)) + " acc:" + str( round(accuracies_test[-1], 2)) + " model:" + str(i)) # evaluate on Train forward_train = mlp.forward(x_train) acc_train = accuracy(forward_train, y_train) accuracies_train.append(acc_train.item()) loss_train = loss_function.forward(forward_train, y_train) losses_train.append(loss_train.item()) print("TRAIN loss:" + str(round(losses_train[-1], 2)) + " acc:" + str( round(accuracies_train[-1], 2)) + " model:" + str(i)) with open('../results/torch_mlp.pkl', 'wb') as f: mlp_data = dict() mlp_data["train_loss"] = losses_train mlp_data["test_loss"] = losses_test mlp_data["train_acc"] = accuracies_train mlp_data["test_acc"] = accuracies_test pk.dump(mlp_data, f) x = [i * FLAGS.eval_freq for i in range(len(accuracies_test))] plt.title("Torch MLP loss accuracy") plt.plot(x, accuracies_test, label="accuracy") plt.plot(x, losses_test, label="loss") plt.legend() plt.savefig("../results/pytorch_mlp.png") plt.show()
def train(): """ Performs training and evaluation of MLP model. TODO: Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations. """ ### DO NOT CHANGE SEEDS! # Set the random seeds for reproducibility np.random.seed(42) ## Prepare all functions # Get number of units in each hidden layer specified in the string such as 100,100 if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] ######################## # PUT YOUR CODE HERE # ####################### cifar10 = cifar10_utils.get_cifar10() if torch.cuda.is_available(): # print(torch.device('cpu'), torch.device("cuda")) device = torch.device("cuda") else: device = torch.device("cpu") network = MLP(3072, dnn_hidden_units, 10) network.to(device) criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(network.parameters(), lr=FLAGS.learning_rate) #, weight_decay=1/(200*9)) # optimizer = optim.RMSprop(network.parameters(), lr=FLAGS.learning_rate) # optimizer = optim.SGD(network.parameters(), lr=FLAGS.learning_rate) # print(FLAGS.batch_size) # print(FLAGS.eval_freq) # print(FLAGS.learning_rate) # print(FLAGS.max_steps) plotting_accuracy = [] plotting_loss = [] plotting_accuracy_test = [] plotting_loss_test = [] for i in range(1, FLAGS.max_steps - 1): x, y = cifar10['train'].next_batch(FLAGS.batch_size) x = torch.from_numpy(x) y = torch.from_numpy(y) x = x.to(device) y = y.to(device) x = x.view(FLAGS.batch_size, -1) out = network.forward(x) loss = criterion(out, y.argmax(dim=1)) # print("Batch: {} Loss {}".format(i, loss)) # acc = accuracy(out, y) # print("Accuracy: {}".format(acc)) optimizer.zero_grad() loss.backward() optimizer.step() # learning_rate = 0.01 # for f in network.parameters(): # f.data.sub_(f.grad.data * learning_rate) # if (i % FLAGS.eval_freq == 0): # # print("TRAIN Batch: {} Loss {}".format(i, loss.item())) # 
acc = accuracy(out, y) # print("TRAIN Accuracy: {}".format(acc)) # plotting_accuracy.append(acc) # plotting_loss.append(loss.item()) # # x, y = cifar10['test'].next_batch(5000) # x = torch.from_numpy(x) # y = torch.from_numpy(y) # x = x.to(device) # y = y.to(device) # x = x.view(5000, -1) # out = network.forward(x) # loss = criterion(out, y.argmax(dim=1)) # # print("TEST Batch: {} Loss {}".format(i, loss)) # acc = accuracy(out, y) # print("TEST Accuracy: {}".format(acc)) # # print(loss.item()) # # print(asdasd) # plotting_accuracy_test.append(acc) # plotting_loss_test.append(loss.item()) if (i == FLAGS.max_steps - FLAGS.eval_freq): print("hellooo") acc = accuracy(out, y) print("TRAIN Accuracy: {}".format(acc)) train_accuracy = acc train_loss = loss.item() x, y = cifar10['test'].next_batch(5000) x = torch.from_numpy(x) y = torch.from_numpy(y) x = x.to(device) y = y.to(device) x = x.view(5000, -1) out = network.forward(x) loss = criterion(out, y.argmax(dim=1)) acc = accuracy(out, y) print("TEST Accuracy: {}".format(acc)) test_accuracy = acc test_loss = loss.item() with open('MLP_results.csv', 'a') as output_file: writer = csv.writer(output_file) writer.writerow([ FLAGS.dnn_hidden_units, FLAGS.learning_rate, train_accuracy, train_loss, test_accuracy, test_loss ])
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP on CIFAR-10, and every ``FLAGS.eval_freq`` steps sweeps
    both the train and the whole test set in mini-batches to record average
    loss and accuracy. Training stops early once the test-loss change between
    consecutive evaluations falls below ``eps``. Curves are plotted (unless
    ``FLAGS.optimize`` is set) and saved as .npy files.

    Returns:
        Tuple of lists (train_loss, test_loss, train_acc, test_acc), one
        entry per evaluation point.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [int(u) for u in FLAGS.dnn_hidden_units.split(",")]
    else:
        dnn_hidden_units = []

    # Load data.
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

    # Hyperparameters.
    eta = FLAGS.learning_rate
    eps = 1e-6  # convergence criterion on the test-loss delta
    max_steps = FLAGS.max_steps
    b_size = FLAGS.batch_size

    # Load test data once to derive the problem dimensions.
    # NOTE(review): `dtype` and `device` are assumed to be module-level
    # globals defined elsewhere in this file.
    x_test = cifar10["test"].images
    y_test = cifar10["test"].labels
    y_test = torch.tensor(y_test, requires_grad=False).type(dtype).to(device)

    # Useful dimensions: flattened image size and per-set batch counts.
    n_inputs = np.size(x_test, 0)
    n_classes = np.size(y_test, 1)
    v_size = np.size(x_test, 1) * np.size(x_test, 2) * np.size(x_test, 3)
    n_test_batches = np.size(x_test, 0) // b_size

    x_train = cifar10["train"].images
    x_train = x_train.reshape((np.size(x_train, 0), v_size))
    x_train = torch.tensor(x_train, requires_grad=False).type(dtype).to(device)
    n_train_batches = np.size(x_train, 0) // b_size

    # Initialize the MLP model, loss and optimizer.
    model = MLP(n_inputs=v_size,
                n_hidden=dnn_hidden_units,
                n_classes=n_classes,
                b_norm=FLAGS.b_norm)
    get_loss = torch.nn.CrossEntropyLoss()
    if FLAGS.optimizer == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=eta)
    elif FLAGS.optimizer == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=eta)
    model.to(device)

    train_loss = []
    test_loss = []
    train_acc = []
    test_acc = []

    for step in range(max_steps):
        # Get a batch and stretch the input images into vectors.
        x, y = cifar10['train'].next_batch(b_size)
        y = torch.tensor(y).type(dtype).to(device)
        x = torch.tensor(x.reshape(b_size, v_size)).type(dtype).to(device)

        # Forward pass; labels are one-hot, CrossEntropyLoss wants indices.
        pred = model.forward(x)
        current_loss = get_loss(pred, y.argmax(dim=1))

        optimizer.zero_grad()
        current_loss.backward()
        optimizer.step()

        # Evaluation step.
        if (step % FLAGS.eval_freq) == 0:
            # BUGFIX: `c_train_loss = 0` was commented out while
            # `c_train_loss += ...` below remained, raising NameError on the
            # first evaluation.
            c_train_loss = 0
            current_train_acc = 0
            c_test_loss = 0
            current_test_acc = 0

            # Evaluation needs no autograd graph (this also replaces the
            # previous no-op bare `.detach()` calls).
            with torch.no_grad():
                # Sweep the train set in batches.
                for _ in range(n_train_batches):
                    xb, yb = cifar10['train'].next_batch(b_size)
                    yb = torch.tensor(
                        yb, requires_grad=False).type(dtype).to(device)
                    xb = torch.tensor(
                        xb.reshape(b_size, v_size),
                        requires_grad=False).type(dtype).to(device)

                    train_pred = model.forward(xb)
                    current_train_loss = get_loss(train_pred, yb.argmax(dim=1))
                    c_train_loss += current_train_loss.data.item()
                    current_train_acc += accuracy(train_pred, yb)

                # Sweep the entire test set in batches.
                for _ in range(n_test_batches):
                    xb, yb = cifar10['test'].next_batch(b_size)
                    yb = torch.tensor(
                        yb, requires_grad=False).type(dtype).to(device)
                    xb = torch.tensor(
                        xb.reshape(b_size, v_size)).type(dtype).to(device)

                    test_pred = model.forward(xb)
                    current_test_loss = get_loss(test_pred, yb.argmax(dim=1))
                    c_test_loss += current_test_loss.data.item()
                    current_test_acc += accuracy(test_pred, yb)

            # Average full-train-set results.
            c_train_loss = c_train_loss / n_train_batches
            current_train_acc = current_train_acc / n_train_batches
            train_loss.append(c_train_loss)
            train_acc.append(current_train_acc)

            # Average full-test-set results.
            c_test_loss = c_test_loss / n_test_batches
            current_test_acc = current_test_acc / n_test_batches
            test_loss.append(c_test_loss)
            test_acc.append(current_test_acc)

            if FLAGS.optimize == False:
                print('\nStep ', step, '\n------------\nTraining Loss = ',
                      round(c_train_loss, 4), ', Train Accuracy = ',
                      current_train_acc, '\nTest Loss = ',
                      round(c_test_loss, 4), ', Test Accuracy = ',
                      current_test_acc)

            # Early stopping when the test loss has converged.
            if step > 0 and abs(test_loss[(int(step / FLAGS.eval_freq))] -
                                test_loss[int(step / FLAGS.eval_freq) -
                                          1]) < eps:
                break

    if FLAGS.optimize == False:
        plot_graphs(train_loss,
                    'Training Loss',
                    'orange',
                    test_loss,
                    'Test Loss',
                    'blue',
                    title='Stochastic gradient descent',
                    ylabel='Loss',
                    xlabel='Steps')
        plot_graphs(train_acc,
                    'Training Accuracy',
                    'darkorange',
                    test_acc,
                    'Test Accuracy',
                    'darkred',
                    title='Stochastic gradient descent',
                    ylabel='Accuracy',
                    xlabel='Steps')

    # Save results:
    path = "./results/pytorch results/"
    np.save(path + 'train_loss', train_loss)
    np.save(path + 'train_acc', train_acc)
    np.save(path + 'test_loss', test_loss)
    np.save(path + 'test_acc', test_acc)

    return train_loss, test_loss, train_acc, test_acc
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP with SGD on CIFAR-10, recording per-iteration training
    loss/accuracy and gradient norms, and evaluates on the whole test set
    every ``FLAGS.eval_freq`` iterations. Unless ``FLAGS.no_write`` is set,
    the network, metrics and plots are written under ``FLAGS.output_dir``.

    Returns:
        dict with the trained (CPU) net and the collected metrics.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    # np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [int(u) for u in FLAGS.dnn_hidden_units.split(",")]
    else:
        dnn_hidden_units = []

    output_dir = FLAGS.output_dir
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    learning_rate = FLAGS.learning_rate
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    data_dir = FLAGS.data_dir
    no_write = FLAGS.no_write == 1

    # Obtain dataset; derive dimensions from the first sample.
    dataset = cifar10_utils.get_cifar10(data_dir)
    n_inputs = dataset['train'].images[0].reshape(-1).shape[0]
    n_classes = dataset['train'].labels[0].shape[0]
    n_test = dataset['test'].images.shape[0]

    # Initialise MLP
    dev = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(dev)
    print("Device: " + dev)
    net = MLP(n_inputs, dnn_hidden_units, n_classes).to(device)
    loss_fn = F.cross_entropy
    print("Network architecture:\n\t{}\nLoss module:\n\t{}".format(
        str(net), str(loss_fn)))

    # Evaluation vars
    train_loss = []
    gradient_norms = []
    train_acc = []
    test_acc = []
    iteration = 0

    # Training
    optimizer = optim.SGD(net.parameters(), lr=learning_rate)
    while iteration < max_steps:
        iteration += 1

        # Sample a mini-batch; labels arrive one-hot, cross_entropy wants indices.
        x, y = dataset['train'].next_batch(batch_size)
        x = torch.from_numpy(x.reshape((batch_size, -1))).to(device)
        y = torch.from_numpy(y).argmax(dim=1).long().to(device)

        # Forward propagation
        prediction = net.forward(x)
        loss = loss_fn(prediction, y)
        acc = accuracy(prediction, y)
        train_acc.append((iteration, acc.tolist()))
        train_loss.append((iteration, loss.tolist()))

        # Backprop
        optimizer.zero_grad()
        loss.backward()

        # Weight update in linear modules
        optimizer.step()

        with torch.no_grad():
            # BUGFIX: this previously summed the squared *parameter* values
            # (not .grad) and never took a square root, so the recorded
            # "gradient norm" was actually the squared weight norm.
            norm = 0
            for params in net.parameters():
                if params.grad is not None:
                    norm += params.grad.reshape(-1).pow(2).sum()
            gradient_norms.append((iteration, norm.sqrt().reshape(-1).tolist()))

        # Evaluation on the whole test set
        if iteration % eval_freq == 0:
            with torch.no_grad():  # no autograd graph needed for evaluation
                x = torch.from_numpy(dataset['test'].images.reshape(
                    (n_test, -1))).to(device)
                y = torch.from_numpy(
                    dataset['test'].labels).argmax(dim=1).long().to(device)
                prediction = net.forward(x)
                acc = accuracy(prediction, y)
            test_acc.append((iteration, acc.tolist()))
            print("Iteration: {}\t\tTest accuracy: {}".format(iteration, acc))

    # Save or return raw output
    metrics = {
        "train_loss": train_loss,
        "gradient_norms": gradient_norms,
        "train_acc": train_acc,
        "test_acc": test_acc
    }
    raw_data = {"net": net.to(torch.device('cpu')), "metrics": metrics}
    if no_write:
        return raw_data

    # Save under a timestamped subdirectory.
    now = datetime.datetime.now()
    time_stamp = "{}{}{}{}{}".format(now.year, now.month, now.day, now.hour,
                                     now.minute)
    net_name = "torchnet"
    out_dir = os.path.join(output_dir, net_name, time_stamp)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    pickle.dump(raw_data, open(os.path.join(out_dir, "torch_raw_data"), "wb"))

    # Save plots
    # Loss
    fig, ax = plt.subplots()
    its = [i for (i, q) in train_loss]  # renamed: `iter` shadowed the builtin
    vals = [q for (i, q) in train_loss]
    ax.plot(its, vals)
    ax.set(xlabel='Iteration', ylabel='Loss (log)',
           title='Batch training loss')
    ax.set_yscale('log')
    ax.grid()
    fig.savefig(os.path.join(out_dir, "torch_loss.png"))

    # gradient norm
    fig, ax = plt.subplots()
    its = [i for (i, q) in gradient_norms]
    vals = [q for (i, q) in gradient_norms]
    ax.plot(its, vals)
    ax.set(xlabel='Iteration', ylabel='Norm', title='Gradient norm')
    ax.grid()
    fig.savefig(os.path.join(out_dir, "torch_gradient_norm.png"))

    # accuracies
    fig, ax = plt.subplots()
    its = [i for (i, q) in train_acc]
    vals = [q for (i, q) in train_acc]
    ax.plot(its, vals, label='Train')
    its = [i for (i, q) in test_acc]
    vals = [q for (i, q) in test_acc]
    ax.plot(its, vals, label='Test')
    ax.set(xlabel='Iteration', ylabel='Accuracy',
           title='Train and test accuracy')
    ax.legend()
    ax.grid()
    fig.savefig(os.path.join(out_dir, "torch_accuracy.png"))

    return raw_data
def train():
    """
    Performs training and evaluation of MLP model.

    Trains the MLP with the optimizer selected via ``FLAGS.optimizer``
    ('SGD' or 'Adam'), evaluating on the whole test set every
    ``FLAGS.eval_freq`` iterations, then prints the collected metrics and
    shows accuracy/loss curves.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [int(u) for u in FLAGS.dnn_hidden_units.split(",")]
    else:
        dnn_hidden_units = []

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

    # The first batch fixes the input/output dimensions of the network.
    x, y = cifar10['train'].next_batch(FLAGS.batch_size)
    x = torch.from_numpy(x.reshape(FLAGS.batch_size, -1)).float().to(device)
    y = torch.from_numpy(y).float().to(device)

    n_inputs = x.shape[1]
    n_classes = y.shape[1]
    n_hidden = dnn_hidden_units

    MutLP = MLP(n_inputs, n_hidden, n_classes)
    MutLP.to(device)

    if FLAGS.optimizer == 'SGD':
        optimizer = optim.SGD(MutLP.parameters(),
                              lr=FLAGS.learning_rate,
                              weight_decay=FLAGS.weight_decay)
    elif FLAGS.optimizer == 'Adam':
        optimizer = optim.Adam(MutLP.parameters(),
                               lr=FLAGS.learning_rate,
                               weight_decay=FLAGS.weight_decay)
    else:
        print('Try SGD or Adam...')

    loss = nn.CrossEntropyLoss()

    l_list = list()
    t_list = list()
    train_acc = list()
    test_acc = list()
    iterations = list()

    print('\nTraining...')
    for i in range(FLAGS.max_steps):
        optimizer.zero_grad()
        s_pred = MutLP(x)
        # Labels are one-hot; CrossEntropyLoss wants dense class indices.
        f_loss = loss(s_pred, y.argmax(dim=1))
        f_loss.backward()
        optimizer.step()

        if i % FLAGS.eval_freq == 0:
            l_list.append(round(f_loss.item(), 3))
            iterations.append(i + 1)
            train_acc.append(accuracy(s_pred, y))

            # BUGFIX: the full test-set forward pass previously ran with
            # autograd enabled, building a throwaway graph over all test
            # images each evaluation (a large, avoidable memory spike).
            with torch.no_grad():
                t_x, t_y = cifar10['test'].images, cifar10['test'].labels
                t_x = torch.from_numpy(t_x.reshape(t_x.shape[0],
                                                   -1)).float().to(device)
                t_y = torch.from_numpy(t_y).float().to(device)
                t_pred = MutLP(t_x)
                t_loss = loss(t_pred, t_y.argmax(dim=1))
            t_list.append(round(t_loss.item(), 3))
            test_acc.append(accuracy(t_pred, t_y))

        # Fetch the next training mini-batch.
        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x.reshape(FLAGS.batch_size, -1)).float().to(device)
        y = torch.from_numpy(y).float().to(device)

    print('Done!\n')
    print('Training Losses:', l_list)
    print('Test Losses:', t_list)
    print('Training Accuracies:', train_acc)
    print('Test Accuracies:', test_acc)
    print('Best Test Accuracy:', max(test_acc))

    fig, axs = plt.subplots(1, 2, figsize=(10, 5))
    axs[0].plot(iterations, train_acc, iterations, test_acc)
    axs[0].set_xlabel('Iteration')
    axs[0].set_ylabel('Accuracy')
    axs[0].legend(('train', 'test'))
    axs[1].plot(iterations, l_list, iterations, t_list)
    axs[1].set_xlabel('Iteration')
    axs[1].set_ylabel('Loss')
    axs[1].legend(('train', 'test'))
    fig.tight_layout()
    plt.show()