def train():
    """
    Performs training and evaluation of the ConvNet model.
    Evaluates the model on the whole test set every eval_freq iterations.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    # all external parameters in a readable format
    lr = FLAGS.learning_rate
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    data_dir = FLAGS.data_dir

    # fetch data
    data = cifar10_utils.get_cifar10(data_dir)
    n_classes = 10
    n_channels = 3

    # number of iterations per step over the training data
    n_iter = 1  # int(np.ceil(data["train"]._num_examples / batch_size))
    # number of test batches needed to cover the whole test set
    num_evals = int(np.ceil(data['test']._num_examples / batch_size))

    # load model
    cnn_model = ConvNet(n_channels, n_classes)

    # loss function
    loss_XE = torch.nn.CrossEntropyLoss()

    # keep track of how loss and accuracy evolve over time
    loss_train = np.zeros(max_steps + 1)  # loss on training data
    acc_train = np.zeros(max_steps + 1)   # accuracy on training data
    loss_eval = np.zeros(max_steps + 1)   # loss on test data
    acc_eval = np.zeros(max_steps + 1)    # accuracy on test data

    # optimizer
    optimizer = optim.Adam(cnn_model.parameters(), lr=lr)

    # let's put some gpus to work!
    cnn_model.to(device)

    # index to keep track of the evaluations
    eval_i = 0

    # training loop
    for s in range(max_steps):
        for n in range(n_iter):
            # fetch next batch of data
            X, y = data['train'].next_batch(batch_size)

            # use torch tensors + gpu
            X = torch.from_numpy(X).type(dtype).to(device)
            y = torch.from_numpy(y).type(dtype).to(device)

            # reset gradients to zero before the gradient step
            optimizer.zero_grad()

            # calculate loss
            probs = cnn_model(X)  # automatically calls .forward()
            loss = loss_XE(probs, y.argmax(dim=1))

            # backward propagation
            loss.backward()
            optimizer.step()

            # store the loss and accuracy of the training data for later analysis
            loss_train[eval_i] += loss.item() / num_evals
            # acc_train[eval_i] += accuracy(probs, y) / num_evals

        if (s % eval_freq == 0) or (s == (max_steps - 1)):
            # evaluate on the whole test set, batch by batch, without tracking gradients
            with torch.no_grad():
                for t in range(num_evals):
                    # fetch the next test batch
                    X, y = data['test'].next_batch(batch_size)

                    # use torch tensors + gpu, no gradient needed
                    X = torch.from_numpy(X).type(dtype).to(device)
                    y = torch.from_numpy(y).type(dtype).to(device)

                    # loss and accuracy for the batch
                    probs = cnn_model(X)
                    loss_eval[eval_i] += loss_XE(probs, y.argmax(dim=1)).item()
                    acc_eval[eval_i] += accuracy(probs, y)

            # average the losses and accuracies across test batches
            loss_eval[eval_i] /= num_evals
            acc_eval[eval_i] /= num_evals

            # print performance
            print(f"step {s} out of {max_steps}")
            print(f"  test  loss: {loss_eval[eval_i]}, accuracy: {acc_eval[eval_i]}")
            print(f"  train loss: {loss_train[eval_i]}, accuracy: {acc_train[eval_i]}")

            # increment eval counter
            eval_i += 1

    # save intermediate results for later analysis
    print("saving results in folder...")
    np.save("loss_train", loss_train)
    np.save("accuracy_train", acc_train)
    np.save("loss_eval", loss_eval)
    np.save("accuracy_eval", acc_eval)

    print("saving model")
    torch.save(cnn_model.state_dict(), cnn_model.__class__.__name__ + ".pt")
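# Both versions of train() rely on an accuracy() helper that is defined elsewhere
# in the repo and not shown in this section. The sketch below is an assumption of
# what it computes (one-hot targets, logits/probabilities as predictions); the
# hypothetical name accuracy_sketch is used to avoid shadowing the real helper.
def accuracy_sketch(predictions, targets):
    """
    Fraction of samples whose predicted class (argmax over logits) matches the
    class encoded by the one-hot target. Both inputs are torch tensors of shape
    (batch_size, n_classes); returns a plain Python float.
    """
    pred_classes = predictions.argmax(dim=1)
    true_classes = targets.argmax(dim=1)
    return (pred_classes == true_classes).float().mean().item()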
def train():
    """
    Performs training and evaluation of the ConvNet model.
    """
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)
    ########################

    lr = FLAGS.learning_rate
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    data_dir = FLAGS.data_dir
    optim_name = FLAGS.optimizer

    # fetch data
    cifar10 = cifar10_utils.get_cifar10(data_dir)
    n_classes = 10
    n_channels = 3
    # number of test batches needed to cover the whole test set
    eval_rounds = int(np.ceil(cifar10['test']._num_examples / batch_size))

    model = ConvNet(n_channels, n_classes)
    ce = torch.nn.CrossEntropyLoss()
    pars = model.parameters()

    # optimizer
    optim_pars = {'params': pars, 'lr': lr, 'weight_decay': FLAGS.weight_decay}
    if optim_name == 'adadelta':
        optimizer = torch.optim.Adadelta(**optim_pars)
    elif optim_name == 'adagrad':
        optimizer = torch.optim.Adagrad(**optim_pars)
    elif optim_name == 'rmsprop':
        optimizer = torch.optim.RMSprop(**optim_pars)
    elif optim_name == 'adam':
        optimizer = torch.optim.Adam(**optim_pars)
    else:  # SGD
        optimizer = torch.optim.SGD(**optim_pars)

    model.to(device)

    cols = ['train_acc', 'test_acc', 'train_loss', 'test_loss', 'secs']

    # train
    results = []
    name = f'convnet-pytorch-{optim_name}'
    with SummaryWriter(name) as w:
        for step in tqdm(range(max_steps)):
            optimizer.zero_grad()

            X, y = cifar10['train'].next_batch(batch_size)
            X = torch.tensor(X).type(dtype).to(device)
            y = torch.tensor(y).type(dtype).to(device)

            train_predictions = model(X)
            train_acc = accuracy(train_predictions, y)
            idx_train = torch.argmax(y, dim=-1).long()
            train_loss = ce(train_predictions, idx_train)

            # stop if the test loss has converged
            check = 10
            if len(results) >= 2 * check:
                threshold = 1e-6
                losses = [result['test_loss'] for result in results]
                current = np.mean(losses[-check:])
                prev = np.mean(losses[-2 * check:-check])
                if (prev - current) < threshold:
                    break

            # # at each epoch, we divide the learning rate by this if the dev accuracy decreases
            # if dev_acc > prev_acc:
            #     lr /= learning_decay
            #     prev_acc = dev_acc

            train_loss.backward()
            optimizer.step()

            # evaluate on the whole test set every eval_freq steps
            if step % eval_freq == 0:
                time = step // eval_freq
                start = timer()
                test_accs = []
                test_losses = []
                with torch.no_grad():
                    for t in range(eval_rounds):
                        X, y = cifar10['test'].next_batch(batch_size)
                        X = torch.tensor(X).type(dtype).to(device)
                        y = torch.tensor(y).type(dtype).to(device)
                        test_predictions = model(X)
                        test_accs.append(accuracy(test_predictions, y))
                        test_losses.append(ce(test_predictions, y.argmax(dim=1)).item())
                end = timer()
                secs = end - start
                test_acc = np.mean(test_accs)
                test_loss = np.mean(test_losses)

                # convert tensors / numpy values to plain Python scalars for logging
                vals = [train_acc, test_acc, train_loss, test_loss, secs]
                stats = dict(
                    zip(cols, [
                        float(i.detach().cpu().numpy().take(0)) if isinstance(i, torch.Tensor)
                        else float(i) if isinstance(i, (np.ndarray, np.generic))
                        else i
                        for i in vals
                    ]))
                print(yaml.dump({
                    k: round(i, 3) if isinstance(i, float) else i
                    for k, i in stats.items()
                }))
                w.add_scalars('metrics', stats, time)
                results.append(stats)

    df = pd.DataFrame(results, columns=cols)
    meta = {
        'framework': 'pytorch',
        'algo': 'convnet',
        'optimizer': optim_name,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate,
        'dnn_hidden_units': '',
        'weight_decay': FLAGS.weight_decay,
        'max_steps': FLAGS.max_steps,
    }
    for k, v in meta.items():
        df[k] = v

    # append to the per-run results file, writing the header only when the file is first created
    output_file = f'{name}.csv'
    if os.path.isfile(output_file):
        df.to_csv(output_file, header=False, mode='a')
    else:
        df.to_csv(output_file, header=True, mode='w')

    torch.save(model.state_dict(), f'{name}.pth')
    print('done!')
    return test_loss
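# Usage sketch: reload the checkpoint written by the second train() for inference.
# This is a minimal sketch, not part of the training script; it assumes the
# ConvNet(n_channels, n_classes) constructor and the device/torch names used
# above, and the hypothetical helper name load_trained_model_sketch.
def load_trained_model_sketch(checkpoint_path, n_channels=3, n_classes=10):
    """Load a saved state_dict (e.g. 'convnet-pytorch-adam.pth') and prepare the model for evaluation."""
    model = ConvNet(n_channels, n_classes)
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.to(device)
    model.eval()  # disable dropout / batch-norm updates for inference
    return model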