def train(epoch):
    """Run one training epoch, drawing `cf.num_samples` weight samples per batch."""
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    optimizer = optim.Adam(net.parameters(),
                           lr=cf.learning_rate(cf.lr, epoch),
                           weight_decay=cf.weight_decay)

    print('\n=> Training Epoch #%d, LR=%.4f' % (epoch, cf.learning_rate(cf.lr, epoch)))
    m = math.ceil(len(trainset) / cf.batch_size)  # number of training mini-batches
    for batch_idx, (inputs_value, targets) in enumerate(trainloader):
        # Repeat each input/target num_samples times so every copy sees a fresh weight sample.
        x = inputs_value.view(-1, inputs, resize, resize).repeat(cf.num_samples, 1, 1, 1)
        y = targets.repeat(cf.num_samples)
        if use_cuda:
            x, y = x.cuda(), y.cuda()  # GPU settings

        # Mini-batch KL re-weighting schedule.
        if cf.beta_type == "Blundell":
            # pi_i = 2^(M-i) / (2^M - 1), as in "Weight Uncertainty in Neural Networks"
            beta = 2 ** (m - (batch_idx + 1)) / (2 ** m - 1)
        elif cf.beta_type == "Soenderby":
            beta = min(epoch / (cf.num_epochs // 4), 1)
        elif cf.beta_type == "Standard":
            beta = 1 / m
        else:
            beta = 0

        # Forward propagation
        outputs, kl = net.probforward(x)
        loss = vi(outputs, y, kl, beta)  # loss

        optimizer.zero_grad()
        loss.backward()   # backward propagation
        optimizer.step()  # optimizer update

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(y.data).cpu().sum().item()

        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' %
            (epoch, cf.num_epochs, batch_idx + 1,
             (len(trainset) // cf.batch_size) + 1, loss.item(),
             (100 * correct / total) / cf.num_samples))
        sys.stdout.flush()

    diagnostics_to_write = {
        'Epoch': epoch,
        'Loss': loss.item(),
        'Accuracy': (100 * correct / total) / cf.num_samples
    }
    with open(logfile, 'a') as lf:
        lf.write(str(diagnostics_to_write))
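
# `vi` above is the variational-inference objective, defined outside this
# section. Below is a minimal sketch of the usual form, assuming the standard
# mini-batch ELBO (mean cross-entropy NLL plus a beta-weighted KL term, as in
# Blundell et al., "Weight Uncertainty in Neural Networks"); the class name
# ELBOSketch is hypothetical and the repo's actual `vi` may differ.
import torch.nn as nn
import torch.nn.functional as F


class ELBOSketch(nn.Module):
    """Hypothetical ELBO loss: mean cross-entropy NLL + beta * KL."""

    def forward(self, outputs, targets, kl, beta):
        # cross_entropy applies log-softmax internally and averages over the batch.
        nll = F.cross_entropy(outputs, targets)
        return nll + beta * kl

# Usage, mirroring the call in train(): loss = ELBOSketch()(outputs, y, kl, beta)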
def train_and_val(epoch):
    ###################
    # train the model #
    ###################
    likelihoods = []
    kls = []
    net.train()
    avg_train_loss = 0
    train_loss = 0
    valid_loss = 0
    correct_train = 0
    total_train = 0
    correct_val = 0
    total_val = 0
    accuracy_train = 0
    global valid_loss_min

    m = len(train_loader)  # number of training mini-batches
    optimizer = optim.Adam(net.parameters(),
                           lr=cf.learning_rate(args.lr, epoch),
                           weight_decay=args.weight_decay)
    print('\n| Training Epoch #%d, LR=%.4f' % (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (x, y) in enumerate(train_loader):
        # Repeat each input/target num_samples times so every copy sees a fresh weight sample.
        x = x.view(-1, 3, resize, resize).repeat(args.num_samples, 1, 1, 1)
        y = y.repeat(args.num_samples)
        if use_cuda:
            x, y = x.cuda(), y.cuda()  # GPU settings

        # Mini-batch KL re-weighting schedule.
        if args.beta_type == "Blundell":
            beta = 2 ** (m - (batch_idx + 1)) / (2 ** m - 1)
        elif args.beta_type == "Soenderby":
            beta = min(epoch / (num_epochs // 4), 1)
        elif args.beta_type == "Standard":
            beta = 1 / m
        else:
            beta = 0

        # Forward propagation
        # outputs, loss_train = net(x, y, args.num_samples, batch_size, 10, "train")
        outputs, kl = net(x)
        outputs = normalization_function(outputs)
        loss_train = vi(outputs, y, kl, beta)
        ll = loss_train.item() - beta * kl.item()  # likelihood term: loss minus the weighted KL
        train_loss += loss_train.item()

        optimizer.zero_grad()
        loss_train.backward()  # backward propagation
        optimizer.step()       # optimizer update

        _, predicted = outputs.max(1)
        accuracy_train = (predicted.cpu() == y.cpu()).float().mean().item()
        total_train += y.size(0)
        kls.append(beta * kl.item())
        likelihoods.append(ll)
        avg_train_loss = train_loss / total_train

        # print training statistics
        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d] Average Training Loss: %.4f '
            'Average Training Accuracy: %.3f Average KL: %.4f Average Likelihood: %.4f' %
            (epoch, num_epochs, batch_idx + 1, len(train_loader), avg_train_loss,
             accuracy_train, sum(kls) / len(kls), sum(likelihoods) / len(likelihoods)))
        sys.stdout.flush()

    ######################
    # validate the model #
    ######################
    conf = []
    likelihoods_val = []
    kls_val = []
    average_loss = 0
    accuracy_val = 0
    net.eval()
    m = len(valid_loader)  # number of validation mini-batches
    print('\n| Validation Epoch #%d, LR=%.4f' % (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (x, y) in enumerate(valid_loader):
        x = x.view(-1, 3, resize, resize).repeat(args.num_samples, 1, 1, 1)
        y = y.repeat(args.num_samples)
        # move tensors to GPU if CUDA is available
        if use_cuda:
            x, y = x.cuda(), y.cuda()

        with torch.no_grad():
            if args.beta_type == "Blundell":
                beta = 2 ** (m - (batch_idx + 1)) / (2 ** m - 1)
            elif args.beta_type == "Soenderby":
                beta = min(epoch / (num_epochs // 4), 1)
            elif args.beta_type == "Standard":
                beta = 1 / m
            else:
                beta = 0

            # forward pass: compute predicted outputs by passing inputs to the model
            # output, loss_val = net(x, y, args.num_samples, batch_size, 10, "validation")
            output, kl_val = net(x)
            output = normalization_function(output)
            loss_val = vi(output, y, kl_val, beta)
            ll_val = loss_val.item() - beta * kl_val.item()
            kls_val.append(beta * kl_val.item())
            likelihoods_val.append(ll_val)

            # update average validation loss
            valid_loss += loss_val.item()
            # preds = F.softmax(output, dim=1)
            _, predicted = output.max(1)
            accuracy_val = (predicted.cpu() == y.cpu()).float().mean().item()
            # record the top-1 confidence of the first example in the batch
            results = torch.topk(output.data, k=1, dim=1)
            conf.append(results[0][0].item())
            total_val += y.size(0)
            average_loss = valid_loss / total_val

        # print validation statistics
        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d] Average Validation Loss: %.4f '
            'Average Validation Accuracy: %.3f KL: %.4f Likelihood: %.4f' %
            (epoch, num_epochs, batch_idx + 1, len(valid_loader), average_loss,
             accuracy_val, sum(kls_val) / len(kls_val),
             sum(likelihoods_val) / len(likelihoods_val)))
        sys.stdout.flush()

    # Moment-based decomposition of the sampled confidences into epistemic
    # and aleatoric uncertainty.
    p_hat = np.array(conf)
    confidence_mean = np.mean(p_hat, axis=0)
    confidence_var = np.var(p_hat, axis=0)
    epistemic = np.mean(p_hat ** 2, axis=0) - np.mean(p_hat, axis=0) ** 2
    aleatoric = np.mean(p_hat * (1 - p_hat), axis=0)

    print('\n| Final Training Accuracy: {:.3f} ; Final Validation Accuracy: {:.3f}'
          .format(accuracy_train, accuracy_val))
    print('| Epistemic Uncertainty is:', epistemic)
    print('| Aleatoric Uncertainty is:', aleatoric)
    print('| Mean is:', confidence_mean)
    print('| Variance is:', confidence_var)

    if average_loss <= valid_loss_min:
        print('| Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'
              .format(valid_loss_min, average_loss))
        state = {
            'net': net,
            'acc': accuracy_val,
            'epoch': epoch,
            'model_state': net.state_dict()
        }
        if not os.path.isdir(args.save_folder + '/checkpoint'):
            os.mkdir(args.save_folder + '/checkpoint')
        save_point = args.save_folder + '/checkpoint/' + args.dataset + os.sep
        if not os.path.isdir(save_point):
            os.mkdir(save_point)
        torch.save(state, save_point + file_name + '.t7')
        valid_loss_min = average_loss

    diagnostics_to_write = {
        'Epoch': epoch,
        'Loss': avg_train_loss,
        'Accuracy': accuracy_train,
        'KL divergence': sum(kls) / len(kls),
        'Log Likelihood': sum(likelihoods) / len(likelihoods)
    }
    val_diagnostics_to_write = {
        'Validation Epoch': epoch,
        'Loss': average_loss,
        'Accuracy': accuracy_val,
        'KL divergence': sum(kls_val) / len(kls_val),
        'Log Likelihood': sum(likelihoods_val) / len(likelihoods_val)
    }
    values_to_write = {
        'Epoch': epoch,
        'Confidence Mean': confidence_mean,
        'Confidence Variance': confidence_var,
        'Epistemic Uncertainty': epistemic,
        'Aleatoric Uncertainty': aleatoric
    }
    with open(logfile, 'a') as lf:
        lf.write(str(diagnostics_to_write))
    with open(val_logfile, 'a') as lf:
        lf.write(str(val_diagnostics_to_write))
    with open(value_file, 'a') as lf:
        lf.write(str(values_to_write))
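
# train_and_val() uses the moment-based split of the predictive confidence:
# over T stochastic forward passes with confidences p_hat, the epistemic part
# is Var(p_hat) = E[p_hat^2] - E[p_hat]^2 and the aleatoric part is
# E[p_hat * (1 - p_hat)] (cf. Kwon et al., 2018). A small self-contained
# sketch of the same computation; the name decompose_uncertainty is
# hypothetical:
import numpy as np


def decompose_uncertainty(p_hat):
    """Split sampled confidences into (epistemic, aleatoric) parts.

    p_hat: array of shape (T,) or (T, C), one confidence (or confidence
    vector) per stochastic forward pass.
    """
    p_hat = np.asarray(p_hat)
    epistemic = np.mean(p_hat ** 2, axis=0) - np.mean(p_hat, axis=0) ** 2  # variance across samples
    aleatoric = np.mean(p_hat * (1 - p_hat), axis=0)  # expected Bernoulli variance
    return epistemic, aleatoric

# Example: identical confidences across passes give zero epistemic uncertainty:
# decompose_uncertainty(np.full(10, 0.8)) -> (~0.0, ~0.16)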