def test_bp(args, device, net, test_loader, loss_function):
    """Evaluate `net` on the test set (backprop-trained variant).

    Args:
        args: parsed command-line options; `args.classification` selects
            classification vs. regression bookkeeping.
        device: torch device batches are moved to.
        net: the network to evaluate.
        test_loader: dataloader yielding (inputs, targets).
        loss_function: criterion applied per batch.

    Returns:
        (accuracy, loss): batch-averaged accuracy (None for regression)
        and batch-averaged loss.
    """
    loss = 0
    if args.classification:
        accuracy = 0
    nb_batches = len(test_loader)
    with torch.no_grad():
        for inputs, targets in test_loader:
            if args.double_precision:
                inputs, targets = inputs.double().to(device), targets.to(
                    device)
            else:
                inputs, targets = inputs.to(device), targets.to(device)
            # conv networks take images directly; all others take flat vectors
            if not args.network_type == 'BPConv':
                inputs = inputs.flatten(1, -1)
            if args.classification and args.output_activation == 'sigmoid':
                # convert targets to one hot vectors for MSE loss:
                targets = utils.int_to_one_hot(targets, 10, device,
                                               soft_target=args.soft_target)
            predictions = net(inputs)
            loss += loss_function(predictions, targets).item()
            if args.classification:
                if args.output_activation == 'sigmoid':
                    accuracy += utils.accuracy(predictions,
                                               utils.one_hot_to_int(targets))
                else:  # softmax
                    accuracy += utils.accuracy(predictions, targets)
    loss /= nb_batches
    if args.classification:
        accuracy /= nb_batches
    else:
        accuracy = None
    return accuracy, loss
def test_few_shot(backbone, loader, num_episode, nums_support):
    """Run few-shot evaluation for each support-set size in `nums_support`.

    Features are extracted once from `backbone`; each episode samples a
    task from the cached features and classifies it with a freshly
    finetuned baseline head.

    Returns:
        List of (mean_accuracy, confidence_interval) tuples, one per
        n_support value.
    """
    cl_data_file = extract_feature(backbone, loader)
    results = []
    for n_support in nums_support:
        print(f"=> test {n_support} shot accuracy")
        model_finetune = BaselineFinetune(
            n_way=cfg.test.n_way,
            n_support=n_support,
            metric_type=cfg.method.metric,
            metric_params=cfg.method.metric_params_test,
            finetune_params=cfg.test.finetune_params)
        model_finetune.eval()
        t = trange(num_episode, desc='testing', ncols=80)
        acc_all = []
        for _ in t:
            z_all = sample_task(cl_data_file, cfg.test.n_way, n_support,
                                cfg.test.n_query)
            y = get_few_shot_label(cfg.test.n_way, cfg.test.n_query).cuda()
            scores = model_finetune(z_all)
            acc = accuracy(scores, y).item()
            acc_all.append(acc)
            t.set_postfix(acc=np.mean(acc_all))
        acc_all = np.asarray(acc_all)
        acc_mean = np.mean(acc_all)
        acc_std = np.std(acc_all)
        # 1.96 * std / sqrt(N): 95% confidence interval under normality
        confidence_interval = 1.96 * acc_std / np.sqrt(num_episode)
        print(f'{n_support} shot accuracy: {acc_mean:4.2%}±{confidence_interval:4.2%}')
        results.append((acc_mean, confidence_interval))
    return results
def validate(val_loader, model, device='cpu', print_freq=100, prefix='test'):
    """Run one evaluation pass over `val_loader`.

    Args:
        val_loader: dataloader yielding (data, target) batches.
        model: network to evaluate; switched to eval mode here.
        device: device batches are moved to.
        print_freq: number of batches between progress prints.
        prefix: tag used in the printed progress lines.

    Returns:
        Average top-1 precision over the loader.
    """
    timer = AverageMeter()
    top1 = AverageMeter()

    # inference mode: freeze dropout / batch-norm statistics
    model.eval()

    with torch.no_grad():
        tick = time.time()
        for step, (data, target) in enumerate(val_loader):
            data = data.to(device)
            target = target.to(device)

            output = model(data)

            # top-5 precision is computed but intentionally discarded
            prec1, _ = accuracy(output, target, topk=(1, 5))
            top1.update(prec1[0], data.size(0))

            # per-batch wall-clock time
            timer.update(time.time() - tick)
            tick = time.time()

            if step % print_freq == 0:
                print(f'{prefix}: [{step}/{len(val_loader)}] '
                      f'Time {timer.val:.3f} ({timer.avg:.3f}) '
                      f'Prec@1 {top1.val:.3f} ({top1.avg:.3f})')

    print(f' * {prefix} Prec@1 {top1.avg:.3f}')
    return top1.avg
def train(model, train_loader, optimizer, criterion, epoch, log_writer, args):
    """Train `model` for one epoch and log averaged loss/accuracy.

    The learning rate is re-adjusted every batch via
    `adjust_learning_rate`; progress (ETA, peak GPU memory, running
    metrics) is printed every 20 batches.
    """
    train_loss = lib.Metric('train_loss')
    train_accuracy = lib.Metric('train_accuracy')
    model.train()
    N = len(train_loader)
    start_time = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):
        lr_cur = adjust_learning_rate(args, optimizer, epoch, batch_idx, N,
                                      type=args.lr_scheduler)

        if args.cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss.update(loss)
        train_accuracy.update(accuracy(output, target))

        if (batch_idx + 1) % 20 == 0:
            # peak GPU memory in MiB since the start of the program
            memory = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0
            used_time = time.time() - start_time
            # linear ETA extrapolation from average per-batch time
            eta = used_time / (batch_idx + 1) * (N - batch_idx)
            eta = str(datetime.timedelta(seconds=int(eta)))
            training_state = ' '.join(['Epoch: {}', '[{} / {}]', 'eta: {}',
                                       'lr: {:.9f}', 'max_mem: {:.0f}',
                                       'loss: {:.3f}', 'accuracy: {:.3f}'])
            training_state = training_state.format(
                epoch + 1, batch_idx + 1, N, eta, lr_cur, memory,
                train_loss.avg.item(), 100. * train_accuracy.avg.item())
            print(training_state)

    if log_writer:
        log_writer.add_scalar('train/loss', train_loss.avg, epoch)
        log_writer.add_scalar('train/accuracy', train_accuracy.avg, epoch)
def validate(val_loader, model, criterion):
    """Evaluate `model` on `val_loader`.

    Returns:
        (top1_avg, top5_avg): sample-weighted average precisions.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        target = target.cuda()
        input = input.cuda()
        with torch.no_grad():
            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1[0], input.size(0))
            top5.update(prec5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

    logging.info(
        " ---------------------------------------------------------------")
    logging.info(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))
    return top1.avg, top5.avg
def test(epoch, test_loader, save=True):
    """Evaluate the global `net` on `test_loader`.

    When `save` is True, logs averaged metrics to the global `writer`,
    updates the global `best_acc`, and writes a checkpoint (flagged best
    when top-1 improved).
    """
    global best_acc
    net.eval()
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))
            # timing
            batch_time.update(time.time() - end)
            end = time.time()

            progress_bar(
                batch_idx, len(test_loader),
                'Loss: {:.3f} | Acc1: {:.3f}% | Acc5: {:.3f}%'.format(
                    losses.avg, top1.avg, top5.avg))

    if save:
        writer.add_scalar('loss/test', losses.avg, epoch)
        writer.add_scalar('acc/test_top1', top1.avg, epoch)
        writer.add_scalar('acc/test_top5', top5.avg, epoch)

        is_best = False
        if top1.avg > best_acc:
            best_acc = top1.avg
            is_best = True

        print('Current best acc: {}'.format(best_acc))
        # unwrap DataParallel so the checkpoint loads on a single device
        save_checkpoint(
            {
                'epoch': epoch,
                'model': args.model,
                'dataset': args.dataset,
                'state_dict': net.module.state_dict()
                if isinstance(net, nn.DataParallel) else net.state_dict(),
                'acc': top1.avg,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint_dir=log_dir)
def val(model, val_loader, criterion, epoch, args, log_writer=False):
    """Validate `model` and track the globally best accuracy.

    `epoch == -1` is treated as the final epoch. NOTE(review): the model
    is saved and `best_val_acc` is updated only when `log_writer` is
    truthy — running without a writer leaves the global untouched.
    """
    global best_val_acc
    model.eval()
    val_loss = lib.Metric('val_loss')
    val_accuracy = lib.Metric('val_accuracy')
    if epoch == -1:
        epoch = args.epochs - 1

    with tqdm(total=len(val_loader),
              desc='Validate Epoch #{}'.format(epoch + 1)) as t:
        with torch.no_grad():
            for data, target in val_loader:
                if args.cuda:
                    data, target = data.cuda(), target.cuda()
                output = model(data)

                val_loss.update(criterion(output, target))
                val_accuracy.update(accuracy(output, target))
                t.update(1)

    print("\nloss: {}, accuracy: {:.2f}, best acc: {:.2f}\n".format(
        val_loss.avg.item(), 100. * val_accuracy.avg.item(),
        100. * max(best_val_acc, val_accuracy.avg)))
    if val_accuracy.avg > best_val_acc and log_writer:
        save_model(model, None, -1, args)

    if log_writer:
        log_writer.add_scalar('val/loss', val_loss.avg, epoch)
        log_writer.add_scalar('val/accuracy', val_accuracy.avg, epoch)
        best_val_acc = max(best_val_acc, val_accuracy.avg)
        log_writer.add_scalar('val/best_acc', best_val_acc, epoch)
def test(epoch, test_loader, save=False):
    """Evaluate the global `net` on `test_loader`; return average top-1.

    NOTE(review): `epoch` and `save` are unused in this body — presumably
    kept for signature compatibility with the checkpointing variant of
    this function elsewhere in the project.
    """
    global best_accd
    net.eval()
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    device0 = 'cuda'

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            if use_cuda:
                # inputs, targets = inputs.cuda(), targets.cuda()
                inputs, targets = inputs.to(device0), targets.to(device0)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))
            # timing
            batch_time.update(time.time() - end)
            end = time.time()

            progress_bar(
                batch_idx, len(test_loader),
                'Loss: {:.3f} | Acc1: {:.3f}% | Acc5: {:.3f}%'.format(
                    losses.avg, top1.avg, top5.avg))
    return top1.avg
def eval_train(net, train_loader):
    """Evaluate `net` over `train_loader` without gradient tracking.

    Relies on the module-level `criterion`, `device` and `use_cuda`.

    Returns:
        (avg_loss, avg_top1, avg_top5): sample-weighted averages over
        the whole loader.
    """
    net.eval()
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    # removed dead locals: `count_threshold = float('inf')` / `count = 0`
    # were never read anywhere in the body

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            if use_cuda:
                inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))
            # timing
            batch_time.update(time.time() - end)
            end = time.time()
    return losses.avg, top1.avg, top5.avg
def validate(model, auxiliarynet, valid_loader, device, logger):
    """Do validation.

    Computes the mean L2 landmark loss and NME over `valid_loader`.
    `auxiliarynet` is only switched to eval mode; it is not invoked here.

    Returns:
        (mean_loss, mean_nme) as numpy scalars.
    """
    model.eval()
    auxiliarynet.eval()
    losses, nme = list(), list()
    with torch.no_grad():
        for i, (img, land_gt, angle_gt) in enumerate(valid_loader):
            img = img.to(device, non_blocking=True)
            landmark_gt = land_gt.to(device, non_blocking=True)
            angle_gt = angle_gt.to(device, non_blocking=True)
            landmark, _ = model(img)

            # compute the l2 loss
            # NOTE(review): squeeze() also drops a batch dim of size 1 —
            # confirm valid_loader never yields a single-sample batch.
            landmark = landmark.squeeze()
            l2_diff = torch.sum((landmark_gt - landmark)**2, axis=1)
            loss = torch.mean(l2_diff)
            losses.append(loss.cpu().detach().numpy())

            # compute the accuracy (per-sample NME from (N, n_points, 2))
            landmark = landmark.cpu().detach().numpy()
            landmark = landmark.reshape(landmark.shape[0], -1, 2)
            landmark_gt = landmark_gt.cpu().detach().numpy()
            landmark_gt = landmark_gt.reshape(landmark_gt.shape[0], -1, 2)
            _, nme_i = accuracy(landmark, landmark_gt)
            for item in nme_i:
                nme.append(item)

    logger.info("===> Evaluate:")
    logger.info("Eval set: Average loss: {:.4f} nme: {:.4f}".format(
        np.mean(losses), np.mean(nme)))
    return np.mean(losses), np.mean(nme)
def test(args, device, net, test_loader, loss_function):
    """
    Compute the test loss and accuracy on the test dataset

    Args:
        args: command line inputs
        net: network
        test_loader (DataLoader): dataloader object with the test dataset

    Returns:
        Tuple containing:
        - Test accuracy (None when not a classification task)
        - Test loss
    """
    loss = 0
    if args.classification:
        accuracy = 0
    nb_batches = len(test_loader)
    with torch.no_grad():
        for inputs, targets in test_loader:
            if args.double_precision:
                inputs, targets = inputs.double().to(device), targets.to(
                    device)
            else:
                inputs, targets = inputs.to(device), targets.to(device)
            # conv variant consumes images; all others take flat vectors
            if not args.network_type == 'DDTPConv':
                inputs = inputs.flatten(1, -1)
            if args.classification and\
                    args.output_activation == 'sigmoid':
                # convert targets to one hot vectors for MSE loss:
                targets = utils.int_to_one_hot(targets, 10, device,
                                               soft_target=args.soft_target)
            predictions = net.forward(inputs)
            loss += loss_function(predictions, targets).item()
            if args.classification:
                if args.output_activation == 'sigmoid':
                    accuracy += utils.accuracy(predictions,
                                               utils.one_hot_to_int(targets))
                else:  # softmax
                    accuracy += utils.accuracy(predictions, targets)
    loss /= nb_batches
    if args.classification:
        accuracy /= nb_batches
    else:
        accuracy = None
    return accuracy, loss
def validate(loader, model, logger, epoch=0, print_freq=100):
    """Evaluate `model` on `loader`, logging top-1/top-3 accuracy and loss.

    Returns:
        (acc1_avg, acc3_avg) averaged over the whole loader.
    """
    # switch to eval mode
    model.eval()
    meters = logger.reset_meters('val')

    end = time.time()
    # torch.no_grad() replaces the removed `Variable(..., volatile=True)`
    # wrappers, which no longer exist in modern PyTorch.
    with torch.no_grad():
        for i, sample in enumerate(loader):
            batch_size = sample['visual'].size(0)

            # measure data loading time
            meters['data_time'].update(time.time() - end, n=batch_size)

            # `.cuda(async=True)` is a SyntaxError on Python >= 3.7
            # (`async` became a keyword); the modern spelling is
            # `non_blocking=True`.
            input_visual = sample['visual'].cuda()
            target = sample['label'].cuda(non_blocking=True)

            # compute output
            output = model(input_visual)
            loss = F.cross_entropy(output, target)

            acc1, acc3 = utils.accuracy(output.data, target.data, topk=(1, 3))
            meters['acc1'].update(acc1[0], n=batch_size)
            meters['acc3'].update(acc3[0], n=batch_size)
            # `loss.data[0]` fails on 0-dim tensors; use .item()
            meters['loss'].update(loss.item(), n=batch_size)

            # measure elapsed time
            meters['batch_time'].update(time.time() - end, n=batch_size)
            end = time.time()
            if (i + 1) % print_freq == 0:
                print('[Val]\tEpoch: [{0}][{1}/{2}] '
                      'Time {batch_time.avg:.3f}\t'
                      'Data {data_time.avg:.3f}\t'
                      'Loss: {loss.avg:.3f}\t'
                      'Acc@1 {acc1.avg:.3f}\t'
                      'Acc@3 {acc3.avg:.3f}\t'.format(
                          epoch, i + 1, len(loader),
                          batch_time=meters['batch_time'],
                          data_time=meters['data_time'],
                          acc1=meters['acc1'], acc3=meters['acc3'],
                          loss=meters['loss']))

    print('[Val]\tEpoch: [{0}] '
          'Time {batch_time.avg:.3f}\t'
          'Data {data_time.avg:.3f}\t'
          'Loss: {loss.avg:.3f}\t'
          'Acc@1 {acc1.avg:.3f}\t'
          'Acc@3 {acc3.avg:.3f}\t'.format(epoch,
                                          batch_time=meters['batch_time'],
                                          data_time=meters['data_time'],
                                          acc1=meters['acc1'],
                                          acc3=meters['acc3'],
                                          loss=meters['loss']))

    logger.log_meters('val', n=epoch)
    return meters['acc1'].avg, meters['acc3'].avg
def train(train_loader, n_epoch):
    """Train the global `net` for at most `n_epoch * len(train_loader)` batches.

    Uses module-level `net`, `optimizer`, `criterion`, `device` and
    `use_cuda`; leaves the network in eval mode on exit.
    """
    net.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    # hard cap on total batches processed (early-exit below)
    count_threshold = n_epoch * len(train_loader)
    count = 0
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        data_time.update(time.time() - end)
        if use_cuda:
            #inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))
        # timing
        batch_time.update(time.time() - end)
        end = time.time()

        #progress_bar(batch_idx, len(train_loader), 'Loss: {:.3f} | Acc1: {:.3f}% | Acc5: {:.3f}%'
        #             .format(losses.avg, top1.avg, top5.avg))
        if batch_idx % 200 == 0:
            print('[{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      batch_idx, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1,
                      top5=top5))
        count += 1
        if count >= count_threshold:
            break
    net.eval()
def _validate(self, val_loader, model, verbose=False):
    '''
    Validate the performance on validation set
    :param val_loader: dataloader yielding (input, target) batches
    :param model: network to evaluate (switched to eval mode here)
    :param verbose: when True, print loss/top1/top5 and elapsed time
    :return: top1.avg or top5.avg depending on self.acc_metric
    '''
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    if torch.cuda.is_available():
        criterion = nn.CrossEntropyLoss().cuda()
    else:
        criterion = nn.CrossEntropyLoss()
    # switch to evaluate mode
    model.eval()

    end = time.time()
    t1 = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            # torch.autograd.Variable is a deprecated no-op wrapper;
            # move the tensors to the GPU directly instead.
            if torch.cuda.is_available():
                input = input.cuda()
                target = target.cuda(non_blocking=True)

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
    t2 = time.time()
    if verbose:
        print('* Test loss: %.3f top1: %.3f top5: %.3f time: %.3f' %
              (losses.avg, top1.avg, top5.avg, t2 - t1))
    if self.acc_metric == 'acc1':
        return top1.avg
    elif self.acc_metric == 'acc5':
        return top5.avg
    else:
        raise NotImplementedError
def train_forward_parameters(args, net, predictions, targets, loss_function,
                             forward_optimizer):
    """ Train the forward parameters on the current mini-batch.

    Computes the loss on `predictions`, backpropagates targets through the
    network (optionally only through one randomly chosen layer), and steps
    nothing itself — the caller owns the optimizer step.

    Returns:
        (batch_accuracy, batch_loss): accuracy is None for regression.
    """
    # PEP 8 (E712): compare truthiness with `not`, never `== False`
    if not predictions.requires_grad:
        # we need the gradient of the loss with respect to the network
        # output. If a LeeDTPNetwork is used, this is already the case.
        # The gradient will also be saved in the activations attribute of
        # the output layer of the network
        predictions.requires_grad = True

    save_target = args.save_GN_activations_angle or \
        args.save_BP_activations_angle

    forward_optimizer.zero_grad()
    loss = loss_function(predictions, targets)

    if not args.train_randomized:
        net.backward(loss, args.target_stepsize, save_target=save_target,
                     norm_ratio=args.norm_ratio)
    else:
        # propagate targets through one randomly chosen layer only
        k = np.random.randint(0, net.depth)
        net.backward_random(loss, args.target_stepsize, k,
                            save_target=save_target,
                            norm_ratio=args.norm_ratio)

    if args.classification:
        if args.output_activation == 'sigmoid':
            batch_accuracy = utils.accuracy(predictions,
                                            utils.one_hot_to_int(targets))
        else:  # softmax
            batch_accuracy = utils.accuracy(predictions, targets)
    else:
        batch_accuracy = None
    batch_loss = loss

    return batch_accuracy, batch_loss
def train(model, train_loader, optimizer, criterion, summary_writer, epoch,
          scheduler=None):
    """Train `model` for one epoch with per-step LR scheduling and logging.

    NOTE(review): the forward pass receives the labels too (`model(x, y)`)
    — presumably a head that needs targets at train time (e.g. a margin
    loss); confirm against the model definition.
    """
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()
    top1 = AverageMeter()

    model.train()
    end = time.time()
    for i, (x, y) in enumerate(train_loader):
        x = x.cuda(non_blocking=True)
        y = y.cuda(non_blocking=True)
        data_time.update(time.time() - end)

        scores = model(x, y)
        loss = criterion(scores, y)
        acc = accuracy(scores, y) * 100

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # global step index drives both the scheduler and the logs
        step = epoch * len(train_loader) + i
        if scheduler is not None:
            scheduler.step(step)

        train_loss.update(loss.item(), x.shape[0])
        top1.update(acc, x.shape[0])
        batch_time.update(time.time() - end)
        end = time.time()

        # log
        summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'],
                                  step)
        summary_writer.add_scalar('loss', loss.item(), step)
        summary_writer.add_scalar('train_acc', acc, step)

        if i % cfg.train.print_freq == 0:
            lr = optimizer.param_groups[0]["lr"]
            print(f'Train: [{epoch}][{i}/{len(train_loader)}] '
                  f'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  f'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  f'Lr: {lr:.5f} '
                  f'prec1: {top1.val:.3f} ({top1.avg:.3f}) '
                  f'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f})')
def validate(val_loader, model, criterion):
    """Evaluate `model` on `val_loader`; log and return averages.

    Returns:
        (top1_avg, top5_avg): sample-weighted average precisions.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        target = target.cuda()
        input = input.cuda()
        with torch.no_grad():
            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1[0], input.size(0))
            top5.update(prec5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

    # (removed a stale commented-out per-batch progress print block)
    logging.info(
        " ---------------------------------------------------------------")
    logging.info(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))
    return top1.avg, top5.avg
def evaluate():
    """Build the dataset and model from module-level config, then run a
    full evaluation pass, printing running loss and top-1/top-5 accuracy.
    """
    # build dataset
    val_loader, n_class = get_dataset()
    # build model
    net = get_model(n_class)
    criterion = nn.CrossEntropyLoss()
    if use_cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net, list(range(args.n_gpu)))
        cudnn.benchmark = True

    # begin eval
    net.eval()
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            # removed the deprecated `Variable(inputs), Variable(targets)`
            # re-wrapping: Variable is a no-op in modern PyTorch
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))
            # timing
            batch_time.update(time.time() - end)
            end = time.time()

            progress_bar(batch_idx, len(val_loader),
                         'Loss: {:.3f} | Acc1: {:.3f}% | Acc5: {:.3f}%'
                         .format(losses.avg, top1.avg, top5.avg))
def train(epoch, train_loader):
    """Train the global `net` for one epoch; log epoch averages to `writer`."""
    print('\nEpoch: %d' % epoch)
    net.train()
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))
        # timing
        batch_time.update(time.time() - end)
        end = time.time()

        progress_bar(
            batch_idx, len(train_loader),
            'Loss: {:.3f} | Acc1: {:.3f}% | Acc5: {:.3f}%'.format(
                losses.avg, top1.avg, top5.avg))

    writer.add_scalar('loss/train', losses.avg, epoch)
    writer.add_scalar('acc/train_top1', top1.avg, epoch)
    writer.add_scalar('acc/train_top5', top5.avg, epoch)
def train(net, optimizer, scheduler, trainloader, testloader, criterion,
          summary_writer, args):
    """Iteration-based training loop (runs up to args.max_iters steps).

    Cycles the train loader indefinitely via `inf_generator`, evaluates
    every args.eval_freq steps, and checkpoints the best top-1 model.
    Reads and advances the module-level `global_step`.
    """
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    best_acc = 0

    end = time.time()
    global global_step
    for inputs, targets in inf_generator(trainloader):
        if global_step >= args.max_iters:
            break
        data_time.update(time.time() - end)
        inputs, targets = inputs.to(args.device), targets.to(args.device)

        # switch to train mode (validate() below flips it to eval)
        net.train()

        scheduler.step(global_step)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)

        prec1, prec5 = accuracy(outputs, targets, topk=(1, 5))
        top1.update(prec1[0], inputs.size(0))
        top5.update(prec5[0], inputs.size(0))

        loss.backward()
        optimizer.step()

        train_loss.update(loss.item(), inputs.size(0))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'],
                                  global_step)
        summary_writer.add_scalar('top1', top1.val, global_step)
        summary_writer.add_scalar('top5', top5.val, global_step)
        summary_writer.add_scalar('batch_time', batch_time.val, global_step)
        summary_writer.add_scalar('data_time', data_time.val, global_step)
        summary_writer.add_scalar('train_loss', train_loss.val, global_step)

        if global_step % args.print_freq == 0:
            lr = optimizer.param_groups[0]['lr']
            print(f'Train: [{global_step}/{args.max_iters}] '
                  f'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  f'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  f'Lr: {lr:.5f} '
                  f'prec1: {top1.val:.3f} ({top1.avg:.3f}) '
                  f'prec5: {top5.val:.3f} ({top5.avg:.3f}) '
                  f'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f})')

        if (global_step + 1) % args.eval_freq == 0 or \
                global_step == args.max_iters - 1:
            acc = validate(testloader, net, criterion, device=args.device,
                           print_freq=args.print_freq)
            summary_writer.add_scalar('val_top1', acc, global_step)
            if acc > best_acc:
                best_acc = acc
                state = {
                    'step': global_step,
                    'best_acc': best_acc,
                    'net': net.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                os.makedirs(args.model_dir, exist_ok=True)
                torch.save(state,
                           os.path.join(args.model_dir, 'ckpt.pth.tar'))
            print('best accuracy: {:.2f}\n'.format(best_acc))

        global_step += 1
def prune_a_layer(m):
    """Greedy neuron re-adding loop for a single prunable layer `m`.

    Starting from an emptied layer, repeatedly picks candidate neurons
    to add back until training top-1 recovers to within args.top1_tol of
    the initial accuracy, or every prunable neuron has been re-added.

    Returns:
        (a_para, a_num, cur_top1, isalladd): the layer's alpha
        parameters, active-neuron count, final top-1, and a flag set to
        1 when all neurons had to be re-added.

    NOTE(review): relies on module globals (net, train_loader,
    eval_train_loader, device, args); block structure reconstructed from
    whitespace-mangled source — verify loop boundaries against history.
    """
    isalladd = 0
    num_layer = m.layer_num
    init_loss, init_top1, init_top5 = eval_train(net, eval_train_loader)
    print(
        'Layer: ({:d}); Init Loss: {:.4f}; Init top1: ({:.4f}%); Init top5: ({:.4f}%)'
        .format(num_layer, init_loss, init_top1, init_top5))
    m.switch_mode('prune')
    # prunable neuron list; only consider the neuron that is inside at initial
    prunable_neuron = (m.prune_a.cpu().data.squeeze().numpy() > 0)
    all_neuron = np.sum((m.prune_a.cpu().data.numpy() > 0).astype(int))
    m.empty_all_eps()
    is_first_neuron = 1
    iteration = 0
    verbose = True
    while 1:
        # get a mini-batch of data
        for datas, data_labels in train_loader:
            break
        with torch.no_grad():
            datas = datas.to(device)
            data_labels = data_labels.to(device)
            targets = data_labels
        candidate_plus = decide_candidate_set(
            m, prunable_neuron, num_evaluate=args.num_evaluate)
        decide_candidate(datas, targets, m, candidate_plus)
        outputs = net(datas)
        batch_top1, batch_top5 = accuracy(outputs.data, data_labels.data,
                                          topk=(1, 5))
        # cheap batch-level gate before the expensive full-set eval
        if batch_top1 >= (1. - args.skip_eval_converge) * init_top1:
            # evaluate whether converged
            cur_loss, cur_top1, cur_top5 = eval_train(net, eval_train_loader)
            cur_neuron = np.sum(
                (m.prune_a.cpu().data.numpy() > 0).astype(int))
            if verbose:
                print('Converge Eval------', args.top1_tol)
                print(
                    'Layer: ({:d}); Cur Loss: {:.4f}; Init Loss: {:.4f}; Cur top1: ({:.4f}%); Init top1: {:.4f}'
                    .format(num_layer, cur_loss, init_loss, cur_top1,
                            init_top1))
                print('Cur_neuron/ All neuron', cur_neuron, m.scale)
            if cur_top1 >= (1. - args.top1_tol) * (init_top1):
                break  # reach convergence
        else:
            cur_neuron = np.sum(
                (m.prune_a.cpu().data.numpy() > 0).astype(int))
            if verbose:
                print('Layer: ({:d}); Batch top1: {:.4f}'.format(
                    num_layer, batch_top1))
                print('Cur_neuron/ All neuron', cur_neuron, all_neuron)
        if cur_neuron >= all_neuron:
            print('all the neurons are added')
            m.set_alpha_to_init(prunable_neuron)
            isalladd = 1
            break
        print("This layer's Neuron", cur_neuron)
    cur_loss, cur_top1, cur_top5 = eval_train(net, eval_train_loader)
    print(
        'Layer (before finetune): ({:d}); Cur Loss: {:.4f}; Cur top1: ({:.4f}%); Cur top5: ({:.4f}%)'
        .format(num_layer, cur_loss, cur_top1, cur_top5))
    print('=' * 90)
    a_para = m.prune_a.data
    a_num = np.sum((m.prune_a.cpu().data.numpy() > 0).astype(int))
    cur_loss, cur_top1, cur_top5 = eval_train(net, eval_train_loader)
    m.set_alpha_to_init(prunable_neuron)
    return a_para, a_num, cur_top1, isalladd
def train(cfg, train_loader, main_model, scheduler, criterion, main_optimizer,
          epoch):
    """One training epoch; returns (avg_loss, balanced_accuracy * 100).

    NOTE(review): `scheduler` and `criterion` are accepted but not used
    here — the model computes its own loss. Balanced accuracy is computed
    over the epoch's accumulated logits.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_losses = AverageMeter()

    # switch to train mode
    main_model.train()
    end = time.time()

    logit_all = []
    target_all = []
    for i, (input, target, index, mmse, segment,
            age) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        index = index.to(device)
        if cfg['training_parameters']['use_age']:
            age = age.to(device)
        else:
            age = None

        # compute output (model returns its own loss alongside logits)
        input = input.to(device)
        target = target.to(device)
        main_loss, logit = main_model([input, age], target)
        main_loss = main_loss.mean()
        logit_all.append(logit.data.cpu())
        target_all.append(target.data.cpu())
        acc, _ = accuracy(logit.data.cpu(), target.data.cpu())

        main_optimizer.zero_grad()
        main_loss.backward()
        clip_gradients(main_model, i,
                       cfg['training_parameters']['max_grad_l2_norm'])
        main_optimizer.step()

        # measure accuracy and record loss
        main_losses.update(main_loss.cpu().item(), input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % cfg['training_parameters']['print_freq'] == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Accuracy {accuracy:.3f}\t'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=main_losses,
                      accuracy=acc[0].item()))
    logit_all = torch.cat(logit_all).numpy()
    target_all = torch.cat(target_all).numpy()
    acc_all = balanced_accuracy_score(target_all, np.argmax(logit_all, 1))
    return main_losses.avg, acc_all * 100
def calc_accuracy(self, output, target):
    """Compute top-1 accuracy for the given logits and labels."""
    topk = (1, )
    return utils.accuracy(output, target, topk=topk)
def validate(cfg, val_loader, main_model, criterion, epoch):
    """Validation epoch.

    Returns:
        (avg_loss, balanced_accuracy * 100, confusion_matrix,
         [fpr, tpr, roc_auc]) where the final entry holds ROC-curve data.

    NOTE: `criterion` is accepted for interface symmetry with train()
    but unused — the model returns its own loss.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_losses = AverageMeter()
    end = time.time()
    correct_all = 0.0

    # switch to validation mode
    main_model.eval()
    confusion_matrix = torch.zeros(cfg['model']['n_label'],
                                   cfg['model']['n_label'])
    logit_all = []
    target_all = []
    for i, (input, target, patient_idx, mmse, segment,
            age) in enumerate(val_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        input = input.to(device)
        target = target.to(device)
        if cfg['training_parameters']['use_age']:
            age = age.to(device)
        else:
            age = None

        # compute output
        main_loss, logit = main_model([input, age], target)
        main_loss = main_loss.mean()
        # append plain CPU copies (re-wrapping in torch.tensor(...) was a
        # redundant copy that also triggers a UserWarning)
        logit_all.append(logit.data.cpu())
        target_all.append(target.data.cpu())

        # single accuracy call — previously computed twice per batch with
        # the first result discarded
        acc, correct = accuracy(logit.cpu(), target.cpu())
        correct_all += correct[0].item()

        _, preds = torch.max(logit.cpu(), 1)
        for t, p in zip(target.cpu().view(-1), preds.view(-1)):
            confusion_matrix[t.long(), p.long()] += 1

        # measure accuracy and record loss
        main_losses.update(main_loss.cpu().item(), input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % cfg['training_parameters']['print_freq'] == 0:
            print('Validation [{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Accuracy {accuracy:.3f}\t'.format(
                      epoch, i, len(val_loader), batch_time=batch_time,
                      data_time=data_time, loss=main_losses,
                      accuracy=acc[0].item()))

    #plot AUC curves
    logit_all = torch.cat(logit_all).numpy()
    target_all = torch.cat(target_all).numpy()
    acc_all = balanced_accuracy_score(target_all, np.argmax(logit_all, 1))
    plotting_fpr, plotting_tpr, roc_auc = get_auc_data(
        logit_all, target_all, cfg['model']['n_label'])
    return main_losses.avg, acc_all * 100, confusion_matrix, [
        plotting_fpr, plotting_tpr, roc_auc
    ]
def train(net, ema_net, optimizer, ema_optimizer, trainloader,
          unlabeled_trainloder, testloader, writer, args):
    """MixMatch-style semi-supervised training loop.

    Mixes labeled batches with label guesses on unlabeled augmentations,
    optimizes cross-entropy + warmed-up L2 consistency loss, maintains an
    EMA copy of the network, and periodically evaluates and checkpoints
    the best EMA validation accuracy (module-level `best_acc`).
    """
    end = time.time()

    def inf_generator():
        # cycle the paired labeled/unlabeled loaders forever
        while True:
            for data in zip(trainloader, unlabeled_trainloder):
                yield data

    for step, ((x_in, l_in), y_in) in enumerate(inf_generator(),
                                                start=args.start_step):
        if step >= args.max_iters:
            break
        data_time = time.time() - end

        with torch.no_grad():
            x_in = x_in.to(args.device)
            l_in = l_in.to(args.device)
            y_in = [yi.to(args.device) for yi in y_in]
            # pseudo-labels averaged over the unlabeled augmentations
            guess = guess_label(y_in, net).detach_()

        nu = len(y_in)
        bs = x_in.shape[0]
        assert x_in.shape[0] == y_in[0].shape[0]

        # mixup
        l_in_onehot = torch.zeros(bs, args.num_class).float().to(
            args.device).scatter_(1, l_in.view(-1, 1), 1)
        xy, l_xy = mixup(torch.cat([x_in] + y_in, dim=0),
                         torch.cat([l_in_onehot] + [guess] * nu, dim=0))
        # reshape to (nu+1, bs, w, h, c)
        xy = xy.reshape([nu + 1] + list(x_in.shape))
        # reshape to (nu+1, bs)
        l_xy = l_xy.reshape([nu + 1] + list(l_in_onehot.shape))
        x, y = xy[0], xy[1:]
        l_x, l_y = l_xy[0], l_xy[1:]

        # forward. only update bn in one step
        net.train()
        batches = interleave([x, y[0], y[1]], bs)
        logits = [net(batches[0])]
        for batchi in batches[1:]:
            logits.append(net(batchi))
        logits = interleave(logits, bs)
        logits_x = logits[0]
        logits_y = torch.cat(logits[1:], 0)
        # logits_x = net(x)
        # logits_y = net(y.reshape([-1, ] + list(x_in.shape)[1:]))

        # loss
        # cross entropy loss for soft label
        loss_xe = torch.mean(
            torch.sum(-l_x * log_softmax(logits_x, dim=-1), dim=1))
        # L2 loss
        loss_l2u = mse_loss(softmax(logits_y, dim=-1),
                            l_y.reshape(nu * bs, args.num_class))
        # weight for unlabeled loss with warmup
        w_match = 75 * min(1, step / 16384)
        loss = loss_xe + w_match * loss_l2u

        prec1, = accuracy(logits_x, l_in, topk=(1, ))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        ema_optimizer.step()

        # measure elapsed time
        batch_time = time.time() - end
        end = time.time()

        # log x-axis is in units of samples, not steps
        log_step = args.batch_size * step
        if step % args.print_freq == 0:
            writer.add_scalar('w_match', w_match, log_step)
            writer.add_scalar('top1/train', prec1.item(), log_step)
            writer.add_scalar('loss/all', loss.item(), log_step)
            writer.add_scalar('loss/xe', loss_xe.item(), log_step)
            writer.add_scalar('loss/l2u', loss_l2u.item(), log_step)
            print(f'Train: [{step}/{args.max_iters}] '
                  f'Time: {batch_time:.3f} '
                  f'Data: {data_time:.3f} '
                  f'prec1: {prec1.item():.3f} '
                  f'w_match: {w_match:.3f} '
                  f'Loss: {loss.item():.3f} '
                  f'Loss_xe: {loss_xe.item():.3f} '
                  f'Loss_l2u: {loss_l2u.item():.4f}')

        if (step + 1) % args.eval_freq == 0 or step == args.max_iters - 1:
            # fold BN statistics into the EMA model before evaluating
            ema_optimizer.step(bn=True)
            get_acc = partial(validate, device=args.device,
                              print_freq=args.print_freq)
            writer.add_scalar(
                'top1/train_ema',
                get_acc(trainloader, ema_net, prefix='train_ema'), log_step)
            writer.add_scalar('top1/val',
                              get_acc(testloader, net, prefix='val'),
                              log_step)
            val_ema_acc = get_acc(testloader, ema_net, prefix='val_ema')
            writer.add_scalar('top1/val_ema', val_ema_acc, log_step)

            global best_acc
            if val_ema_acc > best_acc:
                best_acc = val_ema_acc
                state = {
                    'step': step,
                    'best_acc': best_acc,
                    'net': net.state_dict(),
                    'ema_net': ema_net.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                os.makedirs(args.model_dir, exist_ok=True)
                torch.save(state, os.path.join(args.model_dir,
                                               'ckpt.pth.tar'))
            print('best accuracy: {:.2f}\n'.format(best_acc))
def train_bp(args, device, train_loader, net, writer, test_loader, summary,
             val_loader):
    """Train ``net`` with plain backpropagation and log train/val/test metrics.

    Args:
        args: experiment configuration namespace (flags such as
            ``classification``, ``regression``, ``output_activation``,
            ``epochs``, ``no_val_set``, ``save_logs``, ``output_space_plot``
            are read here).
        device: torch device the batches are moved to.
        train_loader: training ``DataLoader``.
        net: the network to train.
        writer: tensorboard-style writer passed to ``utils.save_logs``.
        test_loader: test ``DataLoader``.
        summary: dict collecting the run's results; updated in place.
        val_loader: validation ``DataLoader`` (ignored when
            ``args.no_val_set`` is set, except for the output-space plot).

    Returns:
        The (mutated) ``summary`` dict. Returns early after producing the
        plot when ``args.output_space_plot`` is set.

    Raises:
        ValueError: unsupported dataset / output-activation combination.
        utils.NetworkError: output-space plot requested for a classification
            task without a sigmoid output layer.
    """
    print('Training network ...')
    net.train()
    forward_optimizer = utils.OptimizerList(args, net)

    nb_batches = len(train_loader)

    # Pick the loss matching the task: CE for softmax classification,
    # MSE for sigmoid classification (one-hot targets) and for regression.
    if args.classification:
        if args.output_activation == 'softmax':
            loss_function = nn.CrossEntropyLoss()
        elif args.output_activation == 'sigmoid':
            loss_function = nn.MSELoss()
        else:
            raise ValueError('The mnist dataset can only be combined with a '
                             'sigmoid or softmax output activation.')
    elif args.regression:
        loss_function = nn.MSELoss()
    else:
        raise ValueError('The provided dataset {} is not supported.'.format(
            args.dataset))

    # Per-epoch metric histories.
    epoch_losses = np.array([])
    test_losses = np.array([])
    val_losses = np.array([])

    # Defaults so save_logs gets defined values when there is no val set.
    val_loss = None
    val_accuracy = None

    if args.classification:
        epoch_accuracies = np.array([])
        test_accuracies = np.array([])
        val_accuracies = np.array([])

    if args.output_space_plot:
        # Plot-only mode: take a single validation batch and return.
        forward_optimizer.zero_grad()
        val_loader_iter = iter(val_loader)
        # FIX: `.next()` is the Python 2 iterator protocol and raises
        # AttributeError on Python 3; use the builtin next().
        inputs, targets = next(val_loader_iter)
        inputs, targets = inputs.to(device), targets.to(device)
        if args.classification:
            if args.output_activation == 'sigmoid':
                targets = utils.int_to_one_hot(targets, 10, device,
                                               soft_target=1.)
            else:
                raise utils.NetworkError(
                    "output space plot for classification "
                    "tasks is only possible with sigmoid "
                    "output layer.")
        utils.make_plot_output_space_bp(args, net,
                                        args.output_space_plot_layer_idx,
                                        loss_function,
                                        targets,
                                        inputs,
                                        steps=20)
        return summary

    for e in range(args.epochs):
        if args.classification:
            running_accuracy = 0
        else:
            running_accuracy = None
        running_loss = 0

        for i, (inputs, targets) in enumerate(train_loader):
            if args.double_precision:
                # NOTE(review): only the inputs are cast to double here;
                # targets keep their dtype — confirm this is intended for
                # regression with MSELoss.
                inputs, targets = inputs.double().to(device), targets.to(
                    device)
            else:
                inputs, targets = inputs.to(device), targets.to(device)
            if not args.network_type == 'BPConv':
                # Fully-connected nets expect flat input vectors.
                inputs = inputs.flatten(1, -1)
            if args.classification and \
                    args.output_activation == 'sigmoid':
                # convert targets to one hot vectors for MSE loss:
                targets = utils.int_to_one_hot(targets, 10, device,
                                               soft_target=args.soft_target)

            forward_optimizer.zero_grad()
            predictions = net(inputs)
            loss = loss_function(predictions, targets)
            loss.backward()
            forward_optimizer.step()

            running_loss += loss.item()
            if args.classification:
                if args.output_activation == 'sigmoid':
                    running_accuracy += utils.accuracy(
                        predictions, utils.one_hot_to_int(targets))
                else:  # softmax
                    running_accuracy += utils.accuracy(predictions, targets)

        test_accuracy, test_loss = test_bp(args, device, net, test_loader,
                                           loss_function)
        if not args.no_val_set:
            val_accuracy, val_loss = test_bp(args, device, net, val_loader,
                                             loss_function)
        epoch_loss = running_loss / nb_batches
        if args.classification:
            epoch_accuracy = running_accuracy / nb_batches
        else:
            epoch_accuracy = None

        print('Epoch {} -- training loss = {}.'.format(e + 1, epoch_loss))
        if not args.no_val_set:
            print('Epoch {} -- val loss = {}.'.format(e + 1, val_loss))
        print('Epoch {} -- test loss = {}.'.format(e + 1, test_loss))
        if args.classification:
            print('Epoch {} -- training acc = {}%'.format(
                e + 1, epoch_accuracy * 100))
            if not args.no_val_set:
                print('Epoch {} -- val acc = {}%'.format(
                    e + 1, val_accuracy * 100))
            print('Epoch {} -- test acc = {}%'.format(
                e + 1, test_accuracy * 100))

        if args.save_logs:
            utils.save_logs(writer, step=e + 1, net=net,
                            loss=epoch_loss, accuracy=epoch_accuracy,
                            test_loss=test_loss, test_accuracy=test_accuracy,
                            val_loss=val_loss, val_accuracy=val_accuracy)

        epoch_losses = np.append(epoch_losses, epoch_loss)
        test_losses = np.append(test_losses, test_loss)
        if not args.no_val_set:
            val_losses = np.append(val_losses, val_loss)
        if args.classification:
            epoch_accuracies = np.append(epoch_accuracies, epoch_accuracy)
            test_accuracies = np.append(test_accuracies, test_accuracy)
            if not args.no_val_set:
                val_accuracies = np.append(val_accuracies, val_accuracy)

        utils.save_summary_dict(args, summary)

        if e > 4:
            # stop unpromising runs
            if args.dataset in ['mnist', 'fashion_mnist']:
                if epoch_accuracy < 0.4:
                    # error code to indicate pruned run
                    print('writing error code -1')
                    summary['finished'] = -1
                    break
            if args.dataset in ['cifar10']:
                if epoch_accuracy < 0.25:
                    # error code to indicate pruned run
                    print('writing error code -1')
                    summary['finished'] = -1
                    break

    if args.epochs != 0:
        # save training summary results in summary dict
        summary['loss_train_last'] = epoch_loss
        summary['loss_test_last'] = test_loss
        summary['loss_train_best'] = epoch_losses.min()
        summary['loss_test_best'] = test_losses.min()
        summary['loss_train'] = epoch_losses
        summary['loss_test'] = test_losses
        if not args.no_val_set:
            summary['loss_val_last'] = val_loss
            summary['loss_val_best'] = val_losses.min()
            summary['loss_val'] = val_losses
            # pick the epoch with best validation loss and save the
            # corresponding test loss
            best_epoch = val_losses.argmin()
            summary['epoch_best_loss'] = best_epoch
            summary['loss_test_val_best'] = test_losses[best_epoch]
            summary['loss_train_val_best'] = epoch_losses[best_epoch]

        if args.classification:
            summary['acc_train_last'] = epoch_accuracy
            summary['acc_test_last'] = test_accuracy
            summary['acc_train_best'] = epoch_accuracies.max()
            summary['acc_test_best'] = test_accuracies.max()
            summary['acc_train'] = epoch_accuracies
            summary['acc_test'] = test_accuracies
            if not args.no_val_set:
                summary['acc_val'] = val_accuracies
                summary['acc_val_last'] = val_accuracy
                summary['acc_val_best'] = val_accuracies.max()
                # pick the epoch with best validation acc and save the
                # corresponding test acc
                best_epoch = val_accuracies.argmax()
                summary['epoch_best_acc'] = best_epoch
                summary['acc_test_val_best'] = test_accuracies[best_epoch]
                summary['acc_train_val_best'] = epoch_accuracies[best_epoch]

    utils.save_summary_dict(args, summary)
    print('Training network ... Done')
    return summary