def validate(val_loader, model, criterion):
    global args, rank, world_size, best_prec1

    # validation doesn't need to track the history
    batch_time = AverageMeter(0)
    losses = AverageMeter(0)
    top1 = AverageMeter(0)
    top5 = AverageMeter(0)

    # switch to evaluate mode
    model.eval()

    c1 = 0
    c5 = 0
    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        if i == len(val_loader) / (args.batch_size * world_size):
            break
        input = input.cuda()
        if args.double == 1:
            input = input.double()
        if args.half == 1:
            input = input.half()
        target = target.cuda()

        # compute output
        with torch.no_grad():
            output = model(input)

        # measure accuracy and record loss
        loss = criterion(output, target) / world_size
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

        reduced_loss = loss.data.clone()
        reduced_prec1 = prec1.clone() / world_size
        reduced_prec5 = prec5.clone() / world_size

        if args.dist == 1:
            dist.all_reduce(reduced_loss)
            dist.all_reduce(reduced_prec1)
            dist.all_reduce(reduced_prec5)

        losses.update(reduced_loss.item())
        top1.update(reduced_prec1.item())
        top5.update(reduced_prec5.item())

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    if rank == 0:
        print(' * All Loss {loss.avg:.4f} Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
              .format(loss=losses, top1=top1, top5=top5))

    model.train()
    return losses.avg, top1.avg, top5.avg
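Nearly every routine in this collection relies on an `AverageMeter` helper that is not defined here. The sketch below is an assumption in the spirit of the PyTorch ImageNet example, covering the no-argument core; the `AverageMeter(0)` and `AverageMeter('Name', ':fmt')` variants seen in some snippets presumably add a window length or a name/format string on top of this.

```python
# Hypothetical AverageMeter (assumption, not the authors' exact code):
# tracks the latest value, a weighted sum, a count, and the running mean.
class AverageMeter:
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0   # most recent value
        self.sum = 0.0   # weighted sum of all values
        self.count = 0   # total weight (usually number of samples)
        self.avg = 0.0   # running mean

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
```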
def model_inference_imagenet(base_classifier, loader, device, display=False, print_freq=1000):
    top1 = AverageMeter()
    top5 = AverageMeter()
    start = time.time()
    base_classifier.eval()

    # Regular dataset:
    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            inputs = inputs.to(device, non_blocking=True)
            # as_tensor avoids the copy-construct warning when targets is already a tensor
            targets = torch.as_tensor(targets).to(device, non_blocking=True)

            outputs = base_classifier(inputs)
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            if i % print_freq == 0 and display:
                print("Test : [{0}/{1}]\t"
                      "Acc@1 {top1.avg:.3f}\t"
                      "Acc@5 {top5.avg:.3f}".format(i, len(loader),
                                                    top1=top1, top5=top5))

    end = time.time()
    if display:
        print("Inference Time: {0:.3f}".format(end - start))
        print("Final Accuracy: [{0}]".format(top1.avg))

    return top1.avg, top5.avg
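Most snippets also assume an `accuracy(output, target, topk=(1, 5))` helper returning top-k precision as percentages, one tensor per requested k. A minimal sketch under that assumption, again following the standard ImageNet-example convention:

```python
import torch

# Hypothetical top-k accuracy helper matching the call sites above:
# returns a list of percentage tensors, one per k in `topk`.
def accuracy(output, target, topk=(1,)):
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # indices of the top-k predictions: (batch, maxk) -> (maxk, batch)
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
```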
def train(args, train_queue, model, criterion, optimizer):
    objs = train_utils.AvgrageMeter()
    top1 = train_utils.AvgrageMeter()
    top5 = train_utils.AvgrageMeter()
    model.train()

    for step, (input, target) in enumerate(train_queue):
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        optimizer.zero_grad()
        logits, logits_aux = model(input)
        loss = criterion(logits, target)
        if args.train.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.train.auxiliary_weight * loss_aux
        loss.backward()
        # clip_grad_norm is deprecated; use the in-place clip_grad_norm_
        nn.utils.clip_grad_norm_(model.parameters(), args.train.grad_clip)
        optimizer.step()

        prec1, prec5 = train_utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.run.report_freq == 0:
            logging.info("train %03d %e %f %f", step, objs.avg, top1.avg, top5.avg)

    return top1.avg, objs.avg
def train(loader: DataLoader, model: torch.nn.Module, criterion, optimizer: Optimizer, epoch: int, noise_sd: float): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() # switch to train mode model.train() device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') for i, (inputs, targets) in enumerate(loader): # measure data loading time data_time.update(time.time() - end) inputs = inputs.to(device) targets = targets.to(device) # augment inputs with noise inputs = inputs + torch.randn_like(inputs, device=device) * noise_sd # compute output outputs = model(inputs) loss = criterion(outputs, targets) # measure accuracy and record loss acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(acc1.item(), inputs.size(0)) top5.update(acc5.item(), inputs.size(0)) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print('Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format( epoch, i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) return (losses.avg, top1.avg)
def train(training_model, training_data, opts, lr_scheduler, epochs, optimizer): nr_batches = len(training_data) try: for epoch in epochs: logging.info("Epoch {0}/{1}".format(epoch, opts.epoch)) bar = tqdm(training_data) sum_loss = 0.0 sum_acc = 0.0 start_time = time.time() total_sample = 0 for batch_idx, (data, labels) in enumerate(bar): preds, losses = training_model(data, labels) if not opts.disable_metrics: with torch.no_grad(): # Convert to full precision for CPU execute. losses = losses.float() preds = preds.float() mean_loss = torch.mean(losses).item() acc = accuracy(preds, labels) sum_acc += acc sum_loss += mean_loss aggregated_loss = sum_loss / (batch_idx+1) aggregated_accuracy = sum_acc / (batch_idx+1) bar.set_description("Loss:{:0.4f} | Accuracy:{:0.2f}%".format(aggregated_loss, aggregated_accuracy)) total_sample += data.size()[0] end_time = time.time() if not opts.disable_metrics: print("Epoch {}: Train accuracy is {:0.2f}%".format(epoch, aggregated_accuracy)) print("Throughput of the epoch:{:0.1f} img/sec".format(total_sample / (end_time-start_time))) # save if not opts.checkpoint_path == "": if not os.path.exists(opts.checkpoint_path): os.makedirs(opts.checkpoint_path) filename = "{0}_{1}_{2}.pt".format(opts.model, opts.data, epoch) save_path = os.path.join(opts.checkpoint_path, filename) training_model.copyWeightsToHost() state = training_model.model.model.state_dict() optimizer_state = optimizer.state_dict() torch.save({ 'epoch': epoch, 'model_state_dict': state, 'optimizer_state_dict': optimizer_state, 'loss': aggregated_loss, 'train_accuracy': aggregated_accuracy, 'opts': opts }, save_path) # lr schedule if not(lr_scheduler is None): lr_scheduler.step() new_optimizer = copy.copy(optimizer) training_model.setOptimizer(new_optimizer) logging.info(f"Learning rate is changed to {lr_scheduler.get_last_lr()}") finally: # kill the process which fetch the data if isinstance(training_data, AsyncDataLoader): training_data.stop_data_fetch()
def train(loader: DataLoader, model: torch.nn.Module, criterion, optimizer: Optimizer, epoch: int, noise_sd: float, device: torch.device, writer=None): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() # switch to train mode model.train() for i, (inputs, targets) in enumerate(loader): # measure data loading time data_time.update(time.time() - end) inputs, targets = inputs.to(device), targets.to(device) batch_size = inputs.size(0) # augment inputs with noise inputs = inputs + torch.randn_like(inputs, device=device) * noise_sd # compute output outputs = model(inputs) loss = criterion(outputs, targets) # measure accuracy and record loss acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), batch_size) top1.update(acc1.item(), batch_size) top5.update(acc5.item(), batch_size) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print('Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.avg:.3f}\t' 'Data {data_time.avg:.3f}\t' 'Loss {loss.avg:.4f}\t' 'Acc@1 {top1.avg:.3f}\t' 'Acc@5 {top5.avg:.3f}'.format( epoch, i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) if writer: writer.add_scalar('loss/train', losses.avg, epoch) writer.add_scalar('batch_time', batch_time.avg, epoch) writer.add_scalar('accuracy/train@1', top1.avg, epoch) writer.add_scalar('accuracy/train@5', top5.avg, epoch) return (losses.avg, top1.avg)
def test(loader: DataLoader, model: torch.nn.Module, criterion, noise_sd: float): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() # switch to eval mode model.eval() with torch.no_grad(): for i, (inputs, targets) in enumerate(loader): # measure data loading time data_time.update(time.time() - end) inputs = inputs.cuda() targets = targets.cuda() # augment inputs with noise inputs = inputs + randgn_like(inputs, p=args.p, device='cuda') * noise_sd # compute output if (args.scale_down != 1): inputs = torch.nn.functional.interpolate( inputs, scale_factor=args.scale_down) outputs = model(inputs) loss = criterion(outputs, targets) # measure accuracy and record loss acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(acc1.item(), inputs.size(0)) top5.update(acc5.item(), inputs.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print('Test: [{0}/{1}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format( i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) return (losses.avg, top1.avg)
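For context, a minimal driver wiring the noise-augmented `train()`/`test()` pair above might look like the sketch below. Every concrete choice here (dataset objects, optimizer settings, batch size) is an illustrative assumption, and the sketch further assumes the module-level `args` read inside those functions (e.g. `print_freq`) is already configured.

```python
import torch
from torch.optim import SGD
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# Hypothetical training driver; hyperparameters are placeholders.
def main_sketch(model, train_set, test_set, epochs=90, noise_sd=0.25):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
    train_loader = DataLoader(train_set, batch_size=256, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_set, batch_size=256, shuffle=False, num_workers=4)
    writer = SummaryWriter()

    for epoch in range(epochs):
        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, epoch, noise_sd, device, writer)
        test_loss, test_acc = test(test_loader, model, criterion, noise_sd)
        writer.add_scalar('accuracy/test@1', test_acc, epoch)
```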
def optimization_step(model, criterion, optimizer, optimizer_fp, x_batch, y_batch):
    # `async=True` is a syntax error in Python 3; use non_blocking instead
    x_batch = x_batch.cuda()
    y_batch = y_batch.cuda(non_blocking=True)

    # use quantized model
    logits = model(x_batch)

    # compute logloss
    loss = criterion(logits, y_batch)
    batch_loss = loss.item()

    # compute accuracies
    pred = F.softmax(logits, dim=1)
    batch_accuracy, batch_top5_accuracy = accuracy(y_batch, pred, top_k=(1, 5))

    optimizer.zero_grad()
    optimizer_fp.zero_grad()

    # compute grads for quantized model
    loss.backward()

    all_kernels = optimizer.param_groups[2]['params']
    all_fp_kernels = optimizer_fp.param_groups[0]['params']

    for i in range(len(all_kernels)):
        # get quantized kernel
        k = all_kernels[i]
        # get corresponding full precision kernel
        k_fp = all_fp_kernels[i]
        # get modified grads
        k_fp_grad = get_grads(k.grad.data, k.data)
        # grad for full precision kernel
        k_fp.grad = k_fp_grad
        # we don't need to update quantized kernel directly
        k.grad.data.zero_()

    # update the last fc layer and all batch norm params in quantized model
    optimizer.step()

    # update full precision kernels
    optimizer_fp.step()

    # update quantized kernels
    for i in range(len(all_kernels)):
        k = all_kernels[i]
        k_fp = all_fp_kernels[i]
        k.data = quantize(k_fp.data)

    return batch_loss, batch_accuracy, batch_top5_accuracy
def test(loader: DataLoader, model: torch.nn.Module, criterion, device, print_freq, display=False): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() # switch to eval mode model.eval() with torch.no_grad(): for i, (inputs, targets) in enumerate(loader): # measure data loading time data_time.update(time.time() - end) inputs = inputs.to(device) targets = targets.to(device) # compute output outputs = model(inputs) loss = criterion(outputs, targets) # measure accuracy and record loss acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(acc1.item(), inputs.size(0)) top5.update(acc5.item(), inputs.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % print_freq == 0 and display == True: print('Test : [{0}/{1}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format( i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) return (losses.avg, top1.avg, top5.avg)
def update(self, output, target, key):
    with torch.no_grad():
        loss = self.criterion(output, target)
        self.losses[key].update(loss.item(), target.size(0))

        t1, t5 = accuracy(output, target, topk=(1, 5))
        self.top1[key].update(t1.item(), target.size(0))
        self.top5[key].update(t5.item(), target.size(0))

        log_prob = F.log_softmax(output, 1)
        prob = log_prob.exp()
        entropy = -(log_prob * prob).sum(1).data
        self.ent[key].update(entropy.mean().item(), target.size(0))
def test(inference_model, test_data, opts):
    nr_batches = len(test_data)
    bar = tqdm(test_data, total=nr_batches)

    sum_acc = 0.0
    with torch.no_grad():
        for idx, (input_data, labels) in enumerate(bar):
            output = inference_model(input_data)
            output = output.float()
            sum_acc += accuracy(output, labels)
            aggregated_accuracy = sum_acc / (idx + 1)
            bar.set_description(f"Accuracy:{aggregated_accuracy:0.2f}%")

    acc = sum_acc / nr_batches
    logging.info(f"Accuracy on test set: {acc:0.2f}%")
    return acc
def test(inference_model, test_data, opts):
    nr_batches = len(test_data)
    bar = tqdm(test_data, total=nr_batches)

    sum_acc = 0.0
    with torch.no_grad():
        for idx, (data, labels) in enumerate(bar):
            if opts.precision == "half":
                data = data.half()
            data = data.contiguous()
            output = inference_model(data)
            output = output.float()
            sum_acc += accuracy(output, labels)
            aggregated_accuracy = sum_acc / (idx + 1)
            bar.set_description(
                "Accuracy:{:0.2f}%".format(aggregated_accuracy))

    print("Accuracy on test set: {:0.2f}%".format(sum_acc / len(test_data)))
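The tqdm-based IPU-style snippets accumulate `accuracy(preds, labels)` as a single percentage per batch rather than top-k tensors. A hedged sketch of that helper, consistent with how the value is summed and divided by the batch count above:

```python
import torch

# Hypothetical single-value accuracy used by the tqdm/IPU snippets:
# percentage of samples whose arg-max prediction matches the label.
def accuracy(preds, labels):
    _, predicted = torch.max(preds, dim=1)
    correct = (predicted == labels).sum().item()
    return 100.0 * correct / labels.size(0)
```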
def train(train_loader, model, criterion, optimizer, epoch, args): batch_time = AverageMeter('Time', ':6.3f') data_time = AverageMeter('Data', ':6.3f') losses = AverageMeter('Loss', ':.4e') top1 = AverageMeter('Acc@1', ':6.2f') top5 = AverageMeter('Acc@5', ':6.2f') progress = ProgressMeter( len(train_loader), [batch_time, data_time, losses, top1, top5], prefix="Epoch: [{}]".format(epoch)) # switch to train mode model.train() end = time.time() for i, (images, target) in enumerate(train_loader): # measure data loading time data_time.update(time.time() - end) if args.gpu is not None: images = images.cuda(args.gpu, non_blocking=True) target = target.cuda(args.gpu, non_blocking=True) # compute output output = model(images) loss = criterion(output, target) # measure accuracy and record loss acc1, acc5 = accuracy(output, target, topk=(1, 5)) losses.update(loss.item(), images.size(0)) top1.update(acc1[0], images.size(0)) top5.update(acc5[0], images.size(0)) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: progress.display(i)
def validate(val_loader, model, criterion, args): batch_time = AverageMeter('Time', ':6.3f') losses = AverageMeter('Loss', ':.4e') top1 = AverageMeter('Acc@1', ':6.2f') top5 = AverageMeter('Acc@5', ':6.2f') progress = ProgressMeter( len(val_loader), [batch_time, losses, top1, top5], prefix='Test: ') # switch to evaluate mode model.eval() with torch.no_grad(): end = time.time() for i, (images, target) in enumerate(val_loader): if args.gpu is not None: images = images.cuda(args.gpu, non_blocking=True) target = target.cuda(args.gpu, non_blocking=True) # compute output output = model(images) loss = criterion(output, target) # measure accuracy and record loss acc1, acc5 = accuracy(output, target, topk=(1, 5)) losses.update(loss.item(), images.size(0)) top1.update(acc1[0], images.size(0)) top5.update(acc5[0], images.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: progress.display(i) # TODO: this should also be done with the ProgressMeter print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' .format(top1=top1, top5=top5)) return top1.avg, top5.avg
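The `train`/`validate` pair above additionally assumes a `ProgressMeter` and a named `AverageMeter('Name', ':fmt')` variant that implements `__str__`. A sketch of the progress helper in the style of the PyTorch ImageNet example, offered as an assumption rather than the authors' code:

```python
# Hypothetical ProgressMeter matching the progress.display(i) calls above.
class ProgressMeter:
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters  # named AverageMeters providing __str__
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    @staticmethod
    def _get_batch_fmtstr(num_batches):
        num_digits = len(str(num_batches))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
```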
def train(loader: DataLoader, model: torch.nn.Module, criterion, optimizer: Optimizer, epoch: int, noise_sd: float, attacker: Attacker = None): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() # switch to train mode model.train() requires_grad_(model, True) for i, batch in enumerate(loader): # measure data loading time data_time.update(time.time() - end) mini_batches = get_minibatches(batch, args.num_noise_vec) noisy_inputs_list = [] for inputs, targets in mini_batches: inputs = inputs.cuda() targets = targets.cuda() inputs = inputs.repeat( (1, args.num_noise_vec, 1, 1)).view(batch[0].shape) # augment inputs with noise noise = torch.randn_like(inputs, device='cuda') * noise_sd if args.adv_training: requires_grad_(model, False) model.eval() inputs = attacker.attack(model, inputs, targets, noise=noise, num_noise_vectors=args.num_noise_vec, no_grad=args.no_grad_attack) model.train() requires_grad_(model, True) if args.train_multi_noise: noisy_inputs = inputs + noise targets = targets.unsqueeze(1).repeat( 1, args.num_noise_vec).reshape(-1, 1).squeeze() outputs = model(noisy_inputs) loss = criterion(outputs, targets) acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), noisy_inputs.size(0)) top1.update(acc1.item(), noisy_inputs.size(0)) top5.update(acc5.item(), noisy_inputs.size(0)) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() optimizer.step() else: inputs = inputs[::args.num_noise_vec] # subsample the samples noise = noise[::args.num_noise_vec] # noise = torch.randn_like(inputs, device='cuda') * noise_sd noisy_inputs_list.append(inputs + noise) if not args.train_multi_noise: noisy_inputs = torch.cat(noisy_inputs_list) targets = batch[1].cuda() assert len(targets) == len(noisy_inputs) outputs = model(noisy_inputs) loss = criterion(outputs, targets) # measure accuracy and record loss acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), noisy_inputs.size(0)) top1.update(acc1.item(), noisy_inputs.size(0)) top5.update(acc5.item(), noisy_inputs.size(0)) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print('Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format( epoch, i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) return (losses.avg, top1.avg)
def test(loader: DataLoader, model: torch.nn.Module, criterion, noise_sd: float, attacker: Attacker = None): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() top1_normal = AverageMeter() end = time.time() # switch to eval mode model.eval() requires_grad_(model, False) with torch.no_grad(): for i, (inputs, targets) in enumerate(loader): # measure data loading time data_time.update(time.time() - end) inputs = inputs.cuda() targets = targets.cuda() # augment inputs with noise noise = torch.randn_like(inputs, device='cuda') * noise_sd noisy_inputs = inputs + noise # compute output if args.adv_training: normal_outputs = model(noisy_inputs) acc1_normal, _ = accuracy(normal_outputs, targets, topk=(1, 5)) top1_normal.update(acc1_normal.item(), inputs.size(0)) with torch.enable_grad(): inputs = attacker.attack(model, inputs, targets, noise=noise) # noise = torch.randn_like(inputs, device='cuda') * noise_sd noisy_inputs = inputs + noise outputs = model(noisy_inputs) loss = criterion(outputs, targets) # measure accuracy and record loss acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(acc1.item(), inputs.size(0)) top5.update(acc5.item(), inputs.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print('Test: [{0}/{1}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format( i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) if args.adv_training: return (losses.avg, top1.avg, top1_normal.avg) else: return (losses.avg, top1.avg, None)
def train(loader: DataLoader, model: torch.nn.Module, criterion, optimizer: Optimizer, epoch: int, transformer: AbstractTransformer, writer=None): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() losses_reg = AverageMeter() confidence = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() # switch to train mode model.train() for i, batch in enumerate(loader): # measure data loading time data_time.update(time.time() - end) mini_batches = _chunk_minibatch(batch, args.num_noise_vec) for inputs, targets in mini_batches: targets = targets.cuda() batch_size = inputs.size(0) noised_inputs = [ transformer.process(inputs).cuda() for _ in range(args.num_noise_vec) ] # augment inputs with noise inputs_c = torch.cat(noised_inputs, dim=0) targets_c = targets.repeat(args.num_noise_vec) logits = model(inputs_c) loss_xent = criterion(logits, targets_c) logits_chunk = torch.chunk(logits, args.num_noise_vec, dim=0) softmax = [F.softmax(logit, dim=1) for logit in logits_chunk] avg_softmax = sum(softmax) / args.num_noise_vec consistency = [ kl_div(logit, avg_softmax, reduction='none').sum(1) + _entropy(avg_softmax, reduction='none') for logit in logits_chunk ] consistency = sum(consistency) / args.num_noise_vec consistency = consistency.mean() loss = loss_xent + args.lbd * consistency avg_confidence = -F.nll_loss(avg_softmax, targets) acc1, acc5 = accuracy(logits, targets_c, topk=(1, 5)) losses.update(loss_xent.item(), batch_size) losses_reg.update(consistency.item(), batch_size) confidence.update(avg_confidence.item(), batch_size) top1.update(acc1.item(), batch_size) top5.update(acc5.item(), batch_size) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print('Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.avg:.3f}\t' 'Data {data_time.avg:.3f}\t' 'Loss {loss.avg:.4f}\t' 'Acc@1 {top1.avg:.3f}\t' 'Acc@5 {top5.avg:.3f}'.format(epoch, i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) if args.print_step: writer.add_scalar(f'epoch/{epoch}/loss/train', losses.avg, i) writer.add_scalar(f'epoch/{epoch}/loss/consistency', losses_reg.avg, i) writer.add_scalar(f'epoch/{epoch}/loss/avg_confidence', confidence.avg, i) writer.add_scalar(f'epoch/{epoch}/batch_time', batch_time.avg, i) writer.add_scalar(f'epoch/{epoch}/accuracy/train@1', top1.avg, i) writer.add_scalar(f'epoch/{epoch}/accuracy/train@5', top5.avg, i) writer.add_scalar('loss/train', losses.avg, epoch) writer.add_scalar('loss/consistency', losses_reg.avg, epoch) writer.add_scalar('loss/avg_confidence', confidence.avg, epoch) writer.add_scalar('batch_time', batch_time.avg, epoch) writer.add_scalar('accuracy/train@1', top1.avg, epoch) writer.add_scalar('accuracy/train@5', top5.avg, epoch) return (losses.avg, top1.avg)
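The consistency-training loop above calls `kl_div(logit, avg_softmax, reduction='none')` and `_entropy(avg_softmax, reduction='none')`, neither of which appears in this section. One plausible reading, consistent with the caller summing the KL terms over the class dimension, is the sketch below; it is an assumption, not the authors' definitions.

```python
import torch.nn.functional as F

# Hypothetical helpers assumed by the consistency loss above.
# kl_div: elementwise KL terms target * (log target - log_softmax(logits));
# the caller sums over dim 1 to get KL(target || softmax(logits)) per sample.
def kl_div(logits, target_probs, reduction='none'):
    return F.kl_div(F.log_softmax(logits, dim=1), target_probs, reduction=reduction)

# _entropy: per-sample entropy of a probability distribution.
def _entropy(prob, reduction='none'):
    ent = -(prob * (prob + 1e-12).log()).sum(dim=1)
    return ent if reduction == 'none' else ent.mean()
```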
def test_with_classifier(loader: DataLoader, denoiser: torch.nn.Module, criterion, noise_sd: float, print_freq: int, classifier: torch.nn.Module): """ A function to test the classification performance of a denoiser when attached to a given classifier :param loader:DataLoader: test dataloader :param denoiser:torch.nn.Module: the denoiser :param criterion: the loss function (e.g. CE) :param noise_sd:float: the std-dev of the Guassian noise perturbation of the input :param print_freq:int: the frequency of logging :param classifier:torch.nn.Module: the classifier to which the denoiser is attached """ batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() # switch to eval mode classifier.eval() if denoiser: denoiser.eval() with torch.no_grad(): for i, (inputs, targets) in enumerate(loader): # measure data loading time data_time.update(time.time() - end) inputs = inputs.cuda() targets = targets.cuda() # augment inputs with noise inputs = inputs + torch.randn_like(inputs, device='cuda') * noise_sd if denoiser is not None: inputs = denoiser(inputs) # compute output outputs = classifier(inputs) loss = criterion(outputs, targets) # measure accuracy and record loss acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(acc1.item(), inputs.size(0)) top5.update(acc5.item(), inputs.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % print_freq == 0: print('Test: [{0}/{1}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format( i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) return (losses.avg, top1.avg)
def test(loader: DataLoader, model: torch.nn.Module, criterion): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() learning_rate = args.attlr iterations = args.attiters ROAwidth = args.ROAwidth ROAheight = args.ROAheight skip_in_x = args.skip_in_x skip_in_y = args.skip_in_y potential_nums = args.potential_nums # switch to eval mode model.eval() roa = ROA(model, 32) with torch.no_grad(): for i, (inputs, targets) in enumerate(loader): # measure data loading time data_time.update(time.time() - end) inputs = inputs.cuda() targets = targets.cuda() with torch.set_grad_enabled(True): adv_inputs = roa.gradient_based_search(inputs, targets, learning_rate,\ iterations, ROAwidth , ROAheight, skip_in_x, skip_in_y, potential_nums) # compute output outputs = model(adv_inputs) loss = criterion(outputs, targets) # measure accuracy and record loss acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(acc1.item(), inputs.size(0)) top5.update(acc5.item(), inputs.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print('Test: [{0}/{1}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format( i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) return (losses.avg, top1.avg)
def train(loader: DataLoader, model: torch.nn.Module, criterion, optimizer: Optimizer, epoch: int): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() learning_rate = args.attlr iterations = args.attiters ROAwidth = args.ROAwidth ROAheight = args.ROAheight skip_in_x = args.skip_in_x skip_in_y = args.skip_in_y potential_nums = args.potential_nums # switch to train mode model.train() for i, (inputs, targets) in enumerate(loader): # measure data loading time data_time.update(time.time() - end) inputs = inputs.cuda() targets = targets.cuda() model.eval() roa = ROA(model, 32) adv_inputs = roa.gradient_based_search(inputs, targets, learning_rate,\ iterations, ROAwidth , ROAheight, skip_in_x, skip_in_y, potential_nums) imshow(args.outdir, adv_inputs) # compute output model.train() outputs = model(adv_inputs) loss = criterion(outputs, targets) # measure accuracy and record loss acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(acc1.item(), inputs.size(0)) top5.update(acc5.item(), inputs.size(0)) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print('Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format( epoch, i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) return (losses.avg, top1.avg)
def test(loader, model, criterion, epoch, transformer: AbstractTransformer, writer=None, print_freq=10): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() # switch to eval mode model.eval() with torch.no_grad(): for i, (inputs, targets) in enumerate(loader): # measure data loading time data_time.update(time.time() - end) inputs = inputs targets = targets.cuda() # augment inputs with noise inputs = transformer.process(inputs).cuda() # compute output outputs = model(inputs) loss = criterion(outputs, targets) # measure accuracy and record loss acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(acc1.item(), inputs.size(0)) top5.update(acc5.item(), inputs.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % print_freq == 0: print('Test: [{0}/{1}]\t' 'Time {batch_time.avg:.3f}\t' 'Data {data_time.avg:.3f}\t' 'Loss {loss.avg:.4f}\t' 'Acc@1 {top1.avg:.3f}\t' 'Acc@5 {top5.avg:.3f}'.format(i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) if writer: writer.add_scalar('loss/test', losses.avg, epoch) writer.add_scalar('accuracy/test@1', top1.avg, epoch) writer.add_scalar('accuracy/test@5', top5.avg, epoch) return (losses.avg, top1.avg)
def train(loader: DataLoader, model: torch.nn.Module, criterion, optimizer: Optimizer, epoch: int, device, print_freq=100, display=True): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() # print("Entered training function") # switch to train mode model.train() for i, (inputs, targets) in enumerate(loader): # measure data loading time data_time.update(time.time() - end) inputs = inputs.to(device) targets = targets.to(device) # compute output outputs = model(inputs) loss = criterion(outputs, targets) # measure accuracy and record loss acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(acc1.item(), inputs.size(0)) top5.update(acc5.item(), inputs.size(0)) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % print_freq == 0 and display == True: print('Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format( epoch, i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) return (losses.avg, top1.avg, top5.avg)
def train(loader: DataLoader, model: torch.nn.Module, criterion, optimizer: Optimizer, epoch: int, noise_sd: float, attacker: Attacker, device: torch.device, writer=None): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() losses_reg = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() # switch to train mode model.train() requires_grad_(model, True) for i, batch in enumerate(loader): # measure data loading time data_time.update(time.time() - end) mini_batches = _chunk_minibatch(batch, args.num_noise_vec) for inputs, targets in mini_batches: inputs, targets = inputs.to(device), targets.to(device) batch_size = inputs.size(0) noises = [ torch.randn_like(inputs, device=device) * noise_sd for _ in range(args.num_noise_vec) ] if args.adv_training: requires_grad_(model, False) model.eval() inputs = attacker.attack(model, inputs, targets, noises=noises) model.train() requires_grad_(model, True) # augment inputs with noise inputs_c = torch.cat([inputs + noise for noise in noises], dim=0) targets_c = targets.repeat(args.num_noise_vec) logits = model(inputs_c) loss_xent = criterion(logits, targets_c) logits_chunk = torch.chunk(logits, args.num_noise_vec, dim=0) loss_con = consistency_loss(logits_chunk, args.lbd, args.eta) loss = loss_xent + loss_con acc1, acc5 = accuracy(logits, targets_c, topk=(1, 5)) losses.update(loss_xent.item(), batch_size) losses_reg.update(loss_con.item(), batch_size) top1.update(acc1.item(), batch_size) top5.update(acc5.item(), batch_size) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print('Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.avg:.3f}\t' 'Data {data_time.avg:.3f}\t' 'Loss {loss.avg:.4f}\t' 'Acc@1 {top1.avg:.3f}\t' 'Acc@5 {top5.avg:.3f}'.format(epoch, i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) writer.add_scalar('loss/train', losses.avg, epoch) writer.add_scalar('loss/consistency', losses_reg.avg, epoch) writer.add_scalar('batch_time', batch_time.avg, epoch) writer.add_scalar('accuracy/train@1', top1.avg, epoch) writer.add_scalar('accuracy/train@5', top5.avg, epoch) return (losses.avg, top1.avg)
def train(self, args, logger=None, progressbar=None): """ Train function of FixMatch. From data_loader, it inference training data, computes losses, and update the networks. """ ngpus_per_node = torch.cuda.device_count() # lb: labeled, ulb: unlabeled self.train_model.train() # for gpu profiling start_batch = torch.cuda.Event(enable_timing=True) end_batch = torch.cuda.Event(enable_timing=True) start_run = torch.cuda.Event(enable_timing=True) end_run = torch.cuda.Event(enable_timing=True) total_epochs = args.num_train_iter // args.num_eval_iter curr_epoch = 0 progressbar = tqdm(desc=f"Epoch {curr_epoch}/{total_epochs}", total=args.num_eval_iter) start_batch.record() best_eval_acc, best_it = 0.0, 0 scaler = GradScaler() amp_cm = autocast if args.amp else contextlib.nullcontext for (x_lb, y_lb), (x_ulb_w, x_ulb_s, _) in zip(self.loader_dict["train_lb"], self.loader_dict["train_ulb"]): # prevent the training iterations exceed args.num_train_iter if self.it > args.num_train_iter: break end_batch.record() torch.cuda.synchronize() start_run.record() num_lb = x_lb.shape[0] num_ulb = x_ulb_w.shape[0] assert num_ulb == x_ulb_s.shape[0] x_lb, x_ulb_w, x_ulb_s = ( x_lb.cuda(args.gpu), x_ulb_w.cuda(args.gpu), x_ulb_s.cuda(args.gpu), ) y_lb = y_lb.cuda(args.gpu) inputs = torch.cat((x_lb, x_ulb_w, x_ulb_s)) # inference and calculate sup/unsup losses with amp_cm(): logits = self.train_model(inputs) logits_x_lb = logits[:num_lb] logits_x_ulb_w, logits_x_ulb_s = logits[num_lb:].chunk(2) del logits # hyper-params for update T = self.t_fn(self.it) p_cutoff = self.p_fn(self.it) sup_loss = ce_loss(logits_x_lb, y_lb, reduction="mean") unsup_loss, mask = consistency_loss( logits_x_ulb_w, logits_x_ulb_s, "ce", T, p_cutoff, use_hard_labels=args.hard_label, ) total_loss = sup_loss + self.lambda_u * unsup_loss # parameter updates if args.amp: scaler.scale(total_loss).backward() scaler.step(self.optimizer) scaler.update() else: total_loss.backward() self.optimizer.step() self.scheduler.step() self.train_model.zero_grad() with torch.no_grad(): self._eval_model_update() train_accuracy = accuracy(logits_x_lb, y_lb) train_accuracy = train_accuracy[0] end_run.record() torch.cuda.synchronize() # tensorboard_dict update tb_dict = {} tb_dict["train/sup_loss"] = sup_loss.detach() tb_dict["train/unsup_loss"] = unsup_loss.detach() tb_dict["train/total_loss"] = total_loss.detach() tb_dict["train/mask_ratio"] = 1.0 - mask.detach() tb_dict["lr"] = self.optimizer.param_groups[0]["lr"] tb_dict["train/prefetch_time"] = ( start_batch.elapsed_time(end_batch) / 1000.0) tb_dict["train/run_time"] = start_run.elapsed_time( end_run) / 1000.0 tb_dict["train/top-1-acc"] = train_accuracy progressbar.set_postfix_str( f"Total Loss={total_loss.detach():.3e}") progressbar.update(1) if self.it % self.num_eval_iter == 0: progressbar.close() curr_epoch += 1 eval_dict = self.evaluate(args=args) tb_dict.update(eval_dict) save_path = os.path.join(args.save_dir, args.save_name) if tb_dict["eval/top-1-acc"] > best_eval_acc: best_eval_acc = tb_dict["eval/top-1-acc"] best_it = self.it self.print_fn( f"{self.it} iteration, USE_EMA: {hasattr(self, 'eval_model')}, {tb_dict}, BEST_EVAL_ACC: {best_eval_acc}, at {best_it} iters" ) progressbar = tqdm(desc=f"Epoch {curr_epoch}/{total_epochs}", total=args.num_eval_iter) if not args.multiprocessing_distributed or ( args.multiprocessing_distributed and args.rank % ngpus_per_node == 0): if self.it == best_it: self.save_model("model_best.pth", save_path) if not self.tb_log is None: self.tb_log.update(tb_dict, self.it) 
self.it += 1 del tb_dict start_batch.record() if self.it > 2**19: self.num_eval_iter = 1000 eval_dict = self.evaluate(args=args) eval_dict.update({ "eval/best_acc": best_eval_acc, "eval/best_it": best_it }) return eval_dict
def test(loader: DataLoader, model: torch.nn.Module, criterion, epoch: int, args): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() # switch to eval mode model.eval() m = Bernoulli(torch.tensor([args.calibrated_alpha]).cuda()) with torch.no_grad(): for i, (inputs, targets) in enumerate(loader): # measure data loading time data_time.update(time.time() - end) inputs = inputs.cuda() targets = targets.cuda() # make MNIST binary if args.dataset == 'mnist': inputs = (inputs > 0.5).type(torch.cuda.FloatTensor) # augment inputs with noise if args.perturb == 'bernoulli': mask = m.sample(inputs.shape).squeeze(-1) # make sure that the value is normalized rand_inputs = torch.randint_like( inputs, low=0, high=args.K + 1, device='cuda') / float( args.K) inputs = inputs * mask + rand_inputs * (1 - mask) elif args.perturb == 'gaussian': inputs = inputs + torch.randn_like(inputs, device='cuda') * args.sigma # compute output outputs = model(inputs) loss = criterion(outputs, targets) # measure accuracy and record loss acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(acc1.item(), inputs.size(0)) top5.update(acc5.item(), inputs.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if (i + 1) % args.print_freq == 0: print('Test: [{0}/{1}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format( i + 1, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) print('* Epoch: [{0}] Test: \t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Acc@5 {top5.val:.3f} ({top5.avg:.3f})\n'.format(epoch, loss=losses, top1=top1, top5=top5)) return (losses.avg, top1.avg)
def train_or_eval(data_loader, classifier, decoder, train=False, optimizer=None, epoch=None): ## initialize all metric used batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() acc = AverageMeter() losses_m = AverageMeter() acc_m = AverageMeter() statistics = StatisticsContainer() classifier_criterion = nn.CrossEntropyLoss().to(device) ## switch to train mode if needed if train: decoder.train() if args.fixed_classifier: classifier.eval() else: classifier.train() else: decoder.eval() classifier.eval() ## data loop end = time.time() for i, (input, target) in enumerate(data_loader): if train and i > len(data_loader) * args.pot: break ## measure data loading time data_time.update(time.time() - end) ## move input and target on the device input, target = input.to(device), target.to(device) ## compute classifier prediction on the original images and get inner layers with torch.set_grad_enabled(train and (not args.fixed_classifier)): output, layers = classifier(input) classifier_loss = classifier_criterion(output, target) ## update metrics losses.update(classifier_loss.item(), input.size(0)) acc.update( accuracy(output.detach(), target, topk=(1, ))[0].item(), input.size(0)) ## update classifier - compute gradient and do SGD step for clean image, save classifier if train and (not args.fixed_classifier): optimizer['classifier'].zero_grad() classifier_loss.backward() optimizer['classifier'].step() ## save classifier (needed only if previous iterations are used i.e. args.hp > 0) global F_k if args.hp > 0 and ((i % args.smf == -1 % args.smf) or len(F_k) < 1): print( 'Current iteration is saving, will be used in the future. ', end='', flush=True) if len(F_k) < args.f_size: index = len(F_k) else: index = random.randint(0, len(F_k) - 1) state_dict = classifier.state_dict() F_k[index] = {} for p in state_dict: F_k[index][p] = state_dict[p].cpu() print('There are {0} iterations stored.'.format(len(F_k)), flush=True) ## detach inner layers to make them be features for decoder layers = [l.detach() for l in layers] with torch.set_grad_enabled(train): ## compute mask and masked input mask = decoder(layers) input_m = input * (1 - mask) ## update statistics statistics.update(mask) ## randomly select classifier to be evaluated on masked image and compute output if (not train) or args.fixed_classifier or (random.random() > args.hp): output_m, _ = classifier(input_m) update_classifier = not args.fixed_classifier else: try: confuser except NameError: import copy confuser = copy.deepcopy(classifier) index = random.randint(0, len(F_k) - 1) confuser.load_state_dict(F_k[index]) confuser.eval() output_m, _ = confuser(input_m) update_classifier = False classifier_loss_m = classifier_criterion(output_m, target) ## update metrics losses_m.update(classifier_loss_m.item(), input.size(0)) acc_m.update( accuracy(output_m.detach(), target, topk=(1, ))[0].item(), input.size(0)) if train: ## update classifier - compute gradient, do SGD step for masked image if update_classifier: optimizer['classifier'].zero_grad() classifier_loss_m.backward(retain_graph=True) optimizer['classifier'].step() ## regularizaion for casme _, max_indexes = output.detach().max(1) _, max_indexes_m = output_m.detach().max(1) correct_on_clean = target.eq(max_indexes) mistaken_on_masked = target.ne(max_indexes_m) nontrivially_confused = (correct_on_clean + mistaken_on_masked).eq(2).float() mask_mean = F.avg_pool2d(mask, 224, stride=1).squeeze() ## apply regularization loss only on nontrivially confused images casme_loss = 
-args.lambda_r * F.relu(nontrivially_confused - mask_mean).mean() ## main loss for casme if args.adversarial: casme_loss += -classifier_loss_m else: log_prob = F.log_softmax(output_m, 1) prob = log_prob.exp() negative_entropy = (log_prob * prob).sum(1) ## apply main loss only when original images are corrected classified negative_entropy_correct = negative_entropy * correct_on_clean.float( ) casme_loss += negative_entropy_correct.mean() ## update casme - compute gradient, do SGD step optimizer['decoder'].zero_grad() casme_loss.backward() torch.nn.utils.clip_grad_norm_(decoder.parameters(), 10) optimizer['decoder'].step() ## measure elapsed time batch_time.update(time.time() - end) end = time.time() ## print log if i % args.print_freq == 0: if train: print('Epoch: [{0}][{1}/{2}/{3}]\t'.format( epoch, i, int(len(data_loader) * args.pot), len(data_loader)), end='') else: print('Test: [{0}/{1}]\t'.format(i, len(data_loader)), end='') print('Time {batch_time.avg:.3f} ({batch_time.val:.3f})\t' 'Data {data_time.avg:.3f} ({data_time.val:.3f})\n' 'Loss(C) {loss.avg:.4f} ({loss.val:.4f})\t' 'Prec@1(C) {acc.avg:.3f} ({acc.val:.3f})\n' 'Loss(M) {loss_m.avg:.4f} ({loss_m.val:.4f})\t' 'Prec@1(M) {acc_m.avg:.3f} ({acc_m.val:.3f})\t'.format( batch_time=batch_time, data_time=data_time, loss=losses, acc=acc, loss_m=losses_m, acc_m=acc_m), flush=True) statistics.printOut() if not train: print(' * Prec@1 {acc.avg:.3f} Prec@1(M) {acc_m.avg:.3f} '.format( acc=acc, acc_m=acc_m)) statistics.printOut() return { 'acc': str(acc.avg), 'acc_m': str(acc_m.avg), **statistics.getDictionary() }
def optimization_step(model, criterion, optimizer, optimizer_fp, optimizer_sf, x_batch, y_batch):
    # `async=True` is a syntax error in Python 3; use non_blocking instead
    x_batch = x_batch.cuda()
    y_batch = y_batch.cuda(non_blocking=True)

    # use quantized model
    logits = model(x_batch)

    # compute logloss
    loss = criterion(logits, y_batch)
    batch_loss = loss.item()

    # compute accuracies
    pred = F.softmax(logits, dim=1)
    batch_accuracy, batch_top5_accuracy = accuracy(y_batch, pred, top_k=(1, 5))

    optimizer.zero_grad()
    optimizer_fp.zero_grad()
    optimizer_sf.zero_grad()

    # compute grads for quantized model
    loss.backward()

    all_kernels = optimizer.param_groups[2]['params']
    all_fp_kernels = optimizer_fp.param_groups[0]['params']
    scaling_factors = optimizer_sf.param_groups[0]['params']

    for i in range(len(all_kernels)):
        # get quantized kernel
        k = all_kernels[i]
        # get corresponding full precision kernel
        k_fp = all_fp_kernels[i]
        # get scaling factors for quantized kernel
        f = scaling_factors[i]
        w_p, w_n = f.data[0], f.data[1]
        # get modified grads
        k_fp_grad, w_p_grad, w_n_grad = get_grads(k.grad.data, k.data, w_p, w_n)
        # WARNING: this is not like in the original paper.
        # In the original paper: k.data -> k_fp.data
        # grad for full precision kernel
        k_fp.grad = k_fp_grad
        # we don't need to update quantized kernel directly
        k.grad.data.zero_()
        # grad for scaling factors
        f.grad = torch.FloatTensor([w_p_grad, w_n_grad]).cuda()

    # update the last fc layer and all batch norm params in quantized model
    optimizer.step()

    # update full precision kernels
    optimizer_fp.step()

    # update scaling factors
    optimizer_sf.step()

    # update quantized kernels
    for i in range(len(all_kernels)):
        k = all_kernels[i]
        k_fp = all_fp_kernels[i]
        f = scaling_factors[i]
        w_p, w_n = f.data[0], f.data[1]
        k.data = quantize(k_fp.data, w_p, w_n)

    return batch_loss, batch_accuracy, batch_top5_accuracy
def train(loader: DataLoader, model: torch.nn.Module, criterion, optimizer: Optimizer, epoch: int, noise_sd: float, attacker: Attacker, device: torch.device, writer=None): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() # switch to train mode model.train() requires_grad_(model, True) for i, batch in enumerate(loader): # measure data loading time data_time.update(time.time() - end) mini_batches = _chunk_minibatch(batch, args.num_noise_vec) for inputs, targets in mini_batches: inputs, targets = inputs.to(device), targets.to(device) inputs = inputs.repeat( (1, args.num_noise_vec, 1, 1)).reshape(-1, *batch[0].shape[1:]) batch_size = inputs.size(0) # augment inputs with noise noise = torch.randn_like(inputs, device=device) * noise_sd requires_grad_(model, False) model.eval() inputs = attacker.attack(model, inputs, targets, noise=noise, num_noise_vectors=args.num_noise_vec, no_grad=args.no_grad_attack) model.train() requires_grad_(model, True) noisy_inputs = inputs + noise targets = targets.unsqueeze(1).repeat(1, args.num_noise_vec).reshape( -1, 1).squeeze() outputs = model(noisy_inputs) loss = criterion(outputs, targets) acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), batch_size) top1.update(acc1.item(), batch_size) top5.update(acc5.item(), batch_size) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print('Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.avg:.3f}\t' 'Data {data_time.avg:.3f}\t' 'Loss {loss.avg:.4f}\t' 'Acc@1 {top1.avg:.3f}\t' 'Acc@5 {top5.avg:.3f}'.format(epoch, i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) if writer: writer.add_scalar('loss/train', losses.avg, epoch) writer.add_scalar('batch_time', batch_time.avg, epoch) writer.add_scalar('accuracy/train@1', top1.avg, epoch) writer.add_scalar('accuracy/train@5', top5.avg, epoch) return (losses.avg, top1.avg)
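Several of the noise-augmented loops above split each loaded batch into `args.num_noise_vec` equal chunks via `_chunk_minibatch` (or `get_minibatches`) before adding per-chunk noise. Neither helper is shown; a hedged sketch of that splitting logic, matching the `for inputs, targets in mini_batches` call sites:

```python
# Hypothetical batch-splitting helper (assumption): yields `num_chunks`
# consecutive (inputs, targets) slices from one loaded batch.
def _chunk_minibatch(batch, num_chunks):
    inputs, targets = batch
    chunk_size = inputs.size(0) // num_chunks
    for i in range(num_chunks):
        yield (inputs[i * chunk_size:(i + 1) * chunk_size],
               targets[i * chunk_size:(i + 1) * chunk_size])
```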
def train(training_model, training_data, opts, lr_scheduler, epochs, optimizer): old_lr = lr_scheduler.get_last_lr()[0] iterations_per_epoch = len(training_data) for epoch in epochs: logging.info(f"Epoch {epoch}/{opts.epoch}") bar = tqdm(training_data, total=iterations_per_epoch) sum_loss = 0.0 sum_acc = 0.0 sum_batch_loss = 0.0 sum_batch_acc = 0.0 last_batch = -1 start_batch_time = start_epoch_time = time.time() total_sample = 0 for batch_idx, (input_data, labels) in enumerate(bar): preds, losses = training_model(input_data, labels) epoch_num = epoch - 1 + float(batch_idx + 1) / iterations_per_epoch if not opts.disable_metrics: with torch.no_grad(): # Convert to full precision for CPU execute. losses = losses.float() preds = preds.float() mean_loss = torch.mean(losses).item() acc = accuracy(preds, labels) sum_acc += acc sum_loss += mean_loss sum_batch_loss += mean_loss sum_batch_acc += acc aggregated_loss = sum_loss / (batch_idx + 1) aggregated_accuracy = sum_acc / (batch_idx + 1) bar.set_description( f"Loss:{aggregated_loss:0.4f} | Accuracy:{aggregated_accuracy:0.2f}%" ) total_sample += input_data.size()[0] if not opts.disable_metrics and ( (batch_idx + 1) % (iterations_per_epoch // opts.logs_per_epoch) == 0): # save metrics result_dict = { "loss_avg": aggregated_loss, "loss_batch": sum_batch_loss / (batch_idx - last_batch), "epoch": epoch_num, "iteration": batch_idx + 1 + (epoch - 1) * iterations_per_epoch, "train_accuracy_avg": aggregated_accuracy, "train_accuracy_batch": sum_batch_acc / (batch_idx - last_batch), "learning_rate": old_lr * (opts.replicas * opts.gradient_accumulation if opts.reduction == 'sum' else 1.0), "train_img_per_sec": ((batch_idx - last_batch) * input_data.size()[0] / (time.time() - start_batch_time)), "latency_sec": (time.time() - start_batch_time) / (batch_idx - last_batch) } utils.Logger.log_train_results(result_dict) sum_batch_loss = 0.0 sum_batch_acc = 0.0 last_batch = batch_idx start_batch_time = time.time() # lr schedule lr_scheduler.step(epoch_num) new_lr = lr_scheduler.get_last_lr()[0] if new_lr != old_lr: training_model.setOptimizer(optimizer) old_lr = new_lr if opts.lr_schedule == "step": logging.info(f"Learning rate is changed to {new_lr}") end_time = time.time() if not opts.disable_metrics: logging.info( f"Epoch {epoch}: Train accuracy is {aggregated_accuracy:0.2f}%" ) epoch_throughput = total_sample / (end_time - start_epoch_time) logging.info( f"Throughput of the epoch:{epoch_throughput:0.1f} img/sec") # save if not opts.checkpoint_path == "": if not os.path.exists(opts.checkpoint_path): os.makedirs(opts.checkpoint_path) filename = f"{opts.model}_{opts.data}_{epoch}.pt" save_path = os.path.join(opts.checkpoint_path, filename) training_model.copyWeightsToHost() state = training_model.model.model.state_dict() optimizer_state = optimizer.state_dict() torch.save( { 'epoch': epoch, 'model_state_dict': state, 'optimizer_state_dict': optimizer_state, 'loss': aggregated_loss, 'train_accuracy': aggregated_accuracy, 'opts': opts }, save_path)
def train(
    args,
    train_queue,
    valid_queue,
    model,
    architect,
    criterion,
    optimizer,
    lr,
    random_arch=False,
):
    objs = train_utils.AvgrageMeter()
    top1 = train_utils.AvgrageMeter()
    top5 = train_utils.AvgrageMeter()

    for step, datapoint in enumerate(train_queue):
        # The search dataqueue for nas-bench-201 returns both train and valid data
        # when looping through the queue. This is disabled when single level is indicated.
        if "nas-bench-201" in args.search.search_space and not (
                args.search.single_level):
            input, target, input_search, target_search = datapoint
        else:
            input, target = datapoint
            input_search, target_search = next(iter(valid_queue))

        n = input.size(0)
        input = Variable(input, requires_grad=False).cuda()
        target = Variable(target, requires_grad=False).cuda()

        # get a random minibatch from the search queue with replacement
        input_search = Variable(input_search, requires_grad=False).cuda()
        target_search = Variable(target_search, requires_grad=False).cuda()

        # set the model in train mode (important for layers like dropout and batch normalization)
        model.train()

        # TODO: move architecture args into a separate dictionary within args
        if not random_arch:
            architect.step(
                input, target, input_search, target_search, **{
                    "eta": lr,
                    "network_optimizer": optimizer,
                    "unrolled": args.search.unrolled,
                    "update_weights": True,
                })
        # if random_arch or model.architect_type == "snas":
        #     architect.sample_arch_configure_model()

        optimizer.zero_grad()
        architect.zero_arch_var_grad()
        architect.set_model_alphas()
        architect.set_model_edge_weights()

        logits, logits_aux = model(input, discrete=args.search.discrete)
        loss = criterion(logits, target)
        if args.train.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.train.auxiliary_weight * loss_aux
        loss.backward()
        # clip_grad_norm is deprecated; use the in-place clip_grad_norm_
        nn.utils.clip_grad_norm_(model.parameters(), args.train.grad_clip)
        optimizer.step()

        prec1, prec5 = train_utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.run.report_freq == 0:
            logging.info(
                f"| Train | Batch: {step:3d} | Loss: {objs.avg:e} | Top1: {top1.avg} | Top5: {top5.avg} |"
            )

    return top1.avg, objs.avg, top5.avg