Example #1
def model_inference_imagenet(base_classifier,
                             loader,
                             device,
                             display=False,
                             print_freq=1000):
    top1 = AverageMeter()
    top5 = AverageMeter()

    start = time.time()
    base_classifier.eval()
    # Regular dataset:
    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            outputs = base_classifier(inputs)
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            if i % print_freq == 0 and display:
                print("Test : [{0}/{1}]\t"
                      "Acc@1 {top1.avg:.3f}\t"
                      "Acc@5 {top5.avg:.3f}".format(i,
                                                    len(loader),
                                                    top1=top1,
                                                    top5=top5))
    end = time.time()
    if display:
        print("Inference Time: {0:.3f}".format(end - start))
        print("Final Accuracy: [{0}]".format(top1.avg))

    return top1.avg, top5.avg
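Every example on this page leans on two helpers that are never shown: AverageMeter and accuracy, commonly copied from the official PyTorch ImageNet example. A minimal sketch under that assumption (some snippets construct the meter with a name and format string, others with no arguments; the track=True variant of accuracy used further down is project-specific and not sketched here):

import torch

class AverageMeter:
    """Tracks the current value, running sum, count, and average of a metric."""
    def __init__(self, name='', fmt=':f'):
        self.name, self.fmt = name, fmt
        self.reset()

    def reset(self):
        self.val = self.sum = self.count = self.avg = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)

def accuracy(output, target, topk=(1,)):
    """Computes top-k accuracy (in percent) for each k in topk."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, 1, True, True)  # indices of the top-k classes
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res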
def test(loader: DataLoader, model: torch.nn.Module, criterion,
         noise_sd: float, print_freq: int, outdir: str):
    """
    A function to test the denoising performance of a denoiser (i.e. MSE objective)
        :param loader:DataLoader: test dataloader
        :param model:torch.nn.Module: the denoiser
        :param criterion: the loss function
        :param noise_sd:float: the std-dev of the Guassian noise perturbation of the input
        :param print_freq:int: 
        :param outdir:str: the output directory where sample denoised images are saved.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()

            # augment inputs with noise
            noise = torch.randn_like(inputs, device='cuda') * noise_sd

            outputs = model(inputs + noise)
            loss = criterion(outputs, inputs)

            # record loss
            losses.update(loss.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                          i,
                          len(loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses))

        pil = toPilImage(inputs[0].cpu())
        image_path = os.path.join(outdir, 'clean.png')
        pil.save(image_path)

        pil = toPilImage(outputs[0].cpu())
        image_path = os.path.join(outdir, 'denoised.png')
        pil.save(image_path)

        return losses.avg
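toPilImage above is assumed to be an instance of the standard torchvision transform; a one-line sketch of that assumption:

from torchvision import transforms

# assumption: toPilImage is the stock torchvision tensor-to-PIL transform
toPilImage = transforms.ToPILImage()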
def validate(val_loader, model, criterion):

    global args, rank, world_size, best_prec1

    # validation doesn't need to track the history
    batch_time = AverageMeter(0)
    losses = AverageMeter(0)
    top1 = AverageMeter(0)
    top5 = AverageMeter(0)

    # switch to evaluate mode
    model.eval()

    c1 = 0
    c5 = 0
    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        if i == len(val_loader) // (args.batch_size * world_size):
            break

        input = input.cuda()
        if args.double == 1:
            input = input.double()
        if args.half == 1:
            input = input.half()
        target = target.cuda()

        # compute output
        with torch.no_grad():
            output = model(input)

        # measure accuracy and record loss
        loss = criterion(output, target) / world_size
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))

        reduced_loss = loss.detach().clone()
        reduced_prec1 = prec1.clone() / world_size
        reduced_prec5 = prec5.clone() / world_size

        if args.dist == 1:
            dist.all_reduce(reduced_loss)
            dist.all_reduce(reduced_prec1)
            dist.all_reduce(reduced_prec5)

        losses.update(reduced_loss.item())
        top1.update(reduced_prec1.item())
        top5.update(reduced_prec5.item())

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    if rank == 0:
        print(
            ' * All Loss {loss.avg:.4f} Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
            .format(loss=losses, top1=top1, top5=top5))

    model.train()

    return losses.avg, top1.avg, top5.avg
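The division by world_size before dist.all_reduce is what turns the default sum-reduction into a mean over workers. A minimal sketch of the pattern (assuming an initialized process group):

import torch
import torch.distributed as dist

def all_reduce_mean(t: torch.Tensor, world_size: int) -> torch.Tensor:
    # all_reduce sums in place across ranks, so pre-dividing by
    # world_size makes the reduced value the mean of the per-rank values
    t = t.clone() / world_size
    dist.all_reduce(t)
    return t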
Example #4
def train(loader: DataLoader, model: torch.nn.Module, criterion,
          optimizer: Optimizer, epoch: int, noise_sd: float):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    for i, (inputs, targets) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs = inputs.to(device)
        targets = targets.to(device)

        # augment inputs with noise
        inputs = inputs + torch.randn_like(inputs, device=device) * noise_sd

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(acc1.item(), inputs.size(0))
        top5.update(acc5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      len(loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5))

    return (losses.avg, top1.avg)
Example #5
def train(loader: DataLoader, model: torch.nn.Module, criterion, optimizer: Optimizer,
          epoch: int, noise_sd: float, device: torch.device, writer=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()

    for i, (inputs, targets) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs, targets = inputs.to(device), targets.to(device)
        batch_size = inputs.size(0)

        # augment inputs with noise
        inputs = inputs + torch.randn_like(inputs, device=device) * noise_sd

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
        losses.update(loss.item(), batch_size)
        top1.update(acc1.item(), batch_size)
        top5.update(acc5.item(), batch_size)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.avg:.3f}\t'
                  'Data {data_time.avg:.3f}\t'
                  'Loss {loss.avg:.4f}\t'
                  'Acc@1 {top1.avg:.3f}\t'
                  'Acc@5 {top5.avg:.3f}'.format(
                epoch, i, len(loader), batch_time=batch_time,
                data_time=data_time, loss=losses, top1=top1, top5=top5))

    if writer:
        writer.add_scalar('loss/train', losses.avg, epoch)
        writer.add_scalar('batch_time', batch_time.avg, epoch)
        writer.add_scalar('accuracy/train@1', top1.avg, epoch)
        writer.add_scalar('accuracy/train@5', top5.avg, epoch)

    return (losses.avg, top1.avg)
Example #6
def test(loader: DataLoader, model: torch.nn.Module, criterion,
         noise_sd: float):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()

            # augment inputs with noise
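            # (randgn_like is assumed to be a project-specific generalized
            # noise sampler parameterized by p; it is not a torch builtin)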
            inputs = inputs + randgn_like(inputs, p=args.p,
                                          device='cuda') * noise_sd

            # compute output
            if (args.scale_down != 1):
                inputs = torch.nn.functional.interpolate(
                    inputs, scale_factor=args.scale_down)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        return (losses.avg, top1.avg)
Example #7
def test(loader: DataLoader,
         model: torch.nn.Module,
         criterion,
         device,
         print_freq,
         display=False):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.to(device)
            targets = targets.to(device)

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0 and display:
                print('Test : [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        return (losses.avg, top1.avg, top5.avg)
Example #8
def mask_train(loader: DataLoader,
               model: torch.nn.Module,
               criterion,
               optimizer: Optimizer,
               epoch: int,
               device,
               alpha,
               display=False):
    losses = AverageMeter()

    # switch to train mode
    model.train()

    for i, (inputs, targets) in enumerate(loader):

        inputs = inputs.to(device)
        targets = targets.to(device)

        reg_loss = 0
        for layer in model.modules():
            if isinstance(layer, (nn.Conv2d, nn.Linear)):
                reg_loss += torch.norm(layer.weight_mask, p=1)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets) + alpha * reg_loss

        losses.update(loss.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return losses.avg
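weight_mask is not a standard attribute of nn.Conv2d or nn.Linear; for the L1 penalty above to actually train the mask, it has to be a learnable parameter of a custom masked layer. A minimal sketch of such a layer (the MaskedLinear name is hypothetical):

import torch
import torch.nn as nn
import torch.nn.functional as F

class MaskedLinear(nn.Linear):
    """Linear layer with a learnable soft mask; an L1 penalty on
    weight_mask (as in mask_train above) drives the mask toward sparsity."""
    def __init__(self, in_features, out_features, bias=True):
        super().__init__(in_features, out_features, bias)
        self.weight_mask = nn.Parameter(torch.ones_like(self.weight))

    def forward(self, x):
        return F.linear(x, self.weight * self.weight_mask, self.bias)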
def train(train_loader, model, criterion, optimizer, epoch, args):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)
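ProgressMeter is the other unshown helper, also from the official PyTorch ImageNet example; a minimal sketch consistent with the display calls above:

class ProgressMeter:
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        # one line per call: prefix, [batch/total], then each meter's summary
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    @staticmethod
    def _get_batch_fmtstr(num_batches):
        num_digits = len(str(num_batches))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'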
def validate(val_loader, model, criterion, args):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return top1.avg, top5.avg
Example #11
def validate(loader, model, args):
    global score, present

    if args.gpu is not None:
        print("** Running on GPU{}".format(args.gpu))
        torch.cuda.set_device(args.gpu)
        model.cuda(args.gpu)
        score = score.cuda(args.gpu)
        present = present.cuda(args.gpu)
    else:
        model.cpu()

    top1 = AverageMeter("Accuracy1", ":6.2f")
    prefix = "Epoch:[{}]".format(1)
    progress = ProgressMeter(len(loader), [top1], prefix=prefix)

    with torch.set_grad_enabled(mode=False):
        for i, (images, targets) in enumerate(loader):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
                targets = targets.cuda(args.gpu, non_blocking=True)

            t0 = time.time()
            output = model(images)
            t1 = time.time()
            accuracy_with_score(output, targets)
            t2 = time.time()

            if i % 50 == 0:
                progress.display(i)
                print("{:<20s}\t{}".format("forward", HTIME(t1 - t0)))
                print("{:<20s}\t{}".format("accuracy", HTIME(t2 - t1)))

    non_zero = present.nonzero().squeeze(1)

    present = present[non_zero]
    score = score[non_zero]
    score = score.float().div(present)
    score = score.mul_(100)

    print("\nBest scores")
    best, ids = score.topk(10, dim=0, largest=True, sorted=True)
    for i, p in zip(ids, best):
        print("{:<10d}\t{:4.1f}".format(i.data, p.data))

    print("\nWorst scores")
    last, ids = score.topk(10, dim=0, largest=False, sorted=True)
    for i, p in zip(ids, last):
        print("{:<10d}\t{:4.1f}".format(i.data, p.data))
Example #12
def get_teacher_intermediates(teacher_model, train_loader, layers_to_replace):
    if args.max_batches is not None:
        num_batches = args.max_batches
    else:
        num_batches = len(train_loader)

    # define hook to capture intermediate inputs/activations and intermediate outputs
    # serialize outputs
    # keep dictionary to io objects for different layers
    teacher_inputs = {}
    teacher_outputs = {}
    teacher_input_size = {}
    teacher_output_size = {}

    # map module ids to names -- inside the hook we only see the module
    # object, so this is the only way to recover its registered name
    name_map = {}
    for name, module in teacher_model.named_modules():
        name_map[id(module)] = name

    batch_size = args.batch_size

    # use memory mapping to manage large activations
    def make_mmap_file(path, input_size):
        view_size = torch.Size([num_batches * args.batch_size]) + input_size
        # shared needs to be true for file to be created
        return torch.from_file(path, size=int(np.prod(view_size)),
                               shared=True).view(view_size)

    batch_idx = 0
    data_idx = 0

    # TODO: store only inputs or outputs (otherwise we may store duplicate
    # data when neighboring layers are both captured); this won't make the
    # answers wrong, but it is wasteful
    def hook(module, input, output):
        current_batch_size = output.size(0)
        mod_id = id(module)
        input_size = get_size(input)
        output_size = get_size(output)
        if mod_id not in teacher_inputs:
            teacher_inputs[mod_id] = make_mmap_file(
                f'{args.output_dir}/{name_map[mod_id]}_input.pt', input_size)
            teacher_outputs[mod_id] = make_mmap_file(
                f'{args.output_dir}/{name_map[mod_id]}_output.pt', output_size)
        if mod_id not in teacher_input_size:
            teacher_input_size[mod_id] = input_size
            teacher_output_size[mod_id] = output_size

        # save inputs to memory mapped files
        # TODO: input always a length-1 tuple?
        teacher_inputs[mod_id][data_idx:data_idx +
                               current_batch_size] = input[0].cpu().detach()
        teacher_outputs[mod_id][data_idx:data_idx +
                                current_batch_size] = output.cpu().detach()

    teacher_model.eval()
    for name, module in teacher_model.named_modules():
        if name in layers_to_replace:
            module.register_forward_hook(hook)

    batch_time = AverageMeter()
    prefetcher = data_prefetcher(train_loader)
    input, _ = prefetcher.next()
    end = time.time()
    while input is not None:
        input = input.cuda()
        teacher_model(input)
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if batch_idx % args.print_freq == 0:
            logger.info(
                'Batch:{0}/{1}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
                    batch_idx, num_batches, batch_time=batch_time))
        batch_idx += 1
        data_idx += input.size(0)  # for variable size batches
        if args.max_batches is not None and batch_idx == args.max_batches:
            break
        input, _ = prefetcher.next()
    logging.info("Computed teacher intermediates. ")

    # write sizes to disk for easy loading of memory maps at a later time
    for layer_id in teacher_inputs:
        layer_name = name_map[layer_id]
        # write sizes
        with open(f'{args.output_dir}/{layer_name}_input_sz.pt', 'wb') as f:
            input_size = torch.Size([data_idx]) + teacher_input_size[layer_id]
            torch.save(input_size, f)
        with open(f'{args.output_dir}/{layer_name}_output_sz.pt', 'wb') as f:
            output_size = torch.Size([data_idx]) + teacher_output_size[layer_id]
            torch.save(output_size, f)
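The size files written at the end make it possible to re-open the memory-mapped activations later. A sketch of the matching loader (assuming the default float32 dtype that torch.from_file uses):

import numpy as np
import torch

def load_teacher_inputs(output_dir, layer_name):
    # read the saved torch.Size, then map the raw file back into a view of that shape
    with open(f'{output_dir}/{layer_name}_input_sz.pt', 'rb') as f:
        input_size = torch.load(f)
    return torch.from_file(f'{output_dir}/{layer_name}_input.pt',
                           size=int(np.prod(input_size)),
                           shared=True).view(input_size)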
def train(loader: DataLoader,
          denoiser: torch.nn.Module,
          criterion,
          optimizer: Optimizer,
          epoch: int,
          noise_sd: float,
          classifier: torch.nn.Module = None):
    """
    Function for training denoiser for one epoch
        :param loader:DataLoader: training dataloader
        :param denoiser:torch.nn.Module: the denoiser being trained
        :param criterion: loss function
        :param optimizer:Optimizer: optimizer used during trainined
        :param epoch:int: the current epoch (for logging)
        :param noise_sd:float: the std-dev of the Guassian noise perturbation of the input
        :param classifier:torch.nn.Module=None: a ``freezed'' classifier attached to the denoiser 
                                                (required classifciation/stability objectives), None for denoising objective 
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    end = time.time()

    # switch to train mode
    denoiser.train()
    if classifier:
        classifier.eval()

    for i, (inputs, targets) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs = inputs.cuda()
        targets = targets.cuda()

        # augment inputs with noise
        noise = torch.randn_like(inputs, device='cuda') * noise_sd

        # compute output
        outputs = denoiser(inputs + noise)
        if classifier:
            outputs = classifier(outputs)

        if isinstance(criterion, MSELoss):
            loss = criterion(outputs, inputs)
        elif isinstance(criterion, CrossEntropyLoss):
            if args.objective == 'stability':
                with torch.no_grad():
                    targets = classifier(inputs)
                    targets = targets.argmax(1).detach().clone()
            loss = criterion(outputs, targets)

        # record loss
        losses.update(loss.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch,
                      i,
                      len(loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses))

    return losses.avg
def test_with_classifier(loader: DataLoader, denoiser: torch.nn.Module,
                         criterion, noise_sd: float, print_freq: int,
                         classifier: torch.nn.Module):
    """
    A function to test the classification performance of a denoiser when attached to a given classifier
        :param loader:DataLoader: test dataloader
        :param denoiser:torch.nn.Module: the denoiser 
        :param criterion: the loss function (e.g. CE)
        :param noise_sd:float: the std-dev of the Guassian noise perturbation of the input
        :param print_freq:int: the frequency of logging
        :param classifier:torch.nn.Module: the classifier to which the denoiser is attached
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to eval mode
    classifier.eval()
    if denoiser:
        denoiser.eval()

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()

            # augment inputs with noise
            inputs = inputs + torch.randn_like(inputs,
                                               device='cuda') * noise_sd

            if denoiser is not None:
                inputs = denoiser(inputs)
            # compute output
            outputs = classifier(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        return (losses.avg, top1.avg)
def train(loader: DataLoader,
          model: torch.nn.Module,
          criterion,
          optimizer: Optimizer,
          epoch: int,
          noise_sd: float,
          attacker: Attacker,
          device: torch.device,
          writer=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_reg = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()
    requires_grad_(model, True)

    for i, batch in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        mini_batches = _chunk_minibatch(batch, args.num_noise_vec)
        for inputs, targets in mini_batches:
            inputs, targets = inputs.to(device), targets.to(device)
            batch_size = inputs.size(0)

            noises = [
                torch.randn_like(inputs, device=device) * noise_sd
                for _ in range(args.num_noise_vec)
            ]

            if args.adv_training:
                requires_grad_(model, False)
                model.eval()
                inputs = attacker.attack(model, inputs, targets, noises=noises)
                model.train()
                requires_grad_(model, True)

            # augment inputs with noise
            inputs_c = torch.cat([inputs + noise for noise in noises], dim=0)
            targets_c = targets.repeat(args.num_noise_vec)

            logits = model(inputs_c)
            loss_xent = criterion(logits, targets_c)

            logits_chunk = torch.chunk(logits, args.num_noise_vec, dim=0)
            loss_con = consistency_loss(logits_chunk, args.lbd, args.eta)

            loss = loss_xent + loss_con

            acc1, acc5 = accuracy(logits, targets_c, topk=(1, 5))
            losses.update(loss_xent.item(), batch_size)
            losses_reg.update(loss_con.item(), batch_size)
            top1.update(acc1.item(), batch_size)
            top5.update(acc5.item(), batch_size)

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.avg:.3f}\t'
                  'Data {data_time.avg:.3f}\t'
                  'Loss {loss.avg:.4f}\t'
                  'Acc@1 {top1.avg:.3f}\t'
                  'Acc@5 {top5.avg:.3f}'.format(epoch,
                                                i,
                                                len(loader),
                                                batch_time=batch_time,
                                                data_time=data_time,
                                                loss=losses,
                                                top1=top1,
                                                top5=top5))

    if writer:
        writer.add_scalar('loss/train', losses.avg, epoch)
        writer.add_scalar('loss/consistency', losses_reg.avg, epoch)
        writer.add_scalar('batch_time', batch_time.avg, epoch)
        writer.add_scalar('accuracy/train@1', top1.avg, epoch)
        writer.add_scalar('accuracy/train@5', top5.avg, epoch)

    return (losses.avg, top1.avg)
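This variant delegates to two helpers defined elsewhere in its repository. A sketch of plausible implementations: _chunk_minibatch splits one loaded batch into num_chunks equal mini-batches, and consistency_loss mirrors the KL-to-the-average-softmax computation inlined in the transformer-based example below (treating lbd and eta as the weights on the KL and entropy terms is an assumption):

import torch
import torch.nn.functional as F

def _chunk_minibatch(batch, num_chunks):
    inputs, targets = batch
    size = inputs.size(0) // num_chunks
    for i in range(num_chunks):
        yield inputs[i * size:(i + 1) * size], targets[i * size:(i + 1) * size]

def consistency_loss(logits_chunk, lbd, eta):
    softmax = [F.softmax(logits, dim=1) for logits in logits_chunk]
    avg_softmax = sum(softmax) / len(softmax)
    # per-sample KL of the average distribution to each noisy prediction
    kl = sum(F.kl_div(F.log_softmax(logits, dim=1), avg_softmax,
                      reduction='none').sum(1)
             for logits in logits_chunk) / len(logits_chunk)
    # entropy of the average distribution
    ent = -(avg_softmax * torch.log(avg_softmax.clamp_min(1e-12))).sum(1)
    return lbd * kl.mean() + eta * ent.mean()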
Example #16
def train(args):
    args.print_freq = 100
    args.gpu = None
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # load data
    train_dl, valid_dl, test_dl = load_imagenet(args)
    # define model
    model = torchvision.models.resnet50(pretrained=False)
    # multiple gpus
    model = torch.nn.DataParallel(model).cuda()
    loss_fn = torch.nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=0.9,
                                weight_decay=args.weight_decay)

    model_dir = gen_model_dir(args)
    model_dir.mkdir(parents=True, exist_ok=True)

    torch.backends.cudnn.benchmark = True
    best_acc1 = 0
    for epoch in range(args.n_epochs):
        adjust_learning_rate(optimizer, epoch, args)
        batch_time = AverageMeter('Time', ':6.3f')
        data_time = AverageMeter('Data', ':6.3f')
        losses = AverageMeter('Loss', ':.4e')
        top1 = AverageMeter('Acc@1', ':6.2f')
        top5 = AverageMeter('Acc@5', ':6.2f')
        progress = ProgressMeter(len(train_dl),
                                 [batch_time, data_time, losses, top1, top5],
                                 prefix="Epoch: [{}]".format(epoch))

        # switch to train mode
        model.train()
        end = time.time()
        for batch_idx, (images, target) in enumerate(train_dl):
            # measure data loading time
            data_time.update(time.time() - end)

            # if args.gpu is not None:
            images = images.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(images)
            loss = loss_fn(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % args.print_freq == 0:
                progress.display(batch_idx)

        # evaluate on validation set
        acc1 = validate(valid_dl, model, loss_fn, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        torch.save(
            {
                'epoch': epoch + 1,
                'model_weight': model.state_dict(),
                'heldout_best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, Path(model_dir, 'model'))
        if is_best:
            shutil.copyfile(Path(model_dir, 'model'),
                            Path(model_dir, 'model_best'))

    # load best model
    with open(Path(model_dir, "model_best"), 'rb') as f:
        params = torch.load(f)
        model.load_state_dict(params['model_weight'])

    # test
    model.eval()
    # evaluate on test set
    acc_test = validate(test_dl, model, loss_fn, args)

    print('epoch: {}, val acc: {:.4f}, test acc: {:.4f}'.format(
        params["epoch"], params["heldout_best_acc1"], acc_test))

    with open(Path(model_dir, "res.json"), 'w') as fp:
        json.dump(
            {
                'epoch': params["epoch"],
                'heldout_best_acc1': params["heldout_best_acc1"].item(),
                'test_best_acc1': acc_test.item(),
            }, fp)
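adjust_learning_rate is assumed to follow the canonical ImageNet-example schedule, decaying the base lr by 10x every 30 epochs; a sketch under that assumption:

def adjust_learning_rate(optimizer, epoch, args):
    # step decay: lr = base_lr * 0.1^(epoch // 30)
    lr = args.lr * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr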
Example #17
def train(loader: DataLoader,
          model: torch.nn.Module,
          criterion,
          optimizer: Optimizer,
          epoch: int,
          device,
          print_freq=100,
          display=True):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    #     print("Entered training function")

    # switch to train mode
    model.train()

    for i, (inputs, targets) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs = inputs.to(device)
        targets = targets.to(device)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(acc1.item(), inputs.size(0))
        top5.update(acc5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0 and display:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      len(loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5))

    return (losses.avg, top1.avg, top5.avg)
Example #18
def train(loader: DataLoader,
          model: torch.nn.Module,
          criterion,
          optimizer: Optimizer,
          epoch: int,
          transformer: AbstractTransformer,
          writer=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_reg = AverageMeter()
    confidence = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()

    for i, batch in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        mini_batches = _chunk_minibatch(batch, args.num_noise_vec)
        for inputs, targets in mini_batches:
            targets = targets.cuda()
            batch_size = inputs.size(0)

            noised_inputs = [
                transformer.process(inputs).cuda()
                for _ in range(args.num_noise_vec)
            ]

            # augment inputs with noise
            inputs_c = torch.cat(noised_inputs, dim=0)
            targets_c = targets.repeat(args.num_noise_vec)

            logits = model(inputs_c)

            loss_xent = criterion(logits, targets_c)

            logits_chunk = torch.chunk(logits, args.num_noise_vec, dim=0)
            softmax = [F.softmax(logit, dim=1) for logit in logits_chunk]
            avg_softmax = sum(softmax) / args.num_noise_vec

            consistency = [
                kl_div(logit, avg_softmax, reduction='none').sum(1) +
                _entropy(avg_softmax, reduction='none')
                for logit in logits_chunk
            ]
            consistency = sum(consistency) / args.num_noise_vec
            consistency = consistency.mean()

            loss = loss_xent + args.lbd * consistency

            avg_confidence = -F.nll_loss(avg_softmax, targets)

            acc1, acc5 = accuracy(logits, targets_c, topk=(1, 5))
            losses.update(loss_xent.item(), batch_size)
            losses_reg.update(consistency.item(), batch_size)
            confidence.update(avg_confidence.item(), batch_size)
            top1.update(acc1.item(), batch_size)
            top5.update(acc5.item(), batch_size)

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.avg:.3f}\t'
                  'Data {data_time.avg:.3f}\t'
                  'Loss {loss.avg:.4f}\t'
                  'Acc@1 {top1.avg:.3f}\t'
                  'Acc@5 {top5.avg:.3f}'.format(epoch,
                                                i,
                                                len(loader),
                                                batch_time=batch_time,
                                                data_time=data_time,
                                                loss=losses,
                                                top1=top1,
                                                top5=top5))

            if args.print_step:
                writer.add_scalar(f'epoch/{epoch}/loss/train', losses.avg, i)
                writer.add_scalar(f'epoch/{epoch}/loss/consistency',
                                  losses_reg.avg, i)
                writer.add_scalar(f'epoch/{epoch}/loss/avg_confidence',
                                  confidence.avg, i)
                writer.add_scalar(f'epoch/{epoch}/batch_time', batch_time.avg,
                                  i)
                writer.add_scalar(f'epoch/{epoch}/accuracy/train@1', top1.avg,
                                  i)
                writer.add_scalar(f'epoch/{epoch}/accuracy/train@5', top5.avg,
                                  i)

    if writer:
        writer.add_scalar('loss/train', losses.avg, epoch)
        writer.add_scalar('loss/consistency', losses_reg.avg, epoch)
        writer.add_scalar('loss/avg_confidence', confidence.avg, epoch)
        writer.add_scalar('batch_time', batch_time.avg, epoch)
        writer.add_scalar('accuracy/train@1', top1.avg, epoch)
        writer.add_scalar('accuracy/train@5', top5.avg, epoch)

    return (losses.avg, top1.avg)
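kl_div and _entropy here are custom helpers rather than torch.nn.functional.kl_div directly: the first argument is a raw logit, not a log-probability. A sketch consistent with how they are called above:

import torch
import torch.nn.functional as F

def kl_div(input_logits, target_probs, reduction='batchmean'):
    # wrap F.kl_div so callers can pass raw logits as the first argument
    return F.kl_div(F.log_softmax(input_logits, dim=1), target_probs,
                    reduction=reduction)

def _entropy(probs, reduction='mean'):
    ent = -(probs * torch.log(probs.clamp_min(1e-12))).sum(1)
    return ent.mean() if reduction == 'mean' else ent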
Example #19
def test(loader: DataLoader, model: torch.nn.Module, criterion,
         noise_sd: float):
    """
    Function to evaluate the trained model
        :param loader:DataLoader: dataloader (train)
        :param model:torch.nn.Module: the classifer being evaluated
        :param criterion: the loss function
        :param noise_sd:float: the std-dev of the Guassian noise perturbation of the input
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()

            # augment inputs with noise
            inputs = inputs + torch.randn_like(inputs,
                                               device='cuda') * noise_sd

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        return (losses.avg, top1.avg)
Example #20
def test(loader,
         model,
         criterion,
         epoch,
         transformer: AbstractTransformer,
         writer=None,
         print_freq=10):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            targets = targets.cuda()

            # augment inputs with noise
            inputs = transformer.process(inputs).cuda()

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.avg:.3f}\t'
                      'Data {data_time.avg:.3f}\t'
                      'Loss {loss.avg:.4f}\t'
                      'Acc@1 {top1.avg:.3f}\t'
                      'Acc@5 {top5.avg:.3f}'.format(i,
                                                    len(loader),
                                                    batch_time=batch_time,
                                                    data_time=data_time,
                                                    loss=losses,
                                                    top1=top1,
                                                    top5=top5))

        if writer:
            writer.add_scalar('loss/test', losses.avg, epoch)
            writer.add_scalar('accuracy/test@1', top1.avg, epoch)
            writer.add_scalar('accuracy/test@5', top5.avg, epoch)

        return (losses.avg, top1.avg)
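transformer.process takes a clean CPU batch and returns a randomized copy, which the caller then moves to the GPU. A minimal Gaussian-noise sketch of that interface (the class name and sigma attribute are assumptions):

import torch

class GaussianTransformer:
    """Minimal stand-in for the AbstractTransformer interface used above."""
    def __init__(self, sigma: float):
        self.sigma = sigma

    def process(self, inputs: torch.Tensor) -> torch.Tensor:
        # return a noisy copy; callers are responsible for .cuda()
        return inputs + torch.randn_like(inputs) * self.sigma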
def validate(val_loader, model, criterion, args):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    if args.track_correct:
        corr_dict = {'correct':[]}
    
    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)
            
            if args.loss_func == 'l2':
                zero_mat = np.zeros((len(target), args.num_classes), dtype=int)
                zero_mat[list(range(len(target))), target] = 1
                targetl2 = torch.from_numpy(zero_mat).float()
                targetl2 = targetl2.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(input)
            if args.loss_func == 'l2':
                loss = criterion(output, targetl2)
            else:
                loss = criterion(output, target)
            
            # record correctly classified examples
            if args.track_correct:
                correct = accuracy(output, target, topk=(1, 1), track=True)
                corr_dict['correct'] += [(i*args.batch_size) + idx for idx, is_corr in 
                                         enumerate(correct) if is_corr]

            # measure accuracy and record loss (note: topk=(1, 1), so the
            # "top5" meter here actually records top-1 accuracy again)
            acc1, acc5 = accuracy(output, target, topk=(1, 1))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))
            top5.update(acc5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1, top5=top5))

        # Record the indices of the correctly classified images
        if args.track_correct:
            fname, ext = str(args.outpath).split('.')
            corrfile = fname + '_corr.json'
            with open(corrfile, 'w') as f:
                json.dump(corr_dict, f, indent=2)
            return

    return top1.avg, top5.avg
def train(train_loader, model, criterion, optimizer, epoch, args):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        
        if args.use_inverse_sqrt_lr:
            assert not (args.max_lr_adjusting_epoch > 0), \
                "conflicting lr schedulers: step and inverse-sqrt lr can't both be set"
            
            for cur_p, param_group in enumerate(optimizer.param_groups):
                d_rate = (args.initial_lr_decay[cur_p]['decay']
                          if args.initial_lr_decay
                          else 512)
                base_lr = (args.initial_lr[cur_p]['lr']
                          if args.initial_lr
                          else args.lr)
                lr = base_lr / np.sqrt(1 + (epoch*len(train_loader) + i)/d_rate)
                param_group['lr'] = lr

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
        
        if args.loss_func == 'l2':
            zero_mat = np.zeros((len(target), args.num_classes), dtype=int)
            zero_mat[list(range(len(target))), target] = 1
            targetl2 = torch.from_numpy(zero_mat).float()
            targetl2 = targetl2.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)
        
        # for LBFGS
        if args.use_LBFGS:
            def closure():
                optimizer.zero_grad()
                output = model(input)
                loss = criterion(output, target)
                loss.backward()
                return loss
            
            optimizer.step(closure)
            
        else:
            # compute output
            output = model(input)
            if args.loss_func == 'l2':
                loss = criterion(output, targetl2)
            else:
                loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))
            top5.update(acc5[0], input.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses, top1=top1, top5=top5))
Example #23
def test(loader: DataLoader, model: torch.nn.Module, criterion, epoch: int,
         args):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()
    m = Bernoulli(torch.tensor([args.calibrated_alpha]).cuda())

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()

            # make MNIST binary
            if args.dataset == 'mnist':
                inputs = (inputs > 0.5).type(torch.cuda.FloatTensor)

            # augment inputs with noise
            if args.perturb == 'bernoulli':
                mask = m.sample(inputs.shape).squeeze(-1)
                # make sure that the value is normalized
                rand_inputs = torch.randint_like(
                    inputs, low=0, high=args.K + 1, device='cuda') / float(
                        args.K)
                inputs = inputs * mask + rand_inputs * (1 - mask)
            elif args.perturb == 'gaussian':
                inputs = inputs + torch.randn_like(inputs,
                                                   device='cuda') * args.sigma

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if (i + 1) % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i + 1,
                          len(loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))
        print('* Epoch: [{0}] Test: \t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
              'Acc@5 {top5.val:.3f} ({top5.avg:.3f})\n'.format(epoch,
                                                               loss=losses,
                                                               top1=top1,
                                                               top5=top5))

        return (losses.avg, top1.avg)
Example #24
def train(loader: DataLoader, model: torch.nn.Module, criterion,
          optimizer: Optimizer, epoch: int):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    learning_rate = args.attlr
    iterations = args.attiters
    ROAwidth = args.ROAwidth
    ROAheight = args.ROAheight
    skip_in_x = args.skip_in_x
    skip_in_y = args.skip_in_y
    potential_nums = args.potential_nums

    # switch to train mode
    model.train()

    for i, (inputs, targets) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs = inputs.cuda()
        targets = targets.cuda()
        model.eval()
        roa = ROA(model, 32)
        adv_inputs = roa.gradient_based_search(inputs, targets, learning_rate,
                                               iterations, ROAwidth, ROAheight,
                                               skip_in_x, skip_in_y, potential_nums)
        imshow(args.outdir, adv_inputs)

        # compute output
        model.train()
        outputs = model(adv_inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(acc1.item(), inputs.size(0))
        top5.update(acc5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      len(loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5))

    return (losses.avg, top1.avg)
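
Example #24 (like most of the others) calls an accuracy(outputs, targets, topk=(1, 5)) helper that is not shown. A plausible sketch consistent with how it is called here, returning one tensor per requested k, is given below; it is an assumption, not the original implementation.

import torch


def accuracy(output, target, topk=(1,)):
    # Top-k accuracy as a percentage, one tensor per k; callers above use
    # acc1.item() or acc1[0], both of which this shape supports.
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res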
Example #25
def test(loader: DataLoader,
         model: torch.nn.Module,
         criterion,
         noise_sd: float,
         attacker: Attacker = None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    top1_normal = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()
    requires_grad_(model, False)

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()

            # augment inputs with noise
            noise = torch.randn_like(inputs, device='cuda') * noise_sd
            noisy_inputs = inputs + noise

            # compute output
            if args.adv_training:
                normal_outputs = model(noisy_inputs)
                acc1_normal, _ = accuracy(normal_outputs, targets, topk=(1, 5))
                top1_normal.update(acc1_normal.item(), inputs.size(0))

                with torch.enable_grad():
                    inputs = attacker.attack(model,
                                             inputs,
                                             targets,
                                             noise=noise)
                # noise = torch.randn_like(inputs, device='cuda') * noise_sd
                noisy_inputs = inputs + noise

            outputs = model(noisy_inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        if args.adv_training:
            return (losses.avg, top1.avg, top1_normal.avg)
        else:
            return (losses.avg, top1.avg, None)
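
Example #25 toggles parameter gradients with a requires_grad_ helper before and after the attack so the attacker's backward passes never flow into the weights. The helper is not shown; a one-line sketch matching its call signature (assumed) follows.

import torch


def requires_grad_(model: torch.nn.Module, requires_grad: bool) -> None:
    # Flip requires_grad on every parameter; used to freeze the model while
    # an attacker differentiates through it, then unfreeze for training.
    for param in model.parameters():
        param.requires_grad_(requires_grad)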
Example #26
def test(loader: DataLoader, model: torch.nn.Module, criterion):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    learning_rate = args.attlr
    iterations = args.attiters
    ROAwidth = args.ROAwidth
    ROAheight = args.ROAheight
    skip_in_x = args.skip_in_x
    skip_in_y = args.skip_in_y
    potential_nums = args.potential_nums

    # switch to eval mode
    model.eval()
    roa = ROA(model, 32)

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()
            # the attack needs gradients, so re-enable them inside no_grad
            with torch.set_grad_enabled(True):
                adv_inputs = roa.gradient_based_search(inputs, targets,
                                                       learning_rate, iterations,
                                                       ROAwidth, ROAheight,
                                                       skip_in_x, skip_in_y,
                                                       potential_nums)

            # compute output
            outputs = model(adv_inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        return (losses.avg, top1.avg)
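
The nesting in Example #26 is deliberate: the outer torch.no_grad() keeps evaluation cheap, while the inner torch.set_grad_enabled(True) re-enables autograd just for the attack, which needs input gradients. A tiny standalone demonstration of that override (illustrative only):

import torch

x = torch.ones(3, requires_grad=True)
with torch.no_grad():
    with torch.set_grad_enabled(True):
        # gradient tracking is back on inside the inner block
        (x * 2).sum().backward()
print(x.grad)  # tensor([2., 2., 2.])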
Example #27
def train(loader: DataLoader,
          model: torch.nn.Module,
          criterion,
          optimizer: Optimizer,
          epoch: int,
          noise_sd: float,
          attacker: Attacker = None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()
    requires_grad_(model, True)

    for i, batch in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        mini_batches = get_minibatches(batch, args.num_noise_vec)
        noisy_inputs_list = []
        for inputs, targets in mini_batches:
            inputs = inputs.cuda()
            targets = targets.cuda()

            inputs = inputs.repeat(
                (1, args.num_noise_vec, 1, 1)).view(batch[0].shape)

            # augment inputs with noise
            noise = torch.randn_like(inputs, device='cuda') * noise_sd

            if args.adv_training:
                requires_grad_(model, False)
                model.eval()
                inputs = attacker.attack(model,
                                         inputs,
                                         targets,
                                         noise=noise,
                                         num_noise_vectors=args.num_noise_vec,
                                         no_grad=args.no_grad_attack)
                model.train()
                requires_grad_(model, True)

            if args.train_multi_noise:
                noisy_inputs = inputs + noise
                targets = targets.unsqueeze(1).repeat(
                    1, args.num_noise_vec).reshape(-1, 1).squeeze()
                outputs = model(noisy_inputs)
                loss = criterion(outputs, targets)

                acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
                losses.update(loss.item(), noisy_inputs.size(0))
                top1.update(acc1.item(), noisy_inputs.size(0))
                top5.update(acc5.item(), noisy_inputs.size(0))

                # compute gradient and do SGD step
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            else:
                inputs = inputs[::args.num_noise_vec]  # keep one copy per original image
                noise = noise[::args.num_noise_vec]
                # noise = torch.randn_like(inputs, device='cuda') * noise_sd
                noisy_inputs_list.append(inputs + noise)

        if not args.train_multi_noise:
            noisy_inputs = torch.cat(noisy_inputs_list)
            targets = batch[1].cuda()
            assert len(targets) == len(noisy_inputs)

            outputs = model(noisy_inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), noisy_inputs.size(0))
            top1.update(acc1.item(), noisy_inputs.size(0))
            top5.update(acc5.item(), noisy_inputs.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      len(loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5))

    return (losses.avg, top1.avg)
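
Example #27 splits each loaded batch with a get_minibatches(batch, args.num_noise_vec) helper that is not included. A plausible sketch, assuming it yields num_noise_vec contiguous (inputs, targets) chunks:

def get_minibatches(batch, num_batches):
    # Split one (inputs, targets) batch into num_batches contiguous chunks;
    # an assumed reconstruction of the helper used above.
    X, y = batch
    chunk = len(X) // num_batches
    for i in range(num_batches):
        yield X[i * chunk:(i + 1) * chunk], y[i * chunk:(i + 1) * chunk]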
Example #28
def train_or_eval(train, gpu, loader, model, criterion, optimizer, args, hyper,
                  epoch):
    phase = "train" if train else "test"
    model.train() if train else model.eval()

    losses = AverageMeter("Loss", ":.4e")
    top1 = AverageMeter("Accuracy1", ":6.2f")
    top5 = AverageMeter("Accuracy5", ":6.2f")
    prefix = "Epoch:[{}]".format(epoch + 1) if train else "Test: "
    progress = ProgressMeter(len(loader), [losses, top1, top5], prefix=prefix)

    if args.prof:
        print("Profiling started")
        torch.cuda.cudart().cudaProfilerStart()

    t_init = time.time()
    prefetcher = data_prefetcher(loader)
    with torch.set_grad_enabled(mode=train):
        for i, (images, target) in enumerate(prefetcher):
            niter = epoch * len(loader) + i

            if args.prof:
                torch.cuda.nvtx.range_push("Prof start iteration {}".format(i))

            if args.prof: torch.cuda.nvtx.range_push("forward")
            output = model(images)
            if args.prof: torch.cuda.nvtx.range_pop()

            loss = criterion(output, target)

            if train:
                lr = adjust_learning_rate(optimizer, niter, hyper, len(loader))

                optimizer.zero_grad()

                if args.prof: torch.cuda.nvtx.range_push("backward")
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                if args.prof: torch.cuda.nvtx.range_pop()

                if args.prof: torch.cuda.nvtx.range_push("optimizer step")
                optimizer.step()
                if args.prof: torch.cuda.nvtx.range_pop()

            distributed = args.gpu is None
            publish_stats = (not distributed or gpu == 0) and i % 100 == 0
            if not train or publish_stats:
                acc1, acc5 = accuracy(output.detach(), target, topk=(1, 5))
                losses.update(loss.item(), images.size(0))
                top1.update(acc1[0], images.size(0))
                top5.update(acc5[0], images.size(0))

            if publish_stats:
                progress.display(i)

            if train and publish_stats:
                args.writer.add_scalar("Loss/{}".format(phase), loss.item(),
                                       niter)
                args.writer.add_scalar("Accuracy/{}".format(phase), acc1,
                                       niter)
                args.writer.add_scalar("Loss/Accuracy", acc1, lr * 10000)

            if args.prof: torch.cuda.nvtx.range_pop()
            if args.prof and i == 20:
                break

    if args.prof:
        print("Profiling stopped")
        torch.cuda.cudart().cudaProfilerStop()

    print("Total {} epoch time: {}".format(phase, HTIME(time.time() - t_init)))
    return top1.avg
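
Example #28's ProgressMeter (and the named AverageMeter("Loss", ":.4e") variant it wraps, which supplies each meter's __str__) follows the pattern from the official PyTorch ImageNet example; neither class is defined here. A sketch in that style, assumed rather than the exact code:

class ProgressMeter:
    # Prints a "[batch/total]" prefix followed by each meter's formatted
    # string, in the style of the PyTorch ImageNet reference example.
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    @staticmethod
    def _get_batch_fmtstr(num_batches):
        num_digits = len(str(num_batches))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'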
Example #29
def train(train_loader, val_loader, model, criterion, optimizer, param_copy):

    global args, rank, world_size, best_prec1, dataset_len

    # moving average batch time, data loading time, loss
    batch_time = AverageMeter(10)
    data_time = AverageMeter(10)
    losses = AverageMeter(10)

    model.train()

    end = time.time()
    curr_step = 0

    momentum_buffer = []
    for master_p in param_copy:
        momentum_buffer.append(torch.zeros_like(master_p))

    for i, (input, target) in enumerate(train_loader):
        curr_step += 1
        if curr_step > args.max_iter:
            break

        current_lr = adjust_learning_rate(optimizer, curr_step)

        target = target.cuda()
        input = input.cuda()

        data_time.update(time.time() - end)
        end = time.time()

        output = model(input)
        # loss divided by world_size
        loss = criterion(output, target) / world_size
        if rank == 0:
            print("loss: {:.4f}".format(loss.item()))

        # average loss
        reduced_loss = loss.data.clone()
        if args.dist == 1:
            dist.all_reduce(reduced_loss)
        losses.update(reduced_loss.item())

        # average gradient
        optimizer.zero_grad()
        model.zero_grad()
        loss.backward()

        if args.dist == 1:
            sum_gradients(model)

        for param_1, param_2 in zip(param_copy, list(model.parameters())):
            param_1.backward(param_2.grad.float())

        # LARS-style update: scale the global lr by a per-layer trust ratio,
        # fold it into the momentum buffer, then apply the step
        for idx, master_p in enumerate(param_copy):
            if master_p.grad is not None:
                local_lr = master_p.norm(2) / (master_p.grad.norm(2)
                                               + args.weight_decay * master_p.norm(2))
                momentum_buffer[idx] = (args.momentum * momentum_buffer[idx]
                                        + current_lr * local_lr
                                        * (master_p.grad + args.weight_decay * master_p))
                update = momentum_buffer[idx]
                master_p.data.copy_(master_p - update)

        for param, copy_param in zip(model.parameters(), param_copy):
            param.data.copy_(copy_param.data)

        batch_time.update(time.time() - end)
        end = time.time()
        if curr_step % args.val_freq == 0 and curr_step != 0:
            if rank == 0:
                print(
                    'Iter: [{}/{}]\nTime {batch_time.val:.3f} ({batch_time.avg:.3f})\n'
                    .format(curr_step, args.max_iter, batch_time=batch_time))
            val_loss, prec1, prec5 = validate(val_loader, model, criterion)

    if rank == 0:
        print('Iter: [{}/{}]\n'.format(curr_step, args.max_iter))
    val_loss, prec1, prec5 = validate(val_loader, model, criterion)
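
The hand-rolled update loop in Example #29 is a LARS-style rule: each layer's step is scaled by the trust ratio ||w|| / (||g|| + wd * ||w||) before momentum is applied. A condensed restatement of the same arithmetic, where lars_step is a hypothetical helper and not part of the original:

import torch


def lars_step(param, grad, buf, lr, momentum=0.9, weight_decay=1e-4):
    # Trust ratio scales the global lr per layer, matching the loop above.
    w_norm = param.norm(2)
    local_lr = w_norm / (grad.norm(2) + weight_decay * w_norm)
    buf.mul_(momentum).add_(lr * local_lr * (grad + weight_decay * param))
    param.data.add_(-buf)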
Example #30
def train(loader: DataLoader,
          model: torch.nn.Module,
          criterion,
          optimizer: Optimizer,
          epoch: int,
          noise_sd: float,
          attacker: Attacker,
          device: torch.device,
          writer=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()
    requires_grad_(model, True)

    for i, batch in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        mini_batches = _chunk_minibatch(batch, args.num_noise_vec)
        for inputs, targets in mini_batches:
            inputs, targets = inputs.to(device), targets.to(device)
            inputs = inputs.repeat(
                (1, args.num_noise_vec, 1, 1)).reshape(-1, *batch[0].shape[1:])
            batch_size = inputs.size(0)

            # augment inputs with noise
            noise = torch.randn_like(inputs, device=device) * noise_sd

            requires_grad_(model, False)
            model.eval()
            inputs = attacker.attack(model,
                                     inputs,
                                     targets,
                                     noise=noise,
                                     num_noise_vectors=args.num_noise_vec,
                                     no_grad=args.no_grad_attack)
            model.train()
            requires_grad_(model, True)

            noisy_inputs = inputs + noise

            targets = targets.unsqueeze(1).repeat(1,
                                                  args.num_noise_vec).reshape(
                                                      -1, 1).squeeze()
            outputs = model(noisy_inputs)
            loss = criterion(outputs, targets)

            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), batch_size)
            top1.update(acc1.item(), batch_size)
            top5.update(acc5.item(), batch_size)

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.avg:.3f}\t'
                  'Data {data_time.avg:.3f}\t'
                  'Loss {loss.avg:.4f}\t'
                  'Acc@1 {top1.avg:.3f}\t'
                  'Acc@5 {top5.avg:.3f}'.format(epoch,
                                                i,
                                                len(loader),
                                                batch_time=batch_time,
                                                data_time=data_time,
                                                loss=losses,
                                                top1=top1,
                                                top5=top5))

    if writer:
        writer.add_scalar('loss/train', losses.avg, epoch)
        writer.add_scalar('batch_time', batch_time.avg, epoch)
        writer.add_scalar('accuracy/train@1', top1.avg, epoch)
        writer.add_scalar('accuracy/train@5', top5.avg, epoch)

    return (losses.avg, top1.avg)
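
The repeat-then-reshape at the top of Example #30's loop tiles every image num_noise_vec times along the batch dimension, so each copy receives an independent Gaussian draw. A small standalone check of the shape arithmetic (sizes hypothetical):

import torch

num_noise_vec = 4
inputs = torch.randn(2, 3, 32, 32)  # two CIFAR-sized images
tiled = inputs.repeat((1, num_noise_vec, 1, 1)).reshape(-1, 3, 32, 32)

print(tiled.shape)  # torch.Size([8, 3, 32, 32])
# copies of each image are contiguous: indices 0-3 are image 0, 4-7 image 1
assert torch.equal(tiled[0], inputs[0])
assert torch.equal(tiled[4], inputs[1])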