def validate(val_loader, model, criterion):

    global args, rank, world_size, best_prec1

    # validation doesn't need to track the history
    batch_time = AverageMeter(0)
    losses = AverageMeter(0)
    top1 = AverageMeter(0)
    top5 = AverageMeter(0)

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        if i == len(val_loader) / (args.batch_size * world_size):
            break

        input = input.cuda()
        if args.double == 1:
            input = input.double()
        if args.half == 1:
            input = input.half()
        target = target.cuda()

        # compute output
        with torch.no_grad():
            output = model(input)

        # measure accuracy and record loss
        loss = criterion(output, target) / world_size
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

        reduced_loss = loss.data.clone()
        reduced_prec1 = prec1.clone() / world_size
        reduced_prec5 = prec5.clone() / world_size

        if args.dist == 1:
            dist.all_reduce(reduced_loss)
            dist.all_reduce(reduced_prec1)
            dist.all_reduce(reduced_prec5)

        losses.update(reduced_loss.item())
        top1.update(reduced_prec1.item())
        top5.update(reduced_prec5.item())

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    if rank == 0:
        print(
            ' * All Loss {loss.avg:.4f} Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
            .format(loss=losses, top1=top1, top5=top5))

    model.train()

    return losses.avg, top1.avg, top5.avg
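
Most of the examples on this page rely on two helpers, AverageMeter and accuracy, that are defined elsewhere in their respective repositories. The sketch below is a minimal version modeled on the standard PyTorch ImageNet example; note that the example above constructs AverageMeter(0) with an extra history-length argument, which this simplified version omits, so treat it as an illustration of the interface rather than the exact implementation used in each repository.

class AverageMeter(object):
    """Tracks the latest value, running sum, count and average of a metric."""

    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes the top-k accuracies (in percent) for the given logits."""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res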
Example 2
def model_inference_imagenet(base_classifier,
                             loader,
                             device,
                             display=False,
                             print_freq=1000):
    print_freq = 100  # note: this overrides the print_freq argument passed in
    top1 = AverageMeter()
    top5 = AverageMeter()

    start = time.time()
    base_classifier.eval()
    # Regular dataset:
    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            inputs = inputs.to(device, non_blocking=True)
            targets = torch.as_tensor(targets)
            targets = targets.to(device, non_blocking=True)
            outputs = base_classifier(inputs)
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            if i % print_freq == 0 and display:
                print("Test : [{0}/{1}]\t"
                      "Acc@1 {top1.avg:.3f}\t"
                      "Acc@5 {top5.avg:.3f}".format(i,
                                                    len(loader),
                                                    top1=top1,
                                                    top5=top5))
    end = time.time()
    if display:
        print("Inference Time: {0:.3f}".format(end - start))
        print("Final Accuracy: [{0}]".format(top1.avg))

    return top1.avg, top5.avg
Example 3
def train(args, train_queue, model, criterion, optimizer):
    objs = train_utils.AvgrageMeter()
    top1 = train_utils.AvgrageMeter()
    top5 = train_utils.AvgrageMeter()
    model.train()

    for step, (input, target) in enumerate(train_queue):
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        optimizer.zero_grad()
        logits, logits_aux = model(input)
        loss = criterion(logits, target)
        if args.train.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.train.auxiliary_weight * loss_aux
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.train.grad_clip)
        optimizer.step()

        prec1, prec5 = train_utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.run.report_freq == 0:
            logging.info("train %03d %e %f %f", step, objs.avg, top1.avg,
                         top5.avg)

    return top1.avg, objs.avg
Example 4
def train(loader: DataLoader, model: torch.nn.Module, criterion,
          optimizer: Optimizer, epoch: int, noise_sd: float):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    for i, (inputs, targets) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs = inputs.to(device)
        targets = targets.to(device)

        # augment inputs with noise
        inputs = inputs + torch.randn_like(inputs, device=device) * noise_sd

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(acc1.item(), inputs.size(0))
        top5.update(acc5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      len(loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5))

    return (losses.avg, top1.avg)
Example 5
def train(training_model, training_data, opts, lr_scheduler, epochs, optimizer):
    nr_batches = len(training_data)
    try:
        for epoch in epochs:
            logging.info("Epoch {0}/{1}".format(epoch, opts.epoch))
            bar = tqdm(training_data)
            sum_loss = 0.0
            sum_acc = 0.0
            start_time = time.time()
            total_sample = 0
            for batch_idx, (data, labels) in enumerate(bar):
                preds, losses = training_model(data, labels)
                if not opts.disable_metrics:
                    with torch.no_grad():
                        # Convert to full precision for CPU execution.
                        losses = losses.float()
                        preds = preds.float()
                        mean_loss = torch.mean(losses).item()
                        acc = accuracy(preds, labels)
                    sum_acc += acc
                    sum_loss += mean_loss
                    aggregated_loss = sum_loss / (batch_idx+1)
                    aggregated_accuracy = sum_acc / (batch_idx+1)
                    bar.set_description("Loss:{:0.4f} | Accuracy:{:0.2f}%".format(aggregated_loss, aggregated_accuracy))

                total_sample += data.size()[0]

            end_time = time.time()
            if not opts.disable_metrics:
                print("Epoch {}: Train accuracy is {:0.2f}%".format(epoch, aggregated_accuracy))
            print("Throughput of the epoch:{:0.1f} img/sec".format(total_sample / (end_time-start_time)))
            # save
            if opts.checkpoint_path != "":
                if not os.path.exists(opts.checkpoint_path):
                    os.makedirs(opts.checkpoint_path)
                filename = "{0}_{1}_{2}.pt".format(opts.model, opts.data, epoch)
                save_path = os.path.join(opts.checkpoint_path, filename)
                training_model.copyWeightsToHost()
                state = training_model.model.model.state_dict()
                optimizer_state = optimizer.state_dict()
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': state,
                    'optimizer_state_dict': optimizer_state,
                    'loss': aggregated_loss,
                    'train_accuracy': aggregated_accuracy,
                    'opts': opts
                }, save_path)

            # lr schedule
            if lr_scheduler is not None:
                lr_scheduler.step()
                new_optimizer = copy.copy(optimizer)
                training_model.setOptimizer(new_optimizer)
                logging.info(f"Learning rate is changed to {lr_scheduler.get_last_lr()}")
    finally:
        # stop the process which fetches the data
        if isinstance(training_data, AsyncDataLoader):
            training_data.stop_data_fetch()
Example 6
def train(loader: DataLoader, model: torch.nn.Module, criterion, optimizer: Optimizer,
          epoch: int, noise_sd: float, device: torch.device, writer=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()

    for i, (inputs, targets) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs, targets = inputs.to(device), targets.to(device)
        batch_size = inputs.size(0)

        # augment inputs with noise
        inputs = inputs + torch.randn_like(inputs, device=device) * noise_sd

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
        losses.update(loss.item(), batch_size)
        top1.update(acc1.item(), batch_size)
        top5.update(acc5.item(), batch_size)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.avg:.3f}\t'
                  'Data {data_time.avg:.3f}\t'
                  'Loss {loss.avg:.4f}\t'
                  'Acc@1 {top1.avg:.3f}\t'
                  'Acc@5 {top5.avg:.3f}'.format(
                epoch, i, len(loader), batch_time=batch_time,
                data_time=data_time, loss=losses, top1=top1, top5=top5))

    if writer:
        writer.add_scalar('loss/train', losses.avg, epoch)
        writer.add_scalar('batch_time', batch_time.avg, epoch)
        writer.add_scalar('accuracy/train@1', top1.avg, epoch)
        writer.add_scalar('accuracy/train@5', top5.avg, epoch)

    return (losses.avg, top1.avg)
Example 7
def test(loader: DataLoader, model: torch.nn.Module, criterion,
         noise_sd: float):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()

            # augment inputs with noise
            inputs = inputs + randgn_like(inputs, p=args.p,
                                          device='cuda') * noise_sd

            # compute output
            if (args.scale_down != 1):
                inputs = torch.nn.functional.interpolate(
                    inputs, scale_factor=args.scale_down)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        return (losses.avg, top1.avg)
Example 8
def optimization_step(model, criterion, optimizer, optimizer_fp, x_batch,
                      y_batch):

    x_batch = x_batch.cuda()
    y_batch = y_batch.cuda(non_blocking=True)
    # use quantized model
    logits = model(x_batch)

    # compute logloss
    loss = criterion(logits, y_batch)
    batch_loss = loss.item()

    # compute accuracies
    pred = F.softmax(logits, dim=1)
    batch_accuracy, batch_top5_accuracy = accuracy(y_batch, pred, top_k=(1, 5))

    optimizer.zero_grad()
    optimizer_fp.zero_grad()
    # compute grads for quantized model
    loss.backward()

    all_kernels = optimizer.param_groups[2]['params']
    all_fp_kernels = optimizer_fp.param_groups[0]['params']

    for i in range(len(all_kernels)):

        # get quantized kernel
        k = all_kernels[i]

        # get corresponding full precision kernel
        k_fp = all_fp_kernels[i]

        # get modified grads
        k_fp_grad = get_grads(k.grad.data, k.data)

        # grad for full precision kernel
        k_fp.grad = Variable(k_fp_grad)

        # we don't need to update quantized kernel directly
        k.grad.data.zero_()

    # update the last fc layer and all batch norm params in quantized model
    optimizer.step()

    # update full precision kernels
    optimizer_fp.step()

    # update quantized kernels
    for i in range(len(all_kernels)):

        k = all_kernels[i]
        k_fp = all_fp_kernels[i]

        k.data = quantize(k_fp.data)

    return batch_loss, batch_accuracy, batch_top5_accuracy
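
The quantize and get_grads helpers used above are project-specific and are not shown on this page. Purely to illustrate the pattern (full-precision shadow kernels receive modified gradients and are then re-quantized into the low-precision kernels), here is a hypothetical sketch using simple sign-based binarization with a straight-through gradient; the actual repository may use a different quantization rule.

import torch

def quantize(w_fp):
    # Hypothetical rule: binarize to {-alpha, +alpha} with a per-tensor scale.
    alpha = w_fp.abs().mean()
    return alpha * torch.sign(w_fp)

def get_grads(grad_q, w_q):
    # Hypothetical straight-through estimator: reuse the gradient computed for
    # the quantized kernel as the update direction for the shadow kernel.
    return grad_q.clone()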
Example 9
def test(loader: DataLoader,
         model: torch.nn.Module,
         criterion,
         device,
         print_freq,
         display=False):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.to(device)
            targets = targets.to(device)

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0 and display:
                print('Test : [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        return (losses.avg, top1.avg, top5.avg)
Example 10
    def update(self, output, target, key):
        with torch.no_grad():
            loss = self.criterion(output, target)
            self.losses[key].update(loss.item(), target.size(0))
            t1, t5 = accuracy(output, target, topk=(1, 5))
            self.top1[key].update(t1.item(), target.size(0))
            self.top5[key].update(t5.item(), target.size(0))

            log_prob = F.log_softmax(output, 1)
            prob = log_prob.exp()
            entropy = -(log_prob * prob).sum(1).data
            self.ent[key].update(entropy.mean().item(), target.size(0))
Example 11
def test(inference_model, test_data, opts):
    nr_batches = len(test_data)
    bar = tqdm(test_data, total=nr_batches)
    sum_acc = 0.0

    with torch.no_grad():
        for idx, (input_data, labels) in enumerate(bar):
            output = inference_model(input_data)
            output = output.float()
            sum_acc += accuracy(output, labels)
            aggregated_accuracy = sum_acc/(idx+1)
            bar.set_description(f"Accuracy:{aggregated_accuracy:0.2f}%")
    acc = sum_acc / nr_batches
    logging.info(f"Accuracy on test set: {acc:0.2f}%")
    return acc
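
Note that this example and the next one assume an accuracy helper with a different contract from the top-k version used elsewhere on this page: it returns a single percentage for the whole batch, which is then averaged across batches. A minimal sketch under that assumption:

import torch

def accuracy(predictions, labels):
    # Single-value variant: percentage of samples whose top-1 prediction is correct.
    _, ind = torch.max(predictions, dim=1)
    return (ind == labels).float().mean().item() * 100.0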
Example 12
def test(inference_model, test_data, opts):
    nr_batches = len(test_data)
    bar = tqdm(test_data, total=nr_batches)
    sum_acc = 0.0
    with torch.no_grad():
        for idx, (data, labels) in enumerate(bar):
            if opts.precision == "half":
                data = data.half()
            data = data.contiguous()
            output = inference_model(data)
            output = output.float()
            sum_acc += accuracy(output, labels)
            aggregated_accuracy = sum_acc / (idx + 1)
            bar.set_description(
                "Accuracy:{:0.2f}%".format(aggregated_accuracy))
    print("Accuracy on test set: {:0.2f}%".format(sum_acc / len(test_data)))
def train(train_loader, model, criterion, optimizer, epoch, args):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)
def validate(val_loader, model, criterion, args):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return top1.avg, top5.avg
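
The two functions above follow the official PyTorch ImageNet example, where AverageMeter takes a name and a format string and ProgressMeter pretty-prints a list of meters every print_freq batches. A minimal sketch of both, close to the reference implementation but reproduced here only for context, is:

class AverageMeter(object):
    """Named variant with a display format, e.g. AverageMeter('Loss', ':.4e')."""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.val = self.avg = self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        # e.g. "Epoch: [3][ 120/5005]\tTime 0.212 (0.245)\t..."
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'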
Example 15
def train(loader: DataLoader,
          model: torch.nn.Module,
          criterion,
          optimizer: Optimizer,
          epoch: int,
          noise_sd: float,
          attacker: Attacker = None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()
    requires_grad_(model, True)

    for i, batch in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        mini_batches = get_minibatches(batch, args.num_noise_vec)
        noisy_inputs_list = []
        for inputs, targets in mini_batches:
            inputs = inputs.cuda()
            targets = targets.cuda()

            inputs = inputs.repeat(
                (1, args.num_noise_vec, 1, 1)).view(batch[0].shape)

            # augment inputs with noise
            noise = torch.randn_like(inputs, device='cuda') * noise_sd

            if args.adv_training:
                requires_grad_(model, False)
                model.eval()
                inputs = attacker.attack(model,
                                         inputs,
                                         targets,
                                         noise=noise,
                                         num_noise_vectors=args.num_noise_vec,
                                         no_grad=args.no_grad_attack)
                model.train()
                requires_grad_(model, True)

            if args.train_multi_noise:
                noisy_inputs = inputs + noise
                targets = targets.unsqueeze(1).repeat(
                    1, args.num_noise_vec).reshape(-1, 1).squeeze()
                outputs = model(noisy_inputs)
                loss = criterion(outputs, targets)

                acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
                losses.update(loss.item(), noisy_inputs.size(0))
                top1.update(acc1.item(), noisy_inputs.size(0))
                top5.update(acc5.item(), noisy_inputs.size(0))

                # compute gradient and do SGD step
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            else:
                inputs = inputs[::args.num_noise_vec]  # subsample the samples
                noise = noise[::args.num_noise_vec]
                # noise = torch.randn_like(inputs, device='cuda') * noise_sd
                noisy_inputs_list.append(inputs + noise)

        if not args.train_multi_noise:
            noisy_inputs = torch.cat(noisy_inputs_list)
            targets = batch[1].cuda()
            assert len(targets) == len(noisy_inputs)

            outputs = model(noisy_inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), noisy_inputs.size(0))
            top1.update(acc1.item(), noisy_inputs.size(0))
            top5.update(acc5.item(), noisy_inputs.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      len(loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5))

    return (losses.avg, top1.avg)
Example 16
def test(loader: DataLoader,
         model: torch.nn.Module,
         criterion,
         noise_sd: float,
         attacker: Attacker = None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    top1_normal = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()
    requires_grad_(model, False)

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()

            # augment inputs with noise
            noise = torch.randn_like(inputs, device='cuda') * noise_sd
            noisy_inputs = inputs + noise

            # compute output
            if args.adv_training:
                normal_outputs = model(noisy_inputs)
                acc1_normal, _ = accuracy(normal_outputs, targets, topk=(1, 5))
                top1_normal.update(acc1_normal.item(), inputs.size(0))

                with torch.enable_grad():
                    inputs = attacker.attack(model,
                                             inputs,
                                             targets,
                                             noise=noise)
                # noise = torch.randn_like(inputs, device='cuda') * noise_sd
                noisy_inputs = inputs + noise

            outputs = model(noisy_inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        if args.adv_training:
            return (losses.avg, top1.avg, top1_normal.avg)
        else:
            return (losses.avg, top1.avg, None)
Example 17
def train(loader: DataLoader,
          model: torch.nn.Module,
          criterion,
          optimizer: Optimizer,
          epoch: int,
          transformer: AbstractTransformer,
          writer=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_reg = AverageMeter()
    confidence = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()

    for i, batch in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        mini_batches = _chunk_minibatch(batch, args.num_noise_vec)
        for inputs, targets in mini_batches:
            targets = targets.cuda()
            batch_size = inputs.size(0)

            noised_inputs = [
                transformer.process(inputs).cuda()
                for _ in range(args.num_noise_vec)
            ]

            # augment inputs with noise
            inputs_c = torch.cat(noised_inputs, dim=0)
            targets_c = targets.repeat(args.num_noise_vec)

            logits = model(inputs_c)

            loss_xent = criterion(logits, targets_c)

            logits_chunk = torch.chunk(logits, args.num_noise_vec, dim=0)
            softmax = [F.softmax(logit, dim=1) for logit in logits_chunk]
            avg_softmax = sum(softmax) / args.num_noise_vec

            consistency = [
                kl_div(logit, avg_softmax, reduction='none').sum(1) +
                _entropy(avg_softmax, reduction='none')
                for logit in logits_chunk
            ]
            consistency = sum(consistency) / args.num_noise_vec
            consistency = consistency.mean()

            loss = loss_xent + args.lbd * consistency

            avg_confidence = -F.nll_loss(avg_softmax, targets)

            acc1, acc5 = accuracy(logits, targets_c, topk=(1, 5))
            losses.update(loss_xent.item(), batch_size)
            losses_reg.update(consistency.item(), batch_size)
            confidence.update(avg_confidence.item(), batch_size)
            top1.update(acc1.item(), batch_size)
            top5.update(acc5.item(), batch_size)

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.avg:.3f}\t'
                  'Data {data_time.avg:.3f}\t'
                  'Loss {loss.avg:.4f}\t'
                  'Acc@1 {top1.avg:.3f}\t'
                  'Acc@5 {top5.avg:.3f}'.format(epoch,
                                                i,
                                                len(loader),
                                                batch_time=batch_time,
                                                data_time=data_time,
                                                loss=losses,
                                                top1=top1,
                                                top5=top5))

            if args.print_step:
                writer.add_scalar(f'epoch/{epoch}/loss/train', losses.avg, i)
                writer.add_scalar(f'epoch/{epoch}/loss/consistency',
                                  losses_reg.avg, i)
                writer.add_scalar(f'epoch/{epoch}/loss/avg_confidence',
                                  confidence.avg, i)
                writer.add_scalar(f'epoch/{epoch}/batch_time', batch_time.avg,
                                  i)
                writer.add_scalar(f'epoch/{epoch}/accuracy/train@1', top1.avg,
                                  i)
                writer.add_scalar(f'epoch/{epoch}/accuracy/train@5', top5.avg,
                                  i)

    if writer:
        writer.add_scalar('loss/train', losses.avg, epoch)
        writer.add_scalar('loss/consistency', losses_reg.avg, epoch)
        writer.add_scalar('loss/avg_confidence', confidence.avg, epoch)
        writer.add_scalar('batch_time', batch_time.avg, epoch)
        writer.add_scalar('accuracy/train@1', top1.avg, epoch)
        writer.add_scalar('accuracy/train@5', top5.avg, epoch)

    return (losses.avg, top1.avg)
def test_with_classifier(loader: DataLoader, denoiser: torch.nn.Module,
                         criterion, noise_sd: float, print_freq: int,
                         classifier: torch.nn.Module):
    """
    A function to test the classification performance of a denoiser when attached to a given classifier
        :param loader:DataLoader: test dataloader
        :param denoiser:torch.nn.Module: the denoiser 
        :param criterion: the loss function (e.g. CE)
        :param noise_sd:float: the std-dev of the Gaussian noise perturbation of the input
        :param print_freq:int: the frequency of logging
        :param classifier:torch.nn.Module: the classifier to which the denoiser is attached
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to eval mode
    classifier.eval()
    if denoiser:
        denoiser.eval()

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()

            # augment inputs with noise
            inputs = inputs + torch.randn_like(inputs,
                                               device='cuda') * noise_sd

            if denoiser is not None:
                inputs = denoiser(inputs)
            # compute output
            outputs = classifier(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        return (losses.avg, top1.avg)
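
For context, a hypothetical call to the function above; test_loader, denoiser and classifier are placeholders for whatever objects the surrounding script constructs:

import torch

criterion = torch.nn.CrossEntropyLoss().cuda()
# Evaluate the denoiser + classifier pipeline under sigma = 0.25 input noise.
test_loss, test_acc1 = test_with_classifier(test_loader, denoiser, criterion,
                                            noise_sd=0.25, print_freq=100,
                                            classifier=classifier)
print('denoised accuracy@1: {:.3f} (loss {:.4f})'.format(test_acc1, test_loss))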
Example 19
def test(loader: DataLoader, model: torch.nn.Module, criterion):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    learning_rate = args.attlr
    iterations = args.attiters
    ROAwidth = args.ROAwidth
    ROAheight = args.ROAheight
    skip_in_x = args.skip_in_x
    skip_in_y = args.skip_in_y
    potential_nums = args.potential_nums

    # switch to eval mode
    model.eval()
    roa = ROA(model, 32)

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()
            with torch.set_grad_enabled(True):
                adv_inputs = roa.gradient_based_search(inputs, targets, learning_rate,\
                        iterations, ROAwidth , ROAheight, skip_in_x, skip_in_y, potential_nums)

            # compute output
            outputs = model(adv_inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        return (losses.avg, top1.avg)
Example 20
def train(loader: DataLoader, model: torch.nn.Module, criterion,
          optimizer: Optimizer, epoch: int):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    learning_rate = args.attlr
    iterations = args.attiters
    ROAwidth = args.ROAwidth
    ROAheight = args.ROAheight
    skip_in_x = args.skip_in_x
    skip_in_y = args.skip_in_y
    potential_nums = args.potential_nums

    # switch to train mode
    model.train()

    for i, (inputs, targets) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs = inputs.cuda()
        targets = targets.cuda()
        model.eval()
        roa = ROA(model, 32)
        adv_inputs = roa.gradient_based_search(inputs, targets, learning_rate,\
                        iterations, ROAwidth , ROAheight, skip_in_x, skip_in_y, potential_nums)
        imshow(args.outdir, adv_inputs)

        # compute output
        model.train()
        outputs = model(adv_inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(acc1.item(), inputs.size(0))
        top5.update(acc5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      len(loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5))

    return (losses.avg, top1.avg)
Example 21
def test(loader,
         model,
         criterion,
         epoch,
         transformer: AbstractTransformer,
         writer=None,
         print_freq=10):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            targets = targets.cuda()

            # augment inputs with noise
            inputs = transformer.process(inputs).cuda()

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.avg:.3f}\t'
                      'Data {data_time.avg:.3f}\t'
                      'Loss {loss.avg:.4f}\t'
                      'Acc@1 {top1.avg:.3f}\t'
                      'Acc@5 {top5.avg:.3f}'.format(i,
                                                    len(loader),
                                                    batch_time=batch_time,
                                                    data_time=data_time,
                                                    loss=losses,
                                                    top1=top1,
                                                    top5=top5))

        if writer:
            writer.add_scalar('loss/test', losses.avg, epoch)
            writer.add_scalar('accuracy/test@1', top1.avg, epoch)
            writer.add_scalar('accuracy/test@5', top5.avg, epoch)

        return (losses.avg, top1.avg)
Example 22
def train(loader: DataLoader,
          model: torch.nn.Module,
          criterion,
          optimizer: Optimizer,
          epoch: int,
          device,
          print_freq=100,
          display=True):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    #     print("Entered training function")

    # switch to train mode
    model.train()

    for i, (inputs, targets) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs = inputs.to(device)
        targets = targets.to(device)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(acc1.item(), inputs.size(0))
        top5.update(acc5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0 and display:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      len(loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5))

    return (losses.avg, top1.avg, top5.avg)
def train(loader: DataLoader,
          model: torch.nn.Module,
          criterion,
          optimizer: Optimizer,
          epoch: int,
          noise_sd: float,
          attacker: Attacker,
          device: torch.device,
          writer=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_reg = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()
    requires_grad_(model, True)

    for i, batch in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        mini_batches = _chunk_minibatch(batch, args.num_noise_vec)
        for inputs, targets in mini_batches:
            inputs, targets = inputs.to(device), targets.to(device)
            batch_size = inputs.size(0)

            noises = [
                torch.randn_like(inputs, device=device) * noise_sd
                for _ in range(args.num_noise_vec)
            ]

            if args.adv_training:
                requires_grad_(model, False)
                model.eval()
                inputs = attacker.attack(model, inputs, targets, noises=noises)
                model.train()
                requires_grad_(model, True)

            # augment inputs with noise
            inputs_c = torch.cat([inputs + noise for noise in noises], dim=0)
            targets_c = targets.repeat(args.num_noise_vec)

            logits = model(inputs_c)
            loss_xent = criterion(logits, targets_c)

            logits_chunk = torch.chunk(logits, args.num_noise_vec, dim=0)
            loss_con = consistency_loss(logits_chunk, args.lbd, args.eta)

            loss = loss_xent + loss_con

            acc1, acc5 = accuracy(logits, targets_c, topk=(1, 5))
            losses.update(loss_xent.item(), batch_size)
            losses_reg.update(loss_con.item(), batch_size)
            top1.update(acc1.item(), batch_size)
            top5.update(acc5.item(), batch_size)

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.avg:.3f}\t'
                  'Data {data_time.avg:.3f}\t'
                  'Loss {loss.avg:.4f}\t'
                  'Acc@1 {top1.avg:.3f}\t'
                  'Acc@5 {top5.avg:.3f}'.format(epoch,
                                                i,
                                                len(loader),
                                                batch_time=batch_time,
                                                data_time=data_time,
                                                loss=losses,
                                                top1=top1,
                                                top5=top5))

    if writer:
        writer.add_scalar('loss/train', losses.avg, epoch)
        writer.add_scalar('loss/consistency', losses_reg.avg, epoch)
        writer.add_scalar('batch_time', batch_time.avg, epoch)
        writer.add_scalar('accuracy/train@1', top1.avg, epoch)
        writer.add_scalar('accuracy/train@5', top5.avg, epoch)

    return (losses.avg, top1.avg)
Example 24
    def train(self, args, logger=None, progressbar=None):
        """
        Train function of FixMatch.
        From data_loader, it runs inference on the training data, computes losses, and updates the networks.
        """
        ngpus_per_node = torch.cuda.device_count()

        # lb: labeled, ulb: unlabeled
        self.train_model.train()

        # for gpu profiling
        start_batch = torch.cuda.Event(enable_timing=True)
        end_batch = torch.cuda.Event(enable_timing=True)
        start_run = torch.cuda.Event(enable_timing=True)
        end_run = torch.cuda.Event(enable_timing=True)

        total_epochs = args.num_train_iter // args.num_eval_iter
        curr_epoch = 0
        progressbar = tqdm(desc=f"Epoch {curr_epoch}/{total_epochs}",
                           total=args.num_eval_iter)

        start_batch.record()
        best_eval_acc, best_it = 0.0, 0

        scaler = GradScaler()
        amp_cm = autocast if args.amp else contextlib.nullcontext
        for (x_lb, y_lb), (x_ulb_w, x_ulb_s,
                           _) in zip(self.loader_dict["train_lb"],
                                     self.loader_dict["train_ulb"]):

            # prevent the training iterations from exceeding args.num_train_iter
            if self.it > args.num_train_iter:
                break

            end_batch.record()
            torch.cuda.synchronize()
            start_run.record()

            num_lb = x_lb.shape[0]
            num_ulb = x_ulb_w.shape[0]
            assert num_ulb == x_ulb_s.shape[0]

            x_lb, x_ulb_w, x_ulb_s = (
                x_lb.cuda(args.gpu),
                x_ulb_w.cuda(args.gpu),
                x_ulb_s.cuda(args.gpu),
            )
            y_lb = y_lb.cuda(args.gpu)

            inputs = torch.cat((x_lb, x_ulb_w, x_ulb_s))

            # inference and calculate sup/unsup losses
            with amp_cm():
                logits = self.train_model(inputs)
                logits_x_lb = logits[:num_lb]
                logits_x_ulb_w, logits_x_ulb_s = logits[num_lb:].chunk(2)
                del logits

                # hyper-params for update
                T = self.t_fn(self.it)
                p_cutoff = self.p_fn(self.it)

                sup_loss = ce_loss(logits_x_lb, y_lb, reduction="mean")
                unsup_loss, mask = consistency_loss(
                    logits_x_ulb_w,
                    logits_x_ulb_s,
                    "ce",
                    T,
                    p_cutoff,
                    use_hard_labels=args.hard_label,
                )

                total_loss = sup_loss + self.lambda_u * unsup_loss

            # parameter updates
            if args.amp:
                scaler.scale(total_loss).backward()
                scaler.step(self.optimizer)
                scaler.update()
            else:
                total_loss.backward()
                self.optimizer.step()

            self.scheduler.step()
            self.train_model.zero_grad()

            with torch.no_grad():
                self._eval_model_update()
                train_accuracy = accuracy(logits_x_lb, y_lb)
                train_accuracy = train_accuracy[0]

            end_run.record()
            torch.cuda.synchronize()

            # tensorboard_dict update
            tb_dict = {}
            tb_dict["train/sup_loss"] = sup_loss.detach()
            tb_dict["train/unsup_loss"] = unsup_loss.detach()
            tb_dict["train/total_loss"] = total_loss.detach()
            tb_dict["train/mask_ratio"] = 1.0 - mask.detach()
            tb_dict["lr"] = self.optimizer.param_groups[0]["lr"]
            tb_dict["train/prefetch_time"] = (
                start_batch.elapsed_time(end_batch) / 1000.0)
            tb_dict["train/run_time"] = start_run.elapsed_time(
                end_run) / 1000.0
            tb_dict["train/top-1-acc"] = train_accuracy

            progressbar.set_postfix_str(
                f"Total Loss={total_loss.detach():.3e}")
            progressbar.update(1)

            if self.it % self.num_eval_iter == 0:
                progressbar.close()
                curr_epoch += 1

                eval_dict = self.evaluate(args=args)
                tb_dict.update(eval_dict)

                save_path = os.path.join(args.save_dir, args.save_name)

                if tb_dict["eval/top-1-acc"] > best_eval_acc:
                    best_eval_acc = tb_dict["eval/top-1-acc"]
                    best_it = self.it

                self.print_fn(
                    f"{self.it} iteration, USE_EMA: {hasattr(self, 'eval_model')}, {tb_dict}, BEST_EVAL_ACC: {best_eval_acc}, at {best_it} iters"
                )

                progressbar = tqdm(desc=f"Epoch {curr_epoch}/{total_epochs}",
                                   total=args.num_eval_iter)

            if not args.multiprocessing_distributed or (
                    args.multiprocessing_distributed
                    and args.rank % ngpus_per_node == 0):

                if self.it == best_it:
                    self.save_model("model_best.pth", save_path)

                if self.tb_log is not None:
                    self.tb_log.update(tb_dict, self.it)

            self.it += 1
            del tb_dict
            start_batch.record()
            if self.it > 2**19:
                self.num_eval_iter = 1000

        eval_dict = self.evaluate(args=args)
        eval_dict.update({
            "eval/best_acc": best_eval_acc,
            "eval/best_it": best_it
        })
        return eval_dict
Example 25
def test(loader: DataLoader, model: torch.nn.Module, criterion, epoch: int,
         args):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()
    m = Bernoulli(torch.tensor([args.calibrated_alpha]).cuda())

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()

            # make MNIST binary
            if args.dataset == 'mnist':
                inputs = (inputs > 0.5).type(torch.cuda.FloatTensor)

            # augment inputs with noise
            if args.perturb == 'bernoulli':
                mask = m.sample(inputs.shape).squeeze(-1)
                # make sure that the value is normalized
                rand_inputs = torch.randint_like(
                    inputs, low=0, high=args.K + 1, device='cuda') / float(
                        args.K)
                inputs = inputs * mask + rand_inputs * (1 - mask)
            elif args.perturb == 'gaussian':
                inputs = inputs + torch.randn_like(inputs,
                                                   device='cuda') * args.sigma

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if (i + 1) % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i + 1,
                          len(loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))
        print('* Epoch: [{0}] Test: \t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
              'Acc@5 {top5.val:.3f} ({top5.avg:.3f})\n'.format(epoch,
                                                               loss=losses,
                                                               top1=top1,
                                                               top5=top5))

        return (losses.avg, top1.avg)
Example 26
def train_or_eval(data_loader,
                  classifier,
                  decoder,
                  train=False,
                  optimizer=None,
                  epoch=None):
    ## initialize all metric used
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    losses_m = AverageMeter()
    acc_m = AverageMeter()
    statistics = StatisticsContainer()

    classifier_criterion = nn.CrossEntropyLoss().to(device)

    ## switch to train mode if needed
    if train:
        decoder.train()
        if args.fixed_classifier:
            classifier.eval()
        else:
            classifier.train()
    else:
        decoder.eval()
        classifier.eval()

    ## data loop
    end = time.time()
    for i, (input, target) in enumerate(data_loader):
        if train and i > len(data_loader) * args.pot:
            break

        ## measure data loading time
        data_time.update(time.time() - end)

        ## move input and target to the device
        input, target = input.to(device), target.to(device)

        ## compute classifier prediction on the original images and get inner layers
        with torch.set_grad_enabled(train and (not args.fixed_classifier)):
            output, layers = classifier(input)
            classifier_loss = classifier_criterion(output, target)

        ## update metrics
        losses.update(classifier_loss.item(), input.size(0))
        acc.update(
            accuracy(output.detach(), target, topk=(1, ))[0].item(),
            input.size(0))

        ## update classifier - compute gradient and do SGD step for clean image, save classifier
        if train and (not args.fixed_classifier):
            optimizer['classifier'].zero_grad()
            classifier_loss.backward()
            optimizer['classifier'].step()

            ## save classifier (needed only if previous iterations are used i.e. args.hp > 0)
            global F_k
            if args.hp > 0 and ((i % args.smf == args.smf - 1)
                                or len(F_k) < 1):
                print(
                    'Saving the current classifier state; it will be used in future iterations. ',
                    end='',
                    flush=True)
                if len(F_k) < args.f_size:
                    index = len(F_k)
                else:
                    index = random.randint(0, len(F_k) - 1)
                state_dict = classifier.state_dict()
                F_k[index] = {}
                for p in state_dict:
                    F_k[index][p] = state_dict[p].cpu()
                print('There are {0} iterations stored.'.format(len(F_k)),
                      flush=True)

        ## detach inner layers so they serve as fixed features for the decoder
        layers = [l.detach() for l in layers]

        with torch.set_grad_enabled(train):
            ## compute mask and masked input
            mask = decoder(layers)
            input_m = input * (1 - mask)

            ## update statistics
            statistics.update(mask)

            ## randomly select classifier to be evaluated on masked image and compute output
            if (not train) or args.fixed_classifier or (random.random() >
                                                        args.hp):
                output_m, _ = classifier(input_m)
                update_classifier = not args.fixed_classifier
            else:
                try:
                    confuser
                except NameError:
                    import copy
                    confuser = copy.deepcopy(classifier)
                index = random.randint(0, len(F_k) - 1)
                confuser.load_state_dict(F_k[index])
                confuser.eval()

                output_m, _ = confuser(input_m)
                update_classifier = False

            classifier_loss_m = classifier_criterion(output_m, target)

            ## update metrics
            losses_m.update(classifier_loss_m.item(), input.size(0))
            acc_m.update(
                accuracy(output_m.detach(), target, topk=(1, ))[0].item(),
                input.size(0))

        if train:
            ## update classifier - compute gradient, do SGD step for masked image
            if update_classifier:
                optimizer['classifier'].zero_grad()
                classifier_loss_m.backward(retain_graph=True)
                optimizer['classifier'].step()

            ## regularization for casme
            _, max_indexes = output.detach().max(1)
            _, max_indexes_m = output_m.detach().max(1)
            correct_on_clean = target.eq(max_indexes)
            mistaken_on_masked = target.ne(max_indexes_m)
            nontrivially_confused = (correct_on_clean
                                     & mistaken_on_masked).float()

            mask_mean = F.avg_pool2d(mask, 224, stride=1).squeeze()

            ## apply regularization loss only on nontrivially confused images
            casme_loss = -args.lambda_r * F.relu(nontrivially_confused -
                                                 mask_mean).mean()

            ## main loss for casme
            if args.adversarial:
                casme_loss += -classifier_loss_m
            else:
                log_prob = F.log_softmax(output_m, 1)
                prob = log_prob.exp()
                negative_entropy = (log_prob * prob).sum(1)
                ## apply the main loss only when the original images are correctly classified
                negative_entropy_correct = negative_entropy * correct_on_clean.float()
                casme_loss += negative_entropy_correct.mean()

            ## update casme - compute gradient, do SGD step
            optimizer['decoder'].zero_grad()
            casme_loss.backward()
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), 10)
            optimizer['decoder'].step()

        ## measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        ## print log
        if i % args.print_freq == 0:
            if train:
                print('Epoch: [{0}][{1}/{2}/{3}]\t'.format(
                    epoch, i, int(len(data_loader) * args.pot),
                    len(data_loader)),
                      end='')
            else:
                print('Test: [{0}/{1}]\t'.format(i, len(data_loader)), end='')
            print('Time {batch_time.avg:.3f} ({batch_time.val:.3f})\t'
                  'Data {data_time.avg:.3f} ({data_time.val:.3f})\n'
                  'Loss(C) {loss.avg:.4f} ({loss.val:.4f})\t'
                  'Prec@1(C) {acc.avg:.3f} ({acc.val:.3f})\n'
                  'Loss(M) {loss_m.avg:.4f} ({loss_m.val:.4f})\t'
                  'Prec@1(M) {acc_m.avg:.3f} ({acc_m.val:.3f})\t'.format(
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      acc=acc,
                      loss_m=losses_m,
                      acc_m=acc_m),
                  flush=True)
            statistics.printOut()

    if not train:
        print(' * Prec@1 {acc.avg:.3f} Prec@1(M) {acc_m.avg:.3f} '.format(
            acc=acc, acc_m=acc_m))
        statistics.printOut()

    return {
        'acc': str(acc.avg),
        'acc_m': str(acc_m.avg),
        **statistics.getDictionary()
    }
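
# A minimal sketch of how train_or_eval might be driven across epochs. The
# names classifier, decoder, train_loader, val_loader, and args.epochs are
# hypothetical placeholders; the original training script may wire this up
# differently.
import torch

optimizer = {
    'classifier': torch.optim.SGD(classifier.parameters(), lr=0.1, momentum=0.9),
    'decoder': torch.optim.Adam(decoder.parameters(), lr=1e-3),
}
for epoch in range(args.epochs):
    # one pass over the training set (updates decoder and, unless fixed, classifier)
    train_or_eval(train_loader, classifier, decoder,
                  train=True, optimizer=optimizer, epoch=epoch)
    # one evaluation pass over the validation set (no parameter updates)
    results = train_or_eval(val_loader, classifier, decoder)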
Esempio n. 27
def optimization_step(model, criterion,
                      optimizer, optimizer_fp, optimizer_sf,
                      x_batch, y_batch):

    x_batch = Variable(x_batch.cuda())
    y_batch = Variable(y_batch.cuda(non_blocking=True))
    # use quantized model
    logits = model(x_batch)

    # compute logloss
    loss = criterion(logits, y_batch)
    batch_loss = loss.item()

    # compute accuracies
    pred = F.softmax(logits, dim=1)
    batch_accuracy, batch_top5_accuracy = accuracy(y_batch, pred, top_k=(1, 5))

    optimizer.zero_grad()
    optimizer_fp.zero_grad()
    optimizer_sf.zero_grad()
    # compute grads for quantized model
    loss.backward()

    all_kernels = optimizer.param_groups[2]['params']
    all_fp_kernels = optimizer_fp.param_groups[0]['params']
    scaling_factors = optimizer_sf.param_groups[0]['params']

    for i in range(len(all_kernels)):

        # get quantized kernel
        k = all_kernels[i]

        # get corresponding full precision kernel
        k_fp = all_fp_kernels[i]

        # get scaling factors for quantized kernel
        f = scaling_factors[i]
        w_p, w_n = f.data[0], f.data[1]

        # get modified grads
        k_fp_grad, w_p_grad, w_n_grad = get_grads(k.grad.data, k.data, w_p, w_n)
        # WARNING: this is not like in the original paper.
        # In the original paper: k.data -> k_fp.data

        # grad for full precision kernel
        k_fp.grad = Variable(k_fp_grad)

        # we don't need to update quantized kernel directly
        k.grad.data.zero_()

        # grad for scaling factors
        f.grad = Variable(torch.FloatTensor([w_p_grad, w_n_grad]).cuda())

    # update the last fc layer and all batch norm params in quantized model
    optimizer.step()

    # update full precision kernels
    optimizer_fp.step()

    # update scaling factors
    optimizer_sf.step()

    # update quantized kernels
    for i in range(len(all_kernels)):

        k = all_kernels[i]
        k_fp = all_fp_kernels[i]
        f = scaling_factors[i]
        w_p, w_n = f.data[0], f.data[1]

        k.data = quantize(k_fp.data, w_p, w_n)

    return batch_loss, batch_accuracy, batch_top5_accuracy
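
# The step above calls get_grads() and quantize(), which are not shown. Below
# is a minimal sketch of a ternary quantize() in the spirit of Trained Ternary
# Quantization, assuming a magnitude threshold t (a hypothetical hyperparameter);
# the original implementation and its threshold rule may differ.
import torch


def quantize(kernel_fp, w_p, w_n, t=0.05):
    """Map a full-precision kernel to the ternary values {+w_p, 0, -w_n}."""
    delta = t * kernel_fp.abs().max()
    pos = (kernel_fp > delta).float()    # weights pushed to +w_p
    neg = (kernel_fp < -delta).float()   # weights pushed to -w_n
    return w_p * pos - w_n * neg         # everything else becomes 0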
Esempio n. 28
def train(loader: DataLoader,
          model: torch.nn.Module,
          criterion,
          optimizer: Optimizer,
          epoch: int,
          noise_sd: float,
          attacker: Attacker,
          device: torch.device,
          writer=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()
    requires_grad_(model, True)

    for i, batch in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        mini_batches = _chunk_minibatch(batch, args.num_noise_vec)
        for inputs, targets in mini_batches:
            inputs, targets = inputs.to(device), targets.to(device)
            inputs = inputs.repeat(
                (1, args.num_noise_vec, 1, 1)).reshape(-1, *batch[0].shape[1:])
            batch_size = inputs.size(0)

            # augment inputs with noise
            noise = torch.randn_like(inputs, device=device) * noise_sd

            requires_grad_(model, False)
            model.eval()
            inputs = attacker.attack(model,
                                     inputs,
                                     targets,
                                     noise=noise,
                                     num_noise_vectors=args.num_noise_vec,
                                     no_grad=args.no_grad_attack)
            model.train()
            requires_grad_(model, True)

            noisy_inputs = inputs + noise

            targets = targets.unsqueeze(1).repeat(1,
                                                  args.num_noise_vec).reshape(
                                                      -1, 1).squeeze()
            outputs = model(noisy_inputs)
            loss = criterion(outputs, targets)

            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), batch_size)
            top1.update(acc1.item(), batch_size)
            top5.update(acc5.item(), batch_size)

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.avg:.3f}\t'
                  'Data {data_time.avg:.3f}\t'
                  'Loss {loss.avg:.4f}\t'
                  'Acc@1 {top1.avg:.3f}\t'
                  'Acc@5 {top5.avg:.3f}'.format(epoch,
                                                i,
                                                len(loader),
                                                batch_time=batch_time,
                                                data_time=data_time,
                                                loss=losses,
                                                top1=top1,
                                                top5=top5))

    if writer:
        writer.add_scalar('loss/train', losses.avg, epoch)
        writer.add_scalar('batch_time', batch_time.avg, epoch)
        writer.add_scalar('accuracy/train@1', top1.avg, epoch)
        writer.add_scalar('accuracy/train@5', top5.avg, epoch)

    return (losses.avg, top1.avg)
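
# The loop above assumes a _chunk_minibatch() helper that splits one loaded
# batch into args.num_noise_vec smaller (inputs, targets) chunks, so that each
# chunk can then be replicated num_noise_vec times without growing the
# effective batch size. A minimal sketch follows; the original helper may
# differ in detail.
def _chunk_minibatch(batch, num_chunks):
    inputs, targets = batch
    chunk_size = len(inputs) // num_chunks
    for i in range(num_chunks):
        yield (inputs[i * chunk_size:(i + 1) * chunk_size],
               targets[i * chunk_size:(i + 1) * chunk_size])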
Esempio n. 29
def train(training_model, training_data, opts, lr_scheduler, epochs,
          optimizer):
    old_lr = lr_scheduler.get_last_lr()[0]
    iterations_per_epoch = len(training_data)
    for epoch in epochs:
        logging.info(f"Epoch {epoch}/{opts.epoch}")
        bar = tqdm(training_data, total=iterations_per_epoch)
        sum_loss = 0.0
        sum_acc = 0.0
        sum_batch_loss = 0.0
        sum_batch_acc = 0.0
        last_batch = -1
        start_batch_time = start_epoch_time = time.time()
        total_sample = 0
        for batch_idx, (input_data, labels) in enumerate(bar):
            preds, losses = training_model(input_data, labels)
            epoch_num = epoch - 1 + float(batch_idx + 1) / iterations_per_epoch
            if not opts.disable_metrics:
                with torch.no_grad():
                    # Convert to full precision for CPU execution.
                    losses = losses.float()
                    preds = preds.float()
                    mean_loss = torch.mean(losses).item()
                    acc = accuracy(preds, labels)
                sum_acc += acc
                sum_loss += mean_loss
                sum_batch_loss += mean_loss
                sum_batch_acc += acc
                aggregated_loss = sum_loss / (batch_idx + 1)
                aggregated_accuracy = sum_acc / (batch_idx + 1)
                bar.set_description(
                    f"Loss:{aggregated_loss:0.4f} | Accuracy:{aggregated_accuracy:0.2f}%"
                )
            total_sample += input_data.size()[0]

            if not opts.disable_metrics and (
                (batch_idx + 1) %
                (iterations_per_epoch // opts.logs_per_epoch) == 0):
                # save metrics
                result_dict = {
                    "loss_avg":
                    aggregated_loss,
                    "loss_batch":
                    sum_batch_loss / (batch_idx - last_batch),
                    "epoch":
                    epoch_num,
                    "iteration":
                    batch_idx + 1 + (epoch - 1) * iterations_per_epoch,
                    "train_accuracy_avg":
                    aggregated_accuracy,
                    "train_accuracy_batch":
                    sum_batch_acc / (batch_idx - last_batch),
                    "learning_rate":
                    old_lr * (opts.replicas * opts.gradient_accumulation
                              if opts.reduction == 'sum' else 1.0),
                    "train_img_per_sec":
                    ((batch_idx - last_batch) * input_data.size()[0] /
                     (time.time() - start_batch_time)),
                    "latency_sec":
                    (time.time() - start_batch_time) / (batch_idx - last_batch)
                }
                utils.Logger.log_train_results(result_dict)
                sum_batch_loss = 0.0
                sum_batch_acc = 0.0
                last_batch = batch_idx
                start_batch_time = time.time()

            # lr schedule
            lr_scheduler.step(epoch_num)
            new_lr = lr_scheduler.get_last_lr()[0]
            if new_lr != old_lr:
                training_model.setOptimizer(optimizer)
                old_lr = new_lr
                if opts.lr_schedule == "step":
                    logging.info(f"Learning rate is changed to {new_lr}")

        end_time = time.time()
        if not opts.disable_metrics:
            logging.info(
                f"Epoch {epoch}: Train accuracy is {aggregated_accuracy:0.2f}%"
            )
        epoch_throughput = total_sample / (end_time - start_epoch_time)
        logging.info(
            f"Throughput of the epoch:{epoch_throughput:0.1f} img/sec")
        # save
        if opts.checkpoint_path != "":
            if not os.path.exists(opts.checkpoint_path):
                os.makedirs(opts.checkpoint_path)
            filename = f"{opts.model}_{opts.data}_{epoch}.pt"
            save_path = os.path.join(opts.checkpoint_path, filename)
            training_model.copyWeightsToHost()
            state = training_model.model.model.state_dict()
            optimizer_state = optimizer.state_dict()
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': state,
                    'optimizer_state_dict': optimizer_state,
                    'loss': aggregated_loss,
                    'train_accuracy': aggregated_accuracy,
                    'opts': opts
                }, save_path)
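
# A minimal sketch of how a checkpoint written by the loop above could be
# restored. load_checkpoint(), model, and optimizer are hypothetical names;
# the original project may provide its own resume logic.
import torch


def load_checkpoint(checkpoint_file, model, optimizer):
    checkpoint = torch.load(checkpoint_file, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    # resume from the epoch after the one that was saved
    return checkpoint['epoch'] + 1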
Esempio n. 30
def train(
    args,
    train_queue,
    valid_queue,
    model,
    architect,
    criterion,
    optimizer,
    lr,
    random_arch=False,
):
    objs = train_utils.AvgrageMeter()
    top1 = train_utils.AvgrageMeter()
    top5 = train_utils.AvgrageMeter()

    for step, datapoint in enumerate(train_queue):
        # The search data queue for nas-bench-201 returns both train and valid
        # data when looping through the queue. This is disabled when
        # single-level search is indicated.
        if "nas-bench-201" in args.search.search_space and not (
                args.search.single_level):
            input, target, input_search, target_search = datapoint
        else:
            input, target = datapoint
            input_search, target_search = next(iter(valid_queue))

        n = input.size(0)

        input = Variable(input, requires_grad=False).cuda()
        target = Variable(target, requires_grad=False).cuda()

        # get a random minibatch from the search queue with replacement
        input_search = Variable(input_search, requires_grad=False).cuda()
        target_search = Variable(target_search, requires_grad=False).cuda()

        # set the model in train mode (important for layers like dropout and batch normalization)
        model.train()

        # TODO: move architecture args into a separate dictionary within args
        if not random_arch:
            architect.step(
                input, target, input_search, target_search, **{
                    "eta": lr,
                    "network_optimizer": optimizer,
                    "unrolled": args.search.unrolled,
                    "update_weights": True,
                })
        # if random_arch or model.architect_type == "snas":
        #    architect.sample_arch_configure_model()

        optimizer.zero_grad()
        architect.zero_arch_var_grad()
        architect.set_model_alphas()
        architect.set_model_edge_weights()

        logits, logits_aux = model(input, discrete=args.search.discrete)
        loss = criterion(logits, target)
        if args.train.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.train.auxiliary_weight * loss_aux

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.train.grad_clip)
        optimizer.step()

        prec1, prec5 = train_utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.run.report_freq == 0:
            logging.info(
                f"| Train | Batch: {step:3d} | Loss: {objs.avg:e} | Top1: {top1.avg} | Top5: {top5.avg} |"
            )

    return top1.avg, objs.avg, top5.avg
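
# Several examples above call an accuracy()/train_utils.accuracy() helper that
# returns top-k precision as percentages. A minimal sketch of the usual
# implementation follows; the originals may differ slightly (e.g. in argument
# order or return types).
import torch


def accuracy(output, target, topk=(1,)):
    """Compute the precision@k for each k in topk, as percentages."""
    maxk = max(topk)
    batch_size = target.size(0)
    # indices of the top-k predictions per sample, transposed to (maxk, batch)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res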