Example #1
            # NOTE: the snippet is truncated above; this first LR-drop branch
            # is reconstructed to mirror the 48k/64k branches below, and the
            # lr value here is an assumption rather than the original's.
            if i == 32000:
                optimizer = optim.SGD(model.parameters(),
                                      lr=1e-2,
                                      weight_decay=1e-4,
                                      momentum=0.9)
            elif i == 48000:
                optimizer = optim.SGD(model.parameters(),
                                      lr=1e-3,
                                      weight_decay=1e-4,
                                      momentum=0.9)
            elif i == 64000:
                end_time = time.time()
                print("total time %.1f h" % ((end_time - start_time) / 3600))
                sys.exit(0)

            # switch to train mode
            model.train()

            # get the inputs
            inputs, labels = mixup(data1, data2, 0.2)
            inputs = inputs.cuda()
            labels = labels.cuda()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # accumulate accuracy and loss on the training set
            total += labels.size(0)
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels.argmax(dim=1)).sum().item()
            train_loss += loss.item()
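
Note: the mixup(data1, data2, 0.2) helper used above is not shown on this
page. A minimal sketch of what it might look like, assuming data1 and data2
are (inputs, one-hot labels) pairs drawn from two separate loaders and that
0.2 is the Beta concentration parameter:

import numpy as np

def mixup(batch1, batch2, alpha):
    # Hypothetical reconstruction: draw one lambda ~ Beta(alpha, alpha)
    # per step and mix the two batches convexly.
    (x1, y1), (x2, y2) = batch1, batch2
    lam = np.random.beta(alpha, alpha)
    inputs = lam * x1 + (1.0 - lam) * x2
    labels = lam * y1 + (1.0 - lam) * y2
    return inputs, labels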
Example #2
def train_mixmatch(label_loader, unlabel_loader, num_classes, model, optimizer,
                   ema_optimizer, epoch, args):

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_x = AverageMeter()
    losses_u = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    weights = AverageMeter()
    nu = 2
    end = time.time()

    label_iter = iter(label_loader)
    unlabel_iter = iter(unlabel_loader)

    model.train()
    for i in range(args.val_iteration):

        try:
            (input, _), target = next(label_iter)
        except StopIteration:
            label_iter = iter(label_loader)
            (input, _), target = next(label_iter)
        try:
            (input_ul, input1_ul), _ = next(unlabel_iter)
        except StopIteration:
            unlabel_iter = iter(unlabel_loader)
            (input_ul, input1_ul), _ = next(unlabel_iter)

        bs = input.size(0)
        # measure data loading time
        data_time.update(time.time() - end)

        input, target = input.cuda(), target.cuda(non_blocking=True)
        input_ul, input1_ul = input_ul.cuda(), input1_ul.cuda()

        with torch.no_grad():
            # compute guess label
            logits = model(torch.cat([input_ul, input1_ul], dim=0))
            p = torch.nn.functional.softmax(logits, dim=-1).view(
                nu, -1, logits.shape[1])
            p_target = p.mean(dim=0).pow(1. / args.T)
            p_target /= p_target.sum(dim=1, keepdim=True)
            guess = p_target.detach_()

            assert input.shape[0] == input_ul.shape[0]

            # mixup
            target_in_onehot = torch.zeros(
                bs,
                num_classes).float().cuda().scatter_(1, target.view(-1, 1), 1)
            mixed_input, mixed_target = mixup(
                torch.cat([input] + [input_ul, input1_ul], dim=0),
                torch.cat([target_in_onehot] + [guess] * nu, dim=0),
                beta=args.beta)
            # reshape to (nu+1, bs, c, h, w)
            mixed_input = mixed_input.reshape([nu + 1] + list(input.shape))
            # reshape to (nu+1, bs, num_classes)
            mixed_target = mixed_target.reshape([nu + 1] +
                                                list(target_in_onehot.shape))
            input_x, input_u = mixed_input[0], mixed_input[1:]
            target_x, target_u = mixed_target[0], mixed_target[1:]

        model.train()
        batches = interleave([input_x, input_u[0], input_u[1]], bs)
        logits = [model(batches[0])]
        for batchi in batches[1:]:
            logits.append(model(batchi))
        logits = interleave(logits, bs)
        logits_x = logits[0]
        logits_u = torch.cat(logits[1:], 0)

        # loss
        # cross entropy loss for soft label
        loss_xe = torch.mean(
            torch.sum(-target_x * F.log_softmax(logits_x, dim=-1), dim=1))
        # L2 loss
        loss_l2u = F.mse_loss(F.softmax(logits_u, dim=-1),
                              target_u.reshape(nu * bs, num_classes))
        # weight for unlabeled loss with warmup
        w_match = args.lambda_u * linear_rampup(epoch + i / args.val_iteration,
                                                args.epochs)
        loss = loss_xe + w_match * loss_l2u

        # measure accuracy and record loss
        prec1, prec5 = accuracy(logits_x, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        losses_x.update(loss_xe.item(), input.size(0))
        losses_u.update(loss_l2u.item(), input.size(0))

        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))
        weights.update(w_match, input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        ema_optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Loss_x {loss_x.val:.4f} ({loss_x.avg:.4f})\t'
                  'Loss_u {loss_u.val:.4f} ({loss_u.avg:.4f})\t'
                  'Ws {ws.val:.4f}\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      args.val_iteration,
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      loss_x=losses_x,
                      loss_u=losses_u,
                      ws=weights,
                      top1=top1,
                      top5=top5))

    return top1.avg, top5.avg, losses.avg, losses_x.avg, losses_u.avg, weights.avg
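
Note: Example #2 relies on interleave and linear_rampup helpers that are not
shown here. The sketches below follow the standard MixMatch reference
implementation; they are plausible reconstructions, not this repo's exact code.

import numpy as np
import torch

def linear_rampup(current, rampup_length):
    # Ramp the unlabeled-loss weight linearly from 0 to 1 over
    # `rampup_length` epochs, then hold it at 1.
    if rampup_length == 0:
        return 1.0
    return float(np.clip(current / rampup_length, 0.0, 1.0))

def interleave_offsets(batch, nu):
    # Split a batch of size `batch` into nu + 1 nearly equal groups.
    groups = [batch // (nu + 1)] * (nu + 1)
    for x in range(batch - sum(groups)):
        groups[-x - 1] += 1
    offsets = [0]
    for g in groups:
        offsets.append(offsets[-1] + g)
    assert offsets[-1] == batch
    return offsets

def interleave(xy, batch):
    # Swap slices between the labeled batch and the unlabeled batches so
    # BatchNorm statistics are computed on a mixture of both.
    nu = len(xy) - 1
    offsets = interleave_offsets(batch, nu)
    xy = [[v[offsets[p]:offsets[p + 1]] for p in range(nu + 1)] for v in xy]
    for i in range(1, nu + 1):
        xy[0][i], xy[i][i] = xy[i][i], xy[0][i]
    return [torch.cat(v, dim=0) for v in xy]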
Example #3
def train(epoch, model, optimizer, scheduler, criterion, train_loader, config,
          writer):
    global global_step

    run_config = config['run_config']
    optim_config = config['optim_config']
    data_config = config['data_config']

    logger.info('Train {}'.format(epoch))

    model.train()

    loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    start = time.time()
    for step, (data, targets) in enumerate(train_loader):
        global_step += 1

        if data_config['use_mixup']:
            data, targets = mixup(data, targets, data_config['mixup_alpha'],
                                  data_config['n_classes'])

        if run_config['tensorboard_train_images']:
            if step == 0:
                image = torchvision.utils.make_grid(data,
                                                    normalize=True,
                                                    scale_each=True)
                writer.add_image('Train/Image', image, epoch)

        if optim_config['scheduler'] == 'multistep':
            scheduler.step(epoch - 1)
        elif optim_config['scheduler'] == 'cosine':
            scheduler.step()

        if run_config['tensorboard']:
            if optim_config['scheduler'] != 'none':
                lr = scheduler.get_lr()[0]
            else:
                lr = optim_config['base_lr']
            writer.add_scalar('Train/LearningRate', lr, global_step)

        if run_config['use_gpu']:
            data = data.cuda()
            targets = targets.cuda()

        optimizer.zero_grad()

        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()

        optimizer.step()

        _, preds = torch.max(outputs, dim=1)

        loss_ = loss.item()
        if data_config['use_mixup']:
            _, targets = targets.max(dim=1)
        correct_ = preds.eq(targets).sum().item()
        num = data.size(0)

        accuracy = correct_ / num

        loss_meter.update(loss_, num)
        accuracy_meter.update(accuracy, num)

        if run_config['tensorboard']:
            writer.add_scalar('Train/RunningLoss', loss_, global_step)
            writer.add_scalar('Train/RunningAccuracy', accuracy, global_step)

        if step % 100 == 0:
            logger.info('Epoch {} '
                        'Train Loss {:.3f} '
                        'Train Acc {:.4f}'.format(
                            epoch,
                            loss_meter.avg,
                            accuracy_meter.avg,
                        ))

    elapsed = time.time() - start
    logger.info('Elapsed {:.2f}'.format(elapsed))

    if run_config['tensorboard']:
        writer.add_scalar('Train/Loss', loss_meter.avg, epoch)
        writer.add_scalar('Train/Accuracy', accuracy_meter.avg, epoch)
        writer.add_scalar('Train/Time', elapsed, epoch)
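
Note: in this example mixup(data, targets, alpha, n_classes) returns mixed
inputs together with soft (mixed one-hot) targets, which is why the mixup
branch later recovers hard labels with targets.max(dim=1). A minimal sketch
of such a helper, assuming integer class targets:

import numpy as np
import torch

def mixup(data, targets, alpha, n_classes):
    # Hypothetical reconstruction: mix the batch with a shuffled copy of
    # itself using a single lambda ~ Beta(alpha, alpha).
    onehot = torch.zeros(targets.size(0), n_classes)
    onehot.scatter_(1, targets.view(-1, 1), 1.0)
    lam = np.random.beta(alpha, alpha)
    perm = torch.randperm(data.size(0))
    data = lam * data + (1.0 - lam) * data[perm]
    targets = lam * onehot + (1.0 - lam) * onehot[perm]
    return data, targets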
Example #4
def train(epoch, model, optimizer, scheduler, criterion, train_loader, config,
          writer, moment_dict):
    global global_step

    run_config = config['run_config']
    optim_config = config['optim_config']
    data_config = config['data_config']

    logger.info('Train {}'.format(epoch))

    model.train()

    loss_meter = AverageMeter()
    loss_before_meter = AverageMeter() # re-evaluate error on images before the gradient update, first 20 batches
    loss_after_meter = AverageMeter() # re-evaluate error on images after the gradient update, first 20 batches
    accuracy_meter = AverageMeter()

    # approximate losses and average meters are assembled here
    apx_meters = {
        'vanilla': AverageMeter(),
        'mixup': AverageMeter(),
        'doublesum': AverageMeter()
    }

    apx_callbacks = {
        'vanilla': lambda imgs, lbls, mdl: apx.vanilla_loss(imgs, lbls, mdl, run_config['use_gpu']),
        'mixup': lambda imgs, lbls, mdl: apx.mixup_loss(imgs, lbls, data_config['mixup_alpha'], data_config['n_classes'], data_config['fixlam'], mdl, run_config['use_gpu']),
        'doublesum': lambda imgs, lbls, mdl: apx.doublesum_loss(imgs, lbls, data_config['mixup_alpha'], data_config['n_classes'], data_config['fixlam'], mdl, run_config['use_gpu'])
    }

    ### computing approximate losses ###
    # Store every mixed batch seen during this epoch and compute the loss on
    # them at the end: this is effectively the objective being optimised.
    # Then recompute a freshly mixed dataset and compute the loss on that,
    # which is a (likely close) approximation for the double-sum loss.
    # For validity this is done batchwise as well, which also avoids having
    # to load all images and labels at once.
    images_train = [] #: all images that were encountered in this epoch
    labels_train = [] #: all labels that were encountered in this epoch
    images_eval = [] # images that we're lining up for eval at the end of the epoch
    labels_eval = []
    images_eval2 = [] # second trial to check concentration
    labels_eval2 = []

    start = time.time()
    for step, (data, targets) in enumerate(train_loader):
        global_step += 1

        images = copy.deepcopy(data)
        labels = copy.deepcopy(targets)

        if data_config['use_mixup']:
            data, targets = mixup(data, targets, data_config['mixup_alpha'],
                                  data_config['n_classes'], data_config['fixtrainlam'], True)

            # assembling the data for our doublesum apx test here
            images_train.append(copy.deepcopy(data))
            labels_train.append(copy.deepcopy(targets))

        if run_config['tensorboard_train_images']:
            if step == 0:
                image = torchvision.utils.make_grid(
                    data, normalize=True, scale_each=True)
                writer.add_image('Train/Image', image, epoch)

        if optim_config['scheduler'] == 'multistep':
            scheduler.step(epoch - 1)
        elif optim_config['scheduler'] == 'cosine':
            scheduler.step()

        if run_config['tensorboard']:
            if optim_config['scheduler'] != 'none':
                lr = scheduler.get_lr()[0]
            else:
                lr = optim_config['base_lr']
            writer.add_scalar('Train/LearningRate', lr, global_step)

        if run_config['use_gpu']:
            data = data.cuda()
            targets = targets.cuda()

        optimizer.zero_grad()

        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()

        optimizer.step()

        _, preds = torch.max(outputs, dim=1)

        loss_ = loss.item()

        # compute loss after the gradient update
        outputs = model(data)
        newloss = criterion(outputs, targets)
        newloss_ = newloss.item()

        if data_config['use_mixup']:
            _, targets = targets.max(dim=1)
        correct_ = preds.eq(targets).sum().item()
        num = data.size(0)

        accuracy = correct_ / num

        loss_meter.update(loss_, num)
        accuracy_meter.update(accuracy, num)

        # this is where the approximate losses are computed
        # loss_before_meter.update(loss_, num) # now we're not restricting batches
        # loss_after_meter.update(loss_, num)
        #if step < data_config['doublesum_batches']:
            #for k in apx_meters.keys():
                #l = apx_callbacks[k](images, labels, model)
                #apx_meters[k].update(l.item(), num)
            #loss_before_meter.update(loss_, num)
            #loss_after_meter.update(newloss_, num)

        if data_config['compute_mixup_reg'] > 0:
            # batch size
            N = data_config['batch_size']

            # original shape of images
            data_shape = data.shape
            # datavar = torch.autograd.Variable(data, requires_grad=True)
            # hello = model(data)
            # data_flat is a stack of rows, where each row
            # is a flattened data point:
            # --- data_flat[i,:] = data[i,:,...,:].reshape((1, int(data.numel() / N)))
            data_flat = data.reshape((N, int(data.numel() / N)))

            # y_vec is a stack of rows, where each row is the one_hot version
            # of the correct label
            y_vec = torch.zeros((N, targets.max() + 1)).cuda()
            y_vec[np.arange(N), targets] = 1

            # vec to take action of hessian on
            V = (data_flat - data_flat.sum(axis=0)).detach().clone()
            W = (y_vec - y_vec.sum(axis=0)).detach().clone()
            # (debug check kept from the original: V is overwritten below by
            # a small constant test tensor, and W is unused in the call)
            X = 2*torch.ones((2, 2)).cuda()
            Y = 2*torch.ones((2, 2)).cuda()
            V = torch.ones((2,2)).cuda()
            hvprod = taylor.hess_quadratic(lambda x, y : torch.sum(x.pow(2) + y.pow(2)), lambda x: x, X.shape, X, Y, 'x', 'x', V, V)
            print(hvprod)

        if run_config['tensorboard']:
            writer.add_scalar('Train/RunningLoss', loss_, global_step)
            writer.add_scalar('Train/RunningAccuracy', accuracy, global_step)

        if step % 100 == 0:
            logger.info('Epoch {} Step {}/{} '
                        'Loss {:.4f} ({:.4f}) '
                        'Accuracy {:.4f} ({:.4f})'.format(
                            epoch,
                            step,
                            len(train_loader),
                            loss_meter.val,
                            loss_meter.avg,
                            accuracy_meter.val,
                            accuracy_meter.avg,
                        ))

    ret = [epoch, loss_meter.avg, accuracy_meter.avg]

    if data_config['use_mixup'] and (epoch <= 4 or epoch % 5 == 0):
        model.eval()
        # reiterating through trainloader to completely separate the construction of the eval sets from the train set
        for step, (data, targets) in enumerate(train_loader):
            old_data = copy.deepcopy(data)
            old_targets = copy.deepcopy(targets)
            data_eval, targets_eval = mixup(old_data, old_targets, data_config['mixup_alpha'],
                                    data_config['n_classes'], data_config['fixlam'], True)

            images_eval.append(copy.deepcopy(data_eval))
            labels_eval.append(copy.deepcopy(targets_eval))

        for step, (data, targets) in enumerate(train_loader):
            old_data = copy.deepcopy(data)
            old_targets = copy.deepcopy(targets)

            data_eval2, targets_eval2 = mixup(old_data, old_targets, data_config['mixup_alpha'],
                                    data_config['n_classes'], data_config['fixlam'], True)

            images_eval2.append(copy.deepcopy(data_eval2))
            labels_eval2.append(copy.deepcopy(targets_eval2))

        # evaluating approximate losses
        images_train = torch.cat(images_train)
        labels_train = torch.cat(labels_train)
        images_eval = torch.cat(images_eval)
        labels_eval = torch.cat(labels_eval)
        images_eval2 = torch.cat(images_eval2)
        labels_eval2 = torch.cat(labels_eval2)

        apxloss_train = apx.compute_loss(images_train, labels_train, model, run_config['use_gpu'])
        apxloss_eval = apx.compute_loss(images_eval, labels_eval, model, run_config['use_gpu'])
        apxloss_eval2 = apx.compute_loss(images_eval2, labels_eval2, model, run_config['use_gpu'])

        logger.info('Train {:.4f}, Eval {:.4f}, Eval retrial {:.4f}'.format(
            apxloss_train,
            apxloss_eval,
            apxloss_eval2
        ))

        ret.append(apxloss_train.item())
        ret.append(apxloss_eval.item())
        ret.append(apxloss_eval2.item())
        model.train()

    # compute Taylor approximate loss
    if data_config['cov_components'] > 0 and (epoch <= 4 or epoch % 5 == 0):
        model.eval()
        base_meter = AverageMeter()
        de_meter = AverageMeter()

        d2_meters = {}
        d2e_meters = {}

        num_components_list = [1, 2, 5, 20, 50, 200]

        for k in num_components_list:
            d2_meters[k] = AverageMeter()
            d2e_meters[k] = AverageMeter()

        d2_batch_counts = {}

        for k in num_components_list:
            d2_batch_counts[k] = 10
        
        d2e_batch_counts = {
            1: 10,
            2: 10,
            5: 4,
            20: 2,
            50: 2,
            200: 1
        }

        max_batch_count = 10

        for step, (data, targets) in enumerate(train_loader):
            if step == max_batch_count:
                break

            num = data.shape[0]

            # base term
            base = taylor.taylor_loss_base(
                data.cuda(), targets.cuda(), model,
                moment_dict['xbar'],
                moment_dict['ybar'],
                moment_dict['Uxx'],
                moment_dict['Sxx'],
                moment_dict['Vxx'],
                moment_dict['Uxy'],
                moment_dict['Sxy'],
                moment_dict['Vxy'],
                moment_dict['T_U'],
                moment_dict['T_S'],
                moment_dict['T_V'],
            )

            base_meter.update(base, num)

            # de term
            de = taylor.taylor_loss_de(
                data.cuda(), targets.cuda(), model,
                moment_dict['xbar'],
                moment_dict['ybar'],
                moment_dict['Uxx'],
                moment_dict['Sxx'],
                moment_dict['Vxx'],
                moment_dict['Uxy'],
                moment_dict['Sxy'],
                moment_dict['Vxy'],
                moment_dict['T_U'],
                moment_dict['T_S'],
                moment_dict['T_V'],
            )

            de_meter.update(de, num)

            # d2 term
            d2_dict = taylor.taylor_loss_d2(
                data.cuda(), targets.cuda(), model,
                moment_dict['xbar'],
                moment_dict['ybar'],
                moment_dict['Uxx'],
                moment_dict['Sxx'],
                moment_dict['Vxx'],
                moment_dict['Uxy'],
                moment_dict['Sxy'],
                moment_dict['Vxy'],
                moment_dict['T_U'],
                moment_dict['T_S'],
                moment_dict['T_V'],
            )

            logger.info("Computed base, de, d2")

            for k in num_components_list:
                d2_meters[k].update(d2_dict[k], num)

            # d2e term
            kmax = max([k for k in num_components_list if d2e_batch_counts[k] > step])
            d2e_dict = taylor.taylor_loss_d2e(
                data.cuda(), targets.cuda(), model,
                moment_dict['xbar'],
                moment_dict['ybar'],
                moment_dict['Uxx'],
                moment_dict['Sxx'],
                moment_dict['Vxx'],
                moment_dict['Uxy'],
                moment_dict['Sxy'],
                moment_dict['Vxy'],
                moment_dict['T_U'][:, :, :kmax],
                moment_dict['T_S'][:, :kmax],
                moment_dict['T_V'][:, :, :kmax],
            )

            logger.info("Computed d2e, batch id")

            for k in num_components_list:
                if k <= kmax:
                    d2e_meters[k].update(d2e_dict[k], num)

            logger.info("Done batch")
        

        logger.info("CHECKS")
        print("Base", base_meter.count, base_meter.avg)
        print("DE", de_meter.count, de_meter.avg)
        for k in num_components_list:
            print("d2", k, d2_meters[k].count, d2_meters[k].avg)
        for k in num_components_list:
            print("d2e", k, d2e_meters[k].count, d2e_meters[k].avg)

        ret.append(base_meter.avg.item())
        ret.append(de_meter.avg.item())
        for k in num_components_list:
            ret.append(d2_meters[k].avg.item())
        for k in num_components_list:
            ret.append(d2e_meters[k].avg.item())
        
        model.train()

    elapsed = time.time() - start
    logger.info('Elapsed {:.2f}'.format(elapsed))
    #logger.info('Vanilla {:.2f}, Mixup {:.2f}, Double sum {:.2f}, Train before {:.2f}, Train after {:.2f}'.format(
    #    apx_meters['vanilla'].avg,
    #    apx_meters['mixup'].avg,
    #    apx_meters['doublesum'].avg,
    #    loss_before_meter.avg,
    #    loss_after_meter.avg
    #))

    if run_config['tensorboard']:
        writer.add_scalar('Train/Loss', loss_meter.avg, epoch)
        writer.add_scalar('Train/Accuracy', accuracy_meter.avg, epoch)
        writer.add_scalar('Train/Time', elapsed, epoch)

    if epoch <= 4 or epoch % 5 == 0:
        return ret
    else:
        return []
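
Note: every example on this page keeps statistics in an AverageMeter, and
Example #4 also reads its .count attribute. A sketch of the usual
pytorch/examples-style implementation these training loops assume:

class AverageMeter:
    """Tracks the latest value and a count-weighted running average."""
    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count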
Example #5
def train(epoch, model, optimizer, criterion, train_loader, config, writer):
    global global_step

    run_config = config['run_config']
    data_config = config['data_config']

    logger.info('Train {}'.format(epoch))

    model.train()

    loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    start = time.time()
    for step, (data, targets) in enumerate(train_loader):
        global_step += 1

        if data_config['use_mixup']:
            data, targets = mixup(data, targets, data_config['mixup_alpha'],
                                  data_config['n_classes'])

        if run_config['tensorboard'] and step == 0:
            image = torchvision.utils.make_grid(data,
                                                normalize=True,
                                                scale_each=True)
            writer.add_image('Train/Image', image, epoch)

        data = data.cuda()
        targets = targets.cuda()

        optimizer.zero_grad()

        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()

        optimizer.step()

        _, preds = torch.max(outputs, dim=1)

        loss_ = loss.item()
        if data_config['use_mixup']:
            _, targets = targets.max(dim=1)
        correct_ = preds.eq(targets).sum().item()
        num = data.size(0)

        accuracy = correct_ / num

        loss_meter.update(loss_, num)
        accuracy_meter.update(accuracy, num)

        if run_config['tensorboard']:
            writer.add_scalar('Train/RunningLoss', loss_, global_step)
            writer.add_scalar('Train/RunningAccuracy', accuracy, global_step)

        if step % 100 == 0:
            logger.info('Epoch {} Step {}/{} '
                        'Loss {:.4f} ({:.4f}) '
                        'Accuracy {:.4f} ({:.4f})'.format(
                            epoch,
                            step,
                            len(train_loader),
                            loss_meter.val,
                            loss_meter.avg,
                            accuracy_meter.val,
                            accuracy_meter.avg,
                        ))

    elapsed = time.time() - start
    logger.info('Elapsed {:.2f}'.format(elapsed))

    if run_config['tensorboard']:
        writer.add_scalar('Train/Loss', loss_meter.avg, epoch)
        writer.add_scalar('Train/Accuracy', accuracy_meter.avg, epoch)
        writer.add_scalar('Train/Time', elapsed, epoch)
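
Note: when use_mixup is enabled, the targets handed to criterion are soft
(mixed one-hot) vectors, so criterion cannot be a plain index-based loss
(older versions of nn.CrossEntropyLoss accepted only class indices). A
minimal sketch of a soft-label criterion compatible with these loops:

import torch
import torch.nn.functional as F

class SoftCrossEntropyLoss(torch.nn.Module):
    # Hypothetical criterion: mean cross entropy against soft targets of
    # shape (batch, n_classes).
    def forward(self, logits, soft_targets):
        log_probs = F.log_softmax(logits, dim=1)
        return torch.mean(torch.sum(-soft_targets * log_probs, dim=1))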
Example #6
def train(epoch, model, optimizer, scheduler, criterion, train_loader, config,
          writer, AT):
    global global_step

    run_config = config['run_config']
    optim_config = config['optim_config']
    data_config = config['data_config']

    logger.info('Train {}'.format(epoch))

    model.train()

    loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    start = time.time()
    for step, (data, targets) in enumerate(train_loader):
        global_step += 1

        if data_config['use_mixup']:
            data, targets = mixup(data, targets, data_config['mixup_alpha'],
                                  data_config['n_classes'])

        if run_config['tensorboard_train_images']:
            if step == 0:
                image = torchvision.utils.make_grid(data,
                                                    normalize=True,
                                                    scale_each=True)
                writer.add_image('Train/Image', image, epoch)

        if optim_config['scheduler'] == 'multistep':
            scheduler.step(epoch - 1)
        elif optim_config['scheduler'] == 'cosine':
            scheduler.step()

        if run_config['tensorboard']:
            if optim_config['scheduler'] != 'none':
                lr = scheduler.get_lr()[0]
            else:
                lr = optim_config['base_lr']
            writer.add_scalar('Train/LearningRate', lr, global_step)

        if run_config['use_gpu']:
            data = data.cuda()
            targets = targets.cuda()

        optimizer.zero_grad()

        if AT:
            # adversarial training: denormalize to pixel space for the attack
            mean = torch.FloatTensor(
                np.array([0.4914, 0.4822, 0.4465])[None, :, None,
                                                   None]).cuda()
            std = torch.FloatTensor(
                np.array([0.2470, 0.2435, 0.2616])[None, :, None,
                                                   None]).cuda()
            data = data.mul_(std).add_(mean)
            atk = torchattacks.PGD(model,
                                   eps=5 / 255,
                                   alpha=0.5 / 255,
                                   steps=10)
            data = atk(data, targets)
            data = data.sub_(mean).div_(std)
            # end of attack

        outputs = model(data)
        loss = criterion(outputs, targets)

        # SD: squared-logit penalty regularizer
        if optim_config['SD'] != 0.0:
            loss += (outputs**2).mean() * optim_config['SD']

        loss.backward()

        optimizer.step()

        _, preds = torch.max(outputs, dim=1)

        loss_ = loss.item()
        if data_config['use_mixup']:
            _, targets = targets.max(dim=1)
        correct_ = preds.eq(targets).sum().item()
        num = data.size(0)

        accuracy = correct_ / num

        loss_meter.update(loss_, num)
        accuracy_meter.update(accuracy, num)

        if run_config['tensorboard']:
            writer.add_scalar('Train/RunningLoss', loss_, global_step)
            writer.add_scalar('Train/RunningAccuracy', accuracy, global_step)

        if step % 100 == 0:
            logger.info('Epoch {} Step {}/{} '
                        'Loss {:.4f} ({:.4f}) '
                        'Accuracy {:.4f} ({:.4f})'.format(
                            epoch,
                            step,
                            len(train_loader),
                            loss_meter.val,
                            loss_meter.avg,
                            accuracy_meter.val,
                            accuracy_meter.avg,
                        ))

    elapsed = time.time() - start
    logger.info('Elapsed {:.2f}'.format(elapsed))

    if run_config['tensorboard']:
        writer.add_scalar('Train/Loss', loss_meter.avg, epoch)
        writer.add_scalar('Train/Accuracy', accuracy_meter.avg, epoch)
        writer.add_scalar('Train/Time', elapsed, epoch)
Example #7
def mixup_loss(images, labels, alpha, n_classes, fixlam, model, use_gpu):
    miximages, mixlabels = mixup(images, labels, alpha, n_classes, fixlam)
    return compute_loss(miximages, mixlabels, model, use_gpu)
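
Note: mixup_loss delegates to a compute_loss helper that does not appear on
this page. A plausible sketch, assuming it returns the mean soft-label cross
entropy of the model on the mixed batch:

import torch
import torch.nn.functional as F

def compute_loss(images, labels, model, use_gpu):
    # Hypothetical reconstruction of the companion helper.
    if use_gpu:
        images, labels = images.cuda(), labels.cuda()
    logits = model(images)
    return torch.mean(torch.sum(-labels * F.log_softmax(logits, dim=1), dim=1))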
Example #8
def main():
    """Model training."""
    train_speakers, valid_speakers = get_valid_speakers()

    # define transforms for train & validation samples
    train_transform = Compose([Resize(760, 80), ToTensor()])

    # define datasets & loaders
    train_dataset = TrainDataset('train',
                                 train_speakers,
                                 transform=train_transform)
    valid_dataset = TrainDataset('train',
                                 valid_speakers,
                                 transform=train_transform)

    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=256, shuffle=False)

    device = get_device()
    print(f'Selected device: {device}')

    model = torch.hub.load('huawei-noah/ghostnet',
                           'ghostnet_1x',
                           pretrained=True)
    model.classifier = nn.Linear(in_features=1280, out_features=1, bias=True)

    net = model
    net.to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = AdaBelief(net.parameters(), lr=1e-3)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='min',
                                                     factor=0.2,
                                                     patience=3,
                                                     eps=1e-4,
                                                     verbose=True)

    # prepare valid target
    yvalid = get_valid_targets(valid_dataset)

    # training loop
    for epoch in range(10):
        loss_log = {'train': [], 'valid': []}
        train_loss = []

        net.train()
        for x, y in tqdm(train_loader):
            x, y = mixup(x, y, alpha=0.2)
            x, y = x.to(device), y.to(device, dtype=torch.float32)
            optimizer.zero_grad()
            outputs = net(x)

            loss = criterion(outputs, y.unsqueeze(1))
            loss.backward()
            optimizer.step()

            # save loss
            train_loss.append(loss.item())

        # evaluate
        net.eval()
        valid_pred = torch.Tensor([]).to(device)

        for x, y in valid_loader:
            with torch.no_grad():
                x, y = x.to(device), y.to(device, dtype=torch.float32)
                ypred = net(x)
                valid_pred = torch.cat([valid_pred, ypred], 0)

        valid_pred = sigmoid(valid_pred.cpu().numpy())
        val_loss = log_loss(yvalid, valid_pred, eps=1e-7)
        val_acc = (yvalid == (valid_pred > 0.5).astype(int).flatten()).mean()
        tqdm.write(
            f'Epoch {epoch} train_loss={np.mean(train_loss):.4f}; val_loss={val_loss:.4f}; val_acc={val_acc:.4f}'
        )

        loss_log['train'].append(np.mean(train_loss))
        loss_log['valid'].append(val_loss)
        scheduler.step(loss_log['valid'][-1])

    torch.save(net.state_dict(), 'ghostnet_model.pt')
    print('Training is complete.')
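
Note: Example #8 applies mixup to binary {0, 1} labels before
BCEWithLogitsLoss, so the helper must return soft labels in [0, 1]. The
mixup(x, y, alpha=0.2) function is not shown; a minimal sketch under that
assumption:

import numpy as np
import torch

def mixup(x, y, alpha=0.2):
    # Hypothetical reconstruction for the binary case: mix the batch with
    # a shuffled copy of itself; the resulting soft labels are valid
    # targets for BCEWithLogitsLoss.
    lam = np.random.beta(alpha, alpha)
    perm = torch.randperm(x.size(0))
    x_mixed = lam * x + (1.0 - lam) * x[perm]
    y_mixed = lam * y.float() + (1.0 - lam) * y[perm].float()
    return x_mixed, y_mixed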