Example #1
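# Excerpt: assumes `args`, `PY_ROOT`, `train_loader`, and `test_loader` are
# defined at module level, along with the usual imports (os, torch, optim).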
def main():
    model = StandardModel(args.dataset,
                          args.arch,
                          no_grad=False,
                          load_pretrained=False)
    model.cuda()
    model.train()
    device = torch.device("cuda")
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    model_path = '{}/train_pytorch_model/adversarial_train/TRADES/{}@{}@epoch_{}@batch_{}.pth.tar'.format(
        PY_ROOT, args.dataset, args.arch, args.epochs, args.batch_size)
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    print("After trained, the model will save to {}".format(model_path))
    for epoch in range(1, args.epochs + 1):
        # adjust learning rate for SGD
        adjust_learning_rate(optimizer, epoch)

        # adversarial training
        train(args, model, device, train_loader, optimizer, epoch)

        # evaluation on natural examples
        print('================================================================')
        eval_train(model, device, train_loader)
        eval_test(model, device, test_loader)
        print('================================================================')

        # save checkpoint
        if epoch % args.save_freq == 0:
            state = {
                'state_dict': model.state_dict(),
                'epoch': epoch,
                'optimizer': optimizer.state_dict()
            }
            torch.save(state, model_path)
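The helpers `adjust_learning_rate`, `train`, `eval_train`, and `eval_test` are defined elsewhere in this codebase and are not shown. Below are minimal sketches consistent with how they are called above; the decay cut-points and the reporting format are assumptions, not the original code (assuming `torch.nn.functional` is imported as `F`):

def adjust_learning_rate(optimizer, epoch):
    # TRADES-style step schedule: decay the base lr by 10x at fixed epochs
    # (the cut-points below are assumptions)
    lr = args.lr
    if epoch >= 75:
        lr = args.lr * 0.1
    if epoch >= 90:
        lr = args.lr * 0.01
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def eval_test(model, device, test_loader):
    # hypothetical clean-accuracy evaluation: average loss and accuracy on test data
    model.eval()
    test_loss, correct = 0.0, 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            correct += (output.max(1)[1] == target).sum().item()
    n = len(test_loader.dataset)
    print('Test: average loss {:.4f}, accuracy {}/{} ({:.2f}%)'.format(
        test_loss / n, correct, n, 100. * correct / n))
    model.train()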
Example #2
# The listing begins mid-function; a hypothetical signature is added so the
# body reads as a complete helper. `ma` is assumed to be a binary numpy mask
# of shape (N, 1) choosing, per sample, which of the two batches to keep.
def mix_batches(inputs1, inputs2, targets1, targets2, ma):
    ma_img = ma[:, :, None, None]

    inputs1 = inputs1 * torch.from_numpy(ma_img).cuda().float()
    inputs2 = inputs2 * torch.from_numpy(1 - ma_img).cuda().float()

    targets1 = targets1.float() * torch.from_numpy(ma).cuda().float()
    targets2 = targets2.float() * torch.from_numpy(1 - ma).cuda().float()

    inputs_shuffle = (inputs1 + inputs2).cuda()
    targets_shuffle = (targets1 + targets2).cuda()

    return inputs_shuffle, targets_shuffle
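`soft_cross_entropy` and `anneal_lr`, used below, are not defined in this fragment. Minimal sketches, assuming soft label distributions as targets and `torch.nn.functional` imported as `F` (the anneal shape is a placeholder, not the original):

def soft_cross_entropy(pred, target):
    # cross-entropy against soft targets: batch mean of -sum_c target_c * log p_c
    log_prob = F.log_softmax(pred, dim=1)
    return -(target * log_prob).sum(dim=1).mean()

def anneal_lr(epoch):
    # hypothetical multiplicative lr factor for LambdaLR: linear anneal to zero
    return 1.0 - epoch / EPOCH_TOTAL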


criterion = soft_cross_entropy
optimizer = optim.SGD(model.parameters(), lr=args.learning_rate, momentum=0.9)
lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, [anneal_lr])

for epoch in range(EPOCH_TOTAL):  # loop over the dataset multiple times
    logger.info("Start Epoch {}".format(epoch))
    running_loss_1, running_loss_2 = 0.0, 0.0
    # note: passing an explicit epoch to scheduler.step() is deprecated in recent PyTorch
    lr_scheduler.step(epoch)

    for i, data_batch in enumerate(train_loader):
        # each data_batch is a pair [inputs, labels]
        img_batch, label_batch = data_batch
        img_batch, label_batch = img_batch.cuda(), label_batch.cuda()

        train_img_batch, train_label_batch = [], []
Example #3
def main():
    args = get_args()
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    logger.info(args)
    model_path = '{}/train_pytorch_model/adversarial_train/fast_adv_train/{}@{}@epoch_{}.pth.tar'.format(
        PY_ROOT, args.dataset, args.arch, args.epochs)
    out_dir = os.path.dirname(model_path)
    os.makedirs(out_dir, exist_ok=True)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    start_start_time = time.time()
    train_loader = DataLoaderMaker.get_img_label_data_loader(args.dataset, args.batch_size, True)
    test_loader = DataLoaderMaker.get_img_label_data_loader(args.dataset, args.batch_size, False)

    # epsilon and pgd_alpha are given in [0, 255] pixel units; dividing by the
    # per-channel std maps them into the normalized input space
    epsilon = (args.epsilon / 255.) / std
    pgd_alpha = (args.pgd_alpha / 255.) / std

    model = StandardModel(args.dataset, args.arch, no_grad=False)
    model.apply(initialize_weights)
    model.cuda()
    model.train()

    opt = torch.optim.SGD(model.parameters(), lr=args.lr_max, momentum=0.9, weight_decay=5e-4)

    # NVIDIA apex AMP "O2" mixed precision with a fixed loss scale and no FP32 master weights
    model, opt = amp.initialize(model, opt, opt_level="O2", loss_scale=1.0, master_weights=False)

    criterion = nn.CrossEntropyLoss()

    # 'free' training and FGSM with 'previous' init both reuse one perturbation
    # tensor across minibatches, so it is allocated once up front
    if args.attack == 'free' or (args.attack == 'fgsm' and args.fgsm_init == 'previous'):
        delta = torch.zeros(args.batch_size, 3, 32, 32).cuda()
        delta.requires_grad = True

    if args.attack == 'free':
        assert args.epochs % args.attack_iters == 0
        epochs = args.epochs // args.attack_iters
    else:
        epochs = args.epochs

    if args.lr_schedule == 'cyclic':
        lr_schedule = lambda t: np.interp([t], [0, args.epochs * 2 // 5, args.epochs], [0, args.lr_max, 0])[0]
    elif args.lr_schedule == 'piecewise':
        def lr_schedule(t):
            if t / args.epochs < 0.5:
                return args.lr_max
            elif t / args.epochs < 0.75:
                return args.lr_max / 10.
            else:
                return args.lr_max / 100.

    prev_robust_acc = 0.
    logger.info('Epoch \t Time \t LR \t \t Train Loss \t Train Acc')
    for epoch in range(epochs):
        start_time = time.time()
        train_loss = 0
        train_acc = 0
        train_n = 0
        for i, (X, y) in enumerate(train_loader):
            X = X.cuda().float()
            y = y.cuda().long()
            if i == 0:
                first_batch = (X, y)  # kept for the end-of-epoch PGD overfitting check
            lr = lr_schedule(epoch + (i + 1) / len(train_loader))
            opt.param_groups[0].update(lr=lr)

            if args.attack == 'pgd':
                delta = attack_pgd(model, X, y, epsilon, pgd_alpha, args.attack_iters, args.restarts, opt)

            elif args.attack == 'fgsm':
                if args.fgsm_init == 'zero':
                    delta = torch.zeros_like(X, requires_grad=True)
                elif args.fgsm_init == 'random':
                    delta = torch.zeros_like(X).cuda()
                    delta[:, 0, :, :].uniform_(-epsilon[0][0][0].item(), epsilon[0][0][0].item())
                    delta[:, 1, :, :].uniform_(-epsilon[1][0][0].item(), epsilon[1][0][0].item())
                    delta[:, 2, :, :].uniform_(-epsilon[2][0][0].item(), epsilon[2][0][0].item())
                    delta.requires_grad = True
                elif args.fgsm_init == 'previous':
                    delta.requires_grad = True
                output = model(X + delta[:X.size(0)])
                loss = F.cross_entropy(output, y)
                with amp.scale_loss(loss, opt) as scaled_loss:
                    scaled_loss.backward()
                grad = delta.grad.detach()
                delta.data = clamp(delta + args.fgsm_alpha * epsilon * torch.sign(grad), -epsilon, epsilon)
                delta = delta.detach()

            elif args.attack == 'free':
                delta.requires_grad = True
                for j in range(args.attack_iters):
                    epoch_iters = epoch * args.attack_iters + (i * args.attack_iters + j + 1) / len(train_loader)
                    lr = lr_schedule(epoch_iters)
                    opt.param_groups[0].update(lr=lr)
                    output = model(clamp(X + delta[:X.size(0)], lower_limit, upper_limit))
                    loss = F.cross_entropy(output, y)
                    opt.zero_grad()
                    with amp.scale_loss(loss, opt) as scaled_loss:
                        scaled_loss.backward()
                    grad = delta.grad.detach()
                    delta.data = clamp(delta + epsilon * torch.sign(grad), -epsilon, epsilon)
                    nn.utils.clip_grad_norm_(model.parameters(), 0.5)
                    opt.step()
                    delta.grad.zero_()
            elif args.attack == 'none':
                delta = torch.zeros_like(X)

            output = model(clamp(X + delta[:X.size(0)], lower_limit, upper_limit))
            loss = criterion(output, y)
            if args.attack != 'free':
                opt.zero_grad()
                with amp.scale_loss(loss, opt) as scaled_loss:
                    scaled_loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), 0.5)
                opt.step()

            train_loss += loss.item() * y.size(0)
            train_acc += (output.max(1)[1] == y).sum().item()
            train_n += y.size(0)

        if args.overfit_check:
            # Check current PGD robustness of model using random minibatch
            X, y = first_batch
            pgd_delta = attack_pgd(model, X, y, epsilon, pgd_alpha, args.attack_iters, args.restarts, opt)
            with torch.no_grad():
                output = model(clamp(X + pgd_delta[:X.size(0)], lower_limit, upper_limit))
            robust_acc = (output.max(1)[1] == y).sum().item() / y.size(0)
            if robust_acc - prev_robust_acc < -0.5:
                break
            prev_robust_acc = robust_acc
        best_state_dict = copy.deepcopy(model.state_dict())

        train_time = time.time()
        logger.info('%d \t %.1f \t %.4f \t %.4f \t %.4f',
            epoch, train_time - start_time, lr, train_loss/train_n, train_acc/train_n)
    torch.save(best_state_dict, model_path)
    logger.info('Total time: %.4f', train_time - start_start_time)
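`clamp`, `attack_pgd`, `initialize_weights`, and the normalization constants `std`, `lower_limit`, `upper_limit` come from elsewhere in this codebase (`amp` is NVIDIA apex). The sketches below are consistent with how they are called above and with the public "fast adversarial training" reference code, but the dataset statistics, the init policy, and the exact PGD details are assumptions, not the author's code:

cifar10_mean = (0.4914, 0.4822, 0.4465)  # assumed CIFAR-10 statistics
cifar10_std = (0.2471, 0.2435, 0.2616)
mu = torch.tensor(cifar10_mean).view(3, 1, 1).cuda()
std = torch.tensor(cifar10_std).view(3, 1, 1).cuda()
lower_limit = (0. - mu) / std  # valid pixel range after normalization
upper_limit = (1. - mu) / std

def clamp(X, lower_limit, upper_limit):
    # element-wise clamp with broadcastable tensor bounds
    return torch.max(torch.min(X, upper_limit), lower_limit)

def initialize_weights(module):
    # hypothetical init policy: Kaiming init for conv weights, zeros for biases
    if isinstance(module, nn.Conv2d):
        nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
        if module.bias is not None:
            nn.init.zeros_(module.bias)

def attack_pgd(model, X, y, epsilon, alpha, attack_iters, restarts, opt):
    # L_inf PGD with random restarts; keeps the highest-loss delta per example
    max_loss = torch.zeros(y.shape[0]).cuda()
    max_delta = torch.zeros_like(X)
    for _ in range(restarts):
        delta = torch.zeros_like(X).uniform_(-1, 1) * epsilon
        delta = clamp(delta, lower_limit - X, upper_limit - X)
        delta.requires_grad = True
        for _ in range(attack_iters):
            loss = F.cross_entropy(model(X + delta), y)
            with amp.scale_loss(loss, opt) as scaled_loss:
                scaled_loss.backward()
            grad = delta.grad.detach()
            delta.data = clamp(delta + alpha * torch.sign(grad), -epsilon, epsilon)
            delta.data = clamp(delta.data, lower_limit - X, upper_limit - X)
            delta.grad.zero_()
        with torch.no_grad():
            all_loss = F.cross_entropy(model(X + delta), y, reduction='none')
            better = all_loss >= max_loss
            max_delta[better] = delta.detach()[better]
            max_loss = torch.max(max_loss, all_loss)
    return max_delta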