def perturb(self,
                original_images,
                labels,
                reduction4loss='mean',
                random_start=False):
        """Craft adversarial examples via iterated FGSM (PGD when random_start).

        Args:
            original_images: input batch; pixel values are assumed to lie in
                [self.min_val, self.max_val].
            labels: labels used in the cross-entropy attack loss.
            reduction4loss: reduction passed to F.cross_entropy ('mean' or
                'none'); with 'none' the loss is a vector, so an explicit
                all-ones grad_outputs seed is used for backprop.
            random_start: when True, start from a uniform random point inside
                the epsilon-ball around the input (PGD-style init).

        Returns:
            The perturbed batch, projected back into the epsilon-ball
            (l_infinity / l2 per self._type) and clamped to the valid pixel
            range. Side effect: the model is left in train() mode on return.
        """
        # original_images: values are within self.min_val and self.max_val

        # The adversaries created from random close points to the original data
        if random_start:
            rand_perturb = torch.FloatTensor(original_images.shape).uniform_(
                -self.epsilon, self.epsilon)
            rand_perturb = tensor2cuda(rand_perturb)
            x = original_images + rand_perturb
            x.clamp_(self.min_val, self.max_val)
        else:
            x = original_images.clone()

        # Required so torch.autograd.grad can differentiate the loss w.r.t. x.
        x.requires_grad = True

        # max_x = original_images + self.epsilon
        # min_x = original_images - self.epsilon

        # presumably eval() freezes batch-norm/dropout statistics while the
        # attack is crafted — confirm against the model implementation.
        self.model.eval()

        with torch.enable_grad():
            for _iter in range(self.max_iters):
                outputs = self.model(x, _eval=True)

                loss = F.cross_entropy(outputs,
                                       labels,
                                       reduction=reduction4loss)

                # 'none' gives a vector loss: seed backprop with ones so each
                # sample's gradient is taken; scalar reductions need no seed.
                if reduction4loss == 'none':
                    grad_outputs = tensor2cuda(torch.ones(loss.shape))

                else:
                    grad_outputs = None

                grads = torch.autograd.grad(loss,
                                            x,
                                            grad_outputs=grad_outputs,
                                            only_inputs=True)[0]

                # Gradient-ascent step on .data: updates x in place without
                # being recorded by autograd.
                x.data += self.alpha * torch.sign(grads.data)

                # the adversaries' pixel value should within max_x and min_x due
                # to the l_infinity / l2 restriction
                x = project(x, original_images, self.epsilon, self._type)
                # the adversaries' value should be valid pixel value
                x.clamp_(self.min_val, self.max_val)

        # Restores train() mode unconditionally, even if the caller had the
        # model in eval() mode before the attack.
        self.model.train()

        return x
    def generate_gradients(self, input_image, target_class):
        """Return the gradient of the target-class score w.r.t. the input.

        Seeds torch.autograd.grad with a one-hot vector on the logits, which
        selects d(logit[target_class]) / d(input) for each sample.
        """
        # Forward pass in evaluation mode.
        self.model.eval()

        inp = input_image.clone()
        inp.requires_grad = True

        with torch.enable_grad():
            logits = self.model(inp)
            self.model.zero_grad()

            # One-hot selector over the class dimension, moved to the
            # device used by the model.
            seed = tensor2cuda(one_hot(target_class, logits.shape[1]))

            input_grad, = torch.autograd.grad(logits,
                                              inp,
                                              grad_outputs=seed,
                                              only_inputs=True)

            # Restore training mode before handing the gradient back.
            self.model.train()

        return input_grad
    def test(self, model, loader, adv_test=False, use_pseudo_label=False):
        """Evaluate clean (and optionally adversarial) accuracy over `loader`.

        Returns:
            (clean_acc, adv_acc) as fractions; adv_acc is -1 when
            adv_test is False.
        """
        clean_correct = 0.0
        seen = 0
        adv_correct = 0.0

        with torch.no_grad():
            for data, label in loader:
                data, label = tensor2cuda(data), tensor2cuda(label)

                model.eval()
                output = model(data)
                # output = model(data, _eval=True)

                pred = output.argmax(dim=1)
                clean_correct += evaluate(pred.cpu().numpy(),
                                          label.cpu().numpy(), 'sum')
                seen += output.shape[0]

                if not adv_test:
                    # Sentinel: adv_correct / seen evaluates to -1 below.
                    adv_correct = -seen
                    continue

                # Crafting the attack needs gradients despite no_grad above.
                with torch.enable_grad():
                    target = pred if use_pseudo_label else label
                    adv_data = self.attack.perturb(data, target, 'mean',
                                                   False)

                model.eval()
                adv_output = model(adv_data)
                # adv_output = model(adv_data, _eval=True)

                adv_pred = adv_output.argmax(dim=1)
                adv_correct += evaluate(adv_pred.cpu().numpy(),
                                        label.cpu().numpy(), 'sum')

        return clean_correct / seen, adv_correct / seen
args = parser()

label_dict = LabelDict(args.dataset)

te_dataset = tv.datasets.CIFAR10(args.data_root,
                                 train=False,
                                 transform=tv.transforms.ToTensor(),
                                 download=True)

te_loader = DataLoader(te_dataset,
                       batch_size=args.batch_size,
                       shuffle=False,
                       num_workers=4)

for data, label in te_loader:
    data, label = tensor2cuda(data), tensor2cuda(label)

    break

adv_list = []
pred_list = []

with torch.no_grad():
    model = WideResNet(depth=34, num_classes=10, widen_factor=10, dropRate=0.0)

    load_model(model, args.load_checkpoint)

    if torch.cuda.is_available():
        model.cuda()

    attack = FastGradientSignUntargeted(model,
# Esempio n. 5  (scrape artifact: "Example no. 5" separator between pasted chunks)
# 0
    def train(self, model, tr_loader, va_loader=None, adv_train=False):
        """Adversarial / standard training loop (SGD + iteration-level LR decay).

        Args:
            model: network to train; its forward accepts an `_eval` flag.
            tr_loader: training loader yielding (data, label) batches.
            va_loader: optional validation loader; when given, clean and
                adversarial accuracy are reported after every epoch.
            adv_train: when True, train on adversarial examples crafted with
                a random start; otherwise train on clean data.
        """
        args = self.args
        logger = self.logger

        # SGD with momentum; LR decays by 10x at iterations 40k and 60k
        # (scheduler is stepped per iteration, see end of inner loop).
        opt = torch.optim.SGD(model.parameters(),
                              args.learning_rate,
                              weight_decay=args.weight_decay,
                              momentum=args.momentum)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            opt, milestones=[40000, 60000], gamma=0.1)
        _iter = 0

        begin_time = time()

        for epoch in range(1, args.max_epoch + 1):
            for data, label in tr_loader:
                data, label = tensor2cuda(data), tensor2cuda(label)

                if adv_train:
                    # When training, the adversarial example is created from a random
                    # close point to the original data point. If in evaluation mode,
                    # just start from the original data point.
                    adv_data = self.attack.perturb(data, label, 'mean', True)
                    output = model(adv_data, _eval=False)
                else:
                    output = model(data, _eval=False)

                loss = F.cross_entropy(output, label)

                opt.zero_grad()
                loss.backward()
                opt.step()

                # Periodic logging of standard vs. robust accuracy.
                if _iter % args.n_eval_step == 0:
                    t1 = time()  # NOTE(review): t1/t2 are set but never logged in this variant

                    if adv_train:
                        # Trained on adversarial data: measure clean accuracy
                        # with a separate no-grad forward pass.
                        with torch.no_grad():
                            stand_output = model(data, _eval=True)
                        pred = torch.max(stand_output, dim=1)[1]

                        # print(pred)
                        std_acc = evaluate(pred.cpu().numpy(),
                                           label.cpu().numpy()) * 100

                        # Robust accuracy comes from the training output itself.
                        pred = torch.max(output, dim=1)[1]
                        # print(pred)
                        adv_acc = evaluate(pred.cpu().numpy(),
                                           label.cpu().numpy()) * 100

                    else:

                        # Trained on clean data: craft an attack (no random
                        # start) purely for reporting robustness.
                        adv_data = self.attack.perturb(data, label, 'mean',
                                                       False)

                        with torch.no_grad():
                            adv_output = model(adv_data, _eval=True)
                        pred = torch.max(adv_output, dim=1)[1]
                        # print(label)
                        # print(pred)
                        adv_acc = evaluate(pred.cpu().numpy(),
                                           label.cpu().numpy()) * 100

                        pred = torch.max(output, dim=1)[1]
                        # print(pred)
                        std_acc = evaluate(pred.cpu().numpy(),
                                           label.cpu().numpy()) * 100

                    t2 = time()

                    logger.info(
                        f'epoch: {epoch}, iter: {_iter}, lr={opt.param_groups[0]["lr"]}, '
                        f'spent {time()-begin_time:.2f} s, tr_loss: {loss.item():.3f}'
                    )

                    logger.info(
                        f'standard acc: {std_acc:.3f}%, robustness acc: {adv_acc:.3f}%'
                    )

                    # begin_time = time()

                    # if va_loader is not None:
                    #     va_acc, va_adv_acc = self.test(model, va_loader, True)
                    #     va_acc, va_adv_acc = va_acc * 100.0, va_adv_acc * 100.0

                    #     logger.info('\n' + '='*30 + ' evaluation ' + '='*30)
                    #     logger.info('test acc: %.3f %%, test adv acc: %.3f %%, spent: %.3f' % (
                    #         va_acc, va_adv_acc, time() - begin_time))
                    #     logger.info('='*28 + ' end of evaluation ' + '='*28 + '\n')

                    begin_time = time()

                # NOTE(review): `adv_data` is whatever batch the last eval (or
                # adv-train) step produced; assumes it is defined and fresh
                # whenever this branch fires — confirm step-size settings.
                if _iter % args.n_store_image_step == 0:
                    tv.utils.save_image(
                        torch.cat([data.cpu(), adv_data.cpu()], dim=0),
                        os.path.join(args.log_folder, f'images_{_iter}.jpg'),
                        nrow=16)

                if _iter % args.n_checkpoint_step == 0:
                    file_name = os.path.join(args.model_folder,
                                             f'checkpoint_{_iter}.pth')
                    save_model(model, file_name)

                _iter += 1
                # scheduler depends on the training iteration count, so it is
                # stepped once per batch (milestones are iteration numbers)
                scheduler.step()

            # End-of-epoch evaluation on the validation set.
            if va_loader is not None:
                t1 = time()
                va_acc, va_adv_acc = self.test(model, va_loader, True, False)
                va_acc, va_adv_acc = va_acc * 100.0, va_adv_acc * 100.0

                t2 = time()
                logger.info('\n'+'='*20 +f' evaluation at epoch: {epoch} iteration: {_iter} ' \
                    +'='*20)
                logger.info(
                    f'test acc: {va_acc:.3f}%, test adv acc: {va_adv_acc:.3f}%, spent: {t2-t1:.3f} s'
                )
                logger.info('=' * 28 + ' end of evaluation ' + '=' * 28 + '\n')
    def train(self, model, tr_loader, va_loader=None, adv_train=False):
        """Adversarial / standard training loop (Adam + epoch-level LR decay).

        Variant of the SGD loop above: uses Adam, steps the LR scheduler per
        epoch (milestones 100 and 150 epochs), and toggles train()/eval()
        explicitly because this model's forward takes no `_eval` flag.

        Args:
            model: network to train.
            tr_loader: training loader yielding (data, label) batches.
            va_loader: optional validation loader; when given, clean and
                adversarial accuracy are reported after every epoch.
            adv_train: when True, train on adversarial examples crafted with
                a random start; otherwise train on clean data.
        """
        args = self.args
        logger = self.logger

        opt = torch.optim.Adam(model.parameters(),
                               args.learning_rate,
                               weight_decay=args.weight_decay)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(opt,
                                                         milestones=[100, 150],
                                                         gamma=0.1)
        _iter = 0

        begin_time = time()

        for epoch in range(1, args.max_epoch + 1):
            # NOTE(review): stepping the scheduler BEFORE any opt.step() is the
            # pre-PyTorch-1.1 ordering; on >=1.1 it warns and shifts the decay
            # one epoch early — confirm which PyTorch version this targets.
            scheduler.step()
            for data, label in tr_loader:
                data, label = tensor2cuda(data), tensor2cuda(label)

                if adv_train:
                    # When training, the adversarial example is created from a random
                    # close point to the original data point. If in evaluation mode,
                    # just start from the original data point.
                    adv_data = self.attack.perturb(data, label, 'mean', True)
                    # output = model(adv_data, _eval=False)
                    # ????????? don't know if this is the case###########
                    model.train()
                    output = model(adv_data)
                else:
                    # output = model(data, _eval=False)
                    model.train()
                    output = model(data)

                loss = F.cross_entropy(output, label)

                opt.zero_grad()
                loss.backward()
                opt.step()

                # Periodic logging of standard vs. robust accuracy.
                if _iter % args.n_eval_step == 0:
                    t1 = time()

                    if adv_train:
                        # Trained on adversarial data: measure clean accuracy
                        # with a separate no-grad forward pass.
                        with torch.no_grad():
                            model.eval()
                            stand_output = model(data)
                            # stand_output = model(data, _eval=True)
                        pred = torch.max(stand_output, dim=1)[1]

                        # print(pred)
                        std_acc = evaluate(pred.cpu().numpy(),
                                           label.cpu().numpy()) * 100

                        # Robust accuracy comes from the training output itself.
                        pred = torch.max(output, dim=1)[1]
                        # print(pred)
                        adv_acc = evaluate(pred.cpu().numpy(),
                                           label.cpu().numpy()) * 100

                    else:

                        # Trained on clean data: craft an attack (no random
                        # start) purely for reporting robustness.
                        adv_data = self.attack.perturb(data, label, 'mean',
                                                       False)

                        with torch.no_grad():
                            model.eval()
                            adv_output = model(adv_data)
                            # adv_output = model(adv_data, _eval=True)
                        pred = torch.max(adv_output, dim=1)[1]
                        # print(label)
                        # print(pred)
                        adv_acc = evaluate(pred.cpu().numpy(),
                                           label.cpu().numpy()) * 100

                        pred = torch.max(output, dim=1)[1]
                        # print(pred)
                        std_acc = evaluate(pred.cpu().numpy(),
                                           label.cpu().numpy()) * 100

                    t2 = time()

                    # Wall-clock time spent in the evaluation block above.
                    print('%.3f' % (t2 - t1))

                    logger.info(
                        'epoch: %d, iter: %d, spent %.2f s, tr_loss: %.3f' %
                        (epoch, _iter, time() - begin_time, loss.item()))

                    logger.info(
                        'standard acc: %.3f %%, robustness acc: %.3f %%' %
                        (std_acc, adv_acc))

                    # begin_time = time()

                    # if va_loader is not None:
                    #     va_acc, va_adv_acc = self.test(model, va_loader, True)
                    #     va_acc, va_adv_acc = va_acc * 100.0, va_adv_acc * 100.0

                    #     logger.info('\n' + '='*30 + ' evaluation ' + '='*30)
                    #     logger.info('test acc: %.3f %%, test adv acc: %.3f %%, spent: %.3f' % (
                    #         va_acc, va_adv_acc, time() - begin_time))
                    #     logger.info('='*28 + ' end of evaluation ' + '='*28 + '\n')

                    begin_time = time()

                # NOTE(review): `adv_data` is whatever batch the last eval (or
                # adv-train) step produced; assumes it is defined and fresh
                # whenever this branch fires — confirm step-size settings.
                if _iter % args.n_store_image_step == 0:
                    tv.utils.save_image(
                        torch.cat([data.cpu(), adv_data.cpu()], dim=0),
                        os.path.join(args.log_folder, 'images_%d.jpg' % _iter),
                        nrow=16)

                if _iter % args.n_checkpoint_step == 0:
                    file_name = os.path.join(args.model_folder,
                                             'checkpoint_%d.pth' % _iter)
                    save_model(model, file_name)

                _iter += 1

            # End-of-epoch evaluation on the validation set.
            if va_loader is not None:
                t1 = time()
                va_acc, va_adv_acc = self.test(model, va_loader, True, False)
                va_acc, va_adv_acc = va_acc * 100.0, va_adv_acc * 100.0

                t2 = time()
                logger.info('\n'+'='*20 +' evaluation at epoch: %d iteration: %d '%(epoch, _iter) \
                    +'='*20)
                logger.info(
                    'test acc: %.3f %%, test adv acc: %.3f %%, spent: %.3f' %
                    (va_acc, va_adv_acc, t2 - t1))
                logger.info('=' * 28 + ' end of evaluation ' + '=' * 28 + '\n')