Beispiel #1
0
 def __init__(self,
              pgd_alpha: float = 2.0 / 255,
              pgd_eps: float = 8.0 / 255,
              pgd_iter: int = 7,
              **kwargs):
     """Store the PGD hyper-parameters and build the inner PGD attacker.

     Args:
         pgd_alpha: PGD step size per iteration.
         pgd_eps: L-inf perturbation budget.
         pgd_iter: number of PGD iterations.
     """
     super().__init__(**kwargs)
     # Register hyper-parameters so they appear in the parameter summary.
     self.param_list['adv_train'] = ['pgd_alpha', 'pgd_eps', 'pgd_iter']
     self.pgd_alpha = pgd_alpha
     self.pgd_eps = pgd_eps
     self.pgd_iter = pgd_iter
     # Untargeted attacker; stop_threshold=None means PGD always runs
     # the full number of iterations (no early stopping).
     attacker_kwargs = dict(pgd_alpha=pgd_alpha, pgd_eps=pgd_eps,
                            iteration=pgd_iter, stop_threshold=None,
                            target_idx=0, model=self.model,
                            dataset=self.dataset)
     self.pgd = PGD(**attacker_kwargs)
Beispiel #2
0
 def attack(self, epochs: int, **kwargs):
     """Run the per-sample trigger attack and report running statistics.

     Iterates over single-sample validation batches, keeps only samples
     of 'normal' PGD difficulty (a quick PGD check succeeds, but not
     within the first 4 iterations), optimizes a trigger for each kept
     sample, and prints aggregate metrics.  Optional defenses
     (curvature, randomized smoothing, MagNet) are evaluated when the
     corresponding attributes exist on ``self``.

     Args:
         epochs (int): optimization epochs passed to ``self.optimize``
             for each sample.
         **kwargs: forwarded to ``self.optimize``.
     """
     total = 0
     target_conf_list = []
     target_acc_list = []
     clean_acc_list = []
     pgd_norm_list = []
     # Dataset-dependent PGD budget for the difficulty checker.
     pgd_alpha = 1.0 / 255
     pgd_eps = 8.0 / 255
     if self.dataset.name in ['cifar10', 'gtsrb', 'isic2018']:
         pgd_alpha = 1.0 / 255
         pgd_eps = 8.0 / 255
     if self.dataset.name in ['sample_imagenet', 'sample_vggface2']:
         pgd_alpha = 0.25 / 255
         pgd_eps = 2.0 / 255
     pgd_checker = PGD(pgd_alpha=pgd_alpha, pgd_eps=pgd_eps, iteration=8,
                       dataset=self.dataset, model=self.model, target_idx=self.target_idx, stop_threshold=0.95)
     easy = 0
     difficult = 0
     normal = 0
     loader = self.dataset.get_dataloader(mode='valid', batch_size=1)
     if 'curvature' in self.__dict__.keys():
         benign_curvature = self.curvature.benign_measure()
         tgt_curvature_list = []
         org_curvature_list = []
     if self.randomized_smooth:
         org_conf_list = []
         tgt_conf_list = []
     if 'magnet' in self.__dict__.keys():
         org_magnet_list = []
         tgt_magnet_list = []
     for data in loader:
         print(easy, normal, difficult)
         if normal >= 100:
             break
         # Restore the clean model before attacking the next sample.
         self.model.load()
         _input, _label = self.model.remove_misclassify(data)
         if len(_label) == 0:
             continue
         target_label = self.model.generate_target(_input, idx=self.target_idx)
         self.temp_input = _input
         self.temp_label = target_label
         # Difficulty triage: skip samples PGD cannot flip at all
         # (difficult) or flips almost immediately (easy).
         _, _iter = pgd_checker.optimize(_input)
         if _iter is None:
             difficult += 1
             continue
         if _iter < 4:
             easy += 1
             continue
         normal += 1
         # First call's result is discarded — presumably kept for its
         # printed progress output; TODO confirm it can be removed.
         target_conf, target_acc, clean_acc = self.validate_fn()
         noise = torch.zeros_like(_input)
         # ``noise`` is presumably updated in place by ``optimize``
         # (its norm is read afterwards) — verify against optimize().
         trigger_input = self.optimize(_input=_input, _label=target_label, epochs=epochs, noise=noise, **kwargs)
         pgd_norm = float(noise.norm(p=float('inf')))
         target_conf, target_acc, clean_acc = self.validate_fn()
         target_conf_list.append(target_conf)
         target_acc_list.append(target_acc)
         clean_acc_list.append(max(self.clean_acc - clean_acc, 0.0))
         pgd_norm_list.append(pgd_norm)
         print(f'[{total+1} / 100]\n'
               f'target confidence: {np.mean(target_conf_list)}({np.std(target_conf_list)})\n'
               f'target accuracy: {np.mean(target_acc_list)}({np.std(target_acc_list)})\n'
               f'clean accuracy Drop: {np.mean(clean_acc_list)}({np.std(clean_acc_list)})\n'
               f'PGD Norm: {np.mean(pgd_norm_list)}({np.std(pgd_norm_list)})\n\n\n')
         org_conf = self.model.get_target_prob(_input=trigger_input, target=_label)
         tgt_conf = self.model.get_target_prob(_input=trigger_input, target=target_label)
         if 'curvature' in self.__dict__.keys():
             org_curvature_list.extend(self.curvature.measure(trigger_input, _label).detach().cpu().tolist())
             tgt_curvature_list.extend(self.curvature.measure(trigger_input, target_label).detach().cpu().tolist())
             print('Curvature:')
             print(f'    org_curvature: {ks_2samp(org_curvature_list, benign_curvature)}')    # type: ignore
             print(f'    tgt_curvature: {ks_2samp(tgt_curvature_list, benign_curvature)}')    # type: ignore
             print()
         if self.randomized_smooth:
             org_new = self.model.get_target_prob(_input=trigger_input, target=_label, randomized_smooth=True)
             tgt_new = self.model.get_target_prob(_input=trigger_input, target=target_label, randomized_smooth=True)
             org_increase = (org_new - org_conf).clamp(min=0.0)
             # BUGFIX: measure the *drop* in target confidence
             # (old - new), consistent with the MagNet branch below;
             # the original computed (tgt_new - tgt_conf), i.e. the
             # increase, contradicting the variable name.
             tgt_decrease = (tgt_conf - tgt_new).clamp(min=0.0)
             org_conf_list.extend(org_increase.detach().cpu().tolist())
             tgt_conf_list.extend(tgt_decrease.detach().cpu().tolist())
             print('Randomized Smooth:')
             print(f'    org_confidence: {np.mean(org_conf_list)}')
             print(f'    tgt_confidence: {np.mean(tgt_conf_list)}')
             print()
         if 'magnet' in self.__dict__.keys():
             trigger_input = self.magnet(trigger_input)
             org_new = self.model.get_target_prob(_input=trigger_input, target=_label)
             tgt_new = self.model.get_target_prob(_input=trigger_input, target=target_label)
             org_increase = (org_new - org_conf).clamp(min=0.0)
             tgt_decrease = (tgt_conf - tgt_new).clamp(min=0.0)
             org_magnet_list.extend(org_increase.detach().cpu().tolist())
             tgt_magnet_list.extend(tgt_decrease.detach().cpu().tolist())
             print('MagNet:')
             print(f'    org_confidence: {np.mean(org_magnet_list)}')
             print(f'    tgt_confidence: {np.mean(tgt_magnet_list)}')
             print()
         total += 1
Beispiel #3
0
class AdvTrain(BackdoorDefense):
    """PGD adversarial training used as a backdoor defense.

    Fine-tunes the (possibly backdoored) model with adversarial
    training and then re-validates the original attack.
    """

    name: str = 'adv_train'

    @classmethod
    def add_argument(cls, group: argparse._ArgumentGroup):
        """Register the PGD hyper-parameters as command-line options."""
        super().add_argument(group)
        group.add_argument('--pgd_alpha', type=float)
        group.add_argument('--pgd_eps', type=float)
        group.add_argument('--pgd_iter', type=int)
        return group

    def __init__(self,
                 pgd_alpha: float = 2.0 / 255,
                 pgd_eps: float = 8.0 / 255,
                 pgd_iter: int = 7,
                 **kwargs):
        """Store PGD hyper-parameters and build the inner PGD attacker.

        Args:
            pgd_alpha: PGD step size per iteration.
            pgd_eps: L-inf perturbation budget.
            pgd_iter: number of PGD iterations per batch.
        """
        super().__init__(**kwargs)
        # Expose hyper-parameters in the defense's parameter summary.
        self.param_list['adv_train'] = ['pgd_alpha', 'pgd_eps', 'pgd_iter']
        self.pgd_alpha = pgd_alpha
        self.pgd_eps = pgd_eps
        self.pgd_iter = pgd_iter
        # Untargeted attacker; stop_threshold=None means PGD always runs
        # the full number of iterations (no early stopping).
        self.pgd = PGD(pgd_alpha=pgd_alpha,
                       pgd_eps=pgd_eps,
                       iteration=pgd_iter,
                       stop_threshold=None,
                       target_idx=0,
                       model=self.model,
                       dataset=self.dataset)

    def detect(self, **kwargs):
        """Run the defense: adversarially train, then re-validate the attack."""
        super().detect(**kwargs)
        print()
        self.adv_train(verbose=True, **kwargs)
        self.attack.validate_fn()

    def validate_fn(self, get_data_fn=None, **kwargs) -> tuple[float, float]:
        """Return ``(adv_acc, clean_acc)``.

        ``adv_acc`` is forced to 0.0 when clean accuracy dropped more
        than 20 points below the stored baseline ``self.clean_acc``.
        Note: the ``get_data_fn`` parameter is accepted but ignored;
        clean validation always uses ``get_data_fn=None`` and
        adversarial validation always uses ``self.get_data``.
        """
        clean_acc, _ = self.model._validate(print_prefix='Validate Clean',
                                            get_data_fn=None,
                                            **kwargs)
        adv_acc, _ = self.model._validate(print_prefix='Validate Adv',
                                          get_data_fn=self.get_data,
                                          **kwargs)
        if self.clean_acc - clean_acc > 20:
            adv_acc = 0.0
        return adv_acc, clean_acc

    def get_data(self, data: tuple[torch.Tensor, torch.Tensor],
                 **kwargs) -> tuple[torch.Tensor, torch.Tensor]:
        """Turn a clean batch into a PGD adversarial batch (labels unchanged)."""
        _input, _label = self.model.get_data(data, **kwargs)
        adv_x, _ = self.pgd.optimize(_input=_input, target=_label)
        return adv_x, _label

    def adv_train(self,
                  epochs: int,
                  optimizer: optim.Optimizer,
                  lr_scheduler: optim.lr_scheduler._LRScheduler = None,
                  validate_interval=10,
                  save=False,
                  verbose=True,
                  indent=0,
                  **kwargs):
        """PGD adversarial training loop.

        Per batch: one optimizer step on the clean input, then
        ``pgd_iter`` rounds of (one PGD step to update the adversarial
        example, one optimizer step on it).  Validates every
        ``validate_interval`` epochs and on the final epoch.

        Args:
            epochs: number of training epochs.
            optimizer: optimizer over the model parameters.
            lr_scheduler: optional LR scheduler, stepped once per epoch.
            validate_interval: validate every N epochs (0 disables).
            save: whether to save model weights after validation.
            verbose: print per-epoch progress.
            indent: indentation level for printed output.
        """
        loader_train = self.dataset.loader['train']
        file_path = os.path.join(self.folder_path,
                                 self.get_filename() + '.pth')

        best_acc, _ = self.validate_fn(verbose=verbose,
                                       indent=indent,
                                       **kwargs)

        losses = AverageMeter('Loss', ':.4e')
        top1 = AverageMeter('Acc@1', ':6.2f')
        top5 = AverageMeter('Acc@5', ':6.2f')
        params: list[nn.Parameter] = []
        for param_group in optimizer.param_groups:
            params.extend(param_group['params'])
        for _epoch in range(epochs):
            losses.reset()
            top1.reset()
            top5.reset()
            epoch_start = time.perf_counter()
            # NOTE(review): re-assigns loader_train each epoch, so from
            # the second epoch onward tqdm wraps a tqdm object — likely
            # unintended nesting; confirm and use a fresh local if so.
            if verbose and env['tqdm']:
                loader_train = tqdm(loader_train)
            self.model.activate_params(params)
            optimizer.zero_grad()
            for data in loader_train:
                _input, _label = self.model.get_data(data)
                noise = torch.zeros_like(_input)
                adv_x = _input
                # Warm-up step on the clean batch before the PGD rounds.
                self.model.train()
                loss = self.model.loss(adv_x, _label)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                for m in range(self.pgd.iteration):
                    # Alternate: eval-mode PGD step (noise is refined in
                    # place across rounds), then train-mode model update.
                    self.model.eval()
                    adv_x, _ = self.pgd.optimize(_input=_input,
                                                 noise=noise,
                                                 target=_label,
                                                 iteration=1)
                    optimizer.zero_grad()
                    self.model.train()
                    loss = self.model.loss(adv_x, _label)
                    loss.backward()
                    optimizer.step()
                optimizer.zero_grad()
                # Accuracy metrics are measured on the *clean* input.
                with torch.no_grad():
                    _output = self.model(_input)
                acc1, acc5 = self.model.accuracy(_output, _label, topk=(1, 5))
                batch_size = int(_label.size(0))
                losses.update(loss.item(), batch_size)
                top1.update(acc1, batch_size)
                top5.update(acc5, batch_size)
            epoch_time = str(
                datetime.timedelta(seconds=int(time.perf_counter() -
                                               epoch_start)))
            self.model.eval()
            self.model.activate_params([])
            if verbose:
                pre_str = '{blue_light}Epoch: {0}{reset}'.format(
                    output_iter(_epoch + 1, epochs),
                    **ansi).ljust(64 if env['color'] else 35)
                _str = ' '.join([
                    f'Loss: {losses.avg:.4f},'.ljust(20),
                    f'Top1 Clean Acc: {top1.avg:.3f}, '.ljust(30),
                    f'Top5 Clean Acc: {top5.avg:.3f},'.ljust(30),
                    f'Time: {epoch_time},'.ljust(20),
                ])
                prints(pre_str,
                       _str,
                       prefix='{upline}{clear_line}'.format(
                           **ansi) if env['tqdm'] else '',
                       indent=indent)
            if lr_scheduler:
                lr_scheduler.step()

            if validate_interval != 0:
                if (_epoch +
                        1) % validate_interval == 0 or _epoch == epochs - 1:
                    adv_acc, _ = self.validate_fn(verbose=verbose,
                                                  indent=indent,
                                                  **kwargs)
                    # NOTE(review): treats a *lower* adv_acc as the new
                    # best — a defense usually tracks the maximum; the
                    # comparison direction looks inverted; confirm.
                    if adv_acc < best_acc:
                        prints('{purple}best result update!{reset}'.format(
                            **ansi),
                               indent=indent)
                        prints(
                            f'Current Acc: {adv_acc:.3f}    Previous Best Acc: {best_acc:.3f}',
                            indent=indent)
                        best_acc = adv_acc
                    # NOTE(review): saves after every validation, not
                    # only on improvement — verify this is intended.
                    if save:
                        self.model.save(file_path=file_path, verbose=verbose)
                    if verbose:
                        print('-' * 50)
        self.model.zero_grad()
Beispiel #4
0
class GradTrain(Defense):
    """Gradient-regularization training defense.

    Trains the model with an L1 input-gradient penalty on randomly
    jittered inputs, and validates robustness with a PGD attacker.
    """

    name: str = 'grad_train'

    def __init__(self,
                 pgd_alpha: float = 2.0 / 255,
                 pgd_eps: float = 8.0 / 255,
                 pgd_iter: int = 7,
                 grad_lambda: float = 10,
                 **kwargs):
        """Store hyper-parameters and build the validation PGD attacker.

        Args:
            pgd_alpha: PGD step size per iteration.
            pgd_eps: L-inf perturbation budget.
            pgd_iter: number of PGD iterations.
            grad_lambda: weight of the input-gradient penalty.
        """
        super().__init__(**kwargs)
        self.param_list['grad_train'] = ['grad_lambda']
        self.grad_lambda = grad_lambda

        self.param_list['adv_train'] = ['pgd_alpha', 'pgd_eps', 'pgd_iter']
        self.pgd_alpha = pgd_alpha
        self.pgd_eps = pgd_eps
        self.pgd_iter = pgd_iter
        # Untargeted attacker used only during validation;
        # stop_threshold=None: always run all iterations.
        attacker_kwargs = dict(pgd_alpha=pgd_alpha, pgd_eps=pgd_eps,
                               iteration=pgd_iter, target_idx=0,
                               stop_threshold=None, model=self.model,
                               dataset=self.dataset)
        self.pgd = PGD(**attacker_kwargs)

    def detect(self, **kwargs):
        """Run standard training with the gradient-penalized loss."""
        return self.model._train(loss_fn=self.loss,
                                 validate_fn=self.validate_fn,
                                 verbose=True, **kwargs)

    def loss(self, _input: torch.Tensor, _label: torch.Tensor,
             **kwargs) -> torch.Tensor:
        """Cross-entropy plus an L1 input-gradient penalty.

        The batch is replicated 4x and jittered with random noise whose
        inf-norm equals ``pgd_eps``; the mean L1 norm of the gradient of
        the loss w.r.t. the jittered inputs is added, weighted by
        ``grad_lambda``.
        """
        x = _input.expand(4, -1, -1, -1)
        y = _label.expand(4)
        jitter = torch.randn_like(x)
        jitter = jitter / jitter.norm(p=float('inf')) * self.pgd_eps
        x = (x + jitter).clamp(0, 1).detach()
        x.requires_grad_()
        ce_loss = self.model.loss(x, y)
        # create_graph=True keeps the penalty itself differentiable so
        # the optimizer can minimize it.
        input_grad = torch.autograd.grad(ce_loss, x, create_graph=True)[0]
        penalty = input_grad.flatten(start_dim=1).norm(p=1, dim=1).mean()
        return ce_loss + self.grad_lambda * penalty

    def validate_fn(self,
                    get_data_fn=None,
                    loss_fn=None,
                    **kwargs) -> tuple[float, float]:
        """Return ``(adv_acc, clean_acc)``.

        ``adv_acc`` is zeroed when clean accuracy dropped more than 20
        points below the stored baseline ``self.clean_acc``.
        TODO: ``get_data_fn`` / ``loss_fn`` are accepted but ignored;
        unify the return convention with other defenses.
        """
        clean_acc, _ = self.model._validate(print_prefix='Validate Clean',
                                            get_data_fn=None, **kwargs)
        adv_acc, _ = self.model._validate(print_prefix='Validate Adv',
                                          get_data_fn=self.get_data,
                                          **kwargs)
        drop = self.clean_acc - clean_acc
        if drop > 20:
            adv_acc = 0.0
        return adv_acc, clean_acc

    def get_data(self, data: tuple[torch.Tensor, torch.Tensor],
                 **kwargs) -> tuple[torch.Tensor, torch.Tensor]:
        """Turn a clean batch into a PGD adversarial batch (labels unchanged)."""
        x, y = self.model.get_data(data, **kwargs)
        adv_x, _ = self.pgd.optimize(_input=x, target=y)
        return adv_x, y

    def save(self, **kwargs):
        """Persist model weights with the '_grad_train' suffix."""
        self.model.save(folder_path=self.folder_path,
                        suffix='_grad_train', verbose=True, **kwargs)