def __init__(self, pgd_alpha: float = 2.0 / 255, pgd_eps: float = 8.0 / 255,
             pgd_iter: int = 7, **kwargs):
    """Store PGD hyper-parameters and build the inner PGD attacker.

    Args:
        pgd_alpha: per-step PGD step size.
        pgd_eps: PGD perturbation budget (L-inf ball radius).
        pgd_iter: number of PGD iterations.
        **kwargs: forwarded to the parent constructor.
    """
    super().__init__(**kwargs)
    # Keep the hyper-parameters on the instance and register them so the
    # framework's summary machinery can report them.
    self.pgd_alpha = pgd_alpha
    self.pgd_eps = pgd_eps
    self.pgd_iter = pgd_iter
    self.param_list['adv_train'] = ['pgd_alpha', 'pgd_eps', 'pgd_iter']
    # PGD attacker bound to this defense's model/dataset.
    # stop_threshold=None / target_idx=0 semantics are defined by the
    # project's PGD class.
    self.pgd = PGD(model=self.model, dataset=self.dataset,
                   pgd_alpha=self.pgd_alpha, pgd_eps=self.pgd_eps,
                   iteration=self.pgd_iter,
                   stop_threshold=None, target_idx=0)
def attack(self, epochs: int, **kwargs):
    """Run the attack evaluation loop over single validation samples.

    For each correctly-classified validation input, a targeted perturbation
    is optimized (``self.optimize``) and its effect is measured; optional
    measurements (curvature, randomized smoothing, MagNet) are accumulated
    and printed when the corresponding attributes exist on ``self``.

    Args:
        epochs: number of optimization epochs forwarded to ``self.optimize``.
        **kwargs: extra arguments forwarded to ``self.optimize``.
    """
    # model._validate()
    total = 0
    # Running statistics over all processed ("normal") samples.
    target_conf_list = []
    target_acc_list = []
    clean_acc_list = []
    pgd_norm_list = []
    # Difficulty-probe PGD settings, chosen per dataset.
    pgd_alpha = 1.0 / 255
    pgd_eps = 8.0 / 255
    if self.dataset.name in ['cifar10', 'gtsrb', 'isic2018']:
        pgd_alpha = 1.0 / 255
        pgd_eps = 8.0 / 255
    if self.dataset.name in ['sample_imagenet', 'sample_vggface2']:
        pgd_alpha = 0.25 / 255
        pgd_eps = 2.0 / 255
    # Targeted PGD used only to classify samples as easy/normal/difficult
    # (stop_threshold=0.95: stops once target confidence reaches 0.95).
    pgd_checker = PGD(pgd_alpha=pgd_alpha, pgd_eps=pgd_eps, iteration=8,
                      dataset=self.dataset, model=self.model,
                      target_idx=self.target_idx, stop_threshold=0.95)
    easy = 0
    difficult = 0
    normal = 0
    # batch_size=1: the loop operates on one sample at a time.
    loader = self.dataset.get_dataloader(mode='valid', batch_size=1)
    # Optional measurement accumulators, enabled by presence of attributes.
    if 'curvature' in self.__dict__.keys():
        benign_curvature = self.curvature.benign_measure()
        tgt_curvature_list = []
        org_curvature_list = []
    if self.randomized_smooth:
        org_conf_list = []
        tgt_conf_list = []
    if 'magnet' in self.__dict__.keys():
        org_magnet_list = []
        tgt_magnet_list = []
    for data in loader:
        print(easy, normal, difficult)
        # Stop after 100 "normal" samples have been processed.
        if normal >= 100:
            break
        # Reload clean weights so each sample starts from the same model.
        self.model.load()
        _input, _label = self.model.remove_misclassify(data)
        if len(_label) == 0:
            continue
        target_label = self.model.generate_target(_input, idx=self.target_idx)
        self.temp_input = _input
        self.temp_label = target_label
        # Probe attack difficulty: _iter is the iteration count the checker
        # needed (None -> never reached the stop threshold).
        _, _iter = pgd_checker.optimize(_input)
        if _iter is None:
            difficult += 1
            continue
        if _iter < 4:
            easy += 1
            continue
        normal += 1
        target_conf, target_acc, clean_acc = self.validate_fn()
        # `noise` is filled in-place by self.optimize; its inf-norm is the
        # reported perturbation size.
        noise = torch.zeros_like(_input)
        trigger_input = self.optimize(_input=_input, _label=target_label,
                                      epochs=epochs, noise=noise, **kwargs)
        pgd_norm = float(noise.norm(p=float('inf')))
        # Validate again after optimization; overwrites the pre-attack values.
        target_conf, target_acc, clean_acc = self.validate_fn()
        target_conf_list.append(target_conf)
        target_acc_list.append(target_acc)
        # Clean-accuracy drop relative to the stored baseline, floored at 0.
        clean_acc_list.append(max(self.clean_acc - clean_acc, 0.0))
        pgd_norm_list.append(pgd_norm)
        print(f'[{total+1} / 100]\n'
              f'target confidence: {np.mean(target_conf_list)}({np.std(target_conf_list)})\n'
              f'target accuracy: {np.mean(target_acc_list)}({np.std(target_acc_list)})\n'
              f'clean accuracy Drop: {np.mean(clean_acc_list)}({np.std(clean_acc_list)})\n'
              f'PGD Norm: {np.mean(pgd_norm_list)}({np.std(pgd_norm_list)})\n\n\n')
        # Confidence of the perturbed input on original vs. target label.
        org_conf = self.model.get_target_prob(_input=trigger_input, target=_label)
        tgt_conf = self.model.get_target_prob(_input=trigger_input, target=target_label)
        if 'curvature' in self.__dict__.keys():
            # Compare loss-curvature of perturbed inputs against the benign
            # distribution via a two-sample KS test.
            org_curvature_list.extend(self.curvature.measure(trigger_input, _label).detach().cpu().tolist())
            tgt_curvature_list.extend(self.curvature.measure(trigger_input, target_label).detach().cpu().tolist())
            print('Curvature:')
            print(f' org_curvature: {ks_2samp(org_curvature_list, benign_curvature)}')  # type: ignore
            print(f' tgt_curvature: {ks_2samp(tgt_curvature_list, benign_curvature)}')  # type: ignore
            print()
        if self.randomized_smooth:
            org_new = self.model.get_target_prob(_input=trigger_input, target=_label, randomized_smooth=True)
            tgt_new = self.model.get_target_prob(_input=trigger_input, target=target_label, randomized_smooth=True)
            org_increase = (org_new - org_conf).clamp(min=0.0)
            # NOTE(review): named "decrease" but computes (new - old), the
            # opposite sign of the MagNet branch below — confirm intended.
            tgt_decrease = (tgt_new - tgt_conf).clamp(min=0.0)
            org_conf_list.extend(org_increase.detach().cpu().tolist())
            tgt_conf_list.extend(tgt_decrease.detach().cpu().tolist())
            print('Randomized Smooth:')
            print(f' org_confidence: {np.mean(org_conf_list)}')
            print(f' tgt_confidence: {np.mean(tgt_conf_list)}')
            print()
        if 'magnet' in self.__dict__.keys():
            # MagNet reconstruction; note trigger_input is overwritten here,
            # so this branch must stay after the measurements above.
            trigger_input = self.magnet(trigger_input)
            org_new = self.model.get_target_prob(_input=trigger_input, target=_label)
            tgt_new = self.model.get_target_prob(_input=trigger_input, target=target_label)
            org_increase = (org_new - org_conf).clamp(min=0.0)
            tgt_decrease = (tgt_conf - tgt_new).clamp(min=0.0)
            org_magnet_list.extend(org_increase.detach().cpu().tolist())
            tgt_magnet_list.extend(tgt_decrease.detach().cpu().tolist())
            print('MagNet:')
            print(f' org_confidence: {np.mean(org_magnet_list)}')
            print(f' tgt_confidence: {np.mean(tgt_magnet_list)}')
            print()
        total += 1
class AdvTrain(BackdoorDefense):
    """Backdoor defense that adversarially (re-)trains the model with PGD.

    ``detect`` runs PGD-based adversarial training (``adv_train``) and then
    re-validates the attack; adversarial examples for training and
    validation are crafted by the inner ``PGD`` attacker.
    """
    name: str = 'adv_train'

    @classmethod
    def add_argument(cls, group: argparse._ArgumentGroup):
        """Register the PGD hyper-parameter CLI arguments on `group`."""
        super().add_argument(group)
        group.add_argument('--pgd_alpha', type=float)
        group.add_argument('--pgd_eps', type=float)
        group.add_argument('--pgd_iter', type=int)
        return group

    def __init__(self, pgd_alpha: float = 2.0 / 255, pgd_eps: float = 8.0 / 255,
                 pgd_iter: int = 7, **kwargs):
        super().__init__(**kwargs)
        # Register hyper-parameters for the framework's summary machinery.
        self.param_list['adv_train'] = ['pgd_alpha', 'pgd_eps', 'pgd_iter']
        self.pgd_alpha = pgd_alpha
        self.pgd_eps = pgd_eps
        self.pgd_iter = pgd_iter
        # PGD attacker used both for training-time example crafting and for
        # adversarial validation (stop_threshold=None / target_idx=0
        # semantics defined by the project's PGD class).
        self.pgd = PGD(pgd_alpha=pgd_alpha, pgd_eps=pgd_eps, iteration=pgd_iter,
                       stop_threshold=None, target_idx=0,
                       model=self.model, dataset=self.dataset)

    def detect(self, **kwargs):
        """Run the defense: adversarially train, then re-validate the attack."""
        super().detect(**kwargs)
        print()
        self.adv_train(verbose=True, **kwargs)
        self.attack.validate_fn()

    def validate_fn(self, get_data_fn=None, **kwargs) -> tuple[float, float]:
        """Return (adv_acc, clean_acc).

        ``adv_acc`` is forced to 0.0 when clean accuracy dropped more than
        20 points below the stored baseline, so a model that trades away
        too much clean accuracy is never considered an improvement.
        """
        clean_acc, _ = self.model._validate(print_prefix='Validate Clean',
                                            get_data_fn=None, **kwargs)
        adv_acc, _ = self.model._validate(print_prefix='Validate Adv',
                                          get_data_fn=self.get_data, **kwargs)
        if self.clean_acc - clean_acc > 20:
            adv_acc = 0.0
        return adv_acc, clean_acc

    def get_data(self, data: tuple[torch.Tensor, torch.Tensor],
                 **kwargs) -> tuple[torch.Tensor, torch.Tensor]:
        """Transform a raw batch into (adversarial_input, label) via PGD."""
        _input, _label = self.model.get_data(data, **kwargs)
        adv_x, _ = self.pgd.optimize(_input=_input, target=_label)
        return adv_x, _label

    def adv_train(self, epochs: int, optimizer: optim.Optimizer,
                  lr_scheduler: optim.lr_scheduler._LRScheduler = None,
                  validate_interval=10, save=False, verbose=True, indent=0,
                  **kwargs):
        """Adversarial-training loop.

        Each batch first takes one clean-gradient step, then alternates
        ``pgd.iteration`` times between a single PGD perturbation step
        (model in eval mode) and a training step on the resulting
        adversarial example — an alternating attack/train scheme.

        Args:
            epochs: number of training epochs.
            optimizer: optimizer whose param_groups define the trainable set.
            lr_scheduler: optional LR scheduler stepped once per epoch.
            validate_interval: validate every N epochs (0 disables).
            save: save the model whenever the tracked accuracy "improves".
            verbose: print per-epoch progress.
            indent: indentation level for the project's `prints` helper.
        """
        loader_train = self.dataset.loader['train']
        file_path = os.path.join(self.folder_path, self.get_filename() + '.pth')
        best_acc, _ = self.validate_fn(verbose=verbose, indent=indent, **kwargs)
        losses = AverageMeter('Loss', ':.4e')
        top1 = AverageMeter('Acc@1', ':6.2f')
        top5 = AverageMeter('Acc@5', ':6.2f')
        # Collect all trainable parameters from the optimizer so the model
        # can freeze/unfreeze exactly those.
        params: list[nn.Parameter] = []
        for param_group in optimizer.param_groups:
            params.extend(param_group['params'])
        for _epoch in range(epochs):
            losses.reset()
            top1.reset()
            top5.reset()
            epoch_start = time.perf_counter()
            if verbose and env['tqdm']:
                # NOTE(review): rebinding loader_train wraps it in tqdm again
                # on every epoch (tqdm of tqdm from epoch 2 on) — confirm.
                loader_train = tqdm(loader_train)
            self.model.activate_params(params)
            optimizer.zero_grad()
            for data in loader_train:
                _input, _label = self.model.get_data(data)
                # `noise` accumulates the PGD perturbation across the inner
                # iterations (passed to pgd.optimize each step).
                noise = torch.zeros_like(_input)
                adv_x = _input
                # Step 1: one ordinary training step on the clean batch.
                self.model.train()
                loss = self.model.loss(adv_x, _label)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                # Step 2: alternate single PGD steps (eval mode) with
                # training steps on the current adversarial example.
                for m in range(self.pgd.iteration):
                    self.model.eval()
                    adv_x, _ = self.pgd.optimize(_input=_input, noise=noise,
                                                 target=_label, iteration=1)
                    optimizer.zero_grad()
                    self.model.train()
                    loss = self.model.loss(adv_x, _label)
                    loss.backward()
                    optimizer.step()
                    optimizer.zero_grad()
                # Track clean accuracy on the unperturbed batch.
                with torch.no_grad():
                    _output = self.model(_input)
                acc1, acc5 = self.model.accuracy(_output, _label, topk=(1, 5))
                batch_size = int(_label.size(0))
                losses.update(loss.item(), batch_size)
                top1.update(acc1, batch_size)
                top5.update(acc5, batch_size)
            epoch_time = str(
                datetime.timedelta(seconds=int(time.perf_counter() - epoch_start)))
            self.model.eval()
            self.model.activate_params([])
            if verbose:
                pre_str = '{blue_light}Epoch: {0}{reset}'.format(
                    output_iter(_epoch + 1, epochs),
                    **ansi).ljust(64 if env['color'] else 35)
                _str = ' '.join([
                    f'Loss: {losses.avg:.4f},'.ljust(20),
                    f'Top1 Clean Acc: {top1.avg:.3f}, '.ljust(30),
                    f'Top5 Clean Acc: {top5.avg:.3f},'.ljust(30),
                    f'Time: {epoch_time},'.ljust(20),
                ])
                prints(pre_str, _str, prefix='{upline}{clear_line}'.format(
                    **ansi) if env['tqdm'] else '', indent=indent)
            if lr_scheduler:
                lr_scheduler.step()
            if validate_interval != 0:
                if (_epoch + 1) % validate_interval == 0 or _epoch == epochs - 1:
                    adv_acc, _ = self.validate_fn(verbose=verbose,
                                                  indent=indent, **kwargs)
                    # NOTE(review): "best result update" triggers when the
                    # new adversarial accuracy is LOWER than the previous
                    # best — looks inverted (expected `>`); confirm intent.
                    if adv_acc < best_acc:
                        prints('{purple}best result update!{reset}'.format(
                            **ansi), indent=indent)
                        prints(
                            f'Current Acc: {adv_acc:.3f}    Previous Best Acc: {best_acc:.3f}',
                            indent=indent)
                        best_acc = adv_acc
                        if save:
                            self.model.save(file_path=file_path, verbose=verbose)
                    if verbose:
                        print('-' * 50)
        self.model.zero_grad()
class GradTrain(Defense):
    """Defense that trains with an input-gradient (L1) penalty.

    ``detect`` retrains the model with ``self.loss``, which adds
    ``grad_lambda`` times the mean L1 norm of the input gradient (computed
    at randomly perturbed copies of the input) to the ordinary loss.
    Adversarial validation uses the inner ``PGD`` attacker.
    """
    name: str = 'grad_train'

    def __init__(self, pgd_alpha: float = 2.0 / 255, pgd_eps: float = 8.0 / 255,
                 pgd_iter: int = 7, grad_lambda: float = 10, **kwargs):
        super().__init__(**kwargs)
        # Register hyper-parameters for the framework's summary machinery.
        self.param_list['grad_train'] = ['grad_lambda']
        self.grad_lambda = grad_lambda
        self.param_list['adv_train'] = ['pgd_alpha', 'pgd_eps', 'pgd_iter']
        self.pgd_alpha = pgd_alpha
        self.pgd_eps = pgd_eps
        self.pgd_iter = pgd_iter
        # PGD attacker used only for adversarial validation (get_data).
        self.pgd = PGD(pgd_alpha=pgd_alpha, pgd_eps=pgd_eps, iteration=pgd_iter,
                       target_idx=0, stop_threshold=None,
                       model=self.model, dataset=self.dataset)

    def detect(self, **kwargs):
        """Retrain the model with the gradient-penalized loss."""
        return self.model._train(loss_fn=self.loss, validate_fn=self.validate_fn,
                                 verbose=True, **kwargs)

    def loss(self, _input: torch.Tensor, _label: torch.Tensor,
             **kwargs) -> torch.Tensor:
        """Cross-entropy loss plus ``grad_lambda`` * mean L1 input-gradient norm.

        The input is replicated 4 times and perturbed with random noise of
        inf-norm ``pgd_eps`` before computing the gradient penalty, so the
        penalty is evaluated in a neighborhood of the input rather than at
        the exact point.
        """
        # NOTE(review): expand(4, -1, -1, -1) only works if the batch
        # dimension is 1 (or already 4) — confirm callers use batch_size=1.
        new_input = _input.expand(4, -1, -1, -1)
        new_label = _label.expand(4)
        noise = torch.randn_like(new_input)
        # Scale noise to exactly inf-norm pgd_eps.
        noise: torch.Tensor = noise / noise.norm(p=float('inf')) * self.pgd_eps
        new_input = new_input + noise
        # detach() so the graph starts at the perturbed input, then require
        # gradients w.r.t. it for the penalty term.
        new_input = new_input.clamp(0, 1).detach()
        new_input.requires_grad_()
        loss = self.model.loss(new_input, new_label)
        # create_graph=True keeps the gradient differentiable so the penalty
        # itself can be backpropagated through.
        grad = torch.autograd.grad(loss, new_input, create_graph=True)[0]
        # Mean (over the batch) of per-sample L1 norms of the input gradient.
        grad_mean: torch.Tensor = grad.flatten(start_dim=1).norm(p=1, dim=1).mean()
        new_loss = loss + self.grad_lambda * grad_mean
        return new_loss

    def validate_fn(self, get_data_fn=None, loss_fn=None,
                    **kwargs) -> tuple[float, float]:
        """Return (adv_acc, clean_acc); adv_acc zeroed on a >20pt clean drop."""
        # TODO
        clean_acc, _ = self.model._validate(print_prefix='Validate Clean',
                                            get_data_fn=None, **kwargs)
        adv_acc, _ = self.model._validate(print_prefix='Validate Adv',
                                          get_data_fn=self.get_data, **kwargs)
        # todo: Return value
        if self.clean_acc - clean_acc > 20:
            adv_acc = 0.0
        return adv_acc, clean_acc

    def get_data(self, data: tuple[torch.Tensor, torch.Tensor],
                 **kwargs) -> tuple[torch.Tensor, torch.Tensor]:
        """Transform a raw batch into (adversarial_input, label) via PGD."""
        _input, _label = self.model.get_data(data, **kwargs)
        adv_x, _ = self.pgd.optimize(_input=_input, target=_label)
        return adv_x, _label

    def save(self, **kwargs):
        """Save the retrained model with a '_grad_train' suffix."""
        self.model.save(folder_path=self.folder_path, suffix='_grad_train',
                        verbose=True, **kwargs)