def __init__(self, preprocess_layer: str = 'flatten', threshold: float = 5, target_value: float = 10,
             neuron_lr: float = 0.015, neuron_epoch: int = 20, neuron_num: int = 2, **kwargs):
    """Set up the TrojanNN hyper-parameters and the PGD optimizer.

    Args:
        preprocess_layer: Stored on the instance as ``preprocess_layer``.
        threshold: Stored on the instance as ``threshold``.
        target_value: Stored on the instance as ``target_value``.
        neuron_lr: Stored on the instance and handed to ``PGD`` as ``alpha``.
        neuron_epoch: Stored on the instance and handed to ``PGD`` as
            ``iteration``.
        neuron_num: Stored on the instance as ``neuron_num``.
        **kwargs: Forwarded unchanged to the parent constructor.

    Raises:
        Exception: If ``self.mark.random_pos`` is truthy after the parent
            constructor has run.
    """
    super().__init__(**kwargs)

    # Guard clause: a randomly positioned mark is rejected up front.
    if self.mark.random_pos:
        raise Exception('TrojanNN requires "random pos" to be False to max activate neurons.')

    # A single mapping drives both the reported parameter names and the
    # instance attributes, so the two can never drift apart.
    hyper_params = {
        'preprocess_layer': preprocess_layer,
        'threshold': threshold,
        'target_value': target_value,
        'neuron_lr': neuron_lr,
        'neuron_epoch': neuron_epoch,
        'neuron_num': neuron_num,
    }
    self.param_list['trojannn'] = list(hyper_params)
    for attr_name, attr_value in hyper_params.items():
        setattr(self, attr_name, attr_value)

    # Filled in later; nothing is selected at construction time.
    self.neuron_idx = None

    self.pgd = PGD(
        alpha=self.neuron_lr,
        epsilon=1.0,
        iteration=self.neuron_epoch,
        output=0,
    )
def __init__(self, poison_generation_method: str = 'pgd',
             pgd_alpha: float = 2 / 255, pgd_eps: float = 8 / 255, pgd_iter=7,
             tau: float = 0.2, noise_dim: int = 100,
             train_gan: bool = False, generator_iters: int = 1000, critic_iter: int = 5, **kwargs):
    """Configure the clean-label attack for the chosen poison generator.

    Only the values ``'pgd'`` and ``'gan'`` of ``poison_generation_method``
    trigger any setup; for any other value neither ``poison_num`` nor a
    generator object is created here.

    Args:
        poison_generation_method: ``'pgd'`` or ``'gan'``.
        pgd_alpha: PGD ``pgd_alpha``; when ``None`` it is replaced by
            ``1.5 * pgd_eps / pgd_iter``. Used only for ``'pgd'``.
        pgd_eps: PGD ``pgd_eps``. Used only for ``'pgd'``.
        pgd_iter: PGD ``iteration``. Used only for ``'pgd'``.
        tau: Stored as ``self.tau``. Used only for ``'gan'``.
        noise_dim: Passed to ``WGAN``. Used only for ``'gan'``.
        train_gan: Stored as ``self.train_gan``. Used only for ``'gan'``.
        generator_iters: Passed to ``WGAN``. Used only for ``'gan'``.
        critic_iter: Passed to ``WGAN``. Used only for ``'gan'``.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(**kwargs)
    self.param_list['clean_label'] = ['poison_generation_method', 'poison_num']
    self.poison_generation_method: str = poison_generation_method

    # NOTE(review): self.dataset, self.target_class, self.poison_percent,
    # self.output and self.model are presumably set by the parent
    # constructor -- confirm against the base class.
    if poison_generation_method == 'pgd':
        # Poison budget is a fraction of the target-class training samples.
        target_subset = self.dataset.get_dataset('train', class_list=[self.target_class])
        self.poison_num: int = int(len(target_subset) * self.poison_percent)

        self.param_list['pgd'] = ['pgd_alpha', 'pgd_eps', 'pgd_iter']
        if pgd_alpha is None:
            pgd_alpha = 1.5 * pgd_eps / pgd_iter
        self.pgd_alpha: float = pgd_alpha
        self.pgd_eps: float = pgd_eps
        self.pgd_iter: int = pgd_iter
        self.pgd: PGD = PGD(
            pgd_alpha=pgd_alpha,
            pgd_eps=pgd_eps,
            iteration=pgd_iter,
            target_idx=0,
            output=self.output,
            dataset=self.dataset,
            model=self.model,
        )
    elif poison_generation_method == 'gan':
        # Poison budget is a fraction of the whole training set.
        full_train_set = self.dataset.get_dataset('train')
        self.poison_num: int = int(len(full_train_set) * self.poison_percent)

        self.param_list['gan'] = ['tau', 'noise_dim', 'train_gan', 'critic_iter', 'generator_iters']
        self.tau: float = tau
        self.noise_dim: int = noise_dim
        self.train_gan: bool = train_gan
        self.generator_iters = generator_iters
        self.critic_iter = critic_iter
        self.wgan = WGAN(
            noise_dim=self.noise_dim,
            dim=64,
            data_shape=self.dataset.data_shape,
            generator_iters=self.generator_iters,
            critic_iter=self.critic_iter,
        )
def __init__(self, preprocess_layer: str = 'classifier', poison_generation_method: str = 'pgd',
             pgd_alpha: float = 2 / 255, pgd_epsilon: float = 16 / 255, pgd_iteration=20,
             tau: float = 0.2, noise_dim: int = 100,
             train_gan: bool = False, generator_iters: int = 1000, critic_iter: int = 5, **kwargs):
    """Configure the clean-label attack for the chosen poison generator.

    Only the values ``'pgd'`` and ``'gan'`` of ``poison_generation_method``
    trigger generator-specific setup; ``self.data_shape`` is always set.

    Args:
        preprocess_layer: Accepted but not read anywhere in this constructor.
        poison_generation_method: ``'pgd'`` or ``'gan'``.
        pgd_alpha: PGD ``alpha``. Used only for ``'pgd'``.
        pgd_epsilon: PGD ``epsilon``. Used only for ``'pgd'``.
        pgd_iteration: PGD ``iteration``. Used only for ``'pgd'``.
        tau: Stored as ``self.tau``. Used only for ``'gan'``.
        noise_dim: Passed to ``WGAN``. Used only for ``'gan'``.
        train_gan: Stored as ``self.train_gan``. Used only for ``'gan'``.
        generator_iters: Passed to ``WGAN``. Used only for ``'gan'``.
        critic_iter: Passed to ``WGAN``. Used only for ``'gan'``.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(**kwargs)
    self.param_list['clean_label'] = ['poison_generation_method', 'poison_num']
    self.poison_generation_method: str = poison_generation_method

    use_pgd = poison_generation_method == 'pgd'
    use_gan = poison_generation_method == 'gan'

    # Phase 1: poison budget as a fraction of the relevant training samples.
    if use_pgd:
        # Only target-class samples count for the PGD variant.
        target_subset = self.dataset.get_dataset('train', classes=[self.target_class])
        self.poison_num: int = int(len(target_subset) * self.poison_percent)
    elif use_gan:
        full_train_set = self.dataset.get_dataset('train')
        self.poison_num: int = int(len(full_train_set) * self.poison_percent)

    # Channel count followed by the entries of dataset.n_dim.
    self.data_shape: list[int] = [self.dataset.n_channel, *self.dataset.n_dim]

    # Phase 2: build the generator that matches the selected method.
    if use_pgd:
        self.param_list['pgd'] = ['pgd_alpha', 'pgd_epsilon', 'pgd_iteration']
        self.pgd_alpha: float = pgd_alpha
        self.pgd_epsilon: float = pgd_epsilon
        self.pgd_iteration: int = pgd_iteration
        self.pgd: PGD = PGD(
            alpha=pgd_alpha,
            epsilon=pgd_epsilon,
            iteration=pgd_iteration,
            target_idx=0,
            output=self.output,
            dataset=self.dataset,
            model=self.model,
        )
    elif use_gan:
        self.param_list['gan'] = ['tau', 'noise_dim', 'train_gan', 'critic_iter', 'generator_iters']
        self.tau: float = tau
        self.noise_dim: int = noise_dim
        self.train_gan: bool = train_gan
        self.generator_iters = generator_iters
        self.critic_iter = critic_iter
        self.wgan = WGAN(
            noise_dim=self.noise_dim,
            dim=64,
            data_shape=self.data_shape,
            generator_iters=self.generator_iters,
            critic_iter=self.critic_iter,
        )
class TrojanNN(BadNet):
    r"""
    TrojanNN Backdoor Attack is described in detail in the paper `TrojanNN`_ by Yingqi Liu.

    Based on :class:`trojanzoo.attacks.backdoor.BadNet`,
    TrojanNN preprocesses the watermark pixel values to maximize the neuron activation on rarely used neurons
    to avoid the negative impact of model performance on clean images.

    The authors have posted `original source code`_.

    Args:
        preprocess_layer (str): Name of the model layer whose output is read when
            selecting neurons and optimizing the mark. Default: ``'flatten'``.
        threshold (float): Loss value below which the mark optimization stops early. Default: ``5``.
        target_value (float): Activation value the selected neurons are driven towards. Default: ``10``.
        neuron_lr (float): Step size handed to PGD (as ``pgd_alpha``). Default: ``0.015``.
        neuron_epoch (int): Maximum number of mark optimization iterations. Default: ``20``.
        neuron_num (int): Number of least-activated neurons to select. Default: ``2``.

    .. _TrojanNN:
        https://github.com/PurduePAML/TrojanNN/blob/master/trojan_nn.pdf

    .. _original source code:
        https://github.com/PurduePAML/TrojanNN
    """

    name: str = 'trojannn'

    @classmethod
    def add_argument(cls, group: argparse._ArgumentGroup):
        """Register the TrojanNN command-line options on ``group``.

        The parent's options are registered first. No ``default=`` is given
        to argparse, so an option that is not supplied parses to ``None``.
        """
        super().add_argument(group)
        group.add_argument('--preprocess_layer', dest='preprocess_layer', type=str,
                           help='the chosen feature layer patched by trigger where rare neuron activation is maxmized, defaults to ``flatten``')
        group.add_argument('--threshold', dest='threshold', type=float,
                           help='Trojan Net Threshold, defaults to 5')
        group.add_argument('--target_value', dest='target_value', type=float,
                           help='Trojan Net Target_Value, defaults to 10')
        group.add_argument('--neuron_lr', dest='neuron_lr', type=float,
                           help='Trojan Net learning rate in neuron preprocessing, defaults to 0.015')
        group.add_argument('--neuron_epoch', dest='neuron_epoch', type=int,
                           help='Trojan Net epoch in neuron preprocessing, defaults to 20')
        group.add_argument('--neuron_num', dest='neuron_num', type=int,
                           help='Trojan Net neuron numbers in neuron preprocessing, defaults to 2')

    def __init__(self, preprocess_layer: str = 'flatten', threshold: float = 5, target_value: float = 10,
                 neuron_lr: float = 0.015, neuron_epoch: int = 20, neuron_num: int = 2, **kwargs):
        """Store the hyper-parameters and build the PGD optimizer for the mark.

        Raises:
            ValueError: If ``self.mark.random_pos`` is truthy after the parent
                constructor has run.
        """
        super().__init__(**kwargs)

        # ValueError (a subclass of Exception) pinpoints the bad configuration
        # while remaining catchable by existing ``except Exception`` handlers.
        if self.mark.random_pos:
            raise ValueError('TrojanNN requires "random pos" to be False to max activate neurons.')

        self.param_list['trojannn'] = ['preprocess_layer', 'threshold', 'target_value',
                                       'neuron_lr', 'neuron_epoch', 'neuron_num']
        self.preprocess_layer: str = preprocess_layer
        self.threshold: float = threshold
        self.target_value: float = target_value

        self.neuron_lr: float = neuron_lr
        self.neuron_epoch: int = neuron_epoch
        self.neuron_num: int = neuron_num

        # Selected lazily in ``attack``; ``validate_fn`` checks for None.
        self.neuron_idx = None

        # The same kwargs given to the parent constructor are also forwarded to PGD.
        self.pgd = PGD(pgd_alpha=self.neuron_lr, pgd_eps=1.0,
                       iteration=self.neuron_epoch, output=0, **kwargs)

    def attack(self, *args, **kwargs):
        """Select the neurons, optimize the mark, then run the parent attack.

        Returns:
            Whatever the parent ``attack`` returns.
        """
        self.neuron_idx = self.get_neuron_idx()
        self.mark.mark = self.preprocess_mark(mark=self.mark.mark * self.mark.mask,
                                              neuron_idx=self.neuron_idx)
        # Propagate the parent's result instead of silently dropping it.
        return super().attack(*args, **kwargs)

    # get the neuron idx for preprocess.
    def get_neuron_idx(self) -> torch.Tensor:
        """Return the indices of the ``neuron_num`` least-activated neurons.

        For every batch of the training loader the output of
        ``preprocess_layer`` is averaged into one value per entry of
        dimension 1; the per-batch averages are summed and the indices of the
        smallest sums are returned.
        """
        with torch.no_grad():
            result = []
            loader = self.dataset.loader['train']
            if env['tqdm']:
                loader = tqdm(loader)
            for data in loader:
                _input, _label = self.model.get_data(data)
                fm = self.model.get_layer(_input, layer_output=self.preprocess_layer)
                if fm.dim() > 2:
                    # Collapse every dimension from index 2 onward into its mean.
                    fm = fm.flatten(start_dim=2).mean(dim=2)
                # Average over dimension 0 (presumably the batch axis -- TODO confirm).
                fm = fm.mean(dim=0)
                result.append(fm.detach())
            if env['tqdm']:
                print('{upline}{clear_line}'.format(**ansi))
            # Ascending sort: the first ``neuron_num`` entries are the smallest sums.
            return torch.stack(result).sum(dim=0).argsort(descending=False)[:self.neuron_num]

    def get_neuron_value(self, x: torch.Tensor, neuron_idx: torch.Tensor) -> torch.Tensor:
        """Return the mean ``preprocess_layer`` output of ``x`` over ``neuron_idx``."""
        return self.model.get_layer(x, layer_output=self.preprocess_layer)[:, neuron_idx].mean()

    # train the mark to activate the least-used neurons.
    def preprocess_mark(self, mark: torch.Tensor, neuron_idx: torch.Tensor, **kwargs) -> torch.Tensor:
        """Optimize ``mark`` so the selected neurons approach ``target_value``.

        Runs at most ``neuron_epoch`` single-step PGD updates and stops early
        once the loss drops below ``threshold``. The neuron value is printed
        before and after the optimization.

        Args:
            mark: Initial mark tensor.
            neuron_idx: Indices of the neurons to drive.
            **kwargs: Accepted but unused.

        Returns:
            The optimized, masked and detached mark.
        """
        with torch.no_grad():
            print("Neuron Value Before Preprocessing: ", self.get_neuron_value(mark, neuron_idx))

        def loss_fn(X: torch.Tensor):
            # L2 distance between the selected neurons' dim-0 mean activation
            # and the target value.
            fm = self.model.get_layer(X, layer_output=self.preprocess_layer)
            loss = fm[:, neuron_idx].mean(dim=0) - self.target_value
            return loss.norm(p=2)

        noise = torch.zeros_like(mark)
        x = mark
        for _ in range(self.neuron_epoch):
            cost = loss_fn(x)
            if cost < self.threshold:
                break
            # One PGD step per loop iteration; the perturbation is carried
            # across iterations through ``noise``.
            # NOTE(review): presumably craft_example updates ``noise`` in
            # place -- confirm against the PGD implementation.
            x, _ = self.pgd.craft_example(mark, noise=noise, iteration=1, loss_fn=loss_fn)
            # Multiply both the perturbation and the result by the mark mask.
            noise = noise * self.mark.mask
            x = x * self.mark.mask
        x = x.detach()
        with torch.no_grad():
            print("Neuron Value After Preprocessing: ", self.get_neuron_value(x, neuron_idx))
        return x

    def validate_fn(self, get_data_fn=None, **kwargs) -> tuple[float, float]:
        """Print the current neuron value (once neurons are selected) and defer to the parent."""
        if self.neuron_idx is not None:
            with torch.no_grad():
                print("Neuron Value After Preprocessing: ",
                      self.get_neuron_value(self.mark.mark * self.mark.mask, self.neuron_idx))
        return super().validate_fn(get_data_fn=get_data_fn, **kwargs)