def uniform_l1_n_balls(dummy: ep.Tensor, batch_size: int, n: int) -> ep.Tensor:
    # https://mathoverflow.net/a/9188
    u = ep.uniform(dummy, (batch_size, n))
    v = u.sort(axis=-1)
    vp = ep.concatenate([ep.zeros(v, (batch_size, 1)), v[:, : n - 1]], axis=-1)
    assert v.shape == vp.shape
    # differences of the sorted uniforms give nonnegative coordinates whose sum
    # is the maximum of the n draws, which has the right radial law for the L1 ball
    x = v - vp
    # random signs spread the samples over all orthants
    sign = ep.uniform(dummy, (batch_size, n), low=-1.0, high=1.0).sign()
    return sign * x
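# A minimal usage sketch of uniform_l1_n_balls (assumes eagerpy with the NumPy
# backend; every sampled row should lie inside the unit L1 ball):
import numpy as np
import eagerpy as ep

dummy = ep.astensor(np.zeros(1, dtype=np.float32))
samples = uniform_l1_n_balls(dummy, batch_size=4, n=10)
assert samples.shape == (4, 10)
assert (ep.norms.l1(samples, axis=-1) <= 1 + 1e-6).all().item()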
def __call__(self, model: Model, inputs: T, criterion: Union[Misclassification, T]) -> T:
    x0, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion

    if not isinstance(criterion_, Misclassification):
        raise ValueError("unsupported criterion")

    labels = criterion_.labels

    def loss_fn(inputs: ep.Tensor) -> ep.Tensor:
        logits = model(inputs)
        return ep.crossentropy(logits, labels).sum()

    x = x0

    if self.random_start:
        x = x + ep.uniform(x, x.shape, -self.epsilon, self.epsilon)
        x = ep.clip(x, *model.bounds)

    for _ in range(self.steps):
        _, gradients = ep.value_and_grad(loss_fn, x)
        gradients = gradients.sign()
        x = x + self.stepsize * gradients
        x = x0 + ep.clip(x - x0, -self.epsilon, self.epsilon)
        x = ep.clip(x, *model.bounds)

    return restore_type(x)
def __call__(self, inputs, labels, *, directions=1000, steps=1000):
    x = ep.astensor(inputs)
    min_, max_ = self.model.bounds()
    N = len(x)

    assert directions >= 1
    for j in range(directions):
        # random noise inputs tend to be classified into the same class,
        # so we might need to make very many draws if the original class
        # is that one
        random_ = ep.uniform(x, x.shape, min_, max_)
        logits_ = self.model.forward(random_)
        classes_ = logits_.argmax(axis=-1)
        is_adv_ = atleast_kd(classes_ != labels, x.ndim)

        if j == 0:
            random = random_
            is_adv = is_adv_
        else:
            cond1 = is_adv.astype(x.dtype)
            cond2 = is_adv_.astype(x.dtype)
            random = cond1 * random + (1 - cond1) * cond2 * random_
            is_adv = is_adv.logical_or(is_adv_)

        if is_adv.all():
            break

    if not is_adv.all():
        warnings.warn(
            f"{self.__class__.__name__} failed to draw sufficient random"
            f" inputs that are adversarial ({is_adv.sum()} / {N})."
        )

    x0 = x
    npdtype = x.numpy().dtype
    epsilons = np.linspace(0, 1, num=steps + 1, dtype=npdtype)
    best = np.ones((N,), dtype=npdtype)

    for epsilon in epsilons:
        x = (1 - epsilon) * x0 + epsilon * random
        # TODO: due to limited floating point precision, clipping can be required
        logits = self.model.forward(x)
        classes = logits.argmax(axis=-1)
        is_adv = (classes != labels).numpy()

        best = np.minimum(
            np.logical_not(is_adv).astype(npdtype) + is_adv.astype(npdtype) * epsilon,
            best,
        )

        if (best < 1).all():
            break

    best = ep.from_numpy(x0, best)
    best = atleast_kd(best, x0.ndim)
    x = (1 - best) * x0 + best * random

    return x.tensor
def approximate_gradients(
    self,
    is_adversarial: Callable[[ep.Tensor], ep.Tensor],
    x_advs: ep.Tensor,
    steps: int,
    delta: ep.Tensor,
) -> ep.Tensor:
    # (steps, bs, ...)
    noise_shape = tuple([steps] + list(x_advs.shape))
    if self.constraint == "l2":
        rv = ep.normal(x_advs, noise_shape)
    elif self.constraint == "linf":
        rv = ep.uniform(x_advs, low=-1, high=1, shape=noise_shape)
    rv /= atleast_kd(ep.norms.l2(flatten(rv, keep=1), -1), rv.ndim) + 1e-12

    scaled_rv = atleast_kd(ep.expand_dims(delta, 0), rv.ndim) * rv

    perturbed = ep.expand_dims(x_advs, 0) + scaled_rv
    perturbed = ep.clip(perturbed, 0, 1)

    rv = (perturbed - x_advs) / atleast_kd(ep.expand_dims(delta + 1e-8, 0), rv.ndim)

    multipliers_list: List[ep.Tensor] = []
    for step in range(steps):
        decision = is_adversarial(perturbed[step])
        multipliers_list.append(
            ep.where(
                decision,
                ep.ones(x_advs, (len(x_advs),)),
                -ep.ones(x_advs, (len(decision),)),
            )
        )
    # (steps, bs, ...)
    multipliers = ep.stack(multipliers_list, 0)

    vals = ep.where(
        ep.abs(ep.mean(multipliers, axis=0, keepdims=True)) == 1,
        multipliers,
        multipliers - ep.mean(multipliers, axis=0, keepdims=True),
    )
    grad = ep.mean(atleast_kd(vals, rv.ndim) * rv, axis=0)

    grad /= ep.norms.l2(atleast_kd(flatten(grad), grad.ndim)) + 1e-12

    return grad
def __call__(self, model: Model, inputs, labels):
    inputs, labels, restore = wrap(inputs, labels)

    def loss_fn(inputs):
        logits = model.forward(inputs)
        return ep.crossentropy(logits, labels).sum()

    x = x0 = inputs

    if self.random_start:
        x = x + ep.uniform(x, x.shape, -self.epsilon, self.epsilon)
        x = ep.clip(x, *model.bounds())

    for _ in range(self.steps):
        _, gradients = ep.value_and_grad(loss_fn, x)
        gradients = gradients.sign()
        x = x + self.stepsize * gradients
        x = x0 + ep.clip(x - x0, -self.epsilon, self.epsilon)
        x = ep.clip(x, *model.bounds())

    return restore(x)
def __call__(
    self,
    inputs,
    labels,
    *,
    rescale=False,
    epsilon=0.3,
    step_size=0.05,
    num_steps=10,
    random_start=False,
):
    def loss_fn(inputs: ep.Tensor, labels: ep.Tensor) -> ep.Tensor:
        logits = ep.astensor(self.model.forward(inputs.tensor))
        return ep.crossentropy(logits, labels).sum()

    if rescale:
        min_, max_ = self.model.bounds()
        scale = max_ - min_
        epsilon = epsilon * scale
        step_size = step_size * scale

    x = ep.astensor(inputs)
    y = ep.astensor(labels)
    assert x.shape[0] == y.shape[0]
    assert y.ndim == 1

    x0 = x

    if random_start:
        x = x + ep.uniform(x, x.shape, -epsilon, epsilon)
        x = ep.clip(x, *self.model.bounds())

    for _ in range(num_steps):
        _, gradients = ep.value_and_grad(loss_fn, x, y)
        gradients = gradients.sign()
        x = x + step_size * gradients
        x = x0 + ep.clip(x - x0, -epsilon, epsilon)
        x = ep.clip(x, *self.model.bounds())

    return x.tensor
def get_random_start(self, x0: ep.Tensor, epsilon: float) -> ep.Tensor:
    return x0 + ep.uniform(x0, x0.shape, -epsilon, epsilon)
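# Sketch of what get_random_start produces: a uniform draw inside the epsilon
# L-infinity ball around x0 (NumPy backend assumed; clipping to the model
# bounds is handled elsewhere by the attack):
import numpy as np
import eagerpy as ep

x0 = ep.astensor(np.zeros((2, 3), dtype=np.float32))
xr = x0 + ep.uniform(x0, x0.shape, -0.1, 0.1)
assert ep.abs(xr - x0).max().item() <= 0.1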
def test_uniform_tuple(t: Tensor) -> Shape:
    return ep.uniform(t, (2, 3)).shape
def test_uniform_scalar(t: Tensor) -> Shape:
    return ep.uniform(t, 5).shape
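# The two tests above exercise both shape conventions accepted by ep.uniform:
# a tuple is used as-is, and a plain int is treated as a 1-D shape (sketch,
# NumPy backend assumed):
import numpy as np
import eagerpy as ep

t = ep.astensor(np.zeros(3, dtype=np.float32))
assert test_uniform_tuple(t) == (2, 3)
assert test_uniform_scalar(t) == (5,)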
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, Any] = None,
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    is_adversarial = get_is_adversarial(criterion_, model)

    min_, max_ = model.bounds

    N = len(x)

    for j in range(self.directions):
        # random noise inputs tend to be classified into the same class,
        # so we might need to make very many draws if the original class
        # is that one
        random_ = ep.uniform(x, x.shape, min_, max_)
        is_adv_ = atleast_kd(is_adversarial(random_), x.ndim)

        if j == 0:
            random = random_
            is_adv = is_adv_
        else:
            random = ep.where(is_adv, random, random_)
            is_adv = is_adv.logical_or(is_adv_)

        if is_adv.all():
            break

    if not is_adv.all():
        warnings.warn(
            f"{self.__class__.__name__} failed to draw sufficient random"
            f" inputs that are adversarial ({is_adv.sum()} / {N})."
        )

    x0 = x

    epsilons = np.linspace(0, 1, num=self.steps + 1, dtype=np.float32)
    best = ep.ones(x, (N,))

    for epsilon in epsilons:
        x = (1 - epsilon) * x0 + epsilon * random
        # TODO: due to limited floating point precision, clipping can be required
        is_adv = is_adversarial(x)

        epsilon = epsilon.item()

        best = ep.minimum(ep.where(is_adv, epsilon, 1.0), best)

        if (best < 1).all():
            break

    best = atleast_kd(best, x0.ndim)
    x = (1 - best) * x0 + best * random

    return restore_type(x)
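# Hedged usage sketch: this run method matches the interface of a Foolbox-style
# blended-uniform-noise attack (LinearSearchBlendedUniformNoiseAttack); the
# attack class name and the model setup below (PyTorch ResNet-18, ImageNet
# samples) are assumptions for illustration only:
import foolbox as fb
import torchvision.models as models

net = models.resnet18(pretrained=True).eval()
preprocessing = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3)
fmodel = fb.PyTorchModel(net, bounds=(0, 1), preprocessing=preprocessing)
images, labels = fb.utils.samples(fmodel, dataset="imagenet", batchsize=4)

attack = fb.attacks.LinearSearchBlendedUniformNoiseAttack(directions=1000, steps=1000)
advs = attack.run(fmodel, images, fb.criteria.Misclassification(labels))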
def __call__(
    self,
    model: Model,
    inputs,
    labels,
    *,
    criterion=misclassification,
    channel_axis: Optional[int] = None,
):
    """
    Parameters
    ----------
    channel_axis
        The axis across which the noise should be the same (if across_channels
        is True). If None, will be automatically inferred from the model if
        possible.
    """
    inputs, labels, restore = wrap(inputs, labels)
    is_adversarial = get_is_adversarial(criterion, inputs, labels, model)

    x0 = inputs
    N = len(x0)
    shape = list(x0.shape)
    if self.across_channels and x0.ndim > 2:
        if channel_axis is None and not hasattr(model, "data_format"):
            raise ValueError(
                "cannot infer the data_format from the model, please specify"
                " channel_axis when calling the attack"
            )
        elif channel_axis is None:
            data_format = model.data_format  # type: ignore
            if (
                data_format is None
                or data_format != "channels_first"
                and data_format != "channels_last"
            ):
                raise ValueError(
                    "expected data_format to be 'channels_first' or 'channels_last'"
                )
            channel_axis = 1 if data_format == "channels_first" else x0.ndim - 1
        elif not 0 <= channel_axis < x0.ndim:
            raise ValueError(f"expected channel_axis to be in [0, {x0.ndim})")

        shape[channel_axis] = 1

    min_, max_ = model.bounds()
    r = max_ - min_

    result = x0
    is_adv = is_adversarial(result)
    best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf))
    min_probability = ep.zeros(x0, N)
    max_probability = ep.ones(x0, N)
    stepsizes = max_probability / self.steps
    p = stepsizes

    for step in range(self.steps):
        # add salt and pepper
        u = ep.uniform(x0, shape)
        p_ = atleast_kd(p, x0.ndim)
        salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
        pepper = -(u < p_ / 2).astype(x0.dtype) * r
        x = x0 + salt + pepper
        x = ep.clip(x, min_, max_)

        # check if we found new best adversarials
        norms = flatten(x).square().sum(axis=-1).sqrt()
        closer = norms < best_advs_norms
        is_adv = is_adversarial(x)  # TODO: ignore those that are not closer anyway
        is_best_adv = ep.logical_and(is_adv, closer)

        # update results and search space
        result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
        best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
        min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
        # we set max_probability a bit higher than p because the relationship
        # between p and norms is not strictly monotonic
        max_probability = ep.where(
            is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability
        )
        remaining = self.steps - step
        stepsizes = ep.where(
            is_best_adv, (max_probability - min_probability) / remaining, stepsizes
        )
        reset = p == max_probability
        p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
        p = ep.minimum(p + stepsizes, max_probability)

    return restore(result)
def run(
    self,
    model: Model,
    inputs: T,
    criterion: TargetedMisclassification,
    *,
    epsilon: float,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    del inputs, kwargs

    N = len(x)

    if isinstance(criterion, TargetedMisclassification):
        classes = criterion.target_classes
    else:
        raise ValueError("unsupported criterion")

    if classes.shape != (N,):
        raise ValueError(
            f"expected target_classes to have shape ({N},), got {classes.shape}"
        )

    noise_shape: Union[Tuple[int, int, int, int], Tuple[int, ...]]
    channel_axis: Optional[int] = None
    if self.reduced_dims is not None:
        if x.ndim != 4:
            raise NotImplementedError(
                "only implemented for inputs with two spatial dimensions"
                " (and one channel and one batch dimension)"
            )

        if self.channel_axis is None:
            maybe_axis = get_channel_axis(model, x.ndim)
            if maybe_axis is None:
                raise ValueError(
                    "cannot infer the data_format from the model, please"
                    " specify channel_axis when initializing the attack"
                )
            else:
                channel_axis = maybe_axis
        else:
            channel_axis = self.channel_axis % x.ndim

        if channel_axis == 1:
            noise_shape = (x.shape[1], *self.reduced_dims)
        elif channel_axis == 3:
            noise_shape = (*self.reduced_dims, x.shape[3])
        else:
            raise ValueError(
                f"expected 'channel_axis' to be 1 or 3, got {channel_axis}"
            )
    else:
        noise_shape = x.shape[1:]  # pragma: no cover

    def is_adversarial(logits: ep.TensorType) -> ep.TensorType:
        return ep.argmax(logits, 1) == classes

    num_plateaus = ep.zeros(x, len(x))
    mutation_probability = (
        ep.ones_like(num_plateaus) * self.min_mutation_probability
    )
    mutation_range = ep.ones_like(num_plateaus) * self.min_mutation_range

    noise_pops = ep.uniform(x, (N, self.population, *noise_shape), -epsilon, epsilon)

    def calculate_fitness(logits: ep.TensorType) -> ep.TensorType:
        first = logits[range(N), classes]
        second = ep.log(ep.exp(logits).sum(1) - first)
        return first - second

    n_its_wo_change = ep.zeros(x, (N,))
    for step in range(self.steps):
        fitness_l, is_adv_l = [], []

        for i in range(self.population):
            it = self.apply_noise(x, noise_pops[:, i], epsilon, channel_axis)
            logits = model(it)
            f = calculate_fitness(logits)
            a = is_adversarial(logits)
            fitness_l.append(f)
            is_adv_l.append(a)

        fitness = ep.stack(fitness_l)
        is_adv = ep.stack(is_adv_l, 1)
        elite_idxs = ep.argmax(fitness, 0)

        elite_noise = noise_pops[range(N), elite_idxs]
        is_adv = is_adv[range(N), elite_idxs]

        # early stopping
        if is_adv.all():
            return restore_type(  # pragma: no cover
                self.apply_noise(x, elite_noise, epsilon, channel_axis)
            )

        probs = ep.softmax(fitness / self.sampling_temperature, 0)
        parents_idxs = np.stack(
            [
                self.choice(
                    self.population,
                    2 * self.population - 2,
                    replace=True,
                    p=probs[:, i],
                )
                for i in range(N)
            ],
            1,
        )

        mutations = [
            ep.uniform(
                x,
                noise_shape,
                -mutation_range[i].item() * epsilon,
                mutation_range[i].item() * epsilon,
            )
            for i in range(N)
        ]

        new_noise_pops = [elite_noise]
        for i in range(0, self.population - 1):
            parents_1 = noise_pops[range(N), parents_idxs[2 * i]]
            parents_2 = noise_pops[range(N), parents_idxs[2 * i + 1]]

            # calculate crossover
            p = probs[parents_idxs[2 * i], range(N)] / (
                probs[parents_idxs[2 * i], range(N)]
                + probs[parents_idxs[2 * i + 1], range(N)]
            )
            p = atleast_kd(p, x.ndim)
            p = ep.tile(p, (1, *noise_shape))

            crossover_mask = ep.uniform(p, p.shape, 0, 1) < p
            children = ep.where(crossover_mask, parents_1, parents_2)

            # calculate mutation
            mutation_mask = ep.uniform(children, children.shape)
            mutation_mask = mutation_mask <= atleast_kd(
                mutation_probability, children.ndim
            )
            children = ep.where(mutation_mask, children + mutations[i], children)

            # project back to epsilon range
            children = ep.clip(children, -epsilon, epsilon)

            new_noise_pops.append(children)

        noise_pops = ep.stack(new_noise_pops, 1)

        # increase num_plateaus if fitness does not improve
        # for 100 consecutive steps
        n_its_wo_change = ep.where(
            elite_idxs == 0, n_its_wo_change + 1, ep.zeros_like(n_its_wo_change)
        )
        num_plateaus = ep.where(
            n_its_wo_change >= 100, num_plateaus + 1, num_plateaus
        )
        n_its_wo_change = ep.where(
            n_its_wo_change >= 100, ep.zeros_like(n_its_wo_change), n_its_wo_change
        )

        mutation_probability = ep.maximum(
            self.min_mutation_probability,
            0.5 * ep.exp(math.log(0.9) * ep.ones_like(num_plateaus) * num_plateaus),
        )
        mutation_range = ep.maximum(
            self.min_mutation_range,
            0.5 * ep.exp(math.log(0.9) * ep.ones_like(num_plateaus) * num_plateaus),
        )

    return restore_type(self.apply_noise(x, elite_noise, epsilon, channel_axis))
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Misclassification,
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x0, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    is_adversarial = get_is_adversarial(criterion_, model)

    N = len(x0)
    shape = list(x0.shape)
    if self.across_channels and x0.ndim > 2:
        if self.channel_axis is None:
            channel_axis = get_channel_axis(model, x0.ndim)
        else:
            channel_axis = self.channel_axis % x0.ndim
        if channel_axis is not None:
            shape[channel_axis] = 1

    min_, max_ = model.bounds
    r = max_ - min_

    result = x0
    is_adv = is_adversarial(result)
    best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf))
    min_probability = ep.zeros(x0, N)
    max_probability = ep.ones(x0, N)
    stepsizes = max_probability / self.steps
    p = stepsizes

    for step in range(self.steps):
        # add salt and pepper
        u = ep.uniform(x0, tuple(shape))
        p_ = atleast_kd(p, x0.ndim)
        salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
        pepper = -(u < p_ / 2).astype(x0.dtype) * r
        x = x0 + salt + pepper
        x = ep.clip(x, min_, max_)

        # check if we found new best adversarials
        norms = flatten(x).norms.l2(axis=-1)
        closer = norms < best_advs_norms
        is_adv = is_adversarial(x)  # TODO: ignore those that are not closer anyway
        is_best_adv = ep.logical_and(is_adv, closer)

        # update results and search space
        result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
        best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
        min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
        # we set max_probability a bit higher than p because the relationship
        # between p and norms is not strictly monotonic
        max_probability = ep.where(
            is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability
        )
        remaining = self.steps - step
        stepsizes = ep.where(
            is_best_adv, (max_probability - min_probability) / remaining, stepsizes
        )
        reset = p == max_probability
        p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
        p = ep.minimum(p + stepsizes, max_probability)

    return restore_type(result)
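# Hedged usage sketch, analogous to the one above: if this run method belongs
# to a Foolbox-style salt-and-pepper noise attack (an assumption), it can be
# driven through the same Model/criterion interface, reusing fmodel, images
# and labels from the earlier sketch:
import foolbox as fb

attack = fb.attacks.SaltAndPepperNoiseAttack(steps=1000)
advs = attack.run(fmodel, images, fb.criteria.Misclassification(labels))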
def _get_vector_dct(self) -> ep.Tensor:
    probs = ep.uniform(self._originals, self._originals.shape, 0, 3).astype(int) - 1
    r_np = self.dcts * probs
    r_np = self._inverse_dct(r_np)
    return r_np + ep.normal(self._originals, r_np.shape, stddev=self._beta)
def uniform_n_ball(dummy: ep.Tensor, n: int) -> ep.Tensor:
    # point on the unit (n-1)-sphere, scaled by a radius with CDF r^n so that
    # the result is uniform inside the unit n-ball
    s = uniform_n_sphere(dummy, n - 1)
    c = ep.uniform(dummy, 1)
    b = c.pow(1 / n) * s
    return b
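# Sketch of uniform_n_ball in use. uniform_n_sphere is not shown in this
# collection; the stand-in below (an assumption) normalizes a Gaussian draw
# onto the unit sphere, so uniform_n_sphere(dummy, n - 1) returns a vector in R^n:
import numpy as np
import eagerpy as ep

def uniform_n_sphere(dummy: ep.Tensor, n: int) -> ep.Tensor:
    g = ep.normal(dummy, (n + 1,))
    return g / (ep.norms.l2(g) + 1e-12)

dummy = ep.astensor(np.zeros(1, dtype=np.float32))
point = uniform_n_ball(dummy, n=3)
assert ep.norms.l2(point).item() <= 1.0 + 1e-6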
def __call__(self, inputs, labels, *, criterion, steps=1000):
    originals = ep.astensor(inputs)
    labels = ep.astensor(labels)

    def is_adversarial(p: ep.Tensor) -> ep.Tensor:
        """For each input in p, returns True if it is adversarial for the
        given model and criterion"""
        logits = ep.astensor(self.model.forward(p.tensor))
        return criterion(originals, labels, p, logits)

    x0 = ep.astensor(inputs)
    N = len(x0)
    shape = list(x0.shape)
    if self.channel_axis is not None:
        shape[self.channel_axis] = 1

    min_, max_ = self.model.bounds()
    r = max_ - min_

    result = x0
    is_adv = is_adversarial(result)
    best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf))

    min_probability = ep.zeros(x0, N)
    max_probability = ep.ones(x0, N)
    stepsizes = max_probability / steps
    p = stepsizes

    for step in range(steps):
        # add salt and pepper
        u = ep.uniform(x0, shape)
        p_ = atleast_kd(p, x0.ndim)
        salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
        pepper = -(u < p_ / 2).astype(x0.dtype) * r
        x = x0 + salt + pepper
        x = ep.clip(x, min_, max_)

        # check if we found new best adversarials
        norms = flatten(x).square().sum(axis=-1).sqrt()
        closer = norms < best_advs_norms
        is_adv = is_adversarial(x)  # TODO: ignore those that are not closer anyway
        is_best_adv = ep.logical_and(is_adv, closer)

        # update results and search space
        result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
        best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
        min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
        # we set max_probability a bit higher than p because the relationship
        # between p and norms is not strictly monotonic
        max_probability = ep.where(
            is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability
        )
        remaining = steps - step
        stepsizes = ep.where(
            is_best_adv, (max_probability - min_probability) / remaining, stepsizes
        )
        reset = p == max_probability
        p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
        p = ep.minimum(p + stepsizes, max_probability)

    return result.tensor