def __call__(self, model: Model, inputs: T, criterion: Union[Misclassification, T]) -> T:
    x0, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion

    if not isinstance(criterion_, Misclassification):
        raise ValueError("unsupported criterion")
    labels = criterion_.labels

    def loss_fn(inputs: ep.Tensor) -> ep.Tensor:
        logits = model(inputs)
        return ep.crossentropy(logits, labels).sum()

    x = x0
    if self.random_start:
        x = x + ep.uniform(x, x.shape, -self.epsilon, self.epsilon)
        x = ep.clip(x, *model.bounds)

    for _ in range(self.steps):
        _, gradients = ep.value_and_grad(loss_fn, x)
        gradients = gradients.sign()
        x = x + self.stepsize * gradients
        x = x0 + ep.clip(x - x0, -self.epsilon, self.epsilon)
        x = ep.clip(x, *model.bounds)

    return restore_type(x)
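# A minimal NumPy sketch of the projected sign-gradient (L-infinity PGD) update used
# above, assuming a precomputed gradient; the names (stepsize, epsilon, bounds) mirror
# the attributes above and are illustrative only, not part of the attack class.
import numpy as np

def linf_pgd_step(x, x0, grad, stepsize, epsilon, bounds=(0.0, 1.0)):
    # ascend along the sign of the gradient
    x = x + stepsize * np.sign(grad)
    # project back into the epsilon-ball around the original input
    x = x0 + np.clip(x - x0, -epsilon, epsilon)
    # and back into the valid input range
    return np.clip(x, *bounds)

x0 = np.full((1, 3), 0.5)
grad = np.array([[0.5, -0.2, 0.0]])
print(linf_pgd_step(x0, x0, grad, stepsize=0.01, epsilon=0.03))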
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, TargetedMisclassification, T],
    *,
    epsilon: float,
    mc: int,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x0, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    # perform a gradient ascent (untargeted attack) or descent (targeted attack)
    if isinstance(criterion_, Misclassification):
        gradient_step_sign = 1.0
        classes = criterion_.labels
    elif hasattr(criterion_, "target_classes"):
        gradient_step_sign = -1.0
        classes = criterion_.target_classes  # type: ignore
    else:
        raise ValueError("unsupported criterion")

    loss_fn = self.get_loss_fn(model, classes)

    if self.abs_stepsize is None:
        stepsize = self.rel_stepsize * epsilon
    else:
        stepsize = self.abs_stepsize

    if self.random_start:
        x = self.get_random_start(x0, epsilon)
        x = ep.clip(x, *model.bounds)
    else:
        x = x0

    for _ in range(self.steps):
        # accumulate the gradient over mc stochastic forward passes
        gradient_sum = 0.0
        for _ in range(mc):
            _, gradients = self.value_and_grad(loss_fn, x)
            gradient_sum += gradients
        gradients = self.normalize(gradient_sum, x=x, bounds=model.bounds)
        x = x + gradient_step_sign * stepsize * gradients
        x = self.project(x, x0, epsilon)
        x = ep.clip(x, *model.bounds)

    return restore_type(x)
def __call__(self, model: Model, inputs: T, criterion: Union[Misclassification, T]) -> T:
    x, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion

    N = len(x)

    if isinstance(criterion_, Misclassification):
        classes = criterion_.labels
    else:
        raise ValueError("unsupported criterion")

    if classes.shape != (N,):
        raise ValueError(f"expected labels to have shape ({N},), got {classes.shape}")

    min_, max_ = model.bounds
    x_l2_norm = flatten(x.square()).sum(1)

    def loss_fun(x: ep.Tensor) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
        logits = model(x)
        scores = ep.softmax(logits)
        pred_scores = scores[range(N), classes]
        loss = pred_scores.sum()
        return loss, (scores, pred_scores)

    for i in range(self.steps):
        # (1) get the scores and gradients
        _, (scores, pred_scores), gradients = ep.value_aux_and_grad(loss_fun, x)

        pred = scores.argmax(-1)
        num_classes = scores.shape[-1]

        # (2) calculate gradient norm
        gradients_l2_norm = flatten(gradients.square()).sum(1)

        # (3) calculate delta
        a = self.stepsize * x_l2_norm * gradients_l2_norm
        b = pred_scores - 1.0 / num_classes
        delta = ep.minimum(a, b)

        # (4) stop the attack if an adversarial example has been found
        # this is not described in the paper but otherwise once the prob. drops
        # below chance level the likelihood is not decreased but increased
        is_not_adversarial = (pred == classes).float32()
        delta *= is_not_adversarial

        # (5) calculate & apply current perturbation
        a = atleast_kd(delta / gradients_l2_norm.square(), gradients.ndim)
        x -= a * gradients

        x = ep.clip(x, min_, max_)

    return restore_type(x)
def __call__(
    self,
    inputs,
    labels,
    *,
    rescale=False,
    epsilon=2.0,
    step_size=0.4,
    num_steps=10,
):
    def loss_fn(inputs: ep.Tensor, labels: ep.Tensor) -> ep.Tensor:
        logits = ep.astensor(self.model.forward(inputs.tensor))
        return ep.crossentropy(logits, labels).sum()

    if rescale:
        min_, max_ = self.model.bounds()
        scale = (max_ - min_) * np.sqrt(np.prod(inputs.shape[1:]))
        epsilon = epsilon * scale
        step_size = step_size * scale

    x = ep.astensor(inputs)
    y = ep.astensor(labels)
    assert x.shape[0] == y.shape[0]
    assert y.ndim == 1

    x0 = x

    for _ in range(num_steps):
        _, gradients = ep.value_and_grad(loss_fn, x, y)
        gradients = normalize_l2_norms(gradients)
        x = x + step_size * gradients
        x = x0 + clip_l2_norms(x - x0, epsilon)
        x = ep.clip(x, *self.model.bounds())

    return x.tensor
def draw_line(x1, y1, x2, y2, radius, color, t, blend=0.75):
    '''
    Draws a line onto an image tensor. All units are in pixels.

    Parameters:
        x1: x position for endpoint 1
        y1: y position for endpoint 1
        x2: x position for endpoint 2
        y2: y position for endpoint 2
        radius: line width (radius)
        color: rgb color tensor with shape (3,) and values in the range 0.0-1.0
        t: image tensor to draw onto
        blend (optional): blending distance

    Returns:
        tensor with the line drawn onto it
    '''
    if type(t) == torch.Tensor:
        t = t.permute(1, 2, 0)
    t = ep.astensor(t)
    uvx, uvy = make_uv(t)
    pax, pay, bax, bay = uvx - x1, uvy - y1, x2 - x1, y2 - y1
    h = ep.clip((pax * bax + pay * bay) / (bax * bax + bay * bay), 0.0, 1.0)
    dlx, dly = pax - bax * h, pay - bay * h
    dist = (dlx * dlx + dly * dly).sqrt() - radius
    t = dist_to_col(dist, color, blend, t)
    t = t.raw
    if type(t) == torch.Tensor:
        t = t.permute(2, 0, 1)
    return t
def clip_perturbation(self, references: T, perturbed: T, epsilon: float) -> T:
    """Clips the perturbations to epsilon and returns the new perturbed inputs.

    Args:
        references: A batch of reference inputs.
        perturbed: A batch of perturbed inputs.
        epsilon: The maximum allowed size of the perturbation.

    Returns:
        A tensor like perturbed but with the perturbation clipped to epsilon.
    """
    (x, y), restore_type = ep.astensors_(references, perturbed)
    p = y - x

    if self.p == ep.inf:
        clipped_perturbation = ep.clip(p, -epsilon, epsilon)
        return restore_type(x + clipped_perturbation)

    norms = ep.norms.lp(flatten(p), self.p, axis=-1)
    norms = ep.maximum(norms, 1e-12)  # avoid division by zero
    factor = epsilon / norms
    factor = ep.minimum(1, factor)  # clipping -> decreasing but not increasing

    if self.p == 0:
        if (factor == 1).all():
            return perturbed
        raise NotImplementedError("reducing L0 norms not yet supported")

    factor = atleast_kd(factor, x.ndim)
    clipped_perturbation = factor * p
    return restore_type(x + clipped_perturbation)
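# A small NumPy sketch of the same perturbation clipping for p = inf and p = 2,
# assuming batched inputs; purely illustrative and not part of the class above.
import numpy as np

def clip_perturbation_np(x, y, epsilon, p):
    delta = y - x
    if p == np.inf:
        return x + np.clip(delta, -epsilon, epsilon)
    norms = np.maximum(np.linalg.norm(delta.reshape(len(delta), -1), ord=p, axis=-1), 1e-12)
    factor = np.minimum(1.0, epsilon / norms).reshape(-1, *([1] * (delta.ndim - 1)))
    return x + factor * delta  # shrink, never enlarge, the perturbation

x = np.zeros((2, 4))
y = np.array([[1.0, 0.0, 0.0, 0.0], [0.1, 0.0, 0.0, 0.0]])
print(clip_perturbation_np(x, y, epsilon=0.5, p=2))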
def dist_to_col(dist, color, blend, t):
    # smoothstep mask: 0 on the shape, 1 outside the blend region
    msk = ep.clip((dist + blend) / (2.0 * blend), 0.0, 1.0)
    msk = msk * msk * (3.0 - 2.0 * msk)
    msk = msk.expand_dims(axis=2).tile([1, 1, 3])
    col_t = ep.astensor(color).expand_dims(axis=0).expand_dims(axis=0).tile(
        [t.shape[0], t.shape[1], 1])
    return msk * t + (1.0 - msk) * col_t
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, T],
    *,
    epsilon: float,
    mc: int,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x0, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    if not isinstance(criterion_, Misclassification):
        raise ValueError("unsupported criterion")
    labels = criterion_.labels

    loss_fn = self.get_loss_fn(model, labels)

    if self.abs_stepsize is None:
        stepsize = self.rel_stepsize * epsilon
    else:
        stepsize = self.abs_stepsize

    if self.random_start:
        x = self.get_random_start(x0, epsilon)
        x = ep.clip(x, *model.bounds)
    else:
        x = x0

    for _ in range(self.steps):
        # accumulate the gradient over mc stochastic forward passes
        gradient_sum = 0
        for _ in range(mc):
            _, gradients = self.value_and_grad(loss_fn, x)
            gradient_sum += gradients
        gradients = self.normalize(gradient_sum, x=x, bounds=model.bounds)
        x = x + stepsize * gradients
        x = self.project(x, x0, epsilon)
        x = ep.clip(x, *model.bounds)

    return restore_type(x)
def apply_noise(
    self,
    x: ep.TensorType,
    noise: ep.TensorType,
    epsilon: float,
    channel_axis: Optional[int],
) -> ep.TensorType:
    if noise.shape != x.shape and channel_axis is not None:
        # upscale noise
        noise = rescale_images(noise, x.shape, channel_axis)
    # clip noise to valid linf bounds
    noise = ep.clip(noise, -epsilon, +epsilon)
    # clip to image bounds
    return ep.clip(x + noise, 0.0, 1.0)
def __call__(self, model: Model, inputs: T, criterion: Union[Misclassification, T]) -> T:
    x, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion

    N = len(x)

    if isinstance(criterion_, Misclassification):
        classes = criterion_.labels
    else:
        raise ValueError("unsupported criterion")

    if classes.shape != (N,):
        raise ValueError(f"expected labels to have shape ({N},), got {classes.shape}")

    bounds = model.bounds

    def loss_fun(delta: ep.Tensor, logits: ep.Tensor) -> ep.Tensor:
        assert x.shape[0] == logits.shape[0]
        assert delta.shape == x.shape

        x_hat = x + delta
        logits_hat = model(x_hat)
        loss = ep.kl_div_with_logits(logits, logits_hat).sum()
        return loss

    value_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=False)

    clean_logits = model(x)

    # start with random vector as search vector
    d = ep.normal(x, shape=x.shape, mean=0, stddev=1)
    for it in range(self.iterations):
        # normalize proposal to be unit vector
        d = d * self.xi / atleast_kd(ep.norms.l2(flatten(d), axis=-1), x.ndim)

        # use gradient of KL divergence as new search vector
        _, grad = value_and_grad(d, clean_logits)
        d = grad

        # rescale search vector
        d = (bounds[1] - bounds[0]) * d

        if ep.any(ep.norms.l2(flatten(d), axis=-1) < 1e-64):
            raise RuntimeError("Gradient vanished; this can happen if xi is too small.")

    final_delta = (
        self.epsilon / ep.sqrt((d ** 2).sum(keepdims=True, axis=(1, 2, 3))) * d
    )
    x_adv = ep.clip(x + final_delta, *bounds)
    return restore_type(x_adv)
def __call__(self, model, input_data, labels, epsilon):
    labels = ep.astensor(labels)
    loss_function = self.get_loss_function(model, labels)
    modified_data = input_data

    # FGSM algorithm
    _, gradients = ep.value_and_grad(loss_function, input_data)
    gradient_sign = gradients.sign()
    modified_data = input_data + epsilon * gradient_sign
    modified_data = ep.clip(modified_data, *model.bounds)

    return modified_data
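# The same single-step FGSM update in plain NumPy for reference, assuming the
# gradient of the loss w.r.t. the input is already available; illustrative only.
import numpy as np

def fgsm_np(x, grad, epsilon, bounds=(0.0, 1.0)):
    # one step of size epsilon along the gradient sign, clipped to the input bounds
    return np.clip(x + epsilon * np.sign(grad), *bounds)

print(fgsm_np(np.array([0.5, 0.5]), np.array([-1.0, 2.0]), epsilon=0.1))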
def __call__(self, model: Model, inputs, labels):
    inputs, labels, restore = wrap(inputs, labels)

    def loss_fn(inputs):
        logits = model.forward(inputs)
        return ep.crossentropy(logits, labels).sum()

    x = x0 = inputs

    if self.random_start:
        x = x + ep.uniform(x, x.shape, -self.epsilon, self.epsilon)
        x = ep.clip(x, *model.bounds())

    for _ in range(self.steps):
        _, gradients = ep.value_and_grad(loss_fn, x)
        gradients = gradients.sign()
        x = x + self.stepsize * gradients
        x = x0 + ep.clip(x - x0, -self.epsilon, self.epsilon)
        x = ep.clip(x, *model.bounds())

    return restore(x)
def __call__(
    self,
    inputs,
    labels,
    *,
    rescale=False,
    epsilon=0.3,
    step_size=0.05,
    num_steps=10,
    random_start=False,
):
    def loss_fn(inputs: ep.Tensor, labels: ep.Tensor) -> ep.Tensor:
        logits = ep.astensor(self.model.forward(inputs.tensor))
        return ep.crossentropy(logits, labels).sum()

    if rescale:
        min_, max_ = self.model.bounds()
        scale = max_ - min_
        epsilon = epsilon * scale
        step_size = step_size * scale

    x = ep.astensor(inputs)
    y = ep.astensor(labels)
    assert x.shape[0] == y.shape[0]
    assert y.ndim == 1

    x0 = x

    if random_start:
        x = x + ep.uniform(x, x.shape, -epsilon, epsilon)
        x = ep.clip(x, *self.model.bounds())

    for _ in range(num_steps):
        _, gradients = ep.value_and_grad(loss_fn, x, y)
        gradients = gradients.sign()
        x = x + step_size * gradients
        x = x0 + ep.clip(x - x0, -epsilon, epsilon)
        x = ep.clip(x, *self.model.bounds())

    return x.tensor
def apply_noise(
    self,
    x: ep.TensorType,
    noise: ep.TensorType,
    epsilon: float,
    channel_axis: Optional[int],
) -> ep.TensorType:
    if noise.shape != x.shape and channel_axis is not None:
        # upscale noise
        noise = rescale_images(noise, x.shape, channel_axis)
    # clip noise to valid linf bounds, then clip the result to the image bounds
    noise = ep.clip(noise, -epsilon, +epsilon)
    return ep.clip(x + noise, 0.0, 1.0)
def approximate_gradients(
    self,
    is_adversarial: Callable[[ep.Tensor], ep.Tensor],
    x_advs: ep.Tensor,
    steps: int,
    delta: ep.Tensor,
) -> ep.Tensor:
    # (steps, bs, ...)
    noise_shape = tuple([steps] + list(x_advs.shape))
    if self.constraint == "l2":
        rv = ep.normal(x_advs, noise_shape)
    elif self.constraint == "linf":
        rv = ep.uniform(x_advs, low=-1, high=1, shape=noise_shape)
    rv /= atleast_kd(ep.norms.l2(flatten(rv, keep=1), -1), rv.ndim) + 1e-12

    scaled_rv = atleast_kd(ep.expand_dims(delta, 0), rv.ndim) * rv

    perturbed = ep.expand_dims(x_advs, 0) + scaled_rv
    perturbed = ep.clip(perturbed, 0, 1)

    rv = (perturbed - x_advs) / atleast_kd(ep.expand_dims(delta + 1e-8, 0), rv.ndim)

    multipliers_list: List[ep.Tensor] = []
    for step in range(steps):
        decision = is_adversarial(perturbed[step])
        multipliers_list.append(
            ep.where(
                decision,
                ep.ones(x_advs, (len(x_advs),)),
                -ep.ones(x_advs, (len(decision),)),
            )
        )
    # (steps, bs, ...)
    multipliers = ep.stack(multipliers_list, 0)

    vals = ep.where(
        ep.abs(ep.mean(multipliers, axis=0, keepdims=True)) == 1,
        multipliers,
        multipliers - ep.mean(multipliers, axis=0, keepdims=True),
    )
    grad = ep.mean(atleast_kd(vals, rv.ndim) * rv, axis=0)

    grad /= ep.norms.l2(atleast_kd(flatten(grad), grad.ndim)) + 1e-12

    return grad
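# A simplified NumPy sketch of the Monte Carlo gradient estimate above for a single
# input: sample random unit directions, query the decision oracle, and average the
# directions weighted by +1/-1 decisions. Purely illustrative, not the class method.
import numpy as np

def approximate_gradient_np(is_adversarial, x_adv, steps, delta, rng=np.random.default_rng(0)):
    rv = rng.normal(size=(steps,) + x_adv.shape)
    rv /= np.linalg.norm(rv.reshape(steps, -1), axis=-1).reshape(steps, *([1] * x_adv.ndim)) + 1e-12
    perturbed = np.clip(x_adv + delta * rv, 0, 1)
    rv = (perturbed - x_adv) / (delta + 1e-8)
    decisions = np.array([1.0 if is_adversarial(p) else -1.0 for p in perturbed])
    # baseline subtraction unless all decisions agree
    vals = decisions - decisions.mean() if abs(decisions.mean()) != 1 else decisions
    grad = (vals.reshape(steps, *([1] * x_adv.ndim)) * rv).mean(axis=0)
    return grad / (np.linalg.norm(grad) + 1e-12)

# toy oracle: adversarial iff the first coordinate exceeds 0.5
print(approximate_gradient_np(lambda p: p[0] > 0.5, np.full(3, 0.5), steps=100, delta=0.1))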
def __call__(self, model: Model, inputs, labels):
    inputs, labels, restore = wrap(inputs, labels)

    def loss_fn(inputs):
        logits = model.forward(inputs)
        return ep.crossentropy(logits, labels).sum()

    x = x0 = inputs

    for _ in range(self.steps):
        _, gradients = ep.value_and_grad(loss_fn, x)
        gradients = normalize_l2_norms(gradients)
        x = x + self.stepsize * gradients
        x = x0 + clip_l2_norms(x - x0, self.epsilon)
        x = ep.clip(x, *model.bounds())

    return restore(x)
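# The helpers normalize_l2_norms and clip_l2_norms used above are not defined in this
# section; the NumPy sketches below are plausible, hypothetical equivalents for
# illustration only: scale each sample to unit L2 norm, and clip each sample's L2
# norm to a given radius.
import numpy as np

def normalize_l2_norms_np(g, eps=1e-12):
    norms = np.linalg.norm(g.reshape(len(g), -1), axis=-1) + eps
    return g / norms.reshape(-1, *([1] * (g.ndim - 1)))

def clip_l2_norms_np(p, radius, eps=1e-12):
    norms = np.linalg.norm(p.reshape(len(p), -1), axis=-1) + eps
    factor = np.minimum(1.0, radius / norms)
    return p * factor.reshape(-1, *([1] * (p.ndim - 1)))

g = np.array([[3.0, 4.0], [0.0, 0.5]])
print(normalize_l2_norms_np(g), clip_l2_norms_np(g, radius=1.0))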
def __call__( self, model: Model, inputs, labels, *, criterion=misclassification, channel_axis: Optional[int] = None, ): """ Parameters ---------- channel_axis The axis across which the noise should be the same (if across_channels is True). If None, will be automatically inferred from the model if possible. """ inputs, labels, restore = wrap(inputs, labels) is_adversarial = get_is_adversarial(criterion, inputs, labels, model) x0 = inputs N = len(x0) shape = list(x0.shape) if self.across_channels and x0.ndim > 2: if channel_axis is None and not hasattr(model, "data_format"): raise ValueError( "cannot infer the data_format from the model, please specify" " channel_axis when calling the attack") elif channel_axis is None: data_format = model.data_format # type: ignore if (data_format is None or data_format != "channels_first" and data_format != "channels_last"): raise ValueError( f"expected data_format to be 'channels_first' or 'channels_last'" ) channel_axis = 1 if data_format == "channels_first" else x0.ndim - 1 elif not 0 <= channel_axis < x0.ndim: raise ValueError( f"expected channel_axis to be in [0, {x0.ndim})") shape[channel_axis] = 1 min_, max_ = model.bounds() r = max_ - min_ result = x0 is_adv = is_adversarial(result) best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf)) min_probability = ep.zeros(x0, N) max_probability = ep.ones(x0, N) stepsizes = max_probability / self.steps p = stepsizes for step in range(self.steps): # add salt and pepper u = ep.uniform(x0, shape) p_ = atleast_kd(p, x0.ndim) salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r pepper = -(u < p_ / 2).astype(x0.dtype) * r x = x0 + salt + pepper x = ep.clip(x, min_, max_) # check if we found new best adversarials norms = flatten(x).square().sum(axis=-1).sqrt() closer = norms < best_advs_norms is_adv = is_adversarial( x) # TODO: ignore those that are not closer anyway is_best_adv = ep.logical_and(is_adv, closer) # update results and search space result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result) best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms) min_probability = ep.where(is_best_adv, 0.5 * p, min_probability) # we set max_probability a bit higher than p because the relationship # between p and norms is not strictly monotonic max_probability = ep.where(is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability) remaining = self.steps - step stepsizes = ep.where( is_best_adv, (max_probability - min_probability) / remaining, stepsizes) reset = p == max_probability p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p) p = ep.minimum(p + stepsizes, max_probability) return restore(result)
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, T],
    *,
    epsilon: float,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x0, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    targeted = False
    if isinstance(criterion_, Misclassification):
        labels = criterion_.labels
    elif isinstance(criterion_, TargetedMisclassification):
        labels = criterion_.target_classes
        targeted = True
    else:
        raise ValueError("unsupported criterion")

    mod = self.mod.copy()
    if not self.nes:
        if self.loss == "logit":
            match_target = extract_target_logits(model, x0, labels)
            mod.update({"match_target": match_target})
        loss_fn = get_loss_fn(model, labels, self.loss, targeted, mod)
    else:
        mod.update({"indiv": 1})
        if self.loss == "logit":
            match_target = extract_target_logits(model, x0, labels)
            mod.update({"match_target": match_target})

        def loss_fn(x):
            fn = get_loss_fn(model, labels, self.loss, targeted, mod)
            _, result = fn(x)
            return result

    if self.abs_stepsize is None:
        stepsize = self.rel_stepsize * epsilon
    else:
        stepsize = self.abs_stepsize

    if self.random_start:
        x = self.get_random_start(x0, epsilon)
        x = ep.clip(x, *model.bounds)
    else:
        x = x0

    if self.nes:
        # NES is a black-box attack: the gradient is estimated by sampling Gaussian
        # perturbations centered around the point of interest.
        import torch

        sigma = epsilon
        with torch.no_grad():
            for i in range(self.steps):
                # holds the gradient estimate
                g = torch.zeros(x.shape).to('cuda')
                if not self.parallel:
                    for _ in range(self.n_samples):
                        delta = torch.normal(0, sigma, size=x.shape).to('cuda')
                        x_torch = x.raw
                        delta_plus = ep.astensor(x_torch + delta)
                        delta_minus = ep.astensor(x_torch - delta)
                        g += (atleast_kd(loss_fn(delta_plus), delta.ndim) * delta).raw
                        g -= (atleast_kd(loss_fn(delta_minus), delta.ndim) * delta).raw
                else:
                    # individual losses are not supported
                    raise NotImplementedError
                g = 1 / (2 * self.n_samples * sigma) * g
                g = self.normalize(g, x=x, bounds=model.bounds)
                if isinstance(criterion_, Misclassification):
                    # step away from the original label
                    x = x + stepsize * g
                else:
                    # step towards the target label
                    x = x - stepsize * g
                x = self.project(x, x0, epsilon)
                x = ep.clip(x, *model.bounds)
        return restore_type(x)

    for _ in range(self.steps):
        _, mean_gradients = self.value_and_grad(loss_fn, x)
        for n in range(2, self.EOT + 1):
            # numerically stable running mean:
            #   mu_n = (1 / n) * sum_{i=1}^n x_i
            #        = mu_{n-1} + (x_n - mu_{n-1}) / n
            _, gradients = self.value_and_grad(loss_fn, x)
            mean_gradients = mean_gradients + (gradients - mean_gradients) / n
        mean_gradients = self.normalize(mean_gradients, x=x, bounds=model.bounds)
        # step away from the original label
        x = x + stepsize * mean_gradients

    return restore_type(x)
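# A minimal NumPy sketch of the NES gradient estimate used in the nes branch above:
# sample Gaussian perturbations, evaluate the loss at antithetic points, and form a
# finite-difference style estimate. Illustrative only; loss_fn is a stand-in.
import numpy as np

def nes_gradient(loss_fn, x, sigma, n_samples, rng=np.random.default_rng(0)):
    g = np.zeros_like(x)
    for _ in range(n_samples):
        delta = rng.normal(0, sigma, size=x.shape)
        g += loss_fn(x + delta) * delta
        g -= loss_fn(x - delta) * delta
    return g / (2 * n_samples * sigma)

# toy quadratic loss: the estimate should point roughly along 2 * x
x = np.array([1.0, -2.0])
print(nes_gradient(lambda z: np.sum(z ** 2), x, sigma=0.1, n_samples=500))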
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, TargetedMisclassification, T],
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    N = len(x)

    if isinstance(criterion_, Misclassification):
        targeted = False
        classes = criterion_.labels
    elif isinstance(criterion_, TargetedMisclassification):
        targeted = True
        classes = criterion_.target_classes
    else:
        raise ValueError("unsupported criterion")

    if classes.shape != (N,):
        name = "target_classes" if targeted else "labels"
        raise ValueError(f"expected {name} to have shape ({N},), got {classes.shape}")

    stepsize = 1.0
    min_, max_ = model.bounds

    def loss_fn(inputs: ep.Tensor, labels: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]:
        logits = model(inputs)
        sign = -1.0 if targeted else 1.0
        loss = sign * ep.crossentropy(logits, labels).sum()
        return loss, logits

    grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

    delta = ep.zeros_like(x)
    epsilon = self.init_epsilon * ep.ones(x, len(x))
    worst_norm = ep.norms.l2(flatten(ep.maximum(x - min_, max_ - x)), -1)

    best_l2 = worst_norm
    best_delta = delta
    adv_found = ep.zeros(x, len(x)).bool()

    for i in range(self.steps):
        # perform cosine annealing of LR starting from 1.0 to 0.01
        stepsize = (
            0.01 + (stepsize - 0.01) * (1 + math.cos(math.pi * i / self.steps)) / 2
        )

        x_adv = x + delta

        _, logits, gradients = grad_and_logits(x_adv, classes)
        gradients = normalize_gradient_l2_norms(gradients)
        is_adversarial = criterion_(x_adv, logits)

        l2 = ep.norms.l2(flatten(delta), axis=-1)
        is_smaller = l2 <= best_l2

        is_both = ep.logical_and(is_adversarial, is_smaller)
        adv_found = ep.logical_or(adv_found, is_adversarial)
        best_l2 = ep.where(is_both, l2, best_l2)

        best_delta = ep.where(atleast_kd(is_both, x.ndim), delta, best_delta)

        # do step
        delta = delta + stepsize * gradients

        epsilon = epsilon * ep.where(is_adversarial, 1.0 - self.gamma, 1.0 + self.gamma)
        epsilon = ep.minimum(epsilon, worst_norm)

        # project to epsilon ball
        delta *= atleast_kd(epsilon / ep.norms.l2(flatten(delta), -1), x.ndim)

        # clip to valid bounds
        delta = ep.clip(x + delta, *model.bounds) - x

    x_adv = x + best_delta
    return restore_type(x_adv)
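# One common (non-compounding) form of the cosine step-size schedule, written out as a
# standalone helper for reference; note that the loop above re-applies the cosine
# factor to the current stepsize rather than to a fixed initial value, so its decay
# compounds across iterations. Illustrative only.
import math

def cosine_annealed_stepsize(step, total_steps, max_stepsize=1.0, min_stepsize=0.01):
    return min_stepsize + (max_stepsize - min_stepsize) * (1 + math.cos(math.pi * step / total_steps)) / 2

print([round(cosine_annealed_stepsize(i, 10), 3) for i in range(11)])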
def run( self, model: Model, inputs: T, criterion: TargetedMisclassification, *, epsilon: float, **kwargs: Any, ) -> T: raise_if_kwargs(kwargs) x, restore_type = ep.astensor_(inputs) del inputs, kwargs N = len(x) if isinstance(criterion, TargetedMisclassification): classes = criterion.target_classes else: raise ValueError("unsupported criterion") if classes.shape != (N, ): raise ValueError( f"expected target_classes to have shape ({N},), got {classes.shape}" ) noise_shape: Union[Tuple[int, int, int, int], Tuple[int, ...]] channel_axis: Optional[int] = None if self.reduced_dims is not None: if x.ndim != 4: raise NotImplementedError( "only implemented for inputs with two spatial dimensions" " (and one channel and one batch dimension)") if self.channel_axis is None: maybe_axis = get_channel_axis(model, x.ndim) if maybe_axis is None: raise ValueError( "cannot infer the data_format from the model, please" " specify channel_axis when initializing the attack") else: channel_axis = maybe_axis else: channel_axis = self.channel_axis % x.ndim if channel_axis == 1: noise_shape = (x.shape[1], *self.reduced_dims) elif channel_axis == 3: noise_shape = (*self.reduced_dims, x.shape[3]) else: raise ValueError( "expected 'channel_axis' to be 1 or 3, got {channel_axis}") else: noise_shape = x.shape[1:] # pragma: no cover def is_adversarial(logits: ep.TensorType) -> ep.TensorType: return ep.argmax(logits, 1) == classes num_plateaus = ep.zeros(x, len(x)) mutation_probability = (ep.ones_like(num_plateaus) * self.min_mutation_probability) mutation_range = ep.ones_like(num_plateaus) * self.min_mutation_range noise_pops = ep.uniform(x, (N, self.population, *noise_shape), -epsilon, epsilon) def calculate_fitness(logits: ep.TensorType) -> ep.TensorType: first = logits[range(N), classes] second = ep.log(ep.exp(logits).sum(1) - first) return first - second n_its_wo_change = ep.zeros(x, (N, )) for step in range(self.steps): fitness_l, is_adv_l = [], [] for i in range(self.population): it = self.apply_noise(x, noise_pops[:, i], epsilon, channel_axis) logits = model(it) f = calculate_fitness(logits) a = is_adversarial(logits) fitness_l.append(f) is_adv_l.append(a) fitness = ep.stack(fitness_l) is_adv = ep.stack(is_adv_l, 1) elite_idxs = ep.argmax(fitness, 0) elite_noise = noise_pops[range(N), elite_idxs] is_adv = is_adv[range(N), elite_idxs] # early stopping if is_adv.all(): return restore_type( # pragma: no cover self.apply_noise(x, elite_noise, epsilon, channel_axis)) probs = ep.softmax(fitness / self.sampling_temperature, 0) parents_idxs = np.stack( [ self.choice( self.population, 2 * self.population - 2, replace=True, p=probs[:, i], ) for i in range(N) ], 1, ) mutations = [ ep.uniform( x, noise_shape, -mutation_range[i].item() * epsilon, mutation_range[i].item() * epsilon, ) for i in range(N) ] new_noise_pops = [elite_noise] for i in range(0, self.population - 1): parents_1 = noise_pops[range(N), parents_idxs[2 * i]] parents_2 = noise_pops[range(N), parents_idxs[2 * i + 1]] # calculate crossover p = probs[parents_idxs[2 * i], range(N)] / ( probs[parents_idxs[2 * i], range(N)] + probs[parents_idxs[2 * i + 1], range(N)]) p = atleast_kd(p, x.ndim) p = ep.tile(p, (1, *noise_shape)) crossover_mask = ep.uniform(p, p.shape, 0, 1) < p children = ep.where(crossover_mask, parents_1, parents_2) # calculate mutation mutation_mask = ep.uniform(children, children.shape) mutation_mask = mutation_mask <= atleast_kd( mutation_probability, children.ndim) children = ep.where(mutation_mask, children + mutations[i], children) # project 
back to epsilon range children = ep.clip(children, -epsilon, epsilon) new_noise_pops.append(children) noise_pops = ep.stack(new_noise_pops, 1) # increase num_plateaus if fitness does not improve # for 100 consecutive steps n_its_wo_change = ep.where(elite_idxs == 0, n_its_wo_change + 1, ep.zeros_like(n_its_wo_change)) num_plateaus = ep.where(n_its_wo_change >= 100, num_plateaus + 1, num_plateaus) n_its_wo_change = ep.where(n_its_wo_change >= 100, ep.zeros_like(n_its_wo_change), n_its_wo_change) mutation_probability = ep.maximum( self.min_mutation_probability, 0.5 * ep.exp( math.log(0.9) * ep.ones_like(num_plateaus) * num_plateaus), ) mutation_range = ep.maximum( self.min_mutation_range, 0.5 * ep.exp( math.log(0.9) * ep.ones_like(num_plateaus) * num_plateaus), ) return restore_type( self.apply_noise(x, elite_noise, epsilon, channel_axis))
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Misclassification,
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x0, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    is_adversarial = get_is_adversarial(criterion_, model)

    N = len(x0)
    shape = list(x0.shape)
    if self.across_channels and x0.ndim > 2:
        if self.channel_axis is None:
            channel_axis = get_channel_axis(model, x0.ndim)
        else:
            channel_axis = self.channel_axis % x0.ndim
        if channel_axis is not None:
            shape[channel_axis] = 1

    min_, max_ = model.bounds
    r = max_ - min_

    result = x0
    is_adv = is_adversarial(result)
    best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf))
    min_probability = ep.zeros(x0, N)
    max_probability = ep.ones(x0, N)
    stepsizes = max_probability / self.steps
    p = stepsizes

    for step in range(self.steps):
        # add salt and pepper
        u = ep.uniform(x0, tuple(shape))
        p_ = atleast_kd(p, x0.ndim)
        salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
        pepper = -(u < p_ / 2).astype(x0.dtype) * r
        x = x0 + salt + pepper
        x = ep.clip(x, min_, max_)

        # check if we found new best adversarials
        norms = flatten(x).norms.l2(axis=-1)
        closer = norms < best_advs_norms
        is_adv = is_adversarial(x)  # TODO: ignore those that are not closer anyway
        is_best_adv = ep.logical_and(is_adv, closer)

        # update results and search space
        result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
        best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
        min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
        # we set max_probability a bit higher than p because the relationship
        # between p and norms is not strictly monotonic
        max_probability = ep.where(is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability)
        remaining = self.steps - step
        stepsizes = ep.where(
            is_best_adv, (max_probability - min_probability) / remaining, stepsizes
        )
        reset = p == max_probability
        p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
        p = ep.minimum(p + stepsizes, max_probability)

    return restore_type(result)
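# A compact NumPy sketch of the salt-and-pepper corruption applied in the loop above:
# with probability p/2 a pixel is pushed to the upper bound (salt) and with
# probability p/2 to the lower bound (pepper). Illustrative only.
import numpy as np

def salt_and_pepper(x, p, bounds=(0.0, 1.0), rng=np.random.default_rng(0)):
    min_, max_ = bounds
    r = max_ - min_
    u = rng.uniform(size=x.shape)
    salt = (u >= 1 - p / 2).astype(x.dtype) * r
    pepper = -(u < p / 2).astype(x.dtype) * r
    return np.clip(x + salt + pepper, min_, max_)

print(salt_and_pepper(np.full((2, 4), 0.5), p=0.5))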
def run( self, model: Model, inputs: T, criterion: Union[Criterion, T], *, early_stop: Optional[float] = None, starting_points: Optional[T] = None, **kwargs: Any, ) -> T: raise_if_kwargs(kwargs) originals, restore_type = ep.astensor_(inputs) del inputs, kwargs verify_input_bounds(originals, model) criterion = get_criterion(criterion) is_adversarial = get_is_adversarial(criterion, model) if starting_points is None: init_attack: MinimizationAttack if self.init_attack is None: init_attack = LinearSearchBlendedUniformNoiseAttack(steps=50) logging.info( f"Neither starting_points nor init_attack given. Falling" f" back to {init_attack!r} for initialization.") else: init_attack = self.init_attack # TODO: use call and support all types of attacks (once early_stop is # possible in __call__) x_advs = init_attack.run(model, originals, criterion, early_stop=early_stop) else: x_advs = ep.astensor(starting_points) is_adv = is_adversarial(x_advs) if not is_adv.all(): failed = is_adv.logical_not().float32().sum() if starting_points is None: raise ValueError( f"init_attack failed for {failed} of {len(is_adv)} inputs") else: raise ValueError( f"{failed} of {len(is_adv)} starting_points are not adversarial" ) del starting_points tb = TensorBoard(logdir=self.tensorboard) # Project the initialization to the boundary. x_advs = self._binary_search(is_adversarial, originals, x_advs) assert ep.all(is_adversarial(x_advs)) distances = self.distance(originals, x_advs) for step in range(self.steps): delta = self.select_delta(originals, distances, step) # Choose number of gradient estimation steps. num_gradient_estimation_steps = int( min([ self.initial_num_evals * math.sqrt(step + 1), self.max_num_evals ])) gradients = self.approximate_gradients( is_adversarial, x_advs, num_gradient_estimation_steps, delta) if self.constraint == "linf": update = ep.sign(gradients) else: update = gradients if self.stepsize_search == "geometric_progression": # find step size. epsilons = distances / math.sqrt(step + 1) while True: x_advs_proposals = ep.clip( x_advs + atleast_kd(epsilons, x_advs.ndim) * update, 0, 1) success = is_adversarial(x_advs_proposals) epsilons = ep.where(success, epsilons, epsilons / 2.0) if ep.all(success): break # Update the sample. x_advs = ep.clip( x_advs + atleast_kd(epsilons, update.ndim) * update, 0, 1) assert ep.all(is_adversarial(x_advs)) # Binary search to return to the boundary. x_advs = self._binary_search(is_adversarial, originals, x_advs) assert ep.all(is_adversarial(x_advs)) elif self.stepsize_search == "grid_search": # Grid search for stepsize. epsilons_grid = ep.expand_dims( ep.from_numpy( distances, np.logspace( -4, 0, num=20, endpoint=True, dtype=np.float32), ), 1, ) * ep.expand_dims(distances, 0) proposals_list = [] for epsilons in epsilons_grid: x_advs_proposals = ( x_advs + atleast_kd(epsilons, update.ndim) * update) x_advs_proposals = ep.clip(x_advs_proposals, 0, 1) mask = is_adversarial(x_advs_proposals) x_advs_proposals = self._binary_search( is_adversarial, originals, x_advs_proposals) # only use new values where initial guess was already adversarial x_advs_proposals = ep.where(atleast_kd(mask, x_advs.ndim), x_advs_proposals, x_advs) proposals_list.append(x_advs_proposals) proposals = ep.stack(proposals_list, 0) proposals_distances = self.distance( ep.expand_dims(originals, 0), proposals) minimal_idx = ep.argmin(proposals_distances, 0) x_advs = proposals[minimal_idx] distances = self.distance(originals, x_advs) # log stats tb.histogram("norms", distances, step) return restore_type(x_advs)
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, T],
    *,
    epsilon: float,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x0, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    if isinstance(criterion_, Misclassification):
        labels = criterion_.labels
    elif isinstance(criterion_, TargetedMisclassification):
        labels = criterion_.target_classes
    else:
        raise ValueError("unsupported criterion")

    if self.loss == 'ce':
        loss_fn = self.get_loss_fn(model, labels)
    elif self.loss == 'dlr':
        loss_fn = self.get_dlr_loss_fn(model, labels)
    else:
        assert False, "Unrecognized loss function"

    if self.abs_stepsize is None:
        self.stepsize = self.rel_stepsize * epsilon
    else:
        self.stepsize = self.abs_stepsize

    if self.random_start:
        x = self.get_random_start(x0, epsilon)
        x = ep.clip(x, *model.bounds)
    else:
        x = x0

    for i in range(self.steps):
        _, mean_gradients = self.value_and_grad(loss_fn, x)
        for n in range(2, self.EOT + 1):
            # numerically stable running mean:
            #   mu_n = (1 / n) * sum_{i=1}^n x_i
            #        = mu_{n-1} + (x_n - mu_{n-1}) / n
            _, gradients = self.value_and_grad(loss_fn, x)
            mean_gradients = mean_gradients + (gradients - mean_gradients) / n
        mean_gradients = self.normalize(mean_gradients, x=x, bounds=model.bounds)

        get_stepsize = self.get_stepsize_fn()
        stepsize = get_stepsize(i)

        if isinstance(criterion_, Misclassification):
            # step away from the original label
            x = x + stepsize * mean_gradients
        else:
            # step towards the target label
            x = x - stepsize * mean_gradients

        x = self.project(x, x0, epsilon)
        x = ep.clip(x, *model.bounds)

    return restore_type(x)
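# The running-mean update used above, mu_n = mu_{n-1} + (x_n - mu_{n-1}) / n, checked
# against the ordinary mean in NumPy. Illustrative only.
import numpy as np

samples = np.random.default_rng(0).normal(size=(5, 3))
mean = samples[0]
for n, sample in enumerate(samples[1:], start=2):
    mean = mean + (sample - mean) / n
assert np.allclose(mean, samples.mean(axis=0))
print(mean)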
def run( self, model: Model, inputs: T, criterion: Union[Misclassification, TargetedMisclassification, T], *, starting_points: Optional[ep.Tensor] = None, early_stop: Optional[float] = None, **kwargs: Any, ) -> T: raise_if_kwargs(kwargs) criterion_ = get_criterion(criterion) if isinstance(criterion_, Misclassification): targeted = False classes = criterion_.labels elif isinstance(criterion_, TargetedMisclassification): targeted = True classes = criterion_.target_classes else: raise ValueError("unsupported criterion") def loss_fn( inputs: ep.Tensor, labels: ep.Tensor ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]: logits = model(inputs) if targeted: c_minimize = best_other_classes(logits, labels) c_maximize = labels # target_classes else: c_minimize = labels # labels c_maximize = best_other_classes(logits, labels) loss = logits[rows, c_minimize] - logits[rows, c_maximize] return -loss.sum(), (logits, loss) x, restore_type = ep.astensor_(inputs) del inputs, criterion, kwargs N = len(x) # start from initialization points/attack if starting_points is not None: x1 = starting_points else: if self.init_attack is not None: x1 = self.init_attack.run(model, x, criterion_) else: x1 = None # if initial points or initialization attacks are provided, # search for the boundary if x1 is not None: is_adv = get_is_adversarial(criterion_, model) assert is_adv(x1).all() lower_bound = ep.zeros(x, shape=(N, )) upper_bound = ep.ones(x, shape=(N, )) for _ in range(self.binary_search_steps): epsilons = (lower_bound + upper_bound) / 2 mid_points = self.mid_points(x, x1, epsilons, model.bounds) is_advs = is_adv(mid_points) lower_bound = ep.where(is_advs, lower_bound, epsilons) upper_bound = ep.where(is_advs, epsilons, upper_bound) starting_points = self.mid_points(x, x1, upper_bound, model.bounds) delta = starting_points - x else: # start from x0 delta = ep.zeros_like(x) if classes.shape != (N, ): name = "target_classes" if targeted else "labels" raise ValueError( f"expected {name} to have shape ({N},), got {classes.shape}") min_, max_ = model.bounds rows = range(N) grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True) if self.p != 0: epsilon = ep.inf * ep.ones(x, len(x)) else: epsilon = ep.ones(x, len(x)) if x1 is None \ else ep.norms.l0(flatten(delta), axis=-1) if self.p != 0: worst_norm = ep.norms.lp(flatten(ep.maximum(x - min_, max_ - x)), p=self.p, axis=-1) else: worst_norm = flatten(ep.ones_like(x)).bool().sum(axis=1).float32() best_lp = worst_norm best_delta = delta adv_found = ep.zeros(x, len(x)).bool() for i in range(self.steps): # perform cosine annealing of learning rates stepsize = (self.min_stepsize + (self.max_stepsize - self.min_stepsize) * (1 + math.cos(math.pi * i / self.steps)) / 2) gamma = (0.001 + (self.gamma - 0.001) * (1 + math.cos(math.pi * (i / self.steps))) / 2) x_adv = x + delta loss, (logits, loss_batch), gradients = grad_and_logits(x_adv, classes) is_adversarial = criterion_(x_adv, logits) lp = ep.norms.lp(flatten(delta), p=self.p, axis=-1) is_smaller = lp <= best_lp is_both = ep.logical_and(is_adversarial, is_smaller) adv_found = ep.logical_or(adv_found, is_adversarial) best_lp = ep.where(is_both, lp, best_lp) best_delta = ep.where(atleast_kd(is_both, x.ndim), delta, best_delta) # update epsilon if self.p != 0: distance_to_boundary = abs(loss_batch) / ep.norms.lp( flatten(gradients), p=self.dual, axis=-1) epsilon = ep.where( is_adversarial, ep.minimum( epsilon * (1 - gamma), ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)), ep.where( adv_found, epsilon * (1 + gamma), 
ep.norms.lp(flatten(delta), p=self.p, axis=-1) + distance_to_boundary)) else: epsilon = ep.where( is_adversarial, ep.minimum( ep.minimum(epsilon - 1, (epsilon * (1 - gamma)).astype(int).astype( epsilon.dtype)), ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)), ep.maximum(epsilon + 1, (epsilon * (1 + gamma)).astype(int).astype( epsilon.dtype))) epsilon = ep.maximum(0, epsilon).astype(epsilon.dtype) # clip epsilon epsilon = ep.minimum(epsilon, worst_norm) # computes normalized gradient update grad_ = self.normalize(gradients, x=x, bounds=model.bounds) * stepsize # do step delta = delta + grad_ # project according to the given norm delta = self.project(x=x + delta, x0=x, epsilon=epsilon) - x # clip to valid bounds delta = ep.clip(x + delta, *model.bounds) - x x_adv = x + best_delta return restore_type(x_adv)
def test_clip(t: Tensor) -> Tensor: return ep.clip(t, 2, 3.5)
def project(self, x: ep.Tensor, x0: ep.Tensor, epsilon: float) -> ep.Tensor: return x0 + ep.clip(x - x0, -epsilon, epsilon)
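# A quick NumPy check of the L-infinity projection above: the projected point never
# leaves the epsilon-ball around x0 and is unchanged if it was already inside it.
# Illustrative only.
import numpy as np

def project_linf(x, x0, epsilon):
    return x0 + np.clip(x - x0, -epsilon, epsilon)

x0 = np.zeros(3)
x = np.array([0.05, -0.2, 0.0])
print(project_linf(x, x0, epsilon=0.1))  # -> [0.05, -0.1, 0.0]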
def __call__( self, inputs, labels, *, p, candidates=10, overshoot=0.02, steps=50, loss="logits", ): """ Parameters ---------- p : int or float Lp-norm that should be minimzed, must be 2 or np.inf. candidates : int Limit on the number of the most likely classes that should be considered. A small value is usually sufficient and much faster. overshoot : float steps : int Maximum number of steps to perform. """ if not (1 <= p <= np.inf): raise ValueError if p not in [2, np.inf]: raise NotImplementedError min_, max_ = self.model.bounds() inputs = ep.astensor(inputs) labels = ep.astensor(labels) N = len(inputs) logits = self.model.forward(inputs) candidates = min(candidates, logits.shape[-1]) classes = logits.argsort(axis=-1).flip(axis=-1) if candidates: assert candidates >= 2 logging.info(f"Only testing the top-{candidates} classes") classes = classes[:, :candidates] i0 = classes[:, 0] rows = ep.arange(inputs, N) if loss == "logits": def loss_fun(x: ep.Tensor, k: int) -> ep.Tensor: logits = self.model.forward(x) ik = classes[:, k] l0 = logits[rows, i0] lk = logits[rows, ik] loss = lk - l0 return loss.sum(), (loss, logits) elif loss == "crossentropy": def loss_fun(x: ep.Tensor, k: int) -> ep.Tensor: logits = self.model.forward(x) ik = classes[:, k] l0 = -ep.crossentropy(logits, i0) lk = -ep.crossentropy(logits, ik) loss = lk - l0 return loss.sum(), (loss, logits) else: raise ValueError( f"expected loss to be 'logits' or 'crossentropy', got '{loss}'" ) loss_aux_and_grad = ep.value_and_grad_fn(inputs, loss_fun, has_aux=True) x = x0 = inputs p_total = ep.zeros_like(x) for step in range(steps): # let's first get the logits using k = 1 to see if we are done diffs = [loss_aux_and_grad(x, 1)] _, (_, logits), _ = diffs[0] is_adv = logits.argmax(axis=-1) != labels if is_adv.all(): break # then run all the other k's as well # we could avoid repeated forward passes and only repeat # the backward pass, but this cannot currently be done in eagerpy diffs += [loss_aux_and_grad(x, k) for k in range(2, candidates)] # we don't need the logits diffs = [(losses, grad) for _, (losses, _), grad in diffs] losses = ep.stack([l for l, _ in diffs], axis=1) grads = ep.stack([g for _, g in diffs], axis=1) assert losses.shape == (N, candidates - 1) assert grads.shape == (N, candidates - 1) + x0.shape[1:] # calculate the distances distances = self.get_distances(losses, grads) assert distances.shape == (N, candidates - 1) # determine the best directions best = distances.argmin(axis=1) distances = distances[rows, best] losses = losses[rows, best] grads = grads[rows, best] assert distances.shape == (N, ) assert losses.shape == (N, ) assert grads.shape == x0.shape # apply perturbation distances = distances + 1e-4 # for numerical stability p_step = self.get_perturbations(distances, grads) assert p_step.shape == x0.shape p_total += p_step # don't do anything for those that are already adversarial x = ep.where(atleast_kd(is_adv, x.ndim), x, x0 + (1.0 + overshoot) * p_total) x = ep.clip(x, min_, max_) return x.tensor
def run( self, model: Model, inputs: T, criterion: Union[Criterion, T], *, early_stop: Optional[float] = None, **kwargs: Any, ) -> T: #raise_if_kwargs(kwargs) x, restore_type = ep.astensor_(inputs) del inputs, kwargs verify_input_bounds(x, model) criterion = get_criterion(criterion) min_, max_ = model.bounds logits = model(x) classes = logits.argsort(axis=-1).flip(axis=-1) if self.candidates is None: candidates = logits.shape[-1] # pragma: no cover else: candidates = min(self.candidates, logits.shape[-1]) if not candidates >= 2: raise ValueError( # pragma: no cover f"expected the model output to have atleast 2 classes, got {logits.shape[-1]}" ) logging.info(f"Only testing the top-{candidates} classes") classes = classes[:, :candidates] N = len(x) rows = range(N) loss_fun = self._get_loss_fn(model, classes) loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True) x0 = x p_total = ep.zeros_like(x) for _ in range(self.steps): # let's first get the logits using k = 1 to see if we are done diffs = [loss_aux_and_grad(x, 1)] _, (_, logits), _ = diffs[0] is_adv = criterion(x, logits) if is_adv.all(): break # then run all the other k's as well # we could avoid repeated forward passes and only repeat # the backward pass, but this cannot currently be done in eagerpy diffs += [loss_aux_and_grad(x, k) for k in range(2, candidates)] # we don't need the logits diffs_ = [(losses, grad) for _, (losses, _), grad in diffs] losses = ep.stack([lo for lo, _ in diffs_], axis=1) grads = ep.stack([g for _, g in diffs_], axis=1) assert losses.shape == (N, candidates - 1) assert grads.shape == (N, candidates - 1) + x0.shape[1:] # calculate the distances distances = self.get_distances(losses, grads) assert distances.shape == (N, candidates - 1) # determine the best directions best = distances.argmin(axis=1) distances = distances[rows, best] losses = losses[rows, best] grads = grads[rows, best] assert distances.shape == (N,) assert losses.shape == (N,) assert grads.shape == x0.shape # apply perturbation distances = distances + 1e-4 # for numerical stability p_step = self.get_perturbations(distances, grads) assert p_step.shape == x0.shape p_total += p_step # don't do anything for those that are already adversarial x = ep.where( atleast_kd(is_adv, x.ndim), x, x0 + (1.0 + self.overshoot) * p_total ) x = ep.clip(x, min_, max_) return restore_type(x)
def __call__(self, inputs, labels, *, criterion, steps=1000): originals = ep.astensor(inputs) labels = ep.astensor(labels) def is_adversarial(p: ep.Tensor) -> ep.Tensor: """For each input in x, returns true if it is an adversarial for the given model and criterion""" logits = ep.astensor(self.model.forward(p.tensor)) return criterion(originals, labels, p, logits) x0 = ep.astensor(inputs) N = len(x0) shape = list(x0.shape) if self.channel_axis is not None: shape[self.channel_axis] = 1 min_, max_ = self.model.bounds() r = max_ - min_ result = x0 is_adv = is_adversarial(result) best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf)) min_probability = ep.zeros(x0, N) max_probability = ep.ones(x0, N) stepsizes = max_probability / steps p = stepsizes for step in range(steps): # add salt and pepper u = ep.uniform(x0, shape) p_ = atleast_kd(p, x0.ndim) salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r pepper = -(u < p_ / 2).astype(x0.dtype) * r x = x0 + salt + pepper x = ep.clip(x, min_, max_) # check if we found new best adversarials norms = flatten(x).square().sum(axis=-1).sqrt() closer = norms < best_advs_norms is_adv = is_adversarial(x) # TODO: ignore those that are not closer anyway is_best_adv = ep.logical_and(is_adv, closer) # update results and search space result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result) best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms) min_probability = ep.where(is_best_adv, 0.5 * p, min_probability) # we set max_probability a bit higher than p because the relationship # between p and norms is not strictly monotonic max_probability = ep.where( is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability ) remaining = steps - step stepsizes = ep.where( is_best_adv, (max_probability - min_probability) / remaining, stepsizes ) reset = p == max_probability p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p) p = ep.minimum(p + stepsizes, max_probability) return result.tensor