def __call__(self, model: Model, inputs: T,
                 criterion: Union[Misclassification, T]) -> T:
        """Run an iterative signed-gradient attack inside an epsilon box.

        Each of ``self.steps`` iterations moves the candidate by
        ``self.stepsize`` along the sign of the gradient of the summed
        cross-entropy, clips the offset from the original inputs to
        ``[-self.epsilon, self.epsilon]`` and finally clips to ``model.bounds``.

        Args:
            model: callable returning logits; ``model.bounds`` supplies the
                clipping range.
            inputs: batch of inputs to perturb.
            criterion: a ``Misclassification`` criterion, or a value that
                ``get_criterion`` turns into one.

        Returns:
            The perturbed batch, passed through the restore function returned
            by ``ep.astensor_``.

        Raises:
            ValueError: if the resolved criterion is not a ``Misclassification``.
        """
        original, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion

        if not isinstance(criterion_, Misclassification):
            raise ValueError("unsupported criterion")

        labels = criterion_.labels

        def summed_crossentropy(candidate: ep.Tensor) -> ep.Tensor:
            # scalar objective so that a single backward pass covers the batch
            return ep.crossentropy(model(candidate), labels).sum()

        adv = original

        if self.random_start:
            # optional uniform jitter inside the epsilon box
            jitter = ep.uniform(adv, adv.shape, -self.epsilon, self.epsilon)
            adv = ep.clip(adv + jitter, *model.bounds)

        for _ in range(self.steps):
            _, grad = ep.value_and_grad(summed_crossentropy, adv)
            moved = adv + self.stepsize * grad.sign()
            # keep the offset from the original inside the epsilon box
            offset = ep.clip(moved - original, -self.epsilon, self.epsilon)
            adv = ep.clip(original + offset, *model.bounds)

        return restore_type(adv)
Exemple #2
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
        epsilon: float,
        mc: int,
        **kwargs: Any,
    ) -> T:
        """Run an iterative gradient attack that sums ``mc`` gradients per step.

        Args:
            model: model to attack; ``model.bounds`` is used for clipping.
            inputs: batch of inputs to perturb.
            criterion: a ``Misclassification`` criterion, any criterion that
                exposes ``target_classes``, or a value that ``get_criterion``
                turns into one of those.
            epsilon: budget passed to ``self.get_random_start`` and
                ``self.project``; also scales the relative step size.
            mc: number of gradient evaluations accumulated in every step.
            **kwargs: accepted and discarded.

        Returns:
            The perturbed batch, passed through the restore function returned
            by ``ep.astensor_``.

        Raises:
            ValueError: if the criterion is neither of the supported kinds.
        """
        x0, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        # Untargeted: step along the normalized gradient (+1).
        # Targeted: step against it (-1).
        if isinstance(criterion_, Misclassification):
            gradient_step_sign = 1.0
            classes = criterion_.labels
        elif hasattr(criterion_, "target_classes"):
            gradient_step_sign = -1.0
            classes = criterion_.target_classes  # type: ignore
        else:
            raise ValueError("unsupported criterion")

        loss_fn = self.get_loss_fn(model, classes)

        # an explicit absolute step size wins over the epsilon-relative one
        if self.abs_stepsize is None:
            stepsize = self.rel_stepsize * epsilon
        else:
            stepsize = self.abs_stepsize

        if self.random_start:
            x = self.get_random_start(x0, epsilon)
            x = ep.clip(x, *model.bounds)
        else:
            x = x0

        for _ in range(self.steps):
            # accumulate mc gradient evaluations at the same point
            gradient_sum = 0.0
            for _ in range(mc):
                _, gradients = self.value_and_grad(loss_fn, x)
                gradient_sum += gradients
            gradients = self.normalize(gradient_sum, x=x, bounds=model.bounds)
            x = x + gradient_step_sign * stepsize * gradients
            x = self.project(x, x0, epsilon)
            x = ep.clip(x, *model.bounds)

        return restore_type(x)
Exemple #3
    def __call__(self, model: Model, inputs: T,
                 criterion: Union[Misclassification, T]) -> T:
        """Iteratively lower the softmax score of the given labels.

        Every step takes the gradient of the summed softmax scores of
        ``classes`` and moves ``x`` against it. The per-sample step is the
        smaller of ``self.stepsize * x_l2_norm * gradients_l2_norm`` and the
        margin of the score above ``1 / num_classes``. Samples whose argmax
        prediction already differs from ``classes`` are left untouched.

        Args:
            model: callable returning logits; ``model.bounds`` supplies the
                clipping range.
            inputs: batch of inputs to perturb.
            criterion: a ``Misclassification`` criterion, or a value that
                ``get_criterion`` turns into one.

        Returns:
            The perturbed batch, passed through the restore function returned
            by ``ep.astensor_``.

        Raises:
            ValueError: if the criterion is not a ``Misclassification`` or its
                labels do not have shape ``(N,)``.
        """
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion

        N = len(x)

        if isinstance(criterion_, Misclassification):
            classes = criterion_.labels
        else:
            raise ValueError("unsupported criterion")

        if classes.shape != (N, ):
            raise ValueError(
                f"expected labels to have shape ({N},), got {classes.shape}")

        min_, max_ = model.bounds

        # sum of squares per sample, i.e. the squared L2 norm despite the name;
        # computed once from the unperturbed inputs
        x_l2_norm = flatten(x.square()).sum(1)

        def loss_fun(
                x: ep.Tensor) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
            logits = model(x)
            scores = ep.softmax(logits)
            pred_scores = scores[range(N), classes]
            loss = pred_scores.sum()
            return loss, (scores, pred_scores)

        for _ in range(self.steps):
            # (1) get the scores and gradients
            _, (scores,
                pred_scores), gradients = ep.value_aux_and_grad(loss_fun, x)

            pred = scores.argmax(-1)
            num_classes = scores.shape[-1]

            # (2) calculate gradient norm (again a sum of squares)
            gradients_l2_norm = flatten(gradients.square()).sum(1)

            # (3) calculate delta
            a = self.stepsize * x_l2_norm * gradients_l2_norm
            b = pred_scores - 1.0 / num_classes

            delta = ep.minimum(a, b)

            # (4) stop the attack if an adversarial example has been found
            # this is not described in the paper but otherwise once the prob. drops
            # below chance level the likelihood is not decreased but increased
            is_not_adversarial = (pred == classes).float32()
            delta *= is_not_adversarial

            # (5) calculate & apply current perturbation
            a = atleast_kd(delta / gradients_l2_norm.square(), gradients.ndim)
            x -= a * gradients

            x = ep.clip(x, min_, max_)

        return restore_type(x)
Exemple #4
    # NOTE(review): this definition is incomplete as shown. The parameter list
    # of __call__ is missing (the header is followed directly by the nested
    # loss_fn) and the `scale` expression below is not valid Python. The body
    # reads inputs, labels, rescale, epsilon, step_size and num_steps without
    # binding them; presumably they are parameters -- TODO restore the
    # signature from the original source.
    def __call__(
        # summed cross-entropy of the wrapped model's logits; used as the
        # objective for ep.value_and_grad below
        def loss_fn(inputs: ep.Tensor, labels: ep.Tensor) -> ep.Tensor:
            logits = ep.astensor(self.model.forward(inputs.tensor))
            return ep.crossentropy(logits, labels).sum()

        # optionally express epsilon and step_size relative to the model's
        # value range
        if rescale:
            min_, max_ = self.model.bounds()
            # NOTE(review): the argument of np.sqrt is garbled ("[1:]");
            # the intended operand cannot be recovered from here.
            scale = (max_ - min_) * np.sqrt([1:]))
            epsilon = epsilon * scale
            step_size = step_size * scale

        x = ep.astensor(inputs)
        y = ep.astensor(labels)
        # one 1-D label entry per input sample
        assert x.shape[0] == y.shape[0]
        assert y.ndim == 1

        x0 = x

        for _ in range(num_steps):
            _, gradients = ep.value_and_grad(loss_fn, x, y)
            # presumably rescales each sample's gradient to unit L2 norm --
            # helper is defined elsewhere, confirm
            gradients = normalize_l2_norms(gradients)
            x = x + step_size * gradients
            # presumably limits the L2 norm of the offset from x0 to epsilon --
            # helper is defined elsewhere, confirm
            x = x0 + clip_l2_norms(x - x0, epsilon)
            x = ep.clip(x, *self.model.bounds())

        # hand back the underlying tensor rather than the eagerpy wrapper
        return x.tensor
Exemple #5
def draw_line(x1, y1, x2, y2, radius, color, t, blend=0.75):
    """Draws a line onto an image tensor. All units are in pixels

    Args:
        x1: x position for endpoint 1
        y1: y position for endpoint 1
        x2: x position for endpoint 2
        y2: y position for endpoint 2
        radius: line width (radius)
        color: rgb color tensor with shape (3,) and values in the range 0.0-1.0
        t: image tensor to draw onto. A ``torch.Tensor`` is permuted with
            ``(1, 2, 0)`` before drawing and with ``(2, 0, 1)`` afterwards
            (presumably channels-first input -- confirm against callers).
        blend (optional): blending distance

    Returns:
        raw tensor with the line drawn onto it
    """
    # Remember whether the input was permuted so the output is only permuted
    # back in that case. isinstance also covers torch.Tensor subclasses.
    was_torch = isinstance(t, torch.Tensor)
    if was_torch:
        t = t.permute(1, 2, 0)
    t = ep.astensor(t)
    uvx, uvy = make_uv(t)
    # pa: pixel relative to endpoint 1, ba: segment direction
    pax, pay, bax, bay = uvx - x1, uvy - y1, x2 - x1, y2 - y1
    # Projection parameter of each pixel onto the segment, clamped to [0, 1].
    # NOTE: the denominator is zero when both endpoints coincide.
    h = ep.clip((pax * bax + pay * bay) / (bax * bax + bay * bay), 0.0, 1.0)
    dlx, dly = pax - bax * h, pay - bay * h
    # distance to the segment, negative inside the stroke radius
    dist = (dlx * dlx + dly * dly).sqrt() - radius
    t = dist_to_col(dist, color, blend, t)
    t = t.raw
    if was_torch:
        t = t.permute(2, 0, 1)
    return t
Exemple #6
    def clip_perturbation(self, references: T, perturbed: T,
                          epsilon: float) -> T:
        """Clips the perturbations to epsilon and returns the new perturbed inputs.

        Args:
            references: A batch of reference inputs.
            perturbed: A batch of perturbed inputs.
            epsilon: Largest allowed size of ``perturbed - references``,
                measured in the ``self.p`` norm.

        Returns:
            A tensor like perturbed but with the perturbation clipped to epsilon.

        Raises:
            NotImplementedError: if ``self.p == 0`` and at least one
                perturbation would have to be reduced.
        """
        (x, y), restore_type = ep.astensors_(references, perturbed)
        p = y - x
        if self.p == ep.inf:
            # Linf: element-wise clipping of the perturbation
            clipped_perturbation = ep.clip(p, -epsilon, epsilon)
            return restore_type(x + clipped_perturbation)
        norms = ep.norms.lp(flatten(p), self.p, axis=-1)
        norms = ep.maximum(norms, 1e-12)  # avoid division by zero
        factor = epsilon / norms
        factor = ep.minimum(
            1, factor)  # clipping -> decreasing but not increasing
        if self.p == 0:
            # L0 perturbations cannot be shrunk by scaling; only the no-op
            # case is supported
            if (factor == 1).all():
                return perturbed
            raise NotImplementedError("reducing L0 norms not yet supported")
        factor = atleast_kd(factor, x.ndim)
        clipped_perturbation = factor * p
        return restore_type(x + clipped_perturbation)
Exemple #7
def dist_to_col(dist, color, blend, t):
    """Blend ``color`` into ``t`` using a smooth mask derived from ``dist``.

    The mask is 0 where ``dist <= -blend`` (result is ``color``) and 1 where
    ``dist >= blend`` (result is ``t``), with a cubic ramp in between.
    """
    # linear ramp over [-blend, blend], saturated to [0, 1]
    ramp = ep.clip((dist + blend) / (2.0 * blend), 0.0, 1.0)
    # cubic smoothing: 3r^2 - 2r^3
    smooth = ramp * ramp * (3.0 - 2.0 * ramp)
    # repeat the mask three times along a new trailing axis
    mask = smooth.expand_dims(axis=2).tile([1, 1, 3])
    # tile the colour over the first two dimensions of t
    color_plane = ep.astensor(color).expand_dims(axis=0).expand_dims(axis=0)
    color_plane = color_plane.tile([t.shape[0], t.shape[1], 1])
    return mask * t + (1.0 - mask) * color_plane
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, T],
        epsilon: float,
        mc: int,
        **kwargs: Any,
    ) -> T:
        """Run an untargeted gradient attack that sums ``mc`` gradients per step.

        Args:
            model: model to attack; ``model.bounds`` is used for clipping.
            inputs: batch of inputs to perturb.
            criterion: a ``Misclassification`` criterion, or a value that
                ``get_criterion`` turns into one.
            epsilon: budget passed to ``self.get_random_start`` and
                ``self.project``; also scales the relative step size.
            mc: number of gradient evaluations accumulated in every step.
            **kwargs: accepted and discarded.

        Returns:
            The perturbed batch, passed through the restore function returned
            by ``ep.astensor_``.

        Raises:
            ValueError: if the resolved criterion is not a ``Misclassification``.
        """
        x0, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        if not isinstance(criterion_, Misclassification):
            raise ValueError("unsupported criterion")

        labels = criterion_.labels
        loss_fn = self.get_loss_fn(model, labels)

        # an explicit absolute step size wins over the epsilon-relative one
        if self.abs_stepsize is None:
            stepsize = self.rel_stepsize * epsilon
        else:
            stepsize = self.abs_stepsize

        if self.random_start:
            x = self.get_random_start(x0, epsilon)
            x = ep.clip(x, *model.bounds)
        else:
            x = x0

        for _ in range(self.steps):
            # accumulate mc gradient evaluations at the same point
            gradient_sum = 0
            for _ in range(mc):
                _, gradients = self.value_and_grad(loss_fn, x)
                gradient_sum += gradients

            gradients = self.normalize(gradient_sum, x=x, bounds=model.bounds)
            x = x + stepsize * gradients
            x = self.project(x, x0, epsilon)
            x = ep.clip(x, *model.bounds)

        return restore_type(x)
Exemple #9
    def apply_noise(
        x: ep.TensorType,
        noise: ep.TensorType,
        epsilon: float,
        channel_axis: Optional[int],
    ) -> ep.TensorType:
        """Add epsilon-bounded noise to ``x`` and clip the result to [0, 1].

        If ``noise`` has a different shape than ``x`` and ``channel_axis`` is
        given, the noise is first passed through ``rescale_images`` with the
        shape of ``x``.

        NOTE(review): no ``self`` parameter is declared although the block is
        indented like a method -- confirm how it is bound.
        """
        shapes_differ = noise.shape != x.shape
        if shapes_differ and channel_axis is not None:
            # bring the noise to the shape of x
            noise = rescale_images(noise, x.shape, channel_axis)

        # keep the noise inside the linf ball of radius epsilon
        bounded_noise = ep.clip(noise, -epsilon, +epsilon)

        # image bounds are fixed to [0, 1] here
        perturbed = x + bounded_noise
        return ep.clip(perturbed, 0.0, 1.0)
Exemple #10
    def __call__(self, model: Model, inputs: T,
                 criterion: Union[Misclassification, T]) -> T:
        """Perturb inputs along a direction found from the KL-divergence gradient.

        Starting from Gaussian noise, the search vector ``d`` is rescaled to
        L2 norm ``self.xi`` and then replaced by the gradient of the KL
        divergence between the clean logits and the logits at ``x + d``. This
        is repeated ``self.iterations`` times. The final direction is scaled
        to L2 norm ``self.epsilon`` and added to ``x``.

        Args:
            model: callable returning logits; ``model.bounds`` supplies the
                clipping range.
            inputs: batch of inputs to perturb.
            criterion: a ``Misclassification`` criterion, or a value that
                ``get_criterion`` turns into one. Its labels are only
                checked for shape; the loss does not use them.

        Returns:
            The perturbed batch, passed through the restore function returned
            by ``ep.astensor_``.

        Raises:
            ValueError: if the criterion is not a ``Misclassification`` or its
                labels do not have shape ``(N,)``.
            RuntimeError: if the L2 norm of the search vector drops below
                1e-64 for any sample.
        """
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion

        N = len(x)

        if isinstance(criterion_, Misclassification):
            classes = criterion_.labels
        else:
            raise ValueError("unsupported criterion")

        if classes.shape != (N, ):
            raise ValueError(
                f"expected labels to have shape ({N},), got {classes.shape}")

        bounds = model.bounds

        def loss_fun(delta: ep.Tensor, logits: ep.Tensor) -> ep.Tensor:
            assert x.shape[0] == logits.shape[0]
            assert delta.shape == x.shape

            x_hat = x + delta
            logits_hat = model(x_hat)
            loss = ep.kl_div_with_logits(logits, logits_hat).sum()

            return loss

        value_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=False)

        clean_logits = model(x)

        # start with random vector as search vector
        d = ep.normal(x, shape=x.shape, mean=0, stddev=1)
        for _ in range(self.iterations):
            # rescale the proposal to L2 norm xi per sample
            d = d * self.xi / atleast_kd(ep.norms.l2(flatten(d), axis=-1),
                                         x.ndim)

            # use gradient of KL divergence as new search vector
            _, grad = value_and_grad(d, clean_logits)
            d = grad

            # rescale search vector
            d = (bounds[1] - bounds[0]) * d

            if ep.any(ep.norms.l2(flatten(d), axis=-1) < 1e-64):
                raise RuntimeError(
                    "Gradient vanished; this can happen if xi is too small.")

        # Scale the direction to L2 norm epsilon per sample. Taking the norm
        # over the flattened sample matches summing over axes (1, 2, 3) for
        # 4-D inputs and also works for other ranks.
        d_norms = atleast_kd(ep.norms.l2(flatten(d), axis=-1), d.ndim)
        final_delta = self.epsilon / d_norms * d
        x_adv = ep.clip(x + final_delta, *bounds)

        return restore_type(x_adv)
Exemple #11
    def __call__(self, model, input_data, labels, epsilon):
        """Apply one signed-gradient step of size ``epsilon`` to ``input_data``.

        Args:
            model: model handed to ``self.get_loss_function``; ``model.bounds``
                supplies the clipping range.
            input_data: batch to perturb; passed to ``ep.value_and_grad``
                unchanged.
            labels: labels for the loss; converted with ``ep.astensor``.
            epsilon: step size multiplied with the gradient sign.

        Returns:
            The perturbed batch clipped to ``model.bounds``.
        """
        labels = ep.astensor(labels)
        loss_function = self.get_loss_function(model, labels)

        # FGSM: a single step along the sign of the loss gradient
        _, gradients = ep.value_and_grad(loss_function, input_data)
        modified_data = input_data + epsilon * gradients.sign()
        return ep.clip(modified_data, *model.bounds)
    def __call__(self, model: Model, inputs, labels):
        """Run an iterative signed-gradient attack inside an epsilon box.

        Takes ``self.steps`` steps of size ``self.stepsize`` along the sign of
        the summed cross-entropy gradient. After each step the offset from the
        original inputs is clipped to ``[-self.epsilon, self.epsilon]`` and the
        result to ``model.bounds()``. The result goes through the restore
        function returned by ``wrap``.
        """
        inputs, labels, restore = wrap(inputs, labels)

        def summed_crossentropy(batch):
            return ep.crossentropy(model.forward(batch), labels).sum()

        origin = inputs
        adv = origin

        if self.random_start:
            # optional uniform jitter inside the epsilon box
            jitter = ep.uniform(adv, adv.shape, -self.epsilon, self.epsilon)
            adv = ep.clip(adv + jitter, *model.bounds())

        for _ in range(self.steps):
            _, grad = ep.value_and_grad(summed_crossentropy, adv)
            moved = adv + self.stepsize * grad.sign()
            # keep the offset from the original inside the epsilon box
            offset = ep.clip(moved - origin, -self.epsilon, self.epsilon)
            adv = ep.clip(origin + offset, *model.bounds())

        return restore(adv)
Exemple #13
    # NOTE(review): this definition is incomplete as shown. The parameter list
    # of __call__ is missing (the header is followed directly by the nested
    # loss_fn). The body reads inputs, labels, rescale, epsilon, step_size,
    # random_start and num_steps without binding them; presumably they are
    # parameters -- TODO restore the signature from the original source.
    def __call__(
        # summed cross-entropy of the wrapped model's logits; used as the
        # objective for ep.value_and_grad below
        def loss_fn(inputs: ep.Tensor, labels: ep.Tensor) -> ep.Tensor:
            logits = ep.astensor(self.model.forward(inputs.tensor))
            return ep.crossentropy(logits, labels).sum()

        # optionally express epsilon and step_size relative to the model's
        # value range
        if rescale:
            min_, max_ = self.model.bounds()
            scale = max_ - min_
            epsilon = epsilon * scale
            step_size = step_size * scale

        x = ep.astensor(inputs)
        y = ep.astensor(labels)
        # one 1-D label entry per input sample
        assert x.shape[0] == y.shape[0]
        assert y.ndim == 1

        x0 = x

        # optional uniform jitter inside the epsilon box
        if random_start:
            x = x + ep.uniform(x, x.shape, -epsilon, epsilon)
            x = ep.clip(x, *self.model.bounds())

        for _ in range(num_steps):
            _, gradients = ep.value_and_grad(loss_fn, x, y)
            gradients = gradients.sign()
            x = x + step_size * gradients
            # keep the offset from x0 inside [-epsilon, epsilon] element-wise
            x = x0 + ep.clip(x - x0, -epsilon, epsilon)
            x = ep.clip(x, *self.model.bounds())

        # hand back the underlying tensor rather than the eagerpy wrapper
        return x.tensor
Exemple #14
    def apply_noise(
        x: ep.TensorType,
        noise: ep.TensorType,
        epsilon: float,
        channel_axis: Optional[int],
    ) -> ep.TensorType:
        """Add epsilon-bounded noise to ``x`` and clip the result to [0, 1].

        If ``noise`` has a different shape than ``x`` and ``channel_axis`` is
        given, the noise is first passed through ``rescale_images`` with the
        shape of ``x``.

        The earlier form clipped ``noise + x`` to ``[-epsilon, +epsilon]``,
        which confined the whole image to that range. The bound now applies
        to the noise alone.

        NOTE(review): no ``self`` parameter is declared although the block is
        indented like a method -- confirm how it is bound.
        """
        if noise.shape != x.shape and channel_axis is not None:
            # upscale noise
            noise = rescale_images(noise, x.shape, channel_axis)

        # clip noise to valid linf bounds
        noise = ep.clip(noise, -epsilon, +epsilon)

        # clip to image bounds (fixed to [0, 1] here)
        return ep.clip(x + noise, 0.0, 1.0)
Exemple #15
    # Estimates a gradient direction from `steps` random probes around x_advs,
    # using only the boolean decisions returned by is_adversarial.
    #
    # NOTE(review): this definition is incomplete as shown. `self` is read but
    # not declared, and several statements are truncated (the second `rv = ...`
    # assignment, the body of the decision loop, the `ep.where` call). The
    # missing parts cannot be recovered from here -- TODO restore from the
    # original source.
    def approximate_gradients(
        is_adversarial: Callable[[ep.Tensor], ep.Tensor],
        x_advs: ep.Tensor,
        steps: int,
        delta: ep.Tensor,
    ) -> ep.Tensor:
        # (steps, bs, ...)
        noise_shape = tuple([steps] + list(x_advs.shape))
        # Gaussian probes for "l2", uniform probes in [-1, 1] for "linf";
        # any other value of self.constraint leaves rv unbound
        if self.constraint == "l2":
            rv = ep.normal(x_advs, noise_shape)
        elif self.constraint == "linf":
            rv = ep.uniform(x_advs, low=-1, high=1, shape=noise_shape)
        # normalise the probes by their L2 norm; 1e-12 guards against division by zero
        rv /= atleast_kd(ep.norms.l2(flatten(rv, keep=1), -1), rv.ndim) + 1e-12

        # scale the probes by delta, broadcast over the leading steps axis
        scaled_rv = atleast_kd(ep.expand_dims(delta, 0), rv.ndim) * rv

        perturbed = ep.expand_dims(x_advs, 0) + scaled_rv
        # value range is fixed to [0, 1] here
        perturbed = ep.clip(perturbed, 0, 1)

        # NOTE(review): statement truncated -- the second argument of
        # atleast_kd and the closing parenthesis are missing.
        rv = (perturbed - x_advs) / atleast_kd(ep.expand_dims(delta + 1e-8, 0),

        multipliers_list: List[ep.Tensor] = []
        for step in range(steps):
            decision = is_adversarial(perturbed[step])
            # NOTE(review): the statements that turn `decision` into a
            # multiplier and append it to multipliers_list are missing; only
            # the two fragments below survive.
                        (len(x_advs, )),
                        (len(decision, )),
        # (steps, bs, ...)
        multipliers = ep.stack(multipliers_list, 0)

        # NOTE(review): ep.where call truncated -- the third argument and the
        # closing parenthesis are missing.
        vals = ep.where(
            ep.abs(ep.mean(multipliers, axis=0, keepdims=True)) == 1,
            multipliers - ep.mean(multipliers, axis=0, keepdims=True),
        # average the weighted probes over the steps axis
        grad = ep.mean(atleast_kd(vals, rv.ndim) * rv, axis=0)

        # normalise the estimate; 1e-12 guards against division by zero
        grad /= ep.norms.l2(atleast_kd(flatten(grad), grad.ndim)) + 1e-12

        return grad
    def __call__(self, model: Model, inputs, labels):
        """Run an iterative gradient attack constrained by ``clip_l2_norms``.

        Takes ``self.steps`` steps of size ``self.stepsize`` along the summed
        cross-entropy gradient after it has gone through
        ``normalize_l2_norms``. After each step the offset from the original
        inputs is passed through ``clip_l2_norms`` with ``self.epsilon`` and
        the result is clipped to ``model.bounds()``. The result goes through
        the restore function returned by ``wrap``.
        """
        inputs, labels, restore = wrap(inputs, labels)

        def summed_crossentropy(batch):
            return ep.crossentropy(model.forward(batch), labels).sum()

        origin = adv = inputs

        for _ in range(self.steps):
            _, grad = ep.value_and_grad(summed_crossentropy, adv)
            direction = normalize_l2_norms(grad)
            moved = adv + self.stepsize * direction
            # constrain the offset from the original inputs
            offset = clip_l2_norms(moved - origin, self.epsilon)
            adv = ep.clip(origin + offset, *model.bounds())

        return restore(adv)
    # Salt-and-pepper style search: repeatedly sets a random fraction p of the
    # entries to the upper/lower bound and keeps, per sample, the adversarial
    # candidate with the smallest norm seen so far.
    #
    # NOTE(review): this definition is incomplete as shown. Most of the
    # parameter list (self, inputs, labels, criterion are read by the body),
    # the docstring delimiters and the closing parts of several calls are
    # missing -- TODO restore from the original source.
    def __call__(
        model: Model,
        channel_axis: Optional[int] = None,
            The axis across which the noise should be the same (if across_channels is True).
            If None, will be automatically inferred from the model if possible.
        inputs, labels, restore = wrap(inputs, labels)
        is_adversarial = get_is_adversarial(criterion, inputs, labels, model)

        x0 = inputs
        N = len(x0)
        shape = list(x0.shape)
        # when noise is shared across channels, the noise shape gets size 1
        # along the channel axis (set at the end of this branch)
        if self.across_channels and x0.ndim > 2:
            if channel_axis is None and not hasattr(model, "data_format"):
                raise ValueError(
                    "cannot infer the data_format from the model, please specify"
                    " channel_axis when calling the attack")
            elif channel_axis is None:
                data_format = model.data_format  # type: ignore
                if (data_format is None or data_format != "channels_first"
                        and data_format != "channels_last"):
                    # NOTE(review): the end of this raise (rest of the message
                    # and closing parenthesis) is missing.
                    raise ValueError(
                        f"expected data_format to be 'channels_first' or 'channels_last'"
                channel_axis = 1 if data_format == "channels_first" else x0.ndim - 1
            elif not 0 <= channel_axis < x0.ndim:
                raise ValueError(
                    f"expected channel_axis to be in [0, {x0.ndim})")

            shape[channel_axis] = 1

        min_, max_ = model.bounds()
        # width of the valid value range; used as the noise magnitude
        r = max_ - min_

        result = x0
        is_adv = is_adversarial(result)
        # inputs that are already adversarial start with norm 0, others with inf
        best_advs_norms = ep.where(is_adv, ep.zeros(x0, N),
                                   ep.full(x0, N, ep.inf))
        # per-sample search interval for the noise probability p
        min_probability = ep.zeros(x0, N)
        max_probability = ep.ones(x0, N)
        stepsizes = max_probability / self.steps
        p = stepsizes

        for step in range(self.steps):
            # add salt and pepper
            u = ep.uniform(x0, shape)
            p_ = atleast_kd(p, x0.ndim)
            # entries with u in the top p/2 get +r, those in the bottom p/2 get -r
            salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
            pepper = -(u < p_ / 2).astype(x0.dtype) * r
            x = x0 + salt + pepper
            x = ep.clip(x, min_, max_)

            # check if we found new best adversarials
            # NOTE(review): the norm is taken of x itself, not of x - x0 --
            # confirm this is intended.
            norms = flatten(x).square().sum(axis=-1).sqrt()
            closer = norms < best_advs_norms
            is_adv = is_adversarial(
                x)  # TODO: ignore those that are not closer anyway
            is_best_adv = ep.logical_and(is_adv, closer)

            # update results and search space
            result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
            best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
            min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
            # we set max_probability a bit higher than p because the relationship
            # between p and norms is not strictly monotonic
            # NOTE(review): the next two ep.where calls are truncated (third
            # argument and closing parenthesis missing).
            max_probability = ep.where(is_best_adv, ep.minimum(p * 1.2, 1.0),
            remaining = self.steps - step
            stepsizes = ep.where(
                is_best_adv, (max_probability - min_probability) / remaining,
            # restart from min_probability after a new best or once p hit the cap
            reset = p == max_probability
            p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
            p = ep.minimum(p + stepsizes, max_probability)

        return restore(result)
    # Gradient attack with two modes: when self.nes is set, gradients are
    # estimated from loss values at randomly perturbed points; otherwise
    # gradients from self.value_and_grad are averaged over self.EOT evaluations.
    #
    # NOTE(review): this definition is incomplete as shown. `self` is read but
    # not declared, several `else:` lines and docstring delimiters appear to be
    # missing, and a number of calls are truncated. The notes below mark the
    # visible gaps -- TODO restore from the original source.
    def run(
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, T],
        epsilon: float,
        **kwargs: Any,
    ) -> T:
        x0, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs
        targeted = False
        if isinstance(criterion_, Misclassification):
            labels = criterion_.labels
        elif isinstance(criterion_, TargetedMisclassification):
            labels = criterion_.target_classes
            targeted = True
            # NOTE(review): as written this raise sits inside the elif branch;
            # presumably an `else:` line is missing before it.
            raise ValueError("unsupported criterion")

        # work on a copy so that self.mod is not modified
        mod = self.mod.copy()
        if not self.nes:
            if self.loss == "logit":
                match_target = extract_target_logits(model, x0, labels)
                mod.update({"match_target": match_target})
            loss_fn = get_loss_fn(model, labels, self.loss, targeted, mod)
            # NOTE(review): the statements from here to the nested loss_fn
            # look like the branch for self.nes being true; presumably an
            # `else:` line is missing here.
            mod.update({"indiv": 1})
            if self.loss == "logit":
                match_target = extract_target_logits(model, x0, labels)
                mod.update({"match_target": match_target})

            # wrapper that keeps only the second element returned by the
            # loss function
            def loss_fn(x):
                fn = get_loss_fn(model, labels, self.loss, targeted, mod)
                _, result = fn(x)
                return result

        if self.abs_stepsize is None:
            stepsize = self.rel_stepsize * epsilon
            # NOTE(review): as written this overwrites the relative step size
            # with None; presumably an `else:` line is missing.
            stepsize = self.abs_stepsize

        if self.random_start:
            x = self.get_random_start(x0, epsilon)
            x = ep.clip(x, *model.bounds)
            # NOTE(review): as written this discards the random start and
            # leaves x unbound when random_start is false; presumably an
            # `else:` line is missing.
            x = x0

        if self.nes:
            # NOTE(review): the next three lines are bare prose, presumably the
            # body of a docstring whose delimiters are missing.
            If nes is True, then use NES algorithm
            NES is a black box attack algorithm, the basic idea is to to estimate the gradient by sampling with 
            Gaussian distribution centered around the point of interest.
            import torch
            # the sampling standard deviation equals the attack budget
            sigma = epsilon

            # def single_sample_loss_fn(inputs: ep.Tensor, ind: int) -> ep.Tensor:
            #     logits = model(inputs)
            #     return ep.crossentropy(logits, ep.tile(labels[ind:ind + 1], [self.n_samples]))

            with torch.no_grad():
                for i in range(self.steps):
                    # the accumulator is placed on 'cuda' unconditionally
                    g = torch.zeros(x.shape).to(
                        'cuda')  # holds the gradient estimation
                    if not self.parallel:
                        for _ in range(self.n_samples):
                            # delta = ep.normal(ep.PyTorchTensor, x.shape, 0, epsilon)
                            # NOTE(review): torch.normal call truncated
                            # (remaining arguments and closing parenthesis
                            # missing).
                            delta = torch.normal(0, sigma,
                            x_torch = x.raw
                            # symmetric pair of probe points around x
                            delta_plus = ep.astensor(x_torch + delta)
                            delta_minus = ep.astensor(x_torch - delta)

                            # NOTE(review): both accumulation statements below
                            # are truncated after the `*` operator.
                            g += (atleast_kd(loss_fn(delta_plus), delta.ndim) *
                            g -= (
                                atleast_kd(loss_fn(delta_minus), delta.ndim) *
                        # Not supporting individual losses
                        # NOTE(review): presumably an `else:` line for the
                        # parallel case is missing before this raise.
                        raise NotImplementedError
                        # g = torch.zeros(x.shape).to('cuda')  # holds the gradient estimation
                        # for ind in range(x.shape[0]):
                        #     # delta = ep.normal(ep.PyTorchTensor, x.shape, 0, epsilon)
                        #     delta = torch.normal(0, sigma, size=(self.n_samples,) + x.shape[1:]).to('cuda')
                        #     x_torch = x.raw[ind:ind + 1, :]
                        #     delta_plus = ep.astensor(x_torch + delta)
                        #     delta_minus = ep.astensor(x_torch - delta)
                        #     g[ind, :] += (atleast_kd(single_sample_loss_fn(delta_plus, ind), delta.ndim) * delta).sum(axis=0).raw
                        #     g[ind, :] -= (atleast_kd(single_sample_loss_fn(delta_minus, ind), delta.ndim) * delta).sum(axis=0).raw

                    # scale the accumulated differences by 1 / (2 * n_samples * sigma)
                    g = 1 / (2 * self.n_samples * sigma) * g
                    g = self.normalize(g, x=x, bounds=model.bounds)
                    if isinstance(criterion_, Misclassification):
                        # step away from the original label
                        # x = ep.where(is_adv, x, x + stepsize * g)
                        x = x + stepsize * g
                        # NOTE(review): as written the step below undoes the
                        # one above; presumably an `else:` line is missing.
                        # step towards the target label
                        # x = ep.where(is_adv, x, x - stepsize * g)
                        x = x - stepsize * g
                    x = self.project(x, x0, epsilon)
                    x = ep.clip(x, *model.bounds)
                    # is_adv = is_adversarial(x, criterion_)

            # the NES path returns here; the loop below is only reached when
            # self.nes is false
            return restore_type(x)

        for _ in range(self.steps):
            _, mean_gradients = self.value_and_grad(loss_fn, x)
            for n in range(2, self.EOT + 1):
                # NOTE(review): the next five lines are bare prose, presumably
                # the body of a docstring whose delimiters are missing.
                Computes numerically stable mean:
                \mu_n = (1 / n) * sum_{x=1}^n (x_i)
                      = (1 / n) * (x_n + sum_{x=1}^{n-1} (x_i))
                      = (1 / n) * (x_n + (n - 1) \mu_{n-1})
                      = \mu_{n-1} + (1 / n) * (x_n - \mu_{n-1})
                _, gradients = self.value_and_grad(loss_fn, x)
                # incremental update of the running mean
                mean_gradients = mean_gradients + (gradients -
                                                   mean_gradients) / n
            # NOTE(review): self.normalize call truncated (remaining arguments
            # and closing parenthesis missing).
            mean_gradients = self.normalize(mean_gradients,

            # step away from the original label
            # NOTE(review): no projection or clipping after the step is
            # visible in this loop -- confirm against the original source.
            x = x + stepsize * mean_gradients

        return restore_type(x)
Exemple #19
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Search for a small-L2 adversarial perturbation with an adaptive radius.

        In every step the perturbation moves along the normalized loss
        gradient and is rescaled to a per-sample L2 radius ``epsilon``. The
        radius shrinks by ``1 - self.gamma`` when the current candidate is
        adversarial and grows by ``1 + self.gamma`` otherwise. The smallest
        adversarial perturbation seen per sample is returned.

        Args:
            model: callable returning logits; ``model.bounds`` supplies the
                clipping range.
            inputs: batch of inputs to perturb.
            criterion: a ``Misclassification`` or ``TargetedMisclassification``
                criterion, or a value that ``get_criterion`` turns into one.
            early_stop: accepted but not used by this implementation.
            **kwargs: accepted and discarded.

        Returns:
            ``inputs + best_delta``, passed through the restore function
            returned by ``ep.astensor_``. ``best_delta`` stays zero for
            samples where no adversarial candidate was recorded.

        Raises:
            ValueError: if the criterion is unsupported or its classes do not
                have shape ``(N,)``.
        """
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        N = len(x)

        if isinstance(criterion_, Misclassification):
            targeted = False
            classes = criterion_.labels
        elif isinstance(criterion_, TargetedMisclassification):
            targeted = True
            classes = criterion_.target_classes
        else:
            raise ValueError("unsupported criterion")

        if classes.shape != (N, ):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}")

        stepsize = 1.0
        min_, max_ = model.bounds

        def loss_fn(inputs: ep.Tensor,
                    labels: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]:
            logits = model(inputs)

            # targeted attacks minimise the cross-entropy, untargeted maximise it
            sign = -1.0 if targeted else 1.0
            loss = sign * ep.crossentropy(logits, labels).sum()

            return loss, logits

        grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

        delta = ep.zeros_like(x)

        epsilon = self.init_epsilon * ep.ones(x, len(x))
        # L2 norm of the per-element larger distance to either bound; used as
        # the cap for epsilon and the initial best norm
        worst_norm = ep.norms.l2(flatten(ep.maximum(x - min_, max_ - x)), -1)

        best_l2 = worst_norm
        best_delta = delta
        adv_found = ep.zeros(x, len(x)).bool()

        for i in range(self.steps):
            # perform cosine annealing of LR starting from 1.0 to 0.01
            stepsize = (0.01 + (stepsize - 0.01) *
                        (1 + math.cos(math.pi * i / self.steps)) / 2)

            x_adv = x + delta

            _, logits, gradients = grad_and_logits(x_adv, classes)
            gradients = normalize_gradient_l2_norms(gradients)
            is_adversarial = criterion_(x_adv, logits)

            l2 = ep.norms.l2(flatten(delta), axis=-1)
            is_smaller = l2 <= best_l2

            is_both = ep.logical_and(is_adversarial, is_smaller)
            adv_found = ep.logical_or(adv_found, is_adversarial)
            best_l2 = ep.where(is_both, l2, best_l2)

            # keep the current delta where it is adversarial and not larger
            # than the best so far
            best_delta = ep.where(atleast_kd(is_both, x.ndim), delta,
                                  best_delta)

            # do step
            delta = delta + stepsize * gradients

            epsilon = epsilon * ep.where(is_adversarial, 1.0 - self.gamma,
                                         1.0 + self.gamma)
            epsilon = ep.minimum(epsilon, worst_norm)

            # project to epsilon ball
            delta *= atleast_kd(epsilon / ep.norms.l2(flatten(delta), -1),
                                x.ndim)

            # clip to valid bounds
            delta = ep.clip(x + delta, *model.bounds) - x

        x_adv = x + best_delta

        return restore_type(x_adv)
Exemple #20
    # Population-based targeted attack: keeps self.population noise candidates
    # per sample, scores them with a logit-based fitness, and builds the next
    # generation from the fittest candidate plus crossed-over, mutated children.
    #
    # NOTE(review): this definition is incomplete as shown. `self` is read but
    # not declared, several `else:` lines are missing and many calls are
    # truncated. The notes below mark the visible gaps -- TODO restore from
    # the original source.
    def run(
        model: Model,
        inputs: T,
        criterion: TargetedMisclassification,
        epsilon: float,
        **kwargs: Any,
    ) -> T:
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        N = len(x)

        if isinstance(criterion, TargetedMisclassification):
            classes = criterion.target_classes
            # NOTE(review): as written this raise fires for every supported
            # criterion; presumably an `else:` line is missing before it.
            raise ValueError("unsupported criterion")

        if classes.shape != (N, ):
            # NOTE(review): closing parenthesis of this raise is missing.
            raise ValueError(
                f"expected target_classes to have shape ({N},), got {classes.shape}"

        noise_shape: Union[Tuple[int, int, int, int], Tuple[int, ...]]
        channel_axis: Optional[int] = None
        # with reduced_dims set, noise is generated at a smaller spatial size
        # and upscaled later by apply_noise
        if self.reduced_dims is not None:
            if x.ndim != 4:
                raise NotImplementedError(
                    "only implemented for inputs with two spatial dimensions"
                    " (and one channel and one batch dimension)")

            if self.channel_axis is None:
                maybe_axis = get_channel_axis(model, x.ndim)
                if maybe_axis is None:
                    raise ValueError(
                        "cannot infer the data_format from the model, please"
                        " specify channel_axis when initializing the attack")
                    # NOTE(review): the next two assignments presumably belong
                    # to `else:` branches whose header lines are missing.
                    channel_axis = maybe_axis
                channel_axis = self.channel_axis % x.ndim

            if channel_axis == 1:
                noise_shape = (x.shape[1], *self.reduced_dims)
            elif channel_axis == 3:
                noise_shape = (*self.reduced_dims, x.shape[3])
                # NOTE(review): presumably an `else:` line is missing before
                # this raise; the message also lacks the f-string prefix, so
                # {channel_axis} is emitted literally.
                raise ValueError(
                    "expected 'channel_axis' to be 1 or 3, got {channel_axis}")
            # NOTE(review): presumably the `else:` branch of the
            # reduced_dims check; as written it overrides the shapes above.
            noise_shape = x.shape[1:]  # pragma: no cover

        # a candidate counts as adversarial when the argmax equals the target class
        def is_adversarial(logits: ep.TensorType) -> ep.TensorType:
            return ep.argmax(logits, 1) == classes

        num_plateaus = ep.zeros(x, len(x))
        # NOTE(review): right-hand side truncated after the `*` operator.
        mutation_probability = (ep.ones_like(num_plateaus) *
        mutation_range = ep.ones_like(num_plateaus) * self.min_mutation_range

        # initial population: uniform noise in [-epsilon, epsilon]
        noise_pops = ep.uniform(x, (N, self.population, *noise_shape),
                                -epsilon, epsilon)

        # fitness: target logit minus log of (sum of exp(logits) minus the
        # target logit)
        def calculate_fitness(logits: ep.TensorType) -> ep.TensorType:
            first = logits[range(N), classes]
            second = ep.log(ep.exp(logits).sum(1) - first)

            return first - second

        n_its_wo_change = ep.zeros(x, (N, ))
        for step in range(self.steps):
            fitness_l, is_adv_l = [], []

            for i in range(self.population):
                # NOTE(review): apply_noise call truncated, and the statements
                # that append f and a to fitness_l / is_adv_l are missing.
                it = self.apply_noise(x, noise_pops[:, i], epsilon,
                logits = model(it)
                f = calculate_fitness(logits)
                a = is_adversarial(logits)

            fitness = ep.stack(fitness_l)
            is_adv = ep.stack(is_adv_l, 1)
            # index of the fittest candidate per sample
            elite_idxs = ep.argmax(fitness, 0)

            elite_noise = noise_pops[range(N), elite_idxs]
            is_adv = is_adv[range(N), elite_idxs]

            # early stopping
            if is_adv.all():
                return restore_type(  # pragma: no cover
                    self.apply_noise(x, elite_noise, epsilon, channel_axis))

            # selection probabilities from the temperature-scaled fitness
            probs = ep.softmax(fitness / self.sampling_temperature, 0)
            # NOTE(review): the sampling expression inside np.stack is
            # truncated; only fragments of its arguments survive.
            parents_idxs = np.stack(
                        2 * self.population - 2,
                        p=probs[:, i],
                    ) for i in range(N)

            # NOTE(review): the call producing each mutation is truncated;
            # only its range arguments survive.
            mutations = [
                    -mutation_range[i].item() * epsilon,
                    mutation_range[i].item() * epsilon,
                ) for i in range(N)

            # the elite candidate is carried over unchanged
            new_noise_pops = [elite_noise]
            for i in range(0, self.population - 1):
                parents_1 = noise_pops[range(N), parents_idxs[2 * i]]
                parents_2 = noise_pops[range(N), parents_idxs[2 * i + 1]]

                # calculate crossover
                # NOTE(review): expression truncated (end of the second
                # index and closing parentheses missing).
                p = probs[parents_idxs[2 * i], range(N)] / (
                    probs[parents_idxs[2 * i], range(N)] +
                    probs[parents_idxs[2 * i + 1],
                p = atleast_kd(p, x.ndim)
                p = ep.tile(p, (1, *noise_shape))

                # take each entry from parent 1 with probability p, else parent 2
                crossover_mask = ep.uniform(p, p.shape, 0, 1) < p
                children = ep.where(crossover_mask, parents_1, parents_2)

                # calculate mutation
                mutation_mask = ep.uniform(children, children.shape)
                mutation_mask = mutation_mask <= atleast_kd(
                    mutation_probability, children.ndim)
                # NOTE(review): ep.where call truncated (third argument and
                # closing parenthesis missing).
                children = ep.where(mutation_mask, children + mutations[i],

                # project back to epsilon range
                children = ep.clip(children, -epsilon, epsilon)

                # NOTE(review): the statement that appends children to
                # new_noise_pops appears to be missing here.

            noise_pops = ep.stack(new_noise_pops, 1)

            # increase num_plateaus if fitness does not improve
            # for 100 consecutive steps
            # NOTE(review): the next three ep.where calls are truncated
            # (remaining arguments and closing parentheses missing).
            n_its_wo_change = ep.where(elite_idxs == 0, n_its_wo_change + 1,
            num_plateaus = ep.where(n_its_wo_change >= 100, num_plateaus + 1,
            n_its_wo_change = ep.where(n_its_wo_change >= 100,

            # both quantities follow 0.5 * 0.9 ** num_plateaus
            # NOTE(review): both ep.maximum calls are truncated (second
            # argument and closing parenthesis missing).
            mutation_probability = ep.maximum(
                0.5 * ep.exp(
                    math.log(0.9) * ep.ones_like(num_plateaus) * num_plateaus),
            mutation_range = ep.maximum(
                0.5 * ep.exp(
                    math.log(0.9) * ep.ones_like(num_plateaus) * num_plateaus),

        return restore_type(
            self.apply_noise(x, elite_noise, epsilon, channel_axis))
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Misclassification,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Search for a small salt-and-pepper perturbation that is adversarial.

        For ``self.steps`` iterations a per-sample noise probability ``p`` is
        swept through a search interval. Whenever a noisy sample is adversarial
        and closer (L2) to its original than the best one found so far, it is
        stored and the interval is narrowed around the ``p`` that produced it.

        Args:
            model: Model under attack; provides ``model.bounds`` and is passed
                to ``get_is_adversarial``.
            inputs: Batch of original inputs.
            criterion: Criterion deciding whether a sample is adversarial.
            early_stop: Accepted for interface compatibility; unused here.
            **kwargs: Accepted for interface compatibility; discarded.

        Returns:
            Per sample, the best adversarial found (or the unmodified input
            if none was found), passed through ``restore_type``.
        """
        x0, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        is_adversarial = get_is_adversarial(criterion_, model)

        N = len(x0)
        shape = list(x0.shape)

        # When noise is shared across channels, the random field gets a
        # channel dimension of size 1 so that it broadcasts over channels.
        if self.across_channels and x0.ndim > 2:
            if self.channel_axis is None:
                channel_axis = get_channel_axis(model, x0.ndim)
            else:
                channel_axis = self.channel_axis % x0.ndim
            if channel_axis is not None:
                shape[channel_axis] = 1

        min_, max_ = model.bounds
        r = max_ - min_

        result = x0
        is_adv = is_adversarial(result)
        # Inputs that are already adversarial have distance 0; all others
        # start at infinity so that any adversarial counts as an improvement.
        best_advs_norms = ep.where(is_adv, ep.zeros(x0, N),
                                   ep.full(x0, N, ep.inf))
        min_probability = ep.zeros(x0, N)
        max_probability = ep.ones(x0, N)
        stepsizes = max_probability / self.steps
        p = stepsizes

        for step in range(self.steps):
            # add salt and pepper
            u = ep.uniform(x0, tuple(shape))
            p_ = atleast_kd(p, x0.ndim)
            salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
            pepper = -(u < p_ / 2).astype(x0.dtype) * r
            x = x0 + salt + pepper
            x = ep.clip(x, min_, max_)

            # check if we found new best adversarials
            # The distance is the norm of the perturbation, not of the
            # perturbed sample itself, so that it is comparable with the
            # 0 / inf initialisation of best_advs_norms above.
            norms = flatten(x - x0).norms.l2(axis=-1)
            closer = norms < best_advs_norms
            is_adv = is_adversarial(
                x)  # TODO: ignore those that are not closer anyway
            is_best_adv = ep.logical_and(is_adv, closer)

            # update results and search space
            result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
            best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
            min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
            # we set max_probability a bit higher than p because the relationship
            # between p and norms is not strictly monotonic
            max_probability = ep.where(is_best_adv, ep.minimum(p * 1.2, 1.0),
                                       max_probability)
            remaining = self.steps - step
            stepsizes = ep.where(
                is_best_adv, (max_probability - min_probability) / remaining,
                stepsizes)
            # Restart the sweep from the lower end once p reached the upper
            # end of its interval or the interval was just narrowed.
            reset = p == max_probability
            p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
            p = ep.minimum(p + stepsizes, max_probability)

        return restore_type(result)
Exemple #22
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        early_stop: Optional[float] = None,
        starting_points: Optional[T] = None,
        **kwargs: Any,
    ) -> T:
        """Refine adversarial starting points using estimated gradients.

        Starting from adversarial points (given, or produced by an
        initialisation attack), each of ``self.steps`` iterations estimates a
        gradient direction from ``is_adversarial`` queries, takes a step whose
        size is chosen by ``self.stepsize_search``, and returns to the decision
        boundary with ``self._binary_search``.

        Args:
            model: Model under attack.
            inputs: Batch of original inputs.
            criterion: Criterion (or labels) defining what is adversarial.
            early_stop: Forwarded to the initialisation attack only.
            starting_points: Optional adversarial points to start from. If
                omitted, ``self.init_attack`` (or a blended-uniform-noise
                fallback) generates them.
            **kwargs: Accepted for interface compatibility; discarded.

        Returns:
            The refined adversarials, passed through ``restore_type``.

        Raises:
            ValueError: If any starting point (given or generated) is not
                adversarial.
        """
        import logging

        originals, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(originals, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        if starting_points is None:
            init_attack: MinimizationAttack
            if self.init_attack is None:
                init_attack = LinearSearchBlendedUniformNoiseAttack(steps=50)
                logging.info(
                    f"Neither starting_points nor init_attack given. Falling"
                    f" back to {init_attack!r} for initialization.")
            else:
                init_attack = self.init_attack
            # TODO: use call and support all types of attacks (once early_stop is
            # possible in __call__)
            x_advs = init_attack.run(model,
                                     originals,
                                     criterion,
                                     early_stop=early_stop)
        else:
            x_advs = ep.astensor(starting_points)

        is_adv = is_adversarial(x_advs)
        if not is_adv.all():
            failed = is_adv.logical_not().float32().sum()
            if starting_points is None:
                raise ValueError(
                    f"init_attack failed for {failed} of {len(is_adv)} inputs")
            else:
                raise ValueError(
                    f"{failed} of {len(is_adv)} starting_points are not adversarial"
                )
        del starting_points

        tb = TensorBoard(logdir=self.tensorboard)

        # Project the initialization to the boundary.
        x_advs = self._binary_search(is_adversarial, originals, x_advs)

        assert ep.all(is_adversarial(x_advs))

        distances = self.distance(originals, x_advs)

        for step in range(self.steps):
            delta = self.select_delta(originals, distances, step)

            # Choose number of gradient estimation steps: grows with
            # sqrt(step + 1) but is capped at self.max_num_evals.
            num_gradient_estimation_steps = int(
                min([
                    self.initial_num_evals * math.sqrt(step + 1),
                    self.max_num_evals,
                ]))

            gradients = self.approximate_gradients(
                is_adversarial, x_advs, num_gradient_estimation_steps, delta)

            if self.constraint == "linf":
                update = ep.sign(gradients)
            else:
                update = gradients

            if self.stepsize_search == "geometric_progression":
                # find step size.
                epsilons = distances / math.sqrt(step + 1)

                # Halve the step of every sample whose proposal is not
                # adversarial until all proposals are adversarial.
                while True:
                    x_advs_proposals = ep.clip(
                        x_advs + atleast_kd(epsilons, x_advs.ndim) * update, 0,
                        1)
                    success = is_adversarial(x_advs_proposals)
                    epsilons = ep.where(success, epsilons, epsilons / 2.0)

                    if ep.all(success):
                        break

                # Update the sample.
                x_advs = ep.clip(
                    x_advs + atleast_kd(epsilons, update.ndim) * update, 0, 1)

                assert ep.all(is_adversarial(x_advs))

                # Binary search to return to the boundary.
                x_advs = self._binary_search(is_adversarial, originals, x_advs)

                assert ep.all(is_adversarial(x_advs))

            elif self.stepsize_search == "grid_search":
                # Grid search for stepsize: 20 log-spaced factors in
                # [1e-4, 1], each scaled by the current per-sample distance.
                epsilons_grid = ep.expand_dims(
                    ep.from_numpy(
                        distances,
                        np.logspace(
                            -4, 0, num=20, endpoint=True, dtype=np.float32),
                    ),
                    1,
                ) * ep.expand_dims(distances, 0)

                proposals_list = []

                for epsilons in epsilons_grid:
                    x_advs_proposals = (
                        x_advs + atleast_kd(epsilons, update.ndim) * update)
                    x_advs_proposals = ep.clip(x_advs_proposals, 0, 1)

                    mask = is_adversarial(x_advs_proposals)

                    x_advs_proposals = self._binary_search(
                        is_adversarial, originals, x_advs_proposals)

                    # only use new values where initial guess was already adversarial
                    x_advs_proposals = ep.where(atleast_kd(mask, x_advs.ndim),
                                                x_advs_proposals, x_advs)

                    proposals_list.append(x_advs_proposals)

                proposals = ep.stack(proposals_list, 0)
                proposals_distances = self.distance(
                    ep.expand_dims(originals, 0), proposals)
                minimal_idx = ep.argmin(proposals_distances, 0)

                # NOTE(review): proposals is stacked along axis 0 (grid) and
                # indexed here with per-sample indices on that same axis;
                # confirm this picks exactly one proposal per sample.
                x_advs = proposals[minimal_idx]

            distances = self.distance(originals, x_advs)

            # log stats
            tb.histogram("norms", distances, step)

        return restore_type(x_advs)
Exemple #23
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, T],
        epsilon: float,
        **kwargs: Any,
    ) -> T:
        """Run gradient steps using gradients averaged over repeated evaluations.

        In every one of ``self.steps`` iterations the gradient of the loss is
        evaluated ``self.EOT`` times at the current point and averaged. The
        averaged gradient is normalised, a step is taken (ascent for
        ``Misclassification``, descent otherwise), and the result is projected
        with ``self.project`` and clipped to ``model.bounds``.

        Args:
            model: Model under attack.
            inputs: Batch of original inputs.
            criterion: ``Misclassification`` or ``TargetedMisclassification``
                (or something ``get_criterion`` converts into one).
            epsilon: Perturbation budget handed to ``get_random_start`` and
                ``project``; also scales the relative step size.
            **kwargs: Accepted for interface compatibility; discarded.

        Returns:
            The perturbed inputs, passed through ``restore_type``.

        Raises:
            ValueError: If the criterion is neither of the supported kinds.
            AssertionError: If ``self.loss`` is neither ``'ce'`` nor ``'dlr'``.
        """
        x0, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        if isinstance(criterion_, Misclassification):
            labels = criterion_.labels
        elif isinstance(criterion_, TargetedMisclassification):
            labels = criterion_.target_classes
        else:
            raise ValueError("unsupported criterion")

        if self.loss == 'ce':
            loss_fn = self.get_loss_fn(model, labels)
        elif self.loss == 'dlr':
            loss_fn = self.get_dlr_loss_fn(model, labels)
        else:
            # Raised explicitly (instead of ``assert False``) so the check is
            # not stripped when Python runs with -O.
            raise AssertionError("Unrecognized loss function")

        # NOTE: the step size is stored on the instance, so this method
        # mutates ``self``.
        if self.abs_stepsize is None:
            self.stepsize = self.rel_stepsize * epsilon
        else:
            self.stepsize = self.abs_stepsize

        if self.random_start:
            x = self.get_random_start(x0, epsilon)
            x = ep.clip(x, *model.bounds)
        else:
            x = x0

        for i in range(self.steps):
            _, mean_gradients = self.value_and_grad(loss_fn, x)
            # loss_val = loss_fn(x)
            for n in range(2, self.EOT + 1):
                # Computes numerically stable mean:
                # mu_n = (1 / n) * sum_{i=1}^{n} x_i
                #      = (1 / n) * (x_n + sum_{i=1}^{n-1} x_i)
                #      = (1 / n) * (x_n + (n - 1) * mu_{n-1})
                #      = mu_{n-1} + (1 / n) * (x_n - mu_{n-1})
                _, gradients = self.value_and_grad(loss_fn, x)
                mean_gradients = mean_gradients + (gradients -
                                                   mean_gradients) / n
            mean_gradients = self.normalize(mean_gradients,
                                            x=x,
                                            bounds=model.bounds)

            get_stepsize = self.get_stepsize_fn()
            stepsize = get_stepsize(i)
            if isinstance(criterion_, Misclassification):
                # step away from the original label
                x = x + stepsize * mean_gradients
            else:
                # step towards the target label
                x = x - stepsize * mean_gradients
            x = self.project(x, x0, epsilon)
            x = ep.clip(x, *model.bounds)

        return restore_type(x)
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
        starting_points: Optional[ep.Tensor] = None,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Search for a minimum-``self.p``-norm adversarial perturbation.

        A perturbation ``delta`` is updated for ``self.steps`` iterations with
        normalised gradient steps whose size follows a cosine schedule. After
        every step ``delta`` is projected onto a per-sample norm budget
        ``epsilon`` that shrinks while the current point is adversarial and
        grows otherwise. The smallest adversarial perturbation seen is kept.

        Args:
            model: Model under attack.
            inputs: Batch of original inputs.
            criterion: ``Misclassification`` or ``TargetedMisclassification``
                (or something ``get_criterion`` converts into one).
            starting_points: Optional adversarial points. If given (or if
                ``self.init_attack`` is set) a binary search between inputs
                and these points initialises ``delta``.
            early_stop: Accepted for interface compatibility; unused here.
            **kwargs: Accepted for interface compatibility; discarded.

        Returns:
            ``inputs + best_delta``, passed through ``restore_type``.

        Raises:
            ValueError: If the criterion is unsupported or the class tensor
                does not have shape ``(N,)``.
        """
        criterion_ = get_criterion(criterion)

        if isinstance(criterion_, Misclassification):
            targeted = False
            classes = criterion_.labels
        elif isinstance(criterion_, TargetedMisclassification):
            targeted = True
            classes = criterion_.target_classes
        else:
            raise ValueError("unsupported criterion")

        def loss_fn(
            inputs: ep.Tensor, labels: ep.Tensor
        ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
            # Logit difference between the class to push down and the class
            # to push up. ``rows`` is bound further below in the enclosing
            # scope, before this function is first called.
            logits = model(inputs)

            if targeted:
                c_minimize = best_other_classes(logits, labels)
                c_maximize = labels  # target_classes
            else:
                c_minimize = labels  # labels
                c_maximize = best_other_classes(logits, labels)

            loss = logits[rows, c_minimize] - logits[rows, c_maximize]

            return -loss.sum(), (logits, loss)

        x, restore_type = ep.astensor_(inputs)
        del inputs, criterion, kwargs
        N = len(x)

        # start from initialization points/attack
        if starting_points is not None:
            x1 = starting_points
        else:
            if self.init_attack is not None:
                x1 = self.init_attack.run(model, x, criterion_)
            else:
                x1 = None

        # if initial points or initialization attacks are provided,
        #   search for the boundary
        if x1 is not None:
            is_adv = get_is_adversarial(criterion_, model)
            assert is_adv(x1).all()
            lower_bound = ep.zeros(x, shape=(N, ))
            upper_bound = ep.ones(x, shape=(N, ))
            for _ in range(self.binary_search_steps):
                epsilons = (lower_bound + upper_bound) / 2
                mid_points = self.mid_points(x, x1, epsilons, model.bounds)
                is_advs = is_adv(mid_points)
                lower_bound = ep.where(is_advs, lower_bound, epsilons)
                upper_bound = ep.where(is_advs, epsilons, upper_bound)
            starting_points = self.mid_points(x, x1, upper_bound, model.bounds)
            delta = starting_points - x
        else:
            # start from x0
            delta = ep.zeros_like(x)

        if classes.shape != (N, ):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}")

        min_, max_ = model.bounds
        rows = range(N)
        grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

        # Initial norm budget: unbounded for p != 0; for p == 0 either one
        # or the L0 norm of the initial perturbation.
        if self.p != 0:
            epsilon = ep.inf * ep.ones(x, len(x))
        else:
            epsilon = ep.ones(x, len(x)) if x1 is None \
                else ep.norms.l0(flatten(delta), axis=-1)
        # Largest norm any perturbation can have inside the model bounds.
        if self.p != 0:
            worst_norm = ep.norms.lp(flatten(ep.maximum(x - min_, max_ - x)),
                                     p=self.p,
                                     axis=-1)
        else:
            worst_norm = flatten(ep.ones_like(x)).bool().sum(axis=1).float32()

        best_lp = worst_norm
        best_delta = delta
        adv_found = ep.zeros(x, len(x)).bool()

        for i in range(self.steps):
            # perform cosine annealing of learning rates
            stepsize = (self.min_stepsize +
                        (self.max_stepsize - self.min_stepsize) *
                        (1 + math.cos(math.pi * i / self.steps)) / 2)
            gamma = (0.001 + (self.gamma - 0.001) *
                     (1 + math.cos(math.pi * (i / self.steps))) / 2)

            x_adv = x + delta

            _, (logits,
                loss_batch), gradients = grad_and_logits(x_adv, classes)
            is_adversarial = criterion_(x_adv, logits)

            # Remember the smallest adversarial perturbation seen so far.
            lp = ep.norms.lp(flatten(delta), p=self.p, axis=-1)
            is_smaller = lp <= best_lp
            is_both = ep.logical_and(is_adversarial, is_smaller)
            adv_found = ep.logical_or(adv_found, is_adversarial)
            best_lp = ep.where(is_both, lp, best_lp)
            best_delta = ep.where(atleast_kd(is_both, x.ndim), delta,
                                  best_delta)

            # update epsilon
            if self.p != 0:
                distance_to_boundary = abs(loss_batch) / ep.norms.lp(
                    flatten(gradients), p=self.dual, axis=-1)
                # Adversarial: shrink the budget (never above the best norm).
                # Not adversarial: grow it if an adversarial was found before,
                # otherwise jump to the estimated boundary distance.
                epsilon = ep.where(
                    is_adversarial,
                    ep.minimum(
                        epsilon * (1 - gamma),
                        ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)),
                    ep.where(
                        adv_found, epsilon * (1 + gamma),
                        ep.norms.lp(flatten(delta), p=self.p, axis=-1) +
                        distance_to_boundary),
                )
            else:
                # L0 budgets are integer-valued: change by at least one.
                epsilon = ep.where(
                    is_adversarial,
                    ep.minimum(
                        ep.minimum(epsilon - 1,
                                   (epsilon * (1 - gamma)).astype(int).astype(
                                       epsilon.dtype)),
                        ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)),
                    ep.maximum(epsilon + 1,
                               (epsilon * (1 + gamma)).astype(int).astype(
                                   epsilon.dtype)),
                )
                epsilon = ep.maximum(0, epsilon).astype(epsilon.dtype)

            # clip epsilon
            epsilon = ep.minimum(epsilon, worst_norm)

            # computes normalized gradient update
            grad_ = self.normalize(gradients, x=x,
                                   bounds=model.bounds) * stepsize

            # do step
            delta = delta + grad_

            # project according to the given norm
            delta = self.project(x=x + delta, x0=x, epsilon=epsilon) - x

            # clip to valid bounds
            delta = ep.clip(x + delta, *model.bounds) - x

        x_adv = x + best_delta
        return restore_type(x_adv)
Exemple #25
def test_clip(t: Tensor) -> Tensor:
    """Return ``ep.clip`` applied to ``t`` with bounds 2 and 3.5."""
    lower, upper = 2, 3.5
    clipped = ep.clip(t, lower, upper)
    return clipped
 def project(self, x: ep.Tensor, x0: ep.Tensor,
             epsilon: float) -> ep.Tensor:
     """Clip the offset ``x - x0`` to ``[-epsilon, epsilon]`` and re-add ``x0``."""
     offset = x - x0
     bounded_offset = ep.clip(offset, -epsilon, epsilon)
     return x0 + bounded_offset
Exemple #27
    # NOTE(review): this block looks truncated by extraction. The parameter
    # list and the docstring delimiters are missing; the body below reads
    # ``self``, ``inputs``, ``labels``, ``p``, ``candidates``, ``overshoot``,
    # ``steps`` and ``loss``, so those are presumably the parameters. Their
    # order and defaults cannot be determined from here — restore before use.
    def __call__(
        p : int or float
            Lp-norm that should be minimzed, must be 2 or np.inf.
        candidates : int
            Limit on the number of the most likely classes that should
            be considered. A small value is usually sufficient and much
        overshoot : float
        steps : int
            Maximum number of steps to perform.

        # Only p == 2 and p == np.inf are implemented.
        if not (1 <= p <= np.inf):
            raise ValueError
        if p not in [2, np.inf]:
            raise NotImplementedError

        min_, max_ = self.model.bounds()

        inputs = ep.astensor(inputs)
        labels = ep.astensor(labels)

        N = len(inputs)

        # Class indices per sample, sorted by descending logit.
        logits = self.model.forward(inputs)
        candidates = min(candidates, logits.shape[-1])
        classes = logits.argsort(axis=-1).flip(axis=-1)
        if candidates:
            assert candidates >= 2
            # NOTE(review): the next line looks like the tail of a call whose
            # beginning is missing (presumably a logging call) — confirm.
  "Only testing the top-{candidates} classes")
            classes = classes[:, :candidates]

        # i0 is the top-ranked class of each sample.
        i0 = classes[:, 0]
        rows = ep.arange(inputs, N)

        if loss == "logits":

            # Difference between the logit of the k-th ranked class and the
            # logit of the top-ranked class.
            def loss_fun(x: ep.Tensor, k: int) -> ep.Tensor:
                logits = self.model.forward(x)
                ik = classes[:, k]
                l0 = logits[rows, i0]
                lk = logits[rows, ik]
                loss = lk - l0
                return loss.sum(), (loss, logits)

        elif loss == "crossentropy":

            # Same difference, computed on negated cross-entropy values.
            def loss_fun(x: ep.Tensor, k: int) -> ep.Tensor:
                logits = self.model.forward(x)
                ik = classes[:, k]
                l0 = -ep.crossentropy(logits, i0)
                lk = -ep.crossentropy(logits, ik)
                loss = lk - l0
                return loss.sum(), (loss, logits)

            # NOTE(review): an ``else:`` line and the closing parenthesis of
            # this ``raise`` appear to be missing.
            raise ValueError(
                f"expected loss to be 'logits' or 'crossentropy', got '{loss}'"

        # NOTE(review): the remaining arguments of this call are missing;
        # the unpacking below expects (value, aux, gradient) results.
        loss_aux_and_grad = ep.value_and_grad_fn(inputs,

        x = x0 = inputs
        p_total = ep.zeros_like(x)
        for step in range(steps):
            # let's first get the logits using k = 1 to see if we are done
            diffs = [loss_aux_and_grad(x, 1)]
            _, (_, logits), _ = diffs[0]
            is_adv = logits.argmax(axis=-1) != labels
            # NOTE(review): the body of this ``if`` is missing — presumably
            # an early exit from the loop; confirm.
            if is_adv.all():
            # then run all the other k's as well
            # we could avoid repeated forward passes and only repeat
            # the backward pass, but this cannot currently be done in eagerpy
            diffs += [loss_aux_and_grad(x, k) for k in range(2, candidates)]

            # we don't need the logits
            diffs = [(losses, grad) for _, (losses, _), grad in diffs]
            losses = ep.stack([l for l, _ in diffs], axis=1)
            grads = ep.stack([g for _, g in diffs], axis=1)
            assert losses.shape == (N, candidates - 1)
            assert grads.shape == (N, candidates - 1) + x0.shape[1:]

            # calculate the distances
            distances = self.get_distances(losses, grads)
            assert distances.shape == (N, candidates - 1)

            # determine the best directions
            best = distances.argmin(axis=1)
            distances = distances[rows, best]
            losses = losses[rows, best]
            grads = grads[rows, best]
            assert distances.shape == (N, )
            assert losses.shape == (N, )
            assert grads.shape == x0.shape

            # apply perturbation
            distances = distances + 1e-4  # for numerical stability
            p_step = self.get_perturbations(distances, grads)
            assert p_step.shape == x0.shape

            # Accumulate the per-step perturbations; the overshoot factor is
            # applied to the accumulated total, relative to the original x0.
            p_total += p_step
            # don't do anything for those that are already adversarial
            x = ep.where(atleast_kd(is_adv, x.ndim), x,
                         x0 + (1.0 + overshoot) * p_total)
            x = ep.clip(x, min_, max_)

        return x.tensor
Exemple #28
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Iteratively push inputs towards the closest competing class.

        Each of up to ``self.steps`` iterations evaluates, for every
        candidate class, a loss and its gradient at the current point, turns
        them into distances with ``self.get_distances``, picks the candidate
        with the smallest distance per sample, and adds the corresponding
        perturbation from ``self.get_perturbations``. Samples that already
        satisfy the criterion are left unchanged.

        Args:
            model: Model under attack.
            inputs: Batch of original inputs.
            criterion: Criterion (or labels) defining what is adversarial.
            early_stop: Accepted for interface compatibility; unused here.
            **kwargs: Accepted for interface compatibility; discarded.

        Returns:
            The perturbed inputs, passed through ``restore_type``.

        Raises:
            ValueError: If fewer than two candidate classes are available.
        """
        import logging

        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)

        min_, max_ = model.bounds

        # Class indices per sample, sorted by descending logit.
        logits = model(x)
        classes = logits.argsort(axis=-1).flip(axis=-1)
        if self.candidates is None:
            candidates = logits.shape[-1]  # pragma: no cover
        else:
            candidates = min(self.candidates, logits.shape[-1])
            if not candidates >= 2:
                raise ValueError(  # pragma: no cover
                    f"expected the model output to have atleast 2 classes, got {logits.shape[-1]}"
                )
            logging.info(f"Only testing the top-{candidates} classes")
            classes = classes[:, :candidates]

        N = len(x)
        rows = range(N)

        loss_fun = self._get_loss_fn(model, classes)
        loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

        x0 = x
        p_total = ep.zeros_like(x)
        for _ in range(self.steps):
            # let's first get the logits using k = 1 to see if we are done
            diffs = [loss_aux_and_grad(x, 1)]
            _, (_, logits), _ = diffs[0]

            is_adv = criterion(x, logits)
            if is_adv.all():
                break

            # then run all the other k's as well
            # we could avoid repeated forward passes and only repeat
            # the backward pass, but this cannot currently be done in eagerpy
            diffs += [loss_aux_and_grad(x, k) for k in range(2, candidates)]

            # we don't need the logits
            diffs_ = [(losses, grad) for _, (losses, _), grad in diffs]
            losses = ep.stack([lo for lo, _ in diffs_], axis=1)
            grads = ep.stack([g for _, g in diffs_], axis=1)
            assert losses.shape == (N, candidates - 1)
            assert grads.shape == (N, candidates - 1) + x0.shape[1:]

            # calculate the distances
            distances = self.get_distances(losses, grads)
            assert distances.shape == (N, candidates - 1)

            # determine the best directions
            best = distances.argmin(axis=1)
            distances = distances[rows, best]
            losses = losses[rows, best]
            grads = grads[rows, best]
            assert distances.shape == (N,)
            assert losses.shape == (N,)
            assert grads.shape == x0.shape

            # apply perturbation
            distances = distances + 1e-4  # for numerical stability
            p_step = self.get_perturbations(distances, grads)
            assert p_step.shape == x0.shape

            # The overshoot factor is applied to the accumulated
            # perturbation, always relative to the original x0.
            p_total += p_step
            # don't do anything for those that are already adversarial
            x = ep.where(
                atleast_kd(is_adv, x.ndim), x, x0 + (1.0 + self.overshoot) * p_total
            )
            x = ep.clip(x, min_, max_)

        return restore_type(x)
Exemple #29
    def __call__(self, inputs, labels, *, criterion, steps=1000):
        """Search for a small salt-and-pepper perturbation that is adversarial.

        For ``steps`` iterations a per-sample noise probability ``p`` is swept
        through a search interval. Whenever a noisy sample is adversarial and
        closer (L2) to its original than the best one found so far, it is
        stored and the interval is narrowed around the ``p`` that produced it.

        Args:
            inputs: Batch of original inputs.
            labels: Labels belonging to ``inputs``; forwarded to ``criterion``.
            criterion: Callable invoked as
                ``criterion(originals, labels, perturbed, logits)``; its
                result is used as a per-sample boolean mask.
            steps: Number of noise samples drawn per input.

        Returns:
            The ``.tensor`` of the best adversarial per sample (the
            unmodified input where none was found).
        """
        originals = ep.astensor(inputs)
        labels = ep.astensor(labels)

        def is_adversarial(p: ep.Tensor) -> ep.Tensor:
            """For each input in x, returns true if it is an adversarial for
            the given model and criterion"""
            logits = ep.astensor(self.model.forward(p.tensor))
            return criterion(originals, labels, p, logits)

        x0 = ep.astensor(inputs)

        N = len(x0)
        shape = list(x0.shape)
        # With a channel axis configured, the random field gets a channel
        # dimension of size 1 so that it broadcasts over channels.
        if self.channel_axis is not None:
            shape[self.channel_axis] = 1

        min_, max_ = self.model.bounds()
        r = max_ - min_

        result = x0
        is_adv = is_adversarial(result)
        # Inputs that are already adversarial have distance 0; all others
        # start at infinity so that any adversarial counts as an improvement.
        best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf))
        min_probability = ep.zeros(x0, N)
        max_probability = ep.ones(x0, N)
        stepsizes = max_probability / steps
        p = stepsizes

        for step in range(steps):
            # add salt and pepper
            u = ep.uniform(x0, shape)
            p_ = atleast_kd(p, x0.ndim)
            salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
            pepper = -(u < p_ / 2).astype(x0.dtype) * r
            x = x0 + salt + pepper
            x = ep.clip(x, min_, max_)

            # check if we found new best adversarials
            # The distance is the norm of the perturbation, not of the
            # perturbed sample itself, so that it is comparable with the
            # 0 / inf initialisation of best_advs_norms above.
            norms = flatten(x - x0).square().sum(axis=-1).sqrt()
            closer = norms < best_advs_norms
            is_adv = is_adversarial(x)  # TODO: ignore those that are not closer anyway
            is_best_adv = ep.logical_and(is_adv, closer)

            # update results and search space
            result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
            best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
            min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
            # we set max_probability a bit higher than p because the relationship
            # between p and norms is not strictly monotonic
            max_probability = ep.where(
                is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability
            )
            remaining = steps - step
            stepsizes = ep.where(
                is_best_adv, (max_probability - min_probability) / remaining, stepsizes
            )
            # Restart the sweep from the lower end once p reached the upper
            # end of its interval or the interval was just narrowed.
            reset = p == max_probability
            p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
            p = ep.minimum(p + stepsizes, max_probability)

        return result.tensor