Example #1
    def __call__(  # type: ignore
        self,
        model: Model,
        inputs: T,
        criterion: Any,
        *,
        epsilons: Union[Sequence[Union[float, None]], float, None],
        **kwargs: Any,
    ) -> Union[Tuple[List[T], List[T], T], Tuple[T, T, T]]:
        x, restore_type = ep.astensor_(inputs)
        del inputs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        was_iterable = True
        if not isinstance(epsilons, Iterable):
            epsilons = [epsilons]
            was_iterable = False

        N = len(x)
        K = len(epsilons)

        # None means: just minimize, no early stopping, no limit on the perturbation size
        if any(eps is None for eps in epsilons):
            early_stop = None
        else:
            early_stop = min(epsilons)

        # run the actual attack
        xp = self.run(model, x, criterion, early_stop=early_stop, **kwargs)

        xpcs = []
        success = []
        for epsilon in epsilons:
            if epsilon is None:
                xpc = xp
            else:
                xpc = self.distance.clip_perturbation(x, xp, epsilon)
            is_adv = is_adversarial(xpc)

            xpcs.append(xpc)
            success.append(is_adv)

        success_ = ep.stack(success)
        assert success_.shape == (K, N)

        xp_ = restore_type(xp)
        xpcs_ = [restore_type(xpc) for xpc in xpcs]

        if was_iterable:
            return [xp_] * K, xpcs_, restore_type(success_)
        else:
            assert len(xpcs_) == 1
            return xp_, xpcs_[0], restore_type(success_.squeeze(axis=0))
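
A minimal usage sketch for the __call__ above (fmodel, net, images, and labels are assumed names, not taken from the example): passing a sequence of epsilons returns lists plus a success tensor of shape (K, N), while a scalar epsilon returns plain tensors.

import foolbox as fb

fmodel = fb.PyTorchModel(net, bounds=(0, 1))  # net: any trained torch module (assumed)
attack = fb.attacks.L2DeepFoolAttack()  # a MinimizationAttack: runs once, clips per epsilon
raw, clipped, success = attack(fmodel, images, labels, epsilons=[0.1, 0.5, None])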
Example #2
    def approximate_gradients(
        self,
        is_adversarial: Callable[[ep.Tensor], ep.Tensor],
        x_advs: ep.Tensor,
        steps: int,
        delta: ep.Tensor,
    ) -> ep.Tensor:
        # (steps, bs, ...)
        noise_shape = tuple([steps] + list(x_advs.shape))
        if self.constraint == "l2":
            rv = ep.normal(x_advs, noise_shape)
        elif self.constraint == "linf":
            rv = ep.uniform(x_advs, low=-1, high=1, shape=noise_shape)
        rv /= atleast_kd(ep.norms.l2(flatten(rv, keep=1), -1), rv.ndim) + 1e-12

        scaled_rv = atleast_kd(ep.expand_dims(delta, 0), rv.ndim) * rv

        perturbed = ep.expand_dims(x_advs, 0) + scaled_rv
        perturbed = ep.clip(perturbed, 0, 1)

        rv = (perturbed - x_advs) / atleast_kd(ep.expand_dims(delta + 1e-8, 0),
                                               rv.ndim)

        multipliers_list: List[ep.Tensor] = []
        for step in range(steps):
            decision = is_adversarial(perturbed[step])
            multipliers_list.append(
                ep.where(
                    decision,
                    ep.ones(x_advs, (len(x_advs),)),
                    -ep.ones(x_advs, (len(decision),)),
                )
            )
        # (steps, bs, ...)
        multipliers = ep.stack(multipliers_list, 0)

        vals = ep.where(
            ep.abs(ep.mean(multipliers, axis=0, keepdims=True)) == 1,
            multipliers,
            multipliers - ep.mean(multipliers, axis=0, keepdims=True),
        )
        grad = ep.mean(atleast_kd(vals, rv.ndim) * rv, axis=0)

        grad /= ep.norms.l2(atleast_kd(flatten(grad), grad.ndim)) + 1e-12

        return grad
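
The loop above is a Monte-Carlo estimate of the boundary normal: sample unit directions, query whether each perturbed point is still adversarial, and average the ±1-weighted directions after subtracting their mean. A single-input NumPy re-derivation (the function name and scalar delta are illustrative, not part of the library):

import numpy as np

def approximate_gradient_np(decision_fn, x_adv, steps, delta, seed=0):
    rng = np.random.default_rng(seed)
    rv = rng.normal(size=(steps,) + x_adv.shape)
    # normalize each direction to unit L2 norm
    rv /= np.linalg.norm(rv.reshape(steps, -1), axis=1).reshape((steps,) + (1,) * x_adv.ndim) + 1e-12
    perturbed = np.clip(x_adv + delta * rv, 0, 1)
    rv = (perturbed - x_adv) / (delta + 1e-8)  # effective directions after clipping
    # +1 where the perturbed point is adversarial, -1 otherwise
    mult = np.array([1.0 if decision_fn(p) else -1.0 for p in perturbed])
    if abs(mult.mean()) != 1.0:
        mult -= mult.mean()  # baseline subtraction lowers variance
    grad = (mult.reshape((steps,) + (1,) * x_adv.ndim) * rv).mean(axis=0)
    return grad / (np.linalg.norm(grad) + 1e-12)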
Example #3
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)

        min_, max_ = model.bounds

        logits = model(x)
        classes = logits.argsort(axis=-1).flip(axis=-1)
        if self.candidates is None:
            candidates = logits.shape[-1]  # pragma: no cover
        else:
            candidates = min(self.candidates, logits.shape[-1])
            if not candidates >= 2:
                raise ValueError(  # pragma: no cover
                    f"expected the model output to have at least 2 classes, got {logits.shape[-1]}"
                )
            logging.info(f"Only testing the top-{candidates} classes")
            classes = classes[:, :candidates]

        N = len(x)
        rows = range(N)

        loss_fun = self._get_loss_fn(model, classes)
        loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

        x0 = x
        p_total = ep.zeros_like(x)
        for _ in range(self.steps):
            # let's first get the logits using k = 1 to see if we are done
            diffs = [loss_aux_and_grad(x, 1)]
            _, (_, logits), _ = diffs[0]

            is_adv = criterion(x, logits)
            if is_adv.all():
                break

            # then run all the other k's as well
            # we could avoid repeated forward passes and only repeat
            # the backward pass, but this cannot currently be done in eagerpy
            diffs += [loss_aux_and_grad(x, k) for k in range(2, candidates)]

            # we don't need the logits
            diffs_ = [(losses, grad) for _, (losses, _), grad in diffs]
            losses = ep.stack([lo for lo, _ in diffs_], axis=1)
            grads = ep.stack([g for _, g in diffs_], axis=1)
            assert losses.shape == (N, candidates - 1)
            assert grads.shape == (N, candidates - 1) + x0.shape[1:]

            # calculate the distances
            distances = self.get_distances(losses, grads)
            assert distances.shape == (N, candidates - 1)

            # determine the best directions
            best = distances.argmin(axis=1)
            distances = distances[rows, best]
            losses = losses[rows, best]
            grads = grads[rows, best]
            assert distances.shape == (N,)
            assert losses.shape == (N,)
            assert grads.shape == x0.shape

            # apply perturbation
            distances = distances + 1e-4  # for numerical stability
            p_step = self.get_perturbations(distances, grads)
            assert p_step.shape == x0.shape

            p_total += p_step
            # don't do anything for those that are already adversarial
            x = ep.where(
                atleast_kd(is_adv, x.ndim), x, x0 + (1.0 + self.overshoot) * p_total
            )
            x = ep.clip(x, min_, max_)

        return restore_type(x)
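
run() above delegates the norm-specific work to get_distances and get_perturbations, which are not shown. For an L2 variant they plausibly look like this sketch (a hedged reconstruction using the linearized distance |loss| / ||grad||_2; flatten and atleast_kd are the helpers already used elsewhere in these examples):

    def get_distances(self, losses: ep.Tensor, grads: ep.Tensor) -> ep.Tensor:
        # distance to each candidate hyperplane under a linear approximation
        return abs(losses) / (ep.norms.l2(flatten(grads, keep=2), axis=-1) + 1e-8)

    def get_perturbations(self, distances: ep.Tensor, grads: ep.Tensor) -> ep.Tensor:
        # step of length `distances` along the normalized gradient direction
        norms = ep.norms.l2(flatten(grads), axis=-1)
        return atleast_kd(distances / (norms + 1e-8), grads.ndim) * grads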
Example #4
def test_list_stack(t: Tensor) -> None:
    t2 = ep.stack(list(t))
    assert t.shape == t2.shape
    assert (t == t2).all()
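
The round trip relies on eagerpy tensors being iterable along axis 0; a concrete instance of the tested property with NumPy as the (assumed) backend:

import eagerpy as ep
import numpy as np

t = ep.astensor(np.arange(6, dtype=np.float32).reshape(2, 3))
rows = list(t)       # two tensors of shape (3,)
t2 = ep.stack(rows)  # stacking along axis 0 restores shape (2, 3)
assert t.shape == t2.shape and (t == t2).all()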
Example #5
def test_stack(t1: Tensor, t2: Tensor, axis: int) -> Tensor:
    return ep.stack([t1, t2], axis=axis)
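
The axis argument controls where the new dimension is inserted; for two (2, 3) inputs (again assuming a NumPy backend):

import eagerpy as ep
import numpy as np

x = ep.astensor(np.zeros((2, 3), dtype=np.float32))
y = ep.astensor(np.ones((2, 3), dtype=np.float32))
assert ep.stack([x, y], axis=0).shape == (2, 2, 3)
assert ep.stack([x, y], axis=1).shape == (2, 2, 3)   # new axis in the middle
assert ep.stack([x, y], axis=-1).shape == (2, 3, 2)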
Example #6
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: TargetedMisclassification,
        *,
        epsilon: float,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        N = len(x)

        if isinstance(criterion, TargetedMisclassification):
            classes = criterion.target_classes
        else:
            raise ValueError("unsupported criterion")

        if classes.shape != (N, ):
            raise ValueError(
                f"expected target_classes to have shape ({N},), got {classes.shape}"
            )

        noise_shape: Union[Tuple[int, int, int, int], Tuple[int, ...]]
        channel_axis: Optional[int] = None
        if self.reduced_dims is not None:
            if x.ndim != 4:
                raise NotImplementedError(
                    "only implemented for inputs with two spatial dimensions"
                    " (and one channel and one batch dimension)")

            if self.channel_axis is None:
                maybe_axis = get_channel_axis(model, x.ndim)
                if maybe_axis is None:
                    raise ValueError(
                        "cannot infer the data_format from the model, please"
                        " specify channel_axis when initializing the attack")
                else:
                    channel_axis = maybe_axis
            else:
                channel_axis = self.channel_axis % x.ndim

            if channel_axis == 1:
                noise_shape = (x.shape[1], *self.reduced_dims)
            elif channel_axis == 3:
                noise_shape = (*self.reduced_dims, x.shape[3])
            else:
                raise ValueError(
                    f"expected 'channel_axis' to be 1 or 3, got {channel_axis}")
        else:
            noise_shape = x.shape[1:]  # pragma: no cover

        def is_adversarial(logits: ep.TensorType) -> ep.TensorType:
            return ep.argmax(logits, 1) == classes

        num_plateaus = ep.zeros(x, len(x))
        mutation_probability = (ep.ones_like(num_plateaus) *
                                self.min_mutation_probability)
        mutation_range = ep.ones_like(num_plateaus) * self.min_mutation_range

        noise_pops = ep.uniform(x, (N, self.population, *noise_shape),
                                -epsilon, epsilon)

        def calculate_fitness(logits: ep.TensorType) -> ep.TensorType:
            first = logits[range(N), classes]
            second = ep.log(ep.exp(logits).sum(1) - first)

            return first - second

        n_its_wo_change = ep.zeros(x, (N, ))
        for step in range(self.steps):
            fitness_l, is_adv_l = [], []

            for i in range(self.population):
                it = self.apply_noise(x, noise_pops[:, i], epsilon,
                                      channel_axis)
                logits = model(it)
                f = calculate_fitness(logits)
                a = is_adversarial(logits)
                fitness_l.append(f)
                is_adv_l.append(a)

            fitness = ep.stack(fitness_l)
            is_adv = ep.stack(is_adv_l, 1)
            elite_idxs = ep.argmax(fitness, 0)

            elite_noise = noise_pops[range(N), elite_idxs]
            is_adv = is_adv[range(N), elite_idxs]

            # early stopping
            if is_adv.all():
                return restore_type(  # pragma: no cover
                    self.apply_noise(x, elite_noise, epsilon, channel_axis))

            probs = ep.softmax(fitness / self.sampling_temperature, 0)
            parents_idxs = np.stack(
                [
                    self.choice(
                        self.population,
                        2 * self.population - 2,
                        replace=True,
                        p=probs[:, i],
                    ) for i in range(N)
                ],
                1,
            )

            mutations = [
                ep.uniform(
                    x,
                    noise_shape,
                    -mutation_range[i].item() * epsilon,
                    mutation_range[i].item() * epsilon,
                ) for i in range(N)
            ]

            new_noise_pops = [elite_noise]
            for i in range(0, self.population - 1):
                parents_1 = noise_pops[range(N), parents_idxs[2 * i]]
                parents_2 = noise_pops[range(N), parents_idxs[2 * i + 1]]

                # calculate crossover
                p = probs[parents_idxs[2 * i], range(N)] / (
                    probs[parents_idxs[2 * i], range(N)] +
                    probs[parents_idxs[2 * i + 1],
                          range(N)])
                p = atleast_kd(p, x.ndim)
                p = ep.tile(p, (1, *noise_shape))

                crossover_mask = ep.uniform(p, p.shape, 0, 1) < p
                children = ep.where(crossover_mask, parents_1, parents_2)

                # calculate mutation
                mutation_mask = ep.uniform(children, children.shape)
                mutation_mask = mutation_mask <= atleast_kd(
                    mutation_probability, children.ndim)
                children = ep.where(mutation_mask, children + mutations[i],
                                    children)

                # project back to epsilon range
                children = ep.clip(children, -epsilon, epsilon)

                new_noise_pops.append(children)

            noise_pops = ep.stack(new_noise_pops, 1)

            # increase num_plateaus if fitness does not improve
            # for 100 consecutive steps
            n_its_wo_change = ep.where(elite_idxs == 0, n_its_wo_change + 1,
                                       ep.zeros_like(n_its_wo_change))
            num_plateaus = ep.where(n_its_wo_change >= 100, num_plateaus + 1,
                                    num_plateaus)
            n_its_wo_change = ep.where(n_its_wo_change >= 100,
                                       ep.zeros_like(n_its_wo_change),
                                       n_its_wo_change)

            mutation_probability = ep.maximum(
                self.min_mutation_probability,
                0.5 * ep.exp(
                    math.log(0.9) * ep.ones_like(num_plateaus) * num_plateaus),
            )
            mutation_range = ep.maximum(
                self.min_mutation_range,
                0.5 * ep.exp(
                    math.log(0.9) * ep.ones_like(num_plateaus) * num_plateaus),
            )

        return restore_type(
            self.apply_noise(x, elite_noise, epsilon, channel_axis))
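
Because this run() requires a TargetedMisclassification criterion and a fixed epsilon, a call plausibly looks like the following (fmodel, images, and target_classes are assumed to exist):

import foolbox as fb

attack = fb.attacks.GenAttack(steps=1000, population=10)
criterion = fb.criteria.TargetedMisclassification(target_classes)
raw, clipped, success = attack(fmodel, images, criterion, epsilons=0.3)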
Example #7
    def __call__(
        self,
        inputs,
        labels,
        *,
        p,
        candidates=10,
        overshoot=0.02,
        steps=50,
        loss="logits",
    ):
        """
        Parameters
        ----------
        p : int or float
            Lp-norm that should be minimized, must be 2 or np.inf.
        candidates : int
            Limit on the number of the most likely classes that should
            be considered. A small value is usually sufficient and much
            faster.
        overshoot : float
            How much to overshoot the boundary.
        steps : int
            Maximum number of steps to perform.
        """

        if not (1 <= p <= np.inf):
            raise ValueError(f"p must be in [1, inf], got {p}")
        if p not in [2, np.inf]:
            raise NotImplementedError("only p=2 and p=np.inf are supported")

        min_, max_ = self.model.bounds()

        inputs = ep.astensor(inputs)
        labels = ep.astensor(labels)

        N = len(inputs)

        logits = self.model.forward(inputs)
        candidates = min(candidates, logits.shape[-1])
        classes = logits.argsort(axis=-1).flip(axis=-1)
        if candidates:
            assert candidates >= 2
            logging.info(f"Only testing the top-{candidates} classes")
            classes = classes[:, :candidates]

        i0 = classes[:, 0]
        rows = ep.arange(inputs, N)

        if loss == "logits":

            def loss_fun(x: ep.Tensor, k: int) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
                logits = self.model.forward(x)
                ik = classes[:, k]
                l0 = logits[rows, i0]
                lk = logits[rows, ik]
                loss = lk - l0
                return loss.sum(), (loss, logits)

        elif loss == "crossentropy":

            def loss_fun(x: ep.Tensor, k: int) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
                logits = self.model.forward(x)
                ik = classes[:, k]
                l0 = -ep.crossentropy(logits, i0)
                lk = -ep.crossentropy(logits, ik)
                loss = lk - l0
                return loss.sum(), (loss, logits)

        else:
            raise ValueError(
                f"expected loss to be 'logits' or 'crossentropy', got '{loss}'"
            )

        loss_aux_and_grad = ep.value_and_grad_fn(inputs,
                                                 loss_fun,
                                                 has_aux=True)

        x = x0 = inputs
        p_total = ep.zeros_like(x)
        for step in range(steps):
            # let's first get the logits using k = 1 to see if we are done
            diffs = [loss_aux_and_grad(x, 1)]
            _, (_, logits), _ = diffs[0]
            is_adv = logits.argmax(axis=-1) != labels
            if is_adv.all():
                break
            # then run all the other k's as well
            # we could avoid repeated forward passes and only repeat
            # the backward pass, but this cannot currently be done in eagerpy
            diffs += [loss_aux_and_grad(x, k) for k in range(2, candidates)]

            # we don't need the logits
            diffs = [(losses, grad) for _, (losses, _), grad in diffs]
            losses = ep.stack([lo for lo, _ in diffs], axis=1)
            grads = ep.stack([g for _, g in diffs], axis=1)
            assert losses.shape == (N, candidates - 1)
            assert grads.shape == (N, candidates - 1) + x0.shape[1:]

            # calculate the distances
            distances = self.get_distances(losses, grads)
            assert distances.shape == (N, candidates - 1)

            # determine the best directions
            best = distances.argmin(axis=1)
            distances = distances[rows, best]
            losses = losses[rows, best]
            grads = grads[rows, best]
            assert distances.shape == (N, )
            assert losses.shape == (N, )
            assert grads.shape == x0.shape

            # apply perturbation
            distances = distances + 1e-4  # for numerical stability
            p_step = self.get_perturbations(distances, grads)
            assert p_step.shape == x0.shape

            p_total += p_step
            # don't do anything for those that are already adversarial
            x = ep.where(atleast_kd(is_adv, x.ndim), x,
                         x0 + (1.0 + overshoot) * p_total)
            x = ep.clip(x, min_, max_)

        return x.tensor
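
Under this older interface the attack is bound to a model at construction time and called with tensors directly; a hedged sketch (the constructor signature is assumed):

attack = DeepFoolAttack(model)              # hypothetical construction
adversarials = attack(inputs, labels, p=2)  # minimize the L2 norm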
Example #8
    def __call__(  # type: ignore
        self,
        model: Model,
        inputs: T,
        criterion: Any,
        *,
        epsilons: Union[Sequence[Union[float, None]], float, None],
        **kwargs: Any,
    ) -> Union[Tuple[List[T], List[T], T], Tuple[T, T, T]]:

        x, restore_type = ep.astensor_(inputs)
        del inputs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        was_iterable = True
        if not isinstance(epsilons, Iterable):
            epsilons = [epsilons]
            was_iterable = False

        N = len(x)
        K = len(epsilons)

        # None means: just minimize, no early stopping, no limit on the perturbation size
        if any(eps is None for eps in epsilons):
            # TODO: implement a binary search
            raise NotImplementedError(
                "FixedEpsilonAttack subclasses do not yet support None in epsilons"
            )
        real_epsilons = [eps for eps in epsilons if eps is not None]
        del epsilons

        xps = []
        xpcs = []
        success = []
        for epsilon in real_epsilons:
            xp = self.run(model, x, criterion, epsilon=epsilon, **kwargs)

            # clip to epsilon because we don't really know what the attack returns;
            # alternatively, we could check if the perturbation is at most epsilon,
            # but then we would need to handle numerical violations;
            xpc = self.distance.clip_perturbation(x, xp, epsilon)
            is_adv = is_adversarial(xpc)

            xps.append(xp)
            xpcs.append(xpc)
            success.append(is_adv)

        success_ = ep.stack(success)
        assert success_.shape == (K, N)

        xps_ = [restore_type(xp) for xp in xps]
        xpcs_ = [restore_type(xpc) for xpc in xpcs]

        if was_iterable:
            return xps_, xpcs_, restore_type(success_)
        else:
            assert len(xps_) == 1
            assert len(xpcs_) == 1
            return xps_[0], xpcs_[0], restore_type(success_.squeeze(axis=0))
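
In contrast to Example #1, this fixed-epsilon __call__ invokes run() once per epsilon, so the raw outputs also differ per epsilon and epsilons=None is rejected. A usage sketch (fmodel, images, and labels are illustrative):

import foolbox as fb

attack = fb.attacks.LinfPGD()  # a FixedEpsilonAttack subclass
raw, clipped, success = attack(fmodel, images, labels, epsilons=[2 / 255, 8 / 255])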
Example #9
    def forward(self, x):
        res = []
        for model in self.models:
            res.append(model(x))
        return ep.stack(res, 1).mean(1)
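
The same averaging pattern as a self-contained class (the snippet above shows only the method; the scaffolding here is assumed):

import eagerpy as ep

class Ensemble:
    def __init__(self, models):
        self.models = models

    def forward(self, x):
        # each model returns (N, C) logits; stack to (N, M, C), then average over M
        res = [model(x) for model in self.models]
        return ep.stack(res, 1).mean(1)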
Example #10
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[T] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        originals, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(originals, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        if starting_points is None:
            init_attack: MinimizationAttack
            if self.init_attack is None:
                init_attack = LinearSearchBlendedUniformNoiseAttack(steps=50)
                logging.info(
                    f"Neither starting_points nor init_attack given. Falling"
                    f" back to {init_attack!r} for initialization.")
            else:
                init_attack = self.init_attack
            # TODO: use call and support all types of attacks (once early_stop is
            # possible in __call__)
            x_advs = init_attack.run(model,
                                     originals,
                                     criterion,
                                     early_stop=early_stop)
        else:
            x_advs = ep.astensor(starting_points)

        is_adv = is_adversarial(x_advs)
        if not is_adv.all():
            failed = is_adv.logical_not().float32().sum()
            if starting_points is None:
                raise ValueError(
                    f"init_attack failed for {failed} of {len(is_adv)} inputs")
            else:
                raise ValueError(
                    f"{failed} of {len(is_adv)} starting_points are not adversarial"
                )
        del starting_points

        tb = TensorBoard(logdir=self.tensorboard)

        # Project the initialization to the boundary.
        x_advs = self._binary_search(is_adversarial, originals, x_advs)

        assert ep.all(is_adversarial(x_advs))

        distances = self.distance(originals, x_advs)

        for step in range(self.steps):
            delta = self.select_delta(originals, distances, step)

            # Choose number of gradient estimation steps.
            num_gradient_estimation_steps = int(
                min([
                    self.initial_num_evals * math.sqrt(step + 1),
                    self.max_num_evals
                ]))

            gradients = self.approximate_gradients(
                is_adversarial, x_advs, num_gradient_estimation_steps, delta)

            if self.constraint == "linf":
                update = ep.sign(gradients)
            else:
                update = gradients

            if self.stepsize_search == "geometric_progression":
                # find step size.
                epsilons = distances / math.sqrt(step + 1)

                while True:
                    x_advs_proposals = ep.clip(
                        x_advs + atleast_kd(epsilons, x_advs.ndim) * update, 0,
                        1)
                    success = is_adversarial(x_advs_proposals)
                    epsilons = ep.where(success, epsilons, epsilons / 2.0)

                    if ep.all(success):
                        break

                # Update the sample.
                x_advs = ep.clip(
                    x_advs + atleast_kd(epsilons, update.ndim) * update, 0, 1)

                assert ep.all(is_adversarial(x_advs))

                # Binary search to return to the boundary.
                x_advs = self._binary_search(is_adversarial, originals, x_advs)

                assert ep.all(is_adversarial(x_advs))

            elif self.stepsize_search == "grid_search":
                # Grid search for stepsize.
                epsilons_grid = ep.expand_dims(
                    ep.from_numpy(
                        distances,
                        np.logspace(
                            -4, 0, num=20, endpoint=True, dtype=np.float32),
                    ),
                    1,
                ) * ep.expand_dims(distances, 0)

                proposals_list = []

                for epsilons in epsilons_grid:
                    x_advs_proposals = (
                        x_advs + atleast_kd(epsilons, update.ndim) * update)
                    x_advs_proposals = ep.clip(x_advs_proposals, 0, 1)

                    mask = is_adversarial(x_advs_proposals)

                    x_advs_proposals = self._binary_search(
                        is_adversarial, originals, x_advs_proposals)

                    # only use new values where initial guess was already adversarial
                    x_advs_proposals = ep.where(atleast_kd(mask, x_advs.ndim),
                                                x_advs_proposals, x_advs)

                    proposals_list.append(x_advs_proposals)

                proposals = ep.stack(proposals_list, 0)
                proposals_distances = self.distance(
                    ep.expand_dims(originals, 0), proposals)
                minimal_idx = ep.argmin(proposals_distances, 0)

                x_advs = proposals[minimal_idx]

            distances = self.distance(originals, x_advs)

            # log stats
            tb.histogram("norms", distances, step)

        return restore_type(x_advs)
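
Invoking the attack end to end only needs decision access to the model; a usage sketch with illustrative names (fmodel, images, labels):

import foolbox as fb

attack = fb.attacks.HopSkipJump()  # decision-based minimization attack
raw, clipped, success = attack(fmodel, images, labels, epsilons=None)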