Beispiel #1
0
    def normalize(self, gradients: ep.Tensor, *, x: ep.Tensor,
                  bounds: Bounds) -> ep.Tensor:
        """Turn raw gradients into a sparse, L1-normalized sign direction.

        Gradient entries that point out of the valid box (negative where ``x``
        equals ``bounds.lower``, positive where ``x`` equals ``bounds.upper``)
        are zeroed first. Afterwards only the entries whose magnitude reaches
        the ``self.quantile`` quantile (computed over the last axis of the
        flattened magnitudes) keep their sign; everything else becomes zero.
        The result is passed through ``normalize_lp_norms`` with ``p=1``.
        """
        pushes_below = ep.logical_and(x == bounds.lower, gradients < 0)
        pushes_above = ep.logical_and(x == bounds.upper, gradients > 0)
        blocked = ep.logical_or(pushes_below, pushes_above)
        gradients = ep.where(blocked, ep.zeros_like(gradients), gradients)

        magnitudes = gradients.abs()
        # The quantile is computed in numpy and then moved back to the
        # framework of ``gradients``.
        thresholds_np = np.quantile(flatten(magnitudes).numpy(),
                                    q=self.quantile,
                                    axis=-1)
        thresholds = atleast_kd(ep.from_numpy(gradients, thresholds_np),
                                gradients.ndim)
        selected = magnitudes >= thresholds
        sparse_signs = ep.where(selected, gradients.sign(),
                                ep.zeros_like(gradients))
        return normalize_lp_norms(sparse_signs, p=1)
    def __call__(self, model: Model, inputs: T, criterion: Union[Criterion, T]) -> T:
        """Blend the inputs towards a constant target until they are adversarial.

        The target value is ``self.target`` interpolated between the model
        bounds. The blend factor is increased from 0 in ``self.steps`` equal
        increments; for every sample the first factor that yields an
        adversarial is recorded. Samples for which no factor worked keep the
        factor 1 (i.e. they are moved all the way to the target).
        """
        x, restore_type = ep.astensor_(inputs)
        del inputs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        lower, upper = model.bounds
        target = lower + self.target * (upper - lower)
        direction = target - x

        # A value of exactly 1 doubles as the "not found yet" marker.
        best = ep.ones(x, len(x))

        stepsize = 1.0 / self.steps
        epsilon = 0.0
        for _ in range(self.steps):
            # TODO: reduce the batch size to the ones that have not yet been successful

            blended = x + epsilon * direction
            adv_now = is_adversarial(blended)
            first_hit = ep.logical_and(adv_now, best == 1)
            best = ep.where(first_hit, epsilon, best)

            if (best < 1).all():
                break

            epsilon += stepsize

        factors = atleast_kd(best, x.ndim)
        return restore_type(x + factors * direction)
Beispiel #3
0
    def __call__(self, inputs, labels, *, steps=1000):
        """Blur the inputs with growing Gaussian strength until misclassified.

        Parameters
        ----------
        inputs
            Batch of 4-dimensional inputs (asserted below); the spatial axes
            are picked according to ``self.channel_axis`` (1 or 3).
        labels
            1-dimensional labels, one per input.
        steps
            Number of blur strengths tried, evenly spaced in (0, 1].

        Returns
        -------
        The raw tensor holding, per sample, the first blurred version whose
        predicted class differs from its label. Samples that are already
        misclassified, or never become misclassified, are returned unchanged.

        Raises
        ------
        ValueError
            If ``self.channel_axis`` is neither 1 nor 3.
        """
        x = ep.astensor(inputs)
        y = ep.astensor(labels)
        assert x.shape[0] == y.shape[0]
        assert y.ndim == 1

        assert x.ndim == 4
        if self.channel_axis == 1:
            h, w = x.shape[2:4]
        elif self.channel_axis == 3:
            h, w = x.shape[1:3]
        else:
            # Interpolate the offending value; the message used to contain the
            # literal text "{channel_axis}" because it was not an f-string.
            raise ValueError(
                f"expected 'channel_axis' to be 1 or 3, got {self.channel_axis}")

        # The blur strength is expressed relative to the larger spatial side.
        size = max(h, w)

        min_, max_ = self.model.bounds()

        x0 = x
        x0np = x0.numpy()

        # Skip epsilon == 0 (no blur); the unblurred inputs are checked below.
        epsilons = np.linspace(0, 1, num=steps + 1)[1:]

        logits = ep.astensor(self.model.forward(x0.tensor))
        classes = logits.argmax(axis=-1)
        is_adv = classes != labels
        found = is_adv

        result = x0

        for epsilon in epsilons:
            # TODO: reduce the batch size to the ones that haven't been successful

            # Blur only the two spatial axes: no blur across the batch axis
            # (0) or across the channel axis.
            sigmas = [epsilon * size] * 4
            sigmas[0] = 0
            sigmas[self.channel_axis] = 0

            # TODO: once we can implement gaussian_filter in eagerpy, avoid converting from numpy
            x = gaussian_filter(x0np, sigmas)
            x = np.clip(x, min_, max_)
            x = ep.from_numpy(x0, x)

            logits = ep.astensor(self.model.forward(x.tensor))
            classes = logits.argmax(axis=-1)
            is_adv = classes != labels

            # Only record the first (weakest) blur that works for a sample.
            new_adv = ep.logical_and(is_adv, found.logical_not())
            result = ep.where(atleast_kd(new_adv, x.ndim), x, result)
            found = ep.logical_or(new_adv, found)

            if found.all():
                break

        return result.tensor
Beispiel #4
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        **kwargs: Any,
    ) -> T:
        """Search rotations and translations for the first adversarial transform.

        Candidate ``(rotation, dx, dy)`` triples come either from a full grid
        (``self.grid_search``) or from ``self.random_steps`` uniformly random
        draws. For every sample the first transformed input that is
        adversarial is kept; samples that are already adversarial, or for
        which no candidate works, are returned unchanged.
        """
        raise_if_kwargs(kwargs)

        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        found = is_adversarial(x)
        results = x

        def grid_candidates() -> Generator[Any, Any, Any]:
            # Cartesian product of evenly spaced rotations and shifts.
            rotations = np.linspace(-self.max_rot, self.max_rot, self.num_rots)
            shifts_x = np.linspace(-self.max_trans, self.max_trans, self.num_trans)
            shifts_y = np.linspace(-self.max_trans, self.max_trans, self.num_trans)
            for rotation in rotations:
                for shift_x in shifts_x:
                    for shift_y in shifts_y:
                        yield rotation, shift_x, shift_y

        def random_candidates() -> Generator[Any, Any, Any]:
            # Independent uniform draws for each of the three parameters.
            rotations = np.random.uniform(-self.max_rot, self.max_rot,
                                          self.random_steps)
            shifts_x = np.random.uniform(-self.max_trans, self.max_trans,
                                         self.random_steps)
            shifts_y = np.random.uniform(-self.max_trans, self.max_trans,
                                         self.random_steps)
            yield from zip(rotations, shifts_x, shifts_y)

        if self.grid_search:
            candidates = grid_candidates()
        else:
            candidates = random_candidates()

        for dphi, dx, dy in candidates:
            # TODO: reduce the batch size to the ones that haven't been successful

            transformed = rotate_and_shift(x, translation=(dx, dy), rotation=dphi)
            adv_now = is_adversarial(transformed)
            newly_found = ep.logical_and(adv_now, found.logical_not())

            results = ep.where(atleast_kd(newly_found, transformed.ndim),
                               transformed, results)
            found = ep.logical_or(newly_found, found)
            if found.all():
                break  # all images in batch misclassified
        return restore_type(results)
Beispiel #5
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Replace each input by a stored sample that satisfies the criterion.

        Every batch position ``i`` gets a shuffled pool of all other batch
        positions; round ``r`` tries the ``r``-th pool entry of every sample
        as an index into ``self.inputs`` / ``self.outputs``. The first stored
        sample that fulfils the criterion is kept. Inputs that already
        fulfil it, or for which nothing is found, are returned unchanged.

        ``early_stop`` is accepted but not used by this block.
        """
        raise_if_kwargs(kwargs)
        self.process_raw()
        assert self.inputs is not None
        assert self.outputs is not None
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)

        result = x
        found = criterion(x, model(x))

        batch_size = len(x)

        # for every sample try every other sample
        index_pools: List[List[int]] = []
        for own_index in range(batch_size):
            pool = [j for j in range(batch_size) if j != own_index]
            np.random.shuffle(pool)
            index_pools.append(pool)

        for round_index in range(batch_size - 1):
            if found.all():
                break

            picks = np.array([pool[round_index] for pool in index_pools])

            candidates = self.inputs[picks]
            candidate_outputs = self.outputs[picks]
            adv_now = criterion(candidates, candidate_outputs)

            newly_found = ep.logical_and(adv_now, found.logical_not())
            result = ep.where(atleast_kd(newly_found, result.ndim),
                              candidates, result)
            found = ep.logical_or(found, newly_found)

        return restore_type(result)
    def __call__(self,
                 inputs,
                 labels,
                 *,
                 epsilon,
                 criterion,
                 repeats=100,
                 check_trivial=True):
        """Repeatedly add normalized random noise of size ``epsilon``.

        Up to ``repeats`` noise samples are drawn via ``self.sample_noise``,
        rescaled by ``self.get_norms`` and ``epsilon``, added to the inputs
        and clipped to the model bounds. For each sample the first noisy
        version that ``criterion`` accepts is kept. With ``check_trivial``
        the unperturbed inputs are tested first. Returns the raw tensor.
        """
        originals = ep.astensor(inputs)
        labels = ep.astensor(labels)

        def is_adversarial(p: ep.Tensor) -> ep.Tensor:
            """Evaluate the criterion on the perturbed batch ``p``."""
            return criterion(originals, labels, p, self.model.forward(p))

        x0 = ep.astensor(inputs)
        lower, upper = self.model.bounds()

        result = x0
        found = (is_adversarial(result)
                 if check_trivial else ep.zeros(x0, len(result)).bool())

        for _ in range(repeats):
            if found.all():
                break

            noise = self.sample_noise(x0)
            noise_norms = self.get_norms(noise)
            noise = noise / atleast_kd(noise_norms, noise.ndim)
            candidate = (x0 + epsilon * noise).clip(lower, upper)
            adv_now = is_adversarial(candidate)
            newly_found = ep.logical_and(adv_now, ep.logical_not(found))
            result = ep.where(atleast_kd(newly_found, candidate.ndim),
                              candidate, result)
            found = ep.logical_or(found, adv_now)

        return result.tensor
Beispiel #7
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Blend the inputs towards a constant target until they are adversarial.

        The target is ``self.target`` interpolated between the model bounds.
        The blend factor grows from 0 in ``self.steps`` equal increments and
        the first factor that makes a sample adversarial is recorded for it.
        Samples for which no factor worked keep the factor 1.

        ``early_stop`` is accepted but not used by this block.
        """
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        lower, upper = model.bounds
        target = lower + self.target * (upper - lower)
        direction = target - x

        # A value of exactly 1 doubles as the "not found yet" marker.
        best = ep.ones(x, len(x))

        stepsize = 1.0 / self.steps
        epsilon = 0.0
        for _ in range(self.steps):
            # TODO: reduce the batch size to the ones that have not yet been successful

            blended = x + epsilon * direction
            adv_now = is_adversarial(blended)
            first_hit = ep.logical_and(adv_now, best == 1)
            best = ep.where(first_hit, epsilon, best)

            if (best < 1).all():
                break  # pragma: no cover

            epsilon += stepsize

        factors = atleast_kd(best, x.ndim)
        return restore_type(x + factors * direction)
Beispiel #8
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, Any] = None,
        *,
        epsilon: float,
        **kwargs: Any,
    ) -> T:
        """Repeatedly add random noise scaled to ``epsilon`` until adversarial.

        Up to ``self.repeats`` noise samples are drawn with
        ``self.sample_noise``; ``self.get_epsilons`` supplies the scaling for
        each draw. The noisy inputs are clipped to the model bounds and, per
        sample, the first adversarial one is kept. With ``self.check_trivial``
        the unperturbed inputs are tested first.
        """
        raise_if_kwargs(kwargs)
        x0, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        verify_input_bounds(x0, model)

        is_adversarial = get_is_adversarial(criterion_, model)

        lower, upper = model.bounds

        result = x0
        found = (is_adversarial(result)
                 if self.check_trivial else ep.zeros(x0, len(result)).bool())

        for _ in range(self.repeats):
            if found.all():
                break

            noise = self.sample_noise(x0)
            scales = self.get_epsilons(x0, noise, epsilon, min_=lower, max_=upper)
            candidate = (x0 + scales * noise).clip(lower, upper)
            adv_now = is_adversarial(candidate)
            newly_found = ep.logical_and(adv_now, ep.logical_not(found))
            result = ep.where(atleast_kd(newly_found, candidate.ndim),
                              candidate, result)
            found = ep.logical_or(found, adv_now)

        return restore_type(result)
Beispiel #9
0
    def __call__(self, model: Model, inputs: T, criterion: Union[Criterion,
                                                                 T]) -> T:
        """Run the wrapped attack ``self._times`` times and keep the first success.

        The first run always provides the initial result. Each further run
        only replaces the result of samples that were not adversarial so far
        and are adversarial in that run.
        """
        x, restore_type = ep.astensor_(inputs)
        del inputs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        best = self._attack(model, x, criterion)
        best_is_adv = is_adversarial(best)

        # The first run already happened above, so one fewer remains.
        for _ in range(self._times - 1):
            candidate = self._attack(model, x, criterion)
            # assumes candidate does not violate the perturbation size constraint

            candidate_is_adv = is_adversarial(candidate)
            improved = ep.logical_and(candidate_is_adv,
                                      best_is_adv.logical_not())

            best = ep.where(atleast_kd(improved, best.ndim), candidate, best)
            best_is_adv = ep.logical_or(candidate_is_adv, best_is_adv)

        return restore_type(best)
Beispiel #10
0
def _apply_decision_rule(
    decision_rule: Union[Literal["EN"], Literal["L1"]],
    beta: float,
    best_advs: ep.Tensor,
    best_advs_norms: ep.Tensor,
    x_k: ep.Tensor,
    x: ep.Tensor,
    found_advs: ep.Tensor,
) -> Tuple[ep.Tensor, ep.Tensor]:
    """Update the best adversarials according to the chosen decision rule.

    For ``"EN"`` the score of a candidate is ``beta * L1 + squared L2`` of the
    perturbation ``x_k - x``; for any other value it is the plain L1 norm.
    A candidate replaces the stored best when ``found_advs`` is set for it and
    its score is strictly smaller than the stored one.

    Returns the updated ``(best_advs, best_advs_norms)`` pair.
    """
    perturbation = flatten(x_k - x)
    l1 = perturbation.abs().sum(axis=-1)
    if decision_rule == "EN":
        norms = beta * l1 + perturbation.square().sum(axis=-1)
    else:
        # decision rule = L1
        norms = l1

    improved = ep.logical_and(norms < best_advs_norms, found_advs)
    best_advs = ep.where(atleast_kd(improved, best_advs.ndim), x_k, best_advs)
    best_advs_norms = ep.where(improved, norms, best_advs_norms)

    return best_advs, best_advs_norms
Beispiel #11
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Replace each input by a randomly drawn stored sample that is adversarial.

        In every round one random index into ``self.inputs`` is drawn per
        batch position; the stored sample is kept for positions that were not
        solved yet and for which the criterion holds on the stored output.
        The loop only ends once every position is solved.

        ``early_stop`` is accepted but not used by this block.
        """
        raise_if_kwargs(kwargs)
        self.process_raw()
        assert self.inputs is not None
        assert self.outputs is not None
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        criterion = get_criterion(criterion)

        result = x
        found = criterion(x, model(x))

        dataset_size = len(self.inputs)
        batch_size = len(x)

        while True:
            if found.all():
                break

            picks = np.random.randint(0, dataset_size, size=(batch_size, ))

            candidates = self.inputs[picks]
            candidate_outputs = self.outputs[picks]
            adv_now = criterion(candidates, candidate_outputs)

            newly_found = ep.logical_and(adv_now, found.logical_not())
            result = ep.where(atleast_kd(newly_found, result.ndim),
                              candidates, result)
            found = ep.logical_or(found, newly_found)

        return restore_type(result)
Beispiel #12
0
def test_logical_and(t: Tensor) -> Tensor:
    """Return the elementwise conjunction of ``t < 3`` and ``t > 1``."""
    below_three = t < 3
    above_one = t > 1
    return ep.logical_and(below_three, above_one)
    def __call__(
        self,
        model: Model,
        inputs,
        labels,
        *,
        criterion=misclassification,
        channel_axis: Optional[int] = None,
    ):
        """Search for a small amount of salt-and-pepper noise that is adversarial.

        The per-sample noise probability ``p`` is swept upwards in
        ``self.steps`` iterations. Whenever a noisy input is adversarial and
        has a smaller perturbation norm than the best one so far, it becomes
        the new result and the probability interval searched for that sample
        is narrowed around ``p``.

        Parameters
        ----------
        channel_axis
            The axis across which the noise should be the same (if across_channels is True).
            If None, will be automatically inferred from the model if possible.

        Raises
        ------
        ValueError
            If the channel axis is needed but can neither be inferred from
            ``model.data_format`` nor is a valid axis of the inputs.
        """
        inputs, labels, restore = wrap(inputs, labels)
        is_adversarial = get_is_adversarial(criterion, inputs, labels, model)

        x0 = inputs
        N = len(x0)
        shape = list(x0.shape)
        if self.across_channels and x0.ndim > 2:
            if channel_axis is None and not hasattr(model, "data_format"):
                raise ValueError(
                    "cannot infer the data_format from the model, please specify"
                    " channel_axis when calling the attack")
            elif channel_axis is None:
                data_format = model.data_format  # type: ignore
                if (data_format is None or data_format != "channels_first"
                        and data_format != "channels_last"):
                    raise ValueError(
                        "expected data_format to be 'channels_first' or 'channels_last'"
                    )
                channel_axis = 1 if data_format == "channels_first" else x0.ndim - 1
            elif not 0 <= channel_axis < x0.ndim:
                raise ValueError(
                    f"expected channel_axis to be in [0, {x0.ndim})")

            # A size-1 channel axis makes the same noise broadcast over all
            # channels.
            shape[channel_axis] = 1

        min_, max_ = model.bounds()
        r = max_ - min_

        result = x0
        is_adv = is_adversarial(result)
        # Inputs that are already adversarial have a zero perturbation norm;
        # for all others nothing has been found yet.
        best_advs_norms = ep.where(is_adv, ep.zeros(x0, N),
                                   ep.full(x0, N, ep.inf))
        min_probability = ep.zeros(x0, N)
        max_probability = ep.ones(x0, N)
        stepsizes = max_probability / self.steps
        p = stepsizes

        for step in range(self.steps):
            # add salt and pepper
            u = ep.uniform(x0, shape)
            p_ = atleast_kd(p, x0.ndim)
            salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
            pepper = -(u < p_ / 2).astype(x0.dtype) * r
            x = x0 + salt + pepper
            x = ep.clip(x, min_, max_)

            # check if we found new best adversarials
            # The norm must be that of the perturbation (x - x0), not of the
            # perturbed image itself, to be comparable with best_advs_norms
            # (which is 0 for inputs that need no perturbation).
            norms = flatten(x - x0).square().sum(axis=-1).sqrt()
            closer = norms < best_advs_norms
            is_adv = is_adversarial(
                x)  # TODO: ignore those that are not closer anyway
            is_best_adv = ep.logical_and(is_adv, closer)

            # update results and search space
            result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
            best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
            min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
            # we set max_probability a bit higher than p because the relationship
            # between p and norms is not strictly monotonic
            max_probability = ep.where(is_best_adv, ep.minimum(p * 1.2, 1.0),
                                       max_probability)
            remaining = self.steps - step
            stepsizes = ep.where(
                is_best_adv, (max_probability - min_probability) / remaining,
                stepsizes)
            # Restart the sweep from the lower end once the upper end of the
            # interval has been reached.
            reset = p == max_probability
            p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
            p = ep.minimum(p + stepsizes, max_probability)

        return restore(result)
Beispiel #14
0
 def __call__(self, perturbed: T, outputs: T) -> T:
     """Return where both wrapped criteria ``self.a`` and ``self.b`` hold."""
     tensors, restore_type = ep.astensors_(perturbed, outputs)
     first = self.a(*tensors)
     second = self.b(*tensors)
     return restore_type(ep.logical_and(first, second))
Beispiel #15
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Iteratively search for an adversarial perturbation with small L2 norm.

        Each of the ``self.steps`` iterations takes a step along the
        L2-normalized cross-entropy gradient, then rescales the perturbation
        to a per-sample norm ``epsilon`` and clips it to the model bounds.
        ``epsilon`` shrinks by ``1 - self.gamma`` for samples that are
        currently adversarial and grows by ``1 + self.gamma`` otherwise.

        Returns ``x`` plus the smallest adversarial perturbation seen; samples
        for which none was found are returned unperturbed (the stored best
        perturbation starts at zero).

        ``early_stop`` is accepted but not used by this block.

        Raises:
            ValueError: if the criterion is neither ``Misclassification`` nor
                ``TargetedMisclassification``, or if its classes do not have
                shape ``(N,)``.
        """
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        N = len(x)

        if isinstance(criterion_, Misclassification):
            targeted = False
            classes = criterion_.labels
        elif isinstance(criterion_, TargetedMisclassification):
            targeted = True
            classes = criterion_.target_classes
        else:
            raise ValueError("unsupported criterion")

        if classes.shape != (N, ):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}")

        stepsize = 1.0
        min_, max_ = model.bounds

        def loss_fn(inputs: ep.Tensor,
                    labels: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]:
            """Return the signed summed cross-entropy and the logits (aux)."""
            logits = model(inputs)

            # Untargeted: increase the loss on the true labels.
            # Targeted: decrease the loss on the target classes.
            sign = -1.0 if targeted else 1.0
            loss = sign * ep.crossentropy(logits, labels).sum()

            return loss, logits

        grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

        delta = ep.zeros_like(x)

        epsilon = self.init_epsilon * ep.ones(x, len(x))
        # Largest L2 distance reachable inside the bounds: per element the
        # farther of the two bounds.
        worst_norm = ep.norms.l2(flatten(ep.maximum(x - min_, max_ - x)), -1)

        best_l2 = worst_norm
        best_delta = delta
        # NOTE(review): adv_found is updated in the loop but never read in
        # this block.
        adv_found = ep.zeros(x, len(x)).bool()

        for i in range(self.steps):
            # perform cosine annealing of LR starting from 1.0 to 0.01
            # NOTE(review): the schedule is applied to the previous
            # iteration's stepsize, not to the initial 1.0, so the decay
            # compounds across iterations - confirm this is intended.
            stepsize = (0.01 + (stepsize - 0.01) *
                        (1 + math.cos(math.pi * i / self.steps)) / 2)

            x_adv = x + delta

            _, logits, gradients = grad_and_logits(x_adv, classes)
            gradients = normalize_gradient_l2_norms(gradients)
            is_adversarial = criterion_(x_adv, logits)

            # Bookkeeping uses the perturbation from before this iteration's
            # step, i.e. the one that produced x_adv.
            l2 = ep.norms.l2(flatten(delta), axis=-1)
            is_smaller = l2 <= best_l2

            is_both = ep.logical_and(is_adversarial, is_smaller)
            adv_found = ep.logical_or(adv_found, is_adversarial)
            best_l2 = ep.where(is_both, l2, best_l2)

            best_delta = ep.where(atleast_kd(is_both, x.ndim), delta,
                                  best_delta)

            # do step
            delta = delta + stepsize * gradients

            # Shrink the target norm where the current point is adversarial,
            # grow it otherwise; never exceed the largest reachable norm.
            epsilon = epsilon * ep.where(is_adversarial, 1.0 - self.gamma,
                                         1.0 + self.gamma)
            epsilon = ep.minimum(epsilon, worst_norm)

            # project to epsilon ball
            # (rescales delta so that its L2 norm equals epsilon)
            delta *= atleast_kd(epsilon / ep.norms.l2(flatten(delta), -1),
                                x.ndim)

            # clip to valid bounds
            delta = ep.clip(x + delta, *model.bounds) - x

        x_adv = x + best_delta

        return restore_type(x_adv)
Beispiel #16
0
    def __call__(  # noqa: F811
        self,
        model: Model,
        inputs: T,
        criterion: Any,
        *,
        epsilons: Union[Sequence[Union[float, None]], float, None],
        **kwargs: Any,
    ) -> Union[Tuple[List[T], List[T], T], Tuple[T, T, T]]:
        """Run the wrapped attack ``self.times`` times and merge the results.

        For a concrete epsilon, a later run only replaces the stored result of
        samples that had no success before. For ``epsilon is None`` a later
        successful run also replaces the stored result when it is closer to
        the input according to ``self.distance``.

        Returns the perturbed inputs, their clipped versions and the success
        mask. They are lists (and a ``(K, N)`` mask) when ``epsilons`` was
        iterable, otherwise single tensors with the epsilon axis removed.
        """
        x, restore_type = ep.astensor_(inputs)
        del inputs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)

        was_iterable = isinstance(epsilons, Iterable)
        if not was_iterable:
            epsilons = [epsilons]

        N = len(x)
        K = len(epsilons)

        for run_index in range(self.times):
            # run the attack
            xps, xpcs, success = self.attack(
                model, x, criterion, epsilons=epsilons, **kwargs
            )
            assert len(xps) == K
            assert len(xpcs) == K
            for xp in xps:
                assert xp.shape == x.shape
            for xpc in xpcs:
                assert xpc.shape == x.shape
            assert success.shape == (K, N)

            if run_index == 0:
                # The first run seeds the running best.
                best_xps, best_xpcs, best_success = xps, xpcs, success
                continue

            # TODO: test if stacking the list to a single tensor and
            # getting rid of the loop is faster

            for k, epsilon in enumerate(epsilons):
                unsolved = best_success[k].logical_not()
                assert unsolved.shape == (N,)
                if epsilon is not None:
                    # for concrete epsilon, we just need a successful one
                    improved = ep.logical_and(success[k], unsolved)
                else:
                    # if epsilon is None, we need the minimum

                    # TODO: maybe cache some of these distances
                    # and then remove the concrete-epsilon branch
                    closer = self.distance(x, xps[k]) < self.distance(x, best_xps[k])
                    assert closer.shape == (N,)
                    improved = ep.logical_and(
                        success[k], ep.logical_or(closer, unsolved)
                    )
                improved = atleast_kd(improved, x.ndim)
                best_xps[k] = ep.where(improved, xps[k], best_xps[k])
                best_xpcs[k] = ep.where(improved, xpcs[k], best_xpcs[k])

            best_success = ep.logical_or(success, best_success)

        restored_xps = [restore_type(xp) for xp in best_xps]
        restored_xpcs = [restore_type(xpc) for xpc in best_xpcs]
        if was_iterable:
            return restored_xps, restored_xpcs, restore_type(best_success)

        # A single epsilon was passed: unwrap the length-1 lists again.
        assert len(restored_xps) == 1
        assert len(restored_xpcs) == 1
        return (
            restored_xps[0],
            restored_xpcs[0],
            restore_type(best_success.squeeze(axis=0)),
        )
Beispiel #17
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Misclassification,
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Search for a small amount of salt-and-pepper noise that is adversarial.

        The per-sample noise probability ``p`` is swept upwards in
        ``self.steps`` iterations. Whenever a noisy input is adversarial and
        has a smaller L2 perturbation norm than the best one so far, it
        becomes the new result and the probability interval searched for that
        sample is narrowed around ``p``.

        Inputs that are already adversarial, or for which no adversarial noise
        was found, are returned unchanged. ``early_stop`` is accepted but not
        used by this block.
        """
        raise_if_kwargs(kwargs)
        x0, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        is_adversarial = get_is_adversarial(criterion_, model)

        N = len(x0)
        shape = list(x0.shape)

        if self.across_channels and x0.ndim > 2:
            if self.channel_axis is None:
                channel_axis = get_channel_axis(model, x0.ndim)
            else:
                channel_axis = self.channel_axis % x0.ndim
            if channel_axis is not None:
                # A size-1 channel axis makes the same noise broadcast over
                # all channels.
                shape[channel_axis] = 1

        min_, max_ = model.bounds
        r = max_ - min_

        result = x0
        is_adv = is_adversarial(result)
        # Inputs that are already adversarial have a zero perturbation norm;
        # for all others nothing has been found yet.
        best_advs_norms = ep.where(is_adv, ep.zeros(x0, N),
                                   ep.full(x0, N, ep.inf))
        min_probability = ep.zeros(x0, N)
        max_probability = ep.ones(x0, N)
        stepsizes = max_probability / self.steps
        p = stepsizes

        for step in range(self.steps):
            # add salt and pepper
            u = ep.uniform(x0, tuple(shape))
            p_ = atleast_kd(p, x0.ndim)
            salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
            pepper = -(u < p_ / 2).astype(x0.dtype) * r
            x = x0 + salt + pepper
            x = ep.clip(x, min_, max_)

            # check if we found new best adversarials
            # The norm must be that of the perturbation (x - x0), not of the
            # perturbed image itself, to be comparable with best_advs_norms
            # (which is 0 for inputs that need no perturbation).
            norms = flatten(x - x0).norms.l2(axis=-1)
            closer = norms < best_advs_norms
            is_adv = is_adversarial(
                x)  # TODO: ignore those that are not closer anyway
            is_best_adv = ep.logical_and(is_adv, closer)

            # update results and search space
            result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
            best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
            min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
            # we set max_probability a bit higher than p because the relationship
            # between p and norms is not strictly monotonic
            max_probability = ep.where(is_best_adv, ep.minimum(p * 1.2, 1.0),
                                       max_probability)
            remaining = self.steps - step
            stepsizes = ep.where(
                is_best_adv, (max_probability - min_probability) / remaining,
                stepsizes)
            # Restart the sweep from the lower end once the upper end of the
            # interval has been reached.
            reset = p == max_probability
            p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
            p = ep.minimum(p + stepsizes, max_probability)

        return restore_type(result)
Beispiel #18
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[T] = None,
        **kwargs: Any,
    ) -> T:
        """Shrink an adversarial for a model that binarizes its input.

        Every element of ``starting_points`` is moved as close to the original
        as possible without crossing the binarization threshold: elements on
        the same side as the original are reset to the original value, the
        others are placed directly next to the threshold on their own side.

        Raises:
            ValueError: if ``starting_points`` is missing, the dtype is not a
                supported float type, ``self.included_in`` is invalid, or the
                refined points are classified differently from the starting
                points.
        """
        raise_if_kwargs(kwargs)
        if starting_points is None:
            raise ValueError("BinarizationRefinementAttack requires starting_points")
        (o, x), restore_type = ep.astensors_(inputs, starting_points)
        del inputs, starting_points, kwargs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        threshold = self.threshold
        if threshold is None:
            # Default to the midpoint of the model bounds.
            min_, max_ = model.bounds
            threshold = (min_ + max_) / 2.0

        assert o.dtype == x.dtype

        nptype = o.reshape(-1)[0].numpy().dtype.type
        if nptype not in [np.float16, np.float32, np.float64]:
            raise ValueError(  # pragma: no cover
                f"expected dtype to be float16, float32 or float64, found '{nptype}'"
            )

        threshold = nptype(threshold)
        offset = nptype(1.0)

        # lower_/upper_ are adjacent floating point numbers that enclose the
        # decision boundary.
        if self.included_in == "lower":
            lower_ = threshold
            upper_ = np.nextafter(threshold, threshold + offset)
        elif self.included_in == "upper":
            lower_ = np.nextafter(threshold, threshold - offset)
            upper_ = threshold
        else:
            raise ValueError(
                f"expected included_in to be 'lower' or 'upper', found '{self.included_in}'"
            )

        assert lower_ < upper_

        p = ep.full_like(o, ep.nan)

        lower = ep.ones_like(o) * lower_
        upper = ep.ones_like(o) * upper_

        # (original side, starting point side) -> refined value
        regions = (
            (ep.logical_and(o <= lower, x <= lower), o),
            (ep.logical_and(o <= lower, x >= upper), upper),
            (ep.logical_and(o >= upper, x <= lower), lower),
            (ep.logical_and(o >= upper, x >= upper), o),
        )
        for mask, value in regions:
            p = ep.where(mask, value, p)

        # Every element must have been covered by one of the four regions.
        assert not ep.any(ep.isnan(p))

        adv_before = is_adversarial(x)
        adv_after = is_adversarial(p)
        if (adv_before != adv_after).any():
            raise ValueError(
                "The specified threshold does not match what is done by the model."
            )
        return restore_type(p)
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
        *,
        starting_points: Optional[ep.Tensor] = None,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        criterion_ = get_criterion(criterion)

        if isinstance(criterion_, Misclassification):
            targeted = False
            classes = criterion_.labels
        elif isinstance(criterion_, TargetedMisclassification):
            targeted = True
            classes = criterion_.target_classes
        else:
            raise ValueError("unsupported criterion")

        def loss_fn(
            inputs: ep.Tensor, labels: ep.Tensor
        ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:

            logits = model(inputs)

            if targeted:
                c_minimize = best_other_classes(logits, labels)
                c_maximize = labels  # target_classes
            else:
                c_minimize = labels  # labels
                c_maximize = best_other_classes(logits, labels)

            loss = logits[rows, c_minimize] - logits[rows, c_maximize]

            return -loss.sum(), (logits, loss)

        x, restore_type = ep.astensor_(inputs)
        del inputs, criterion, kwargs
        N = len(x)

        # start from initialization points/attack
        if starting_points is not None:
            x1 = starting_points
        else:
            if self.init_attack is not None:
                x1 = self.init_attack.run(model, x, criterion_)
            else:
                x1 = None

        # if initial points or initialization attacks are provided,
        #   search for the boundary
        if x1 is not None:
            is_adv = get_is_adversarial(criterion_, model)
            assert is_adv(x1).all()
            lower_bound = ep.zeros(x, shape=(N, ))
            upper_bound = ep.ones(x, shape=(N, ))
            for _ in range(self.binary_search_steps):
                epsilons = (lower_bound + upper_bound) / 2
                mid_points = self.mid_points(x, x1, epsilons, model.bounds)
                is_advs = is_adv(mid_points)
                lower_bound = ep.where(is_advs, lower_bound, epsilons)
                upper_bound = ep.where(is_advs, epsilons, upper_bound)
            starting_points = self.mid_points(x, x1, upper_bound, model.bounds)
            delta = starting_points - x
        else:
            # start from x0
            delta = ep.zeros_like(x)

        if classes.shape != (N, ):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}")

        min_, max_ = model.bounds
        rows = range(N)
        grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

        if self.p != 0:
            epsilon = ep.inf * ep.ones(x, len(x))
        else:
            epsilon = ep.ones(x, len(x)) if x1 is None \
                else ep.norms.l0(flatten(delta), axis=-1)
        if self.p != 0:
            worst_norm = ep.norms.lp(flatten(ep.maximum(x - min_, max_ - x)),
                                     p=self.p,
                                     axis=-1)
        else:
            worst_norm = flatten(ep.ones_like(x)).bool().sum(axis=1).float32()

        best_lp = worst_norm
        best_delta = delta
        adv_found = ep.zeros(x, len(x)).bool()

        for i in range(self.steps):
            # perform cosine annealing of learning rates
            stepsize = (self.min_stepsize +
                        (self.max_stepsize - self.min_stepsize) *
                        (1 + math.cos(math.pi * i / self.steps)) / 2)
            gamma = (0.001 + (self.gamma - 0.001) *
                     (1 + math.cos(math.pi * (i / self.steps))) / 2)

            x_adv = x + delta

            loss, (logits,
                   loss_batch), gradients = grad_and_logits(x_adv, classes)
            is_adversarial = criterion_(x_adv, logits)

            lp = ep.norms.lp(flatten(delta), p=self.p, axis=-1)
            is_smaller = lp <= best_lp
            is_both = ep.logical_and(is_adversarial, is_smaller)
            adv_found = ep.logical_or(adv_found, is_adversarial)
            best_lp = ep.where(is_both, lp, best_lp)
            best_delta = ep.where(atleast_kd(is_both, x.ndim), delta,
                                  best_delta)

            # update epsilon
            if self.p != 0:
                distance_to_boundary = abs(loss_batch) / ep.norms.lp(
                    flatten(gradients), p=self.dual, axis=-1)
                epsilon = ep.where(
                    is_adversarial,
                    ep.minimum(
                        epsilon * (1 - gamma),
                        ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)),
                    ep.where(
                        adv_found, epsilon * (1 + gamma),
                        ep.norms.lp(flatten(delta), p=self.p, axis=-1) +
                        distance_to_boundary))
            else:
                epsilon = ep.where(
                    is_adversarial,
                    ep.minimum(
                        ep.minimum(epsilon - 1,
                                   (epsilon * (1 - gamma)).astype(int).astype(
                                       epsilon.dtype)),
                        ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)),
                    ep.maximum(epsilon + 1,
                               (epsilon * (1 + gamma)).astype(int).astype(
                                   epsilon.dtype)))
                epsilon = ep.maximum(0, epsilon).astype(epsilon.dtype)

            # clip epsilon
            epsilon = ep.minimum(epsilon, worst_norm)

            # computes normalized gradient update
            grad_ = self.normalize(gradients, x=x,
                                   bounds=model.bounds) * stepsize

            # do step
            delta = delta + grad_

            # project according to the given norm
            delta = self.project(x=x + delta, x0=x, epsilon=epsilon) - x

            # clip to valid bounds
            delta = ep.clip(x + delta, *model.bounds) - x

        x_adv = x + best_delta
        return restore_type(x_adv)
Beispiel #20
0
    def __call__(
        self,
        inputs,
        labels,
        *,
        adversarials,
        criterion,
        threshold=None,
        included_in="upper",
    ):
        """For models that preprocess their inputs by binarizing the
        inputs, this attack can improve adversarials found by other
        attacks. It does this by utilizing information about the
        binarization and mapping values to the corresponding value in
        the clean input or to the right side of the threshold.

        Parameters
        ----------
        inputs
            The clean inputs; wrapped with ``ep.astensor``.
        labels
            The labels passed on to ``criterion``.
        adversarials
            Adversarials found by another attack for ``inputs``. Must have
            the same dtype as ``inputs``.
        criterion
            Callable invoked as ``criterion(inputs, labels, perturbed, logits)``.
        threshold : float
            The threshold used by the model's binarization. If None,
            defaults to the midpoint of the model bounds, i.e.
            ``(min_ + max_) / 2`` with ``min_, max_ = self.model.bounds()``.
        included_in : str
            Whether the threshold value itself belongs to the lower or
            upper interval.

        Returns
        -------
        The refined adversarials as a native tensor (``p.tensor``).

        Raises
        ------
        ValueError
            If the dtype is not float16/float32/float64 or ``included_in``
            is neither ``"lower"`` nor ``"upper"``.
        AssertionError
            If the dtypes differ or the refined points are not adversarial
            exactly where the given adversarials are.
        """
        o = ep.astensor(inputs)
        labels = ep.astensor(labels)
        x = ep.astensor(adversarials)

        def is_adversarial(p: ep.Tensor) -> ep.Tensor:
            """For each input in p, returns true if it is an adversarial for
            the given model and criterion"""
            logits = ep.astensor(self.model.forward(p.tensor))
            return criterion(o, labels, p, logits)

        min_, max_ = self.model.bounds()
        if threshold is None:
            threshold = (min_ + max_) / 2.0

        assert o.dtype == x.dtype
        dtype = o.dtype

        if dtype == o.backend.float16:
            nptype = np.float16
        elif dtype == o.backend.float32:
            nptype = np.float32
        elif dtype == o.backend.float64:
            nptype = np.float64
        else:
            raise ValueError(
                f"expected dtype to be float16, float32 or float64, found '{dtype}'"
            )

        # Cast to the tensor's precision so that nextafter yields the
        # neighbouring representable value in that precision.
        threshold = nptype(threshold)
        offset = nptype(1.0)

        if included_in == "lower":
            lower = threshold
            upper = np.nextafter(threshold, threshold + offset)
        elif included_in == "upper":
            lower = np.nextafter(threshold, threshold - offset)
            upper = threshold
        else:
            raise ValueError(
                f"expected included_in to be 'lower' or 'upper', found '{included_in}'"
            )

        assert lower < upper

        # Start from NaN so that any element not covered by one of the four
        # cases below is detected by the isnan check.
        p = ep.full_like(o, ep.nan)

        lower = ep.ones_like(o) * lower
        upper = ep.ones_like(o) * upper

        # original and adversarial on the lower side: keep the original value
        indices = ep.logical_and(o <= lower, x <= lower)
        p = ep.where(indices, o, p)

        # original lower, adversarial upper: smallest value on the upper side
        indices = ep.logical_and(o <= lower, x >= upper)
        p = ep.where(indices, upper, p)

        # original upper, adversarial lower: largest value on the lower side
        indices = ep.logical_and(o >= upper, x <= lower)
        p = ep.where(indices, lower, p)

        # original and adversarial on the upper side: keep the original value
        indices = ep.logical_and(o >= upper, x >= upper)
        p = ep.where(indices, o, p)

        assert not ep.any(ep.isnan(p))

        is_adv1 = is_adversarial(x)
        is_adv2 = is_adversarial(p)
        assert (is_adv1 == is_adv2).all(
        ), "The specified threshold does not match what is done by the model."
        return p.tensor
Beispiel #21
0
    def _binary_search_on_alpha(
            self, 
            function_evolution: Callable[[ep.Tensor], ep.Tensor], 
            lower: ep.Tensor) -> ep.Tensor:    
        """Refine per-sample parameters by bracketing and bisection.

        First extends each sample's parameter in steps of
        ``self.theta_max / self.T`` until an (adversarial, non-adversarial)
        bracket ``(lower, upper)`` is found, then bisects that bracket until
        the difference of ``get_alpha`` at both ends is at most
        ``self._BS_gamma`` or ``self._BS_max_iteration`` steps were done.

        Args:
            function_evolution: Maps a tensor of parameters to candidate
                inputs that are passed to ``self._is_adversarial``.
            lower: Initial parameters, one per sample.
                NOTE(review): presumably angles in degrees that are already
                adversarial -- confirm against the caller.

        Returns:
            The final ``lower`` values as an EagerPy tensor.
        """
        # Upper --> not adversarial /  Lower --> adversarial
        # v_type is only used below as a template for ndim / zeros_like.
        v_type = function_evolution(lower)
        def get_alpha(theta: ep.Tensor) -> ep.Tensor:
            # theta is scaled by pi / 180 before being passed to self._cos
            return 1 - ep.astensor(self._cos(theta.raw * np.pi / 180))

        check_opposite = lower > 0 # if param < 0: abs(param) doesn't work
        
        # Get the upper range
        # Entries already at |theta_max| get 0 as a "no upper bound yet" marker.
        upper = ep.where(
            abs(lower) != self.theta_max, 
            lower + ep.sign(lower) * self.theta_max / self.T,
            ep.zeros_like(lower)
            )

        mask_upper = (upper == 0)
        while mask_upper.any():
            # Find the correct lower/upper range
            # if True in mask_upper, the range haven't been found
            new_upper = lower + ep.sign(lower) * self.theta_max / self.T
            potential_x = function_evolution(new_upper)
            # Samples that already have a range are replaced by zeros here.
            x = ep.where(
                atleast_kd(mask_upper, potential_x.ndim),
                potential_x,
                ep.zeros_like(potential_x)
            )

            is_advs =  self._is_adversarial(x)
            # Still adversarial: move lower outwards; otherwise new_upper
            # becomes the upper end of the bracket.
            lower = ep.where(ep.logical_and(mask_upper, is_advs), new_upper, lower) 
            upper = ep.where(ep.logical_and(mask_upper, is_advs.logical_not()), new_upper, upper) 
            # Keep searching only for samples that were still adversarial.
            mask_upper = mask_upper * is_advs

        step = 0
        # over_gamma marks the samples whose bracket is still too wide.
        over_gamma = abs(get_alpha(upper) - get_alpha(lower)) > self._BS_gamma
        while step < self._BS_max_iteration and over_gamma.any(): 
            mid_bound = (upper + lower) / 2
            mid = ep.where(
                atleast_kd(ep.logical_and(mid_bound != 0, over_gamma), v_type.ndim),
                function_evolution(mid_bound),
                ep.zeros_like(v_type)
            )
            is_adv = self._is_adversarial(mid)

            # Also evaluate the mirrored parameter (-mid_bound) for samples
            # flagged in check_opposite.
            mid_opp = ep.where(
                atleast_kd(ep.logical_and(ep.astensor(check_opposite), over_gamma), mid.ndim),
                function_evolution(-mid_bound),
                ep.zeros_like(mid)
            )
            is_adv_opp = self._is_adversarial(mid_opp)

            # Midpoint adversarial: it becomes the new lower end.
            lower = ep.where(over_gamma * is_adv, mid_bound, lower)
            # Only the mirrored midpoint is adversarial: flip the bracket's sign.
            lower = ep.where(over_gamma * is_adv.logical_not() * check_opposite * is_adv_opp, -mid_bound, lower)
            upper = ep.where(over_gamma * is_adv.logical_not() * check_opposite * is_adv_opp, - upper, upper)
            # lower did not move to +/- mid_bound: the midpoint becomes the upper end.
            upper = ep.where(over_gamma * (abs(lower) != abs(mid_bound)), mid_bound, upper)

            check_opposite = over_gamma * check_opposite * is_adv_opp * (lower > 0)
            over_gamma = abs(get_alpha(upper) - get_alpha(lower)) > self._BS_gamma

            step += 1
        return ep.astensor(lower)
Beispiel #22
0
    def __call__(self, inputs, labels, *, criterion, steps=1000):
        """Search for small salt-and-pepper noise that makes inputs adversarial.

        In every step a per-sample noise probability ``p`` is used to set
        random positions to the upper ("salt") or lower ("pepper") model
        bound. Whenever a sample becomes adversarial with a smaller
        perturbation than before, the result is stored and the probability
        range searched for that sample is narrowed.

        Parameters
        ----------
        inputs
            The clean inputs; wrapped with ``ep.astensor``.
        labels
            The labels passed on to ``criterion``.
        criterion
            Callable invoked as ``criterion(inputs, labels, perturbed, logits)``.
        steps : int
            Number of noise draws.

        Returns
        -------
        The best perturbed inputs found as a native tensor. Samples for which
        no adversarial was found keep their original value.
        """
        x0 = ep.astensor(inputs)
        labels = ep.astensor(labels)

        def is_adversarial(p: ep.Tensor) -> ep.Tensor:
            """For each input in p, returns true if it is an adversarial for
            the given model and criterion"""
            logits = ep.astensor(self.model.forward(p.tensor))
            return criterion(x0, labels, p, logits)

        N = len(x0)
        shape = list(x0.shape)
        if self.channel_axis is not None:
            # draw one random value per position and broadcast it over channels
            shape[self.channel_axis] = 1

        min_, max_ = self.model.bounds()
        r = max_ - min_

        result = x0
        is_adv = is_adversarial(result)
        # perturbation norm of the best adversarial so far: 0 if the clean
        # input is already adversarial, otherwise nothing found yet (inf)
        best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf))
        min_probability = ep.zeros(x0, N)
        max_probability = ep.ones(x0, N)
        stepsizes = max_probability / steps
        p = stepsizes

        for step in range(steps):
            # add salt and pepper
            u = ep.uniform(x0, shape)
            p_ = atleast_kd(p, x0.ndim)
            salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
            pepper = -(u < p_ / 2).astype(x0.dtype) * r
            x = x0 + salt + pepper
            x = ep.clip(x, min_, max_)

            # check if we found new best adversarials
            # The norm has to be that of the perturbation (x - x0), not of the
            # perturbed input itself, to be comparable with best_advs_norms.
            norms = flatten(x - x0).square().sum(axis=-1).sqrt()
            closer = norms < best_advs_norms
            is_adv = is_adversarial(x)  # TODO: ignore those that are not closer anyway
            is_best_adv = ep.logical_and(is_adv, closer)

            # update results and search space
            result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
            best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
            min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
            # we set max_probability a bit higher than p because the relationship
            # between p and norms is not strictly monotonic
            max_probability = ep.where(
                is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability
            )
            # step < steps inside the loop, so remaining is always >= 1
            remaining = steps - step
            stepsizes = ep.where(
                is_best_adv, (max_probability - min_probability) / remaining, stepsizes
            )
            # restart from the lower end once the upper end has been reached
            reset = p == max_probability
            p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
            p = ep.minimum(p + stepsizes, max_probability)

        return result.tensor
Beispiel #23
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[T] = None,
        epsilons: float,
        **kwargs: Any,
    ) -> T:
        """Move known adversarials towards the originals by a random walk.

        Starting from adversarial points (``starting_points`` or the output
        of an initialization attack), each step draws proposals with
        ``draw_proposals`` and keeps those that are adversarial and closer to
        the originals. The spherical and source step sizes are adapted from
        the success rates tracked in two queues.

        Args:
            model: The model to attack; provides ``bounds``.
            inputs: The original inputs.
            criterion: The criterion (or labels) defining adversarial.
            early_stop: Forwarded to the initialization attack only.
            starting_points: Optional adversarial starting points. If None,
                ``self.init_attack`` (or a blended-uniform-noise linear
                search with 50 steps) is used to find them.
            epsilons: Only read when ``self.eps_early_stop`` is set.
                NOTE(review): annotated as ``float``, but the body calls
                ``len()`` on it and reads element 0, so a one-element
                sequence is expected -- confirm with the caller.

        Returns:
            The best adversarials found, converted back with ``restore_type``.

        Raises:
            AssertionError: If ``self.eps_early_stop`` is set and
                ``epsilons`` does not contain exactly one value.
            ValueError: If the initialization attack fails for some inputs
                or some ``starting_points`` are not adversarial.
        """
        raise_if_kwargs(kwargs)
        originals, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        # Raised explicitly (instead of print + assert) so that the check is
        # not stripped under ``python -O``; the exception type is unchanged.
        if self.eps_early_stop and len(epsilons) != 1:
            raise AssertionError(
                'epsilon-based early stopping only possible for one epsilon value'
            )

        verify_input_bounds(originals, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        if starting_points is None:
            init_attack: MinimizationAttack
            if self.init_attack is None:
                init_attack = LinearSearchBlendedUniformNoiseAttack(steps=50)
                logging.info(
                    f"Neither starting_points nor init_attack given. Falling"
                    f" back to {init_attack!r} for initialization."
                )
            else:
                init_attack = self.init_attack
            # TODO: use call and support all types of attacks (once early_stop is
            # possible in __call__)
            best_advs = init_attack.run(
                model, originals, criterion, early_stop=early_stop
            )
        else:
            best_advs = ep.astensor(starting_points)

        is_adv = is_adversarial(best_advs)
        if not is_adv.all():
            failed = is_adv.logical_not().float32().sum()
            if starting_points is None:
                raise ValueError(
                    f"init_attack failed for {failed} of {len(is_adv)} inputs"
                )
            else:
                raise ValueError(
                    f"{failed} of {len(is_adv)} starting_points are not adversarial"
                )
        del starting_points

        tb = TensorBoard(logdir=self.tensorboard)
        # try/finally guarantees that the writer is closed even if the model
        # or one of the helpers raises in the middle of the walk.
        try:
            N = len(originals)
            # The epsilon threshold is only needed (and ``epsilons`` only
            # indexed) when epsilon-based early stopping is enabled.
            epsilon = (
                ep.astensor(epsilons[0] * ep.ones(originals, (N,)))
                if self.eps_early_stop
                else None
            )
            ndim = originals.ndim
            spherical_steps = ep.ones(originals, N) * self.spherical_step
            source_steps = ep.ones(originals, N) * self.source_step

            tb.scalar("batchsize", N, 0)

            # create two queues for each sample to track success rates
            # (used to update the hyper parameters)
            stats_spherical_adversarial = ArrayQueue(maxlen=100, N=N)
            stats_step_adversarial = ArrayQueue(maxlen=30, N=N)

            bounds = model.bounds

            for step in range(1, self.steps + 1):
                converged = source_steps < self.source_step_convergance
                if converged.all():
                    break  # pragma: no cover
                converged = atleast_kd(converged, ndim)

                # TODO: performance: ignore those that have converged
                # (we could select the non-converged ones, but we currently
                # cannot easily invert this in the end using EagerPy)

                unnormalized_source_directions = originals - best_advs
                source_norms = ep.norms.l2(
                    flatten(unnormalized_source_directions), axis=-1
                )
                source_directions = unnormalized_source_directions / atleast_kd(
                    source_norms, ndim
                )

                # only check spherical candidates every k steps
                check_spherical_and_update_stats = (
                    step % self.update_stats_every_k == 0
                )

                candidates, spherical_candidates = draw_proposals(
                    bounds,
                    originals,
                    best_advs,
                    unnormalized_source_directions,
                    source_directions,
                    source_norms,
                    spherical_steps,
                    source_steps,
                )
                # NOTE(review): dtype agreement between the proposals and
                # ``originals`` is not enforced here; add asserts if
                # draw_proposals is expected to preserve the dtype.

                is_adv = is_adversarial(candidates)

                spherical_is_adv: Optional[ep.Tensor]
                if check_spherical_and_update_stats:
                    spherical_is_adv = is_adversarial(spherical_candidates)
                    stats_spherical_adversarial.append(spherical_is_adv)
                    # TODO: algorithm: the original implementation ignores those samples
                    # for which spherical is not adversarial and continues with the
                    # next iteration -> we estimate different probabilities (conditional vs. unconditional)
                    # TODO: thoughts: should we always track this because we compute it anyway
                    stats_step_adversarial.append(is_adv)
                else:
                    spherical_is_adv = None

                # in theory, we are closer per construction
                # but limited numerical precision might break this
                distances = ep.norms.l2(flatten(originals - candidates), axis=-1)
                closer = distances < source_norms
                is_best_adv = ep.logical_and(is_adv, closer)
                is_best_adv = atleast_kd(is_best_adv, ndim)

                cond = converged.logical_not().logical_and(is_best_adv)
                best_advs = ep.where(cond, candidates, best_advs)

                tb.probability("converged", converged, step)
                tb.scalar("updated_stats", check_spherical_and_update_stats, step)
                tb.histogram("norms", source_norms, step)
                tb.probability("is_adv", is_adv, step)
                if spherical_is_adv is not None:
                    tb.probability("spherical_is_adv", spherical_is_adv, step)
                tb.histogram("candidates/distances", distances, step)
                tb.probability("candidates/closer", closer, step)
                tb.probability("candidates/is_best_adv", is_best_adv, step)
                tb.probability("new_best_adv_including_converged", is_best_adv, step)
                tb.probability("new_best_adv", cond, step)

                if check_spherical_and_update_stats:
                    full = stats_spherical_adversarial.isfull()
                    tb.probability("spherical_stats/full", full, step)
                    if full.any():
                        probs = stats_spherical_adversarial.mean()
                        cond1 = ep.logical_and(probs > 0.5, full)
                        spherical_steps = ep.where(
                            cond1,
                            spherical_steps * self.step_adaptation,
                            spherical_steps,
                        )
                        source_steps = ep.where(
                            cond1, source_steps * self.step_adaptation, source_steps
                        )
                        cond2 = ep.logical_and(probs < 0.2, full)
                        spherical_steps = ep.where(
                            cond2,
                            spherical_steps / self.step_adaptation,
                            spherical_steps,
                        )
                        source_steps = ep.where(
                            cond2, source_steps / self.step_adaptation, source_steps
                        )
                        stats_spherical_adversarial.clear(
                            ep.logical_or(cond1, cond2)
                        )
                        tb.conditional_mean(
                            "spherical_stats/isfull/success_rate/mean",
                            probs,
                            full,
                            step,
                        )
                        tb.probability_ratio(
                            "spherical_stats/isfull/too_linear", cond1, full, step
                        )
                        tb.probability_ratio(
                            "spherical_stats/isfull/too_nonlinear", cond2, full, step
                        )

                    full = stats_step_adversarial.isfull()
                    tb.probability("step_stats/full", full, step)
                    if full.any():
                        probs = stats_step_adversarial.mean()
                        # TODO: algorithm: changed the two values because we are currently tracking p(source_step_sucess)
                        # instead of p(source_step_success | spherical_step_sucess) that was tracked before
                        cond1 = ep.logical_and(probs > 0.25, full)
                        source_steps = ep.where(
                            cond1, source_steps * self.step_adaptation, source_steps
                        )
                        cond2 = ep.logical_and(probs < 0.1, full)
                        source_steps = ep.where(
                            cond2, source_steps / self.step_adaptation, source_steps
                        )
                        stats_step_adversarial.clear(ep.logical_or(cond1, cond2))
                        tb.conditional_mean(
                            "step_stats/isfull/success_rate/mean", probs, full, step
                        )
                        tb.probability_ratio(
                            "step_stats/isfull/success_rate_too_high",
                            cond1,
                            full,
                            step,
                        )
                        tb.probability_ratio(
                            "step_stats/isfull/success_rate_too_low",
                            cond2,
                            full,
                            step,
                        )

                tb.histogram("spherical_step", spherical_steps, step)
                tb.histogram("source_step", source_steps, step)

                if epsilon is not None:
                    # stop as soon as every perturbation norm is <= epsilon
                    best_advs_norms = flatten(originals - best_advs).norms.l2(
                        axis=-1
                    )
                    if (ep.maximum(best_advs_norms, epsilon) == epsilon).all():
                        print('early stopped because epsilon condition satisfied')
                        break
        finally:
            tb.close()
        return restore_type(best_advs)
Beispiel #24
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Blur the inputs with increasing strength until they are adversarial.

        For ``self.steps`` linearly increasing values of epsilon, the inputs
        are filtered with ``gaussian_filter`` using a standard deviation of
        ``epsilon * max_sigma`` along the two spatial axes (and 0 along the
        batch and channel axes). For every sample the first blurred version
        that is adversarial is kept.

        Args:
            model: The model to attack; provides ``bounds``.
            inputs: The original inputs; must have exactly four dimensions.
            criterion: The criterion (or labels) defining adversarial.
            early_stop: Not used in this method body.

        Returns:
            The first adversarial blurred input per sample (the unmodified
            input where none was found), converted with ``restore_type``.

        Raises:
            NotImplementedError: If the inputs are not four-dimensional.
            ValueError: If the channel axis cannot be inferred, or if
                ``self.max_sigma`` is None and the channel axis is not 1 or 3.
        """
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        if x.ndim != 4:
            raise NotImplementedError(
                "only implemented for inputs with two spatial dimensions (and one channel and one batch dimension)"
            )

        if self.channel_axis is None:
            channel_axis = get_channel_axis(model, x.ndim)
        else:
            # normalize negative axes to their non-negative equivalent
            channel_axis = self.channel_axis % x.ndim

        if channel_axis is None:
            raise ValueError(
                "cannot infer the data_format from the model, please specify"
                " channel_axis when initializing the attack"
            )

        max_sigma: float
        if self.max_sigma is None:
            # default to the larger spatial extent of the inputs
            if channel_axis == 1:
                h, w = x.shape[2:4]
            elif channel_axis == 3:
                h, w = x.shape[1:3]
            else:
                raise ValueError(
                    f"expected 'channel_axis' to be 1 or 3, got {channel_axis}"
                )
            max_sigma = max(h, w)
        else:
            max_sigma = self.max_sigma

        min_, max_ = model.bounds

        x0 = x
        x0_ = x0.numpy()

        result = x0
        found = is_adversarial(x0)

        epsilon = 0.0
        stepsize = 1.0 / self.steps
        for _ in range(self.steps):
            # TODO: reduce the batch size to the ones that haven't been sucessful

            epsilon += stepsize

            # blur only along the spatial axes
            sigmas = [epsilon * max_sigma] * x0.ndim
            sigmas[0] = 0
            sigmas[channel_axis] = 0

            # TODO: once we can implement gaussian_filter in eagerpy, avoid converting from numpy
            x_ = gaussian_filter(x0_, sigmas)
            x_ = np.clip(x_, min_, max_)
            x = ep.from_numpy(x0, x_)

            is_adv = is_adversarial(x)
            # only record the first (weakest) blur that succeeds per sample
            new_adv = ep.logical_and(is_adv, found.logical_not())
            result = ep.where(atleast_kd(new_adv, x.ndim), x, result)
            found = ep.logical_or(new_adv, found)

            if found.all():
                break

        return restore_type(result)
Beispiel #25
0
def test_logical_and_scalar(t: Tensor) -> Tensor:
    """Return ``ep.logical_and`` of the Python scalar ``True`` and ``t < 3``."""
    below_three = t < 3
    return ep.logical_and(True, below_three)
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Find small-L2 adversarials by optimizing a penalized loss.

        For every sample a perturbation ``delta`` (in attack space) is
        optimized with ``AdamOptimizer`` to minimize the sum of a
        misclassification term weighted by a per-sample constant and the
        squared L2 distance to the input. An outer search over the constants
        looks for the smallest constant that still yields adversarials.

        Args:
            model: The model to attack; provides ``bounds`` and is called to
                obtain logits.
            inputs: The original inputs.
            criterion: ``Misclassification`` or ``TargetedMisclassification``
                (after ``get_criterion``).
            early_stop: Not used in this method body.

        Returns:
            The adversarials with the smallest L2 norm found per sample,
            converted with ``restore_type``. Samples for which nothing was
            found keep the initial all-zeros entry of ``best_advs``.

        Raises:
            ValueError: If the criterion is unsupported or the labels /
                target classes do not have shape ``(N,)``.
        """
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        N = len(x)

        # change_classes_logits is the offset that is_adversarial adds to the
        # logit of the label (untargeted) or target class (targeted).
        if isinstance(criterion_, Misclassification):
            targeted = False
            classes = criterion_.labels
            change_classes_logits = self.confidence
        elif isinstance(criterion_, TargetedMisclassification):
            targeted = True
            classes = criterion_.target_classes
            change_classes_logits = -self.confidence
        else:
            raise ValueError("unsupported criterion")

        def is_adversarial(perturbed: ep.Tensor,
                           logits: ep.Tensor) -> ep.Tensor:
            # Shift the logit of `classes` by the confidence offset before
            # evaluating the criterion. Note that `logits +=` rebinds/updates
            # the local `logits` only within this helper's view of it.
            if change_classes_logits != 0:
                logits += ep.onehot_like(logits,
                                         classes,
                                         value=change_classes_logits)
            return criterion_(perturbed, logits)

        if classes.shape != (N, ):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}")

        bounds = model.bounds
        # NOTE(review): _to_attack_space / _to_model_space are defined outside
        # this block; presumably an invertible reparametrization that keeps
        # inputs within bounds -- confirm.
        to_attack_space = partial(_to_attack_space, bounds=bounds)
        to_model_space = partial(_to_model_space, bounds=bounds)

        x_attack = to_attack_space(x)
        # The round trip through attack space is used as the reference point
        # for the distance term of the loss.
        reconstsructed_x = to_model_space(x_attack)

        rows = range(N)

        def loss_fun(
            delta: ep.Tensor, consts: ep.Tensor
        ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
            # Returns the scalar loss and, as auxiliary outputs, the perturbed
            # inputs in model space and their logits.
            assert delta.shape == x_attack.shape
            assert consts.shape == (N, )

            x = to_model_space(x_attack + delta)
            logits = model(x)

            if targeted:
                c_minimize = best_other_classes(logits, classes)
                c_maximize = classes  # target_classes
            else:
                c_minimize = classes  # labels
                c_maximize = best_other_classes(logits, classes)

            is_adv_loss = logits[rows, c_minimize] - logits[rows, c_maximize]
            assert is_adv_loss.shape == (N, )

            # hinge on the logit difference plus confidence margin, weighted
            # by the per-sample constant
            is_adv_loss = is_adv_loss + self.confidence
            is_adv_loss = ep.maximum(0, is_adv_loss)
            is_adv_loss = is_adv_loss * consts

            squared_norms = flatten(x - reconstsructed_x).square().sum(axis=-1)
            loss = is_adv_loss.sum() + squared_norms.sum()
            return loss, (x, logits)

        loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

        # The search state over the constants is kept in NumPy arrays.
        consts = self.initial_const * np.ones((N, ))
        lower_bounds = np.zeros((N, ))
        upper_bounds = np.inf * np.ones((N, ))

        best_advs = ep.zeros_like(x)
        best_advs_norms = ep.full(x, (N, ), ep.inf)

        # the binary search searches for the smallest consts that produce adversarials
        for binary_search_step in range(self.binary_search_steps):
            if (binary_search_step == self.binary_search_steps - 1
                    and self.binary_search_steps >= 10):
                # in the last binary search step, repeat the search once
                consts = np.minimum(upper_bounds, 1e10)

            # create a new optimizer find the delta that minimizes the loss
            delta = ep.zeros_like(x_attack)
            optimizer = AdamOptimizer(delta)

            # tracks whether adv with the current consts was found
            found_advs = np.full((N, ), fill_value=False)
            loss_at_previous_check = np.inf

            consts_ = ep.from_numpy(x, consts.astype(np.float32))

            for step in range(self.steps):
                # perturbed / logits correspond to delta before this update
                loss, (perturbed,
                       logits), gradient = loss_aux_and_grad(delta, consts_)
                delta += optimizer(gradient, self.stepsize)

                if self.abort_early and step % (np.ceil(self.steps / 10)) == 0:
                    # after each tenth of the overall steps, check progress
                    if not (loss <= 0.9999 * loss_at_previous_check):
                        break  # stop Adam if there has been no progress
                    loss_at_previous_check = loss

                found_advs_iter = is_adversarial(perturbed, logits)
                found_advs = np.logical_or(found_advs, found_advs_iter.numpy())

                # keep the adversarial with the smallest L2 distance to x
                norms = flatten(perturbed - x).norms.l2(axis=-1)
                closer = norms < best_advs_norms
                new_best = ep.logical_and(closer, found_advs_iter)

                new_best_ = atleast_kd(new_best, best_advs.ndim)
                best_advs = ep.where(new_best_, perturbed, best_advs)
                best_advs_norms = ep.where(new_best, norms, best_advs_norms)

            # success lowers the upper bound, failure raises the lower bound
            upper_bounds = np.where(found_advs, consts, upper_bounds)
            lower_bounds = np.where(found_advs, lower_bounds, consts)

            # grow by a factor of 10 until some const succeeded, then bisect
            consts_exponential_search = consts * 10
            consts_binary_search = (lower_bounds + upper_bounds) / 2
            consts = np.where(np.isinf(upper_bounds),
                              consts_exponential_search, consts_binary_search)

        return restore_type(best_advs)
Beispiel #27
0
def test_logical_and_manual(t: Tensor) -> None:
    """Check that and-ing ``t < 3`` with an all-True mask yields ``t < 3``."""
    all_true = ep.ones_like(t).bool()
    expected = t < 3
    actual = ep.logical_and(t < 3, all_true)
    assert (actual == expected).all()
Beispiel #28
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[T] = None,
        **kwargs: Any,
    ) -> T:
        """Refine given adversarials for models that binarize their inputs.

        Every element of the result is either the clean input value (when
        the clean input and the starting point lie on the same side of the
        binarization threshold) or the representable value closest to the
        threshold on the starting point's side (when they lie on different
        sides).

        The behaviour is configured through two instance attributes:

        ``self.threshold``
            The threshold used by the model's binarization. If ``None``,
            the midpoint of ``model.bounds`` is used, i.e.
            ``(min_ + max_) / 2``.
        ``self.included_in``
            ``"lower"`` or ``"upper"``: whether the threshold value itself
            belongs to the lower or to the upper interval.

        Parameters
        ----------
        model : Model
            The model under attack; its ``bounds`` supply the default
            threshold.
        inputs : T
            The clean inputs.
        criterion : Union[Criterion, T]
            Passed through ``get_criterion`` and used to decide whether a
            sample is adversarial.
        early_stop : Optional[float]
            Accepted for interface compatibility; not used in this method.
        starting_points : Optional[T]
            The adversarials to refine. Required.

        Returns
        -------
        T
            The refined adversarials, converted back with ``restore_type``.

        Raises
        ------
        ValueError
            If ``starting_points`` is missing, the dtype is not float16,
            float32 or float64, ``self.included_in`` is invalid, or the
            refined points are classified differently from the starting
            points (the threshold does not match the model).
        """
        raise_if_kwargs(kwargs)
        if starting_points is None:
            raise ValueError(
                "BinarizationRefinementAttack requires starting_points")
        (o, x), restore_type = ep.astensors_(inputs, starting_points)
        del inputs, starting_points, kwargs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        threshold = self.threshold
        if threshold is None:
            # fall back to the midpoint of the model's input range
            min_, max_ = model.bounds
            threshold = (min_ + max_) / 2.0

        assert o.dtype == x.dtype

        # numpy scalar type matching the tensors' floating point precision
        nptype = o.reshape(-1)[0].numpy().dtype.type
        supported_types = (np.float16, np.float32, np.float64)
        if nptype not in supported_types:
            raise ValueError(  # pragma: no cover
                f"expected dtype to be float16, float32 or float64, found '{nptype}'"
            )

        threshold = nptype(threshold)
        offset = nptype(1.0)

        # lower_ / upper_ are adjacent representable values around the
        # threshold; which of the two equals the threshold depends on the
        # interval the threshold belongs to
        if self.included_in == "lower":
            lower_, upper_ = threshold, np.nextafter(threshold,
                                                     threshold + offset)
        elif self.included_in == "upper":
            lower_, upper_ = np.nextafter(threshold,
                                          threshold - offset), threshold
        else:
            raise ValueError(
                f"expected included_in to be 'lower' or 'upper', found '{self.included_in}'"
            )

        assert lower_ < upper_

        # start from NaN everywhere so that unhandled positions are detected
        p = ep.full_like(o, ep.nan)

        lower = ep.ones_like(o) * lower_
        upper = ep.ones_like(o) * upper_

        o_is_low, o_is_high = o <= lower, o >= upper
        x_is_low, x_is_high = x <= lower, x >= upper

        # (clean side, starting point side) -> value written to the result
        cases = (
            (o_is_low, x_is_low, o),
            (o_is_low, x_is_high, upper),
            (o_is_high, x_is_low, lower),
            (o_is_high, x_is_high, o),
        )
        for o_side, x_side, value in cases:
            p = ep.where(ep.logical_and(o_side, x_side), value, p)

        assert not ep.any(ep.isnan(p))

        # the refinement must not change the model's decision
        is_adv1 = is_adversarial(x)
        is_adv2 = is_adversarial(p)
        if (is_adv1 != is_adv2).any():
            raise ValueError(
                "The specified threshold does not match what is done by the model."
            )
        return restore_type(p)
Beispiel #29
0
    def __call__(
        self,
        inputs,
        labels,
        *,
        starting_points=None,
        init_attack=None,
        criterion: Callable = misclassification,
        steps=25000,
        spherical_step=1e-2,
        source_step=1e-2,
        source_step_convergance=1e-7,
        step_adaptation=1.5,
        tensorboard=False,
        update_stats_every_k=10,
    ):
        """Boundary Attack

        Differences to the original reference implementation:
        * We do not perform internal operations with float64
        * The samples within a batch can currently influence each other a bit
        * We don't perform the additional convergence confirmation
        * The success rate tracking changed a bit
        * Some other changes due to batching and merged loops

        Parameters
        ----------
        inputs
            The unperturbed inputs.
        labels
            The labels passed to ``criterion`` together with the inputs.
        starting_points
            Adversarial starting points. If None, they are produced by
            running ``init_attack`` on ``inputs`` and ``labels``.
        init_attack
            Attack class used to create the starting points; it is
            instantiated with ``self.model``. If None,
            LinearSearchBlendedUniformNoiseAttack is used.
        criterion : Callable
            A callable that returns true if the given logits of perturbed
            inputs should be considered adversarial w.r.t. to the given labels
            and unperturbed inputs.
        steps : int
            Maximum number of iterations.
        spherical_step : float
            Initial value of the per-sample spherical step size.
        source_step : float
            Initial value of the per-sample source step size.
        source_step_convergance : float
            A sample counts as converged once its source step size drops
            below this value; the attack stops when all samples converged.
        step_adaptation : float
            Factor by which the step sizes are multiplied or divided when
            they are adapted.
        tensorboard : str
            The log directory for TensorBoard summaries. If False, TensorBoard
            summaries will be disabled (default). If None, the logdir will be
            runs/CURRENT_DATETIME_HOSTNAME.
        update_stats_every_k : int
            The spherical candidates are only checked, and the success
            statistics only updated, every k-th step.

        Returns
        -------
        The raw tensor (``.tensor``) holding the best adversarials found.
        """
        tb = TensorBoard(logdir=tensorboard)
        # make sure the TensorBoard writer is closed even if the attack raises
        try:
            originals = ep.astensor(inputs)
            labels = ep.astensor(labels)

            def is_adversarial(p: ep.Tensor) -> ep.Tensor:
                """For each input in x, returns true if it is an adversarial for
                the given model and criterion"""
                logits = self.model.forward(p)
                return criterion(originals, labels, p, logits)

            if starting_points is None:
                if init_attack is None:
                    init_attack = LinearSearchBlendedUniformNoiseAttack
                    logging.info(
                        f"Neither starting_points nor init_attack given. Falling"
                        f" back to {init_attack.__name__} for initialization.")
                starting_points = init_attack(self.model)(inputs, labels)

            best_advs = ep.astensor(starting_points)
            assert is_adversarial(best_advs).all()

            N = len(originals)
            ndim = originals.ndim
            spherical_steps = ep.ones(originals, N) * spherical_step
            source_steps = ep.ones(originals, N) * source_step

            tb.scalar("batchsize", N, 0)

            # create two queues for each sample to track success rates
            # (used to update the hyper parameters)
            stats_spherical_adversarial = ArrayQueue(maxlen=100, N=N)
            stats_step_adversarial = ArrayQueue(maxlen=30, N=N)

            bounds = self.model.bounds()

            for step in range(1, steps + 1):
                converged = source_steps < source_step_convergance
                if converged.all():
                    break
                converged = atleast_kd(converged, ndim)

                # TODO: performance: ignore those that have converged
                # (we could select the non-converged ones, but we currently
                # cannot easily invert this in the end using EagerPy)

                unnormalized_source_directions = originals - best_advs
                source_norms = l2norms(unnormalized_source_directions)
                source_directions = unnormalized_source_directions / atleast_kd(
                    source_norms, ndim)

                # only check spherical candidates every k steps
                check_spherical_and_update_stats = (
                    step % update_stats_every_k == 0)

                candidates, spherical_candidates = draw_proposals(
                    bounds,
                    originals,
                    best_advs,
                    unnormalized_source_directions,
                    source_directions,
                    source_norms,
                    spherical_steps,
                    source_steps,
                )
                # the proposals must keep the dtype of the inputs
                assert candidates.dtype == originals.dtype
                assert spherical_candidates.dtype == originals.dtype

                is_adv = is_adversarial(candidates)

                if check_spherical_and_update_stats:
                    spherical_is_adv = is_adversarial(spherical_candidates)
                    stats_spherical_adversarial.append(spherical_is_adv)
                    # TODO: algorithm: the original implementation ignores those samples
                    # for which spherical is not adversarial and continues with the
                    # next iteration -> we estimate different probabilities (conditional vs. unconditional)
                    # TODO: thoughts: should we always track this because we compute it anyway
                    stats_step_adversarial.append(is_adv)
                else:
                    spherical_is_adv = None

                # in theory, we are closer per construction
                # but limited numerical precision might break this
                distances = l2norms(originals - candidates)
                closer = distances < source_norms
                is_best_adv = ep.logical_and(is_adv, closer)
                is_best_adv = atleast_kd(is_best_adv, ndim)

                # converged samples keep their current best adversarial
                cond = converged.logical_not().logical_and(is_best_adv)
                best_advs = ep.where(cond, candidates, best_advs)

                tb.probability("converged", converged, step)
                tb.scalar("updated_stats", check_spherical_and_update_stats,
                          step)
                tb.histogram("norms", source_norms, step)
                tb.probability("is_adv", is_adv, step)
                if spherical_is_adv is not None:
                    tb.probability("spherical_is_adv", spherical_is_adv, step)
                tb.histogram("candidates/distances", distances, step)
                tb.probability("candidates/closer", closer, step)
                tb.probability("candidates/is_best_adv", is_best_adv, step)
                tb.probability("new_best_adv_including_converged", is_best_adv,
                               step)
                tb.probability("new_best_adv", cond, step)

                if check_spherical_and_update_stats:
                    full = stats_spherical_adversarial.isfull()
                    tb.probability("spherical_stats/full", full, step)
                    if full.any():
                        probs = stats_spherical_adversarial.mean()
                        # spherical step succeeds often -> enlarge both steps
                        cond1 = ep.logical_and(probs > 0.5, full)
                        spherical_steps = ep.where(
                            cond1, spherical_steps * step_adaptation,
                            spherical_steps)
                        source_steps = ep.where(cond1,
                                                source_steps * step_adaptation,
                                                source_steps)
                        # spherical step succeeds rarely -> shrink both steps
                        cond2 = ep.logical_and(probs < 0.2, full)
                        spherical_steps = ep.where(
                            cond2, spherical_steps / step_adaptation,
                            spherical_steps)
                        source_steps = ep.where(cond2,
                                                source_steps / step_adaptation,
                                                source_steps)
                        stats_spherical_adversarial.clear(
                            ep.logical_or(cond1, cond2))
                        tb.conditional_mean(
                            "spherical_stats/isfull/success_rate/mean", probs,
                            full, step)
                        tb.probability_ratio(
                            "spherical_stats/isfull/too_linear", cond1, full,
                            step)
                        tb.probability_ratio(
                            "spherical_stats/isfull/too_nonlinear", cond2, full,
                            step)

                    full = stats_step_adversarial.isfull()
                    tb.probability("step_stats/full", full, step)
                    if full.any():
                        probs = stats_step_adversarial.mean()
                        # TODO: algorithm: changed the two values because we are currently tracking p(source_step_sucess)
                        # instead of p(source_step_success | spherical_step_sucess) that was tracked before
                        cond1 = ep.logical_and(probs > 0.25, full)
                        source_steps = ep.where(cond1,
                                                source_steps * step_adaptation,
                                                source_steps)
                        cond2 = ep.logical_and(probs < 0.1, full)
                        source_steps = ep.where(cond2,
                                                source_steps / step_adaptation,
                                                source_steps)
                        stats_step_adversarial.clear(
                            ep.logical_or(cond1, cond2))
                        tb.conditional_mean(
                            "step_stats/isfull/success_rate/mean", probs, full,
                            step)
                        tb.probability_ratio(
                            "step_stats/isfull/success_rate_too_high", cond1,
                            full, step)
                        tb.probability_ratio(
                            "step_stats/isfull/success_rate_too_low", cond2,
                            full, step)

                tb.histogram("spherical_step", spherical_steps, step)
                tb.histogram("source_step", source_steps, step)
        finally:
            tb.close()
        return best_advs.tensor
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[T] = None,
        **kwargs: Any,
    ) -> T:
        """Run the boundary attack, periodically training a surrogate model.

        In addition to the regular boundary attack loop, every 500 steps a
        two-class ResNet-18 is trained on the candidates collected so far
        (label 1: candidates that were adversarial, label 0: spherical
        candidates that were not adversarial). The wrapped surrogate is
        stored in ``self.surrogate_model`` and handed to ``draw_proposals``.

        NOTE(review): the per-step results are read with ``.item()``, so
        this presumably only supports a batch of exactly one input -
        confirm against callers.

        Parameters
        ----------
        model : Model
            The model under attack; also provides ``bounds`` and ``device``.
        inputs : T
            The unperturbed inputs.
        criterion : Union[Criterion, T]
            Passed through ``get_criterion`` and used to decide whether a
            sample is adversarial.
        early_stop : Optional[float]
            Forwarded to the initialization attack only.
        starting_points : Optional[T]
            Adversarial starting points. If None, they are produced by
            ``self.init_attack`` (or LinearSearchBlendedUniformNoiseAttack
            with 50 steps if that is None as well).

        Returns
        -------
        T
            The best adversarials found, converted with ``restore_type``.

        Raises
        ------
        ValueError
            If the initialization attack fails for some inputs or some of
            the given starting points are not adversarial.
        """
        raise_if_kwargs(kwargs)
        originals, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        if starting_points is None:
            init_attack: MinimizationAttack
            if self.init_attack is None:
                init_attack = LinearSearchBlendedUniformNoiseAttack(steps=50)
                logging.info(
                    f"Neither starting_points nor init_attack given. Falling"
                    f" back to {init_attack!r} for initialization.")
            else:
                init_attack = self.init_attack
            # TODO: use call and support all types of attacks (once early_stop is
            # possible in __call__)
            best_advs = init_attack.run(model,
                                        originals,
                                        criterion,
                                        early_stop=early_stop)
        else:
            best_advs = ep.astensor(starting_points)

        is_adv = is_adversarial(best_advs)
        if not is_adv.all():
            failed = is_adv.logical_not().float32().sum()
            if starting_points is None:
                raise ValueError(
                    f"init_attack failed for {failed} of {len(is_adv)} inputs")
            else:
                raise ValueError(
                    f"{failed} of {len(is_adv)} starting_points are not adversarial"
                )
        del starting_points

        tb = TensorBoard(logdir=self.tensorboard)
        # make sure the TensorBoard writer is closed even if a step raises
        try:
            N = len(originals)
            ndim = originals.ndim
            spherical_steps = ep.ones(originals, N) * self.spherical_step
            source_steps = ep.ones(originals, N) * self.source_step

            tb.scalar("batchsize", N, 0)

            # create two queues for each sample to track success rates
            # (used to update the hyper parameters)
            stats_spherical_adversarial = ArrayQueue(maxlen=100, N=N)
            stats_step_adversarial = ArrayQueue(maxlen=30, N=N)

            bounds = model.bounds

            # training data for the surrogate model, collected during the run
            self.class_1 = []  # candidates that were adversarial
            self.class_2 = []  # spherical candidates that were not adversarial

            self.surrogate_model = None
            # raw network behind self.surrogate_model; kept between training
            # rounds so that later rounds continue from the previous weights
            model_surrogate = None
            device = model.device
            train_step = 500  # retrain the surrogate every `train_step` steps

            for step in tqdm(range(1, self.steps + 1)):
                converged = source_steps < self.source_step_convergance
                if converged.all():
                    break  # pragma: no cover
                converged = atleast_kd(converged, ndim)

                # TODO: performance: ignore those that have converged
                # (we could select the non-converged ones, but we currently
                # cannot easily invert this in the end using EagerPy)

                unnormalized_source_directions = originals - best_advs
                source_norms = ep.norms.l2(
                    flatten(unnormalized_source_directions), axis=-1)
                source_directions = unnormalized_source_directions / atleast_kd(
                    source_norms, ndim)

                # only update the success statistics every k steps
                check_spherical_and_update_stats = (
                    step % self.update_stats_every_k == 0)

                candidates, spherical_candidates = draw_proposals(
                    bounds, originals, best_advs,
                    unnormalized_source_directions, source_directions,
                    source_norms, spherical_steps, source_steps,
                    self.surrogate_model)
                # the proposals must keep the dtype of the inputs
                assert candidates.dtype == originals.dtype
                assert spherical_candidates.dtype == originals.dtype

                is_adv = is_adversarial(candidates)
                is_adv_spherical_candidates = is_adversarial(
                    spherical_candidates)

                if is_adv.item():
                    self.class_1.append(candidates)

                if not is_adv_spherical_candidates.item():
                    self.class_2.append(spherical_candidates)

                # Training needs samples of both classes; with an empty class
                # the concatenation / split below would fail, so skip the
                # round and keep using the current surrogate (if any).
                if step % train_step == 0 and self.class_1 and self.class_2:
                    class_1 = np.array(
                        [image.numpy()[0] for image in self.class_1])
                    class_2 = np.array(
                        [image.numpy()[0] for image in self.class_2])

                    # use at most as many negatives as there are positives
                    class_2 = class_2[:len(class_1)]
                    data = np.concatenate([class_1, class_2])
                    labels = np.append(np.ones(len(class_1)),
                                       np.zeros(len(class_2)))

                    X = torch.tensor(data).to(device)
                    y = torch.tensor(labels, dtype=torch.long).to(device)

                    if model_surrogate is None:
                        # first round: pretrained ResNet-18 with a new
                        # two-class output layer
                        model_sur = torchvision.models.resnet18(
                            pretrained=True)
                        model_sur.fc = torch.nn.Linear(in_features=512,
                                                       out_features=2,
                                                       bias=True)
                        model_sur = model_sur.to(device)
                    else:
                        model_sur = model_surrogate

                    X_train, X_test, y_train, y_test = train_test_split(
                        X, y, test_size=0.2, random_state=42)

                    optimizer = torch.optim.Adam(model_sur.parameters(),
                                                 lr=3e-4)
                    loss = torch.nn.CrossEntropyLoss()

                    # the accuracy histories returned by train() are not used
                    model_surrogate, _, _ = train(model_sur, optimizer, loss,
                                                  X_train, y_train, X_test,
                                                  y_test)
                    model_surrogate = model_surrogate.eval()

                    # NOTE(review): bounds are hard-coded to (0, 1) instead of
                    # using model.bounds - confirm this is intended
                    self.surrogate_model = fb.PyTorchModel(model_surrogate,
                                                           bounds=(0, 1),
                                                           device=device)

                spherical_is_adv: Optional[ep.Tensor]
                if check_spherical_and_update_stats:
                    # reuse the result computed above instead of querying the
                    # model a second time for the same candidates
                    spherical_is_adv = is_adv_spherical_candidates
                    stats_spherical_adversarial.append(spherical_is_adv)
                    # TODO: algorithm: the original implementation ignores those samples
                    # for which spherical is not adversarial and continues with the
                    # next iteration -> we estimate different probabilities (conditional vs. unconditional)
                    # TODO: thoughts: should we always track this because we compute it anyway
                    stats_step_adversarial.append(is_adv)
                else:
                    spherical_is_adv = None

                # in theory, we are closer per construction
                # but limited numerical precision might break this
                distances = ep.norms.l2(flatten(originals - candidates),
                                        axis=-1)
                closer = distances < source_norms
                is_best_adv = ep.logical_and(is_adv, closer)
                is_best_adv = atleast_kd(is_best_adv, ndim)

                # converged samples keep their current best adversarial
                cond = converged.logical_not().logical_and(is_best_adv)
                best_advs = ep.where(cond, candidates, best_advs)

                tb.probability("converged", converged, step)
                tb.scalar("updated_stats", check_spherical_and_update_stats,
                          step)
                tb.histogram("norms", source_norms, step)
                tb.probability("is_adv", is_adv, step)
                if spherical_is_adv is not None:
                    tb.probability("spherical_is_adv", spherical_is_adv, step)
                tb.histogram("candidates/distances", distances, step)
                tb.probability("candidates/closer", closer, step)
                tb.probability("candidates/is_best_adv", is_best_adv, step)
                tb.probability("new_best_adv_including_converged", is_best_adv,
                               step)
                tb.probability("new_best_adv", cond, step)

                if check_spherical_and_update_stats:
                    full = stats_spherical_adversarial.isfull()
                    tb.probability("spherical_stats/full", full, step)
                    if full.any():
                        probs = stats_spherical_adversarial.mean()
                        # spherical step succeeds often -> enlarge both steps
                        cond1 = ep.logical_and(probs > 0.5, full)
                        spherical_steps = ep.where(
                            cond1, spherical_steps * self.step_adaptation,
                            spherical_steps)
                        source_steps = ep.where(
                            cond1, source_steps * self.step_adaptation,
                            source_steps)
                        # spherical step succeeds rarely -> shrink both steps
                        cond2 = ep.logical_and(probs < 0.2, full)
                        spherical_steps = ep.where(
                            cond2, spherical_steps / self.step_adaptation,
                            spherical_steps)
                        source_steps = ep.where(
                            cond2, source_steps / self.step_adaptation,
                            source_steps)
                        stats_spherical_adversarial.clear(
                            ep.logical_or(cond1, cond2))
                        tb.conditional_mean(
                            "spherical_stats/isfull/success_rate/mean", probs,
                            full, step)
                        tb.probability_ratio(
                            "spherical_stats/isfull/too_linear", cond1, full,
                            step)
                        tb.probability_ratio(
                            "spherical_stats/isfull/too_nonlinear", cond2, full,
                            step)

                    full = stats_step_adversarial.isfull()
                    tb.probability("step_stats/full", full, step)
                    if full.any():
                        probs = stats_step_adversarial.mean()
                        # TODO: algorithm: changed the two values because we are currently tracking p(source_step_sucess)
                        # instead of p(source_step_success | spherical_step_sucess) that was tracked before
                        cond1 = ep.logical_and(probs > 0.25, full)
                        source_steps = ep.where(
                            cond1, source_steps * self.step_adaptation,
                            source_steps)
                        cond2 = ep.logical_and(probs < 0.1, full)
                        source_steps = ep.where(
                            cond2, source_steps / self.step_adaptation,
                            source_steps)
                        stats_step_adversarial.clear(
                            ep.logical_or(cond1, cond2))
                        tb.conditional_mean(
                            "step_stats/isfull/success_rate/mean", probs, full,
                            step)
                        tb.probability_ratio(
                            "step_stats/isfull/success_rate_too_high", cond1,
                            full, step)
                        tb.probability_ratio(
                            "step_stats/isfull/success_rate_too_low", cond2,
                            full, step)

                tb.histogram("spherical_step", spherical_steps, step)
                tb.histogram("source_step", source_steps, step)
        finally:
            tb.close()
        return restore_type(best_advs)