Ejemplo n.º 1
0
    def _binary_search(self, originals: ep.Tensor, perturbed: ep.Tensor, boost: Optional[bool] = False) -> ep.Tensor:
        # Choose upper thresholds in binary search based on constraint.
        highs = ep.ones(perturbed, len(perturbed))
        d = np.prod(perturbed.shape[1:])
        thresholds = self._BS_gamma / (d * math.sqrt(d))
        lows = ep.zeros_like(highs)

        # Boost Binary search
        if boost:
            boost_vec = 0.1 * originals + 0.9 * perturbed
            is_advs = self._is_adversarial(boost_vec)
            is_advs = atleast_kd(is_advs, originals.ndim)
            originals = ep.where(is_advs.logical_not(), boost_vec, originals)
            perturbed = ep.where(is_advs, boost_vec, perturbed)

        # use this variable to check when mids stays constant and the BS has converged
        old_mids = highs
        iteration = 0
        while ep.any(highs - lows > thresholds) and iteration < self._BS_max_iteration:
            iteration += 1
            mids = (lows + highs) / 2
            mids_perturbed = self._project(originals, perturbed, mids)
            is_adversarial_ = self._is_adversarial(mids_perturbed)

            highs = ep.where(is_adversarial_, mids, highs)
            lows = ep.where(is_adversarial_, lows, mids)

            # check of there is no more progress due to numerical imprecision
            reached_numerical_precision = (old_mids == mids).all()
            old_mids = mids
            if reached_numerical_precision:
                break
        
        results = self._project(originals, perturbed, highs)
        return results
Ejemplo n.º 2
0
    def __call__(self, model: Model, inputs: T,
                 criterion: Union[Misclassification, T]) -> T:
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion

        N = len(x)

        if isinstance(criterion_, Misclassification):
            classes = criterion_.labels
        else:
            raise ValueError("unsupported criterion")

        if classes.shape != (N, ):
            raise ValueError(
                f"expected labels to have shape ({N},), got {classes.shape}")

        bounds = model.bounds

        def loss_fun(delta: ep.Tensor, logits: ep.Tensor) -> ep.Tensor:
            assert x.shape[0] == logits.shape[0]
            assert delta.shape == x.shape

            x_hat = x + delta
            logits_hat = model(x_hat)
            loss = ep.kl_div_with_logits(logits, logits_hat).sum()

            return loss

        value_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=False)

        clean_logits = model(x)

        # start with random vector as search vector
        d = ep.normal(x, shape=x.shape, mean=0, stddev=1)
        for it in range(self.iterations):
            # normalize proposal to be unit vector
            d = d * self.xi / atleast_kd(ep.norms.l2(flatten(d), axis=-1),
                                         x.ndim)

            # use gradient of KL divergence as new search vector
            _, grad = value_and_grad(d, clean_logits)
            d = grad

            # rescale search vector
            d = (bounds[1] - bounds[0]) * d

            if ep.any(ep.norms.l2(flatten(d), axis=-1) < 1e-64):
                raise RuntimeError(
                    "Gradient vanished; this can happen if xi is too small.")

        final_delta = (self.epsilon / ep.sqrt(
            (d**2).sum(keepdims=True, axis=(1, 2, 3))) * d)
        x_adv = ep.clip(x + final_delta, *bounds)

        return restore_type(x_adv)
Ejemplo n.º 3
0
    def _binary_search(
        self,
        is_adversarial: Callable[[ep.Tensor], ep.Tensor],
        originals: ep.Tensor,
        perturbed: ep.Tensor,
    ) -> ep.Tensor:
        # Choose upper thresholds in binary search based on constraint.
        d = np.prod(perturbed.shape[1:])
        if self.constraint == "linf":
            highs = linf(originals, perturbed)

            # TODO: Check if the threshold is correct
            #  empirically this seems to be too low
            thresholds = highs * self.gamma / (d * d)
        else:
            highs = ep.ones(perturbed, len(perturbed))
            thresholds = self.gamma / (d * math.sqrt(d))

        lows = ep.zeros_like(highs)

        # use this variable to check when mids stays constant and the BS has converged
        old_mids = highs

        while ep.any(highs - lows > thresholds):
            mids = (lows + highs) / 2
            mids_perturbed = self._project(originals, perturbed, mids)
            is_adversarial_ = is_adversarial(mids_perturbed)

            highs = ep.where(is_adversarial_, mids, highs)
            lows = ep.where(is_adversarial_, lows, mids)

            # check of there is no more progress due to numerical imprecision
            reached_numerical_precision = (old_mids == mids).all()
            old_mids = mids

            if reached_numerical_precision:
                # TODO: warn user
                break

        res = self._project(originals, perturbed, highs)

        return res
Ejemplo n.º 4
0
def test_any_none_keepdims(t: Tensor) -> Tensor:
    return ep.any(t > 3, axis=None, keepdims=True)
Ejemplo n.º 5
0
def test_any_axes(dummy: Tensor) -> Tensor:
    t = ep.arange(dummy, 30).float32().reshape((3, 5, 2))
    return ep.any(t > 3, axis=(0, 1))
Ejemplo n.º 6
0
def test_any_axis(t: Tensor) -> Tensor:
    return ep.any(t > 3, axis=0)
Ejemplo n.º 7
0
def test_any(t: Tensor) -> Tensor:
    return ep.any(t > 3)
Ejemplo n.º 8
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[T] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        if starting_points is None:
            raise ValueError("BinarizationRefinementAttack requires starting_points")
        (o, x), restore_type = ep.astensors_(inputs, starting_points)
        del inputs, starting_points, kwargs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        if self.threshold is None:
            min_, max_ = model.bounds
            threshold = (min_ + max_) / 2.0
        else:
            threshold = self.threshold

        assert o.dtype == x.dtype

        nptype = o.reshape(-1)[0].numpy().dtype.type
        if nptype not in [np.float16, np.float32, np.float64]:
            raise ValueError(  # pragma: no cover
                f"expected dtype to be float16, float32 or float64, found '{nptype}'"
            )

        threshold = nptype(threshold)
        offset = nptype(1.0)

        if self.included_in == "lower":
            lower_ = threshold
            upper_ = np.nextafter(threshold, threshold + offset)
        elif self.included_in == "upper":
            lower_ = np.nextafter(threshold, threshold - offset)
            upper_ = threshold
        else:
            raise ValueError(
                f"expected included_in to be 'lower' or 'upper', found '{self.included_in}'"
            )

        assert lower_ < upper_

        p = ep.full_like(o, ep.nan)

        lower = ep.ones_like(o) * lower_
        upper = ep.ones_like(o) * upper_

        indices = ep.logical_and(o <= lower, x <= lower)
        p = ep.where(indices, o, p)

        indices = ep.logical_and(o <= lower, x >= upper)
        p = ep.where(indices, upper, p)

        indices = ep.logical_and(o >= upper, x <= lower)
        p = ep.where(indices, lower, p)

        indices = ep.logical_and(o >= upper, x >= upper)
        p = ep.where(indices, o, p)

        assert not ep.any(ep.isnan(p))

        is_adv1 = is_adversarial(x)
        is_adv2 = is_adversarial(p)
        if (is_adv1 != is_adv2).any():
            raise ValueError(
                "The specified threshold does not match what is done by the model."
            )
        return restore_type(p)
Ejemplo n.º 9
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[T] = None,
        **kwargs: Any,
    ) -> T:
        """For models that preprocess their inputs by binarizing the
        inputs, this attack can improve adversarials found by other
        attacks. It does this by utilizing information about the
        binarization and mapping values to the corresponding value in
        the clean input or to the right side of the threshold.

        Parameters
        ----------
        threshold : float
            The treshold used by the models binarization. If none,
            defaults to (model.bounds()[1] - model.bounds()[0]) / 2.
        included_in : str
            Whether the threshold value itself belongs to the lower or
            upper interval.

        """
        raise_if_kwargs(kwargs)
        if starting_points is None:
            raise ValueError(
                "BinarizationRefinementAttack requires starting_points")
        (o, x), restore_type = ep.astensors_(inputs, starting_points)
        del inputs, starting_points, kwargs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        if self.threshold is None:
            min_, max_ = model.bounds
            threshold = (min_ + max_) / 2.0
        else:
            threshold = self.threshold

        assert o.dtype == x.dtype

        nptype = o.reshape(-1)[0].numpy().dtype.type
        if nptype not in [np.float16, np.float32, np.float64]:
            raise ValueError(  # pragma: no cover
                f"expected dtype to be float16, float32 or float64, found '{nptype}'"
            )

        threshold = nptype(threshold)
        offset = nptype(1.0)

        if self.included_in == "lower":
            lower_ = threshold
            upper_ = np.nextafter(threshold, threshold + offset)
        elif self.included_in == "upper":
            lower_ = np.nextafter(threshold, threshold - offset)
            upper_ = threshold
        else:
            raise ValueError(
                f"expected included_in to be 'lower' or 'upper', found '{self.included_in}'"
            )

        assert lower_ < upper_

        p = ep.full_like(o, ep.nan)

        lower = ep.ones_like(o) * lower_
        upper = ep.ones_like(o) * upper_

        indices = ep.logical_and(o <= lower, x <= lower)
        p = ep.where(indices, o, p)

        indices = ep.logical_and(o <= lower, x >= upper)
        p = ep.where(indices, upper, p)

        indices = ep.logical_and(o >= upper, x <= lower)
        p = ep.where(indices, lower, p)

        indices = ep.logical_and(o >= upper, x >= upper)
        p = ep.where(indices, o, p)

        assert not ep.any(ep.isnan(p))

        is_adv1 = is_adversarial(x)
        is_adv2 = is_adversarial(p)
        if (is_adv1 != is_adv2).any():
            raise ValueError(
                "The specified threshold does not match what is done by the model."
            )
        return restore_type(p)
Ejemplo n.º 10
0
    def __call__(
        self,
        inputs,
        labels,
        *,
        adversarials,
        criterion,
        threshold=None,
        included_in="upper",
    ):
        """For models that preprocess their inputs by binarizing the
        inputs, this attack can improve adversarials found by other
        attacks. It does this by utilizing information about the
        binarization and mapping values to the corresponding value in
        the clean input or to the right side of the threshold.

        Parameters
        ----------
        threshold : float
            The treshold used by the models binarization. If none,
            defaults to (model.bounds()[1] - model.bounds()[0]) / 2.
        included_in : str
            Whether the threshold value itself belongs to the lower or
            upper interval.

        """
        originals = ep.astensor(inputs)
        labels = ep.astensor(labels)

        def is_adversarial(p: ep.Tensor) -> ep.Tensor:
            """For each input in x, returns true if it is an adversarial for
            the given model and criterion"""
            logits = ep.astensor(self.model.forward(p.tensor))
            return criterion(originals, labels, p, logits)

        o = ep.astensor(inputs)
        x = ep.astensor(adversarials)

        min_, max_ = self.model.bounds()
        if threshold is None:
            threshold = (min_ + max_) / 2.0

        assert o.dtype == x.dtype
        dtype = o.dtype

        if dtype == o.backend.float16:
            nptype = np.float16
        elif dtype == o.backend.float32:
            nptype = np.float32
        elif dtype == o.backend.float64:
            nptype = np.float64
        else:
            raise ValueError(
                "expected dtype to be float16, float32 or float64, found '{dtype}'"
            )

        threshold = nptype(threshold)
        offset = nptype(1.0)

        if included_in == "lower":
            lower = threshold
            upper = np.nextafter(threshold, threshold + offset)
        elif included_in == "upper":
            lower = np.nextafter(threshold, threshold - offset)
            upper = threshold
        else:
            raise ValueError(
                "expected included_in to be 'lower' or 'upper', found '{included_in}'"
            )

        assert lower < upper

        p = ep.full_like(o, ep.nan)

        lower = ep.ones_like(o) * lower
        upper = ep.ones_like(o) * upper

        indices = ep.logical_and(o <= lower, x <= lower)
        p = ep.where(indices, o, p)

        indices = ep.logical_and(o <= lower, x >= upper)
        p = ep.where(indices, upper, p)

        indices = ep.logical_and(o >= upper, x <= lower)
        p = ep.where(indices, lower, p)

        indices = ep.logical_and(o >= upper, x >= upper)
        p = ep.where(indices, o, p)

        assert not ep.any(ep.isnan(p))

        is_adv1 = is_adversarial(x)
        is_adv2 = is_adversarial(p)
        assert (is_adv1 == is_adv2).all(
        ), "The specified threshold does not match what is done by the model."
        return p.tensor
Ejemplo n.º 11
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, Any] = None,
        *,
        starting_points: Optional[ep.Tensor] = None,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        del kwargs

        x, restore_type = ep.astensor_(inputs)
        del inputs

        verify_input_bounds(x, model)

        criterion_ = get_criterion(criterion)
        del criterion
        is_adversarial = get_is_adversarial(criterion_, model)

        if starting_points is None:
            init_attack: MinimizationAttack
            if self.init_attack is None:
                init_attack = SaltAndPepperNoiseAttack()
                logging.info(
                    f"Neither starting_points nor init_attack given. Falling"
                    f" back to {init_attack!r} for initialization."
                )
            else:
                init_attack = self.init_attack
            # TODO: use call and support all types of attacks (once early_stop is
            # possible in __call__)
            starting_points = init_attack.run(model, x, criterion_)

        x_adv = ep.astensor(starting_points)
        assert is_adversarial(x_adv).all()

        original_shape = x.shape
        N = len(x)

        x_flat = flatten(x)
        x_adv_flat = flatten(x_adv)

        # was there a pixel left in the samples to manipulate,
        # i.e. reset to the clean version?
        found_index_to_manipulate = ep.from_numpy(x, np.ones(N, dtype=bool))

        while ep.any(found_index_to_manipulate):
            diff_mask = (ep.abs(x_flat - x_adv_flat) > 1e-8).numpy()
            diff_idxs = [z.nonzero()[0] for z in diff_mask]
            untouched_indices = [z.tolist() for z in diff_idxs]
            untouched_indices = [
                np.random.permutation(it).tolist() for it in untouched_indices
            ]

            found_index_to_manipulate = ep.from_numpy(x, np.zeros(N, dtype=bool))

            # since the number of pixels still left to manipulate might differ
            # across different samples we track each of them separately and
            # and manipulate the images until there is no pixel left for
            # any of the samples. to not update already finished samples, we mask
            # the updates such that only samples that still have pixels left to manipulate
            # will be updated
            i = 0
            while i < max([len(it) for it in untouched_indices]):
                # mask all samples that still have pixels to manipulate left
                relevant_mask = [len(it) > i for it in untouched_indices]
                relevant_mask = np.array(relevant_mask, dtype=bool)
                relevant_mask_index = np.flatnonzero(relevant_mask)

                # for each image get the index of the next pixel we try out
                relevant_indices = [it[i] for it in untouched_indices if len(it) > i]

                old_values = x_adv_flat[relevant_mask_index, relevant_indices]
                new_values = x_flat[relevant_mask_index, relevant_indices]
                x_adv_flat = ep.index_update(
                    x_adv_flat, (relevant_mask_index, relevant_indices), new_values
                )

                # check if still adversarial
                is_adv = is_adversarial(x_adv_flat.reshape(original_shape))
                found_index_to_manipulate = ep.index_update(
                    found_index_to_manipulate,
                    relevant_mask_index,
                    ep.logical_or(found_index_to_manipulate, is_adv)[relevant_mask],
                )

                # if not, undo change
                new_or_old_values = ep.where(
                    is_adv[relevant_mask], new_values, old_values
                )
                x_adv_flat = ep.index_update(
                    x_adv_flat,
                    (relevant_mask_index, relevant_indices),
                    new_or_old_values,
                )

                i += 1

            if not ep.any(found_index_to_manipulate):
                break

        if self.l2_binary_search:
            while True:
                diff_mask = (ep.abs(x_flat - x_adv_flat) > 1e-12).numpy()
                diff_idxs = [z.nonzero()[0] for z in diff_mask]
                untouched_indices = [z.tolist() for z in diff_idxs]
                # draw random shuffling of all indices for all samples
                untouched_indices = [
                    np.random.permutation(it).tolist() for it in untouched_indices
                ]

                # whether that run through all values made any improvement
                improved = ep.from_numpy(x, np.zeros(N, dtype=bool)).astype(bool)

                logging.info("Starting new loop through all values")

                # use the same logic as above
                i = 0
                while i < max([len(it) for it in untouched_indices]):
                    # mask all samples that still have pixels to manipulate left
                    relevant_mask = [len(it) > i for it in untouched_indices]
                    relevant_mask = np.array(relevant_mask, dtype=bool)
                    relevant_mask_index = np.flatnonzero(relevant_mask)

                    # for each image get the index of the next pixel we try out
                    relevant_indices = [
                        it[i] for it in untouched_indices if len(it) > i
                    ]

                    old_values = x_adv_flat[relevant_mask_index, relevant_indices]
                    new_values = x_flat[relevant_mask_index, relevant_indices]

                    x_adv_flat = ep.index_update(
                        x_adv_flat, (relevant_mask_index, relevant_indices), new_values
                    )

                    # check if still adversarial
                    is_adv = is_adversarial(x_adv_flat.reshape(original_shape))

                    improved = ep.index_update(
                        improved,
                        relevant_mask_index,
                        ep.logical_or(improved, is_adv)[relevant_mask],
                    )

                    if not ep.all(is_adv):
                        # run binary search for examples that became non-adversarial
                        updated_new_values = self._binary_search(
                            x_adv_flat,
                            relevant_mask,
                            relevant_mask_index,
                            relevant_indices,
                            old_values,
                            new_values,
                            (-1, *original_shape[1:]),
                            is_adversarial,
                        )
                        x_adv_flat = ep.index_update(
                            x_adv_flat,
                            (relevant_mask_index, relevant_indices),
                            ep.where(
                                is_adv[relevant_mask], new_values, updated_new_values
                            ),
                        )

                        improved = ep.index_update(
                            improved,
                            relevant_mask_index,
                            ep.logical_or(
                                old_values != updated_new_values,
                                improved[relevant_mask],
                            ),
                        )

                    i += 1

                if not ep.any(improved):
                    # no improvement for any of the indices
                    break

        x_adv = x_adv_flat.reshape(original_shape)

        return restore_type(x_adv)