    def project(self, x: ep.Tensor, x0: ep.Tensor,
                epsilon: ep.Tensor) -> ep.Tensor:
        # keep only the epsilon largest-magnitude components of x - x0
        flatten_delta = flatten(x - x0)
        abs_delta = abs(flatten_delta)
        epsilon = epsilon.astype(int)
        rows = range(flatten_delta.shape[0])
        # the magnitude of the epsilon-th largest entry per row
        # becomes the clipping threshold
        idx_sorted = ep.argsort(abs_delta, axis=-1)[rows, -epsilon]
        thresholds = (ep.ones_like(flatten_delta).T *
                      abs_delta[rows, idx_sorted]).T
        clipped = ep.where(abs_delta >= thresholds, flatten_delta, 0)
        return x0 + clipped.reshape(x0.shape).astype(x0.dtype)

    def mid_points(
        self,
        x0: ep.Tensor,
        x1: ep.Tensor,
        epsilons: ep.Tensor,
        bounds: Tuple[float, float],
    ) -> ep.Tensor:
        # returns a point between x0 and x1 where
        # epsilon = 0 returns x0 and epsilon = 1 returns x1;
        # epsilons here are the fraction of features to keep
        n_features = flatten(ep.ones_like(x0)).bool().sum(axis=1).float32()
        new_x = self.project(x1, x0, n_features * epsilons)
        return new_x
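
Note that project implements an L0 projection: only the epsilon largest-magnitude components of x - x0 survive and the rest are zeroed. A standalone numpy sketch of the same top-k thresholding (the shapes, values, and per-sample k are invented for illustration):

import numpy as np

# Invented example: a batch of 2 flattened perturbations; keep k components each.
delta = np.array([[0.1, -0.7, 0.3, 0.05],
                  [0.6, 0.2, -0.1, -0.9]])
k = np.array([2, 1])  # per-sample number of components to keep

abs_delta = np.abs(delta)
rows = np.arange(delta.shape[0])
# the magnitude of the k-th largest entry per row becomes the threshold
idx_sorted = np.argsort(abs_delta, axis=-1)[rows, -k]
thresholds = abs_delta[rows, idx_sorted][:, None]
projected = np.where(abs_delta >= thresholds, delta, 0)
print(projected)  # row 0 keeps -0.7 and 0.3; row 1 keeps only -0.9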
Example #3
    def select_delta(self, originals: ep.Tensor, distances: ep.Tensor,
                     step: int) -> ep.Tensor:
        result: ep.Tensor
        if step == 0:
            result = 0.1 * ep.ones_like(distances)
        else:
            d = np.prod(originals.shape[1:])

            if self.constraint == "linf":
                theta = self.gamma / (d * d)
                result = d * theta * distances
            else:
                theta = self.gamma / (d * np.sqrt(d))
                result = np.sqrt(d) * theta * distances

        return result
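
Both branches simplify to the same value: for linf, d * (gamma / d^2) * distance = gamma * distance / d, and for l2, sqrt(d) * (gamma / (d * sqrt(d))) * distance = gamma * distance / d, so the selected delta shrinks linearly in the input dimensionality d. A quick numeric check (a standalone sketch; gamma, the input shape, and the distance are invented values):

import numpy as np

gamma = 1.0                    # invented value for illustration
d = int(np.prod((3, 32, 32)))  # e.g. a 3x32x32 input, d = 3072
distance = 0.5                 # invented current distance

theta_linf = gamma / (d * d)
delta_linf = d * theta_linf * distance       # = gamma * distance / d

theta_l2 = gamma / (d * np.sqrt(d))
delta_l2 = np.sqrt(d) * theta_l2 * distance  # = gamma * distance / d

assert np.isclose(delta_linf, delta_l2)
print(delta_linf)  # 0.5 / 3072, roughly 1.6e-4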
Example #4
def test_logical_and_manual(t: Tensor) -> None:
    assert (ep.logical_and(t < 3, ep.ones_like(t).bool()) == (t < 3)).all()
Example #5
def test_ones_like(t: Tensor) -> Tensor:
    return ep.ones_like(t)
Example #6
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: TargetedMisclassification,
        *,
        epsilon: float,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        N = len(x)

        if isinstance(criterion, TargetedMisclassification):
            classes = criterion.target_classes
        else:
            raise ValueError("unsupported criterion")

        if classes.shape != (N, ):
            raise ValueError(
                f"expected target_classes to have shape ({N},), got {classes.shape}"
            )

        noise_shape: Union[Tuple[int, int, int, int], Tuple[int, ...]]
        channel_axis: Optional[int] = None
        if self.reduced_dims is not None:
            if x.ndim != 4:
                raise NotImplementedError(
                    "only implemented for inputs with two spatial dimensions"
                    " (and one channel and one batch dimension)")

            if self.channel_axis is None:
                maybe_axis = get_channel_axis(model, x.ndim)
                if maybe_axis is None:
                    raise ValueError(
                        "cannot infer the data_format from the model, please"
                        " specify channel_axis when initializing the attack")
                else:
                    channel_axis = maybe_axis
            else:
                channel_axis = self.channel_axis % x.ndim

            if channel_axis == 1:
                noise_shape = (x.shape[1], *self.reduced_dims)
            elif channel_axis == 3:
                noise_shape = (*self.reduced_dims, x.shape[3])
            else:
                raise ValueError(
                    "expected 'channel_axis' to be 1 or 3, got {channel_axis}")
        else:
            noise_shape = x.shape[1:]  # pragma: no cover

        def is_adversarial(logits: ep.TensorType) -> ep.TensorType:
            return ep.argmax(logits, 1) == classes

        num_plateaus = ep.zeros(x, len(x))
        mutation_probability = (ep.ones_like(num_plateaus) *
                                self.min_mutation_probability)
        mutation_range = ep.ones_like(num_plateaus) * self.min_mutation_range

        noise_pops = ep.uniform(x, (N, self.population, *noise_shape),
                                -epsilon, epsilon)

        def calculate_fitness(logits: ep.TensorType) -> ep.TensorType:
            first = logits[range(N), classes]
            second = ep.log(ep.exp(logits).sum(1) - ep.exp(first))

            return first - second

        n_its_wo_change = ep.zeros(x, (N, ))
        for step in range(self.steps):
            fitness_l, is_adv_l = [], []

            for i in range(self.population):
                it = self.apply_noise(x, noise_pops[:, i], epsilon,
                                      channel_axis)
                logits = model(it)
                f = calculate_fitness(logits)
                a = is_adversarial(logits)
                fitness_l.append(f)
                is_adv_l.append(a)

            fitness = ep.stack(fitness_l)
            is_adv = ep.stack(is_adv_l, 1)
            elite_idxs = ep.argmax(fitness, 0)

            elite_noise = noise_pops[range(N), elite_idxs]
            is_adv = is_adv[range(N), elite_idxs]

            # early stopping
            if is_adv.all():
                return restore_type(  # pragma: no cover
                    self.apply_noise(x, elite_noise, epsilon, channel_axis))

            probs = ep.softmax(fitness / self.sampling_temperature, 0)
            parents_idxs = np.stack(
                [
                    self.choice(
                        self.population,
                        2 * self.population - 2,
                        replace=True,
                        p=probs[:, i],
                    ) for i in range(N)
                ],
                1,
            )

            mutations = [
                ep.uniform(
                    x,
                    noise_shape,
                    -mutation_range[i].item() * epsilon,
                    mutation_range[i].item() * epsilon,
                ) for i in range(N)
            ]

            new_noise_pops = [elite_noise]
            for i in range(0, self.population - 1):
                parents_1 = noise_pops[range(N), parents_idxs[2 * i]]
                parents_2 = noise_pops[range(N), parents_idxs[2 * i + 1]]

                # calculate crossover
                p = probs[parents_idxs[2 * i], range(N)] / (
                    probs[parents_idxs[2 * i], range(N)] +
                    probs[parents_idxs[2 * i + 1],
                          range(N)])
                p = atleast_kd(p, x.ndim)
                p = ep.tile(p, (1, *noise_shape))

                crossover_mask = ep.uniform(p, p.shape, 0, 1) < p
                children = ep.where(crossover_mask, parents_1, parents_2)

                # calculate mutation
                mutation_mask = ep.uniform(children, children.shape)
                mutation_mask = mutation_mask <= atleast_kd(
                    mutation_probability, children.ndim)
                children = ep.where(mutation_mask, children + mutations[i],
                                    children)

                # project back to epsilon range
                children = ep.clip(children, -epsilon, epsilon)

                new_noise_pops.append(children)

            noise_pops = ep.stack(new_noise_pops, 1)

            # increase num_plateaus if fitness does not improve
            # for 100 consecutive steps
            n_its_wo_change = ep.where(elite_idxs == 0, n_its_wo_change + 1,
                                       ep.zeros_like(n_its_wo_change))
            num_plateaus = ep.where(n_its_wo_change >= 100, num_plateaus + 1,
                                    num_plateaus)
            n_its_wo_change = ep.where(n_its_wo_change >= 100,
                                       ep.zeros_like(n_its_wo_change),
                                       n_its_wo_change)

            mutation_probability = ep.maximum(
                self.min_mutation_probability,
                0.5 * ep.exp(
                    math.log(0.9) * ep.ones_like(num_plateaus) * num_plateaus),
            )
            mutation_range = ep.maximum(
                self.min_mutation_range,
                0.4 * ep.exp(
                    math.log(0.9) * ep.ones_like(num_plateaus) * num_plateaus),
            )

        return restore_type(
            self.apply_noise(x, elite_noise, epsilon, channel_axis))
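
The parent-selection step above is fitness-proportional: a softmax over fitness (scaled by sampling_temperature) turns fitness values into sampling probabilities, and 2 * population - 2 parent indices are drawn with replacement for crossover. A numpy-only sketch of that mechanism (population size, temperature, and fitness values are made up for illustration):

import numpy as np

population = 6              # made-up illustration values
sampling_temperature = 0.3
fitness = np.array([0.1, 0.5, 2.0, 0.2, 1.5, 0.05])

# softmax over fitness: fitter candidates are sampled more often as parents
scaled = fitness / sampling_temperature
probs = np.exp(scaled - scaled.max())
probs /= probs.sum()

# draw 2 * population - 2 parent indices with replacement, as the attack
# does when pairing parents for crossover
rng = np.random.default_rng(0)
parents = rng.choice(population, size=2 * population - 2, replace=True, p=probs)
print(probs.round(3), parents)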
Example #7
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[T] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        if starting_points is None:
            raise ValueError("BinarizationRefinementAttack requires starting_points")
        (o, x), restore_type = ep.astensors_(inputs, starting_points)
        del inputs, starting_points, kwargs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        if self.threshold is None:
            min_, max_ = model.bounds
            threshold = (min_ + max_) / 2.0
        else:
            threshold = self.threshold

        assert o.dtype == x.dtype

        nptype = o.reshape(-1)[0].numpy().dtype.type
        if nptype not in [np.float16, np.float32, np.float64]:
            raise ValueError(  # pragma: no cover
                f"expected dtype to be float16, float32 or float64, found '{nptype}'"
            )

        threshold = nptype(threshold)
        offset = nptype(1.0)

        if self.included_in == "lower":
            lower_ = threshold
            upper_ = np.nextafter(threshold, threshold + offset)
        elif self.included_in == "upper":
            lower_ = np.nextafter(threshold, threshold - offset)
            upper_ = threshold
        else:
            raise ValueError(
                f"expected included_in to be 'lower' or 'upper', found '{self.included_in}'"
            )

        assert lower_ < upper_

        p = ep.full_like(o, ep.nan)

        lower = ep.ones_like(o) * lower_
        upper = ep.ones_like(o) * upper_

        indices = ep.logical_and(o <= lower, x <= lower)
        p = ep.where(indices, o, p)

        indices = ep.logical_and(o <= lower, x >= upper)
        p = ep.where(indices, upper, p)

        indices = ep.logical_and(o >= upper, x <= lower)
        p = ep.where(indices, lower, p)

        indices = ep.logical_and(o >= upper, x >= upper)
        p = ep.where(indices, o, p)

        assert not ep.any(ep.isnan(p))

        is_adv1 = is_adversarial(x)
        is_adv2 = is_adversarial(p)
        if (is_adv1 != is_adv2).any():
            raise ValueError(
                "The specified threshold does not match what is done by the model."
            )
        return restore_type(p)
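
The four ep.where updates encode a small case table: when the clean value o and the adversarial value x binarize to the same side of the threshold, the clean value is restored (that part of the perturbation was wasted), and when they fall on different sides, x is snapped to the value closest to the threshold that still binarizes the same way. A numpy sketch of the mapping (threshold and inputs are invented, using included_in="upper"):

import numpy as np

threshold = np.float32(0.5)                     # invented threshold
lower = np.nextafter(threshold, threshold - 1)  # just below the threshold
upper = threshold                               # included_in="upper"

o = np.array([0.2, 0.3, 0.9, 0.8], dtype=np.float32)   # clean values
x = np.array([0.1, 0.7, 0.4, 0.95], dtype=np.float32)  # adversarial values

p = np.full_like(o, np.nan)
p = np.where((o <= lower) & (x <= lower), o, p)      # same (low) side: keep clean value
p = np.where((o <= lower) & (x >= upper), upper, p)  # flipped low -> high: snap to boundary
p = np.where((o >= upper) & (x <= lower), lower, p)  # flipped high -> low: snap to boundary
p = np.where((o >= upper) & (x >= upper), o, p)      # same (high) side: keep clean value
print(p)  # [0.2, 0.5, ~0.49999997, 0.8]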
Example #8
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[T] = None,
        **kwargs: Any,
    ) -> T:
        """For models that preprocess their inputs by binarizing the
        inputs, this attack can improve adversarials found by other
        attacks. It does this by utilizing information about the
        binarization and mapping values to the corresponding value in
        the clean input or to the correct side of the threshold.

        Parameters
        ----------
        threshold : float
            The threshold used by the model's binarization. If None,
            defaults to (model.bounds[0] + model.bounds[1]) / 2.
        included_in : str
            Whether the threshold value itself belongs to the lower or
            upper interval.

        """
        raise_if_kwargs(kwargs)
        if starting_points is None:
            raise ValueError(
                "BinarizationRefinementAttack requires starting_points")
        (o, x), restore_type = ep.astensors_(inputs, starting_points)
        del inputs, starting_points, kwargs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        if self.threshold is None:
            min_, max_ = model.bounds
            threshold = (min_ + max_) / 2.0
        else:
            threshold = self.threshold

        assert o.dtype == x.dtype

        nptype = o.reshape(-1)[0].numpy().dtype.type
        if nptype not in [np.float16, np.float32, np.float64]:
            raise ValueError(  # pragma: no cover
                f"expected dtype to be float16, float32 or float64, found '{nptype}'"
            )

        threshold = nptype(threshold)
        offset = nptype(1.0)

        if self.included_in == "lower":
            lower_ = threshold
            upper_ = np.nextafter(threshold, threshold + offset)
        elif self.included_in == "upper":
            lower_ = np.nextafter(threshold, threshold - offset)
            upper_ = threshold
        else:
            raise ValueError(
                f"expected included_in to be 'lower' or 'upper', found '{self.included_in}'"
            )

        assert lower_ < upper_

        p = ep.full_like(o, ep.nan)

        lower = ep.ones_like(o) * lower_
        upper = ep.ones_like(o) * upper_

        indices = ep.logical_and(o <= lower, x <= lower)
        p = ep.where(indices, o, p)

        indices = ep.logical_and(o <= lower, x >= upper)
        p = ep.where(indices, upper, p)

        indices = ep.logical_and(o >= upper, x <= lower)
        p = ep.where(indices, lower, p)

        indices = ep.logical_and(o >= upper, x >= upper)
        p = ep.where(indices, o, p)

        assert not ep.any(ep.isnan(p))

        is_adv1 = is_adversarial(x)
        is_adv2 = is_adversarial(p)
        if (is_adv1 != is_adv2).any():
            raise ValueError(
                "The specified threshold does not match what is done by the model."
            )
        return restore_type(p)
Example #9
    def __call__(
        self,
        inputs,
        labels,
        *,
        adversarials,
        criterion,
        threshold=None,
        included_in="upper",
    ):
        """For models that preprocess their inputs by binarizing the
        inputs, this attack can improve adversarials found by other
        attacks. It does this by utilizing information about the
        binarization and mapping values to the corresponding value in
        the clean input or to the correct side of the threshold.

        Parameters
        ----------
        threshold : float
            The threshold used by the model's binarization. If None,
            defaults to (model.bounds()[0] + model.bounds()[1]) / 2.
        included_in : str
            Whether the threshold value itself belongs to the lower or
            upper interval.

        """
        originals = ep.astensor(inputs)
        labels = ep.astensor(labels)

        def is_adversarial(p: ep.Tensor) -> ep.Tensor:
            """For each input in x, returns true if it is an adversarial for
            the given model and criterion"""
            logits = ep.astensor(self.model.forward(p.tensor))
            return criterion(originals, labels, p, logits)

        o = ep.astensor(inputs)
        x = ep.astensor(adversarials)

        min_, max_ = self.model.bounds()
        if threshold is None:
            threshold = (min_ + max_) / 2.0

        assert o.dtype == x.dtype
        dtype = o.dtype

        if dtype == o.backend.float16:
            nptype = np.float16
        elif dtype == o.backend.float32:
            nptype = np.float32
        elif dtype == o.backend.float64:
            nptype = np.float64
        else:
            raise ValueError(
                "expected dtype to be float16, float32 or float64, found '{dtype}'"
            )

        threshold = nptype(threshold)
        offset = nptype(1.0)

        if included_in == "lower":
            lower = threshold
            upper = np.nextafter(threshold, threshold + offset)
        elif included_in == "upper":
            lower = np.nextafter(threshold, threshold - offset)
            upper = threshold
        else:
            raise ValueError(
                "expected included_in to be 'lower' or 'upper', found '{included_in}'"
            )

        assert lower < upper

        p = ep.full_like(o, ep.nan)

        lower = ep.ones_like(o) * lower
        upper = ep.ones_like(o) * upper

        indices = ep.logical_and(o <= lower, x <= lower)
        p = ep.where(indices, o, p)

        indices = ep.logical_and(o <= lower, x >= upper)
        p = ep.where(indices, upper, p)

        indices = ep.logical_and(o >= upper, x <= lower)
        p = ep.where(indices, lower, p)

        indices = ep.logical_and(o >= upper, x >= upper)
        p = ep.where(indices, o, p)

        assert not ep.any(ep.isnan(p))

        is_adv1 = is_adversarial(x)
        is_adv2 = is_adversarial(p)
        assert (is_adv1 == is_adv2).all(), \
            "The specified threshold does not match what is done by the model."
        return p.tensor
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
        *,
        starting_points: Optional[ep.Tensor] = None,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        criterion_ = get_criterion(criterion)

        if isinstance(criterion_, Misclassification):
            targeted = False
            classes = criterion_.labels
        elif isinstance(criterion_, TargetedMisclassification):
            targeted = True
            classes = criterion_.target_classes
        else:
            raise ValueError("unsupported criterion")

        def loss_fn(
            inputs: ep.Tensor, labels: ep.Tensor
        ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:

            logits = model(inputs)

            if targeted:
                c_minimize = best_other_classes(logits, labels)
                c_maximize = labels  # target_classes
            else:
                c_minimize = labels  # labels
                c_maximize = best_other_classes(logits, labels)

            loss = logits[rows, c_minimize] - logits[rows, c_maximize]

            return -loss.sum(), (logits, loss)

        x, restore_type = ep.astensor_(inputs)
        del inputs, criterion, kwargs
        N = len(x)

        # start from initialization points/attack
        if starting_points is not None:
            x1 = starting_points
        else:
            if self.init_attack is not None:
                x1 = self.init_attack.run(model, x, criterion_)
            else:
                x1 = None

        # if initial points or initialization attacks are provided,
        #   search for the boundary
        if x1 is not None:
            is_adv = get_is_adversarial(criterion_, model)
            assert is_adv(x1).all()
            lower_bound = ep.zeros(x, shape=(N, ))
            upper_bound = ep.ones(x, shape=(N, ))
            for _ in range(self.binary_search_steps):
                epsilons = (lower_bound + upper_bound) / 2
                mid_points = self.mid_points(x, x1, epsilons, model.bounds)
                is_advs = is_adv(mid_points)
                lower_bound = ep.where(is_advs, lower_bound, epsilons)
                upper_bound = ep.where(is_advs, epsilons, upper_bound)
            starting_points = self.mid_points(x, x1, upper_bound, model.bounds)
            delta = starting_points - x
        else:
            # start from x0
            delta = ep.zeros_like(x)

        if classes.shape != (N, ):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}")

        min_, max_ = model.bounds
        rows = range(N)
        grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

        if self.p != 0:
            epsilon = ep.inf * ep.ones(x, len(x))
        else:
            epsilon = ep.ones(x, len(x)) if x1 is None \
                else ep.norms.l0(flatten(delta), axis=-1)
        if self.p != 0:
            worst_norm = ep.norms.lp(flatten(ep.maximum(x - min_, max_ - x)),
                                     p=self.p,
                                     axis=-1)
        else:
            worst_norm = flatten(ep.ones_like(x)).bool().sum(axis=1).float32()

        best_lp = worst_norm
        best_delta = delta
        adv_found = ep.zeros(x, len(x)).bool()

        for i in range(self.steps):
            # perform cosine annealing of learning rates
            stepsize = (self.min_stepsize +
                        (self.max_stepsize - self.min_stepsize) *
                        (1 + math.cos(math.pi * i / self.steps)) / 2)
            gamma = (0.001 + (self.gamma - 0.001) *
                     (1 + math.cos(math.pi * (i / self.steps))) / 2)

            x_adv = x + delta

            loss, (logits,
                   loss_batch), gradients = grad_and_logits(x_adv, classes)
            is_adversarial = criterion_(x_adv, logits)

            lp = ep.norms.lp(flatten(delta), p=self.p, axis=-1)
            is_smaller = lp <= best_lp
            is_both = ep.logical_and(is_adversarial, is_smaller)
            adv_found = ep.logical_or(adv_found, is_adversarial)
            best_lp = ep.where(is_both, lp, best_lp)
            best_delta = ep.where(atleast_kd(is_both, x.ndim), delta,
                                  best_delta)

            # update epsilon
            if self.p != 0:
                distance_to_boundary = abs(loss_batch) / ep.norms.lp(
                    flatten(gradients), p=self.dual, axis=-1)
                epsilon = ep.where(
                    is_adversarial,
                    ep.minimum(
                        epsilon * (1 - gamma),
                        ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)),
                    ep.where(
                        adv_found, epsilon * (1 + gamma),
                        ep.norms.lp(flatten(delta), p=self.p, axis=-1) +
                        distance_to_boundary))
            else:
                epsilon = ep.where(
                    is_adversarial,
                    ep.minimum(
                        ep.minimum(epsilon - 1,
                                   (epsilon * (1 - gamma)).astype(int).astype(
                                       epsilon.dtype)),
                        ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)),
                    ep.maximum(epsilon + 1,
                               (epsilon * (1 + gamma)).astype(int).astype(
                                   epsilon.dtype)))
                epsilon = ep.maximum(0, epsilon).astype(epsilon.dtype)

            # clip epsilon
            epsilon = ep.minimum(epsilon, worst_norm)

            # computes normalized gradient update
            grad_ = self.normalize(gradients, x=x,
                                   bounds=model.bounds) * stepsize

            # do step
            delta = delta + grad_

            # project according to the given norm
            delta = self.project(x=x + delta, x0=x, epsilon=epsilon) - x

            # clip to valid bounds
            delta = ep.clip(x + delta, *model.bounds) - x

        x_adv = x + best_delta
        return restore_type(x_adv)
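
When starting points (or an init_attack) are available, the boundary search above halves the interpolation interval binary_search_steps times, keeping upper_bound on the adversarial side. A self-contained sketch with a toy 1-D decision rule (the decision rule, points, and step count are invented for illustration):

import numpy as np

def is_adv(points: np.ndarray) -> np.ndarray:
    # toy stand-in for the model/criterion: points above 0.7 are "adversarial"
    return points > 0.7

x = np.array([0.1, 0.2])   # clean points
x1 = np.array([1.0, 0.9])  # known adversarial starting points

lower = np.zeros(len(x))   # largest epsilon known to be non-adversarial
upper = np.ones(len(x))    # smallest epsilon known to be adversarial

for _ in range(10):  # binary_search_steps
    eps = (lower + upper) / 2
    mid = x + eps * (x1 - x)  # linear interpolation between x and x1
    advs = is_adv(mid)
    lower = np.where(advs, lower, eps)
    upper = np.where(advs, eps, upper)

starting = x + upper * (x1 - x)  # just on the adversarial side
print(starting)  # close to the 0.7 decision boundary, from above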