Example #1
    def _get_best_theta(
            self, 
            function_evolution: Callable[[ep.Tensor], ep.Tensor], 
            best_params: ep.Tensor) -> ep.Tensor:
        v_type = function_evolution(best_params)
        coefficients = ep.zeros(v_type, 2 * self.T).raw
        for i in range(0, self.T):
            coefficients[2 * i] = 1 - (i / self.T)
            coefficients[2 * i + 1] = -coefficients[2 * i]

        for i, coeff in enumerate(coefficients):
            params = coeff * self.theta_max
            x_evol = function_evolution(params)
            x = ep.where(
                atleast_kd(best_params == 0, v_type.ndim), 
                x_evol, 
                ep.zeros_like(v_type))

            is_advs = self._is_adversarial(x)

            best_params = ep.where(
                (best_params == 0) * is_advs,
                params,
                best_params
            )
            if (best_params != 0).all():
                break
        
        return best_params  
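
The schedule above probes angles of decreasing magnitude with alternating sign. A minimal standalone NumPy sketch of the same schedule (T stands in for self.T; values are illustrative):

import numpy as np

# hypothetical sketch of the alternating-sign coefficient schedule used above
T = 4
coefficients = np.zeros(2 * T)
for i in range(T):
    coefficients[2 * i] = 1 - i / T
    coefficients[2 * i + 1] = -coefficients[2 * i]
print(coefficients)  # [ 1.   -1.    0.75 -0.75  0.5  -0.5   0.25 -0.25]
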
Example #2
    def _binary_search(self, originals: ep.Tensor, perturbed: ep.Tensor,
                       boost: bool = False) -> ep.Tensor:
        # Choose upper thresholds in binary search based on constraint.
        highs = ep.ones(perturbed, len(perturbed))
        d = np.prod(perturbed.shape[1:])
        thresholds = self._BS_gamma / (d * math.sqrt(d))
        lows = ep.zeros_like(highs)

        # Boost Binary search
        if boost:
            boost_vec = 0.1 * originals + 0.9 * perturbed
            is_advs = self._is_adversarial(boost_vec)
            is_advs = atleast_kd(is_advs, originals.ndim)
            originals = ep.where(is_advs.logical_not(), boost_vec, originals)
            perturbed = ep.where(is_advs, boost_vec, perturbed)

        # use this variable to check when mids stays constant and the BS has converged
        old_mids = highs
        iteration = 0
        while ep.any(highs - lows > thresholds) and iteration < self._BS_max_iteration:
            iteration += 1
            mids = (lows + highs) / 2
            mids_perturbed = self._project(originals, perturbed, mids)
            is_adversarial_ = self._is_adversarial(mids_perturbed)

            highs = ep.where(is_adversarial_, mids, highs)
            lows = ep.where(is_adversarial_, lows, mids)

            # check if there is no more progress due to numerical imprecision
            reached_numerical_precision = (old_mids == mids).all()
            old_mids = mids
            if reached_numerical_precision:
                break
        
        results = self._project(originals, perturbed, highs)
        return results
    def __call__(self, model: Model, inputs: T, criterion: Union[Criterion, T]) -> T:

        x, restore_type = ep.astensor_(inputs)
        del inputs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        min_, max_ = model.bounds
        target = min_ + self.target * (max_ - min_)
        direction = target - x

        lower_bound = ep.zeros(x, len(x))
        upper_bound = ep.ones(x, len(x))
        epsilons = lower_bound
        for _ in range(self.binary_search_steps):
            eps = atleast_kd(epsilons, x.ndim)
            is_adv = is_adversarial(x + eps * direction)
            lower_bound = ep.where(is_adv, lower_bound, epsilons)
            upper_bound = ep.where(is_adv, epsilons, upper_bound)
            epsilons = (lower_bound + upper_bound) / 2

        epsilons = upper_bound
        eps = atleast_kd(epsilons, x.ndim)
        xp = x + eps * direction
        return restore_type(xp)
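
The loop above is a textbook bisection on the blending factor: lower_bound tracks the largest non-adversarial epsilon, upper_bound the smallest adversarial one. A self-contained NumPy sketch with a toy criterion (the 0.3 threshold and 10 steps are arbitrary assumptions):

import numpy as np

def is_adv(eps: np.ndarray) -> np.ndarray:
    # toy stand-in for is_adversarial: adversarial once the blend is large enough
    return eps >= 0.3

lower, upper = np.zeros(5), np.ones(5)
eps = lower
for _ in range(10):  # binary_search_steps
    adv = is_adv(eps)
    lower = np.where(adv, lower, eps)
    upper = np.where(adv, eps, upper)
    eps = (lower + upper) / 2
print(upper)  # ~0.3, the smallest adversarial blending factor found
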
def l2_clipping_aware_rescaling(x,
                                delta,
                                eps: float,
                                a: float = 0.0,
                                b: float = 1.0):  # type: ignore
    """Calculates eta such that norm(clip(x + eta * delta, a, b) - x) == eps.

    Assumes x and delta have a batch dimension and eps, a, and b are
    scalars. If the equation cannot be solved because eps is too large, the
    left-hand side is maximized.

    Args:
        x: A batch of inputs (PyTorch Tensor, TensorFlow Eager Tensor, NumPy
            Array, JAX Array, or EagerPy Tensor).
        delta: A batch of perturbation directions (same shape and type as x).
        eps: The target norm (non-negative float).
        a: The lower bound of the data domain (float).
        b: The upper bound of the data domain (float).

    Returns:
        eta: A batch of scales with the same number of dimensions as x but
            all axes of size 1 except for the batch dimension.
    """
    (x, delta), restore_fn = ep.astensors_(x, delta)
    N = x.shape[0]
    assert delta.shape[0] == N
    rows = ep.arange(x, N)

    delta2 = delta.square().reshape((N, -1))
    space = ep.where(delta >= 0, b - x, x - a).reshape((N, -1))
    f2 = space.square() / ep.maximum(delta2, 1e-20)
    ks = ep.argsort(f2, axis=-1)
    f2_sorted = f2[rows[:, ep.newaxis], ks]
    m = ep.cumsum(delta2[rows[:, ep.newaxis],
                         ks.flip(axis=1)], axis=-1).flip(axis=1)
    dx = f2_sorted[:, 1:] - f2_sorted[:, :-1]
    dx = ep.concatenate((f2_sorted[:, :1], dx), axis=-1)
    dy = m * dx
    y = ep.cumsum(dy, axis=-1)
    c = y >= eps**2

    # work-around to get first nonzero element in each row
    f = ep.arange(x, c.shape[-1], 0, -1)
    j = ep.argmax(c.astype(f.dtype) * f, axis=-1)

    eta2 = f2_sorted[rows, j] - (y[rows, j] - eps**2) / m[rows, j]
    # it can happen that for certain rows even the largest j is not large
    # enough (i.e. c[:, -1] is False); in that case we just use the last value
    # (without any correction) as it's the best we can do (these should also
    # be the only cases where m[j] can be 0, so they are not a problem)
    eta2 = ep.where(c[:, -1], eta2, f2_sorted[:, -1])
    eta = ep.sqrt(eta2)
    eta = eta.reshape((-1, ) + (1, ) * (x.ndim - 1))

    # xp = ep.clip(x + eta * delta, a, b)
    # l2 = (xp - x).reshape((N, -1)).square().sum(axis=-1).sqrt()
    return restore_fn(eta)
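
A usage sketch that checks the property stated in the docstring, mirroring the commented-out verification above (shapes and eps are arbitrary assumptions; eagerpy accepts the NumPy arrays directly):

import numpy as np

x = np.random.uniform(0, 1, size=(4, 3, 8, 8)).astype(np.float32)
delta = np.random.normal(size=x.shape).astype(np.float32)
eta = l2_clipping_aware_rescaling(x, delta, eps=1.0)
xp = np.clip(x + eta * delta, 0, 1)
l2 = np.sqrt(((xp - x).reshape(len(x), -1) ** 2).sum(axis=-1))
print(l2)  # ~= 1.0 per row (unless eps was unreachable)
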
Example #5
    def _get_candidates(self, originals: ep.Tensor, best_advs: ep.Tensor) -> ep.Tensor:
        """
        Find the lowest epsilon that misclassifies x along the direction: q of class 1 / q + eps * direction of class 0
        """
        epsilons = ep.zeros(originals, len(originals))
        direction_2 = ep.zeros_like(originals)
        while (epsilons == 0).any():
            # if epsilon == 0, we are still searching for a good direction
            direction_2 = ep.where(
                atleast_kd(epsilons == 0, direction_2.ndim),
                self._basis.get_vector(self._directions_ortho),
                direction_2
            )
            
            for i, eps_i in enumerate(epsilons):
                if eps_i == 0:
                    self._directions_ortho[i] = ep.concatenate((self._directions_ortho[i], direction_2[i].expand_dims(0)), axis=0)
                    if len(self._directions_ortho[i]) > self.n_ortho + 1:
                        self._directions_ortho[i] = ep.concatenate((self._directions_ortho[i][:1], self._directions_ortho[i][self.n_ortho:]))
                        
            function_evolution = self._get_evolution_function(originals, best_advs, direction_2)
            new_epsilons = self._get_best_theta(function_evolution, epsilons)

            self.theta_max = ep.where(new_epsilons == 0, self.theta_max * self.rho, self.theta_max)
            self.theta_max = ep.where((new_epsilons != 0) * (epsilons == 0), self.theta_max / self.rho, self.theta_max)
            epsilons = new_epsilons

        function_evolution = self._get_evolution_function(originals, best_advs, direction_2)
        if self.with_alpha_line_search:
            epsilons = self._binary_search_on_alpha(function_evolution, epsilons)

        epsilons = epsilons.expand_dims(0)
        if self.with_interpolation:
            epsilons = ep.concatenate((epsilons, epsilons / 2), axis=0)

        candidates = ep.concatenate([function_evolution(eps).expand_dims(0) for eps in epsilons], axis=0)

        if self.with_interpolation:
            d = self.distance(best_advs, originals)
            delta = self.distance(self._binary_search(originals, candidates[1], boost=True), originals)
            theta_star = epsilons[0]

            num = theta_star * (4 * delta - d * (self._cos(theta_star.raw) + 3))
            den = 4 * (2 * delta - d * (self._cos(theta_star.raw) + 1))

            theta_hat = num / den
            q_interp = function_evolution(theta_hat)
            if self.with_distance_line_search:
                q_interp = self._binary_search(originals, q_interp, boost=True)
            candidates = ep.concatenate((candidates, q_interp.expand_dims(0)), axis=0)

        return candidates
Example #6
def _project_shrinkage_thresholding(
    z: ep.Tensor, x0: ep.Tensor, regularization: float, min_: float, max_: float
) -> ep.Tensor:
    """Performs the element-wise projected shrinkage-thresholding
    operation"""

    upper_mask = z - x0 > regularization
    lower_mask = z - x0 < -regularization

    projection = ep.where(upper_mask, ep.minimum(z - regularization, max_), x0)
    projection = ep.where(lower_mask, ep.maximum(z + regularization, min_), projection)

    return projection
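
A self-contained NumPy rendering of the same element-wise rule, handy for sanity checks (the inputs are arbitrary):

import numpy as np

def project_shrinkage_thresholding(z, x0, reg, min_, max_):
    # keep x0 inside the dead zone |z - x0| <= reg, otherwise shrink z towards x0
    upper = z - x0 > reg
    lower = z - x0 < -reg
    out = np.where(upper, np.minimum(z - reg, max_), x0)
    return np.where(lower, np.maximum(z + reg, min_), out)

z = np.array([0.9, 0.5, 0.1])
x0 = np.array([0.5, 0.5, 0.5])
print(project_shrinkage_thresholding(z, x0, reg=0.2, min_=0.0, max_=1.0))
# [0.7 0.5 0.3]
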
Example #7
    def approximate_gradients(
        self,
        is_adversarial: Callable[[ep.Tensor], ep.Tensor],
        x_advs: ep.Tensor,
        steps: int,
        delta: ep.Tensor,
    ) -> ep.Tensor:
        # (steps, bs, ...)
        noise_shape = tuple([steps] + list(x_advs.shape))
        if self.constraint == "l2":
            rv = ep.normal(x_advs, noise_shape)
        elif self.constraint == "linf":
            rv = ep.uniform(x_advs, low=-1, high=1, shape=noise_shape)
        rv /= atleast_kd(ep.norms.l2(flatten(rv, keep=1), -1), rv.ndim) + 1e-12

        scaled_rv = atleast_kd(ep.expand_dims(delta, 0), rv.ndim) * rv

        perturbed = ep.expand_dims(x_advs, 0) + scaled_rv
        perturbed = ep.clip(perturbed, 0, 1)

        rv = (perturbed - x_advs) / atleast_kd(ep.expand_dims(delta + 1e-8, 0),
                                               rv.ndim)

        multipliers_list: List[ep.Tensor] = []
        for step in range(steps):
            decision = is_adversarial(perturbed[step])
            multipliers_list.append(
                ep.where(
                    decision,
                    ep.ones(x_advs, len(x_advs)),
                    -ep.ones(x_advs, len(decision)),
                ))
        # (steps, bs, ...)
        multipliers = ep.stack(multipliers_list, 0)

        vals = ep.where(
            ep.abs(ep.mean(multipliers, axis=0, keepdims=True)) == 1,
            multipliers,
            multipliers - ep.mean(multipliers, axis=0, keepdims=True),
        )
        grad = ep.mean(atleast_kd(vals, rv.ndim) * rv, axis=0)

        grad /= ep.norms.l2(atleast_kd(flatten(grad), grad.ndim)) + 1e-12

        return grad
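
The estimator averages random unit directions weighted by the ±1 decisions, which converges to the normal of the decision boundary. A toy NumPy sketch against a linear boundary (the boundary w·x > 1 is an assumption for illustration only):

import numpy as np

rng = np.random.default_rng(0)
w = np.array([1.0, 2.0])              # toy boundary: adversarial iff w @ x > 1
x = np.array([0.2, 0.4])              # a point exactly on the boundary (w @ x == 1)
steps, delta = 1000, 1e-2

rv = rng.normal(size=(steps, 2))
rv /= np.linalg.norm(rv, axis=-1, keepdims=True)
decisions = (x + delta * rv) @ w > 1  # query the decision at each probe
multipliers = np.where(decisions, 1.0, -1.0)
grad = (multipliers[:, None] * rv).mean(axis=0)
grad /= np.linalg.norm(grad)
print(grad, w / np.linalg.norm(w))    # the estimate is ~parallel to the true normal
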
Example #8
    def normalize(self, gradients: ep.Tensor, *, x: ep.Tensor,
                  bounds: Bounds) -> ep.Tensor:
        bad_pos = ep.logical_or(
            ep.logical_and(x == bounds.lower, gradients < 0),
            ep.logical_and(x == bounds.upper, gradients > 0),
        )
        gradients = ep.where(bad_pos, ep.zeros_like(gradients), gradients)

        abs_gradients = gradients.abs()
        quantiles = np.quantile(flatten(abs_gradients).numpy(),
                                q=self.quantile,
                                axis=-1)
        keep = abs_gradients >= atleast_kd(ep.from_numpy(gradients, quantiles),
                                           gradients.ndim)
        e = ep.where(keep, gradients.sign(), ep.zeros_like(gradients))
        return normalize_lp_norms(e, p=1)
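
A NumPy sketch of the quantile-based sparsification: entries below the per-sample quantile of the absolute gradient are zeroed, the rest are reduced to their sign, and the result is L1-normalized (the 0.8 quantile stands in for self.quantile):

import numpy as np

g = np.array([[0.05, -0.4, 0.1, 0.9, -0.02]])
q = np.quantile(np.abs(g), 0.8, axis=-1)    # per-row threshold
keep = np.abs(g) >= q[:, None]
e = np.where(keep, np.sign(g), 0.0)
e /= np.abs(e).sum(axis=-1, keepdims=True)  # L1 normalization, as normalize_lp_norms(e, p=1)
print(e)  # [[0. 0. 0. 1. 0.]]
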
Example #9
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[ep.Tensor] = None,
        **kwargs: Any,
    ) -> T:
        originals, restore_type = ep.astensor_(inputs)

        self._nqueries = {i: 0 for i in range(len(originals))}
        self._set_cos_sin_function(originals)
        self.theta_max = ep.ones(originals, len(originals)) * self._theta_max
        criterion = get_criterion(criterion)
        self._criterion_is_adversarial = get_is_adversarial(criterion, model)

        # Get Starting Point
        if starting_points is not None:
            best_advs = starting_points
        else:
            init_attack: MinimizationAttack = LinearSearchBlendedUniformNoiseAttack(steps=50)
            best_advs = init_attack.run(model, originals, criterion, early_stop=early_stop)

        assert self._is_adversarial(best_advs).all()

        # Initialize the direction orthogonalized with the first direction
        fd = best_advs - originals
        norm = ep.norms.l2(fd.flatten(1), axis=1)
        fd = fd / atleast_kd(norm, fd.ndim)
        self._directions_ortho = {i: v.expand_dims(0) for i, v in enumerate(fd)}

        # Load Basis
        if "basis_params" in kwargs:
            self._basis = Basis(originals, **kwargs["basis_params"])
        else:
            self._basis = Basis(originals)

        for _ in range(self._steps):
            # Get candidates. Shape: (n_candidates, batch_size, image_size)
            candidates = self._get_candidates(originals, best_advs)
            candidates = candidates.transpose((1, 0, 2, 3, 4))

            
            best_candidates = ep.zeros_like(best_advs).raw
            for i, o in enumerate(originals):
                o_repeated = ep.concatenate([o.expand_dims(0)] * len(candidates[i]), axis=0)
                index = ep.argmin(self.distance(o_repeated, candidates[i])).raw  # closest candidate
                best_candidates[i] = candidates[i][index].raw

            is_success = self.distance(best_candidates, originals) < self.distance(best_advs, originals)
            best_advs = ep.where(atleast_kd(is_success, best_candidates.ndim), ep.astensor(best_candidates), best_advs)

            if all(v > self._max_queries for v in self._nqueries.values()):
                print("Max queries attained for all the images.")
                break
        return restore_type(best_advs)
    def __call__(self, model: Model, inputs: T, criterion: Union[Criterion, T]) -> T:

        x, restore_type = ep.astensor_(inputs)
        del inputs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        min_, max_ = model.bounds
        target = min_ + self.target * (max_ - min_)
        direction = target - x

        best = ep.ones(x, len(x))

        epsilon = 0.0
        stepsize = 1.0 / self.steps
        for _ in range(self.steps):
            # TODO: reduce the batch size to the ones that have not yet been successful

            is_adv = is_adversarial(x + epsilon * direction)
            is_best_adv = ep.logical_and(is_adv, best == 1)
            best = ep.where(is_best_adv, epsilon, best)

            if (best < 1).all():
                break

            epsilon += stepsize

        eps = atleast_kd(best, x.ndim)
        xp = x + eps * direction
        return restore_type(xp)
    def mid_points(
        self,
        x0: ep.Tensor,
        x1: ep.Tensor,
        epsilons: ep.Tensor,
        bounds: Tuple[float, float],
    ):
        # returns a point between x0 and x1 where
        # epsilon = 0 returns x0 and epsilon = 1 returns x1
        delta = x1 - x0
        min_, max_ = bounds
        s = max_ - min_
        # get epsilons in right shape for broadcasting
        epsilons = epsilons.reshape(epsilons.shape + (1, ) * (x0.ndim - 1))

        clipped_delta = ep.where(delta < -epsilons * s, -epsilons * s, delta)
        clipped_delta = ep.where(clipped_delta > epsilons * s, epsilons * s,
                                 clipped_delta)
        return x0 + clipped_delta
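
Numerically, each coordinate of x0 moves towards x1 by at most epsilon * (max_ - min_), i.e. a step onto the Linf ball of that radius. A tiny NumPy check (bounds (0, 1) and eps are assumptions):

import numpy as np

x0 = np.array([0.2, 0.8])
x1 = np.array([0.9, 0.1])
eps, s = 0.25, 1.0                  # bounds (0, 1), so s = max_ - min_ = 1
delta = np.clip(x1 - x0, -eps * s, eps * s)
print(x0 + delta)                   # [0.45 0.55]: each coordinate moved at most eps * s
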
Example #12
    def __call__(self, inputs, labels, *, steps=1000):
        x = ep.astensor(inputs)
        y = ep.astensor(labels)
        assert x.shape[0] == y.shape[0]
        assert y.ndim == 1

        assert x.ndim == 4
        if self.channel_axis == 1:
            h, w = x.shape[2:4]
        elif self.channel_axis == 3:
            h, w = x.shape[1:3]
        else:
            raise ValueError(
                f"expected 'channel_axis' to be 1 or 3, got {self.channel_axis}")

        size = max(h, w)

        min_, max_ = self.model.bounds()

        x0 = x
        x0np = x0.numpy()

        epsilons = np.linspace(0, 1, num=steps + 1)[1:]

        logits = ep.astensor(self.model.forward(x0.tensor))
        classes = logits.argmax(axis=-1)
        is_adv = classes != labels
        found = is_adv

        result = x0

        for epsilon in epsilons:
            # TODO: reduce the batch size to the ones that haven't been successful

            sigmas = [epsilon * size] * 4
            sigmas[0] = 0
            sigmas[self.channel_axis] = 0

            # TODO: once we can implement gaussian_filter in eagerpy, avoid converting from numpy
            x = gaussian_filter(x0np, sigmas)
            x = np.clip(x, min_, max_)
            x = ep.from_numpy(x0, x)

            logits = ep.astensor(self.model.forward(x.tensor))
            classes = logits.argmax(axis=-1)
            is_adv = classes != labels

            new_adv = ep.logical_and(is_adv, found.logical_not())
            result = ep.where(atleast_kd(new_adv, x.ndim), x, result)
            found = ep.logical_or(new_adv, found)

            if found.all():
                break

        return result.tensor
Example #13
    def _binary_search(
        self,
        is_adversarial: Callable[[ep.Tensor], ep.Tensor],
        originals: ep.Tensor,
        perturbed: ep.Tensor,
    ) -> ep.Tensor:
        # Choose upper thresholds in binary search based on constraint.
        d = np.prod(perturbed.shape[1:])
        if self.constraint == "linf":
            highs = linf(originals, perturbed)

            # TODO: Check if the threshold is correct
            #  empirically this seems to be too low
            thresholds = highs * self.gamma / (d * d)
        else:
            highs = ep.ones(perturbed, len(perturbed))
            thresholds = self.gamma / (d * math.sqrt(d))

        lows = ep.zeros_like(highs)

        # use this variable to check when mids stays constant and the BS has converged
        old_mids = highs

        while ep.any(highs - lows > thresholds):
            mids = (lows + highs) / 2
            mids_perturbed = self._project(originals, perturbed, mids)
            is_adversarial_ = is_adversarial(mids_perturbed)

            highs = ep.where(is_adversarial_, mids, highs)
            lows = ep.where(is_adversarial_, lows, mids)

            # check if there is no more progress due to numerical imprecision
            reached_numerical_precision = (old_mids == mids).all()
            old_mids = mids

            if reached_numerical_precision:
                # TODO: warn user
                break

        res = self._project(originals, perturbed, highs)

        return res
    def project(self, x: ep.Tensor, x0: ep.Tensor,
                epsilon: ep.Tensor) -> ep.Tensor:
        flatten_delta = flatten(x - x0)
        abs_delta = abs(flatten_delta)
        epsilon = epsilon.astype(int)
        rows = range(flatten_delta.shape[0])
        idx_sorted = ep.argsort(abs_delta, axis=-1)[rows, -epsilon]
        thresholds = (ep.ones_like(flatten_delta).T *
                      abs_delta[rows, idx_sorted]).T
        clipped = ep.where(abs_delta >= thresholds, flatten_delta, 0)
        return x0 + clipped.reshape(x0.shape).astype(x0.dtype)
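
This projection keeps only the epsilon largest-magnitude components of the perturbation, i.e. it projects onto an L0 budget. A NumPy sketch of the same top-k selection (values and k are arbitrary):

import numpy as np

x0 = np.zeros(5)
x = np.array([0.3, -0.9, 0.1, 0.5, -0.05])
k = 2                                   # epsilon, read as a component budget
delta = x - x0
threshold = np.sort(np.abs(delta))[-k]  # magnitude of the k-th largest component
clipped = np.where(np.abs(delta) >= threshold, delta, 0.0)
print(x0 + clipped)                     # [ 0.  -0.9  0.   0.5  0. ]
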
Example #15
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)

        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        found = is_adversarial(x)
        results = x

        def grid_search_generator() -> Generator[Any, Any, Any]:
            dphis = np.linspace(-self.max_rot, self.max_rot, self.num_rots)
            dxs = np.linspace(-self.max_trans, self.max_trans, self.num_trans)
            dys = np.linspace(-self.max_trans, self.max_trans, self.num_trans)
            for dphi in dphis:
                for dx in dxs:
                    for dy in dys:
                        yield dphi, dx, dy

        def random_search_generator() -> Generator[Any, Any, Any]:
            dphis = np.random.uniform(-self.max_rot, self.max_rot,
                                      self.random_steps)
            dxs = np.random.uniform(-self.max_trans, self.max_trans,
                                    self.random_steps)
            dys = np.random.uniform(-self.max_trans, self.max_trans,
                                    self.random_steps)
            for dphi, dx, dy in zip(dphis, dxs, dys):
                yield dphi, dx, dy

        gen = grid_search_generator() if self.grid_search else random_search_generator()
        for dphi, dx, dy in gen:
            # TODO: reduce the batch size to the ones that haven't been successful

            x_p = rotate_and_shift(x, translation=(dx, dy), rotation=dphi)
            is_adv = is_adversarial(x_p)
            new_adv = ep.logical_and(is_adv, found.logical_not())

            results = ep.where(atleast_kd(new_adv, x_p.ndim), x_p, results)
            found = ep.logical_or(new_adv, found)
            if found.all():
                break  # all images in batch misclassified
        return restore_type(results)
Example #16
    def _binary_search(
        self,
        x_adv_flat: ep.Tensor,
        mask: Union[ep.Tensor, List[bool]],
        mask_indices: ep.Tensor,
        indices: Union[ep.Tensor, List[int]],
        adv_values: ep.Tensor,
        non_adv_values: ep.Tensor,
        original_shape: Tuple,
        is_adversarial: Callable,
    ) -> ep.Tensor:
        for i in range(10):
            next_values = (adv_values + non_adv_values) / 2
            x_adv_flat = ep.index_update(
                x_adv_flat, (mask_indices, indices), next_values
            )
            is_adv = is_adversarial(x_adv_flat.reshape(original_shape))[mask]

            adv_values = ep.where(is_adv, next_values, adv_values)
            non_adv_values = ep.where(is_adv, non_adv_values, next_values)

        return adv_values
Example #17
def normalize_gradient_l2_norms(grad: ep.Tensor) -> ep.Tensor:
    norms = ep.norms.l2(flatten(grad), -1)

    # remove zero gradients
    grad = ep.where(atleast_kd(norms == 0, grad.ndim),
                    ep.normal(grad, shape=grad.shape), grad)
    # calculate norms again for previously vanishing elements
    norms = ep.norms.l2(flatten(grad), -1)

    norms = ep.maximum(norms, 1e-12)  # avoid division by zero
    factor = 1 / norms
    factor = atleast_kd(factor, grad.ndim)
    return grad * factor
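
A NumPy sketch of the same two-stage normalization: rows with vanishing gradients are resampled from a Gaussian, then every row is rescaled to unit L2 norm (the inputs are arbitrary):

import numpy as np

rng = np.random.default_rng(0)
grad = np.stack([np.zeros(3), np.array([3.0, 4.0, 0.0])])
norms = np.linalg.norm(grad.reshape(len(grad), -1), axis=-1)
grad = np.where((norms == 0)[:, None], rng.normal(size=grad.shape), grad)  # resample vanished rows
norms = np.maximum(np.linalg.norm(grad.reshape(len(grad), -1), axis=-1), 1e-12)
print(grad / norms[:, None])  # every row now has unit L2 norm
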
Example #18
def _apply_decision_rule(
    decision_rule: Union[Literal["EN"], Literal["L1"]],
    beta: float,
    best_advs: ep.Tensor,
    best_advs_norms: ep.Tensor,
    x_k: ep.Tensor,
    x: ep.Tensor,
    found_advs: ep.Tensor,
) -> Tuple[ep.Tensor, ep.Tensor]:
    if decision_rule == "EN":
        norms = beta * flatten(x_k - x).abs().sum(
            axis=-1) + flatten(x_k - x).square().sum(axis=-1)
    else:
        # decision rule = L1
        norms = flatten(x_k - x).abs().sum(axis=-1)

    new_best = ep.logical_and(norms < best_advs_norms, found_advs)
    new_best_kd = atleast_kd(new_best, best_advs.ndim)
    best_advs = ep.where(new_best_kd, x_k, best_advs)
    best_advs_norms = ep.where(new_best, norms, best_advs_norms)

    return best_advs, best_advs_norms
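
For the EN rule, the per-sample score is the elastic-net distance beta * ||x_k - x||_1 + ||x_k - x||_2^2. A compact NumPy check (beta and the perturbation are assumptions):

import numpy as np

beta = 1e-2
x = np.zeros((1, 4))
x_k = np.array([[0.1, -0.2, 0.0, 0.3]])
d = (x_k - x).reshape(len(x), -1)
en = beta * np.abs(d).sum(axis=-1) + (d ** 2).sum(axis=-1)  # beta * L1 + squared L2
print(en)  # [0.146]
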
Example #19
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        self.process_raw()
        assert self.inputs is not None
        assert self.outputs is not None
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)

        result = x
        found = criterion(x, model(x))

        batch_size = len(x)

        # for every sample try every other sample
        index_pools: List[List[int]] = []
        for i in range(batch_size):
            indices = list(range(batch_size))
            indices.remove(i)
            np.random.shuffle(indices)
            index_pools.append(indices)

        for i in range(batch_size - 1):
            if found.all():
                break

            indices = np.array([pool[i] for pool in index_pools])

            xp = self.inputs[indices]
            yp = self.outputs[indices]
            is_adv = criterion(xp, yp)

            new_found = ep.logical_and(is_adv, found.logical_not())
            result = ep.where(atleast_kd(new_found, result.ndim), xp, result)
            found = ep.logical_or(found, new_found)

        return restore_type(result)
Example #20
    def _project(self, originals: ep.Tensor, perturbed: ep.Tensor,
                 epsilons: ep.Tensor) -> ep.Tensor:
        """Clips the perturbations to epsilon and returns the new perturbed

        Args:
            originals: A batch of reference inputs.
            perturbed: A batch of perturbed inputs.
            epsilons: A batch of norm values to project to.
        Returns:
            A tensor like perturbed but with the perturbation clipped to epsilon.
        """
        epsilons = atleast_kd(epsilons, originals.ndim)
        if self.constraint == "linf":
            perturbation = perturbed - originals

            # ep.clip does not support tensors as min/max
            clipped_perturbed = ep.where(perturbation > epsilons,
                                         originals + epsilons, perturbed)
            clipped_perturbed = ep.where(perturbation < -epsilons,
                                         originals - epsilons,
                                         clipped_perturbed)
            return clipped_perturbed
        else:
            return (1.0 - epsilons) * originals + epsilons * perturbed
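
For the L2 branch, the projection is a convex blend, so the perturbation norm scales linearly with epsilons. A quick NumPy check (values assumed):

import numpy as np

originals = np.zeros((1, 3))
perturbed = np.array([[3.0, 0.0, 4.0]])        # L2 distance 5 from the original
eps = 0.4
projected = (1.0 - eps) * originals + eps * perturbed
print(np.linalg.norm(projected - originals))   # 2.0 == eps * 5
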
Example #21
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        min_, max_ = model.bounds
        target = min_ + self.target * (max_ - min_)
        direction = target - x

        lower_bound = ep.zeros(x, len(x))
        upper_bound = ep.ones(x, len(x))
        epsilons = lower_bound
        for _ in range(self.binary_search_steps):
            eps = atleast_kd(epsilons, x.ndim)
            is_adv = is_adversarial(x + eps * direction)
            lower_bound = ep.where(is_adv, lower_bound, epsilons)
            upper_bound = ep.where(is_adv, epsilons, upper_bound)
            epsilons = (lower_bound + upper_bound) / 2

        epsilons = upper_bound
        eps = atleast_kd(epsilons, x.ndim)
        xp = x + eps * direction
        return restore_type(xp)
def test_pointwise_targeted_attack(
    request: Any,
    fmodel_and_data_ext_for_attacks: ModeAndDataAndDescription,
    attack: fa.PointwiseAttack,
) -> None:
    (fmodel, x,
     y), real, low_dimensional_input = fmodel_and_data_ext_for_attacks

    if not low_dimensional_input or not real:
        pytest.skip()

    x = (x - fmodel.bounds.lower) / (fmodel.bounds.upper - fmodel.bounds.lower)
    fmodel = fmodel.transform_bounds((0, 1))

    init_attack = fa.SaltAndPepperNoiseAttack(steps=50)
    init_advs = init_attack.run(fmodel, x, y)

    logits = fmodel(init_advs)
    num_classes = logits.shape[-1]
    target_classes = logits.argmax(-1)
    target_classes = ep.where(target_classes == y,
                              (target_classes + 1) % num_classes,
                              target_classes)
    criterion = fbn.TargetedMisclassification(target_classes)

    advs = attack.run(fmodel, x, criterion, starting_points=init_advs)

    init_norms_l0 = ep.norms.lp(flatten(init_advs - x), p=0, axis=-1)
    norms_l0 = ep.norms.lp(flatten(advs - x), p=0, axis=-1)

    init_norms_l2 = ep.norms.lp(flatten(init_advs - x), p=2, axis=-1)
    norms_l2 = ep.norms.lp(flatten(advs - x), p=2, axis=-1)

    is_smaller_l0 = norms_l0 < init_norms_l0
    is_smaller_l2 = norms_l2 < init_norms_l2

    assert fbn.accuracy(fmodel, advs, y) < fbn.accuracy(fmodel, x, y)
    assert fbn.accuracy(fmodel, advs, y) <= fbn.accuracy(fmodel, init_advs, y)
    assert fbn.accuracy(fmodel, advs, target_classes) > fbn.accuracy(
        fmodel, x, target_classes)
    assert fbn.accuracy(fmodel, advs, target_classes) >= fbn.accuracy(
        fmodel, init_advs, target_classes)
    assert is_smaller_l2.any()
    assert is_smaller_l0.any()
    def mid_points(
        self,
        x0: ep.Tensor,
        x1: ep.Tensor,
        epsilons: ep.Tensor,
        bounds: Tuple[float, float],
    ) -> ep.Tensor:
        # returns a point between x0 and x1 where
        # epsilon = 0 returns x0 and epsilon = 1
        # returns x1

        # get epsilons in right shape for broadcasting
        epsilons = epsilons.reshape(epsilons.shape + (1, ) * (x0.ndim - 1))

        threshold = (bounds[1] - bounds[0]) * (1 - epsilons)
        mask = (x1 - x0).abs() > threshold
        new_x = ep.where(mask,
                         x0 + (x1 - x0).sign() * ((x1 - x0).abs() - threshold),
                         x0)
        return new_x
Example #24
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        min_, max_ = model.bounds
        target = min_ + self.target * (max_ - min_)
        direction = target - x

        best = ep.ones(x, len(x))

        epsilon = 0.0
        stepsize = 1.0 / self.steps
        for _ in range(self.steps):
            # TODO: reduce the batch size to the ones that have not yet been successful

            is_adv = is_adversarial(x + epsilon * direction)
            is_best_adv = ep.logical_and(is_adv, best == 1)
            best = ep.where(is_best_adv, epsilon, best)

            if (best < 1).all():
                break  # pragma: no cover

            epsilon += stepsize

        eps = atleast_kd(best, x.ndim)
        xp = x + eps * direction
        return restore_type(xp)
    def __call__(self,
                 inputs,
                 labels,
                 *,
                 epsilon,
                 criterion,
                 repeats=100,
                 check_trivial=True):
        originals = ep.astensor(inputs)
        labels = ep.astensor(labels)

        def is_adversarial(p: ep.Tensor) -> ep.Tensor:
            """For each input in x, returns true if it is an adversarial for
            the given model and criterion"""
            logits = self.model.forward(p)
            return criterion(originals, labels, p, logits)

        x0 = ep.astensor(inputs)
        min_, max_ = self.model.bounds()

        result = x0
        if check_trivial:
            found = is_adversarial(result)
        else:
            found = ep.zeros(x0, len(result)).bool()

        for _ in range(repeats):
            if found.all():
                break

            p = self.sample_noise(x0)
            norms = self.get_norms(p)
            p = p / atleast_kd(norms, p.ndim)
            x = x0 + epsilon * p
            x = x.clip(min_, max_)
            is_adv = is_adversarial(x)
            is_new_adv = ep.logical_and(is_adv, ep.logical_not(found))
            result = ep.where(atleast_kd(is_new_adv, x.ndim), x, result)
            found = ep.logical_or(found, is_adv)

        return result.tensor
Example #26
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, Any] = None,
        *,
        epsilon: float,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x0, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        verify_input_bounds(x0, model)

        is_adversarial = get_is_adversarial(criterion_, model)

        min_, max_ = model.bounds

        result = x0
        if self.check_trivial:
            found = is_adversarial(result)
        else:
            found = ep.zeros(x0, len(result)).bool()

        for _ in range(self.repeats):
            if found.all():
                break

            p = self.sample_noise(x0)
            epsilons = self.get_epsilons(x0, p, epsilon, min_=min_, max_=max_)
            x = x0 + epsilons * p
            x = x.clip(min_, max_)
            is_adv = is_adversarial(x)
            is_new_adv = ep.logical_and(is_adv, ep.logical_not(found))
            result = ep.where(atleast_kd(is_new_adv, x.ndim), x, result)
            found = ep.logical_or(found, is_adv)

        return restore_type(result)
Example #27
    def __call__(self, model: Model, inputs: T, criterion: Union[Criterion,
                                                                 T]) -> T:
        x, restore_type = ep.astensor_(inputs)
        del inputs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        best = self._attack(model, x, criterion)
        best_is_adv = is_adversarial(best)

        for _ in range(1, self._times):
            xp = self._attack(model, x, criterion)
            # assumes xp does not violate the perturbation size constraint

            is_adv = is_adversarial(xp)
            new_best = ep.logical_and(is_adv, best_is_adv.logical_not())

            best = ep.where(atleast_kd(new_best, best.ndim), xp, best)
            best_is_adv = ep.logical_or(is_adv, best_is_adv)

        return restore_type(best)
Example #28
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        self.process_raw()
        assert self.inputs is not None
        assert self.outputs is not None
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        criterion = get_criterion(criterion)

        result = x
        found = criterion(x, model(x))

        dataset_size = len(self.inputs)
        batch_size = len(x)

        while not found.all():
            indices = np.random.randint(0, dataset_size, size=(batch_size, ))

            xp = self.inputs[indices]
            yp = self.outputs[indices]
            is_adv = criterion(xp, yp)

            new_found = ep.logical_and(is_adv, found.logical_not())
            result = ep.where(atleast_kd(new_found, result.ndim), xp, result)
            found = ep.logical_or(found, new_found)

        return restore_type(result)
Example #29
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        #raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)

        min_, max_ = model.bounds

        logits = model(x)
        classes = logits.argsort(axis=-1).flip(axis=-1)
        if self.candidates is None:
            candidates = logits.shape[-1]  # pragma: no cover
        else:
            candidates = min(self.candidates, logits.shape[-1])
            if not candidates >= 2:
                raise ValueError(  # pragma: no cover
                    f"expected the model output to have at least 2 classes, got {logits.shape[-1]}"
                )
            logging.info(f"Only testing the top-{candidates} classes")
            classes = classes[:, :candidates]

        N = len(x)
        rows = range(N)

        loss_fun = self._get_loss_fn(model, classes)
        loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

        x0 = x
        p_total = ep.zeros_like(x)
        for _ in range(self.steps):
            # let's first get the logits using k = 1 to see if we are done
            diffs = [loss_aux_and_grad(x, 1)]
            _, (_, logits), _ = diffs[0]

            is_adv = criterion(x, logits)
            if is_adv.all():
                break

            # then run all the other k's as well
            # we could avoid repeated forward passes and only repeat
            # the backward pass, but this cannot currently be done in eagerpy
            diffs += [loss_aux_and_grad(x, k) for k in range(2, candidates)]

            # we don't need the logits
            diffs_ = [(losses, grad) for _, (losses, _), grad in diffs]
            losses = ep.stack([lo for lo, _ in diffs_], axis=1)
            grads = ep.stack([g for _, g in diffs_], axis=1)
            assert losses.shape == (N, candidates - 1)
            assert grads.shape == (N, candidates - 1) + x0.shape[1:]

            # calculate the distances
            distances = self.get_distances(losses, grads)
            assert distances.shape == (N, candidates - 1)

            # determine the best directions
            best = distances.argmin(axis=1)
            distances = distances[rows, best]
            losses = losses[rows, best]
            grads = grads[rows, best]
            assert distances.shape == (N,)
            assert losses.shape == (N,)
            assert grads.shape == x0.shape

            # apply perturbation
            distances = distances + 1e-4  # for numerical stability
            p_step = self.get_perturbations(distances, grads)
            assert p_step.shape == x0.shape

            p_total += p_step
            # don't do anything for those that are already adversarial
            x = ep.where(
                atleast_kd(is_adv, x.ndim), x, x0 + (1.0 + self.overshoot) * p_total
            )
            x = ep.clip(x, min_, max_)

        return restore_type(x)
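
get_distances and get_perturbations are not shown here; for the L2 variant of DeepFool they typically reduce to the linearized boundary distance |f_k| / ||grad f_k||_2 and a step of that length along the normalized gradient. A hedged NumPy sketch under those assumptions:

import numpy as np

def get_distances_l2(losses: np.ndarray, grads: np.ndarray) -> np.ndarray:
    # linearized distance to each candidate boundary: |f_k| / ||grad f_k||_2
    flat = grads.reshape(grads.shape[0], grads.shape[1], -1)
    return np.abs(losses) / (np.linalg.norm(flat, axis=-1) + 1e-8)

def get_perturbations_l2(distances: np.ndarray, grads: np.ndarray) -> np.ndarray:
    # step of length `distances` along the normalized gradient
    norms = np.linalg.norm(grads.reshape(len(grads), -1), axis=-1) + 1e-8
    shape = (-1,) + (1,) * (grads.ndim - 1)
    return (distances / norms).reshape(shape) * grads
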
Example #30
    def __call__(
        self,
        inputs,
        labels,
        *,
        starting_points=None,
        init_attack=None,
        criterion: Callable = misclassification,
        steps=25000,
        spherical_step=1e-2,
        source_step=1e-2,
        source_step_convergance=1e-7,
        step_adaptation=1.5,
        tensorboard=False,
        update_stats_every_k=10,
    ):
        """Boundary Attack

        Differences to the original reference implementation:
        * We do not perform internal operations with float64
        * The samples within a batch can currently influence each other a bit
        * We don't perform the additional convergence confirmation
        * The success rate tracking changed a bit
        * Some other changes due to batching and merged loops

        Parameters
        ----------
        criterion : Callable
            A callable that returns true if the given logits of perturbed
            inputs should be considered adversarial w.r.t. to the given labels
            and unperturbed inputs.
        tensorboard : str
            The log directory for TensorBoard summaries. If False, TensorBoard
            summaries will be disabled (default). If None, the logdir will be
            runs/CURRENT_DATETIME_HOSTNAME.
        """
        tb = TensorBoard(logdir=tensorboard)

        originals = ep.astensor(inputs)
        labels = ep.astensor(labels)

        def is_adversarial(p: ep.Tensor) -> ep.Tensor:
            """For each input in x, returns true if it is an adversarial for
            the given model and criterion"""
            logits = self.model.forward(p)
            return criterion(originals, labels, p, logits)

        if starting_points is None:
            if init_attack is None:
                init_attack = LinearSearchBlendedUniformNoiseAttack
                logging.info(
                    f"Neither starting_points nor init_attack given. Falling"
                    f" back to {init_attack.__name__} for initialization.")
            starting_points = init_attack(self.model)(inputs, labels)

        best_advs = ep.astensor(starting_points)
        assert is_adversarial(best_advs).all()

        N = len(originals)
        ndim = originals.ndim
        spherical_steps = ep.ones(originals, N) * spherical_step
        source_steps = ep.ones(originals, N) * source_step

        tb.scalar("batchsize", N, 0)

        # create two queues for each sample to track success rates
        # (used to update the hyper parameters)
        stats_spherical_adversarial = ArrayQueue(maxlen=100, N=N)
        stats_step_adversarial = ArrayQueue(maxlen=30, N=N)

        bounds = self.model.bounds()

        for step in range(1, steps + 1):
            converged = source_steps < source_step_convergance
            if converged.all():
                break
            converged = atleast_kd(converged, ndim)

            # TODO: performance: ignore those that have converged
            # (we could select the non-converged ones, but we currently
            # cannot easily invert this in the end using EagerPy)

            unnormalized_source_directions = originals - best_advs
            source_norms = l2norms(unnormalized_source_directions)
            source_directions = unnormalized_source_directions / atleast_kd(
                source_norms, ndim)

            # only check spherical candidates every k steps
            check_spherical_and_update_stats = step % update_stats_every_k == 0

            candidates, spherical_candidates = draw_proposals(
                bounds,
                originals,
                best_advs,
                unnormalized_source_directions,
                source_directions,
                source_norms,
                spherical_steps,
                source_steps,
            )
            assert candidates.dtype == originals.dtype
            assert spherical_candidates.dtype == originals.dtype

            is_adv = is_adversarial(candidates)

            if check_spherical_and_update_stats:
                spherical_is_adv = is_adversarial(spherical_candidates)
                stats_spherical_adversarial.append(spherical_is_adv)
                # TODO: algorithm: the original implementation ignores those samples
                # for which spherical is not adversarial and continues with the
                # next iteration -> we estimate different probabilities (conditional vs. unconditional)
                # TODO: thoughts: should we always track this because we compute it anyway
                stats_step_adversarial.append(is_adv)
            else:
                spherical_is_adv = None

            # in theory, we are closer per construction
            # but limited numerical precision might break this
            distances = l2norms(originals - candidates)
            closer = distances < source_norms
            is_best_adv = ep.logical_and(is_adv, closer)
            is_best_adv = atleast_kd(is_best_adv, ndim)

            cond = converged.logical_not().logical_and(is_best_adv)
            best_advs = ep.where(cond, candidates, best_advs)

            tb.probability("converged", converged, step)
            tb.scalar("updated_stats", check_spherical_and_update_stats, step)
            tb.histogram("norms", source_norms, step)
            tb.probability("is_adv", is_adv, step)
            if spherical_is_adv is not None:
                tb.probability("spherical_is_adv", spherical_is_adv, step)
            tb.histogram("candidates/distances", distances, step)
            tb.probability("candidates/closer", closer, step)
            tb.probability("candidates/is_best_adv", is_best_adv, step)
            tb.probability("new_best_adv_including_converged", is_best_adv,
                           step)
            tb.probability("new_best_adv", cond, step)

            if check_spherical_and_update_stats:
                full = stats_spherical_adversarial.isfull()
                tb.probability("spherical_stats/full", full, step)
                if full.any():
                    probs = stats_spherical_adversarial.mean()
                    cond1 = ep.logical_and(probs > 0.5, full)
                    spherical_steps = ep.where(
                        cond1, spherical_steps * step_adaptation,
                        spherical_steps)
                    source_steps = ep.where(cond1,
                                            source_steps * step_adaptation,
                                            source_steps)
                    cond2 = ep.logical_and(probs < 0.2, full)
                    spherical_steps = ep.where(
                        cond2, spherical_steps / step_adaptation,
                        spherical_steps)
                    source_steps = ep.where(cond2,
                                            source_steps / step_adaptation,
                                            source_steps)
                    stats_spherical_adversarial.clear(
                        ep.logical_or(cond1, cond2))
                    tb.conditional_mean(
                        "spherical_stats/isfull/success_rate/mean", probs,
                        full, step)
                    tb.probability_ratio("spherical_stats/isfull/too_linear",
                                         cond1, full, step)
                    tb.probability_ratio(
                        "spherical_stats/isfull/too_nonlinear", cond2, full,
                        step)

                full = stats_step_adversarial.isfull()
                tb.probability("step_stats/full", full, step)
                if full.any():
                    probs = stats_step_adversarial.mean()
                    # TODO: algorithm: changed the two values because we are currently tracking p(source_step_success)
                    # instead of p(source_step_success | spherical_step_success) that was tracked before
                    cond1 = ep.logical_and(probs > 0.25, full)
                    source_steps = ep.where(cond1,
                                            source_steps * step_adaptation,
                                            source_steps)
                    cond2 = ep.logical_and(probs < 0.1, full)
                    source_steps = ep.where(cond2,
                                            source_steps / step_adaptation,
                                            source_steps)
                    stats_step_adversarial.clear(ep.logical_or(cond1, cond2))
                    tb.conditional_mean("step_stats/isfull/success_rate/mean",
                                        probs, full, step)
                    tb.probability_ratio(
                        "step_stats/isfull/success_rate_too_high", cond1, full,
                        step)
                    tb.probability_ratio(
                        "step_stats/isfull/success_rate_too_low", cond2, full,
                        step)

            tb.histogram("spherical_step", spherical_steps, step)
            tb.histogram("source_step", source_steps, step)
        tb.close()
        return best_advs.tensor