Example #1
    def __call__(self, perturbed: T, outputs: T) -> T:
        outputs_, restore_type = ep.astensor_(outputs)
        del perturbed, outputs

        classes = outputs_.argmax(axis=-1)
        is_adv = classes == self.target_classes
        return restore_type(is_adv)
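All of the examples on this page revolve around the same eagerpy idiom: ep.astensor_ converts a native tensor into a framework-agnostic eagerpy tensor and additionally returns a function that converts results back to the native type. A minimal, self-contained sketch of the pattern (the numpy backend and the values are my own assumptions):

import numpy as np
import eagerpy as ep

outputs = np.array([[0.1, 0.7, 0.2],
                    [0.8, 0.1, 0.1]])           # native logits (numpy here)
outputs_, restore_type = ep.astensor_(outputs)  # eagerpy tensor + converter

classes = outputs_.argmax(axis=-1)              # framework-agnostic op
is_adv = classes == ep.astensor(np.array([1, 2]))
print(restore_type(is_adv))                     # back to numpy: [ True False]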
Example #2
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, Any] = None,
        *,
        epsilon: float,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, criterion, kwargs

        verify_input_bounds(x, model)

        min_, max_ = model.bounds
        target = min_ + self.target * (max_ - min_)

        direction = target - x
        norms = ep.norms.l2(flatten(direction), axis=-1)
        scale = epsilon / atleast_kd(norms, direction.ndim)
        scale = ep.minimum(scale, 1)

        x = x + scale * direction
        x = x.clip(min_, max_)
        return restore_type(x)
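The scaling above caps the step toward the target at L2 length epsilon: scale = min(epsilon / ||direction||, 1), so samples already within epsilon of the target are moved all the way. A quick numeric check (values are made up):

import numpy as np

epsilon = 0.5
direction = np.array([3.0, 4.0])   # ||direction||_2 = 5.0
scale = min(epsilon / np.linalg.norm(direction), 1.0)
step = scale * direction
print(np.linalg.norm(step))        # 0.5 == epsilon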
Example #3
def test_astensor_restore_tensor(t: Tensor) -> None:
    r = t
    y, restore_type = ep.astensor_(r)
    assert (y == t).all()
    assert type(restore_type(y)) == type(r)
    y = y + 1
    assert type(restore_type(y)) == type(r)
Example #4
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[ep.Tensor] = None,
        **kwargs: Any,
    ) -> T:
        originals, restore_type = ep.astensor_(inputs)

        self._nqueries = {i: 0 for i in range(len(originals))}
        self._set_cos_sin_function(originals)
        self.theta_max = ep.ones(originals, len(originals)) * self._theta_max
        criterion = get_criterion(criterion)
        self._criterion_is_adversarial = get_is_adversarial(criterion, model)

        # Get Starting Point
        if starting_points is not None:
            best_advs = starting_points
        else:
            init_attack: MinimizationAttack = LinearSearchBlendedUniformNoiseAttack(steps=50)
            best_advs = init_attack.run(model, originals, criterion, early_stop=early_stop)

        assert self._is_adversarial(best_advs).all()

        # Initialize the direction orthogonalized with the first direction
        fd = best_advs - originals
        norm = ep.norms.l2(fd.flatten(1), axis=1)
        fd = fd / atleast_kd(norm, fd.ndim)
        self._directions_ortho = {i: v.expand_dims(0) for i, v in enumerate(fd)}

        # Load Basis
        if "basis_params" in kwargs:
            self._basis = Basis(originals, **kwargs["basis_params"])
        else:
            self._basis = Basis(originals)

        for _ in range(self._steps):
            # Get candidates. Shape: (n_candidates, batch_size, image_size)
            candidates = self._get_candidates(originals, best_advs)
            candidates = candidates.transpose((1, 0, 2, 3, 4))

            best_candidates = ep.zeros_like(best_advs).raw
            for i, o in enumerate(originals):
                o_repeated = ep.concatenate([o.expand_dims(0)] * len(candidates[i]), axis=0)
                index = ep.argmax(self.distance(o_repeated, candidates[i])).raw
                best_candidates[i] = candidates[i][index].raw

            is_success = self.distance(best_candidates, originals) < self.distance(best_advs, originals)
            best_advs = ep.where(atleast_kd(is_success, best_candidates.ndim), ep.astensor(best_candidates), best_advs)

            if all(v > self._max_queries for v in self._nqueries.values()):
                print("Max queries attained for all the images.")
                break
        return restore_type(best_advs)
Example #5
    def __call__(self, model: Model, inputs: T,
                 criterion: Union[Misclassification, T]) -> T:

        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion

        N = len(x)

        if isinstance(criterion_, Misclassification):
            classes = criterion_.labels
        else:
            raise ValueError("unsupported criterion")

        if classes.shape != (N, ):
            raise ValueError(
                f"expected labels to have shape ({N},), got {classes.shape}")

        min_, max_ = model.bounds

        x_l2_norm = flatten(x.square()).sum(1)

        def loss_fun(
                x: ep.Tensor) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
            logits = model(x)
            scores = ep.softmax(logits)
            pred_scores = scores[range(N), classes]
            loss = pred_scores.sum()
            return loss, (scores, pred_scores)

        for i in range(self.steps):
            # (1) get the scores and gradients
            _, (scores,
                pred_scores), gradients = ep.value_aux_and_grad(loss_fun, x)

            pred = scores.argmax(-1)
            num_classes = scores.shape[-1]

            # (2) calculate gradient norm
            gradients_l2_norm = flatten(gradients.square()).sum(1)

            # (3) calculate delta
            a = self.stepsize * x_l2_norm * gradients_l2_norm
            b = pred_scores - 1.0 / num_classes

            delta = ep.minimum(a, b)

            # (4) stop the attack if an adversarial example has been found
            # this is not described in the paper but otherwise once the prob. drops
            # below chance level the likelihood is not decreased but increased
            is_not_adversarial = (pred == classes).float32()
            delta *= is_not_adversarial

            # (5) calculate & apply current perturbation
            a = atleast_kd(delta / gradients_l2_norm.square(), gradients.ndim)
            x -= a * gradients

            x = ep.clip(x, min_, max_)

        return restore_type(x)
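Steps (3) and (4) implement a NewtonFool-style step size rule, if I read the snippet correctly: delta is the smaller of a gradient-based term and the margin to chance level, so the predicted probability is never pushed below 1/num_classes. A small numeric trace with made-up values:

stepsize, num_classes = 0.01, 10
x_sqnorm, grad_sqnorm = 50.0, 4.0      # squared L2 norms, as in the code
pred_score = 0.9

a = stepsize * x_sqnorm * grad_sqnorm  # 2.0
b = pred_score - 1.0 / num_classes     # 0.8
delta = min(a, b)                      # 0.8 -> the chance-level cap is active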
Example #6
def get_projected_gradients(x_current, x_orig, label, surrogate_model):

    if surrogate_model is None:
        return None

    device = surrogate_model.device

    source_direction_ = x_orig - x_current
    source_direction = source_direction_.numpy()  # work with a numpy copy from here on
    source_norm = np.linalg.norm(source_direction)
    source_direction = source_direction / source_norm

    criterion = fb.criteria.Misclassification(torch.tensor([0], device=device))
    classes = criterion.labels
    loss_fn = get_loss_fn(surrogate_model, classes)

    x0, restore_type = ep.astensor_(x_current + 1e-2 * source_direction_)

    _, gradients = value_and_grad(loss_fn, x0)
    gradients = gradients.numpy()
    # optionally guard against non-finite gradients:
    # gradients = np.nan_to_num(gradients, nan=0.0, posinf=0.0, neginf=0.0)

    # Project the gradients.
    dot = np.vdot(gradients, source_direction)
    projected_gradient = gradients - dot * source_direction

    norm_ = np.linalg.norm(projected_gradient)
    if norm_ > 1e-5:
        projected_gradient /= norm_

    projected_gradient = (-1.) * projected_gradient

    return projected_gradient
Example #7
def extract_target_logits(model: Model, inputs: ep.Tensor, labels: ep.Tensor):
    """
    This implementation uses any correctly classified sample as the target logit
    """
    if not isinstance(labels, ep.Tensor):
        labels, _ = ep.astensor_(labels)
    num_classes = 10  # Hack for CIFAR10
    result = np.zeros([num_classes, num_classes])
    present = np.zeros(num_classes)
    Z = model(inputs)  # all the logits

    if isinstance(Z, ep.Tensor):
        Z = Z.raw
    else:
        Z = Z.detach()

    for i in range(labels.shape[0]):
        t = labels[i].raw.item()
        if present[t]:
            continue
        z = Z[i:i + 1]
        if z.argmax() == t:
            result[t, :] = z.cpu()
            present[t] = 1
        if sum(present) == num_classes:
            break

    return result
Example #8
    def __call__(self, model: Model, inputs: T, criterion: Union[Criterion, T]) -> T:

        x, restore_type = ep.astensor_(inputs)
        del inputs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        min_, max_ = model.bounds
        target = min_ + self.target * (max_ - min_)
        direction = target - x

        lower_bound = ep.zeros(x, len(x))
        upper_bound = ep.ones(x, len(x))
        epsilons = lower_bound
        for _ in range(self.binary_search_steps):
            eps = atleast_kd(epsilons, x.ndim)
            is_adv = is_adversarial(x + eps * direction)
            lower_bound = ep.where(is_adv, lower_bound, epsilons)
            upper_bound = ep.where(is_adv, epsilons, upper_bound)
            epsilons = (lower_bound + upper_bound) / 2

        epsilons = upper_bound
        eps = atleast_kd(epsilons, x.ndim)
        xp = x + eps * direction
        return restore_type(xp)
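The loop above maintains per-sample bounds such that upper_bound is always adversarial and lower_bound never is; each step halves the interval. A standalone scalar sketch of the same bisection (the 1-D predicate is an assumption for illustration):

def binary_search_eps(is_adversarial, steps=10):
    lower, upper = 0.0, 1.0
    eps = lower
    for _ in range(steps):
        if is_adversarial(eps):
            upper = eps        # adversarial: try a smaller perturbation
        else:
            lower = eps        # not adversarial: need a larger one
        eps = (lower + upper) / 2
    return upper               # smallest eps known to be adversarial

print(binary_search_eps(lambda e: e >= 0.3))  # 0.30078125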
Example #9
    def __call__(self, model: Model, inputs: T,
                 criterion: Union[Misclassification, T]) -> T:
        x0, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion

        if not isinstance(criterion_, Misclassification):
            raise ValueError("unsupported criterion")

        labels = criterion_.labels

        def loss_fn(inputs: ep.Tensor) -> ep.Tensor:
            logits = model(inputs)
            return ep.crossentropy(logits, labels).sum()

        x = x0

        if self.random_start:
            x = x + ep.uniform(x, x.shape, -self.epsilon, self.epsilon)
            x = ep.clip(x, *model.bounds)

        for _ in range(self.steps):
            _, gradients = ep.value_and_grad(loss_fn, x)
            gradients = gradients.sign()
            x = x + self.stepsize * gradients
            x = x0 + ep.clip(x - x0, -self.epsilon, self.epsilon)
            x = ep.clip(x, *model.bounds)

        return restore_type(x)
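The projection x = x0 + clip(x - x0, -epsilon, epsilon) in the loop above keeps every coordinate of the perturbation inside the L-infinity ball around x0. A quick check with made-up numbers:

import numpy as np

x0 = np.array([0.5, 0.5])
x = np.array([0.9, 0.45])
epsilon = 0.1
print(x0 + np.clip(x - x0, -epsilon, epsilon))  # [0.6  0.45]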
Example #10
    def __call__(self, model: Model, inputs: T, criterion: Union[Criterion, T]) -> T:

        x, restore_type = ep.astensor_(inputs)
        del inputs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        min_, max_ = model.bounds
        target = min_ + self.target * (max_ - min_)
        direction = target - x

        best = ep.ones(x, len(x))

        epsilon = 0.0
        stepsize = 1.0 / self.steps
        for _ in range(self.steps):
            # TODO: reduce the batch size to the ones that have not yet been successful

            is_adv = is_adversarial(x + epsilon * direction)
            is_best_adv = ep.logical_and(is_adv, best == 1)
            best = ep.where(is_best_adv, epsilon, best)

            if (best < 1).all():
                break

            epsilon += stepsize

        eps = atleast_kd(best, x.ndim)
        xp = x + eps * direction
        return restore_type(xp)
Example #11
    def __call__(self, perturbed: T, outputs: T) -> T:
        outputs_, restore_type = ep.astensor_(outputs)
        del perturbed, outputs

        classes = outputs_.argmax(axis=-1)
        assert classes.shape == self.labels.shape
        is_adv = classes != self.labels
        return restore_type(is_adv)
Example #12
    def __call__(  # type: ignore
        self,
        model: Model,
        inputs: T,
        criterion: Any,
        *,
        epsilons: Union[Sequence[Union[float, None]], float, None],
        **kwargs: Any,
    ) -> Union[Tuple[List[T], List[T], T], Tuple[T, T, T]]:
        x, restore_type = ep.astensor_(inputs)
        del inputs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        was_iterable = True
        if not isinstance(epsilons, Iterable):
            epsilons = [epsilons]
            was_iterable = False

        N = len(x)
        K = len(epsilons)

        # None means: just minimize, no early stopping, no limit on the perturbation size
        if any(eps is None for eps in epsilons):
            early_stop = None
        else:
            early_stop = min(epsilons)

        # run the actual attack
        xp = self.run(model, x, criterion, early_stop=early_stop, **kwargs)

        xpcs = []
        success = []
        for epsilon in epsilons:
            if epsilon is None:
                xpc = xp
            else:
                xpc = self.distance.clip_perturbation(x, xp, epsilon)
            is_adv = is_adversarial(xpc)

            xpcs.append(xpc)
            success.append(is_adv)

        success_ = ep.stack(success)
        assert success_.shape == (K, N)

        xp_ = restore_type(xp)
        xpcs_ = [restore_type(xpc) for xpc in xpcs]

        if was_iterable:
            return [xp_] * K, xpcs_, restore_type(success_)
        else:
            assert len(xpcs_) == 1
            return xp_, xpcs_[0], restore_type(success_.squeeze(axis=0))
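This wrapper accepts either a single epsilon or a sequence and normalizes both cases; early_stop becomes the smallest epsilon, or None as soon as any epsilon is None (pure minimization). A minimal illustration of that normalization (a simplified stand-in, not the library code):

epsilons = 0.3                      # a single float is also accepted
was_iterable = isinstance(epsilons, (list, tuple))
if not was_iterable:
    epsilons = [epsilons]
early_stop = None if any(e is None for e in epsilons) else min(epsilons)
print(early_stop)                   # 0.3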
Example #13
    def __call__(self, model: Model, inputs: T,
                 criterion: Union[Misclassification, T]) -> T:
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion

        N = len(x)

        if isinstance(criterion_, Misclassification):
            classes = criterion_.labels
        else:
            raise ValueError("unsupported criterion")

        if classes.shape != (N, ):
            raise ValueError(
                f"expected labels to have shape ({N},), got {classes.shape}")

        bounds = model.bounds

        def loss_fun(delta: ep.Tensor, logits: ep.Tensor) -> ep.Tensor:
            assert x.shape[0] == logits.shape[0]
            assert delta.shape == x.shape

            x_hat = x + delta
            logits_hat = model(x_hat)
            loss = ep.kl_div_with_logits(logits, logits_hat).sum()

            return loss

        value_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=False)

        clean_logits = model(x)

        # start with random vector as search vector
        d = ep.normal(x, shape=x.shape, mean=0, stddev=1)
        for it in range(self.iterations):
            # normalize proposal to be unit vector
            d = d * self.xi / atleast_kd(ep.norms.l2(flatten(d), axis=-1),
                                         x.ndim)

            # use gradient of KL divergence as new search vector
            _, grad = value_and_grad(d, clean_logits)
            d = grad

            # rescale search vector
            d = (bounds[1] - bounds[0]) * d

            if ep.any(ep.norms.l2(flatten(d), axis=-1) < 1e-64):
                raise RuntimeError(
                    "Gradient vanished; this can happen if xi is too small.")

        final_delta = (self.epsilon / ep.sqrt(
            (d**2).sum(keepdims=True, axis=(1, 2, 3))) * d)
        x_adv = ep.clip(x + final_delta, *bounds)

        return restore_type(x_adv)
Example #14
    def __call__(self,
                 model: Model,
                 inputs: T,
                 criterion: Union[Criterion, Any] = None) -> T:
        x, restore_type = ep.astensor_(inputs)
        del inputs, criterion

        min_, max_ = model.bounds
        x = min_ + max_ - x
        return restore_type(x)
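The update x = min + max - x reflects each value within the model's input bounds; for bounds (0, 1) a pixel value v becomes 1 - v:

min_, max_ = 0.0, 1.0
for v in (0.0, 0.25, 1.0):
    print(min_ + max_ - v)  # 1.0, 0.75, 0.0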
Example #15
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)

        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        found = is_adversarial(x)
        results = x

        def grid_search_generator() -> Generator[Any, Any, Any]:
            dphis = np.linspace(-self.max_rot, self.max_rot, self.num_rots)
            dxs = np.linspace(-self.max_trans, self.max_trans, self.num_trans)
            dys = np.linspace(-self.max_trans, self.max_trans, self.num_trans)
            for dphi in dphis:
                for dx in dxs:
                    for dy in dys:
                        yield dphi, dx, dy

        def random_search_generator() -> Generator[Any, Any, Any]:
            dphis = np.random.uniform(-self.max_rot, self.max_rot,
                                      self.random_steps)
            dxs = np.random.uniform(-self.max_trans, self.max_trans,
                                    self.random_steps)
            dys = np.random.uniform(-self.max_trans, self.max_trans,
                                    self.random_steps)
            for dphi, dx, dy in zip(dphis, dxs, dys):
                yield dphi, dx, dy

        gen = (grid_search_generator() if self.grid_search
               else random_search_generator())
        for dphi, dx, dy in gen:
            # TODO: reduce the batch size to the ones that haven't been successful

            x_p = rotate_and_shift(x, translation=(dx, dy), rotation=dphi)
            is_adv = is_adversarial(x_p)
            new_adv = ep.logical_and(is_adv, found.logical_not())

            results = ep.where(atleast_kd(new_adv, x_p.ndim), x_p, results)
            found = ep.logical_or(new_adv, found)
            if found.all():
                break  # all images in batch misclassified
        return restore_type(results)
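The grid-search generator enumerates every (rotation, dx, dy) combination, i.e. num_rots * num_trans**2 candidates per image, while the random variant draws random_steps independent triples. A sketch with assumed parameter values:

import numpy as np

max_rot, num_rots = 30.0, 3
max_trans, num_trans = 2.0, 2

combos = [(dphi, dx, dy)
          for dphi in np.linspace(-max_rot, max_rot, num_rots)
          for dx in np.linspace(-max_trans, max_trans, num_trans)
          for dy in np.linspace(-max_trans, max_trans, num_trans)]
print(len(combos))  # 3 * 2 * 2 = 12 candidate transformations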
Example #16
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
        *,
        epsilon: float,
        mc: int,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x0, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        # perform a gradient ascent (targeted attack) or descent (untargeted attack)
        if isinstance(criterion_, Misclassification):
            gradient_step_sign = 1.0
            classes = criterion_.labels
        elif hasattr(criterion_, "target_classes"):
            gradient_step_sign = -1.0
            classes = criterion_.target_classes  # type: ignore
        else:
            raise ValueError("unsupported criterion")

        loss_fn = self.get_loss_fn(model, classes)

        if self.abs_stepsize is None:
            stepsize = self.rel_stepsize * epsilon
        else:
            stepsize = self.abs_stepsize

        if self.random_start:
            x = self.get_random_start(x0, epsilon)
            x = ep.clip(x, *model.bounds)
        else:
            x = x0

        for _ in range(self.steps):
            gradient_sum = 0.
            for _ in range(mc):
                _, gradients = self.value_and_grad(loss_fn, x)
                gradient_sum += gradients
            gradients = self.normalize(gradient_sum, x=x, bounds=model.bounds)
            x = x + gradient_step_sign * stepsize * gradients
            x = self.project(x, x0, epsilon)
            x = ep.clip(x, *model.bounds)

        return restore_type(x)
Example #17
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        self.process_raw()
        assert self.inputs is not None
        assert self.outputs is not None
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)

        result = x
        found = criterion(x, model(x))

        batch_size = len(x)

        # for every sample try every other sample
        index_pools: List[List[int]] = []
        for i in range(batch_size):
            indices = list(range(batch_size))
            indices.remove(i)
            np.random.shuffle(indices)
            index_pools.append(indices)

        for i in range(batch_size - 1):
            if found.all():
                break

            indices = np.array([pool[i] for pool in index_pools])

            xp = self.inputs[indices]
            yp = self.outputs[indices]
            is_adv = criterion(xp, yp)

            new_found = ep.logical_and(is_adv, found.logical_not())
            result = ep.where(atleast_kd(new_found, result.ndim), xp, result)
            found = ep.logical_or(found, new_found)

        return restore_type(result)
Example #18
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, Any] = None,
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, criterion, kwargs

        min_, max_ = model.bounds
        x = min_ + max_ - x
        return restore_type(x)
Example #19
def get_modified_loss(model, inputs, labels, untargeted_fn, targeted_fn,
                      targeted, modifier):
    """
    Return the loss function based on the modifiers.
    There are five modifiers:
    1. softmax: flag to control if attack on raw outputs / the one with softmax
    2. loss_diff: flag to controls the targeted loss to be subtracted by the untargeted loss
    *3. indiv: modify the return to return individual losses instead of the sum of losses
    *4. logits: return the prediction (affected by labels)
    *5. labels: takes in the array of labels and return the logits of the labels and the logits of the top class (for SQR)
    Here 3, 4, 5 does not change the functionality, and they are used to change the return for algorithm implementation
    """
    logits = model(inputs)
    logits, restore_type = ep.astensor_(logits)

    outputs = logits
    if 'softmax' in modifier:
        if modifier['softmax']:
            outputs = logits.softmax()

    if targeted:
        if 'loss_diff' in modifier and modifier['loss_diff']:
            ind_sorted = outputs.argsort(axis=1)
            ind = (ind_sorted[:, -1])
            losses = targeted_fn(outputs, labels) + untargeted_fn(outputs, ind)
        else:
            losses = targeted_fn(outputs, labels)
        loss = losses.sum()
    else:
        losses = untargeted_fn(outputs, labels)
        loss = losses.sum()

    result = [restore_type(loss)]
    if 'indiv' in modifier:
        result.append(restore_type(losses))
    if 'logits' in modifier:
        result.append(restore_type(outputs))
    if 'labels' in modifier:
        curr_idx = modifier['labels']
        u = np.arange(labels.shape[0])
        y_corr = logits[u, curr_idx]
        logits.raw[u, curr_idx] = -float('inf')
        y_others = logits.max(axis=-1)
        result.append([restore_type(y_corr), restore_type(y_others)])
    if len(result) == 1:
        result = result[0]
    return result
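A self-contained sketch of how the modifier dict changes what get_modified_loss returns; the toy model and per-sample loss below are assumptions for illustration only:

import numpy as np
import eagerpy as ep

model = lambda x: np.array([[2.0, 0.5, -1.0], [0.1, 0.3, 0.2]])  # fake logits
untargeted_fn = lambda outputs, labels: outputs.max(axis=-1)     # toy per-sample loss

loss, losses = get_modified_loss(
    model, inputs=None, labels=np.array([0, 1]),
    untargeted_fn=untargeted_fn, targeted_fn=None, targeted=False,
    modifier={"softmax": True, "indiv": True})
print(loss, losses)  # the summed loss plus the two individual losses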
Example #20
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, T],
        *,
        epsilon: float,
        mc: int,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x0, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        if not isinstance(criterion_, Misclassification):
            raise ValueError("unsupported criterion")

        labels = criterion_.labels
        loss_fn = self.get_loss_fn(model, labels)

        if self.abs_stepsize is None:
            stepsize = self.rel_stepsize * epsilon
        else:
            stepsize = self.abs_stepsize

        if self.random_start:
            x = self.get_random_start(x0, epsilon)
            x = ep.clip(x, *model.bounds)
        else:
            x = x0

        for _ in range(self.steps):
            # average the gradient over mc Monte Carlo samples
            gradient_sum = 0.0
            for _ in range(mc):
                _, gradients = self.value_and_grad(loss_fn, x)
                gradient_sum += gradients

            gradients = self.normalize(gradient_sum, x=x, bounds=model.bounds)
            x = x + stepsize * gradients
            x = self.project(x, x0, epsilon)
            x = ep.clip(x, *model.bounds)

        return restore_type(x)
Example #21
    def __call__(self,
                 model: Model,
                 inputs: T,
                 criterion: Union[Criterion, Any] = None) -> T:
        x, restore_type = ep.astensor_(inputs)
        del inputs, criterion

        min_, max_ = model.bounds
        target = min_ + self.target * (max_ - min_)

        direction = target - x
        norms = ep.norms.l2(flatten(direction), axis=-1)
        scale = self.epsilon / atleast_kd(norms, direction.ndim)
        scale = ep.minimum(scale, 1)

        x = x + scale * direction
        x = x.clip(min_, max_)
        return restore_type(x)
Example #22
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, Any] = None,
        *,
        epsilon: float,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, criterion, kwargs

        min_, max_ = model.bounds
        p = self.sample_noise(x)
        epsilons = self.get_epsilons(x, p, epsilon, min_=min_, max_=max_)
        x = x + epsilons * p
        x = x.clip(min_, max_)

        return restore_type(x)
Example #23
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        min_, max_ = model.bounds
        target = min_ + self.target * (max_ - min_)
        direction = target - x

        best = ep.ones(x, len(x))

        epsilon = 0.0
        stepsize = 1.0 / self.steps
        for _ in range(self.steps):
            # TODO: reduce the batch size to the ones that have not yet been successful

            is_adv = is_adversarial(x + epsilon * direction)
            is_best_adv = ep.logical_and(is_adv, best == 1)
            best = ep.where(is_best_adv, epsilon, best)

            if (best < 1).all():
                break  # pragma: no cover

            epsilon += stepsize

        eps = atleast_kd(best, x.ndim)
        xp = x + eps * direction
        return restore_type(xp)
Example #24
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, Any] = None,
        *,
        epsilon: float,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, criterion, kwargs

        min_, max_ = model.bounds
        p = self.sample_noise(x)
        norms = self.get_norms(p)
        p = p / atleast_kd(norms, p.ndim)
        x = x + epsilon * p
        x = x.clip(min_, max_)

        return restore_type(x)
Example #25
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, Any] = None,
        *,
        epsilon: float,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x0, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        verify_input_bounds(x0, model)

        is_adversarial = get_is_adversarial(criterion_, model)

        min_, max_ = model.bounds

        result = x0
        if self.check_trivial:
            found = is_adversarial(result)
        else:
            found = ep.zeros(x0, len(result)).bool()

        for _ in range(self.repeats):
            if found.all():
                break

            p = self.sample_noise(x0)
            epsilons = self.get_epsilons(x0, p, epsilon, min_=min_, max_=max_)
            x = x0 + epsilons * p
            x = x.clip(min_, max_)
            is_adv = is_adversarial(x)
            is_new_adv = ep.logical_and(is_adv, ep.logical_not(found))
            result = ep.where(atleast_kd(is_new_adv, x.ndim), x, result)
            found = ep.logical_or(found, is_adv)

        return restore_type(result)
Example #26
    def __call__(self, model: Model, inputs: T,
                 criterion: Union[Criterion, T]) -> T:
        x, restore_type = ep.astensor_(inputs)
        del inputs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        best = self._attack(model, x, criterion)
        best_is_adv = is_adversarial(best)

        for _ in range(1, self._times):
            xp = self._attack(model, x, criterion)
            # assumes xp does not violate the perturbation size constraint

            is_adv = is_adversarial(xp)
            new_best = ep.logical_and(is_adv, best_is_adv.logical_not())

            best = ep.where(atleast_kd(new_best, best.ndim), xp, best)
            best_is_adv = ep.logical_or(is_adv, best_is_adv)

        return restore_type(best)
Example #27
    def __call__(  # type: ignore
        self,
        model: Model,
        inputs: T,
        criterion: Any,
        **kwargs: Any,
    ) -> Tuple[T, T, T]:
        x, restore_type = ep.astensor_(inputs)
        del inputs
        criterion = get_criterion(criterion)

        is_adversarial = get_is_adversarial(criterion, model)

        if x.ndim != 4:
            raise NotImplementedError(
                "only implemented for inputs with two spatial dimensions (and one channel and one batch dimension)"
            )

        xp = self.run(model, x, criterion)
        success = is_adversarial(xp)

        xp_ = restore_type(xp)
        return xp_, xp_, restore_type(success)  # twice to match API
Example #28
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        min_, max_ = model.bounds
        target = min_ + self.target * (max_ - min_)
        direction = target - x

        lower_bound = ep.zeros(x, len(x))
        upper_bound = ep.ones(x, len(x))
        epsilons = lower_bound
        for _ in range(self.binary_search_steps):
            eps = atleast_kd(epsilons, x.ndim)
            is_adv = is_adversarial(x + eps * direction)
            lower_bound = ep.where(is_adv, lower_bound, epsilons)
            upper_bound = ep.where(is_adv, epsilons, upper_bound)
            epsilons = (lower_bound + upper_bound) / 2

        epsilons = upper_bound
        eps = atleast_kd(epsilons, x.ndim)
        xp = x + eps * direction
        return restore_type(xp)
Example #29
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        self.process_raw()
        assert self.inputs is not None
        assert self.outputs is not None
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        criterion = get_criterion(criterion)

        result = x
        found = criterion(x, model(x))

        dataset_size = len(self.inputs)
        batch_size = len(x)

        while not found.all():
            indices = np.random.randint(0, dataset_size, size=(batch_size, ))

            xp = self.inputs[indices]
            yp = self.outputs[indices]
            is_adv = criterion(xp, yp)

            new_found = ep.logical_and(is_adv, found.logical_not())
            result = ep.where(atleast_kd(new_found, result.ndim), xp, result)
            found = ep.logical_or(found, new_found)

        return restore_type(result)
Example #30
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)

        min_, max_ = model.bounds

        logits = model(x)
        classes = logits.argsort(axis=-1).flip(axis=-1)
        if self.candidates is None:
            candidates = logits.shape[-1]  # pragma: no cover
        else:
            candidates = min(self.candidates, logits.shape[-1])
            if candidates < 2:
                raise ValueError(  # pragma: no cover
                    f"expected the model output to have at least 2 classes, got {logits.shape[-1]}"
                )
            logging.info(f"Only testing the top-{candidates} classes")
            classes = classes[:, :candidates]

        N = len(x)
        rows = range(N)

        loss_fun = self._get_loss_fn(model, classes)
        loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

        x0 = x
        p_total = ep.zeros_like(x)
        for _ in range(self.steps):
            # let's first get the logits using k = 1 to see if we are done
            diffs = [loss_aux_and_grad(x, 1)]
            _, (_, logits), _ = diffs[0]

            is_adv = criterion(x, logits)
            if is_adv.all():
                break

            # then run all the other k's as well
            # we could avoid repeated forward passes and only repeat
            # the backward pass, but this cannot currently be done in eagerpy
            diffs += [loss_aux_and_grad(x, k) for k in range(2, candidates)]

            # we don't need the logits
            diffs_ = [(losses, grad) for _, (losses, _), grad in diffs]
            losses = ep.stack([lo for lo, _ in diffs_], axis=1)
            grads = ep.stack([g for _, g in diffs_], axis=1)
            assert losses.shape == (N, candidates - 1)
            assert grads.shape == (N, candidates - 1) + x0.shape[1:]

            # calculate the distances
            distances = self.get_distances(losses, grads)
            assert distances.shape == (N, candidates - 1)

            # determine the best directions
            best = distances.argmin(axis=1)
            distances = distances[rows, best]
            losses = losses[rows, best]
            grads = grads[rows, best]
            assert distances.shape == (N,)
            assert losses.shape == (N,)
            assert grads.shape == x0.shape

            # apply perturbation
            distances = distances + 1e-4  # for numerical stability
            p_step = self.get_perturbations(distances, grads)
            assert p_step.shape == x0.shape

            p_total += p_step
            # don't do anything for those that are already adversarial
            x = ep.where(
                atleast_kd(is_adv, x.ndim), x, x0 + (1.0 + self.overshoot) * p_total
            )
            x = ep.clip(x, min_, max_)

        return restore_type(x)
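get_distances and get_perturbations are defined elsewhere; for the L2 variant of this DeepFool-style attack the usual geometry is distance_k = |f_k| / ||g_k||_2 (the linearized distance to the class-k boundary) and the step is that distance times the normalized gradient. A hedged numeric sketch:

import numpy as np

f_k = 0.8                                   # logit difference to class k
g_k = np.array([0.6, 0.8])                  # gradient of that difference
distance = abs(f_k) / np.linalg.norm(g_k)   # 0.8 / 1.0 = 0.8
p_step = distance * g_k / np.linalg.norm(g_k)
print(distance, p_step)                     # 0.8 [0.48 0.64]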