def __call__(self, model: Model, inputs: T,
             criterion: Union[Misclassification, T]) -> T:
    x, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion

    N = len(x)

    if isinstance(criterion_, Misclassification):
        classes = criterion_.labels
    else:
        raise ValueError("unsupported criterion")

    if classes.shape != (N,):
        raise ValueError(
            f"expected labels to have shape ({N},), got {classes.shape}")

    bounds = model.bounds

    def loss_fun(delta: ep.Tensor, logits: ep.Tensor) -> ep.Tensor:
        assert x.shape[0] == logits.shape[0]
        assert delta.shape == x.shape

        x_hat = x + delta
        logits_hat = model(x_hat)
        loss = ep.kl_div_with_logits(logits, logits_hat).sum()
        return loss

    value_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=False)

    clean_logits = model(x)

    # start with a random vector as the search vector
    d = ep.normal(x, shape=x.shape, mean=0, stddev=1)
    for it in range(self.iterations):
        # rescale the proposal to have norm xi
        d = d * self.xi / atleast_kd(ep.norms.l2(flatten(d), axis=-1), x.ndim)

        # use the gradient of the KL divergence as the new search vector
        _, grad = value_and_grad(d, clean_logits)
        d = grad

        # rescale the search vector
        d = (bounds[1] - bounds[0]) * d

        if ep.any(ep.norms.l2(flatten(d), axis=-1) < 1e-64):
            raise RuntimeError(
                "Gradient vanished; this can happen if xi is too small.")

    # scale the final direction to have norm epsilon (assumes NCHW inputs)
    final_delta = (
        self.epsilon / ep.sqrt((d ** 2).sum(keepdims=True, axis=(1, 2, 3))) * d
    )
    x_adv = ep.clip(x + final_delta, *bounds)
    return restore_type(x_adv)
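# The helpers `flatten` and `atleast_kd` are used throughout these attacks but
# are not defined here. A minimal sketch of what they must do, inferred from
# their call sites above (the upstream definitions may differ in detail):

def flatten(x: ep.Tensor, keep: int = 1) -> ep.Tensor:
    # collapse all axes after the first `keep` axes (per-sample flattening)
    return x.flatten(start=keep)

def atleast_kd(x: ep.Tensor, k: int) -> ep.Tensor:
    # append trailing singleton axes until x has k dimensions, so that a
    # per-sample scalar broadcasts against a batch of images
    shape = x.shape + (1,) * (k - x.ndim)
    return x.reshape(shape)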
def test_value_and_grad_fn(dummy: Tensor) -> None:
    if isinstance(dummy, ep.NumPyTensor):
        pytest.skip()

    def f(x: ep.Tensor) -> ep.Tensor:
        return x.square().sum()

    vgf = ep.value_and_grad_fn(dummy, f)
    t = ep.arange(dummy, 8).float32().reshape((2, 4))
    v, g = vgf(t)
    assert v.item() == 140
    assert (g == 2 * t).all()
def test_value_and_grad_fn_with_aux(dummy: Tensor) -> None:
    if isinstance(dummy, ep.NumPyTensor):
        pytest.skip()

    def f(x: Tensor) -> Tuple[Tensor, Tensor]:
        x = x.square()
        return x.sum(), x

    vgf = ep.value_and_grad_fn(dummy, f, has_aux=True)
    t = ep.arange(dummy, 8).float32().reshape((2, 4))
    v, aux, g = vgf(t)
    assert v.item() == 140
    assert (aux == t.square()).all()
    assert (g == 2 * t).all()
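# For reference, a standalone sketch of the ep.value_and_grad_fn contract that
# the two tests above exercise, here with the PyTorch backend (assumes torch
# and eagerpy are installed; any differentiable backend behaves the same way):

def demo_value_and_grad() -> None:
    import torch

    t = ep.astensor(torch.arange(8.0).reshape(2, 4))

    def f(x: ep.Tensor) -> ep.Tensor:
        return x.square().sum()

    vgf = ep.value_and_grad_fn(t, f)
    value, grad = vgf(t)
    assert value.item() == 140.0  # 0^2 + 1^2 + ... + 7^2 = 140
    assert (grad == 2 * t).all()  # gradient of a sum of squares is 2x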
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    del inputs, kwargs

    verify_input_bounds(x, model)

    criterion = get_criterion(criterion)

    min_, max_ = model.bounds

    logits = model(x)
    classes = logits.argsort(axis=-1).flip(axis=-1)
    if self.candidates is None:
        candidates = logits.shape[-1]  # pragma: no cover
    else:
        candidates = min(self.candidates, logits.shape[-1])
        if not candidates >= 2:
            raise ValueError(  # pragma: no cover
                f"expected the model output to have at least 2 classes, got {logits.shape[-1]}"
            )
        logging.info(f"Only testing the top-{candidates} classes")
        classes = classes[:, :candidates]

    N = len(x)
    rows = range(N)

    loss_fun = self._get_loss_fn(model, classes)
    loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

    x0 = x
    p_total = ep.zeros_like(x)
    for _ in range(self.steps):
        # let's first get the logits using k = 1 to see if we are done
        diffs = [loss_aux_and_grad(x, 1)]
        _, (_, logits), _ = diffs[0]

        is_adv = criterion(x, logits)
        if is_adv.all():
            break

        # then run all the other k's as well
        # we could avoid repeated forward passes and only repeat
        # the backward pass, but this cannot currently be done in eagerpy
        diffs += [loss_aux_and_grad(x, k) for k in range(2, candidates)]

        # we don't need the logits
        diffs_ = [(losses, grad) for _, (losses, _), grad in diffs]
        losses = ep.stack([lo for lo, _ in diffs_], axis=1)
        grads = ep.stack([g for _, g in diffs_], axis=1)
        assert losses.shape == (N, candidates - 1)
        assert grads.shape == (N, candidates - 1) + x0.shape[1:]

        # calculate the distances
        distances = self.get_distances(losses, grads)
        assert distances.shape == (N, candidates - 1)

        # determine the best directions
        best = distances.argmin(axis=1)
        distances = distances[rows, best]
        losses = losses[rows, best]
        grads = grads[rows, best]
        assert distances.shape == (N,)
        assert losses.shape == (N,)
        assert grads.shape == x0.shape

        # apply perturbation
        distances = distances + 1e-4  # for numerical stability
        p_step = self.get_perturbations(distances, grads)
        assert p_step.shape == x0.shape

        p_total += p_step
        # don't do anything for those that are already adversarial
        x = ep.where(
            atleast_kd(is_adv, x.ndim), x, x0 + (1.0 + self.overshoot) * p_total
        )
        x = ep.clip(x, min_, max_)

    return restore_type(x)
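# `get_distances` and `get_perturbations` are left abstract above. One
# plausible L2 instantiation, following the linearized DeepFool update: the
# distance to the k-th decision boundary is |loss_k| / ||grad_k||_2 and the
# step moves that far along the gradient. A hedged sketch; the upstream
# implementation may differ in details:

def get_distances(self, losses: ep.Tensor, grads: ep.Tensor) -> ep.Tensor:
    # per-class linearized distance; grads has shape (N, K-1, *input_shape)
    return abs(losses) / (flatten(grads, keep=2).norms.l2(axis=-1) + 1e-8)

def get_perturbations(self, distances: ep.Tensor, grads: ep.Tensor) -> ep.Tensor:
    # move each sample `distance` units along its normalized gradient
    return (
        atleast_kd(distances / (flatten(grads).norms.l2(axis=-1) + 1e-8), grads.ndim)
        * grads
    )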
def __call__(
    self,
    inputs,
    labels,
    *,
    target_classes=None,
    binary_search_steps=9,
    max_iterations=10000,
    confidence=0,
    initial_learning_rate=1e-2,
    regularization=1e-2,
    initial_const=1e-3,
    abort_early=True,
    decision_rule="EN",
):
    x_0 = ep.astensor(inputs)
    N = len(x_0)

    assert decision_rule in ("EN", "L1")

    targeted = target_classes is not None
    if targeted:
        labels = None
        target_classes = ep.astensor(target_classes)
        assert target_classes.shape == (N,)
        is_adv = partial(targeted_is_adv, target_classes=target_classes,
                         confidence=confidence)
    else:
        labels = ep.astensor(labels)
        assert labels.shape == (N,)
        is_adv = partial(untargeted_is_adv, labels=labels,
                         confidence=confidence)

    min_, max_ = self.model.bounds()

    rows = np.arange(N)

    def loss_fun(
        y_k: ep.Tensor, consts: ep.Tensor
    ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
        assert y_k.shape == x_0.shape
        assert consts.shape == (N,)

        logits = ep.astensor(self.model.forward(y_k.tensor))

        if targeted:
            c_minimize = best_other_classes(logits, target_classes)
            c_maximize = target_classes
        else:
            c_minimize = labels
            c_maximize = best_other_classes(logits, labels)

        is_adv_loss = logits[rows, c_minimize] - logits[rows, c_maximize]
        assert is_adv_loss.shape == (N,)

        is_adv_loss = is_adv_loss + confidence
        is_adv_loss = ep.maximum(0, is_adv_loss)
        is_adv_loss = is_adv_loss * consts

        squared_norms = flatten(y_k - x_0).square().sum(axis=-1)
        loss = is_adv_loss.sum() + squared_norms.sum()
        return loss, (y_k, logits)

    loss_aux_and_grad = ep.value_and_grad_fn(x_0, loss_fun, has_aux=True)

    consts = initial_const * np.ones((N,))
    lower_bounds = np.zeros((N,))
    upper_bounds = np.inf * np.ones((N,))

    best_advs = ep.zeros_like(x_0)
    best_advs_norms = ep.ones(x_0, (N,)) * np.inf

    # the binary search searches for the smallest consts that produce adversarials
    for binary_search_step in range(binary_search_steps):
        if (binary_search_step == binary_search_steps - 1
                and binary_search_steps >= 10):
            # in the last iteration, repeat the search once
            consts = np.minimum(upper_bounds, 1e10)

        # restart the search for the delta that minimizes the loss
        # TODO: rewrite this once eagerpy supports .copy()
        x_k = x_0  # ep.zeros_like(x_0) + x_0
        y_k = x_0  # ep.zeros_like(x_0) + x_0

        # found adv with the current consts
        found_advs = np.full((N,), fill_value=False)
        loss_at_previous_check = np.inf

        consts_ = ep.from_numpy(x_0, consts.astype(np.float32))

        for iteration in range(max_iterations):
            # square-root learning rate decay
            learning_rate = (initial_learning_rate *
                             (1.0 - iteration / max_iterations) ** 0.5)

            # FISTA-style update: take the gradient step at y_k, ...
            loss, (x, logits), gradient = loss_aux_and_grad(y_k, consts_)

            x_k_old = x_k
            x_k = project_shrinkage_thresholding(
                y_k - learning_rate * gradient, x_0, regularization, min_, max_)
            # ... then extrapolate with momentum
            y_k = x_k + iteration / (iteration + 3.0) * (x_k - x_k_old)

            if abort_early and iteration % (np.ceil(max_iterations / 10)) == 0:
                # after each tenth of the iterations, check progress
                if not (loss <= 0.9999 * loss_at_previous_check):
                    break  # stop the optimization if there has been no progress
                loss_at_previous_check = loss

            found_advs_iter = is_adv(logits)

            best_advs, best_advs_norms = apply_decision_rule(
                decision_rule,
                regularization,
                best_advs,
                best_advs_norms,
                x_k,
                x_0,
                found_advs_iter,
            )

            found_advs = np.logical_or(found_advs, found_advs_iter.numpy())

        upper_bounds = np.where(found_advs, consts, upper_bounds)
        lower_bounds = np.where(found_advs, lower_bounds, consts)

        consts_exponential_search = consts * 10
        consts_binary_search = (lower_bounds + upper_bounds) / 2
        consts = np.where(np.isinf(upper_bounds), consts_exponential_search,
                          consts_binary_search)

    return best_advs.tensor
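# `apply_decision_rule` is used above but defined elsewhere. A hedged sketch
# of its presumed behavior: per sample, keep the adversarial with the smallest
# elastic-net ("EN") or L1 distance found so far (the upstream helper may
# differ in detail):

def apply_decision_rule(
    decision_rule: str,
    beta: float,
    best_advs: ep.Tensor,
    best_advs_norms: ep.Tensor,
    x_k: ep.Tensor,
    x_0: ep.Tensor,
    found_advs: ep.Tensor,
) -> Tuple[ep.Tensor, ep.Tensor]:
    if decision_rule == "EN":
        # elastic-net distance: beta * L1 + squared L2
        norms = (beta * flatten(x_k - x_0).abs().sum(axis=-1)
                 + flatten(x_k - x_0).square().sum(axis=-1))
    else:  # "L1"
        norms = flatten(x_k - x_0).abs().sum(axis=-1)

    new_best = ep.logical_and(norms < best_advs_norms, found_advs)
    best_advs = ep.where(atleast_kd(new_best, best_advs.ndim), x_k, best_advs)
    best_advs_norms = ep.where(new_best, norms, best_advs_norms)
    return best_advs, best_advs_norms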
def __call__(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, TargetedMisclassification, T],
) -> T:
    x, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion

    N = len(x)

    if isinstance(criterion_, Misclassification):
        targeted = False
        classes = criterion_.labels
        change_classes_logits = self.confidence
    elif isinstance(criterion_, TargetedMisclassification):
        targeted = True
        classes = criterion_.target_classes
        change_classes_logits = -self.confidence
    else:
        raise ValueError("unsupported criterion")

    def is_adversarial(perturbed: ep.Tensor, logits: ep.Tensor) -> ep.Tensor:
        if change_classes_logits != 0:
            logits += ep.onehot_like(logits, classes, value=change_classes_logits)
        return criterion_(perturbed, logits)

    if classes.shape != (N,):
        name = "target_classes" if targeted else "labels"
        raise ValueError(
            f"expected {name} to have shape ({N},), got {classes.shape}"
        )

    min_, max_ = model.bounds

    rows = range(N)

    def loss_fun(y_k: ep.Tensor, consts: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]:
        assert y_k.shape == x.shape
        assert consts.shape == (N,)

        logits = model(y_k)

        if targeted:
            c_minimize = best_other_classes(logits, classes)
            c_maximize = classes
        else:
            c_minimize = classes
            c_maximize = best_other_classes(logits, classes)

        is_adv_loss = logits[rows, c_minimize] - logits[rows, c_maximize]
        assert is_adv_loss.shape == (N,)

        is_adv_loss = is_adv_loss + self.confidence
        is_adv_loss = ep.maximum(0, is_adv_loss)
        is_adv_loss = is_adv_loss * consts

        squared_norms = flatten(y_k - x).square().sum(axis=-1)
        loss = is_adv_loss.sum() + squared_norms.sum()
        return loss, logits

    loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

    consts = self.initial_const * ep.ones(x, (N,))
    lower_bounds = ep.zeros(x, (N,))
    upper_bounds = ep.inf * ep.ones(x, (N,))

    best_advs = ep.zeros_like(x)
    best_advs_norms = ep.ones(x, (N,)) * ep.inf

    # the binary search searches for the smallest consts that produce adversarials
    for binary_search_step in range(self.binary_search_steps):
        if (
            binary_search_step == self.binary_search_steps - 1
            and self.binary_search_steps >= 10
        ):
            # in the last iteration, repeat the search once
            consts = ep.minimum(upper_bounds, 1e10)

        # restart the search for the delta that minimizes the loss
        x_k = x
        y_k = x

        # found adv with the current consts
        found_advs = ep.full(x, (N,), value=False).bool()
        loss_at_previous_check = ep.ones(x, (1,)) * ep.inf

        for iteration in range(self.steps):
            # square-root learning rate decay
            stepsize = self.initial_stepsize * (1.0 - iteration / self.steps) ** 0.5

            # FISTA-style update: take the gradient step at y_k, ...
            loss, logits, gradient = loss_aux_and_grad(y_k, consts)

            x_k_old = x_k
            x_k = project_shrinkage_thresholding(
                y_k - stepsize * gradient, x, self.regularization, min_, max_
            )
            # ... then extrapolate with momentum
            y_k = x_k + iteration / (iteration + 3.0) * (x_k - x_k_old)

            if self.abort_early and iteration % (math.ceil(self.steps / 10)) == 0:
                # after each tenth of the iterations, check progress
                # TODO: loss is a scalar ep tensor. is this the best way to
                # implement the condition?
                if not ep.all(loss <= 0.9999 * loss_at_previous_check):
                    break  # stop optimization if there has been no progress
                loss_at_previous_check = loss

            found_advs_iter = is_adversarial(x_k, logits)

            best_advs, best_advs_norms = apply_decision_rule(
                self.decision_rule,
                self.regularization,
                best_advs,
                best_advs_norms,
                x_k,
                x,
                found_advs_iter,
            )

            found_advs = ep.logical_or(found_advs, found_advs_iter)

        upper_bounds = ep.where(found_advs, consts, upper_bounds)
        lower_bounds = ep.where(found_advs, lower_bounds, consts)

        consts_exponential_search = consts * 10
        consts_binary_search = (lower_bounds + upper_bounds) / 2
        consts = ep.where(
            ep.isinf(upper_bounds), consts_exponential_search, consts_binary_search
        )

    return restore_type(best_advs)
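# `project_shrinkage_thresholding` is the ISTA proximal step from the EAD
# paper: shrink each coordinate of z towards x by the regularization strength
# and clip to the valid range. A hedged sketch consistent with its use above
# (the upstream helper may differ):

def project_shrinkage_thresholding(
    z: ep.Tensor, x: ep.Tensor, regularization: float, min_: float, max_: float
) -> ep.Tensor:
    diff = z - x
    # coordinates with |diff| <= regularization are shrunk back to x exactly
    projection = ep.where(
        diff > regularization, ep.minimum(z - regularization, max_), x
    )
    projection = ep.where(
        diff < -regularization, ep.maximum(z + regularization, min_), projection
    )
    return projection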
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, TargetedMisclassification, T],
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    N = len(x)

    if isinstance(criterion_, Misclassification):
        targeted = False
        classes = criterion_.labels
    elif isinstance(criterion_, TargetedMisclassification):
        targeted = True
        classes = criterion_.target_classes
    else:
        raise ValueError("unsupported criterion")

    if classes.shape != (N,):
        name = "target_classes" if targeted else "labels"
        raise ValueError(
            f"expected {name} to have shape ({N},), got {classes.shape}")

    stepsize = 1.0
    min_, max_ = model.bounds

    def loss_fn(inputs: ep.Tensor,
                labels: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]:
        logits = model(inputs)

        sign = -1.0 if targeted else 1.0
        loss = sign * ep.crossentropy(logits, labels).sum()

        return loss, logits

    grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

    delta = ep.zeros_like(x)
    epsilon = self.init_epsilon * ep.ones(x, len(x))
    worst_norm = ep.norms.l2(flatten(ep.maximum(x - min_, max_ - x)), -1)

    best_l2 = worst_norm
    best_delta = delta
    adv_found = ep.zeros(x, len(x)).bool()

    for i in range(self.steps):
        # perform cosine annealing of the stepsize from 1.0 down to 0.01
        stepsize = (0.01 + (stepsize - 0.01) *
                    (1 + math.cos(math.pi * i / self.steps)) / 2)

        x_adv = x + delta

        _, logits, gradients = grad_and_logits(x_adv, classes)
        gradients = normalize_gradient_l2_norms(gradients)
        is_adversarial = criterion_(x_adv, logits)

        l2 = ep.norms.l2(flatten(delta), axis=-1)
        is_smaller = l2 <= best_l2

        is_both = ep.logical_and(is_adversarial, is_smaller)
        adv_found = ep.logical_or(adv_found, is_adversarial)
        best_l2 = ep.where(is_both, l2, best_l2)

        best_delta = ep.where(atleast_kd(is_both, x.ndim), delta, best_delta)

        # do step
        delta = delta + stepsize * gradients

        epsilon = epsilon * ep.where(is_adversarial, 1.0 - self.gamma,
                                     1.0 + self.gamma)
        epsilon = ep.minimum(epsilon, worst_norm)

        # project to epsilon ball
        delta *= atleast_kd(epsilon / ep.norms.l2(flatten(delta), -1), x.ndim)

        # clip to valid bounds
        delta = ep.clip(x + delta, *model.bounds) - x

    x_adv = x + best_delta
    return restore_type(x_adv)
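# `normalize_gradient_l2_norms` rescales every per-sample gradient to unit L2
# norm; samples with an exactly vanishing gradient get a random direction
# instead. A hedged sketch consistent with its use above:

def normalize_gradient_l2_norms(grad: ep.Tensor) -> ep.Tensor:
    norms = ep.norms.l2(flatten(grad), -1)

    # replace zero gradients with random directions
    grad = ep.where(
        atleast_kd(norms == 0, grad.ndim),
        ep.normal(grad, shape=grad.shape),
        grad,
    )
    norms = ep.norms.l2(flatten(grad), -1)
    norms = ep.maximum(norms, 1e-12)  # avoid division by zero
    return grad / atleast_kd(norms, grad.ndim)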
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, TargetedMisclassification, T],
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    N = len(x)

    if isinstance(criterion_, Misclassification):
        targeted = False
        classes = criterion_.labels
        change_classes_logits = self.confidence
    elif isinstance(criterion_, TargetedMisclassification):
        targeted = True
        classes = criterion_.target_classes
        change_classes_logits = -self.confidence
    else:
        raise ValueError("unsupported criterion")

    def is_adversarial(perturbed: ep.Tensor, logits: ep.Tensor) -> ep.Tensor:
        if change_classes_logits != 0:
            logits += ep.onehot_like(logits, classes, value=change_classes_logits)
        return criterion_(perturbed, logits)

    if classes.shape != (N,):
        name = "target_classes" if targeted else "labels"
        raise ValueError(
            f"expected {name} to have shape ({N},), got {classes.shape}")

    bounds = model.bounds
    to_attack_space = partial(_to_attack_space, bounds=bounds)
    to_model_space = partial(_to_model_space, bounds=bounds)

    x_attack = to_attack_space(x)
    reconstructed_x = to_model_space(x_attack)

    rows = range(N)

    def loss_fun(
        delta: ep.Tensor, consts: ep.Tensor
    ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
        assert delta.shape == x_attack.shape
        assert consts.shape == (N,)

        x = to_model_space(x_attack + delta)
        logits = model(x)

        if targeted:
            c_minimize = best_other_classes(logits, classes)
            c_maximize = classes  # target_classes
        else:
            c_minimize = classes  # labels
            c_maximize = best_other_classes(logits, classes)

        is_adv_loss = logits[rows, c_minimize] - logits[rows, c_maximize]
        assert is_adv_loss.shape == (N,)

        is_adv_loss = is_adv_loss + self.confidence
        is_adv_loss = ep.maximum(0, is_adv_loss)
        is_adv_loss = is_adv_loss * consts

        squared_norms = flatten(x - reconstructed_x).square().sum(axis=-1)
        loss = is_adv_loss.sum() + squared_norms.sum()
        return loss, (x, logits)

    loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

    consts = self.initial_const * np.ones((N,))
    lower_bounds = np.zeros((N,))
    upper_bounds = np.inf * np.ones((N,))

    best_advs = ep.zeros_like(x)
    best_advs_norms = ep.full(x, (N,), ep.inf)

    # the binary search searches for the smallest consts that produce adversarials
    for binary_search_step in range(self.binary_search_steps):
        if (binary_search_step == self.binary_search_steps - 1
                and self.binary_search_steps >= 10):
            # in the last binary search step, repeat the search once
            consts = np.minimum(upper_bounds, 1e10)

        # create a new optimizer to find the delta that minimizes the loss
        delta = ep.zeros_like(x_attack)
        optimizer = AdamOptimizer(delta)

        # tracks whether an adv with the current consts was found
        found_advs = np.full((N,), fill_value=False)
        loss_at_previous_check = np.inf

        consts_ = ep.from_numpy(x, consts.astype(np.float32))

        for step in range(self.steps):
            loss, (perturbed, logits), gradient = loss_aux_and_grad(delta, consts_)
            delta += optimizer(gradient, self.stepsize)

            if self.abort_early and step % (np.ceil(self.steps / 10)) == 0:
                # after each tenth of the overall steps, check progress
                if not (loss <= 0.9999 * loss_at_previous_check):
                    break  # stop Adam if there has been no progress
                loss_at_previous_check = loss

            found_advs_iter = is_adversarial(perturbed, logits)
            found_advs = np.logical_or(found_advs, found_advs_iter.numpy())

            norms = flatten(perturbed - x).norms.l2(axis=-1)
            closer = norms < best_advs_norms
            new_best = ep.logical_and(closer, found_advs_iter)

            new_best_ = atleast_kd(new_best, best_advs.ndim)
            best_advs = ep.where(new_best_, perturbed, best_advs)
            best_advs_norms = ep.where(new_best, norms, best_advs_norms)

        upper_bounds = np.where(found_advs, consts, upper_bounds)
        lower_bounds = np.where(found_advs, lower_bounds, consts)

        consts_exponential_search = consts * 10
        consts_binary_search = (lower_bounds + upper_bounds) / 2
        consts = np.where(np.isinf(upper_bounds), consts_exponential_search,
                          consts_binary_search)

    return restore_type(best_advs)
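# `best_other_classes` appears in several of the loss functions above. A
# hedged sketch of its presumed definition: the most likely class after
# masking out the excluded one:

def best_other_classes(logits: ep.Tensor, exclude: ep.Tensor) -> ep.Tensor:
    other_logits = logits - ep.onehot_like(logits, exclude, value=ep.inf)
    return other_logits.argmax(axis=-1)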
def __call__(
    self,
    inputs,
    labels,
    *,
    p,
    candidates=10,
    overshoot=0.02,
    steps=50,
    loss="logits",
):
    """
    Parameters
    ----------
    p : int or float
        Lp-norm that should be minimized, must be 2 or np.inf.
    candidates : int
        Limit on the number of the most likely classes that should be
        considered. A small value is usually sufficient and much faster.
    overshoot : float
        Relative overshoot applied to the total perturbation.
    steps : int
        Maximum number of steps to perform.
    """
    if not (1 <= p <= np.inf):
        raise ValueError
    if p not in [2, np.inf]:
        raise NotImplementedError

    min_, max_ = self.model.bounds()

    inputs = ep.astensor(inputs)
    labels = ep.astensor(labels)

    N = len(inputs)

    logits = self.model.forward(inputs)
    candidates = min(candidates, logits.shape[-1])
    classes = logits.argsort(axis=-1).flip(axis=-1)
    if candidates:
        assert candidates >= 2
        logging.info(f"Only testing the top-{candidates} classes")
        classes = classes[:, :candidates]

    i0 = classes[:, 0]
    rows = ep.arange(inputs, N)

    if loss == "logits":

        def loss_fun(
            x: ep.Tensor, k: int
        ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
            logits = self.model.forward(x)
            ik = classes[:, k]
            l0 = logits[rows, i0]
            lk = logits[rows, ik]
            loss = lk - l0
            return loss.sum(), (loss, logits)

    elif loss == "crossentropy":

        def loss_fun(
            x: ep.Tensor, k: int
        ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
            logits = self.model.forward(x)
            ik = classes[:, k]
            l0 = -ep.crossentropy(logits, i0)
            lk = -ep.crossentropy(logits, ik)
            loss = lk - l0
            return loss.sum(), (loss, logits)

    else:
        raise ValueError(
            f"expected loss to be 'logits' or 'crossentropy', got '{loss}'"
        )

    loss_aux_and_grad = ep.value_and_grad_fn(inputs, loss_fun, has_aux=True)

    x = x0 = inputs
    p_total = ep.zeros_like(x)
    for step in range(steps):
        # let's first get the logits using k = 1 to see if we are done
        diffs = [loss_aux_and_grad(x, 1)]
        _, (_, logits), _ = diffs[0]

        is_adv = logits.argmax(axis=-1) != labels
        if is_adv.all():
            break

        # then run all the other k's as well
        # we could avoid repeated forward passes and only repeat
        # the backward pass, but this cannot currently be done in eagerpy
        diffs += [loss_aux_and_grad(x, k) for k in range(2, candidates)]

        # we don't need the logits
        diffs_ = [(losses, grad) for _, (losses, _), grad in diffs]
        losses = ep.stack([lo for lo, _ in diffs_], axis=1)
        grads = ep.stack([g for _, g in diffs_], axis=1)
        assert losses.shape == (N, candidates - 1)
        assert grads.shape == (N, candidates - 1) + x0.shape[1:]

        # calculate the distances
        distances = self.get_distances(losses, grads)
        assert distances.shape == (N, candidates - 1)

        # determine the best directions
        best = distances.argmin(axis=1)
        distances = distances[rows, best]
        losses = losses[rows, best]
        grads = grads[rows, best]
        assert distances.shape == (N,)
        assert losses.shape == (N,)
        assert grads.shape == x0.shape

        # apply perturbation
        distances = distances + 1e-4  # for numerical stability
        p_step = self.get_perturbations(distances, grads)
        assert p_step.shape == x0.shape

        p_total += p_step
        # don't do anything for those that are already adversarial
        x = ep.where(atleast_kd(is_adv, x.ndim), x,
                     x0 + (1.0 + overshoot) * p_total)
        x = ep.clip(x, min_, max_)

    return x.tensor
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, TargetedMisclassification, T],
    *,
    starting_points: Optional[ep.Tensor] = None,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    criterion_ = get_criterion(criterion)

    if isinstance(criterion_, Misclassification):
        targeted = False
        classes = criterion_.labels
    elif isinstance(criterion_, TargetedMisclassification):
        targeted = True
        classes = criterion_.target_classes
    else:
        raise ValueError("unsupported criterion")

    def loss_fn(
        inputs: ep.Tensor, labels: ep.Tensor
    ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
        logits = model(inputs)

        if targeted:
            c_minimize = best_other_classes(logits, labels)
            c_maximize = labels  # target_classes
        else:
            c_minimize = labels  # labels
            c_maximize = best_other_classes(logits, labels)

        loss = logits[rows, c_minimize] - logits[rows, c_maximize]

        return -loss.sum(), (logits, loss)

    x, restore_type = ep.astensor_(inputs)
    del inputs, criterion, kwargs

    N = len(x)

    # start from initialization points/attack
    if starting_points is not None:
        x1 = starting_points
    else:
        if self.init_attack is not None:
            x1 = self.init_attack.run(model, x, criterion_)
        else:
            x1 = None

    # if initial points or an initialization attack are provided,
    # search for the boundary
    if x1 is not None:
        is_adv = get_is_adversarial(criterion_, model)
        assert is_adv(x1).all()
        lower_bound = ep.zeros(x, shape=(N,))
        upper_bound = ep.ones(x, shape=(N,))
        for _ in range(self.binary_search_steps):
            epsilons = (lower_bound + upper_bound) / 2
            mid_points = self.mid_points(x, x1, epsilons, model.bounds)
            is_advs = is_adv(mid_points)
            lower_bound = ep.where(is_advs, lower_bound, epsilons)
            upper_bound = ep.where(is_advs, epsilons, upper_bound)
        starting_points = self.mid_points(x, x1, upper_bound, model.bounds)
        delta = starting_points - x
    else:
        # start from x0
        delta = ep.zeros_like(x)

    if classes.shape != (N,):
        name = "target_classes" if targeted else "labels"
        raise ValueError(
            f"expected {name} to have shape ({N},), got {classes.shape}")

    min_, max_ = model.bounds

    rows = range(N)
    grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

    if self.p != 0:
        epsilon = ep.inf * ep.ones(x, len(x))
    else:
        epsilon = (ep.ones(x, len(x)) if x1 is None
                   else ep.norms.l0(flatten(delta), axis=-1))
    if self.p != 0:
        worst_norm = ep.norms.lp(
            flatten(ep.maximum(x - min_, max_ - x)), p=self.p, axis=-1)
    else:
        worst_norm = flatten(ep.ones_like(x)).bool().sum(axis=1).float32()

    best_lp = worst_norm
    best_delta = delta
    adv_found = ep.zeros(x, len(x)).bool()

    for i in range(self.steps):
        # perform cosine annealing of the learning rates
        stepsize = (self.min_stepsize +
                    (self.max_stepsize - self.min_stepsize) *
                    (1 + math.cos(math.pi * i / self.steps)) / 2)
        gamma = (0.001 + (self.gamma - 0.001) *
                 (1 + math.cos(math.pi * (i / self.steps))) / 2)

        x_adv = x + delta

        loss, (logits, loss_batch), gradients = grad_and_logits(x_adv, classes)
        is_adversarial = criterion_(x_adv, logits)

        lp = ep.norms.lp(flatten(delta), p=self.p, axis=-1)
        is_smaller = lp <= best_lp

        is_both = ep.logical_and(is_adversarial, is_smaller)
        adv_found = ep.logical_or(adv_found, is_adversarial)
        best_lp = ep.where(is_both, lp, best_lp)
        best_delta = ep.where(atleast_kd(is_both, x.ndim), delta, best_delta)

        # update epsilon
        if self.p != 0:
            distance_to_boundary = abs(loss_batch) / ep.norms.lp(
                flatten(gradients), p=self.dual, axis=-1)
            epsilon = ep.where(
                is_adversarial,
                ep.minimum(
                    epsilon * (1 - gamma),
                    ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)),
                ep.where(
                    adv_found,
                    epsilon * (1 + gamma),
                    ep.norms.lp(flatten(delta), p=self.p, axis=-1)
                    + distance_to_boundary))
        else:
            epsilon = ep.where(
                is_adversarial,
                ep.minimum(
                    ep.minimum(
                        epsilon - 1,
                        (epsilon * (1 - gamma)).astype(int).astype(epsilon.dtype)),
                    ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)),
                ep.maximum(
                    epsilon + 1,
                    (epsilon * (1 + gamma)).astype(int).astype(epsilon.dtype)))
            epsilon = ep.maximum(0, epsilon).astype(epsilon.dtype)

        # clip epsilon
        epsilon = ep.minimum(epsilon, worst_norm)

        # compute the normalized gradient update
        grad_ = self.normalize(gradients, x=x, bounds=model.bounds) * stepsize

        # do step
        delta = delta + grad_

        # project according to the given norm
        delta = self.project(x=x + delta, x0=x, epsilon=epsilon) - x

        # clip to valid bounds
        delta = ep.clip(x + delta, *model.bounds) - x

    x_adv = x + best_delta

    return restore_type(x_adv)
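# `self.normalize` and `self.project` are hooks of the attack class. One
# plausible L2 instantiation (p = 2, dual = 2), hedged and matching only the
# call sites above; the upstream hooks may differ:

def normalize(self, gradients: ep.Tensor, *, x: ep.Tensor, bounds) -> ep.Tensor:
    # rescale each per-sample gradient to unit L2 norm
    norms = ep.maximum(ep.norms.l2(flatten(gradients), axis=-1), 1e-12)
    return gradients / atleast_kd(norms, gradients.ndim)

def project(self, *, x: ep.Tensor, x0: ep.Tensor, epsilon: ep.Tensor) -> ep.Tensor:
    # project x back onto the L2 ball of radius epsilon around x0
    delta = x - x0
    norms = ep.maximum(ep.norms.l2(flatten(delta), axis=-1), 1e-12)
    factor = ep.minimum(ep.ones_like(norms), epsilon / norms)
    return x0 + delta * atleast_kd(factor, delta.ndim)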
def __call__(
    self,
    inputs,
    labels,
    *,
    target_classes=None,
    binary_search_steps=9,
    max_iterations=10000,
    confidence=0,
    learning_rate=1e-2,
    initial_const=1e-3,
    abort_early=True,
):
    x = ep.astensor(inputs)
    N = len(x)

    targeted = target_classes is not None
    if targeted:
        labels = None
        target_classes = ep.astensor(target_classes)
        assert target_classes.shape == (N,)
        is_adv = partial(targeted_is_adv, target_classes=target_classes,
                         confidence=confidence)
    else:
        labels = ep.astensor(labels)
        assert labels.shape == (N,)
        is_adv = partial(untargeted_is_adv, labels=labels,
                         confidence=confidence)

    bounds = self.model.bounds()
    to_attack_space = partial(_to_attack_space, bounds=bounds)
    to_model_space = partial(_to_model_space, bounds=bounds)

    x_attack = to_attack_space(x)
    reconstructed_x = to_model_space(x_attack)

    rows = np.arange(N)

    def loss_fun(
        delta: ep.Tensor, consts: ep.Tensor
    ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
        assert delta.shape == x_attack.shape
        assert consts.shape == (N,)

        x = to_model_space(x_attack + delta)
        logits = ep.astensor(self.model.forward(x.tensor))

        if targeted:
            c_minimize = best_other_classes(logits, target_classes)
            c_maximize = target_classes
        else:
            c_minimize = labels
            c_maximize = best_other_classes(logits, labels)

        is_adv_loss = logits[rows, c_minimize] - logits[rows, c_maximize]
        assert is_adv_loss.shape == (N,)

        is_adv_loss = is_adv_loss + confidence
        is_adv_loss = ep.maximum(0, is_adv_loss)
        is_adv_loss = is_adv_loss * consts

        squared_norms = flatten(x - reconstructed_x).square().sum(axis=-1)
        loss = is_adv_loss.sum() + squared_norms.sum()
        return loss, (x, logits)

    loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

    consts = initial_const * np.ones((N,))
    lower_bounds = np.zeros((N,))
    upper_bounds = np.inf * np.ones((N,))

    best_advs = ep.zeros_like(x)
    best_advs_norms = ep.ones(x, (N,)) * np.inf

    # the binary search searches for the smallest consts that produce adversarials
    for binary_search_step in range(binary_search_steps):
        if (binary_search_step == binary_search_steps - 1
                and binary_search_steps >= 10):
            # in the last iteration, repeat the search once
            consts = np.minimum(upper_bounds, 1e10)

        # create a new optimizer to find the delta that minimizes the loss
        delta = ep.zeros_like(x_attack)
        optimizer = AdamOptimizer(delta)

        # found adv with the current consts
        found_advs = np.full((N,), fill_value=False)
        loss_at_previous_check = np.inf

        consts_ = ep.from_numpy(x, consts.astype(np.float32))

        for iteration in range(max_iterations):
            loss, (perturbed, logits), gradient = loss_aux_and_grad(delta, consts_)
            delta += optimizer(gradient, learning_rate)

            if abort_early and iteration % (np.ceil(max_iterations / 10)) == 0:
                # after each tenth of the iterations, check progress
                if not (loss <= 0.9999 * loss_at_previous_check):
                    break  # stop Adam if there has been no progress
                loss_at_previous_check = loss

            found_advs_iter = is_adv(logits)
            found_advs = np.logical_or(found_advs, found_advs_iter.numpy())

            norms = flatten(perturbed - x).square().sum(axis=-1).sqrt()
            closer = norms < best_advs_norms
            new_best = closer.float32() * found_advs_iter.float32()

            best_advs = (atleast_kd(new_best, best_advs.ndim) * perturbed +
                         (1 - atleast_kd(new_best, best_advs.ndim)) * best_advs)
            best_advs_norms = new_best * norms + (1 - new_best) * best_advs_norms

        upper_bounds = np.where(found_advs, consts, upper_bounds)
        lower_bounds = np.where(found_advs, lower_bounds, consts)

        consts_exponential_search = consts * 10
        consts_binary_search = (lower_bounds + upper_bounds) / 2
        consts = np.where(np.isinf(upper_bounds), consts_exponential_search,
                          consts_binary_search)

    return best_advs.tensor
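# `_to_attack_space` and `_to_model_space` implement the tanh
# reparameterization from Carlini & Wagner: optimize in an unconstrained space
# and map back into the model's input bounds. A hedged sketch consistent with
# their use above:

def _to_attack_space(x: ep.Tensor, *, bounds: Tuple[float, float]) -> ep.Tensor:
    min_, max_ = bounds
    a = (min_ + max_) / 2
    b = (max_ - min_) / 2
    x = (x - a) / b    # map [min_, max_] to [-1, +1]
    x = x * 0.999999   # stay strictly inside to keep arctanh finite
    return x.arctanh()

def _to_model_space(x: ep.Tensor, *, bounds: Tuple[float, float]) -> ep.Tensor:
    min_, max_ = bounds
    x = x.tanh()       # map back to (-1, +1)
    a = (min_ + max_) / 2
    b = (max_ - min_) / 2
    return x * b + a   # map (-1, +1) to (min_, max_)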
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, TargetedMisclassification, T],
    *,
    early_stop: Optional[float] = None,
    filenames=None,
    **kwargs: Any,
) -> Tuple[int, str]:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    N = len(x)

    if isinstance(criterion_, Misclassification):
        targeted = False
        classes = criterion_.labels
    elif isinstance(criterion_, TargetedMisclassification):
        targeted = True
        classes = criterion_.target_classes
    else:
        raise ValueError("unsupported criterion")

    if classes.shape != (N,):
        name = "target_classes" if targeted else "labels"
        raise ValueError(
            f"expected {name} to have shape ({N},), got {classes.shape}")

    # load the watermark image and replicate it once per batch element
    image = Image.open('./test2.png')
    wm_numpy = torch.from_numpy(
        np.array(image, dtype=np.float32).transpose([2, 0, 1]))
    wm_tensor = torch.stack([wm_numpy] * N)

    # predictions on the clean inputs
    logits_clean = model(x).argmax(1)

    one_batch_attack_success = 0
    msg = ''
    for j in range(N):  # for each element of the batch
        # attack the true label if the clean prediction is correct;
        # otherwise attack the (wrong) predicted class and mark the
        # result as a prediction error
        pred_correct = logits_clean[j] == classes[j]
        label = classes[j] if pred_correct else logits_clean[j]

        blocks, alpha, angle = nsgaii.get_init()
        attack_success_population = nsgaii.nsgaii(
            model, x[j], label, wm_tensor[j], blocks, alpha, angle,
            self.waterMark, filenames[j])
        # each entry of attack_success_population is a tuple
        # (alpha, angle, logits, l2, x_adv) for one successful individual

        if len(attack_success_population) > 0:
            one_batch_attack_success += 1

            if self.need_show_img:
                adv_dir = nsgaii.watermark_dir
                if not os.path.exists(adv_dir):
                    os.makedirs(adv_dir)
                timestamp = str(int(time.time() * 1000))

                # save only the first successful adversarial
                alpha, angle, logits_population, l2_population, x_adv = \
                    attack_success_population[0]
                img_array = x_adv.raw.cpu().numpy().transpose([1, 2, 0]) * 255
                img = Image.fromarray(img_array.astype('uint8')).convert('RGB')
                img = img.resize((500, 500), Image.ANTIALIAS)
                img.save(os.path.join(adv_dir, filenames[j]))

                suffix = "\n" if pred_correct else " pred error\n"
                msg += (timestamp + "_filename_" + filenames[j]
                        + "_logist" + str(logits_population)
                        + "_l2=" + str(l2_population) + suffix)

    return one_batch_attack_success, msg