def value_and_grad(
    self,
    loss_fn: Callable[[ep.Tensor], ep.Tensor],
    x: ep.Tensor,
) -> Tuple[ep.Tensor, ep.Tensor]:
    # samples, sigma, clip and bounds are free variables here; they are
    # bound in the enclosing scope of the gradient-estimator class.
    value = loss_fn(x)

    gradient = ep.zeros_like(x)
    for k in range(samples // 2):
        noise = ep.normal(x, shape=x.shape)

        # antithetic pair: evaluate the loss at x + sigma * noise and x - sigma * noise
        pos_theta = x + sigma * noise
        neg_theta = x - sigma * noise

        if clip:
            pos_theta = pos_theta.clip(*bounds)
            neg_theta = neg_theta.clip(*bounds)

        pos_loss = loss_fn(pos_theta)
        neg_loss = loss_fn(neg_theta)

        gradient += (pos_loss - neg_loss) * noise

    gradient /= 2 * sigma * 2 * samples
    return value, gradient
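# Example (ours, not part of the library): a self-contained sketch of the same
# antithetic (two-sided) estimator, with samples and sigma passed explicitly
# instead of coming from the enclosing scope. The name es_value_and_grad is
# ours. It averages over the samples // 2 pairs; the method above divides by
# 2 * sigma * 2 * samples instead, which only changes the overall scale of the
# estimate, not its direction.
import numpy as np
import eagerpy as ep

def es_value_and_grad(loss_fn, x, samples=1000, sigma=0.1):
    value = loss_fn(x)
    gradient = ep.zeros_like(x)
    for _ in range(samples // 2):
        noise = ep.normal(x, shape=x.shape)
        # each pair estimates the directional derivative along noise
        gradient += (loss_fn(x + sigma * noise) - loss_fn(x - sigma * noise)) * noise
    return value, gradient / (2 * sigma * (samples // 2))

x0 = ep.astensor(np.array([1.0, -2.0], dtype=np.float32))
_, g = es_value_and_grad(lambda t: (t ** 2).sum(), x0)
print(g.numpy())  # roughly [2.0, -4.0], the true gradient of sum(t**2)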
def __call__(
    self, model: Model, inputs: T, criterion: Union[Misclassification, T]
) -> T:
    x, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion

    N = len(x)

    if isinstance(criterion_, Misclassification):
        classes = criterion_.labels
    else:
        raise ValueError("unsupported criterion")

    if classes.shape != (N,):
        raise ValueError(f"expected labels to have shape ({N},), got {classes.shape}")

    bounds = model.bounds

    def loss_fun(delta: ep.Tensor, logits: ep.Tensor) -> ep.Tensor:
        assert x.shape[0] == logits.shape[0]
        assert delta.shape == x.shape

        x_hat = x + delta
        logits_hat = model(x_hat)
        loss = ep.kl_div_with_logits(logits, logits_hat).sum()
        return loss

    value_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=False)

    clean_logits = model(x)

    # start with a random vector as search vector
    d = ep.normal(x, shape=x.shape, mean=0, stddev=1)
    for it in range(self.iterations):
        # rescale the proposal so its L2 norm equals xi
        d = d * self.xi / atleast_kd(ep.norms.l2(flatten(d), axis=-1), x.ndim)

        # use gradient of KL divergence as new search vector
        _, grad = value_and_grad(d, clean_logits)
        d = grad

        # rescale search vector
        d = (bounds[1] - bounds[0]) * d

        if ep.any(ep.norms.l2(flatten(d), axis=-1) < 1e-64):
            raise RuntimeError("Gradient vanished; this can happen if xi is too small.")

    final_delta = (
        self.epsilon / ep.sqrt((d ** 2).sum(keepdims=True, axis=(1, 2, 3))) * d
    )
    x_adv = ep.clip(x + final_delta, *bounds)
    return restore_type(x_adv)
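# Example (ours): a quick numeric check of ep.kl_div_with_logits, the loss the
# attack above maximizes between clean and perturbed logits. The logits are
# arbitrary values chosen for illustration (numpy backend):
import numpy as np
import eagerpy as ep

logits_p = ep.astensor(np.array([[2.0, 0.0, -1.0]], dtype=np.float32))
logits_q = ep.astensor(np.array([[1.0, 1.0, -1.0]], dtype=np.float32))
print(ep.kl_div_with_logits(logits_p, logits_p).numpy())  # ~0 for identical logits
print(ep.kl_div_with_logits(logits_p, logits_q).numpy())  # > 0 otherwise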
import eagerpy as ep
from foolbox.devutils import atleast_kd, flatten


def normalize_gradient_l2_norms(grad: ep.Tensor) -> ep.Tensor:
    norms = ep.norms.l2(flatten(grad), -1)

    # replace zero gradients with random directions
    grad = ep.where(
        atleast_kd(norms == 0, grad.ndim), ep.normal(grad, shape=grad.shape), grad
    )

    # calculate norms again for previously vanishing elements
    norms = ep.norms.l2(flatten(grad), -1)

    norms = ep.maximum(norms, 1e-12)  # avoid division by zero
    factor = 1 / norms
    factor = atleast_kd(factor, grad.ndim)
    return grad * factor
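# Example (ours): a zero row is replaced by a random direction before
# normalizing, so every row of the output ends up with unit L2 norm:
import numpy as np
import eagerpy as ep

g = ep.astensor(np.array([[0.0, 0.0], [3.0, 4.0]], dtype=np.float32))
print(normalize_gradient_l2_norms(g).norms.l2(axis=-1).numpy())  # ~[1. 1.]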
def approximate_gradients(
    self,
    is_adversarial: Callable[[ep.Tensor], ep.Tensor],
    x_advs: ep.Tensor,
    steps: int,
    delta: ep.Tensor,
) -> ep.Tensor:
    # (steps, bs, ...)
    noise_shape = tuple([steps] + list(x_advs.shape))
    if self.constraint == "l2":
        rv = ep.normal(x_advs, noise_shape)
    elif self.constraint == "linf":
        rv = ep.uniform(x_advs, low=-1, high=1, shape=noise_shape)
    rv /= atleast_kd(ep.norms.l2(flatten(rv, keep=1), -1), rv.ndim) + 1e-12

    scaled_rv = atleast_kd(ep.expand_dims(delta, 0), rv.ndim) * rv

    perturbed = ep.expand_dims(x_advs, 0) + scaled_rv
    perturbed = ep.clip(perturbed, 0, 1)

    rv = (perturbed - x_advs) / atleast_kd(ep.expand_dims(delta + 1e-8, 0), rv.ndim)

    multipliers_list: List[ep.Tensor] = []
    for step in range(steps):
        decision = is_adversarial(perturbed[step])
        multipliers_list.append(
            ep.where(
                decision,
                ep.ones(x_advs, (len(x_advs),)),
                -ep.ones(x_advs, (len(decision),)),
            )
        )
    # (steps, bs, ...)
    multipliers = ep.stack(multipliers_list, 0)

    # subtract the mean as a baseline, unless all decisions agree,
    # in which case subtracting the mean would cancel the signal
    vals = ep.where(
        ep.abs(ep.mean(multipliers, axis=0, keepdims=True)) == 1,
        multipliers,
        multipliers - ep.mean(multipliers, axis=0, keepdims=True),
    )
    grad = ep.mean(atleast_kd(vals, rv.ndim) * rv, axis=0)

    grad /= ep.norms.l2(atleast_kd(flatten(grad), grad.ndim)) + 1e-12

    return grad
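# Example (ours): the baseline subtraction above acts as a control variate.
# With mixed decisions the mean is removed; if all decisions agree, the raw
# multipliers are kept, since subtracting the mean would zero them out. A
# plain-numpy illustration for a single input and four sampled directions:
import numpy as np

multipliers = np.array([1.0, 1.0, -1.0, 1.0])  # +1 adversarial, -1 not
mean = multipliers.mean()                       # 0.5
vals = multipliers if abs(mean) == 1 else multipliers - mean
print(vals)  # [ 0.5  0.5 -1.5  0.5]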
def draw_proposals(
    bounds,
    originals: ep.Tensor,
    perturbed: ep.Tensor,
    unnormalized_source_directions: ep.Tensor,
    source_directions: ep.Tensor,
    source_norms: ep.Tensor,
    spherical_steps: ep.Tensor,
    source_steps: ep.Tensor,
):
    # l2norms is a local helper: row-wise L2 norms, i.e. ep.norms.l2(x, axis=-1)

    # remember the actual shape
    shape = originals.shape
    assert perturbed.shape == shape
    assert unnormalized_source_directions.shape == shape
    assert source_directions.shape == shape

    # flatten everything to (batch, size)
    originals = flatten(originals)
    perturbed = flatten(perturbed)
    unnormalized_source_directions = flatten(unnormalized_source_directions)
    source_directions = flatten(source_directions)

    N, D = originals.shape

    assert source_norms.shape == (N,)
    assert spherical_steps.shape == (N,)
    assert source_steps.shape == (N,)

    # draw from an iid Gaussian (we can share this across the whole batch)
    eta = ep.normal(perturbed, (D, 1))

    # make orthogonal (source_directions are normalized)
    eta = eta.T - ep.matmul(source_directions, eta) * source_directions
    assert eta.shape == (N, D)

    # rescale
    norms = l2norms(eta)
    assert norms.shape == (N,)
    eta = eta * atleast_kd(spherical_steps * source_norms / norms, eta.ndim)

    # project on the sphere using Pythagoras
    distances = atleast_kd((spherical_steps.square() + 1).sqrt(), eta.ndim)
    directions = eta - unnormalized_source_directions
    spherical_candidates = originals + directions / distances

    # clip
    min_, max_ = bounds
    spherical_candidates = spherical_candidates.clip(min_, max_)

    # step towards the original inputs
    new_source_directions = originals - spherical_candidates
    assert new_source_directions.ndim == 2
    new_source_directions_norms = l2norms(new_source_directions)

    # length if spherical_candidates were exactly on the sphere
    lengths = source_steps * source_norms

    # length including correction for numerical deviation from sphere
    lengths = lengths + new_source_directions_norms - source_norms

    # make sure the step size is positive
    lengths = ep.maximum(lengths, 0)

    # normalize the length
    lengths = lengths / new_source_directions_norms
    lengths = atleast_kd(lengths, new_source_directions.ndim)

    candidates = spherical_candidates + lengths * new_source_directions

    # clip
    candidates = candidates.clip(min_, max_)

    # restore shape
    candidates = candidates.reshape(shape)
    spherical_candidates = spherical_candidates.reshape(shape)
    return candidates, spherical_candidates
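# Example (ours): the matmul line above removes the component of eta along
# each (unit-norm) source direction, leaving a direction orthogonal to it.
# A small check with a single batch element and D = 3:
import numpy as np
import eagerpy as ep

source_directions = ep.astensor(np.array([[1.0, 0.0, 0.0]], dtype=np.float32))  # (N, D)
eta = ep.normal(source_directions, (3, 1))                                      # (D, 1)
eta = eta.T - ep.matmul(source_directions, eta) * source_directions             # (N, D)
print(ep.matmul(source_directions, eta.T).numpy())  # ~0: orthogonal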
def uniform_l2_n_spheres(dummy: ep.Tensor, batch_size: int, n: int) -> ep.Tensor:
    # sample batch_size points uniformly from the unit sphere S^n in R^(n+1)
    x = ep.normal(dummy, (batch_size, n + 1))
    r = x.norms.l2(axis=-1, keepdims=True)
    s = x / r
    return s
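# Example (ours): draw a batch of points from S^2 (the unit sphere in R^3)
# and check that each row has unit norm:
import numpy as np
import eagerpy as ep

dummy = ep.astensor(np.zeros(1, dtype=np.float32))
s = uniform_l2_n_spheres(dummy, batch_size=4, n=2)
print(s.shape)                      # (4, 3)
print(s.norms.l2(axis=-1).numpy())  # ~[1. 1. 1. 1.]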
def test_normal_tuple(t: Tensor) -> Shape:
    return ep.normal(t, (2, 3)).shape
def test_normal_scalar(t: Tensor) -> Shape:
    return ep.normal(t, 5).shape
def _get_vector_dct(self) -> ep.Tensor:
    # random mask over the DCT coefficients with entries in {-1, 0, 1}
    probs = ep.uniform(self._originals, self._originals.shape, 0, 3).astype(int) - 1
    r_np = self.dcts * probs
    r_np = self._inverse_dct(r_np)
    return r_np + ep.normal(self._originals, r_np.shape, stddev=self._beta)
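# Example (ours): the uniform draw over [0, 3) cast to int yields values in
# {0, 1, 2}, so subtracting 1 gives a mask with entries in {-1, 0, 1}:
import numpy as np
import eagerpy as ep

dummy = ep.astensor(np.zeros((2, 4), dtype=np.float32))
probs = ep.uniform(dummy, dummy.shape, 0, 3).astype(int) - 1
print(probs.numpy())  # entries in {-1, 0, 1}, roughly uniform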
def draw_proposals(
    bounds: Bounds,
    originals: ep.Tensor,
    perturbed: ep.Tensor,
    unnormalized_source_directions: ep.Tensor,
    source_directions: ep.Tensor,
    source_norms: ep.Tensor,
    spherical_steps: ep.Tensor,
    source_steps: ep.Tensor,
    surrogate_model: Model,
) -> Tuple[ep.Tensor, ep.Tensor]:
    # requires: import torch; get_projected_gradients from the surrogate-model utilities

    # remember the actual shape
    shape = originals.shape
    assert perturbed.shape == shape
    assert unnormalized_source_directions.shape == shape
    assert source_directions.shape == shape

    # flatten everything to (batch, size)
    originals = flatten(originals)
    perturbed = flatten(perturbed)
    unnormalized_source_directions = flatten(unnormalized_source_directions)
    source_directions = flatten(source_directions)
    N, D = originals.shape

    assert source_norms.shape == (N,)
    assert spherical_steps.shape == (N,)
    assert source_steps.shape == (N,)

    # draw from an iid Gaussian (we can share this across the whole batch)
    eta = ep.normal(perturbed, (D, 1))

    # make orthogonal (source_directions are normalized)
    eta = eta.T - ep.matmul(source_directions, eta) * source_directions
    assert eta.shape == (N, D)

    # blend the random direction with a projected gradient from the surrogate model
    pg_factor = 0.5
    if surrogate_model is not None:
        device = surrogate_model.device
        projected_gradient = get_projected_gradients(
            perturbed.reshape(shape), originals.reshape(shape), 0, surrogate_model
        )
        projected_gradient = projected_gradient.reshape((N, D))
        projected_gradient = torch.tensor(projected_gradient, device=device)
        projected_gradient, restore_type = ep.astensor_(projected_gradient)

        eta = (1.0 - pg_factor) * eta + pg_factor * projected_gradient

    # rescale
    norms = ep.norms.l2(eta, axis=-1)
    assert norms.shape == (N,)
    eta = eta * atleast_kd(spherical_steps * source_norms / norms, eta.ndim)

    # project on the sphere using Pythagoras
    distances = atleast_kd((spherical_steps.square() + 1).sqrt(), eta.ndim)
    directions = eta - unnormalized_source_directions
    spherical_candidates = originals + directions / distances

    # clip
    min_, max_ = bounds
    spherical_candidates = spherical_candidates.clip(min_, max_)

    # step towards the original inputs
    new_source_directions = originals - spherical_candidates
    assert new_source_directions.ndim == 2
    new_source_directions_norms = ep.norms.l2(flatten(new_source_directions), axis=-1)

    # length if spherical_candidates were exactly on the sphere
    lengths = source_steps * source_norms

    # length including correction for numerical deviation from sphere
    lengths = lengths + new_source_directions_norms - source_norms

    # make sure the step size is positive
    lengths = ep.maximum(lengths, 0)

    # normalize the length
    lengths = lengths / new_source_directions_norms
    lengths = atleast_kd(lengths, new_source_directions.ndim)

    candidates = spherical_candidates + lengths * new_source_directions

    # clip
    candidates = candidates.clip(min_, max_)

    # restore shape
    candidates = candidates.reshape(shape)
    spherical_candidates = spherical_candidates.reshape(shape)

    return candidates, spherical_candidates
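# Example (ours): the ep.astensor_ pattern used above wraps a native tensor
# (here a numpy array standing in for a torch tensor) as an eagerpy tensor
# and returns a function that restores the original type:
import numpy as np
import eagerpy as ep

native = np.ones((2, 3), dtype=np.float32)
t, restore_type = ep.astensor_(native)
print(type(t))                 # an eagerpy tensor wrapper
print(type(restore_type(t)))   # back to numpy.ndarray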
def uniform_n_sphere(dummy: ep.Tensor, n: int) -> ep.Tensor:
    # sample a single point uniformly from the unit sphere S^n in R^(n+1)
    x = ep.normal(dummy, n + 1)
    r = x.norms.l2()
    s = x / r
    return s
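# Example (ours): a single uniform draw from S^2:
import numpy as np
import eagerpy as ep

dummy = ep.astensor(np.zeros(1, dtype=np.float32))
s = uniform_n_sphere(dummy, n=2)
print(s.shape)               # (3,)
print(s.norms.l2().numpy())  # ~1.0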