def project_onto_l1_ball(x: ep.Tensor, eps: ep.Tensor) -> ep.Tensor:
    """Computes Euclidean projection onto the L1 ball for a batch. [#Duchi08]_

    Adapted from the pytorch version by Tony Duan:
    https://gist.github.com/tonyduan/1329998205d88c566588e57e3e2c0c55

    Args:
        x: Batch of arbitrary-size tensors to project, possibly on GPU
        eps: radius of l-1 ball to project onto

    References:
      ..[#Duchi08] Efficient Projections onto the l1-Ball for Learning
            in High Dimensions
            John Duchi, Shai Shalev-Shwartz, Yoram Singer, and Tushar Chandra.
            International Conference on Machine Learning (ICML 2008)
    """
    original_shape = x.shape
    x = flatten(x)

    mask = (ep.norms.l1(x, axis=1) <= eps).astype(x.dtype).expand_dims(1)
    mu = ep.flip(ep.sort(ep.abs(x)), axis=-1).astype(x.dtype)
    cumsum = ep.cumsum(mu, axis=-1)
    arange = ep.arange(x, 1, x.shape[1] + 1).astype(x.dtype)

    rho = (
        ep.max(
            (mu * arange > (cumsum - eps.expand_dims(1))).astype(x.dtype) * arange,
            axis=-1,
        )
        - 1
    )
    # samples already under the norm are handled by the mask;
    # clamp rho so the index stays non-negative
    rho = ep.maximum(rho, 0)
    theta = (
        cumsum[ep.arange(x, x.shape[0]), rho.astype(ep.arange(x, 1).dtype)] - eps
    ) / (rho + 1.0)
    proj = (ep.abs(x) - theta.expand_dims(1)).clip(min_=0, max_=ep.inf)
    x = mask * x + (1 - mask) * proj * ep.sign(x)
    return x.reshape(original_shape)

def probability_ratio(
    self, tag: str, x: ep.Tensor, y: ep.Tensor, step: int
) -> None:
    x_ = x.float32().mean(axis=0).item()
    y_ = y.float32().mean(axis=0).item()
    if y_ == 0:
        return
    self.writer.add_scalar(tag, x_ / y_, step)

def conditional_mean(
    self, tag: str, x: ep.Tensor, cond: ep.Tensor, step: int
) -> None:
    cond_ = cond.numpy()
    if not cond_.any():
        return
    x_ = x.numpy()
    x_ = x_[cond_]
    self.writer.add_scalar(tag, x_.mean(axis=0).item(), step)

def test_2d(x2d: Tensor, p: float, axis: int, keepdims: bool) -> None:
    assert isinstance(axis, int)  # see test_4d for the more general test
    assert_allclose(
        lp(x2d, p, axis=axis, keepdims=keepdims).numpy(),
        norm(x2d.numpy(), ord=p, axis=axis, keepdims=keepdims),
        rtol=1e-6,
    )
    if p not in norms:
        return
    assert_allclose(
        norms[p](x2d, axis=axis, keepdims=keepdims).numpy(),
        norm(x2d.numpy(), ord=p, axis=axis, keepdims=keepdims),
        rtol=1e-6,
    )

def histogram(
    self, tag: str, x: ep.Tensor, step: int, *, first: bool = True
) -> None:
    x = x.numpy()
    self.writer.add_histogram(tag, x, step)
    if first:
        self.writer.add_scalar(tag + "/0", x[0].item(), step)

def append(self, x: ep.Tensor):
    if self.tensor is None:
        self.tensor = x
    x = x.numpy()
    assert x.shape == (self.N,)
    self.data[self.next] = x
    self.next = (self.next + 1) % self.maxlen

def _to_model_space(x: ep.Tensor, *, bounds: Bounds) -> ep.Tensor:
    min_, max_ = bounds
    x = x.tanh()  # from (-inf, +inf) to (-1, +1)
    a = (min_ + max_) / 2
    b = (max_ - min_) / 2
    x = x * b + a  # map from (-1, +1) to (min_, max_)
    return x

def clear(self, dims: ep.Tensor):
    if self.tensor is None:
        self.tensor = dims
    dims = dims.numpy()
    assert dims.shape == (self.N,)
    assert dims.dtype == np.bool_
    self.data[:, dims] = np.nan

def normalize(
    self, gradients: ep.Tensor, *, x: ep.Tensor, bounds: Bounds
) -> ep.Tensor:
    # zero out gradient components that would push x outside the valid bounds
    bad_pos = ep.logical_or(
        ep.logical_and(x == bounds.lower, gradients < 0),
        ep.logical_and(x == bounds.upper, gradients > 0),
    )
    gradients = ep.where(bad_pos, ep.zeros_like(gradients), gradients)

    abs_gradients = gradients.abs()
    quantiles = np.quantile(
        flatten(abs_gradients).numpy(), q=self.quantile, axis=-1
    )
    keep = abs_gradients >= atleast_kd(
        ep.from_numpy(gradients, quantiles), gradients.ndim
    )
    e = ep.where(keep, gradients.sign(), ep.zeros_like(gradients))
    return normalize_lp_norms(e, p=1)

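# Pure NumPy illustration (added for clarity, not part of the original code)
# of the sparsification rule in normalize() above: gradient entries below the
# q-th quantile of |g| are dropped, the survivors keep only their sign, and
# the result is L1-normalized.
def _example_quantile_sparsification() -> None:
    import numpy as np

    g = np.array([0.05, -0.8, 0.2, -0.02], dtype=np.float32)
    q = 0.5
    thresh = np.quantile(np.abs(g), q)  # 0.125
    e = np.where(np.abs(g) >= thresh, np.sign(g), 0.0)
    e = e / np.abs(e).sum()
    assert np.allclose(e, [0.0, -0.5, 0.5, 0.0])
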
def _gram_schmidt(self, v: ep.Tensor, ortho_with: ep.Tensor):
    v_repeated = ep.concatenate([v.expand_dims(0)] * len(ortho_with), axis=0)

    # inner product
    gs_coeff = (ortho_with * v_repeated).flatten(1).sum(1)
    proj = atleast_kd(gs_coeff, ortho_with.ndim) * ortho_with
    v = v - proj.sum(0)
    return v / ep.norms.l2(v)

def _to_attack_space(x: ep.Tensor, *, bounds: Bounds) -> ep.Tensor:
    min_, max_ = bounds
    a = (min_ + max_) / 2
    b = (max_ - min_) / 2
    x = (x - a) / b  # map from [min_, max_] to [-1, +1]
    x = x * 0.999999  # from [-1, +1] to approx. (-1, +1)
    x = x.arctanh()  # from (-1, +1) to (-inf, +inf)
    return x

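# Illustrative round-trip sketch (added for clarity, not part of the original
# code). Assumes eagerpy with a NumPy backend and that a plain (min, max)
# tuple can be passed as bounds; _to_model_space approximately inverts
# _to_attack_space for inputs strictly inside the bounds.
def _example_attack_space_round_trip() -> None:
    import numpy as np
    import eagerpy as ep

    bounds = (0.0, 1.0)
    x = ep.astensor(np.linspace(0.01, 0.99, 5).astype(np.float32))
    w = _to_attack_space(x, bounds=bounds)  # unconstrained representation
    x_restored = _to_model_space(w, bounds=bounds)
    assert (x - x_restored).abs().max().item() < 1e-4
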
def __call__(
    self,
    inputs: ep.Tensor,
    labels: ep.Tensor,
    perturbed: ep.Tensor,
    logits: ep.Tensor,
) -> ep.Tensor:
    classes = logits.argmax(axis=-1)
    return classes == self.target_classes

def mid_points(
    self, x0: ep.Tensor, x1: ep.Tensor, epsilons: ep.Tensor, bounds
) -> ep.Tensor:
    # returns a point between x0 and x1 where
    # epsilon = 0 returns x0 and epsilon = 1 returns x1

    # get epsilons in right shape for broadcasting
    epsilons = epsilons.reshape(epsilons.shape + (1,) * (x0.ndim - 1))
    return epsilons * x1 + (1 - epsilons) * x0

def project_onto_l1_ball(x: ep.Tensor, eps: ep.Tensor) -> ep.Tensor:
    """
    Compute Euclidean projection onto the L1 ball for a batch.

        min ||x - u||_2 s.t. ||u||_1 <= eps

    Inspired by the corresponding numpy version by Adrien Gaidon.
    Adapted from the pytorch version by Tony Duan:
    https://gist.github.com/tonyduan/1329998205d88c566588e57e3e2c0c55

    Parameters
    ----------
    x: (batch_size, *) eagerpy tensor
      batch of arbitrary-size tensors to project, possibly on GPU

    eps: (batch_size,) eagerpy tensor
      radius of the L1 ball to project onto, one radius per sample

    Returns
    -------
    u: (batch_size, *) eagerpy tensor
      batch of projected tensors, reshaped to match the original

    Notes
    -----
    The complexity of this algorithm is in O(d log d) as it involves sorting x.

    References
    ----------
    [1] Efficient Projections onto the l1-Ball for Learning in High Dimensions
        John Duchi, Shai Shalev-Shwartz, Yoram Singer, and Tushar Chandra.
        International Conference on Machine Learning (ICML 2008)
    """
    original_shape = x.shape
    x = flatten(x)
    mask = (ep.norms.l1(x, axis=1) < eps).astype(x.dtype).expand_dims(1)
    mu = ep.flip(ep.sort(ep.abs(x)), axis=-1)
    cumsum = ep.cumsum(mu, axis=-1)
    arange = ep.arange(x, 1, x.shape[1] + 1)
    rho = ep.max((mu * arange > (cumsum - eps.expand_dims(1))) * arange, axis=-1) - 1
    theta = (cumsum[ep.arange(x, x.shape[0]), rho] - eps) / (rho + 1.0)
    proj = (ep.abs(x) - theta.expand_dims(1)).clip(min_=0, max_=ep.inf)
    x = mask * x + (1 - mask) * proj * ep.sign(x)
    return x.reshape(original_shape)

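# Illustrative usage sketch for project_onto_l1_ball (added for clarity, not
# part of the original code). Assumes eagerpy with a NumPy backend and the
# flatten helper used above; after projection every sample should satisfy
# ||u||_1 <= eps up to floating point error.
def _example_project_onto_l1_ball() -> None:
    import numpy as np
    import eagerpy as ep

    x = ep.astensor(np.random.randn(4, 3, 4, 4).astype(np.float32))
    eps = ep.astensor(np.full((4,), 3.0, dtype=np.float32))
    u = project_onto_l1_ball(x, eps)
    assert u.shape == x.shape
    assert (ep.norms.l1(u.flatten(1), axis=1) <= eps + 1e-3).all().item()
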
def test_logical_and_nonboolean(
    t: Tensor, f: Callable[[Tensor, Tensor], Tensor]
) -> None:
    t = t.float32()
    f(t > 1, t > 1)
    with pytest.raises(ValueError):
        f(t, t > 1)
    with pytest.raises(ValueError):
        f(t > 1, t)
    with pytest.raises(ValueError):
        f(t, t)

def project(self, x: ep.Tensor, x0: ep.Tensor, epsilon: ep.Tensor) -> ep.Tensor:
    # keep, per sample, only the epsilon largest-magnitude entries of the perturbation
    flatten_delta = flatten(x - x0)
    abs_delta = abs(flatten_delta)
    epsilon = epsilon.astype(int)
    rows = range(flatten_delta.shape[0])
    idx_sorted = ep.argsort(abs_delta, axis=-1)[rows, -epsilon]
    thresholds = (ep.ones_like(flatten_delta).T * abs_delta[rows, idx_sorted]).T
    clipped = ep.where(abs_delta >= thresholds, flatten_delta, 0)
    return x0 + clipped.reshape(x0.shape).astype(x0.dtype)

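# Pure NumPy illustration of the L0 projection above (added for clarity, not
# part of the original code): per row, only the `epsilon` largest-magnitude
# entries of x - x0 are kept; all other entries fall back to x0.
def _example_l0_projection() -> None:
    import numpy as np

    x0 = np.zeros((1, 4), dtype=np.float32)
    x = np.array([[0.5, -0.1, 0.3, -0.7]], dtype=np.float32)
    epsilon = 2
    delta = x - x0
    kth = np.sort(np.abs(delta), axis=-1)[:, -epsilon][:, None]  # per-row threshold
    projected = x0 + np.where(np.abs(delta) >= kth, delta, 0.0)
    assert np.allclose(projected, [[0.5, 0.0, 0.0, -0.7]])
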
def mid_points(
    self,
    x0: ep.Tensor,
    x1: ep.Tensor,
    epsilons: ep.Tensor,
    bounds: Tuple[float, float],
):
    # returns a point between x0 and x1 where
    # epsilon = 0 returns x0 and epsilon = 1 returns x1
    delta = x1 - x0
    min_, max_ = bounds
    s = max_ - min_
    # get epsilons in right shape for broadcasting
    epsilons = epsilons.reshape(epsilons.shape + (1,) * (x0.ndim - 1))

    clipped_delta = ep.where(delta < -epsilons * s, -epsilons * s, delta)
    clipped_delta = ep.where(clipped_delta > epsilons * s, epsilons * s, clipped_delta)
    return x0 + clipped_delta

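# Pure NumPy illustration of the L-infinity interpolation above (added for
# clarity, not part of the original code): the difference x1 - x0 is clipped
# elementwise to epsilon * (max_ - min_) around x0.
def _example_linf_mid_points() -> None:
    import numpy as np

    x0 = np.array([0.0, 0.2, 0.9], dtype=np.float32)
    x1 = np.array([1.0, 0.3, 0.0], dtype=np.float32)
    epsilon, (min_, max_) = 0.25, (0.0, 1.0)
    s = max_ - min_
    delta = np.clip(x1 - x0, -epsilon * s, epsilon * s)
    assert np.allclose(x0 + delta, [0.25, 0.3, 0.65])
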
def mid_points(
    self,
    x0: ep.Tensor,
    x1: ep.Tensor,
    epsilons: ep.Tensor,
    bounds: Tuple[float, float],
) -> ep.Tensor:
    # returns a point between x0 and x1 where
    # epsilon = 0 returns x0 and epsilon = 1 returns x1

    # get epsilons in right shape for broadcasting
    epsilons = epsilons.reshape(epsilons.shape + (1,) * (x0.ndim - 1))

    threshold = (bounds[1] - bounds[0]) * (1 - epsilons)
    mask = (x1 - x0).abs() > threshold
    new_x = ep.where(
        mask, x0 + (x1 - x0).sign() * ((x1 - x0).abs() - threshold), x0
    )
    return new_x

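# Pure NumPy illustration of the soft-thresholding rule above (added for
# clarity, not part of the original code): coordinates of x1 - x0 with
# magnitude below the threshold are dropped, larger ones are shrunk by the
# threshold.
def _example_l1_mid_points() -> None:
    import numpy as np

    x0 = np.array([0.0, 0.2, 0.9], dtype=np.float32)
    x1 = np.array([1.0, 0.3, 0.0], dtype=np.float32)
    epsilon, (min_, max_) = 0.5, (0.0, 1.0)
    threshold = (max_ - min_) * (1 - epsilon)  # 0.5
    d = x1 - x0
    out = np.where(
        np.abs(d) > threshold, x0 + np.sign(d) * (np.abs(d) - threshold), x0
    )
    assert np.allclose(out, [0.5, 0.2, 0.5])
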
def _binary_search(
    self,
    x_adv_flat: ep.Tensor,
    mask: Union[ep.Tensor, List[bool]],
    mask_indices: ep.Tensor,
    indices: Union[ep.Tensor, List[int]],
    adv_values: ep.Tensor,
    non_adv_values: ep.Tensor,
    original_shape: Tuple,
    is_adversarial: Callable,
) -> ep.Tensor:
    for i in range(10):
        next_values = (adv_values + non_adv_values) / 2
        x_adv_flat = ep.index_update(
            x_adv_flat, (mask_indices, indices), next_values
        )
        is_adv = is_adversarial(x_adv_flat.reshape(original_shape))[mask]
        adv_values = ep.where(is_adv, next_values, adv_values)
        non_adv_values = ep.where(is_adv, non_adv_values, next_values)
    return adv_values

def apply_decision_rule(
    decision_rule: str,
    beta: float,
    best_advs: ep.Tensor,
    best_advs_norms: ep.Tensor,
    x_k: ep.Tensor,
    x_0: ep.Tensor,
    found_advs: ep.Tensor,
):
    if decision_rule == "EN":
        norms = beta * flatten(x_k - x_0).abs().sum(axis=-1) + flatten(
            x_k - x_0
        ).square().sum(axis=-1)
    elif decision_rule == "L1":
        norms = flatten(x_k - x_0).abs().sum(axis=-1)
    else:
        raise ValueError("invalid decision rule")

    new_best = (norms < best_advs_norms).float32() * found_advs.float32()
    new_best = atleast_kd(new_best, best_advs.ndim)
    best_advs = new_best * x_k + (1 - new_best) * best_advs
    best_advs_norms = ep.minimum(norms, best_advs_norms)

    return best_advs, best_advs_norms

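# Pure NumPy illustration of the two decision rules above (added for clarity,
# not part of the original code): "EN" ranks candidates by
# beta * ||d||_1 + ||d||_2^2, while "L1" uses ||d||_1 alone.
def _example_decision_rule_norms() -> None:
    import numpy as np

    d = np.array([0.3, -0.4, 0.0], dtype=np.float32)  # x_k - x_0 for one sample
    beta = 0.01
    en_norm = beta * np.abs(d).sum() + np.square(d).sum()
    l1_norm = np.abs(d).sum()
    assert np.isclose(en_norm, 0.257)
    assert np.isclose(l1_norm, 0.7)
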
def test_4d(
    x4d: Tensor, p: float, axis: Optional[ep.types.AxisAxes], keepdims: bool
) -> None:
    actual = lp(x4d, p, axis=axis, keepdims=keepdims).numpy()

    # numpy does not support arbitrary axes (limited to vector and matrix norms)
    if axis is None:
        axes = tuple(range(x4d.ndim))
    elif not isinstance(axis, tuple):
        axes = (axis,)
    else:
        axes = axis
    del axis
    axes = tuple(i % x4d.ndim for i in axes)

    x = x4d.numpy()
    other = tuple(i for i in range(x.ndim) if i not in axes)
    x = np.transpose(x, other + axes)
    x = x.reshape(x.shape[: len(other)] + (-1,))
    desired = norm(x, ord=p, axis=-1)
    if keepdims:
        shape = tuple(1 if i in axes else x4d.shape[i] for i in range(x4d.ndim))
        desired = desired.reshape(shape)

    assert_allclose(actual, desired, rtol=1e-6)

def get_perturbations(self, distances: ep.Tensor, grads: ep.Tensor) -> ep.Tensor:
    return atleast_kd(distances, grads.ndim) * grads.sign()

def targeted_is_adv(
    logits: ep.Tensor, target_classes: ep.Tensor, confidence
) -> ep.Tensor:
    logits = logits - ep.onehot_like(logits, target_classes, value=confidence)
    classes = logits.argmax(axis=-1)
    return classes == target_classes

def untargeted_is_adv(logits: ep.Tensor, labels: ep.Tensor, confidence) -> ep.Tensor:
    logits = logits + ep.onehot_like(logits, labels, value=confidence)
    classes = logits.argmax(axis=-1)
    return classes != labels

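# Illustrative margin check (added for clarity, not part of the original
# code). Assumes eagerpy with a NumPy backend; the confidence value is added
# to the label logit, so an input only counts as adversarial once another
# class wins by more than that margin.
def _example_untargeted_confidence() -> None:
    import numpy as np
    import eagerpy as ep

    logits = ep.astensor(np.array([[1.0, 1.2]], dtype=np.float32))
    labels = ep.astensor(np.array([0]))
    assert untargeted_is_adv(logits, labels, 0.0).item()  # class 1 already wins
    assert not untargeted_is_adv(logits, labels, 0.5).item()  # margin too small
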
def draw_proposals(
    bounds,
    originals: ep.Tensor,
    perturbed: ep.Tensor,
    unnormalized_source_directions: ep.Tensor,
    source_directions: ep.Tensor,
    source_norms: ep.Tensor,
    spherical_steps: ep.Tensor,
    source_steps: ep.Tensor,
):
    # remember the actual shape
    shape = originals.shape
    assert perturbed.shape == shape
    assert unnormalized_source_directions.shape == shape
    assert source_directions.shape == shape

    # flatten everything to (batch, size)
    originals = flatten(originals)
    perturbed = flatten(perturbed)
    unnormalized_source_directions = flatten(unnormalized_source_directions)
    source_directions = flatten(source_directions)
    N, D = originals.shape

    assert source_norms.shape == (N,)
    assert spherical_steps.shape == (N,)
    assert source_steps.shape == (N,)

    # draw from an iid Gaussian (we can share this across the whole batch)
    eta = ep.normal(perturbed, (D, 1))

    # make orthogonal (source_directions are normalized)
    eta = eta.T - ep.matmul(source_directions, eta) * source_directions
    assert eta.shape == (N, D)

    # rescale
    norms = l2norms(eta)
    assert norms.shape == (N,)
    eta = eta * atleast_kd(spherical_steps * source_norms / norms, eta.ndim)

    # project on the sphere using Pythagoras
    distances = atleast_kd((spherical_steps.square() + 1).sqrt(), eta.ndim)
    directions = eta - unnormalized_source_directions
    spherical_candidates = originals + directions / distances

    # clip
    min_, max_ = bounds
    spherical_candidates = spherical_candidates.clip(min_, max_)

    # step towards the original inputs
    new_source_directions = originals - spherical_candidates
    assert new_source_directions.ndim == 2
    new_source_directions_norms = l2norms(new_source_directions)

    # length if spherical_candidates were exactly on the sphere
    lengths = source_steps * source_norms

    # length including correction for numerical deviation from sphere
    lengths = lengths + new_source_directions_norms - source_norms

    # make sure the step size is positive
    lengths = ep.maximum(lengths, 0)

    # normalize the length
    lengths = lengths / new_source_directions_norms
    lengths = atleast_kd(lengths, new_source_directions.ndim)

    candidates = spherical_candidates + lengths * new_source_directions

    # clip
    candidates = candidates.clip(min_, max_)

    # restore shape
    candidates = candidates.reshape(shape)
    spherical_candidates = spherical_candidates.reshape(shape)
    return candidates, spherical_candidates

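# Hypothetical driver sketch for draw_proposals (added for clarity, not part
# of the original code). Assumes eagerpy with a NumPy backend and that the
# flatten, l2norms and atleast_kd helpers used above are available in this
# module; all _example_ names are illustrative only.
def _example_draw_proposals() -> None:
    import numpy as np
    import eagerpy as ep

    originals = ep.astensor(np.random.rand(2, 3, 8, 8).astype(np.float32))
    perturbed = ep.astensor(np.random.rand(2, 3, 8, 8).astype(np.float32))

    unnormalized = originals - perturbed
    source_norms = l2norms(flatten(unnormalized))
    source_directions = unnormalized / atleast_kd(source_norms, originals.ndim)

    spherical_steps = ep.ones(originals, (2,)) * 1e-2
    source_steps = ep.ones(originals, (2,)) * 1e-2

    candidates, spherical_candidates = draw_proposals(
        (0.0, 1.0),
        originals,
        perturbed,
        unnormalized,
        source_directions,
        source_norms,
        spherical_steps,
        source_steps,
    )
    assert candidates.shape == originals.shape
    assert spherical_candidates.shape == originals.shape
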
def normalize(
    self, gradients: ep.Tensor, *, x: ep.Tensor, bounds: Bounds
) -> ep.Tensor:
    return gradients.sign()

def test_arctanh(t: Tensor) -> Tensor:
    return ep.arctanh((t - t.mean()) / t.max())

def test_log10(t: Tensor) -> Tensor:
    return ep.log10(t.maximum(1e-8))

def test_item(t: Tensor) -> float:
    t = t.sum()
    return t.item()

def test_numpy_inplace(t: Tensor) -> None:
    copy = t + 0
    a = t.numpy().copy()
    a[:] += 1
    assert (t == copy).all()