import eagerpy as ep


def l2_clipping_aware_rescaling(x, delta, eps: float, a: float = 0.0, b: float = 1.0):  # type: ignore
    """Calculates eta such that norm(clip(x + eta * delta, a, b) - x) == eps.

    Assumes x and delta have a batch dimension and that eps, a, and b are
    scalars. If the equation cannot be solved because eps is too large, the
    left hand side is maximized instead.

    Args:
        x: A batch of inputs (PyTorch Tensor, TensorFlow Eager Tensor,
            NumPy Array, JAX Array, or EagerPy Tensor).
        delta: A batch of perturbation directions (same shape and type as x).
        eps: The target norm (non-negative float).
        a: The lower bound of the data domain (float).
        b: The upper bound of the data domain (float).

    Returns:
        eta: A batch of scales with the same number of dimensions as x, with
            all axes of size 1 except for the batch dimension.
    """
    (x, delta), restore_fn = ep.astensors_(x, delta)
    N = x.shape[0]
    assert delta.shape[0] == N
    rows = ep.arange(x, N)

    delta2 = delta.square().reshape((N, -1))
    space = ep.where(delta >= 0, b - x, x - a).reshape((N, -1))
    f2 = space.square() / ep.maximum(delta2, 1e-20)
    ks = ep.argsort(f2, axis=-1)
    f2_sorted = f2[rows[:, ep.newaxis], ks]
    m = ep.cumsum(delta2[rows[:, ep.newaxis], ks.flip(axis=1)], axis=-1).flip(axis=1)
    dx = f2_sorted[:, 1:] - f2_sorted[:, :-1]
    dx = ep.concatenate((f2_sorted[:, :1], dx), axis=-1)
    dy = m * dx
    y = ep.cumsum(dy, axis=-1)
    c = y >= eps**2

    # work-around to get the first nonzero element in each row
    f = ep.arange(x, c.shape[-1], 0, -1)
    j = ep.argmax(c.astype(f.dtype) * f, axis=-1)

    eta2 = f2_sorted[rows, j] - (y[rows, j] - eps**2) / m[rows, j]
    # it can happen that for certain rows even the largest j is not large enough
    # (i.e. c[:, -1] is False); we then just use it (without any correction) as
    # it's the best we can do (these should also be the only cases where m[j]
    # can be 0, so they are not a problem)
    eta2 = ep.where(c[:, -1], eta2, f2_sorted[:, -1])
    eta = ep.sqrt(eta2)
    eta = eta.reshape((-1,) + (1,) * (x.ndim - 1))

    # xp = ep.clip(x + eta * delta, a, b)
    # l2 = (xp - x).reshape((N, -1)).square().sum(axis=-1).sqrt()
    return restore_fn(eta)
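# --- Usage sketch (not part of the original module; the setup below is an
# illustrative assumption). Given a batch x and a perturbation direction
# delta, the returned eta rescales delta so that clipping x + eta * delta to
# the [a, b] box yields a perturbation of L2 norm eps (or the largest norm the
# box constraint allows).
def _rescaling_example() -> None:
    import numpy as np

    rng = np.random.default_rng(0)
    x = rng.uniform(0.0, 1.0, size=(4, 3, 8, 8)).astype(np.float32)
    delta = rng.normal(size=x.shape).astype(np.float32)

    eta = l2_clipping_aware_rescaling(x, delta, eps=0.5)
    x_adv = np.clip(x + eta * delta, 0.0, 1.0)

    # per-sample perturbation norms; each should be close to eps unless the
    # box constraint makes eps unreachable for that row
    norms = np.sqrt(((x_adv - x).reshape(len(x), -1) ** 2).sum(axis=-1))
    print(norms)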
def project_onto_l1_ball(x: ep.Tensor, eps: ep.Tensor) -> ep.Tensor:
    """Computes the Euclidean projection onto the L1 ball for a batch. [#Duchi08]_

    Adapted from the PyTorch version by Tony Duan:
    https://gist.github.com/tonyduan/1329998205d88c566588e57e3e2c0c55

    Args:
        x: Batch of arbitrary-size tensors to project, possibly on GPU
        eps: radius of the L1 ball to project onto

    References:
        .. [#Duchi08] Efficient Projections onto the l1-Ball for Learning
            in High Dimensions.
            John Duchi, Shai Shalev-Shwartz, Yoram Singer, and Tushar Chandra.
            International Conference on Machine Learning (ICML 2008)
    """
    original_shape = x.shape
    x = flatten(x)
    mask = (ep.norms.l1(x, axis=1) <= eps).astype(x.dtype).expand_dims(1)
    mu = ep.flip(ep.sort(ep.abs(x)), axis=-1).astype(x.dtype)
    cumsum = ep.cumsum(mu, axis=-1)
    arange = ep.arange(x, 1, x.shape[1] + 1).astype(x.dtype)
    rho = (
        ep.max(
            (mu * arange > (cumsum - eps.expand_dims(1))).astype(x.dtype) * arange,
            axis=-1,
        )
        - 1
    )
    # samples already inside the L1 ball can yield a negative rho; clamp it to
    # a valid index (those rows are restored unchanged via the mask below)
    rho = ep.maximum(rho, 0)
    theta = (
        cumsum[ep.arange(x, x.shape[0]), rho.astype(ep.arange(x, 1).dtype)] - eps
    ) / (rho + 1.0)
    proj = (ep.abs(x) - theta.expand_dims(1)).clip(min_=0, max_=ep.inf)
    x = mask * x + (1 - mask) * proj * ep.sign(x)
    return x.reshape(original_shape)
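# Note: the second implementation below realizes the same Duchi et al. (2008)
# projection; it differs only in details (a strict `<` when building the mask,
# no explicit dtype casts, and no clamping of rho). A usage sketch follows it.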
def project_onto_l1_ball(x: ep.Tensor, eps: ep.Tensor):
    """
    Compute the Euclidean projection onto the L1 ball for a batch.

        min ||x - u||_2  s.t.  ||u||_1 <= eps

    Inspired by the corresponding numpy version by Adrien Gaidon.
    Adapted from the PyTorch version by Tony Duan:
    https://gist.github.com/tonyduan/1329998205d88c566588e57e3e2c0c55

    Parameters
    ----------
    x: (batch_size, *) EagerPy tensor
        batch of arbitrary-size tensors to project, possibly on GPU
    eps: (batch_size,) EagerPy tensor
        radii of the L1 balls to project onto, one per sample

    Returns
    -------
    u: (batch_size, *) EagerPy tensor
        batch of projected tensors, reshaped to match the original

    Notes
    -----
    The complexity of this algorithm is O(d log d), as it involves sorting x.

    References
    ----------
    [1] Efficient Projections onto the l1-Ball for Learning in High Dimensions
        John Duchi, Shai Shalev-Shwartz, Yoram Singer, and Tushar Chandra.
        International Conference on Machine Learning (ICML 2008)
    """
    original_shape = x.shape
    x = flatten(x)
    mask = (ep.norms.l1(x, axis=1) < eps).astype(x.dtype).expand_dims(1)
    mu = ep.flip(ep.sort(ep.abs(x)), axis=-1)
    cumsum = ep.cumsum(mu, axis=-1)
    arange = ep.arange(x, 1, x.shape[1] + 1)
    rho = ep.max((mu * arange > (cumsum - eps.expand_dims(1))) * arange, axis=-1) - 1
    theta = (cumsum[ep.arange(x, x.shape[0]), rho] - eps) / (rho + 1.0)
    proj = (ep.abs(x) - theta.expand_dims(1)).clip(min_=0, max_=ep.inf)
    x = mask * x + (1 - mask) * proj * ep.sign(x)
    return x.reshape(original_shape)
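# --- Usage sketch (not part of the original module). It assumes the
# module-level `flatten` helper used above is available (e.g.
# foolbox.devutils.flatten, which flattens everything except the batch
# dimension) and projects a small NumPy batch onto per-sample L1 balls of
# radius 1.
def _l1_projection_example() -> None:
    import numpy as np

    a = np.array([[3.0, -1.0, 0.5], [0.1, 0.2, -0.1]], dtype=np.float32)
    x = ep.astensor(a)
    eps = ep.astensor(np.full((len(a),), 1.0, dtype=np.float32))

    u = project_onto_l1_ball(x, eps)
    # the first row is projected onto the L1 sphere of radius 1; the second
    # row already lies inside the ball and is returned unchanged
    print(u.numpy(), ep.norms.l1(u, axis=1).numpy())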
def test_cumsum_axis(t: Tensor) -> Tensor:
    return ep.cumsum(t, axis=0)
def test_cumsum(t: Tensor) -> Tensor:
    return ep.cumsum(t)
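# --- Illustrative check (not part of the original test suite): with a
# NumPy-backed EagerPy tensor, ep.cumsum matches numpy.cumsum, flattening the
# input when no axis is given.
def _cumsum_example() -> None:
    import numpy as np

    a = np.arange(6, dtype=np.float32).reshape(2, 3)
    t = ep.astensor(a)
    assert np.allclose(ep.cumsum(t, axis=0).numpy(), np.cumsum(a, axis=0))
    assert np.allclose(ep.cumsum(t).numpy(), np.cumsum(a))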