def test_DenseEquivariant(symmetries, use_bias, lattice):
    g, hi, perms = _setup_symm(symmetries, N=3, lattice=lattice)

    pt = perms.product_table
    n_symm = np.asarray(perms).shape[0]

    ma = nk.nn.DenseEquivariant(
        symmetry_info=pt.ravel(),
        in_features=1,
        out_features=1,
        use_bias=use_bias,
        bias_init=nk.nn.initializers.uniform(),
    )

    pars = ma.init(nk.jax.PRNGKey(), np.random.normal(0, 1, [1, n_symm]))

    # inv_pt computes chosen_op = gh^-1 instead of g^-1h
    chosen_op = np.random.randint(n_symm)
    inverse = PermutationGroup(
        [perms.elems[i] for i in perms.inverse], degree=g.n_nodes
    )
    inv_pt = inverse.product_table
    sym_op = np.where(inv_pt == chosen_op, 1.0, 0.0)

    v = random.normal(random.PRNGKey(0), [3, n_symm])
    v_trans = dot(v, sym_op)

    out = ma.apply(pars, v)
    out_trans = ma.apply(pars, v_trans)

    # output should be equivariant: transforming the input and then applying
    # the layer matches applying the layer and then transforming the output
    assert jnp.allclose(dot(out, sym_op.transpose(0, 1)), out_trans)

def dot(a, b):  # pylint: disable=missing-docstring
  _check_arraylike("dot", a, b)
  a, b = _promote_dtypes(a, b)
  a_ndim, b_ndim = ndim(a), ndim(b)
  if a_ndim == 0 or b_ndim == 0:
    return lax.mul(a, b)
  if _max(a_ndim, b_ndim) <= 2:
    return lax.dot(a, b)
  # General rank: contract the last axis of `a` with the second-to-last axis
  # of `b` by flattening both to 2D, then restore the np.dot output shape.
  a_reshaped = reshape(a, (-1, shape(a)[-1]))
  if _ndim(b) in {1, 2}:
    out = lax.dot(a_reshaped, b)
  else:
    b_reshaped = reshape(moveaxis(b, -2, 0), (shape(b)[-2], -1))
    out = lax.dot(a_reshaped, b_reshaped)
  return lax.reshape(out, a.shape[:-1] + b.shape[:-2] + b.shape[-2:][1:])

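# Minimal usage sketch (not part of the original source): the general-rank
# branch above reproduces np.dot semantics, contracting the last axis of `a`
# with the second-to-last axis of `b`. Checked here via the public jnp.dot.
import numpy as np
import jax.numpy as jnp

a = np.arange(24.0).reshape(2, 3, 4)
b = np.arange(40.0).reshape(2, 4, 5)
out = jnp.dot(a, b)
assert out.shape == (2, 3, 2, 5)
assert np.allclose(out, np.dot(a, b))
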
def _lu_blocked(a, block_size=128):
  """Blocked LU decomposition, as an unrolled loop."""
  m, n = a.shape
  r = min(m, n)
  pivot = np.zeros((r,), dtype=np.int32)
  for k in range(0, r, block_size):
    b = min(r - k, block_size)
    block_pivot, perm, lu_block = _lu_unblocked(a[k:, k:k + b])

    a = ops.index_update(a, ops.index[k:, :], a[perm + k, :])
    a = ops.index_update(a, ops.index[k:, k:k + b], lu_block)
    pivot = ops.index_update(pivot, ops.index[k:k + b], block_pivot + k)

    if k + b < n:
      a = ops.index_update(
          a, ops.index[k:k + b, k + b:],
          triangular_solve(a[k:k + b, k:k + b], a[k:k + b, k + b:],
                           left_side=True, lower=True, unit_diagonal=True))
      a = ops.index_add(
          a, ops.index[k + b:, k + b:],
          -lax.dot(a[k + b:, k:k + b], a[k:k + b, k + b:],
                   precision=lax.Precision.HIGHEST))
  return pivot, a

def _lu_blocked(a, block_size=32):
  """Blocked LU decomposition, as an unrolled loop."""
  m, n = a.shape
  r = min(m, n)
  pivot = np.zeros((r,), dtype=np.int32)
  error = np.array(False, np.bool_)
  for k in range(0, r, block_size):
    b = min(r - k, block_size)
    block_pivot, perm, lu_block, block_error = _lu_unblocked(a[k:, k:k + b])
    error = error | block_error
    a = ops.index_update(a, ops.index[k:, k:k + b], lu_block)
    a = ops.index_update(a, ops.index[k:, :k], a[perm + k, :k])
    pivot = ops.index_update(pivot, ops.index[k:k + b], block_pivot + k)

    if k + b < n:
      a = ops.index_update(a, ops.index[k:, k + b:], a[perm + k, k + b:])
      a = ops.index_update(
          a, ops.index[k:k + b, k + b:],
          triangular_solve(a[k:k + b, k:k + b], a[k:k + b, k + b:],
                           left_side=True, lower=True, unit_diagonal=True))
      a = ops.index_add(
          a, ops.index[k + b:, k + b:],
          -lax.dot(a[k + b:, k:k + b], a[k:k + b, k + b:],
                   precision=lax.Precision.HIGHEST))
  a = np.where(error, lax.full_like(a, np.nan), a)
  return pivot, a

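# Sanity-check sketch (not part of the original source). Assuming
# jax.scipy.linalg.lu is the public entry point backed by this blocked
# kernel, P @ L @ U should reconstruct the input matrix.
import numpy as np
import jax.scipy.linalg as jsl

a = np.random.RandomState(0).randn(6, 6).astype(np.float32)
p, l, u = jsl.lu(a)
assert np.allclose(p @ l @ u, a, atol=1e-5)
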
def testJVP(self):
  f = xmap(lambda x, y: jnp.cos(lax.dot(x, jnp.sin(y),
                                        precision=lax.Precision.HIGHEST)),
           in_axes=[['i', ...], {}], out_axes=['i', ...])
  x = jnp.arange(12, dtype=jnp.float32).reshape((3, 4)) / 100
  y = jnp.arange(20, dtype=jnp.float32).reshape((4, 5)) / 100
  jtu.check_grads(f, (x, y), order=2, modes=['fwd'])

def _dot_papply_rule(name, vals, dims):
  x, y = vals
  xdim, ydim = dims
  if xdim is None:
    return lax.dot(x, y), ydim
  elif ydim is None:
    return lax.dot(x, y), xdim
  elif ydim == 0:
    if xdim != x.ndim:
      x = psplit(x, name, x.ndim)
    x = x[..., None]
    y = y[..., None, :]
    return psum(x * y, name), None
  else:
    y = pcollect(y, name)
    return lax.dot(x, y), xdim

def test_approx_max_k(self, qy_shape, db_shape, dtype, k, recall):
  rng = jtu.rand_default(self.rng())
  qy = rng(qy_shape, dtype)
  db = rng(db_shape, dtype)
  scores = lax.dot(qy, db)
  _, gt_args = lax.top_k(scores, k)
  _, ann_args = lax.approx_max_k(scores, k, recall_target=recall)
  self.assertEqual(k, len(ann_args[0]))
  ann_recall = compute_recall(np.asarray(ann_args), np.asarray(gt_args))
  self.assertGreater(ann_recall, recall)

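# Illustrative usage sketch (not part of the original test): approx_max_k
# returns the approximate top-k values and their indices per row;
# recall_target trades accuracy for speed on accelerators that support the
# approximation, and the call falls back to an exact top-k elsewhere.
import jax.numpy as jnp
from jax import lax

scores = jnp.array([[0.1, 0.9, 0.4, 0.7, 0.2]])
vals, idx = lax.approx_max_k(scores, k=2, recall_target=0.95)
# vals ~ [[0.9, 0.7]], idx ~ [[1, 3]] when the search is exact
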
def __call__(self, inputs: Array) -> Array:
    """
    Applies a masked linear transformation to the inputs.

    Args:
      inputs: input data with dimensions (batch, length, features).

    Returns:
      The transformed data.
    """
    if inputs.ndim == 2:
        is_single_input = True
        inputs = jnp.expand_dims(inputs, axis=0)
    else:
        is_single_input = False

    batch, size, in_features = inputs.shape
    inputs = inputs.reshape((batch, size * in_features))

    if self.use_bias:
        bias = self.param(
            "bias", self.bias_init, (size, self.features), self.param_dtype
        )
    else:
        bias = None

    mask = jnp.ones((size, size), dtype=self.param_dtype)
    mask = jnp.triu(mask, self.exclusive)
    mask = jnp.kron(
        mask, jnp.ones((in_features, self.features), dtype=self.param_dtype)
    )

    kernel = self.param(
        "kernel",
        wrap_kernel_init(self.kernel_init, mask),
        (size * in_features, size * self.features),
        self.param_dtype,
    )

    inputs, mask, kernel, bias = promote_dtype(
        inputs, mask, kernel, bias, dtype=None
    )

    y = lax.dot(inputs, mask * kernel, precision=self.precision)

    y = y.reshape((batch, size, self.features))

    if is_single_input:
        y = y.squeeze(axis=0)

    if self.use_bias:
        y = y + bias

    return y

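# Illustrative sketch (not part of the original source) of the mask built in
# the method above: triu gives the site-level autoregressive mask (output
# site j only sees input sites i <= j - exclusive), and kron expands it to
# the full kernel shape (size * in_features, size * features).
import jax.numpy as jnp

size, in_features, features, exclusive = 4, 2, 3, 0
site_mask = jnp.triu(jnp.ones((size, size)), exclusive)
full_mask = jnp.kron(site_mask, jnp.ones((in_features, features)))
assert full_mask.shape == (size * in_features, size * features)
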
def cholesky_jvp_rule(primals, tangents):
  x, = primals
  sigma_dot, = tangents
  L = cholesky_p.bind(x)

  # Forward-mode rule from https://arxiv.org/pdf/1602.07527.pdf
  sigma_dot = (sigma_dot + _T(sigma_dot)) / 2
  phi = lambda X: np.tril(X) / (1 + np.eye(x.shape[-1]))
  tmp = triangular_solve(L, sigma_dot,
                         left_side=False, transpose_a=True, lower=True)
  L_dot = lax.dot(L, phi(triangular_solve(
      L, tmp, left_side=True, transpose_a=False, lower=True)))
  return L, L_dot

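# Differentiation check sketch (not part of the original source): since
# Sigma = L @ L^T, the tangent produced by the rule above must satisfy
# dL @ L^T + L @ dL^T == dSigma. Exercised through jnp.linalg.cholesky.
import jax
import jax.numpy as jnp
import numpy as np

rng = np.random.RandomState(0)
b = rng.randn(4, 4).astype(np.float32)
x = b @ b.T + 4 * np.eye(4, dtype=np.float32)   # symmetric positive definite
dx = rng.randn(4, 4).astype(np.float32)
dx = (dx + dx.T) / 2                            # symmetric perturbation

L, L_dot = jax.jvp(jnp.linalg.cholesky, (x,), (dx,))
assert np.allclose(L_dot @ L.T + L @ L_dot.T, dx, atol=1e-4)
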
def test_approx_min_k(self, qy_shape, db_shape, dtype, k, recall):
  rng = jtu.rand_default(self.rng())
  qy = rng(qy_shape, dtype)
  db = rng(db_shape, dtype)
  scores = lax.dot(qy, db)
  _, gt_args = lax.top_k(-scores, k)
  _, ann_args = ann.approx_min_k(scores, k, recall_target=recall)
  self.assertEqual(k, len(ann_args[0]))
  gt_args_sets = [set(np.asarray(x)) for x in gt_args]
  hits = sum(
      len(list(x for x in ann_args_per_q if x.item() in gt_args_sets[q]))
      for q, ann_args_per_q in enumerate(ann_args))
  self.assertGreater(hits / (qy_shape[0] * k), recall)

def f(x, y):
  return lax.dot(x, y)

def fun(x, y):
  return lax.dot(x, y)

def _matvec_multiply(a, b):
  return lax.dot(a, b, precision=lax.Precision.HIGHEST)

def high_precision_dot(a, b):
  return lax.dot(a, b, precision=lax.Precision.HIGHEST)

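# Usage sketch (not part of the original source), assuming the helper above
# is in scope. Precision.HIGHEST asks the backend for its most accurate
# matmul algorithm; on TPU this avoids the default reduced-precision passes,
# while on CPU/GPU float32 inputs it is typically a no-op.
import jax.numpy as jnp
from jax import lax

x = jnp.linspace(0.0, 1.0, 64).reshape(8, 8)
y = high_precision_dot(x, x)   # same result shape as x @ x
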
def update_site(self, inputs: Array, index: int) -> Array:
    """
    Adds an input site into the cache, and applies the masked linear
    transformation to the cache.

    Args:
      inputs: an input site to be added into the cache with dimensions
        (batch, features).
      index: the index of the output site. The index of the input site
        should be `index - self.exclusive`.

    Returns:
      The output site with dimensions (batch, features).
    """
    dtype = jnp.promote_types(inputs.dtype, self.dtype)
    inputs = jnp.asarray(inputs, dtype)

    is_single_input = False
    if inputs.ndim == 1:
        is_single_input = True
        inputs = jnp.expand_dims(inputs, axis=0)

    batch, in_features = inputs.shape
    size = self.size

    # Number of input sites the output site at `index` depends on
    size_i = index + 1

    # Initialize the cache with zeros, and the RNG key is None
    # `cache.dtype` must be the same as `inputs.dtype` (no promotion)
    _cache = self.variable("cache", "inputs", zeros, None,
                           (batch, size, in_features), inputs.dtype)

    initializing = self.is_mutable_collection("params")
    if not initializing:
        # Add the input site into the cache
        # To write the cache, use `_cache.value` as the left value of the
        # assignment
        _cache.value = lax.cond(
            index - self.exclusive >= 0,
            lambda _: _cache.value.at[:, index - self.exclusive, :].set(inputs),
            lambda _: _cache.value,
            None,
        )

    cache = _cache.value
    cache = jnp.asarray(cache, dtype)

    cache_i = cache[:, :size_i, :]
    cache_i = cache_i.reshape((batch, size_i * in_features))

    # The construction of `mask` will be optimized to a constant by JIT
    mask = jnp.ones((size, size), dtype=self.dtype)
    mask = jnp.triu(mask, self.exclusive)
    mask = jnp.kron(
        mask, jnp.ones((in_features, self.features), dtype=self.dtype))

    kernel = self.param(
        "kernel",
        wrap_kernel_init(self.kernel_init, mask),
        (size * in_features, size * self.features),
        self.dtype,
    )

    mask = jnp.asarray(mask, dtype)
    kernel = jnp.asarray(kernel, dtype)

    mask_i = mask.reshape((size, in_features, size, self.features))
    mask_i = mask_i[:size_i, :, index, :]
    mask_i = mask_i.reshape((size_i * in_features, self.features))

    kernel_i = kernel.reshape((size, in_features, size, self.features))
    kernel_i = kernel_i[:size_i, :, index, :]
    kernel_i = kernel_i.reshape((size_i * in_features, self.features))

    y_i = lax.dot(cache_i, mask_i * kernel_i, precision=self.precision)

    if self.use_bias:
        bias = self.param("bias", self.bias_init, (size, self.features),
                          self.dtype)
        bias = jnp.asarray(bias, dtype)
        bias_i = bias[index, :]
        y_i = y_i + bias_i

    assert y_i.shape[1] == self.features

    if is_single_input:
        y_i = y_i.squeeze(axis=0)

    return y_i

def f(x, y):
  a = lax.dot(x, y)
  # TODO(skye): use these more interesting outputs once returning constants
  # works
  # return a, a + 1, 3
  return a, a + x, x + y

def f(x, y):
  a = lax.dot(x, y)
  b = a + jnp.ones(a.shape)
  c = b + jnp.ones(a.shape[0])
  return c

def f(x, y):
  a = lax.dot(x, y)
  b = a + jnp.ones(a.shape)
  c = b + jnp.ones(a.shape[0])[jnp.newaxis]
  return c