Example #1
def test_DenseEquivariant(symmetries, use_bias, lattice):
    g, hi, perms = _setup_symm(symmetries, N=3, lattice=lattice)

    pt = perms.product_table
    n_symm = np.asarray(perms).shape[0]

    ma = nk.nn.DenseEquivariant(
        symmetry_info=pt.ravel(),
        in_features=1,
        out_features=1,
        use_bias=use_bias,
        bias_init=nk.nn.initializers.uniform(),
    )

    pars = ma.init(nk.jax.PRNGKey(), np.random.normal(0, 1, [1, n_symm]))

    # inv_pt computes chosen_op = gh^-1 instead of g^-1h
    chosen_op = np.random.randint(n_symm)
    inverse = PermutationGroup([perms.elems[i] for i in perms.inverse],
                               degree=g.n_nodes)
    inv_pt = inverse.product_table
    sym_op = np.where(inv_pt == chosen_op, 1.0, 0.0)

    v = random.normal(random.PRNGKey(0), [3, n_symm])
    v_trans = dot(v, sym_op)

    out = ma.apply(pars, v)
    out_trans = ma.apply(pars, v_trans)

    # the layer should be equivariant: transforming the input and then applying
    # the layer matches applying the layer and then transforming the output
    assert jnp.allclose(dot(out, sym_op.transpose(0, 1)), out_trans)
Example #2
def dot(a, b):  # pylint: disable=missing-docstring
    _check_arraylike("dot", a, b)
    a, b = _promote_dtypes(a, b)
    a_ndim, b_ndim = ndim(a), ndim(b)
    if a_ndim == 0 or b_ndim == 0:
        return lax.mul(a, b)
    if _max(a_ndim, b_ndim) <= 2:
        return lax.dot(a, b)
    a_reshaped = reshape(a, (-1, shape(a)[-1]))
    if _ndim(b) in {1, 2}:
        out = lax.dot(a_reshaped, b)
    else:
        b_reshaped = reshape(moveaxis(b, -2, 0), (shape(b)[-2], -1))
        out = lax.dot(a_reshaped, b_reshaped)
    return lax.reshape(out, a.shape[:-1] + b.shape[:-2] + b.shape[-2:][1:])
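A brief sketch of the three branches above, using made-up operands rather than anything from the source: scalar inputs fall through to an elementwise multiply, operands of rank two or lower go straight to lax.dot, and higher-rank operands are reshaped so a single lax.dot call can do the work.

import jax.numpy as jnp

s = jnp.dot(2.0, 3.0)                                # scalar branch: plain multiply -> 6.0
m = jnp.dot(jnp.ones((2, 3)), jnp.ones((3, 4)))      # rank <= 2 branch -> shape (2, 4)
t = jnp.dot(jnp.ones((2, 3, 4)), jnp.ones((4, 5)))   # reshape branch -> shape (2, 3, 5)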
Example #3
def _lu_blocked(a, block_size=128):
    """Blocked LU decomposition, as an unrolled loop."""
    m, n = a.shape
    r = min(m, n)
    pivot = np.zeros((r, ), dtype=np.int32)
    for k in range(0, r, block_size):
        b = min(r - k, block_size)
        block_pivot, perm, lu_block = _lu_unblocked(a[k:, k:k + b])

        a = ops.index_update(a, ops.index[k:, :], a[perm + k, :])
        a = ops.index_update(a, ops.index[k:, k:k + b], lu_block)
        pivot = ops.index_update(pivot, ops.index[k:k + b], block_pivot + k)

        if k + b < n:
            a = ops.index_update(
                a, ops.index[k:k + b, k + b:],
                triangular_solve(a[k:k + b, k:k + b],
                                 a[k:k + b, k + b:],
                                 left_side=True,
                                 lower=True,
                                 unit_diagonal=True))
            a = ops.index_add(
                a, ops.index[k + b:, k + b:],
                -lax.dot(a[k + b:, k:k + b],
                         a[k:k + b, k + b:],
                         precision=lax.Precision.HIGHEST))
    return pivot, a
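This routine is an internal building block of JAX's LU decomposition; a minimal sanity check can instead go through the public jax.scipy.linalg.lu entry point (the test matrix is my own, not part of the example):

import jax.numpy as jnp
from jax import random
from jax.scipy import linalg as jsp_linalg

a = random.normal(random.PRNGKey(0), (5, 5))
p, l, u = jsp_linalg.lu(a)                    # permutation, unit-lower and upper factors
assert jnp.allclose(p @ l @ u, a, atol=1e-5)  # reconstruction check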
Example #4
def _lu_blocked(a, block_size=32):
    """Blocked LU decomposition, as an unrolled loop."""
    m, n = a.shape
    r = min(m, n)
    pivot = np.zeros((r, ), dtype=np.int32)
    error = np.array(False, np.bool_)
    for k in range(0, r, block_size):
        b = min(r - k, block_size)
        block_pivot, perm, lu_block, block_error = _lu_unblocked(a[k:,
                                                                   k:k + b])
        error = error | block_error
        a = ops.index_update(a, ops.index[k:, k:k + b], lu_block)

        a = ops.index_update(a, ops.index[k:, :k], a[perm + k, :k])
        pivot = ops.index_update(pivot, ops.index[k:k + b], block_pivot + k)

        if k + b < n:
            a = ops.index_update(a, ops.index[k:, k + b:], a[perm + k, k + b:])
            a = ops.index_update(
                a, ops.index[k:k + b, k + b:],
                triangular_solve(a[k:k + b, k:k + b],
                                 a[k:k + b, k + b:],
                                 left_side=True,
                                 lower=True,
                                 unit_diagonal=True))
            a = ops.index_add(
                a, ops.index[k + b:, k + b:],
                -lax.dot(a[k + b:, k:k + b],
                         a[k:k + b, k + b:],
                         precision=lax.Precision.HIGHEST))
    a = np.where(error, lax.full_like(a, np.nan), a)
    return pivot, a
Example #5
 def testJVP(self):
   f = xmap(lambda x, y: jnp.cos(lax.dot(x, jnp.sin(y),
                                         precision=lax.Precision.HIGHEST)),
            in_axes=[['i', ...], {}], out_axes=['i', ...])
   x = jnp.arange(12, dtype=jnp.float32).reshape((3, 4)) / 100
   y = jnp.arange(20, dtype=jnp.float32).reshape((4, 5)) / 100
   jtu.check_grads(f, (x, y), order=2, modes=['fwd'])
Example #6
def _dot_papply_rule(name, vals, dims):
    x, y = vals
    xdim, ydim = dims
    if xdim is None:
        return lax.dot(x, y), ydim
    elif ydim is None:
        return lax.dot(x, y), xdim
    elif ydim == 0:
        if xdim != x.ndim:
            x = psplit(x, name, x.ndim)
        x = x[..., None]
        y = y[..., None, :]
        return psum(x * y, name), None
    else:
        y = pcollect(y, name)
        return lax.dot(x, y), xdim
Example #7
 def test_approx_max_k(self, qy_shape, db_shape, dtype, k, recall):
     rng = jtu.rand_default(self.rng())
     qy = rng(qy_shape, dtype)
     db = rng(db_shape, dtype)
     scores = lax.dot(qy, db)
     _, gt_args = lax.top_k(scores, k)
     _, ann_args = lax.approx_max_k(scores, k, recall_target=recall)
     self.assertEqual(k, len(ann_args[0]))
     ann_recall = compute_recall(np.asarray(ann_args), np.asarray(gt_args))
     self.assertGreater(ann_recall, recall)
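For reference, lax.approx_max_k can also be applied directly to a score matrix; this is a small sketch with assumed shapes, not code taken from the test:

import jax.numpy as jnp
from jax import lax, random

scores = random.normal(random.PRNGKey(0), (8, 1024))
# vals and idx both have shape (8, 10); the recall/speed trade-off is aimed at TPU,
# and other backends may simply compute an exact top-k.
vals, idx = lax.approx_max_k(scores, k=10, recall_target=0.95)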
Example #8
    def __call__(self, inputs: Array) -> Array:
        """
        Applies a masked linear transformation to the inputs.

        Args:
          inputs: input data with dimensions (batch, length, features).

        Returns:
          The transformed data.
        """
        if inputs.ndim == 2:
            is_single_input = True
            inputs = jnp.expand_dims(inputs, axis=0)
        else:
            is_single_input = False

        batch, size, in_features = inputs.shape
        inputs = inputs.reshape((batch, size * in_features))

        if self.use_bias:
            bias = self.param(
                "bias", self.bias_init, (size, self.features), self.param_dtype
            )
        else:
            bias = None

        mask = jnp.ones((size, size), dtype=self.param_dtype)
        mask = jnp.triu(mask, self.exclusive)
        mask = jnp.kron(
            mask, jnp.ones((in_features, self.features), dtype=self.param_dtype)
        )

        kernel = self.param(
            "kernel",
            wrap_kernel_init(self.kernel_init, mask),
            (size * in_features, size * self.features),
            self.param_dtype,
        )

        inputs, mask, kernel, bias = promote_dtype(
            inputs, mask, kernel, bias, dtype=None
        )

        y = lax.dot(inputs, mask * kernel, precision=self.precision)

        y = y.reshape((batch, size, self.features))

        if is_single_input:
            y = y.squeeze(axis=0)

        if self.use_bias:
            y = y + bias

        return y
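The mask construction inside __call__ can be inspected on its own; the following standalone sketch with small, made-up sizes (not part of the module) shows how the site-level causal mask is expanded to per-feature blocks:

import jax.numpy as jnp

size, in_features, features, exclusive = 3, 2, 2, 0
mask = jnp.triu(jnp.ones((size, size)), exclusive)        # (3, 3) causal mask over sites
mask = jnp.kron(mask, jnp.ones((in_features, features)))  # (6, 6): one block per (input, output) feature pair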
Example #9
def cholesky_jvp_rule(primals, tangents):
  x, = primals
  sigma_dot, = tangents
  L = cholesky_p.bind(x)

  # Forward-mode rule from https://arxiv.org/pdf/1602.07527.pdf
  sigma_dot = (sigma_dot + _T(sigma_dot)) / 2
  phi = lambda X: np.tril(X) / (1 + np.eye(x.shape[-1]))
  tmp = triangular_solve(L, sigma_dot,
                         left_side=False, transpose_a=True, lower=True)
  L_dot = lax.dot(L, phi(triangular_solve(
      L, tmp, left_side=True, transpose_a=False, lower=True)))
  return L, L_dot
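This rule is what fires when Cholesky is differentiated in forward mode; a hedged check on a small symmetric positive-definite matrix of my own (not from the source) looks like:

import jax
import jax.numpy as jnp

b = jnp.arange(9.0).reshape(3, 3)
a = b @ b.T + 3.0 * jnp.eye(3)   # symmetric positive-definite primal
da = 0.5 * jnp.eye(3)            # symmetric tangent direction
L, L_dot = jax.jvp(jnp.linalg.cholesky, (a,), (da,))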
Example #10
 def test_approx_min_k(self, qy_shape, db_shape, dtype, k, recall):
   rng = jtu.rand_default(self.rng())
   qy = rng(qy_shape, dtype)
   db = rng(db_shape, dtype)
   scores = lax.dot(qy, db)
   _, gt_args = lax.top_k(-scores, k)
   _, ann_args = ann.approx_min_k(scores, k, recall_target=recall)
   self.assertEqual(k, len(ann_args[0]))
   gt_args_sets = [set(np.asarray(x)) for x in gt_args]
   hits = sum(
       len(list(x
                for x in ann_args_per_q
                if x.item() in gt_args_sets[q]))
       for q, ann_args_per_q in enumerate(ann_args))
   self.assertGreater(hits / (qy_shape[0] * k), recall)
Example #11
 def f(x, y):
     return lax.dot(x, y)
Example #12
 def fun(x, y):
   return lax.dot(x, y)
Example #13
def _matvec_multiply(a, b):
    return lax.dot(a, b, precision=lax.Precision.HIGHEST)
Example #14
def high_precision_dot(a, b):
  return lax.dot(a, b, precision=lax.Precision.HIGHEST)
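A minimal usage sketch with assumed inputs (not from the source): requesting the highest-precision matmul path, which mainly matters on TPU, where the default precision may use lower-precision passes for float32 operands.

import jax.numpy as jnp
from jax import lax

a = jnp.ones((4, 8), jnp.float32)
b = jnp.ones((8, 2), jnp.float32)
out = lax.dot(a, b, precision=lax.Precision.HIGHEST)  # shape (4, 2)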
Example #15
    def update_site(self, inputs: Array, index: int) -> Array:
        """
        Adds an input site into the cache, and applies the masked linear transformation to the cache.

        Args:
          inputs: an input site to be added into the cache with dimensions (batch, features).
          index: the index of the output site. The index of the input site should be `index - self.exclusive`.

        Returns:
          The output site with dimensions (batch, features).
        """
        dtype = jnp.promote_types(inputs.dtype, self.dtype)

        inputs = jnp.asarray(inputs, dtype)

        is_single_input = False
        if inputs.ndim == 1:
            is_single_input = True
            inputs = jnp.expand_dims(inputs, axis=0)

        batch, in_features = inputs.shape
        size = self.size

        # Number of input sites that the output site at `index` depends on
        size_i = index + 1

        # Initialize the cache with zeros, and the RNG key is None
        # `cache.dtype` must be the same as `inputs.dtype` (no promotion)
        _cache = self.variable("cache", "inputs", zeros, None,
                               (batch, size, in_features), inputs.dtype)

        initializing = self.is_mutable_collection("params")
        if not initializing:
            # Add the input site to the cache
            # To write the cache, assign to `_cache.value`
            _cache.value = lax.cond(
                index - self.exclusive >= 0,
                lambda _: _cache.value.at[:, index - self.exclusive, :].set(
                    inputs),
                lambda _: _cache.value,
                None,
            )

        cache = _cache.value
        cache = jnp.asarray(cache, dtype)

        cache_i = cache[:, :size_i, :]
        cache_i = cache_i.reshape((batch, size_i * in_features))

        # The construction of `mask` will be optimized to a constant by JIT
        mask = jnp.ones((size, size), dtype=self.dtype)
        mask = jnp.triu(mask, self.exclusive)
        mask = jnp.kron(
            mask, jnp.ones((in_features, self.features), dtype=self.dtype))

        kernel = self.param(
            "kernel",
            wrap_kernel_init(self.kernel_init, mask),
            (size * in_features, size * self.features),
            self.dtype,
        )
        mask = jnp.asarray(mask, dtype)
        kernel = jnp.asarray(kernel, dtype)

        mask_i = mask.reshape((size, in_features, size, self.features))
        mask_i = mask_i[:size_i, :, index, :]
        mask_i = mask_i.reshape((size_i * in_features, self.features))

        kernel_i = kernel.reshape((size, in_features, size, self.features))
        kernel_i = kernel_i[:size_i, :, index, :]
        kernel_i = kernel_i.reshape((size_i * in_features, self.features))

        y_i = lax.dot(cache_i, mask_i * kernel_i, precision=self.precision)

        if self.use_bias:
            bias = self.param("bias", self.bias_init, (size, self.features),
                              self.dtype)
            bias = jnp.asarray(bias, dtype)

            bias_i = bias[index, :]

            y_i = y_i + bias_i

        assert y_i.shape[1] == self.features

        if is_single_input:
            y_i = y_i.squeeze(axis=0)

        return y_i
Example #16
 def f(x, y):
     a = lax.dot(x, y)
     # TODO(skye): use these more interesting outputs once returning constants
     # works
     # return a, a + 1, 3
     return a, a + x, x + y
Example #17
 def f(x, y):
     a = lax.dot(x, y)
     b = a + jnp.ones(a.shape)
     c = b + jnp.ones(a.shape[0])
     return c
Example #18
 def f(x, y):
   a = lax.dot(x, y)
   b = a + jnp.ones(a.shape)
   c = b + jnp.ones(a.shape[0])[jnp.newaxis]
   return c