def batch_shape(self):
    # loc.shape should be [..., d]
    # self.scale_tril.shape should be [..., d, d]
    # the broadcasted shape should be [..., d]
    return lax.broadcast_shapes(self.loc.shape, self.scale_tril.shape[:-1])[:-1]

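# A minimal usage sketch of the shape arithmetic above (the concrete shapes
# are hypothetical, not from the original code): broadcast loc's [..., d]
# against scale_tril's [..., d], then drop the event dimension d.
from jax import lax

loc_shape = (5, 3)               # [..., d] with d = 3
scale_tril_shape = (2, 1, 3, 3)  # [..., d, d]
batch = lax.broadcast_shapes(loc_shape, scale_tril_shape[:-1])[:-1]
assert batch == (2, 5)
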
def log_prob(self, value):
    shape = lax.broadcast_shapes(self.batch_shape, np.shape(value)[:-1])
    return np.broadcast_to(self.log_factor, shape)

def testBroadcastShapesReturnsPythonInts(self):
    shape1, shape2 = (1, 2, 3), (2, 3)
    out_shape = lax.broadcast_shapes(shape1, shape2)
    self.assertTrue(all(type(s) is int for s in out_shape))

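# Because lax.broadcast_shapes returns a tuple of plain Python ints (the
# property the test above checks), the result can be fed straight back into
# shape-taking APIs; a small illustrative sketch:
from jax import lax
import jax.numpy as jnp

out_shape = lax.broadcast_shapes((1, 2, 3), (2, 3))
assert out_shape == (1, 2, 3)
zeros = jnp.zeros(out_shape)  # works because entries are ints, not tracers
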
def test_multinomial_shape(p, shape):
    rng = random.PRNGKey(0)
    n = 10000
    expected_shape = lax.broadcast_shapes(p.shape[:-1], shape) + p.shape[-1:]
    assert np.shape(multinomial(rng, p, n, shape)) == expected_shape

def log_prob(self, value):
    shape = lax.broadcast_shapes(
        self.batch_shape,
        jnp.shape(value)[:max(jnp.ndim(value) - self.event_dim, 0)],
    )
    log_prob = self.base_dist.log_prob(value)
    return jnp.broadcast_to(log_prob, shape)

def __init__(self, loc=0., scale=1., validate_args=None):
    self.loc, self.scale = promote_shapes(loc, scale)
    batch_shape = lax.broadcast_shapes(np.shape(loc), np.shape(scale))
    super(Cauchy, self).__init__(batch_shape=batch_shape,
                                 validate_args=validate_args)

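# A sketch of how this constructor pattern resolves a batch shape; the
# example arrays are made up for illustration:
from jax import lax
import jax.numpy as jnp

loc = jnp.zeros((3, 1))
scale = jnp.ones((4,))
batch_shape = lax.broadcast_shapes(jnp.shape(loc), jnp.shape(scale))
assert batch_shape == (3, 4)
# Cauchy(loc, scale) would then report batch_shape == (3, 4).
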
def test_standard_gamma_shape(alpha, shape):
    rng = random.PRNGKey(0)
    expected_shape = lax.broadcast_shapes(np.shape(alpha), shape)
    assert np.shape(standard_gamma(rng, alpha, shape=shape)) == expected_shape

def infer_shapes(predictor, cutpoints):
    batch_shape = lax.broadcast_shapes(predictor, cutpoints[:-1])
    event_shape = ()
    return batch_shape, event_shape

def infer_shapes(logits, total_count):
    batch_shape = lax.broadcast_shapes(logits[:-1], total_count)
    event_shape = logits[-1:]
    return batch_shape, event_shape

def infer_shapes(concentration, total_count=()):
    batch_shape = lax.broadcast_shapes(concentration[:-1], total_count)
    event_shape = concentration[-1:]
    return batch_shape, event_shape

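# Note that these infer_shapes helpers operate on shape tuples rather than
# arrays; a worked example with hypothetical shapes:
from jax import lax

concentration_shape = (5, 4)  # [..., num_categories]
total_count_shape = ()        # scalar count
batch_shape = lax.broadcast_shapes(concentration_shape[:-1], total_count_shape)
event_shape = concentration_shape[-1:]
assert (batch_shape, event_shape) == ((5,), (4,))
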
def log_prob(self, value):
    if self._validate_args:
        self._validate_sample(value)
    batch_shape = lax.broadcast_shapes(self.batch_shape, np.shape(value))
    return -np.zeros(batch_shape)

def kl_divergence(p, q):
    kl = kl_divergence(p.base_dist, q.base_dist)
    shape = lax.broadcast_shapes(p.batch_shape, q.batch_shape)
    return jnp.broadcast_to(kl, shape)

def batch_shape(self):
    return lax.broadcast_shapes(self.minval.shape, self.maxval.shape)

def batch_shape(self):
    return lax.broadcast_shapes(self.loc.shape, self.scale.shape)

def inverse_shape(self, shape):
    return lax.broadcast_shapes(shape, getattr(self.exponent, "shape", ()))

def _xlog1py_jvp_lhs(g, ans, x, y):
    shape = lax.broadcast_shapes(np.shape(g), np.shape(y))
    g = np.broadcast_to(g, shape)
    y = np.broadcast_to(y, shape)
    g, y = _promote_args_like(osp_special.xlog1py, g, y)
    return lax._safe_mul(g, np.log1p(y))

def __init__(self, concentration, rate=1., validate_args=None):
    self.concentration, self.rate = promote_shapes(concentration, rate)
    batch_shape = lax.broadcast_shapes(np.shape(concentration), np.shape(rate))
    super(Gamma, self).__init__(batch_shape=batch_shape,
                                validate_args=validate_args)

def _xlog1py_jvp_rhs(g, ans, x, y):
    shape = lax.broadcast_shapes(np.shape(g), np.shape(x))
    g = np.broadcast_to(g, shape)
    x = np.broadcast_to(x, shape)
    x, y = _promote_args_like(osp_special.xlog1py, x, y)
    return g * lax._safe_mul(x, np.reciprocal(1 + y))

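# Sanity check for the two JVP rules above: d/dx xlog1py(x, y) = log1p(y)
# and d/dy xlog1py(x, y) = x / (1 + y). A sketch using the public
# jax.scipy.special.xlog1py (not part of the original snippets):
import jax
import jax.numpy as jnp
from jax.scipy.special import xlog1py

x, y = 2.0, 0.5
_, dx = jax.jvp(lambda x: xlog1py(x, y), (x,), (1.0,))
_, dy = jax.jvp(lambda y: xlog1py(x, y), (y,), (1.0,))
assert jnp.allclose(dx, jnp.log1p(y))
assert jnp.allclose(dy, x / (1.0 + y))
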
def __init__(self, low=0., high=1., validate_args=None):
    self.low, self.high = promote_shapes(low, high)
    batch_shape = lax.broadcast_shapes(np.shape(low), np.shape(high))
    super(Uniform, self).__init__(batch_shape=batch_shape,
                                  validate_args=validate_args)

def __init__(self, probs, total_count=1, validate_args=None):
    self.probs, self.total_count = promote_shapes(probs, total_count)
    batch_shape = lax.broadcast_shapes(np.shape(probs), np.shape(total_count))
    super(BinomialProbs, self).__init__(batch_shape=batch_shape,
                                        validate_args=validate_args)

def test_categorical_shape(p, shape):
    rng = random.PRNGKey(0)
    expected_shape = lax.broadcast_shapes(p.shape[:-1], shape)
    assert np.shape(categorical(rng, p, shape)) == expected_shape

def __init__(self, logits, total_count=1, validate_args=None):
    self.logits, self.total_count = promote_shapes(logits, total_count)
    batch_shape = lax.broadcast_shapes(jnp.shape(logits), jnp.shape(total_count))
    super(BinomialLogits, self).__init__(batch_shape=batch_shape,
                                         validate_args=validate_args)

def _cofactor_solve(a, b):
    """Equivalent to det(a)*solve(a, b) for nonsingular mat.

    Intermediate function used for jvp and vjp of det.
    This function borrows heavily from jax.numpy.linalg.solve and
    jax.numpy.linalg.slogdet to compute the gradient of the determinant
    in a way that is well defined even for low rank matrices.

    This function handles two different cases:
    * rank(a) == n or n-1
    * rank(a) < n-1

    For rank n-1 matrices, the gradient of the determinant is a rank 1 matrix.
    Rather than computing det(a)*solve(a, b), which would return NaN, we work
    directly with the LU decomposition. If a = p @ l @ u, then
    det(a)*solve(a, b) =
    prod(diag(u)) * u^-1 @ l^-1 @ p^-1 b =
    prod(diag(u)) * triangular_solve(u, solve(p @ l, b))
    If a is rank n-1, then the lower right corner of u will be zero and the
    triangular_solve will fail.
    Let x = solve(p @ l, b) and y = det(a)*solve(a, b).
    Then y_{n} =
        x_{n} / u_{nn} * prod_{i=1...n}(u_{ii}) =
        x_{n} * prod_{i=1...n-1}(u_{ii})
    So by replacing the lower-right corner of u with prod_{i=1...n-1}(u_{ii})^-1
    we can avoid the triangular_solve failing.
    To correctly compute the rest of y_{i} for i != n, we simply multiply
    x_{i} by det(a) for all i != n, which will be zero if rank(a) = n-1.

    For the second case, a check is done on the matrix to see if `solve`
    returns NaN or Inf, and gives a matrix of zeros as a result, as the
    gradient of the determinant of a matrix with rank less than n-1 is 0.
    This will still return the correct value for rank n-1 matrices, as the
    check is applied *after* the lower right corner of u has been updated.

    Args:
        a: A square matrix or batch of matrices, possibly singular.
        b: A matrix, or batch of matrices of the same dimension as a.

    Returns:
        det(a) and cofactor(a)^T*b, aka adjugate(a)*b
    """
    a = _promote_arg_dtypes(jnp.asarray(a))
    b = _promote_arg_dtypes(jnp.asarray(b))
    a_shape = jnp.shape(a)
    b_shape = jnp.shape(b)
    a_ndims = len(a_shape)
    if not (a_ndims >= 2 and a_shape[-1] == a_shape[-2]
            and b_shape[-2:] == a_shape[-2:]):
        msg = ("The arguments to _cofactor_solve must have shapes "
               "a=[..., m, m] and b=[..., m, m]; got a={} and b={}")
        raise ValueError(msg.format(a_shape, b_shape))
    if a_shape[-1] == 1:
        return a[0, 0], b
    # lu contains u in the upper triangular matrix and l in the strict lower
    # triangular matrix. The diagonal of l is set to ones without loss of
    # generality.
    lu, pivots, permutation = lax_linalg.lu(a)
    dtype = lax.dtype(a)
    batch_dims = lax.broadcast_shapes(lu.shape[:-2], b.shape[:-2])
    x = jnp.broadcast_to(b, batch_dims + b.shape[-2:])
    lu = jnp.broadcast_to(lu, batch_dims + lu.shape[-2:])
    # Compute (partial) determinant, ignoring last diagonal of LU
    diag = jnp.diagonal(lu, axis1=-2, axis2=-1)
    parity = jnp.count_nonzero(pivots != jnp.arange(a_shape[-1]), axis=-1)
    sign = jnp.array(-2 * (parity % 2) + 1, dtype=dtype)
    # partial_det[:, -1] contains the full determinant and
    # partial_det[:, -2] contains det(u) / u_{nn}.
    partial_det = jnp.cumprod(diag, axis=-1) * sign[..., None]
    lu = ops.index_update(lu, ops.index[..., -1, -1],
                          1.0 / partial_det[..., -2])
    permutation = jnp.broadcast_to(permutation, batch_dims + (a_shape[-1],))
    iotas = jnp.ix_(*(lax.iota(jnp.int32, b) for b in batch_dims + (1,)))
    # Filter out any matrices that are not full rank.
    d = jnp.ones(x.shape[:-1], x.dtype)
    d = lax_linalg.triangular_solve(lu, d, left_side=True, lower=False)
    d = jnp.any(jnp.logical_or(jnp.isnan(d), jnp.isinf(d)), axis=-1)
    d = jnp.tile(d[..., None, None], d.ndim * (1,) + x.shape[-2:])
    x = jnp.where(d, jnp.zeros_like(x), x)  # first filter
    x = x[iotas[:-1] + (permutation, slice(None))]
    x = lax_linalg.triangular_solve(lu, x, left_side=True, lower=True,
                                    unit_diagonal=True)
    x = jnp.concatenate((x[..., :-1, :] * partial_det[..., -1, None, None],
                         x[..., -1:, :]), axis=-2)
    x = lax_linalg.triangular_solve(lu, x, left_side=True, lower=False)
    x = jnp.where(d, jnp.zeros_like(x), x)  # second filter
    return partial_det[..., -1], x

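# A quick numerical check of the identity _cofactor_solve computes,
# det(a) * solve(a, b) == adjugate(a) @ b, on a nonsingular example.
# Illustrative only; assumes the definition above and its private helpers
# (_promote_arg_dtypes, lax_linalg, ops) are in scope:
import jax.numpy as jnp

a = jnp.array([[2.0, 1.0], [0.0, 3.0]])
b = jnp.eye(2)
det, adj_b = _cofactor_solve(a, b)
assert jnp.allclose(det, jnp.linalg.det(a))
assert jnp.allclose(adj_b, det * jnp.linalg.solve(a, b))
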
def __init__(self, gate, rate=1., validate_args=None):
    batch_shape = lax.broadcast_shapes(jnp.shape(gate), jnp.shape(rate))
    self.gate, self.rate = promote_shapes(gate, rate)
    super(ZeroInflatedPoisson, self).__init__(batch_shape,
                                              validate_args=validate_args)

def log_prob(self, value):
    batch_shape = jnp.shape(value)[:jnp.ndim(value) - len(self.event_shape)]
    batch_shape = lax.broadcast_shapes(batch_shape, self.batch_shape)
    return jnp.zeros(batch_shape)

def inverse_shape(self, shape):
    if len(shape) < 1:
        raise ValueError("Too few dimensions on input")
    return lax.broadcast_shapes(shape, self.loc.shape, self.scale_tril.shape[:-1])

def polygamma(n, x):
    assert jnp.issubdtype(lax.dtype(n), jnp.integer)
    n, x = _promote_args_inexact("polygamma", n, x)
    shape = lax.broadcast_shapes(n.shape, x.shape)
    return _polygamma(jnp.broadcast_to(n, shape), jnp.broadcast_to(x, shape))

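# A usage sketch: polygamma broadcasts n against x, and polygamma(0, x)
# agrees with digamma(x). The example values are illustrative and assume
# the definition above (with its _polygamma helper) is in scope:
import jax.numpy as jnp
from jax.scipy.special import digamma

x = jnp.array([1.0, 2.0, 3.0])
assert jnp.allclose(polygamma(0, x), digamma(x))
n = jnp.arange(2)[:, None]  # shape (2, 1) broadcasts against x's (3,)
assert polygamma(n, x).shape == (2, 3)
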
def forward_shape(self, shape):
    return lax.broadcast_shapes(shape, getattr(self.exponent, "shape", ()))

def inverse_shape(self, shape):
    return lax.broadcast_shapes(shape, getattr(self.loc, "shape", ()),
                                getattr(self.scale, "shape", ()))

def __init__(self, low=0., loc=0., scale=1., validate_args=None):
    self.low, self.loc, self.scale = promote_shapes(low, loc, scale)
    batch_shape = lax.broadcast_shapes(np.shape(low), np.shape(loc),
                                       np.shape(scale))
    self._normal = Normal(self.loc, self.scale)
    super(TruncatedNormal, self).__init__(batch_shape=batch_shape,
                                          validate_args=validate_args)