def issubdtype(dtype1: DType, dtype2: DType):
    """Determine whether `dtype1` is a subtype of `dtype2`.

    Both data types are converted to NumPy data types, after which the check is
    delegated to `np.issubdtype`.

    Args:
        dtype1 (dtype): Candidate subtype.
        dtype2 (dtype): Candidate supertype.

    Returns:
        bool: `dtype1` is a subtype of `dtype2`.
    """
    np_dtype1 = convert(dtype1, NPDType)
    np_dtype2 = convert(dtype2, NPDType)
    return np.issubdtype(np_dtype1, np_dtype2)
def __init__(self, m_i, m_z, k_zi, z, K_z, y):
    # These four arguments are stored exactly as given.
    self.m_i, self.m_z, self.k_zi, self.z = m_i, m_z, k_zi, z
    # `K_z` is stored as an `AbstractMatrix`.
    self.K_z = convert(K_z, AbstractMatrix)
    # `y` is passed through `uprank` before it is stored.
    self.y = uprank(y)
def scan(f: Callable, xs, *init_state):
    """Run a TensorFlow-style scan of `f` over the first dimension of `xs`.

    Args:
        f (function): Scanning function. It is called with the squeezed current
            state and one element of `xs`; its output, converted to a tuple,
            becomes the next state.
        xs (tensor): Tensor to scan over.
        *init_state (tensor): Initial state.

    Returns:
        tensor: The recorded states, stacked over the elements of the state and
            over the iterations, with the elements dimension first.

    Raises:
        RuntimeError: If the shapes of the state differ from the shapes of the
            initial state.
    """

    def shapes_of(elements):
        return [B.shape(element) for element in elements]

    current = init_state
    state_shape = shapes_of(current)
    recorded = []

    # Index explicitly: simply iterating over `xs` breaks TensorFlow.
    num_steps = int(B.shape(xs)[0])
    for step in range(num_steps):
        current = convert(f(B.squeeze(current), xs[step]), tuple)
        new_state_shape = shapes_of(current)

        # The shapes of the state must remain constant throughout the scan.
        if new_state_shape != state_shape:
            message = "Shape of state changed from {} to {}.".format(
                state_shape, new_state_shape
            )
            raise RuntimeError(message)

        # Stack the elements of the state and record the result.
        recorded.append(B.stack(*current, axis=0))

    # Stack the recorded states over the iterations.
    stacked = B.stack(*recorded, axis=0)

    # Swap the first two dimensions to put the elements dimension first.
    trailing = tuple(range(2, B.rank(stacked)))
    return B.transpose(stacked, perm=(1, 0) + trailing)
def __init__(self, lam, prec):
    self.lam = lam
    # Store the precision as an `AbstractMatrix`. The Cholesky of `self.prec`
    # will be cached.
    self.prec = convert(prec, AbstractMatrix)
    # These start out as `None`.
    self._mean = self._var = self._m2 = None
def _minimise_l_bfgs_b(
    f, vs, f_calls=10000, iters=1000, trace=False, names=None, jit=False
):
    """Minimise `f` over the latent variables of `vs` with `fmin_l_bfgs_b`.

    Args:
        f (function): Objective. It is called as `f(vs)`.
        vs: Variable container providing `get_latent_vector` and
            `set_latent_vector`.
        f_calls (int, optional): Passed to the optimiser as `maxfun`. Defaults
            to `10000`.
        iters (int, optional): Passed to the optimiser as `maxiter`. Defaults to
            `1000`.
        trace (bool, optional): Print progress and the termination message.
            Defaults to `False`.
        names (optional): Names of the variables to optimise. They are passed
            through `_convert_and_validate_names`.
        jit (bool, optional): Forwarded to `wrap_f`. Defaults to `False`.

    Returns:
        The optimal value reported by the optimiser. If `iters` or `f_calls` is
        zero, the initial objective value converted to NumPy instead.
    """
    names = _convert_and_validate_names(names)

    # Evaluate the objective once to ensure that all variables are initialised
    # and available.
    initial_value = f(vs)

    # SciPy doesn't perform zero iterations, so that edge case is handled here.
    if iters == 0 or f_calls == 0:
        return B.to_numpy(initial_value)

    # Starting point of the optimisation.
    x0 = B.to_numpy(vs.get_latent_vector(*names))

    def _convert(*xs):
        # The optimiser expects to get `float64`s.
        return [B.cast(np.float64, B.to_numpy(x)) for x in xs]

    # Wrap the function and get the list of function evaluations.
    f_vals, f_wrapped = wrap_f(vs, names, f, jit, _convert)

    def run_optimiser(callback_=lambda _: None):
        return fmin_l_bfgs_b(
            func=f_wrapped,
            x0=x0,
            maxiter=iters,
            maxfun=f_calls,
            callback=callback_,
            disp=0,
        )

    if not trace:
        # Silent mode: simply perform the minimisation.
        x_opt, val_opt, info = run_optimiser()
    else:
        # Report the best objective value seen so far during the minimisation.
        title = 'Minimisation of "{}"'.format(f.__name__)
        with out.Progress(name=title, total=iters) as progress:

            def report(_):
                progress({"Objective value": np.min(f_vals)})

            x_opt, val_opt, info = run_optimiser(report)

        with out.Section("Termination message"):
            out.out(convert(info["task"], str))

    # Assign the optimum and return the optimal value.
    vs.set_latent_vector(x_opt, *names)
    return val_opt
def to_numpy(a):
    """Convert an object to NumPy by converting it to `NPOrNum`.

    Args:
        a (object): Object to convert.

    Returns:
        `np.ndarray`: `a` as NumPy.
    """
    as_numpy = convert(a, NPOrNum)
    return as_numpy
def on_device(device):
    """Create a context to change the active device.

    Args:
        device (device): New active device. It is converted to a string.

    Returns:
        :class:`ActiveDevice`: Context to change the active device.
    """
    device_name = convert(device, str)
    return ActiveDevice(device_name)
def as_tf(x: B.Numeric):
    """Convert an object to a TensorFlow constant.

    The data type of the result is the data type of `x` converted to
    `B.TFDType`.

    Args:
        x (object): Object to convert.

    Returns:
        object: `x` as a TensorFlow object.
    """
    return tf.constant(x, dtype=convert(B.dtype(x), B.TFDType))
def as_torch(x: B.Numeric, grad: bool = False):
    """Convert an object to a PyTorch tensor.

    The data type of the result is the data type of `x` converted to
    `B.TorchDType`.

    Args:
        x (object): Object to convert.
        grad (bool, optional): Requires gradient. Defaults to `False`.

    Returns:
        object: `x` as a PyTorch object.
    """
    return torch.tensor(
        x,
        dtype=convert(B.dtype(x), B.TorchDType),
        requires_grad=grad,
    )
def _compute(self, measure):
    """Compute the components of the inducing point approximation for a measure.

    The results are stored in `self._K_z_store`, `self._A_store`,
    `self._mu_store`, and `self._elbo_store` under the key `id(measure)`.

    Args:
        measure: Measure providing `kernels` and `means`.

    Raises:
        RuntimeError: If the kernel matrix of the noise is not a `Diagonal`.
    """
    key = id(measure)

    # Processes and inputs of the observations and of the inducing points.
    p_x, x = self.fdd.p, self.fdd.x
    p_z, z = self.u.p, self.u.x

    # Kernel matrices.
    K_zx = measure.kernels[p_z, p_x](z, x)
    K_z = convert(measure.kernels[p_z](z), AbstractMatrix)
    self._K_z_store[key] = K_z

    # Evaluating `e.kernel(x)` will yield incorrect results if `x` is a
    # `MultiInput`, because `x` then still designates the particular components
    # of `f`. Fix that by instead designating the elements of `e`.
    noise_x = (
        MultiInput(
            *(p_e(x_i.x) for p_e, x_i in zip(self.e.kernel.ps, x.get()))
        )
        if isinstance(x, MultiInput)
        else x
    )

    # Noise kernel matrix. The approximation can only handle diagonal ones.
    K_n = self.e.kernel(noise_x)
    if not isinstance(K_n, Diagonal):
        raise RuntimeError("Kernel matrix of noise must be diagonal.")

    # Components for the inducing point approximation.
    L_z = B.cholesky(K_z)
    whitened = B.transpose(B.solve(L_z, K_zx))
    A = B.add(B.eye(K_z), B.iqf(K_n, whitened))
    self._A_store[key] = A

    centred = uprank(self.y) - self.e.mean(noise_x)
    y_bar = centred - measure.means[p_x](x)
    prod_y_bar = B.solve(L_z, B.iqf(K_n, B.transpose(K_zx), y_bar))

    # Optimal mean.
    correction = B.iqf(A, B.solve(L_z, K_z), prod_y_bar)
    mu = B.add(measure.means[p_z](z), correction)
    self._mu_store[key] = mu

    # ELBO.
    # NOTE: The calculation of `trace_part` asserts that `K_n` is diagonal.
    #   The rest, however, is completely generic.
    exact_diag = Diagonal(measure.kernels[p_x].elwise(x)[:, 0])
    approx_diag = Diagonal(B.iqf_diag(K_z, K_zx))
    trace_part = B.ratio(exact_diag - approx_diag, K_n)
    det_part = B.logdet(2 * B.pi * K_n) + B.logdet(A)
    iqf_part = B.iqf(K_n, y_bar)[0, 0] - B.iqf(A, prod_y_bar)[0, 0]
    self._elbo_store[key] = -0.5 * (trace_part + det_part + iqf_part)
def promote_dtypes(first_dtype: DType, *dtypes: DType):
    """Find the smallest data type to which all given data types can safely be
    cast.

    This function is sensitive to the order of the arguments. The result,
    however, is always valid.

    Args:
        first_dtype (dtype): First data type to promote.
        *dtypes (dtype): Further data types to promote.

    Returns:
        dtype: Common data type. Will be of the type of the first given data
            type.
    """
    # With just one data type given, there is nothing to promote.
    if not dtypes:
        return first_dtype

    # Fold all data types into a common NumPy data type, from left to right.
    common_dtype = convert(first_dtype, NPDType)
    for other in dtypes:
        common_dtype = np.promote_types(common_dtype, convert(other, NPDType))

    return _convert_back(common_dtype.type, first_dtype)
def dtype_int(dtype: DType):
    """Get the integer equivalent of a data type.

    The leading non-digit characters of the name of the NumPy equivalent of
    `dtype` are dropped, and what remains is appended to `"int"` to look up the
    integer data type on `np`.

    Args:
        dtype (dtype): Data type to get the integer equivalent of.

    Returns:
        dtype: Data type, but ensured to be integer.
    """
    # TODO: Is there a better way of doing this?
    np_name = convert(dtype, NPDType).__name__

    # Find the position of the first digit in the name.
    digits = "0123456789"
    start = 0
    while start < len(np_name) and np_name[start] not in digits:
        start += 1

    int_dtype = getattr(np, "int" + np_name[start:])
    return _convert_back(int_dtype, dtype)
def pd_inv(a: Union[B.Numeric, AbstractMatrix]):
    """Invert a positive-definite matrix through its Cholesky decomposition.

    Args:
        a (matrix): Positive-definite matrix to invert.

    Returns:
        matrix: Inverse of `a`, which is also positive definite.
    """
    a = convert(a, AbstractMatrix)
    chol = B.cholesky(a)
    # The call to `cholesky_solve` will convert the identity matrix to dense,
    # because `cholesky(a)` will not have any exploitable structure. Converting
    # `B.eye(a)` to dense here already suppresses the expected warning.
    identity = B.dense(B.eye(a))
    return B.cholesky_solve(chol, identity)
def perturb(x):
    """Slightly perturb a tensor.

    The tensor is scaled by a factor slightly above one, which depends on the
    data type, and `1e-20` is added.

    Args:
        x (tensor): Tensor to perturb.

    Returns:
        tensor: `x`, but perturbed.

    Raises:
        ValueError: If the data type of `x` is neither `np.float64` nor
            `np.float32`.
    """
    dtype = convert(B.dtype(x), B.NPDType)

    # Select the relative perturbation according to the data type.
    if dtype == np.float64:
        relative = 1e-14
    elif dtype == np.float32:
        relative = 1e-7
    else:
        raise ValueError(f"Cannot perturb a tensor of data type {B.dtype(x)}.")

    return 1e-20 + x * (1 + relative)
def __init__(self, var, mean=None):
    # The variance must be an instance of `AbstractMatrix`.
    self._var = convert(var, AbstractMatrix)

    # Resolve the mean. Without a given mean, an actual zero indicates that the
    # mean is all zeros. A given mean is made dense, because it is not useful
    # to retain structure here.
    mean_given = mean is not None
    self._mean = B.dense(mean) if mean_given else 0
    self._zero_mean = not mean_given

    # `p` and `x` start out as `None`.
    self.p = self.x = None
def _compute(self):
    """Compute the components of the inducing point approximation.

    The results are stored in `self._K_z`, `self._A`, `self._mu`, and
    `self._elbo`.

    Raises:
        RuntimeError: If the kernel matrix of the noise is not a `Diagonal`.
    """
    kernels, means = self.graph.kernels, self.graph.means

    # Processes and inputs of the observations and of the inducing points.
    p_x, x = type_parameter(self.x), self.x.get()
    p_z, z = type_parameter(self.z), self.z.get()

    # Kernel matrices.
    K_zx = kernels[p_z, p_x](z, x)
    self._K_z = convert(kernels[p_z](z), AbstractMatrix)

    # Evaluating `e.kernel(x)` will yield incorrect results if `x` is a
    # `MultiInput`, because `x` then still designates the particular
    # components of `f`. Fix that by instead designating the elements of `e`.
    noise_x = (
        MultiInput(
            *(p_e(x_i.get()) for p_e, x_i in zip(self.e.kernel.ps, x.get()))
        )
        if isinstance(x, MultiInput)
        else x
    )

    # Noise kernel matrix. The approximation can only handle diagonal ones.
    K_n = self.e.kernel(noise_x)
    if not isinstance(K_n, Diagonal):
        raise RuntimeError('Kernel matrix of noise must be diagonal.')

    # Components for the inducing point approximation.
    L_z = B.cholesky(self._K_z)
    whitened = B.transpose(B.solve(L_z, K_zx))
    self._A = B.add(B.eye(self._K_z), B.iqf(K_n, whitened))

    centred = uprank(self.y) - self.e.mean(noise_x)
    y_bar = centred - means[p_x](x)
    prod_y_bar = B.solve(L_z, B.iqf(K_n, B.transpose(K_zx), y_bar))

    # Optimal mean.
    self._mu = B.add(
        means[p_z](z),
        B.iqf(self._A, B.solve(L_z, self._K_z), prod_y_bar),
    )

    # ELBO.
    # NOTE: The calculation of `trace_part` asserts that `K_n` is diagonal.
    #   The rest, however, is completely generic.
    exact_diag = Diagonal(kernels[p_x].elwise(x)[:, 0])
    approx_diag = Diagonal(B.iqf_diag(self._K_z, K_zx))
    trace_part = B.ratio(exact_diag - approx_diag, K_n)
    det_part = B.logdet(2 * B.pi * K_n) + B.logdet(self._A)
    iqf_part = B.iqf(K_n, y_bar)[0, 0] - B.iqf(self._A, prod_y_bar)[0, 0]
    self._elbo = -0.5 * (trace_part + det_part + iqf_part)
def matmul(a: LowRank, b: Constant, tr_a=False, tr_b=False):
    # Convert the constant matrix to a `LowRank` and multiply again.
    b_low_rank = convert(b, LowRank)
    return B.matmul(a, b_low_rank, tr_a=tr_a, tr_b=tr_b)
def _convert_back(dtype: NPDType, _: target):
    # The value of the second argument is ignored: the data type is converted
    # to `target`, which is taken from the enclosing scope.
    converted = convert(dtype, target)
    return converted
def K_x(self):
    """Kernel matrix of the data."""
    # Reuse the cached computation if there is one.
    if self._K_x is not None:
        return self._K_x

    p_x = type_parameter(self.x)
    x = self.x.get()
    self._K_x = convert(self.graph.kernels[p_x](x), AbstractMatrix)
    return self._K_x
def __init__(self, k_ij, k_zi, k_zj, z, K_z):
    # These four arguments are stored exactly as given.
    self.k_ij, self.k_zi, self.k_zj, self.z = k_ij, k_zi, k_zj, z
    # `K_z` is stored as an `AbstractMatrix`.
    self.K_z = convert(K_z, AbstractMatrix)
def test_constant_to_lowrank_square(const1):
    """Converting `const1` to `LowRank` must preserve its value and share the
    left and right factors."""
    low_rank = convert(const1, LowRank)
    # The conversion must not change the value.
    approx(const1, low_rank)
    assert isinstance(low_rank, LowRank)
    # The left and right factors must be the very same object.
    assert low_rank.left is low_rank.right
def dtype(a: JAXNumeric):
    # JAX gives NumPy data types back, so convert to a JAX one.
    jax_dtype = convert(a.dtype, JAXDType)
    return jax_dtype
def test_default_conversion_methods():
    """Check the default conversions to `tuple`, `list`, and `str`."""
    inputs = [1, (1,), ((1,),), [1], [(1,)]]

    # Conversion to `tuple`.
    expected_tuples = [(1,), (1,), ((1,),), (1,), ((1,),)]
    for value, expected in zip(inputs, expected_tuples):
        assert plum.convert(value, tuple) == expected

    # Conversion to `list`.
    expected_lists = [[1], [1], [(1,)], [1], [(1,)]]
    for value, expected in zip(inputs, expected_lists):
        assert plum.convert(value, list) == expected

    # Conversion of bytes to `str`.
    assert plum.convert("test".encode(), str) == "test"
def matmul(a: Constant, b: LowRank, tr_a=False, tr_b=False):
    # Convert the constant matrix to a `LowRank` and multiply again.
    a_low_rank = convert(a, LowRank)
    return B.matmul(a_low_rank, b, tr_a=tr_a, tr_b=tr_b)
def test_constant_to_lowrank_rectangular(const_r):
    """Converting `const_r` to `LowRank` must preserve its value."""
    low_rank = convert(const_r, LowRank)
    # The conversion must not change the value.
    approx(const_r, low_rank)
    assert isinstance(low_rank, LowRank)
def __init__(self, k_zi, k_zj, z, A, K_z):
    # These four arguments are stored exactly as given.
    self.k_zi, self.k_zj, self.z, self.A = k_zi, k_zj, z, A
    # Only the Cholesky decomposition of `K_z` is kept, computed after `K_z`
    # is converted to an `AbstractMatrix`.
    K_z = convert(K_z, AbstractMatrix)
    self.L = B.cholesky(K_z)
def add(a: LowRank, b: Constant):
    assert_compatible(a, b)
    # Rebuild the constant matrix with the broadcasted shape of `a` and `b`.
    const = b.const
    shape = broadcast(a, b).as_tuple()
    expanded = Constant(const, *shape)
    # Convert it to a `LowRank` and add again.
    return add(a, convert(expanded, LowRank))
def add(a: Constant, b: Diagonal):
    assert_compatible(a, b)
    # Rebuild the constant matrix with the broadcasted shape of `a` and `b`.
    const = a.const
    shape = broadcast(a, b).as_tuple()
    expanded = Constant(const, *shape)
    # Convert it to a `LowRank` and add again.
    return add(convert(expanded, LowRank), b)
def _resolve_var(self):
    # Construct the variance if it is not available yet.
    variance = self._var
    if variance is None:
        variance = self._var = self._construct_var()
    # Ensure that the variance is a structured matrix for efficient operations.
    self._var = convert(variance, AbstractMatrix)
def _take_convert(indices_or_mask: TorchNumeric):
    # Anything that is not of an integer data type is returned untouched.
    np_dtype = convert(indices_or_mask.dtype, NPDType)
    if not issubdtype(np_dtype, np.integer):
        return indices_or_mask
    # Indices must be on the CPU and `int64`s!
    return indices_or_mask.cpu().type(torch.int64)