def update(self, x, weights=None):
    '''Re-estimate the distribution parameters from a complete batch.

    Every sample is expected to be supplied in this single call.

    Parameters
    ----------
    x : numpy.ndarray
        a DxN array holding the dataset used to refit the distribution's
        parameters
    weights : numpy.ndarray
        a 1xN array with the relative weighting of each component for this
        distribution; when omitted every entry is taken to be '1'
    '''
    self._samples = x.shape[1]

    if weights is None:
        weights = np.ones((1, self._samples))

    # Total weight assigned to this distribution.
    total_weight = np.sum(weights)

    # New mean: weighted sum over the sample axis, kept as a column.
    weighted_sum = np.sum(weights * x, axis=1)
    self.mu = weighted_sum[:, np.newaxis] / total_weight

    # New covariance, kept in the form returned by cholesky.from_matrix.
    scatter = linalg.weighted_scatter_matrix(weights, x, self.mu)
    self.L = cholesky.from_matrix(scatter / total_weight)

    # New model weight: share of the total weight per provided sample.
    self.weight = total_weight / self._samples

    # Cache the part of the log-likelihood that does not depend on the
    # sample being evaluated.
    log_det = 2.0 * np.sum(np.log(np.diag(self.L)))
    self._const = -(np.log(2 * np.pi) * x.shape[0] + log_det) / 2.0
def __init__(self, alpha, d, num_models, k=1, mu=None, Sigma=None):
    '''Store the prior hyper-parameters and initialise the current
    parameters from them.

    Parameters
    ----------
    alpha : float
        concentration parameter
    d : int
        dimensionality of the data
    num_models : int
        number of total models
    k : float
        initial scaling factor
    mu : numpy.ndarray
        initial prior on the distribution mean; if unknown then it is set
        to zero
    Sigma : numpy.ndarray
        initial prior on the covariance; if unknown then it is set to
        identity

    Raises
    ------
    ValueError
        if ``mu`` does not have ``d`` rows or ``Sigma`` is not a ``d`` x
        ``d`` matrix
    '''
    super().__init__()

    # Internal variables
    self._num_models = num_models
    self._alpha0 = alpha
    self._beta0 = k
    self._nu0 = d + 1

    if mu is None:
        self._mu0 = np.zeros((d, 1))
    else:
        if mu.shape[0] != d:
            raise ValueError(
                'Mean prior should have a dimensionality of %d.' % d)
        self._mu0 = mu.copy()

    # Note: this stores the *covariance*, not the precision even though
    # that's how the Wishart distribution is defined.  The math in the
    # update equations is actually a little bit cleaner if the Cholesky
    # decomposition of the covariance matrix is used (basically it ends up
    # being nearly the same as the Gaussian version).
    if Sigma is None:
        self._Sigma0 = np.eye(d)
        self._L0 = np.eye(d)
    else:
        # Reject the prior when *either* dimension is wrong; requiring
        # both to mismatch would let a d x m (m != d) matrix through.
        if Sigma.shape[0] != d or Sigma.shape[1] != d:
            raise ValueError('Covariance prior should be a %dx%d matrix.' %
                             (d, d))
        self._Sigma0 = Sigma.copy()
        self._L0 = cholesky.from_matrix(self._Sigma0)

    # Public attributes
    self.alpha = self._alpha0
    self.alpha_sum = 0
    self.beta = self._beta0
    self.nu = self._nu0
    self.mu = self._mu0.copy()
    self.L = self._L0.copy()
def test_decomp():
    '''The wrapper agrees with linalg.cholesky(..., lower=True).'''
    np.random.seed(1234)

    # Build a positive-definite matrix from random samples.
    samples = np.random.uniform(size=(5, 100))
    gram = samples @ samples.T

    from_wrapper = cholesky.from_matrix(gram)
    reference = linalg.cholesky(gram, lower=True)

    difference = np.linalg.norm(from_wrapper - reference)
    assert difference == pytest.approx(0)
def test_reconst():
    '''Round trip: from_matrix followed by to_matrix gives the input back.'''
    np.random.seed(1234)

    # Build a positive-definite matrix from random samples.
    samples = np.random.uniform(size=(5, 100))
    original = samples @ samples.T

    factor = cholesky.from_matrix(original)
    rebuilt = cholesky.to_matrix(factor)

    # Dump the input, the factor and the reconstruction, in that order.
    for matrix in (original, factor, rebuilt):
        print(matrix)

    residual = np.linalg.norm(original - rebuilt)
    assert residual == pytest.approx(0)
def update(self, x, weights=None):
    '''Re-estimate the distribution parameters from a complete batch.

    Every sample is expected to be supplied in this single call.

    Parameters
    ----------
    x : numpy.ndarray
        a DxN array holding the dataset used to refit the distribution's
        parameters
    weights : numpy.ndarray
        a 1xN array with the relative weighting of each component for this
        distribution; when omitted every entry is taken to be
        '1 / num_models'
    '''
    self._samples = x.shape[1]

    if weights is None:
        weights = np.ones((1, self._samples)) / self._num_models

    # NOTE Equation numbers below refer to
    # "Pattern Recognition and Machine Learning". Bishop. 2006

    # Gaussian update.  Machine epsilon is added to the total weight before
    # it is used as a divisor.
    effective_count = np.sum(weights) + np.finfo(float).eps  # (10.51)
    weighted_sum = np.sum(weights * x, axis=1)
    sample_mean = weighted_sum[:, np.newaxis] / effective_count  # (10.52)
    scatter = linalg.weighted_scatter_matrix(weights, x, sample_mean)
    sample_cov = scatter / effective_count  # (10.53)

    # Dirichlet update
    self.alpha = self._alpha0 + effective_count  # (10.58)

    # Wishart update
    self.beta = self._beta0 + effective_count  # (10.60)
    self.nu = self._nu0 + effective_count  # (10.63)
    prior_part = self._beta0 * self._mu0
    self.mu = (prior_part + effective_count * sample_mean) / self.beta  # (10.61)

    offset = sample_mean - self._mu0
    shrinkage = self._beta0 * effective_count / self.beta
    inverse_scale = (self._Sigma0
                     + effective_count * sample_cov
                     + shrinkage * (offset @ offset.T))
    self.L = cholesky.from_matrix(inverse_scale)  # (10.62)

    # Cache the term that stays fixed while log-likelihoods are evaluated.
    self._detW = -2.0 * np.sum(np.log(np.diag(self.L)))
def __init__(self, mu=None, Sigma=None):
    '''Create the distribution from a mean and a covariance.

    Parameters
    ----------
    mu : numpy.ndarray
        distribution mean; a 1x1 zero array is used when omitted
    Sigma : numpy.ndarray
        distribution covariance; a 1x1 identity is used when omitted
    '''
    super().__init__()

    mean = np.zeros((1, 1)) if mu is None else mu
    covariance = np.eye(1) if Sigma is None else Sigma

    self.L = cholesky.from_matrix(covariance)
    self.mu = mean.copy()

    # Cache the part of the log-likelihood that does not depend on the
    # sample being evaluated.
    log_det = 2.0 * np.sum(np.log(np.diag(self.L)))
    num_dims = self.mu.shape[0]
    self._const = -(np.log(2 * np.pi) * num_dims + log_det) / 2.0
def test_invalid_decomp():
    '''Decomposing an all-zero 3x3 matrix raises LinearAlgebraError.'''
    all_zeros = np.zeros((3, 3))

    with pytest.raises(LinearAlgebraError):
        cholesky.from_matrix(all_zeros)