def test_suffstat_sk_tied():
    """Check the tied covariance estimate and its precision Cholesky factor.

    The tied estimate is compared against the equation
    ``Nk * Sk / N = S_tied`` (summed over components), where ``Sk`` are the
    per-component full covariances.
    """
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    # Random responsibilities, normalised so that every row sums to one.
    raw_resp = rng.rand(n_samples, n_components)
    resp = raw_resp / raw_resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)

    # Sufficient statistics: per-component weights and weighted means.
    nk = resp.sum(axis=0)
    xk = np.dot(resp.T, X) / nk[:, np.newaxis]

    # Weighted average of the full covariances: the expected tied value.
    full_covs = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0)
    expected_tied = (
        np.sum(nk[:, np.newaxis, np.newaxis] * full_covs, 0) / n_samples)

    covars_pred_tied = _estimate_gaussian_covariances_tied(resp, X, nk, xk, 0)

    ecov = EmpiricalCovariance()
    ecov.covariance_ = expected_tied
    for norm_name in ('frobenius', 'spectral'):
        assert_almost_equal(
            ecov.error_norm(covars_pred_tied, norm=norm_name), 0)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred_tied, 'tied')
    precs_pred = np.dot(precs_chol_pred, precs_chol_pred.T)
    precs_est = linalg.inv(covars_pred_tied)
    assert_array_almost_equal(precs_est, precs_pred)
# NOTE(review): an identical ``test_suffstat_sk_tied`` is defined immediately
# above in this file; this later definition shadows the earlier one. Confirm
# whether one of the two copies should be removed.
def test_suffstat_sk_tied():
    """Verify the tied covariance against ``Nk * Sk / N = S_tied``.

    Also checks that the precision Cholesky factor computed for the tied
    covariance reproduces the inverse of that covariance.
    """
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    # Draw responsibilities and rescale each row to sum to one.
    resp = rng.rand(n_samples, n_components)
    resp /= resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)

    nk = resp.sum(axis=0)
    xk = resp.T.dot(X) / nk[:, np.newaxis]

    covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0)
    component_weights = nk[:, np.newaxis, np.newaxis]
    covars_pred_full = (
        (component_weights * covars_pred_full).sum(axis=0) / n_samples)
    covars_pred_tied = _estimate_gaussian_covariances_tied(resp, X, nk, xk, 0)

    ecov = EmpiricalCovariance()
    ecov.covariance_ = covars_pred_full
    frobenius_err = ecov.error_norm(covars_pred_tied, norm='frobenius')
    assert_almost_equal(frobenius_err, 0)
    spectral_err = ecov.error_norm(covars_pred_tied, norm='spectral')
    assert_almost_equal(spectral_err, 0)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred_tied, 'tied')
    precs_pred = precs_chol_pred.dot(precs_chol_pred.T)
    precs_est = linalg.inv(covars_pred_tied)
    assert_array_almost_equal(precs_est, precs_pred)
def _estimate_gaussian_correlations_tied(resp, X, nk, means, reg_covar,
                                         resp_fair=None, X_fair=None,
                                         nk_fair=None, means_fair=None):
    """Estimate covariances that share one tied correlation matrix.

    A single tied covariance is estimated and normalised into a correlation
    matrix; each component's covariance is then obtained by rescaling that
    correlation with the component's own per-feature standard deviations.

    Parameters
    ----------
    resp : array-like, shape (n_samples, n_components)

    X : array-like, shape (n_samples, n_features)

    nk : array-like, shape (n_components,)

    means : array-like, shape (n_components, n_features)

    reg_covar : float

    resp_fair, X_fair, nk_fair, means_fair : array-like, optional
        Statistics used only for the tied covariance estimate. Each one
        independently falls back to its counterpart (``resp``, ``X``, ``nk``,
        ``means``) when left as None. Presumably they share the layout of
        their counterparts -- TODO confirm against callers.

    Returns
    -------
    covariance : array, shape (n_components, n_features, n_features)
        The correlation-tied covariance matrix of the components.
    """
    # NOTE(review): the fallbacks are resolved one argument at a time, so a
    # caller may mix "fair" and regular statistics. Maybe we should require
    # either all or none of the fair parameters to be provided.
    tied_resp = resp if resp_fair is None else resp_fair
    tied_X = X if X_fair is None else X_fair
    tied_nk = nk if nk_fair is None else nk_fair
    tied_means = means if means_fair is None else means_fair

    # Shared covariance, rescaled by the inverse standard deviations taken
    # from its diagonal to give the shared correlation matrix.
    shared_cov = _estimate_gaussian_covariances_tied(
        tied_resp, tied_X, tied_nk, tied_means, reg_covar)
    inv_std = 1.0 / np.sqrt(np.diag(shared_cov))
    shared_corr = np.outer(inv_std, inv_std) * shared_cov

    # Per-component variances always come from the regular statistics.
    variances = _estimate_gaussian_covariances_diag(
        resp, X, nk, means, reg_covar)

    # Scale the shared correlation by each component's std outer product.
    return np.array([shared_corr * np.outer(std, std)
                     for std in np.sqrt(variances)])