Ejemplo n.º 1
0
def _onehot_to_initial_params(
        X: np.ndarray, onehot: np.ndarray,
        cov_type: CovarianceType) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Computes cluster weights, cluster means and cluster precisions from
    a given clustering.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.
    onehot : ndarray, shape (n_samples, n_clusters)
        Each row has a 1 indicating cluster membership, other entries are 0.
    cov_type : {'full', 'tied', 'diag', 'spherical'}
        Covariance type for Gaussian mixture model

    Returns
    -------
    weights : ndarray
        Per-cluster weights returned by ``_estimate_gaussian_parameters``,
        divided by the number of samples.
    means : ndarray
        Per-cluster means returned by ``_estimate_gaussian_parameters``.
    precisions : ndarray
        Precisions (inverse covariances) rebuilt from the precision Cholesky
        factors, in the layout that ``cov_type`` implies.
    """
    n_samples = X.shape[0]
    # 1e-06 is the covariance regularization passed to the estimator.
    weights, means, covariances = _estimate_gaussian_parameters(
        X, onehot, 1e-06, cov_type)
    weights /= n_samples

    chol = _compute_precision_cholesky(covariances, cov_type)

    if cov_type == "tied":
        # A single matrix factor shared by all clusters.
        precisions = np.dot(chol, chol.T)
    elif cov_type in ("diag", "spherical"):
        # The factor is element-wise 1 / sqrt(covariance) for these types, so
        # the precision is its square. Returning the factor unchanged (as the
        # "diag" branch used to) yields sqrt(precision), not the precision.
        precisions = chol ** 2
    else:
        # "full": one matrix factor per cluster; stack the products into an
        # array so the result matches the annotated return type.
        precisions = np.array([np.dot(c, c.T) for c in chol])

    return weights, means, precisions
def test_gaussian_mixture_precisions_init_diag():
    """Check that `precisions_cholesky_` is initialized correctly when the
    precision matrix is supplied by hand.

    The model fitted from a manually computed precision matrix is compared
    with the model that estimates the same matrix internally. Both fits must
    give the same result in the same number of iterations; a wrong
    initialization would show up as a larger iteration count.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/16944
    """
    # Toy data: one blob shifted to (20, 20) and one linearly stretched blob.
    # The two rng.randn draws must stay in this order to keep the data fixed.
    n_samples = 300
    rng = np.random.RandomState(0)
    shifted_blob = rng.randn(n_samples, 2) + np.array([20, 20])
    stretch = np.array([[0.0, -0.7], [3.5, 0.7]])
    stretched_blob = np.dot(rng.randn(n_samples, 2), stretch)
    X = np.vstack([shifted_blob, stretched_blob])

    # Settings shared by both mixture models.
    n_components = 2
    covariance_type = "diag"
    reg_covar = 1e-6
    random_state = 0
    shared_kwargs = dict(
        n_components=n_components,
        covariance_type=covariance_type,
        reg_covar=reg_covar,
        random_state=random_state,
    )

    # Manual initialization:
    # 1. KMeans gives a hard assignment of every sample,
    # 2. the assignment is one-hot encoded into responsibilities,
    # 3. the covariance is estimated from them and inverted element-wise.
    kmeans = KMeans(
        n_clusters=n_components, n_init=1, random_state=random_state
    )
    labels = kmeans.fit(X).labels_
    one_hot_resp = np.eye(n_components)[labels]
    covariance = _estimate_gaussian_parameters(
        X, one_hot_resp, reg_covar=reg_covar, covariance_type=covariance_type
    )[2]
    precisions_init = 1 / covariance

    gm_with_init = GaussianMixture(
        precisions_init=precisions_init, **shared_kwargs
    ).fit(X)
    gm_without_init = GaussianMixture(**shared_kwargs).fit(X)

    assert gm_without_init.n_iter_ == gm_with_init.n_iter_
    assert_allclose(
        gm_with_init.precisions_cholesky_, gm_without_init.precisions_cholesky_
    )
Ejemplo n.º 3
0
    def _m_step(self, X, log_resp):
        """M step.

        Re-estimates ``weights_``, ``means_``, ``covariances_`` and
        ``precisions_cholesky_`` from responsibilities scaled by
        ``self.Xweights``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
        log_resp : array-like, shape (n_samples, n_components)
            Logarithm of the posterior probabilities (or responsibilities) of
            the point of each sample in X.
        """
        # Multiply against the transposed responsibilities so self.Xweights
        # broadcasts along the sample axis, then transpose back.
        # NOTE(review): presumably Xweights holds one weight per sample.
        responsibilities = np.exp(log_resp)
        weighted_resp = (self.Xweights * responsibilities.T).T

        estimates = _estimate_gaussian_parameters(
            X, weighted_resp, self.reg_covar, self.covariance_type)
        self.weights_, self.means_, self.covariances_ = estimates

        # Normalize by the total sample weight.
        self.weights_ /= np.sum(self.Xweights)

        self.precisions_cholesky_ = _compute_precision_cholesky(
            self.covariances_, self.covariance_type)
Ejemplo n.º 4
0
def get_means_resp(X, log_resp, cov):
    """Re-estimate component means and per-component log Gaussian probabilities.

    Parameters
    ----------
    X : array-like
        Data points passed through to the ``_gm`` estimation helpers.
    log_resp : array-like
        Logarithm of the responsibilities; exponentiated before estimation.
    cov : str
        Covariance type forwarded to every ``_gm`` helper.

    Returns
    -------
    means : ndarray
        Component means estimated from ``X`` and ``exp(log_resp)``.
    log_prob : ndarray
        Output of ``_gm._estimate_log_gaussian_prob`` for the re-estimated
        parameters. This replaces, rather than updates, the incoming
        ``log_resp``.
    """
    responsibilities = np.exp(log_resp)
    # 1e-6 is the covariance regularization; the estimated weights are unused.
    _weights, means, covariances = _gm._estimate_gaussian_parameters(
        X, responsibilities, 1e-6, cov)
    precisions_chol = _gm._compute_precision_cholesky(covariances, cov)
    log_prob = _gm._estimate_log_gaussian_prob(X, means, precisions_chol, cov)
    return means, log_prob