Example 1
import numpy as np
from sklearn.mixture import GaussianMixture

# load_data, comp_gm_gv, apply_var_floors and mixup are project helpers (not shown here).
def gmm_em(dataList, nmix, final_niter, ds_factor):
    dataList = load_data(dataList)
    nfiles = len(dataList)
    gm, gv = comp_gm_gv(dataList)
    # EM iterations to run at each mixture size, indexed by log2(mix);
    # the final mixture size gets final_niter iterations.
    niter = [1, 2, 4, 4, 4, 4, 6, 6, 10, 10, 15]
    niter[int(np.log2(nmix))] = final_niter

    # covariance_type is keyword-only in recent scikit-learn. warm_start=True with
    # max_iter=1 makes each fit() call below run a single EM update instead of
    # re-initializing the model (which would discard the mixed-up parameters).
    model = GaussianMixture(n_components=1, covariance_type='diag', verbose=0,
                            max_iter=1, warm_start=True)
    data = np.concatenate(dataList, axis=1).T

    mix = 1
    while mix <= nmix:
        if mix >= nmix // 2:
            ds_factor = 1
        print('\nRe-estimating the GMM hyperparameters for %d components ...' %
              mix)
        for i in range(niter[int(np.log2(mix))]):
            print('EM iter#: %d \t' % i, end='')
            model.fit(data[::ds_factor])  # train on a subsample until the final sizes
            w = model.weights_
            sigma = model.covariances_
            sigma = apply_var_floors(w, sigma, 1)
            # Keep derived parameters consistent: scikit-learn scores with
            # precisions_cholesky_, so it must be updated along with covariances_.
            model.covariances_ = sigma
            model.precisions_ = 1 / sigma
            model.precisions_cholesky_ = 1 / np.sqrt(sigma)
            llk, _ = model._estimate_log_prob_resp(data)
            print('[llk = %.2f]' % np.mean(llk))

        if mix < nmix:
            model = mixup(model)
        mix *= 2

    return model
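The mixup helper above is not shown in the example. In the classic GMM-UBM recipe it doubles the component count by binary splitting; a minimal sketch of that step (hypothetical implementation, assuming diagonal covariances and the scikit-learn attribute layout used above):

import numpy as np
from sklearn.mixture import GaussianMixture

def mixup_sketch(model, eps=0.2):
    # Split each Gaussian in two by perturbing its mean along the axis of
    # largest variance; weights are halved, covariances are copied.
    mu, sigma, w = model.means_, model.covariances_, model.weights_
    K = len(w)
    offset = np.zeros_like(mu)
    axis = np.argmax(sigma, axis=1)
    offset[np.arange(K), axis] = eps * np.sqrt(sigma[np.arange(K), axis])

    new = GaussianMixture(n_components=2 * K, covariance_type='diag',
                          max_iter=1, warm_start=True)
    new.means_ = np.vstack([mu - offset, mu + offset])
    new.covariances_ = np.vstack([sigma, sigma])
    new.weights_ = np.hstack([w, w]) / 2.0
    new.precisions_cholesky_ = 1.0 / np.sqrt(new.covariances_)
    # Mark the model as initialized so warm_start reuses these parameters on fit().
    new.converged_ = True
    new.lower_bound_ = -np.inf
    return new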
Example 2
    def test_log_responsibilities(self):
        """
        Test the log responsibilities with the help of Sklearn.
        """
        N = 16384
        S = 2048
        D = 128

        means = torch.randn(S, D)
        covs = torch.rand(S)
        x = torch.randn(N, D)
        prior = torch.rand(S)
        prior /= prior.sum()
        mixture = GaussianMixture(S, covariance_type='spherical')
        mixture.means_ = means.numpy()
        mixture.precisions_cholesky_ = np.sqrt(1 / covs.numpy())
        mixture.weights_ = prior.numpy()

        # pylint: disable=protected-access
        _, expected = mixture._estimate_log_prob_resp(x.numpy())
        expected = torch.from_numpy(expected)

        probs = log_normal(x, means, covs, 'spherical')
        predicted = log_responsibilities(probs, prior)

        self.assertTrue(
            torch.allclose(expected, predicted, atol=1e-03, rtol=1e-05))
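log_normal and log_responsibilities belong to the library under test and are not shown. A minimal sketch of what log_responsibilities is expected to compute (Bayes' rule in log space, assuming probs holds per-component log densities of shape (N, S)):

import torch

def log_responsibilities_sketch(log_probs, prior):
    # log r_nk = log p(x_n | k) + log pi_k - log p(x_n), computed stably.
    joint = log_probs + prior.log()
    return joint - torch.logsumexp(joint, dim=1, keepdim=True)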
Example 3
 def test__estimate_log_prob_resp_spherical_shared_compression(self):
     rs = np.random.RandomState(11)
     cov_type = 'spherical'
     gmm = GaussianMixture(n_components=3,
                           num_feat_full=5,
                           num_feat_comp=3,
                           num_feat_shared=3,
                           num_samp=4,
                           transform=None,
                           mask=None,
                           D_indices=None,
                           covariance_type=cov_type,
                           random_state=rs)
     gmm.fit_sparsifier(X=self.td.X)
     means = rs.rand(gmm.n_components, gmm.num_feat_full)
     covariances = rs.rand(gmm.n_components)
     weights = rs.rand(gmm.n_components)
     weights /= weights.sum()
     log_prob_test, log_resp_test, log_prob_norm_test = gmm._estimate_log_prob_resp(
         weights, means, covariances, cov_type)
     # Compute scikit-learn's reference values (awkward to set up).
     precisions = _compute_precision_cholesky(covariances, cov_type)
     gmm_skl = GMSKL(n_components=3, covariance_type=cov_type)
     # we need the mask to be shared so that we can use mask[0] on all means
     gmm_skl.means_ = means[:, gmm.mask[0]]
     gmm_skl.precisions_cholesky_ = precisions
     gmm_skl.weights_ = weights
     log_prob_norm_true, log_resp_true = gmm_skl._estimate_log_prob_resp(
         gmm.RHDX)
     # sklearn returns per-sample log_prob_norm; average it to match the scalar
     # returned by the sparsified implementation.
     log_prob_norm_true = log_prob_norm_true.mean()
     # recompute log_prob with sklearn's standalone helper
     log_prob_true = _estimate_log_gaussian_prob(gmm.RHDX, gmm_skl.means_,
                                                 precisions, cov_type)
     # run the tests
     self.assertArrayEqual(log_prob_test, log_prob_true)
     self.assertArrayEqual(log_prob_norm_true, log_prob_norm_test)
     self.assertArrayEqual(log_resp_true, log_resp_test)
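Both this test and the next compare against scikit-learn's private _estimate_log_prob_resp. For reference, the three quantities being checked are related by the standard E-step; a numpy sketch (not the project's implementation):

import numpy as np
from scipy.special import logsumexp

def estimate_log_prob_resp_sketch(log_prob, weights):
    # log_prob: (N, K) per-component log densities; weights: (K,) mixture weights.
    weighted = log_prob + np.log(weights)          # log[pi_k * N(x_n | mu_k, Sigma_k)]
    log_prob_norm = logsumexp(weighted, axis=1)    # log p(x_n), shape (N,)
    log_resp = weighted - log_prob_norm[:, None]   # log responsibilities
    return log_prob_norm, log_resp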
Example 4
 def test__estimate_log_prob_resp_diagonal_no_compression(self):
     cov_type = 'diag'
     gmm = GaussianMixture(n_components=3,
                           num_feat_full=5,
                           num_feat_comp=5,
                           num_feat_shared=5,
                           num_samp=4,
                           transform=None,
                           mask=None,
                           D_indices=None,
                           covariance_type=cov_type)
     gmm.fit_sparsifier(X=self.td.X)
     means = np.random.rand(gmm.n_components, gmm.num_feat_comp)
     covariances = np.random.rand(gmm.n_components, gmm.num_feat_comp)
     weights = np.random.rand(gmm.n_components)
     weights /= weights.sum()
     log_prob_test, log_resp_test, log_prob_norm_test = gmm._estimate_log_prob_resp(
         weights, means, covariances, cov_type)
     # Compute scikit-learn's reference values (awkward to set up).
     precisions = _compute_precision_cholesky(covariances, cov_type)
     gmm_skl = GMSKL(n_components=3, covariance_type=cov_type)
     gmm_skl.means_ = means
     gmm_skl.precisions_cholesky_ = precisions
     gmm_skl.weights_ = weights
     log_prob_norm_true, log_resp_true = gmm_skl._estimate_log_prob_resp(
         self.td.X)
     # sklearn returns per-sample log_prob_norm; average it to match the scalar
     # returned by the sparsified implementation.
     log_prob_norm_true = log_prob_norm_true.mean()
     # recompute log_prob with sklearn's standalone helper
     log_prob_true = _estimate_log_gaussian_prob(self.td.X, means,
                                                 precisions, cov_type)
     # run the tests
     self.assertArrayEqual(log_prob_test, log_prob_true)
     self.assertArrayEqual(log_prob_norm_true, log_prob_norm_test)
     self.assertArrayEqual(log_resp_true, log_resp_test)
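assertArrayEqual is a custom assertion defined on the test class, not a unittest built-in; a plausible minimal implementation (hypothetical, assuming it delegates to numpy's testing helpers):

import numpy as np

def assertArrayEqual(self, first, second):
    # Hypothetical helper: raise AssertionError with a readable diff on mismatch.
    np.testing.assert_array_equal(first, second)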