def test_suffstat_sk_tied():
    # check the relation sum_k (N_k * S_k) / N == S_tied
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    resp = rng.rand(n_samples, n_components)
    resp = resp / resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)
    nk = resp.sum(axis=0)
    xk = np.dot(resp.T, X) / nk[:, np.newaxis]

    covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0)
    covars_pred_full = np.sum(nk[:, np.newaxis, np.newaxis] * covars_pred_full,
                              0) / n_samples

    covars_pred_tied = _estimate_gaussian_covariances_tied(resp, X, nk, xk, 0)

    ecov = EmpiricalCovariance()
    ecov.covariance_ = covars_pred_full
    assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='frobenius'), 0)
    assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='spectral'), 0)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred_tied, 'tied')
    precs_pred = np.dot(precs_chol_pred, precs_chol_pred.T)
    precs_est = linalg.inv(covars_pred_tied)
    assert_array_almost_equal(precs_est, precs_pred)
Example #2
    def _initialize1(self):

        # n_samples, _ = X.shape
        # weights, means, covariances = _estimate_gaussian_parameters(
        #     X, resp, self.reg_covar, self.covariance_type)
        # weights /= n_samples

        # self.weights_ = (weights if self.weights_init is None
        #                  else self.weights_init)
        # self.means_ = means if self.means_init is None else self.means_init

        if self.precisions_init is None:
            # self.covariances_ = covariances
            self.precisions_cholesky_ = _compute_precision_cholesky(
                self.covariances_, self.covariance_type)
        elif self.covariance_type == 'full':
            self.precisions_cholesky_ = np.array([
                linalg.cholesky(prec_init, lower=True)
                for prec_init in self.precisions_init
            ])
        elif self.covariance_type == 'tied':
            self.precisions_cholesky_ = linalg.cholesky(self.precisions_init,
                                                        lower=True)
        else:
            # for 'diag' and 'spherical', the precision Cholesky is the
            # element-wise square root of the precision
            self.precisions_cholesky_ = np.sqrt(self.precisions_init)
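
# A minimal self-contained sketch (synthetic covariances, not taken from the class
# above) of the convention the branches above rely on: any factor C with C @ C.T
# equal to the precision matrix is a valid precisions_cholesky_, whether it comes
# from the covariances via _compute_precision_cholesky or from a user-supplied
# precisions_init via linalg.cholesky.
import numpy as np
from scipy import linalg
from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky

covariances = np.array([[[2.0, 0.3], [0.3, 1.0]],
                        [[1.5, -0.2], [-0.2, 0.8]]])   # (n_components, n_features, n_features)
precisions = np.array([linalg.inv(c) for c in covariances])

chol_from_cov = _compute_precision_cholesky(covariances, 'full')                   # upper-triangular factors
chol_from_prec = np.array([linalg.cholesky(p, lower=True) for p in precisions])    # lower-triangular factors

# the two factorizations differ, but both reproduce the same precision matrices
assert all(np.allclose(c @ c.T, p) for c, p in zip(chol_from_cov, precisions))
assert all(np.allclose(c @ c.T, p) for c, p in zip(chol_from_prec, precisions))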
Example #3
def _onehot_to_initial_params(
        X: np.ndarray, onehot: np.ndarray,
        cov_type: CovarianceType) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Computes cluster weights, cluster means and cluster precisions from
    a given clustering.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.
    onehot : ndarray, shape (n_samples, n_clusters)
        Each row has a 1 indicating cluster membership, other entries are 0.
    cov_type : {'full', 'tied', 'diag', 'spherical'}
        Covariance type for Gaussian mixture model
    """
    n = X.shape[0]
    weights, means, covariances = _estimate_gaussian_parameters(
        X, onehot, 1e-06, cov_type)
    weights /= n

    precisions_cholesky_ = _compute_precision_cholesky(covariances, cov_type)

    if cov_type == "tied":
        c = precisions_cholesky_
        precisions = np.dot(c, c.T)
    elif cov_type == "diag":
        precisions = precisions_cholesky_
    else:
        precisions = [np.dot(c, c.T) for c in precisions_cholesky_]

    return weights, means, precisions
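
# A hedged usage sketch (the KMeans labelling and the random data below are
# illustrative assumptions, not part of the snippet above): turn a hard clustering
# into a one-hot matrix, derive initial GMM parameters from it, and seed
# scikit-learn's GaussianMixture with them.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(0)
X = rng.rand(200, 3)
labels = KMeans(n_clusters=4, n_init=10, random_state=0).fit_predict(X)
onehot = np.eye(4)[labels]                      # (n_samples, n_clusters) membership matrix

weights, means, precisions = _onehot_to_initial_params(X, onehot, "full")
gm = GaussianMixture(
    n_components=4,
    covariance_type="full",
    weights_init=weights,
    means_init=means,
    precisions_init=precisions,
).fit(X)                                        # EM starts from the clustering-derived parameters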
Example #4
    def add_new_component(self,
                          x_proj,
                          q_abnormal_thres=0.95,
                          acculumated_X_train_proj=None):
        ##########################################################################################
        # sub-scenario:  create a new component for the new x
        # self.novelty_thres < _y_score < self.abnormal_thres
        # x is predicted as a novelty datapoint (but still is a normal datapoint), so we create a new
        # component and update GMM.

        self.n_components += 1
        # compute the mean and covariance of the new components
        # For the mean, we use the x value as the mean of the new component
        # (because the new component only has one point (i.e., x)), and append it to the previous means.
        new_mean = x_proj
        new_covar = self.generate_new_covariance(x_proj, self.means_,
                                                 self.covariances_)
        self.means_ = np.concatenate([self.means_, new_mean], axis=0)
        _, dim = new_mean.shape
        if self.covariance_type == 'diag':
            self.covariances_ = np.concatenate(
                [self.covariances_,
                 new_covar.reshape(1, dim)], axis=0)
        else:
            self.covariances_ = np.concatenate(
                [self.covariances_,
                 new_covar.reshape(1, dim, dim)], axis=0)

        # print(f'new_model.params: {self.get_params()}')
        n = acculumated_X_train_proj.shape[0]
        self.weights_ = np.asarray([n / (n + 1) * v for v in self.weights_])
        self.weights_ = np.concatenate(
            [self.weights_, np.ones((1, )) * (1 / (n + 1))], axis=0)

        self.sum_resp = np.concatenate([self.sum_resp, np.ones((1, ))], axis=0)
        # f_(k+1): the new component of GMM
        n_feats = dim  # dimensionality of the projected data
        # get the log probability of the accumulated data to update the threshold.
        diff = (acculumated_X_train_proj -
                new_mean).T  # X and mu should be column vectors
        if self.covariance_type == 'diag':
            log_dist = -0.5 * diff.T**2 @ (1 / new_covar)
            log_det = np.sum(np.log(new_covar))  # log-determinant of a diagonal covariance
        else:
            log_dist = np.diag(
                -0.5 *
                np.matmul(np.matmul(diff.T, np.linalg.inv(new_covar)), diff))
            log_det = np.log(np.linalg.det(new_covar))
        log_det = 1e-6 if np.isnan(log_det) or np.isinf(log_det) else log_det
        f_k_1 = -.5 * (n_feats * np.log(2 * np.pi) + log_det) + log_dist

        # update self.precisions_cholesky_
        self.precisions_cholesky_ = _compute_precision_cholesky(
            self.covariances_, self.covariance_type)

        self.y_score = n / (n + 1) * self.decision_function(
            acculumated_X_train_proj) + 1 / (n + 1) * f_k_1
        self.abnormal_thres = np.quantile(
            self.y_score, q=q_abnormal_thres)  # abnormal threshold
Example #5
    def train(self, data):
        if self.initialized:
            ff_tmp = self.params['forgetting_factor'].get_value()

            self.short_term_model = IGMM(self.params['init_components'])
            self.short_term_model.get_best_gmm(data,lims=[1, self.params['max_step_components']])
                                               # lims=[self.params['init_components'], self.params['max_step_components']])
            weights_st = self.short_term_model.weights_
            weights_st = ff_tmp * weights_st
            self.short_term_model.weights_ = weights_st


            #print(ff_tmp)
            weights_lt = self.weights_
            weights_lt = (self.weights_.sum() - ff_tmp) * weights_lt  # Regularization to keep sum(w)=1.0

            self.weights_ = weights_lt

            gmm_new = copy.deepcopy(self.short_term_model)

            gmm_new = self.merge_similar_gaussians_in_gmm_minim(gmm_new)
            self.mergeGMM(gmm_new)

            self.weights_ = self.weights_ / sum(self.weights_)  # renormalize so the weights sum to 1


        else:
            #self.get_best_gmm(data, lims=[self.params['init_components'], self.params['max_step_components']])
            self.get_best_gmm(data, lims=[self.params['init_components'],self.params['init_components']])
            self.short_term_model = GMM(self.n_components)
            self.initialized = True

        self.precisions_cholesky_ = _compute_precision_cholesky(self.covariances_, "full")
        if self.params['infer_fixed']:
            y_dims = self.params['y_dims']
            x_dims = self.params['x_dims']
            SIGMA_YY_inv = np.zeros((self.n_components,len(y_dims),len(y_dims)))
            SIGMA_XY = np.zeros((self.n_components,len(x_dims),len(y_dims)))

            for k, (Mu, Sigma) in enumerate(zip(self.means_, self.covariances_)):
                Sigma_yy = Sigma[:, y_dims]
                Sigma_yy = Sigma_yy[y_dims, :]

                Sigma_xy = Sigma[x_dims, :]
                Sigma_xy = Sigma_xy[:, y_dims]
                Sigma_yy_inv = linalg.inv(Sigma_yy)

                SIGMA_YY_inv[k,:,:] = Sigma_yy_inv
                SIGMA_XY[k,:, :] = Sigma_xy

            self.SIGMA_YY_inv = SIGMA_YY_inv
            self.SIGMA_XY = SIGMA_XY
Example #6
    def _set_pX(self):
        # probability density function of X
        self.pX = sklearn.mixture.GaussianMixture(
            n_components=self.n_mix, covariance_type="full"
        )
        self.pX.weights_ = self.w
        self.pX.means_ = self.meanX
        self.pX.covariances_ = self.covXX

        # the following call is required before estimating the posterior
        self.pX.precisions_cholesky_ = _compute_precision_cholesky(self.covXX, "full")
        return
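
# A minimal, self-contained sketch of the same pattern (the weights, means and
# covariances below are illustrative assumptions): once weights_, means_,
# covariances_ and precisions_cholesky_ are set by hand, score_samples and
# predict_proba work without ever calling fit().
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky

weights = np.array([0.4, 0.6])
means = np.array([[0.0, 0.0], [3.0, 3.0]])
covariances = np.array([np.eye(2), 2.0 * np.eye(2)])

gmm = GaussianMixture(n_components=2, covariance_type="full")
gmm.weights_ = weights
gmm.means_ = means
gmm.covariances_ = covariances
gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, "full")

X = np.array([[0.1, -0.2], [2.9, 3.1]])
print(gmm.score_samples(X))    # per-sample log-likelihood under the mixture
print(gmm.predict_proba(X))    # posterior responsibilities of each component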
def test_suffstat_sk_full():
    # compare the covariance estimated by _estimate_gaussian_covariances_full
    # (n_components=1) with EmpiricalCovariance fitted on X * sqrt(resp),
    # then check the derived precision matrices
    rng = np.random.RandomState(0)
    n_samples, n_features = 500, 2

    # special case 1, assuming data is "centered"
    X = rng.rand(n_samples, n_features)
    resp = rng.rand(n_samples, 1)
    X_resp = np.sqrt(resp) * X
    nk = np.array([n_samples])
    xk = np.zeros((1, n_features))
    covars_pred = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0)
    ecov = EmpiricalCovariance(assume_centered=True)
    ecov.fit(X_resp)
    assert_almost_equal(ecov.error_norm(covars_pred[0], norm='frobenius'), 0)
    assert_almost_equal(ecov.error_norm(covars_pred[0], norm='spectral'), 0)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred, 'full')
    precs_pred = np.array([np.dot(prec, prec.T) for prec in precs_chol_pred])
    precs_est = np.array([linalg.inv(cov) for cov in covars_pred])
    assert_array_almost_equal(precs_est, precs_pred)

    # special case 2, assuming resp are all ones
    resp = np.ones((n_samples, 1))
    nk = np.array([n_samples])
    xk = X.mean(axis=0).reshape((1, -1))
    covars_pred = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0)
    ecov = EmpiricalCovariance(assume_centered=False)
    ecov.fit(X)
    assert_almost_equal(ecov.error_norm(covars_pred[0], norm='frobenius'), 0)
    assert_almost_equal(ecov.error_norm(covars_pred[0], norm='spectral'), 0)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred, 'full')
    precs_pred = np.array([np.dot(prec, prec.T) for prec in precs_chol_pred])
    precs_est = np.array([linalg.inv(cov) for cov in covars_pred])
    assert_array_almost_equal(precs_est, precs_pred)
Example #8
    def jsd_diss(self, w1, mu1, cov1, w2, mu2, cov2, sample_count):
        """
        Estimates the Jensen-Shannon divergence between two GMMs via Monte Carlo sampling.
        :param w1, mu1, cov1: weights, means and covariances of the first GMM
        :param w2, mu2, cov2: weights, means and covariances of the second GMM
        :param sample_count: number of Monte Carlo samples to draw from each GMM
        :return: Jensen-Shannon divergence
        """
        gmm_p = GaussianMixture(n_components=1, covariance_type="full")
        gmm_p.weights_ = w1
        gmm_p.covariances_ = cov1
        gmm_p.means_ = mu1
        gmm_p.n_components = 1
        gmm_p.precisions_cholesky_ = _compute_precision_cholesky(cov1, "full")

        gmm_q = GaussianMixture(n_components=1, covariance_type="full")
        gmm_q.weights_ = w2
        gmm_q.covariances_ = cov2
        gmm_q.means_ = mu2
        gmm_q.n_components = 1
        gmm_q.precisions_cholesky_ = _compute_precision_cholesky(cov2, "full")

        X = gmm_p.sample(sample_count)[0]
        log_p_X = gmm_p.score_samples(X)
        log_q_X = gmm_q.score_samples(X)
        log_mix_X = np.logaddexp(log_p_X, log_q_X)

        Y = gmm_q.sample(sample_count)[0]
        log_p_Y = gmm_p.score_samples(Y)
        log_q_Y = gmm_q.score_samples(Y)
        log_mix_Y = np.logaddexp(log_p_Y, log_q_Y)

        # Monte Carlo estimate of JSD(P, Q) = 0.5*KL(P||M) + 0.5*KL(Q||M),
        # where M = (P + Q)/2 and log M(x) = logaddexp(log p(x), log q(x)) - log 2
        return (log_p_X.mean() -
                (log_mix_X.mean() - np.log(2)) + log_q_Y.mean() -
                (log_mix_Y.mean() - np.log(2))) / 2
Example #9
    def _m_step(self, X, log_resp):
        """M step.
            Parameters
            ----------
            X : array-like, shape (n_samples, n_features)
            log_resp : array-like, shape (n_samples, n_components)
                Logarithm of the posterior probabilities (or responsibilities) of
                the point of each sample in X.
            """
        resp = (self.Xweights * np.exp(log_resp).T).T
        self.weights_, self.means_, self.covariances_ = (
            _estimate_gaussian_parameters(X, resp, self.reg_covar,
                                          self.covariance_type))

        self.weights_ /= np.sum(self.Xweights)

        self.precisions_cholesky_ = _compute_precision_cholesky(
            self.covariances_, self.covariance_type)
def test_compute_log_det_cholesky():
    n_features = 2
    rand_data = RandomData(np.random.RandomState(0))

    for covar_type in COVARIANCE_TYPE:
        covariance = rand_data.covariances[covar_type]

        if covar_type == 'full':
            predicted_det = np.array([linalg.det(cov) for cov in covariance])
        elif covar_type == 'tied':
            predicted_det = linalg.det(covariance)
        elif covar_type == 'diag':
            predicted_det = np.array([np.prod(cov) for cov in covariance])
        elif covar_type == 'spherical':
            predicted_det = covariance ** n_features

        # compute the Cholesky decomposition of the precision matrix and check
        # that its log-determinant equals -0.5 * log(det(covariance))
        expected_det = _compute_log_det_cholesky(_compute_precision_cholesky(
            covariance, covar_type), covar_type, n_features=n_features)
        assert_array_almost_equal(expected_det, -0.5 * np.log(predicted_det))
def test_gaussian_suffstat_sk_spherical():
    # the spherical covariance should equal the variance of the flattened,
    # one-dimensional data (n_components=1)
    rng = np.random.RandomState(0)
    n_samples, n_features = 500, 2

    X = rng.rand(n_samples, n_features)
    X = X - X.mean()
    resp = np.ones((n_samples, 1))
    nk = np.array([n_samples])
    xk = X.mean()
    covars_pred_spherical = _estimate_gaussian_covariances_spherical(resp, X, nk, xk, 0)
    covars_pred_spherical2 = np.dot(X.flatten().T, X.flatten()) / (
        n_features * n_samples
    )
    assert_almost_equal(covars_pred_spherical, covars_pred_spherical2)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred_spherical, "spherical")
    assert_almost_equal(covars_pred_spherical, 1.0 / precs_chol_pred**2)
Example #12
    def _set_gmm(self):
        """
        Make a GMM object for sampling
        """
        from sklearn.mixture._gaussian_mixture import (
            _compute_precision_cholesky)

        # these numbers are not used because we set the means, etc by hand
        ngauss = self.weights.size

        gmm = self._make_gmm(ngauss)
        gmm.means_ = self.means.copy()
        gmm.covariances_ = self.covars.copy()
        gmm.weights_ = self.weights.copy()

        gmm.precisions_cholesky_ = _compute_precision_cholesky(
            self.covars,
            "full",
        )

        self._gmm = gmm
Example #13
    def get_best_gmm(self, data, lims=[1, 10]):
        lowest_bic = np.infty
        bic = []
        aic = []
        # minim = False
        # minim_flag = 2

        n_components_range = range(lims[0], lims[1] + 1, 1)
        for n_components in n_components_range:
            # Fit a mixture of Gaussians with EM; beware of cases where the
            # model is not found for any number of components.
            gmm = GMM(n_components=n_components, covariance_type='full')
            gmm.fit(data)
            bic.append(gmm.bic(data))
            aic.append(gmm.aic(data))

            if bic[-1] < lowest_bic:
                lowest_bic = bic[-1]
                best_gmm = n_components
            try:
                if (bic[-1] > bic[-2] > bic[-3] and
                                bic[-3] < bic[-4] < bic[-5]):
                    best_gmm = n_components - 2
                    break

            except IndexError:
                pass
        # if best_gmm <= 6:  # The derivative does not make sense here  # THIS MUST BE CHECKED
        best_gmm = np.array(bic).argmin() + lims[0]

        gmm = GMM(n_components=best_gmm, covariance_type='full')
        gmm.fit(data)

        self.weights_ = gmm.weights_
        self.covariances_ = gmm.covariances_ # self.covariances_ = gmm._get_covars()
        self.means_ = gmm.means_
        self.n_components = gmm.n_components

        self.precisions_cholesky_ = _compute_precision_cholesky(gmm.covariances_, "full")
def get_3d_grid_gmm(subdivisions=[5,5,5], variance=0.04):
    """
    Compute the weights, means and covariances of a GMM placed on a 3D grid
    :param subdivisions: 3-element list with the number of subdivisions of the 3D space along each axis, forming the grid
    :param variance: scalar variance of each (spherical) Gaussian
    :return gmm: instance of sklearn GaussianMixture (GMM) with components placed on the grid
    """
    # n_gaussians = reduce(lambda x, y: x*y,subdivisions)
    n_gaussians = np.prod(np.array(subdivisions))
    step = [1.0/(subdivisions[0]),  1.0/(subdivisions[1]),  1.0/(subdivisions[2])]

    means = np.mgrid[ step[0]-1: 1.0-step[0]: complex(0, subdivisions[0]),
                      step[1]-1: 1.0-step[1]: complex(0, subdivisions[1]),
                      step[2]-1: 1.0-step[2]: complex(0, subdivisions[2])]
    means = np.reshape(means, [3, -1]).T
    covariances = variance*np.ones_like(means)
    weights = (1.0/n_gaussians)*np.ones(n_gaussians)
    gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
    gmm.weights_ = weights
    gmm.covariances_ = covariances
    gmm.means_ = means
    gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'diag')
    return gmm
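
# An illustrative usage sketch (the random point cloud below is a made-up
# assumption): build the grid GMM and compute soft assignments for a set of points.
import numpy as np

grid_gmm = get_3d_grid_gmm(subdivisions=[3, 3, 3], variance=0.04)
points = np.random.RandomState(0).uniform(-1, 1, size=(100, 3))
resp = grid_gmm.predict_proba(points)          # shape (100, 27): responsibility of each grid Gaussian
print(resp.shape, resp.sum(axis=1)[:3])        # each row sums to 1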
def test_suffstat_sk_diag():
    # test against 'full' case
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    resp = rng.rand(n_samples, n_components)
    resp = resp / resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)
    nk = resp.sum(axis=0)
    xk = np.dot(resp.T, X) / nk[:, np.newaxis]
    covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0)
    covars_pred_diag = _estimate_gaussian_covariances_diag(resp, X, nk, xk, 0)

    ecov = EmpiricalCovariance()
    for (cov_full, cov_diag) in zip(covars_pred_full, covars_pred_diag):
        ecov.covariance_ = np.diag(np.diag(cov_full))
        cov_diag = np.diag(cov_diag)
        assert_almost_equal(ecov.error_norm(cov_diag, norm='frobenius'), 0)
        assert_almost_equal(ecov.error_norm(cov_diag, norm='spectral'), 0)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred_diag, 'diag')
    assert_almost_equal(covars_pred_diag, 1. / precs_chol_pred**2)
Example #16
def get_means_resp(X, log_resp, cov):
    _, means_, covariances_ = _gm._estimate_gaussian_parameters(X, np.exp(log_resp), 1e-6, cov)
    precisions_cholesky_ = _gm._compute_precision_cholesky(covariances_, cov)
    log_resp = _gm._estimate_log_gaussian_prob(X, means_, precisions_cholesky_, cov)
    return means_, log_resp
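
# A hedged usage sketch: `_gm` is assumed to be an alias for
# sklearn.mixture._gaussian_mixture in the module above, and the data and
# uniform responsibilities below are synthetic.
import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(100, 2)
log_resp = np.full((100, 3), np.log(1.0 / 3.0))   # uniform responsibilities over 3 components
means_, log_resp = get_means_resp(X, log_resp, "full")
print(means_.shape, log_resp.shape)               # (3, 2) and (100, 3)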
Example #17
cov_init_grav = np.array([gmmref.covariances_[:, 0]]).reshape((3, 1, 1))
clfgrav = utils.pgi_utils.GaussianMixture(
    n_components=3,
    means_init=means_init_grav,
    precisions_init=cov_init_grav,
    n_init=1,
    max_iter=2,
    tol=np.inf,
)
# random fit, we set values after.
clfgrav.fit(np.random.randn(10, 1))
clfgrav.means_ = means_init_grav
clfgrav.covariances_ = cov_init_grav
from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky

clfgrav.precisions_cholesky_ = _compute_precision_cholesky(
    clfgrav.covariances_, clfgrav.covariance_type)
clfgrav.weights_ = gmmref.weights_
testXplot_grav = np.linspace(-1.2, 0.1, 1000)[:, np.newaxis]
score_grav = clfgrav.score_samples(testXplot_grav)
ax2.plot(
    testXplot_grav,
    np.exp(score_grav),
    linewidth=3.0,
    label="1D Probability Density Distribution",
    c="k",
)
ax2.set_ylim([0.0, 2])
ax2.legend(fontsize=ticksize)

# create the 1D GMM profile for mag. susc.
means_init_mag = gmmref.means_[:, 1].reshape(3, 1)
Example #18
    def fit(self, data):
        if (len(data) <= 20):
            return 0
        best_gmm = self.trainBestModel(data)
        if (self.initialized == False):
            self.weights_ = best_gmm.weights_
            self.covariances_ = best_gmm.covariances_  # self.covariances_ = gmm._get_covars()
            self.means_ = best_gmm.means_
            self.n_components = best_gmm.n_components
            self.precisions_cholesky_ = _compute_precision_cholesky(
                best_gmm.covariances_, "full")
            self.initialized = True
            logging.debug(
                f'Initial training components: {best_gmm.n_components} \t W: {best_gmm.weights_} '
            )
        else:

            w_all = np.concatenate((self.weights_, best_gmm.weights_),
                                   axis=None)
            mu_all = np.concatenate((self.means_, best_gmm.means_), axis=0)
            cov_all = np.concatenate(
                (self.covariances_, best_gmm.covariances_), axis=0)
            n_components_range = range(
                self.n_components + best_gmm.n_components,
                self.n_components - 1, -1)
            #logging.debug(f'Search  from:{n_components_range}')
            bicreduced = []
            lowest_bic = np.infty
            jumlahSample = 5 * len(data)

            currentSample = self.sample(2 * jumlahSample)[0]
            dataxx = np.concatenate((currentSample, data), axis=0)
            for n_components in n_components_range:
                #print(n_components)
                w, m, c = self.mixture_reduction(w_all,
                                                 mu_all,
                                                 cov_all,
                                                 n_components,
                                                 isomorphic=True,
                                                 verbose=False,
                                                 optimization=False)
                gmm_p = GaussianMixture(n_components=n_components,
                                        covariance_type="full")
                gmm_p.weights_ = w
                gmm_p.covariances_ = c
                gmm_p.means_ = m
                gmm_p.precisions_cholesky_ = _compute_precision_cholesky(
                    c, "full")
                bic_ = gmm_p.bic(dataxx)
                bicreduced.append(bic_)

                #print('REDUCD BIC components {0} = {1}'.format(n_components, bic_))
                if bic_ < lowest_bic:
                    lowest_bic = bic_
                    best_gmm = gmm_p

            logging.debug(
                f'Initial n_components: {self.n_components} \t Drift components: {best_gmm.n_components}  '
            )
            self.weights_ = best_gmm.weights_ / np.sum(best_gmm.weights_)
            self.means_ = best_gmm.means_
            self.covariances_ = best_gmm.covariances_
            self.n_components = best_gmm.n_components
            #print("W awal:", self.weights_)
            #Compute the Cholesky decomposition of the precisions.
            self.prune()
            #print("W Prune:", self.weights_)
            self.precisions_cholesky_ = _compute_precision_cholesky(
                self.covariances_, self.covariance_type)