Example 1
def gmm_em(dataList, nmix, final_niter, ds_factor):
    dataList = load_data(dataList)
    nfiles = len(dataList)
    gm, gv = comp_gm_gv(dataList)
    niter = [1, 2, 4, 4, 4, 4, 6, 6, 10, 10, 15]
    niter[int(np.log2(nmix))] = final_niter

    model = GaussianMixture(n_components=1, covariance_type='diag', verbose=0, max_iter=100)
    data = np.concatenate(dataList, axis=1).T

    mix = 1
    while mix <= nmix:
        if mix >= nmix // 2:
            ds_factor = 1
        print('\nRe-estimating the GMM hyperparameters for %d components ...' %
              mix)
        for i in range(niter[int(np.log2(mix))]):
            print('EM iter#: %d \t' % i, end='')
            model.fit(data[::ds_factor])  # subsample frames while the model is small
            w = model.weights_
            sigma = model.covariances_
            sigma = apply_var_floors(w, sigma, 1)
            model.covariances_ = sigma
            model.precisions_ = 1 / sigma
            model.precisions_cholesky_ = np.sqrt(1 / sigma)  # keep the Cholesky factors in sync ('diag': 1/std)
            llk, _ = model._estimate_log_prob_resp(data)
            print('[llk = %.2f]' % np.mean(llk))

        if mix < nmix:
            model = mixup(model)
        mix *= 2

    return model
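The `mixup` helper is not shown in this example. In GMM-UBM training it conventionally doubles the model by splitting every Gaussian in two along its variance. A minimal sketch of such a splitter for the diagonal-covariance model above, assuming that convention (the name `eps` and the perturbation scheme are illustrative, not the original implementation):

import numpy as np
from sklearn.mixture import GaussianMixture

def mixup(model, eps=0.1):
    # Binary split: every component becomes two, with the means nudged by
    # +/- eps * std per dimension and the weights halved.
    mu, sigma, w = model.means_, model.covariances_, model.weights_
    offset = eps * np.sqrt(sigma)
    new = GaussianMixture(n_components=2 * len(w), covariance_type='diag',
                          max_iter=model.max_iter)
    new.means_ = np.vstack([mu - offset, mu + offset])
    new.covariances_ = np.vstack([sigma, sigma])
    new.precisions_ = 1.0 / new.covariances_
    new.precisions_cholesky_ = np.sqrt(new.precisions_)  # 1/std for 'diag'
    new.weights_ = np.hstack([w, w]) / 2.0
    return new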
Example 2
def gmm_scale(gmm, shift=None, scale=None, reverse=False, params=None):
    """
    Apply scaling factors to GMM instances.

    Parameters
    ----------
    gmm : GaussianMixture
        GMM instance to be scaled.
    shift : int, float, optional
        Shift for the entire model. Default is None (no shift).
    scale : int, float, optional
        Scale for all components. Default is None (no scaling).
    reverse : bool, optional
        Whether the GMM should be reversed.
    params : dict, optional
        GaussianMixture params for initialization of the new instance.

    Returns
    -------
    GaussianMixture
        Modified GMM instance.

    """

    # Fetch parameters if not supplied
    if params is None:
        # noinspection PyUnresolvedReferences
        params = gmm.get_params()

    # Instantiate new GMM
    gmm_new = GaussianMixture(**params)

    # Create scaled fitted GMM model
    gmm_new.weights_ = gmm.weights_

    # Apply shift if set (copy so the in-place ops below cannot mutate the input model)
    gmm_new.means_ = gmm.means_ + shift if shift is not None else gmm.means_.copy()

    # Apply scale
    if scale is not None:
        gmm_new.means_ /= scale

    gmm_new.covariances_ = gmm.covariances_ / scale ** 2 if scale is not None else gmm.covariances_
    gmm_new.precisions_ = np.linalg.inv(gmm_new.covariances_) if scale is not None else gmm.precisions_
    gmm_new.precisions_cholesky_ = np.linalg.cholesky(gmm_new.precisions_) if scale is not None \
        else gmm.precisions_cholesky_

    # Reverse if set
    if reverse:
        gmm_new.means_ *= -1

    # Add converged attribute if available
    if hasattr(gmm, "converged_"):
        gmm_new.converged_ = gmm.converged_

    # Return scaled GMM
    return gmm_new
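A usage sketch (data and factors are illustrative): standardizing a fitted model without refitting it. For a density, rescaling by `scale` shifts the mean log-likelihood by `log(scale)`.

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(0)
X = rng.normal(loc=5.0, scale=2.0, size=(1000, 1))
gmm = GaussianMixture(n_components=2, covariance_type='full').fit(X)

# Model of z = (x - 5) / 2, derived from the fitted model:
gmm_z = gmm_scale(gmm, shift=-5.0, scale=2.0)
z = (X - 5.0) / 2.0
print(gmm.score(X), gmm_z.score(z))  # mean log-likelihoods differ by log(2)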
Example 3
def create_sklearn_gmm(weights, mean_tensor, cov_tensor, random_state=0):
    n_components = len(weights)
    gmm = GaussianMixture(n_components=n_components,
                          covariance_type='full',
                          random_state=random_state)
    gmm.weights_ = weights.numpy()
    gmm.means_ = mean_tensor.numpy()
    gmm.covariances_ = cov_tensor.numpy()
    gmm.precisions_ = np.array(
        [np.linalg.inv(cov) for cov in gmm.covariances_])
    gmm.precisions_cholesky_ = np.array(
        [np.linalg.cholesky(prec) for prec in gmm.precisions_])
    return gmm
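The `.numpy()` calls indicate the inputs are expected to be PyTorch tensors; a usage sketch under that assumption:

import torch

weights = torch.tensor([0.3, 0.7])
means = torch.zeros(2, 4)
covs = torch.eye(4).repeat(2, 1, 1)  # two identity covariance matrices

gmm = create_sklearn_gmm(weights, means, covs)
samples, labels = gmm.sample(10)  # the assigned attributes suffice for sampling and scoring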
Example 4
def gmm_loglik(y,pi,mu,sigma,K):
    model = GaussianMixture(K, covariance_type='diag')
    model.fit(y)
    N = np.shape(mu)[0]
    N_test = np.shape(y)[0]
    ll_test = np.zeros(N)
    for i in range(N):
        model.means_ = mu[i,:]
        model.covariances_ = sigma[i,:]**2
        model.precisions_ = 1/(sigma[i,:]**2)
        model.weights_ = pi[i,:]
        model.precisions_cholesky_ = _compute_precision_cholesky(model.covariances_, model.covariance_type)
        ll_test[i] = model.score(y) 
    return ll_test*N_test
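This relies on `_compute_precision_cholesky`, importable in recent scikit-learn versions from the private module `sklearn.mixture._gaussian_mixture` (the path may change between releases). The loop suggests `mu` and `sigma` hold one set of K diagonal Gaussians per draw, e.g. posterior samples of mixture parameters; an illustrative call under those assumed shapes:

import numpy as np

N, K, D, N_test = 10, 2, 3, 100
y = np.random.randn(N_test, D)
pi = np.full((N, K), 1.0 / K)   # mixture weights, one row per draw
mu = np.random.randn(N, K, D)   # K means per draw
sigma = np.ones((N, K, D))      # per-dimension standard deviations

total_ll = gmm_loglik(y, pi, mu, sigma, K)  # per-draw total log-likelihood over y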
Example 5
def test_once_by_random_features():
    Xtrain = numpy.random.random_sample((5000,)).reshape(-1, 10)
    Xtest = numpy.random.random_sample((500,)).reshape(-1, 10)

    gmm_orig = GaussianMixture(n_components=8, random_state=1)
    gmm_copy = GaussianMixture()

    gmm_orig.fit(Xtrain)

    gmm_copy.weights_ = gmm_orig.weights_
    gmm_copy.means_ = gmm_orig.means_
    gmm_copy.covariances_ = gmm_orig.covariances_
    gmm_copy.precisions_ = gmm_orig.precisions_
    gmm_copy.precisions_cholesky_ = gmm_orig.precisions_cholesky_
    gmm_copy.converged_ = gmm_orig.converged_
    gmm_copy.n_iter_ = gmm_orig.n_iter_
    gmm_copy.lower_bound_ = gmm_orig.lower_bound_

    y_orig = gmm_orig.score_samples(Xtest)
    y_copy = gmm_copy.score_samples(Xtest)

    return all(y_orig == y_copy)
Example 6
def fit_markov_chain(y,plot=False):
	y_0 = y[:-1]
	y_1 = y[1:]
	grad_0 = np.gradient(y_0)
	grad_1 = np.gradient(y_1)
	state_1 = grad_1[np.where(grad_0 < 0)] # instances where previous gradient was negative
	state_2 = grad_1[np.where(grad_0 > 0)] # instances where previous gradient was positive
	mean_1,std_1 = stats.norm.fit(state_1)
	mean_2,std_2 = stats.norm.fit(state_2)
	# Reshaping parameters to be suitable for sklearn.GaussianMixture
	means = np.array([mean_1,mean_2])
	means = means.reshape(2,1)
	y_GM = np.concatenate((state_2.reshape(-1,1),state_1.reshape(-1,1)))
	precisions = [1 / (std_1 ** 2), 1 / (std_2 ** 2)]
	GM = GaussianMixture(n_components=2, covariance_type='spherical')
	GM.weights_ = np.array([0.5, 0.5])
	GM.means_ = means
	GM.covariances_ = np.array([std_1 ** 2, std_2 ** 2])  # spherical components store variances, not stds
	GM.precisions_ = np.array(precisions)
	GM.precisions_cholesky_ = np.sqrt(np.array(precisions))  # Cholesky of a scalar precision is its square root
	GM.converged_ = True
	if(plot):
		samples = GM.sample(5000)[0]
		fig,ax_list = plt.subplots(3,1)
		fig.set_size_inches(20,20)
		ax_list[0].hist(state_1,bins=70)
		ax_list[1].hist(state_2,bins=70)
		lnspc_1 = np.linspace(state_1.min(),state_1.max(),y.shape[0])
		gauss_1 = stats.norm.pdf(lnspc_1, mean_1, std_1)
		lnspc_2 = np.linspace(state_2.min(),state_2.max(),y.shape[0])
		gauss_2 = stats.norm.pdf(lnspc_2, mean_2, std_2)
		ax_list[0].plot(lnspc_1,gauss_1)
		ax_list[1].plot(lnspc_2,gauss_2)
		ax_list[0].scatter(mean_1,30)
		ax_list[1].scatter(mean_2,30)
		ax_list[2].hist(samples,bins=100)
		plt.show()
	return GM
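A usage sketch on a synthetic series (the signal is illustrative; the function itself assumes `numpy as np`, `scipy.stats as stats`, `matplotlib.pyplot as plt` and `GaussianMixture` are already imported):

import numpy as np

t = np.linspace(0, 20 * np.pi, 2000)
y = np.sin(t) + 0.1 * np.random.randn(t.size)  # noisy oscillation

GM = fit_markov_chain(y)      # two-state mixture over the gradient
samples, _ = GM.sample(1000)  # draw from the fitted mixture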
Example 7
def _create_gmm(k, means, weights, precisions=None, covariances=None):
    if covariances is None:
        precisions = np.array(precisions)
        covariances = np.linalg.pinv(precisions)
    elif precisions is None:
        covariances = np.array(covariances)
        precisions = np.linalg.pinv(covariances)

    gmm = GaussianMixture(n_components=k,
                          weights_init=weights,
                          means_init=means,
                          reg_covar=1e-2,
                          precisions_init=precisions,
                          max_iter=1,
                          warm_start=True)

    try:
        gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances,
                                                               'full')
    except Exception:
        c2 = covariances.copy()
        covariances = _singular_prevent_multiple(covariances)
        precisions = np.linalg.pinv(covariances)
        try:
            gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances,
                                                                   'full')
        except Exception:
            c2.dump('cov.npy')
            raise Exception('Problem with the matrix! Dumped to cov.npy')

    gmm.weights_ = weights
    gmm.means_ = means
    gmm.covariances_ = covariances
    gmm.precisions_ = precisions

    return gmm
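An illustrative call with well-conditioned covariances, which skips the singular-matrix fallback (`_compute_precision_cholesky` again comes from `sklearn.mixture._gaussian_mixture`; `_singular_prevent_multiple` is a project-specific helper not shown here):

import numpy as np

means = np.array([[0.0, 0.0], [4.0, 4.0]])
weights = np.array([0.5, 0.5])
covs = np.stack([np.eye(2), 2.0 * np.eye(2)])  # 'full' covariances, shape (k, d, d)

gmm = _create_gmm(k=2, means=means, weights=weights, covariances=covs)
print(gmm.score_samples(means))  # log-density at the two component means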
Example 8
def load_ubm(path):
    """
    Load UBM stored with save_ubm, returning
    GMM object and normalization vectors

    Parameters:
        path (str): Where to load UBM from
    Returns:
        ubm (sklearn.mixture.GaussianMixture): Trained GMM model
        means, stds (ndarray): Means and stds of the variables, stored
            along with the UBM for normalization purposes
    """
    data = np.load(path)
    n_components = data["ubm_means"].shape[0]
    cov_type = "diag" if data["ubm_covariances"].ndim == 2 else "full"
    ubm = GaussianMixture(n_components=n_components, covariance_type=cov_type)
    ubm.means_ = data["ubm_means"]
    ubm.weights_ = data["ubm_weights"]
    ubm.covariances_ = data["ubm_covariances"]
    ubm.precisions_ = data["ubm_precisions"]
    ubm.precisions_cholesky_ = data["ubm_precisions_cholesky"]
    means = data["means"]
    stds = data["stds"]
    return ubm, means, stds
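The docstring refers to `save_ubm`, which is not shown. A minimal counterpart consistent with the keys read above might look like this (a sketch assuming plain `np.savez` storage):

import numpy as np

def save_ubm(path, ubm, means, stds):
    """Store a fitted GMM plus normalization vectors for load_ubm."""
    np.savez(path,
             ubm_means=ubm.means_,
             ubm_weights=ubm.weights_,
             ubm_covariances=ubm.covariances_,
             ubm_precisions=ubm.precisions_,
             ubm_precisions_cholesky=ubm.precisions_cholesky_,
             means=means,
             stds=stds)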
Example 9
    def _fit(self, X: np.ndarray) -> np.ndarray:
        pred = self._cluster_and_decide(X)
        self.children: Tuple["DivisiveCluster", ...] = cast(
            Tuple["DivisiveCluster", ...], tuple())

        uni_labels = np.unique(pred)
        labels = pred.reshape((-1, 1)).copy()
        if len(uni_labels) > 1:
            for ul in uni_labels:
                inds = pred == ul
                new_X = X[inds]
                dc = DivisiveCluster(
                    cluster_method=self.cluster_method,
                    max_components=self.max_components,
                    min_split=self.min_split,
                    max_level=self.max_level,
                    cluster_kws=self.cluster_kws,
                    delta_criter=self.delta_criter,
                )
                dc.parent = self
                if (len(new_X) > self.max_components
                        and len(new_X) >= self.min_split
                        and self.depth + 1 < self.max_level):
                    child_labels = dc._fit(new_X)
                    while labels.shape[1] <= child_labels.shape[1]:
                        labels = np.column_stack(
                            (labels, np.zeros((len(X), 1), dtype=int)))
                    labels[inds, 1:child_labels.shape[1] + 1] = child_labels
                else:
                    # make a "GaussianMixture" model for clusters
                    # that were not fitted
                    if self.cluster_method == "gmm":
                        cluster_idx = len(dc.parent.children) - 1
                        parent_model = dc.parent.model_
                        model = GaussianMixture()
                        model.weights_ = np.array([1])
                        model.means_ = parent_model.means_[
                            cluster_idx].reshape(1, -1)
                        model.covariance_type = parent_model.covariance_type
                        if model.covariance_type == "tied":
                            model.covariances_ = parent_model.covariances_
                            model.precisions_ = parent_model.precisions_
                            model.precisions_cholesky_ = (
                                parent_model.precisions_cholesky_)
                        else:
                            cov_types = ["spherical", "diag", "full"]
                            n_features = model.means_.shape[-1]
                            cov_shapes = [
                                (1, ),
                                (1, n_features),
                                (1, n_features, n_features),
                            ]
                            cov_shape_idx = cov_types.index(
                                model.covariance_type)
                            model.covariances_ = parent_model.covariances_[
                                cluster_idx].reshape(cov_shapes[cov_shape_idx])
                            model.precisions_ = parent_model.precisions_[
                                cluster_idx].reshape(cov_shapes[cov_shape_idx])
                            model.precisions_cholesky_ = (
                                parent_model.precisions_cholesky_[cluster_idx].
                                reshape(cov_shapes[cov_shape_idx]))

                        dc.model_ = model

        return labels
Example 10
def gmm_combine(gmms, weights=None, params=None, good_idx=None,
                gmms_means=None, gmms_variances=None, gmms_weights=None, gmms_zps=None):
    """
    Method to combine Gaussian Mixture Models. This function creates a new GaussianMixture instance and adds all
    mixture components from the input models.

    Parameters
    ----------
    gmms : np.ndarray
        Object array of GaussianMixture instances to combine (an array is required for the good_idx masking below).
    weights : iterable, optional
        Weights for each GaussianMixture instance in the input list.
    params : dict, optional
        GaussianMixture parameter dictionary for faster instantiation.
    good_idx : iterable, optional
        Boolean array or list for masking. True indicates a good entry in the input list, False a bad entry.
    gmms_means : np.ndarray, optional
        The means of all components for all input GMMs in an array. Can be used to speed up the combination process.
    gmms_variances : np.ndarray, optional
        Same as gmms_means, but for all variances of all components.
    gmms_weights : np.ndarray, optional
        Same as gmms_means, but for all weights of all components.
    gmms_zps : np.ndarray, optional
        Zero point for all models. If None is given, no shift is applied.

    Returns
    -------
    GaussianMixture
        Combined GaussianMixture instance.

    """

    # Dummy checks
    if not isinstance(gmms, Iterable):
        raise ValueError("Models must be provided as an iterable")

    # Set good_idx
    if good_idx is None:
        good_idx = [True for _ in range(len(gmms))]

    # Extract good GMMs
    gmms = gmms[good_idx]

    # Set weights to unity if not specified
    if weights is None:
        weights = [1 for _ in range(len(gmms))]
    else:
        weights = weights[good_idx]

    # Return None if weights are all bad
    if np.sum(np.isfinite(weights)) == 0:
        return None

    # Set zeropoints if not specified
    if gmms_zps is None:
        gmms_zps = [0. for _ in range(len(gmms))]
    else:
        gmms_zps = gmms_zps[good_idx]

    # Get parameters if not set from first entry
    if params is None:
        params = gmms[0].get_params()

    # Dummy check
    if np.sum([isinstance(g, GaussianMixture) for g in gmms]) != len(gmms):
        raise ValueError("Must only supply GaussianMixture instances")

    # Instantiate combined GMM
    gmm_combined = GaussianMixture(**params)

    # Build combined components from supplied models if not given as attributes
    if gmms_means is None or gmms_variances is None or gmms_weights is None:

        gmm_combined_means = gmms[0].means_ + gmms_zps[0]
        gmm_combined_variances = gmms[0].covariances_
        gmm_combined_weights = gmms[0].weights_ * weights[0]
        for gmm, w, zp in zip(gmms[1:], weights[1:], gmms_zps[1:]):
            gmm_combined_means = np.vstack([gmm_combined_means, gmm.means_ + zp])
            gmm_combined_variances = np.vstack([gmm_combined_variances, gmm.covariances_])
            gmm_combined_weights = np.hstack([gmm_combined_weights, gmm.weights_ * w])

    # If the attributes are provided, extract the parameters directly (much faster)
    else:
        gmm_combined_means = np.vstack(gmms_means[good_idx] + gmms_zps)
        gmm_combined_variances = np.vstack(gmms_variances[good_idx])
        gmm_combined_weights = np.hstack(gmms_weights[good_idx] * weights)

    # Add attributes to new mixture
    gmm_combined.n_components = len(gmm_combined_means)
    gmm_combined.means_ = gmm_combined_means
    gmm_combined.covariances_ = gmm_combined_variances
    gmm_combined.weights_ = gmm_combined_weights / np.sum(gmm_combined_weights)
    gmm_combined.precisions_ = np.linalg.inv(gmm_combined.covariances_)
    gmm_combined.precisions_cholesky_ = np.linalg.cholesky(gmm_combined.precisions_)

    # Add attribute to store number of input models used to create this model
    gmm_combined.n_models = len(gmms)

    # Return new GMM
    return gmm_combined
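Because of the `gmms[good_idx]` masking, the models are expected in a NumPy object array rather than a plain list (and `Iterable` must be imported from `collections.abc`). A usage sketch for two full-covariance models:

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(0)
g1 = GaussianMixture(n_components=2, covariance_type='full').fit(rng.randn(500, 3))
g2 = GaussianMixture(n_components=2, covariance_type='full').fit(rng.randn(500, 3) + 4)

gmms = np.array([g1, g2], dtype=object)  # object array supports boolean masking
combined = gmm_combine(gmms, weights=np.array([0.7, 0.3]))
print(combined.n_components, combined.weights_.sum())  # 4 components, weights sum to 1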