from typing import Tuple, cast
from collections.abc import Iterable

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.mixture import GaussianMixture
from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky


def gmm_em(dataList, nmix, final_niter, ds_factor):
    """Train a diagonal-covariance GMM by EM, growing from 1 to nmix components."""
    dataList = load_data(dataList)
    nfiles = len(dataList)
    gm, gv = comp_gm_gv(dataList)
    # niter = [1, 2, 4, 4, 4, 4, 6, 6, 10, 10, 15]
    niter = np.ones(10, dtype=np.int32)
    niter[int(np.log2(nmix))] = final_niter  # extra EM iterations at the final size
    # warm_start so each fit() resumes from the floored/split parameters
    # instead of re-initializing with k-means.
    model = GaussianMixture(n_components=1, covariance_type='diag', verbose=0,
                            max_iter=100, warm_start=True)
    data = np.concatenate(dataList, axis=1).T
    mix = 1
    while mix <= nmix:
        # Stop downsampling the frames once the model approaches its final size.
        if mix >= nmix // 2:
            ds_factor = 1
        print('\nRe-estimating the GMM hyperparameters for %d components ...' % mix)
        for i in range(niter[int(np.log2(mix))]):
            print('EM iter#: %d \t' % i, end='')
            model.fit(data[::ds_factor])
            w = model.weights_
            sigma = model.covariances_
            sigma = apply_var_floors(w, sigma, 1)
            model.covariances_ = sigma
            model.precisions_ = 1 / sigma
            # For 'diag' models the precision Cholesky factor is 1/std.
            model.precisions_cholesky_ = 1 / np.sqrt(sigma)
            # Private sklearn API: per-sample log-likelihoods and responsibilities.
            llk, _ = model._estimate_log_prob_resp(data)
            print('[llk = %.2f]' % np.mean(llk))
        if mix < nmix:
            model = mixup(model)
        mix *= 2
    return model
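
# `mixup` is called above but not defined in this snippet. Below is a minimal
# sketch of the usual binary-splitting step it presumably performs (an
# assumption, not the original implementation): each component is split in two
# by perturbing its mean along the per-dimension standard deviation, and the
# weights are halved. The 0.55 factor is common GMM-UBM practice.
def mixup_sketch(model):
    mu, sigma, w = model.means_, model.covariances_, model.weights_
    eps = 0.55 * np.sqrt(sigma)                      # per-dimension offset
    model.n_components = 2 * len(w)
    model.means_ = np.vstack([mu - eps, mu + eps])   # split each mean in two
    model.covariances_ = np.vstack([sigma, sigma])   # children inherit variances
    model.weights_ = np.hstack([w, w]) / 2.0         # halve the weights
    model.precisions_ = 1 / model.covariances_
    model.precisions_cholesky_ = 1 / np.sqrt(model.covariances_)
    return model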

def gmm_scale(gmm, shift=None, scale=None, reverse=False, params=None):
    """
    Apply scaling factors to GMM instances.

    Parameters
    ----------
    gmm : GaussianMixture
        GMM instance to be scaled.
    shift : int, float, optional
        Shift for the entire model. Default is None (no shift).
    scale : int, float, optional
        Scale for all components. Default is None (no scale).
    reverse : bool, optional
        Whether the GMM should be reversed.
    params : dict, optional
        GaussianMixture params for initialization of the new instance.

    Returns
    -------
    GaussianMixture
        Modified GMM instance.

    """

    # Fetch parameters if not supplied
    if params is None:
        # noinspection PyUnresolvedReferences
        params = gmm.get_params()

    # Instantiate new GMM
    gmm_new = GaussianMixture(**params)

    # Create scaled fitted GMM model
    gmm_new.weights_ = gmm.weights_

    # Apply shift if set (copy otherwise, so the in-place operations below
    # cannot mutate the input model's means)
    gmm_new.means_ = gmm.means_ + shift if shift is not None else gmm.means_.copy()

    # Apply scale
    if scale is not None:
        gmm_new.means_ /= scale
    gmm_new.covariances_ = gmm.covariances_ / scale ** 2 if scale is not None else gmm.covariances_
    gmm_new.precisions_ = np.linalg.inv(gmm_new.covariances_) if scale is not None else gmm.precisions_
    gmm_new.precisions_cholesky_ = np.linalg.cholesky(gmm_new.precisions_) if scale is not None \
        else gmm.precisions_cholesky_

    # Reverse if set
    if reverse:
        gmm_new.means_ *= -1

    # Add converged attribute if available
    if getattr(gmm, "converged_", False):
        gmm_new.converged_ = gmm.converged_

    # Return scaled GMM
    return gmm_new
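
# A quick usage sketch for gmm_scale (data and names here are illustrative):
# a model shifted by +2 assigns the same log-density to X + 2 that the
# original assigns to X.
def _demo_gmm_scale():
    X = np.random.default_rng(0).normal(size=(500, 2))
    gmm = GaussianMixture(n_components=2, covariance_type='full').fit(X)
    shifted = gmm_scale(gmm, shift=2.0)
    np.testing.assert_allclose(gmm.score_samples(X), shifted.score_samples(X + 2.0))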

def create_sklearn_gmm(weights, mean_tensor, cov_tensor, random_state=0):
    n_components = len(weights)
    gmm = GaussianMixture(n_components=n_components, covariance_type='full',
                          random_state=random_state)
    gmm.weights_ = weights.numpy()
    gmm.means_ = mean_tensor.numpy()
    gmm.covariances_ = cov_tensor.numpy()
    gmm.precisions_ = np.array(
        [np.linalg.inv(cov) for cov in gmm.covariances_])
    gmm.precisions_cholesky_ = np.array(
        [np.linalg.cholesky(prec) for prec in gmm.precisions_])
    return gmm
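
# Illustrative usage of create_sklearn_gmm. The `.numpy()` calls above imply
# the parameters arrive as PyTorch tensors, which is assumed here.
def _demo_create_sklearn_gmm():
    import torch
    weights = torch.tensor([0.4, 0.6])
    means = torch.zeros(2, 3)
    covs = torch.stack([torch.eye(3), 2.0 * torch.eye(3)])
    gmm = create_sklearn_gmm(weights, means, covs)
    return gmm.sample(10)  # the hand-assembled model supports sampling/scoring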

def gmm_loglik(y, pi, mu, sigma, K):
    model = GaussianMixture(n_components=K, covariance_type='diag')
    model.fit(y)  # fit once only so the instance counts as "fitted"
    N = np.shape(mu)[0]
    N_test = np.shape(y)[0]
    ll_test = np.zeros(N)
    for i in range(N):
        # Overwrite the fitted parameters with the i-th parameter set.
        model.means_ = mu[i, :]
        model.covariances_ = sigma[i, :] ** 2
        model.precisions_ = 1 / (sigma[i, :] ** 2)
        model.weights_ = pi[i, :]
        model.precisions_cholesky_ = _compute_precision_cholesky(
            model.covariances_, model.covariance_type)
        ll_test[i] = model.score(y)  # mean log-likelihood over the test set
    return ll_test * N_test  # convert the mean to a total log-likelihood
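
# Shape conventions inferred from the indexing above (stated as assumptions):
# y is (N_test, D); pi is (N, K); mu and sigma are (N, K, D) for a
# D-dimensional diagonal model. A synthetic call for illustration:
def _demo_gmm_loglik():
    rng = np.random.default_rng(0)
    N, K, D, N_test = 4, 3, 2, 50
    y = rng.normal(size=(N_test, D))
    pi = np.full((N, K), 1.0 / K)
    mu = rng.normal(size=(N, K, D))
    sigma = np.ones((N, K, D))
    return gmm_loglik(y, pi, mu, sigma, K)  # one total LL per parameter set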

def test_once_by_random_features():
    Xtrain = np.random.random_sample(5000).reshape(-1, 10)
    Xtest = np.random.random_sample(500).reshape(-1, 10)
    gmm_orig = GaussianMixture(n_components=8, random_state=1)
    gmm_copy = GaussianMixture()
    gmm_orig.fit(Xtrain)
    # Copy every attribute that fit() sets; the copy then scores identically.
    gmm_copy.weights_ = gmm_orig.weights_
    gmm_copy.means_ = gmm_orig.means_
    gmm_copy.covariances_ = gmm_orig.covariances_
    gmm_copy.precisions_ = gmm_orig.precisions_
    gmm_copy.precisions_cholesky_ = gmm_orig.precisions_cholesky_
    gmm_copy.converged_ = gmm_orig.converged_
    gmm_copy.n_iter_ = gmm_orig.n_iter_
    gmm_copy.lower_bound_ = gmm_orig.lower_bound_
    y_orig = gmm_orig.score_samples(Xtest)
    y_copy = gmm_copy.score_samples(Xtest)
    return all(y_orig == y_copy)
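
# Of the attributes copied above, score_samples/predict only read weights_,
# means_, and precisions_cholesky_ in current sklearn versions; the others
# matter for different methods (e.g. sample() reads covariances_). A minimal
# sketch relying on that observation:
def _minimal_copy_sketch(gmm_orig):
    gmm_copy = GaussianMixture()
    gmm_copy.weights_ = gmm_orig.weights_
    gmm_copy.means_ = gmm_orig.means_
    gmm_copy.precisions_cholesky_ = gmm_orig.precisions_cholesky_
    return gmm_copy  # enough for score_samples / predict / predict_proba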

def fit_markov_chain(y, plot=False):
    y_0 = y[:-1]
    y_1 = y[1:]
    grad_0 = np.gradient(y_0)
    grad_1 = np.gradient(y_1)
    state_1 = grad_1[np.where(grad_0 < 0)]  # instances where previous gradient was negative
    state_2 = grad_1[np.where(grad_0 > 0)]  # instances where previous gradient was positive
    mean_1, std_1 = stats.norm.fit(state_1)
    mean_2, std_2 = stats.norm.fit(state_2)
    # Reshaping parameters to be suitable for sklearn.GaussianMixture
    means = np.array([mean_1, mean_2]).reshape(2, 1)
    precisions = np.array([1 / std_1 ** 2, 1 / std_2 ** 2])
    GM = GaussianMixture(n_components=2, covariance_type='spherical')
    GM.weights_ = np.array([0.5, 0.5])
    GM.means_ = means
    # 'spherical' models store variances; the precision Cholesky factor is 1/std.
    GM.covariances_ = np.array([std_1 ** 2, std_2 ** 2])
    GM.precisions_ = precisions
    GM.precisions_cholesky_ = np.sqrt(precisions)
    GM.converged_ = True
    if plot:
        samples = GM.sample(5000)[0]
        fig, ax_list = plt.subplots(3, 1)
        fig.set_size_inches(20, 20)
        ax_list[0].hist(state_1, bins=70)
        ax_list[1].hist(state_2, bins=70)
        lnspc_1 = np.linspace(state_1.min(), state_1.max(), y.shape[0])
        gauss_1 = stats.norm.pdf(lnspc_1, mean_1, std_1)
        lnspc_2 = np.linspace(state_2.min(), state_2.max(), y.shape[0])
        gauss_2 = stats.norm.pdf(lnspc_2, mean_2, std_2)
        ax_list[0].plot(lnspc_1, gauss_1)
        ax_list[1].plot(lnspc_2, gauss_2)
        ax_list[0].scatter(mean_1, 30)
        ax_list[1].scatter(mean_2, 30)
        ax_list[2].hist(samples, bins=100)
        plt.show()
    return GM
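
# Illustrative call on synthetic data: fit the two gradient states of a noisy
# sine wave, then sample from the resulting two-component spherical model.
def _demo_fit_markov_chain():
    t = np.linspace(0, 20 * np.pi, 2000)
    y = np.sin(t) + 0.1 * np.random.default_rng(0).normal(size=t.shape)
    GM = fit_markov_chain(y)
    return GM.sample(100)[0]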

def _create_gmm(k, means, weights, precisions=None, covariances=None):
    # Derive whichever of (precisions, covariances) is missing via pseudo-inverse.
    if covariances is None:
        precisions = np.array(precisions)
        covariances = np.linalg.pinv(precisions)
    elif precisions is None:
        covariances = np.array(covariances)
        precisions = np.linalg.pinv(covariances)
    gmm = GaussianMixture(n_components=k, weights_init=weights, means_init=means,
                          reg_covar=1e-2, precisions_init=precisions,
                          max_iter=1, warm_start=True)
    try:
        gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'full')
    except Exception:
        # Singular covariances: regularize and retry once.
        c2 = covariances.copy()
        covariances = _singular_prevent_multiple(covariances)
        precisions = np.linalg.pinv(covariances)
        try:
            gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'full')
        except Exception:
            c2.dump('cov.npy')
            raise Exception('Bad covariance matrix! Dumped to cov.npy')
    gmm.weights_ = weights
    gmm.means_ = means
    gmm.covariances_ = covariances
    gmm.precisions_ = precisions
    return gmm
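
# `_singular_prevent_multiple` is not defined in this snippet. A plausible
# sketch of its intent, judging from the retry logic above (purely an
# assumption): add diagonal jitter so the precision Cholesky succeeds.
def _singular_prevent_multiple_sketch(covariances, jitter=1e-6):
    covariances = np.array(covariances, dtype=float)
    eye = np.eye(covariances.shape[-1])
    return covariances + jitter * eye  # broadcasts over the component axis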

def load_ubm(path):
    """
    Load UBM stored with save_ubm, returning GMM object and normalization vectors

    Parameters:
        path (str): Where to load UBM from
    Returns:
        ubm (sklearn.mixture.GaussianMixture): Trained GMM model
        means, stds (ndarray): Means and stds of variables to be stored
            along UBM for normalization purposes
    """
    data = np.load(path)
    n_components = data["ubm_means"].shape[0]
    cov_type = "diag" if data["ubm_covariances"].ndim == 2 else "full"
    ubm = GaussianMixture(n_components=n_components, covariance_type=cov_type)
    ubm.means_ = data["ubm_means"]
    ubm.weights_ = data["ubm_weights"]
    ubm.covariances_ = data["ubm_covariances"]
    ubm.precisions_ = data["ubm_precisions"]
    ubm.precisions_cholesky_ = data["ubm_precisions_cholesky"]
    means = data["means"]
    stds = data["stds"]
    return ubm, means, stds
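
# save_ubm is referenced above but not shown. A minimal counterpart sketch,
# assuming an .npz archive with exactly the key names load_ubm reads:
def save_ubm_sketch(path, ubm, means, stds):
    np.savez(path,
             ubm_means=ubm.means_,
             ubm_weights=ubm.weights_,
             ubm_covariances=ubm.covariances_,
             ubm_precisions=ubm.precisions_,
             ubm_precisions_cholesky=ubm.precisions_cholesky_,
             means=means,
             stds=stds)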

def _fit(self, X: np.ndarray) -> np.ndarray:
    pred = self._cluster_and_decide(X)
    self.children: Tuple["DivisiveCluster"] = cast(
        Tuple["DivisiveCluster"], tuple())

    uni_labels = np.unique(pred)
    labels = pred.reshape((-1, 1)).copy()
    if len(uni_labels) > 1:
        for ul in uni_labels:
            inds = pred == ul
            new_X = X[inds]
            dc = DivisiveCluster(
                cluster_method=self.cluster_method,
                max_components=self.max_components,
                min_split=self.min_split,
                max_level=self.max_level,
                cluster_kws=self.cluster_kws,
                delta_criter=self.delta_criter,
            )
            dc.parent = self
            if (len(new_X) > self.max_components
                    and len(new_X) >= self.min_split
                    and self.depth + 1 < self.max_level):
                child_labels = dc._fit(new_X)
                while labels.shape[1] <= child_labels.shape[1]:
                    labels = np.column_stack(
                        (labels, np.zeros((len(X), 1), dtype=int)))
                labels[inds, 1:child_labels.shape[1] + 1] = child_labels
            else:
                # make a "GaussianMixture" model for clusters
                # that were not fitted
                if self.cluster_method == "gmm":
                    cluster_idx = len(dc.parent.children) - 1
                    parent_model = dc.parent.model_
                    model = GaussianMixture()
                    model.weights_ = np.array([1])
                    model.means_ = parent_model.means_[
                        cluster_idx].reshape(1, -1)
                    model.covariance_type = parent_model.covariance_type
                    if model.covariance_type == "tied":
                        model.covariances_ = parent_model.covariances_
                        model.precisions_ = parent_model.precisions_
                        model.precisions_cholesky_ = (
                            parent_model.precisions_cholesky_)
                    else:
                        cov_types = ["spherical", "diag", "full"]
                        n_features = model.means_.shape[-1]
                        cov_shapes = [
                            (1,),
                            (1, n_features),
                            (1, n_features, n_features),
                        ]
                        cov_shape_idx = cov_types.index(
                            model.covariance_type)
                        model.covariances_ = parent_model.covariances_[
                            cluster_idx].reshape(cov_shapes[cov_shape_idx])
                        model.precisions_ = parent_model.precisions_[
                            cluster_idx].reshape(cov_shapes[cov_shape_idx])
                        model.precisions_cholesky_ = (
                            parent_model.precisions_cholesky_[cluster_idx].
                            reshape(cov_shapes[cov_shape_idx]))
                    dc.model_ = model

    return labels
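
# A standalone sketch of the trick used in the else-branch above: carve a
# single-component GaussianMixture out of one component of a fitted parent
# model (shown for 'full' covariances; data and names are illustrative).
def _demo_single_component_from_parent():
    X = np.random.default_rng(0).normal(size=(300, 2))
    parent = GaussianMixture(n_components=3, covariance_type="full").fit(X)
    idx = 0
    model = GaussianMixture()
    model.covariance_type = parent.covariance_type
    model.weights_ = np.array([1.0])
    model.means_ = parent.means_[idx].reshape(1, -1)
    model.covariances_ = parent.covariances_[idx].reshape(1, 2, 2)
    model.precisions_ = parent.precisions_[idx].reshape(1, 2, 2)
    model.precisions_cholesky_ = parent.precisions_cholesky_[idx].reshape(1, 2, 2)
    return model.score_samples(X)  # log-density under the single component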

def gmm_combine(gmms, weights=None, params=None, good_idx=None, gmms_means=None,
                gmms_variances=None, gmms_weights=None, gmms_zps=None):
    """
    Method to combine Gaussian Mixture Models. This function creates a new
    GaussianMixture instance and adds all mixture components from the input
    models.

    Parameters
    ----------
    gmms : np.ndarray
        Array of GaussianMixture instances to combine (must support boolean
        masking, e.g. an object ndarray).
    weights : iterable, optional
        Weights for each GaussianMixture instance in the input list.
    params : dict, optional
        GaussianMixture parameter dictionary for faster instantiation.
    good_idx : iterable, optional
        Boolean array or list for masking. True indicates a good entry in the
        input list, False a bad entry.
    gmms_means : np.ndarray, optional
        The means of all components for all input GMMs in an array. Can be
        used to speed up the combination process.
    gmms_variances : np.ndarray, optional
        Same as gmms_means, but for all variances of all components.
    gmms_weights : np.ndarray, optional
        Same as gmms_means, but for all weights of all components.
    gmms_zps : np.ndarray, optional
        Zero point for all models. If None is given, no shift is applied.

    Returns
    -------
    GaussianMixture
        Combined GaussianMixture instance.

    """

    # Dummy checks
    if not isinstance(gmms, Iterable):
        raise ValueError("Models must be provided as an iterable")

    # Set good_idx
    if good_idx is None:
        good_idx = [True for _ in range(len(gmms))]

    # Extract good GMMs
    gmms = gmms[good_idx]

    # Set weights to unity if not specified
    if weights is None:
        weights = [1 for _ in range(len(gmms))]
    else:
        weights = weights[good_idx]

    # Return None if weights are all bad
    if np.sum(np.isfinite(weights)) == 0:
        return None

    # Set zeropoints if not specified
    if gmms_zps is None:
        gmms_zps = [0. for _ in range(len(gmms))]
    else:
        gmms_zps = gmms_zps[good_idx]

    # Get parameters if not set from first entry
    if params is None:
        params = gmms[0].get_params()

    # Dummy check
    if np.sum([isinstance(g, GaussianMixture) for g in gmms]) != len(gmms):
        raise ValueError("Must only supply GaussianMixture instances")

    # Instantiate combined GMM
    gmm_combined = GaussianMixture(**params)

    # Build combined components from supplied models if not given as attributes
    if gmms_means is None or gmms_variances is None or gmms_weights is None:
        gmm_combined_means = gmms[0].means_ + gmms_zps[0]
        gmm_combined_variances = gmms[0].covariances_
        gmm_combined_weights = gmms[0].weights_ * weights[0]
        for gmm, w, zp in zip(gmms[1:], weights[1:], gmms_zps[1:]):
            gmm_combined_means = np.vstack([gmm_combined_means, gmm.means_ + zp])
            gmm_combined_variances = np.vstack([gmm_combined_variances, gmm.covariances_])
            gmm_combined_weights = np.hstack([gmm_combined_weights, gmm.weights_ * w])

    # If the attributes are provided, extract the parameters directly (much faster)
    else:
        gmm_combined_means = np.vstack(gmms_means[good_idx] + gmms_zps)
        gmm_combined_variances = np.vstack(gmms_variances[good_idx])
        gmm_combined_weights = np.hstack(gmms_weights[good_idx] * weights)

    # Add attributes to new mixture
    gmm_combined.n_components = len(gmm_combined_means)
    gmm_combined.means_ = gmm_combined_means
    gmm_combined.covariances_ = gmm_combined_variances
    gmm_combined.weights_ = gmm_combined_weights / np.sum(gmm_combined_weights)
    gmm_combined.precisions_ = np.linalg.inv(gmm_combined.covariances_)
    gmm_combined.precisions_cholesky_ = np.linalg.cholesky(gmm_combined.precisions_)

    # Add attribute to store number of input models used to create this model
    gmm_combined.n_models = len(gmms)

    # Return new GMM
    return gmm_combined
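
# Illustrative usage (assumed setup): combine two independently fitted
# full-covariance models into a single mixture over all of their components.
def _demo_gmm_combine():
    rng = np.random.default_rng(0)
    g1 = GaussianMixture(n_components=2, covariance_type="full").fit(
        rng.normal(size=(200, 1)))
    g2 = GaussianMixture(n_components=3, covariance_type="full").fit(
        rng.normal(loc=5.0, size=(200, 1)))
    combined = gmm_combine(np.array([g1, g2], dtype=object))
    return combined.n_components  # 5; weights are renormalized to sum to 1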