# Common imports assumed by the snippets below; each snippet originally lived
# in a different file, so torch, pandas, matplotlib, etc. are imported where used.
import numpy as np
from scipy.stats import multivariate_normal
from sklearn.mixture import GaussianMixture
from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky


def get_P_of_Data_Given_Param(means, covs, weights, X, method='scipy'):
    """Return per-sample component responsibilities P(component | x)."""
    # list() matters here: a bare zip() iterator would be exhausted after the
    # first loop below, leaving the second loop with nothing to consume.
    samples = list(enumerate(X))
    p = {}
    if method == 'scipy':
        g = [multivariate_normal(mean=means[k], cov=covs[k], allow_singular=False)
             for k in range(len(weights))]
        gaussians = {}
        for index, x in samples:
            gaussians[index] = np.array([g_k.pdf(x) for g_k in g])
        for index, x in samples:
            probabilities = np.multiply(gaussians[index], weights)
            probabilities = probabilities / np.sum(probabilities)
            p[index] = probabilities
    else:
        gmm = GaussianMixture(n_components=len(weights), covariance_type='diag').fit(X)
        gmm.means_ = means
        gmm.covariances_ = covs
        gmm.weights_ = weights
        # recompute the precision factors for the injected covariances (assumed
        # diagonal vectors here, matching covariance_type='diag'); the original
        # placeholder scalar would crash predict_proba
        gmm.precisions_cholesky_ = _compute_precision_cholesky(np.asarray(covs), 'diag')
        for index, x in samples:
            p[index] = gmm.predict_proba(x.reshape(1, -1))
    return p

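# Hedged usage sketch (`_demo_responsibilities` is our name, not the source's):
# two well-separated 2-D components; each returned row sums to 1.
def _demo_responsibilities():
    means = [np.zeros(2), np.array([5.0, 5.0])]
    covs = [np.eye(2), np.eye(2)]
    weights = np.array([0.5, 0.5])
    X = np.array([[0.1, -0.2], [5.2, 4.9], [2.5, 2.5]])
    return get_P_of_Data_Given_Param(means, covs, weights, X)
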
def log_prob(self, x, t, feature, samples):
    observations = x[:, :t]
    # p(s_{t-1} | X_{0:t-1}) from the forward-backward algorithm
    p_s_past = {}
    for st in self.states:
        p_s_past[st], _, _ = fwd_bkw(observations, self.states,
                                     self.start_probability,
                                     self.transition_probability,
                                     self.emission_probability, st)

    # one-step prediction: p(s_t | X_{0:t-1}) = sum_s T[s_t, s] p(s | X_{0:t-1})
    p_currstate_past = {s: 0. for s in self.states}
    for curr_state in self.states:
        for st in self.states:
            p_currstate_past[curr_state] += (
                self.transition_probability[curr_state][st] * p_s_past[st])

    gmm = GaussianMixture(n_components=len(self.states), covariance_type='full')
    # dummy fit so sklearn allocates the fitted attributes we overwrite next
    gmm.fit(np.random.randn(10, observations.shape[0]))
    gmm.weights_ = np.array(list(p_currstate_past.values()))
    gmm.means_ = np.array(self.mean)
    gmm.covariances_ = np.array(self.cov)
    for i in range(len(self.states)):  # was hard-coded to range(2)
        gmm.precisions_[i] = np.linalg.inv(gmm.covariances_[i])
        # Cholesky factor of the *precision*; factoring the covariance here
        # (as the original did) makes score_samples silently wrong
        gmm.precisions_cholesky_[i] = np.linalg.cholesky(gmm.precisions_[i])
    return gmm.score_samples(samples)

def get_3d_grid_gmm(subdivisions=(5, 5, 5), variance=0.04):
    """
    Compute the weights, means and covariances of a GMM placed on a 3D grid.

    :param subdivisions: 3-element list, number of subdivisions of the 3D space
        along each axis to form the grid
    :param variance: scalar variance of each spherical component
    :return gmm: instance of sklearn GaussianMixture (GMM) object with fixed parameters
    """
    n_gaussians = np.prod(np.array(subdivisions))
    step = [1.0 / subdivisions[0],
            1.0 / subdivisions[1],
            1.0 / subdivisions[2]]

    # component centers on a regular grid inside [-1, 1]^3
    means = np.mgrid[step[0] - 1:1.0 - step[0]:complex(0, subdivisions[0]),
                     step[1] - 1:1.0 - step[1]:complex(0, subdivisions[1]),
                     step[2] - 1:1.0 - step[2]:complex(0, subdivisions[2])]
    means = np.reshape(means, [3, -1]).T
    covariances = variance * np.ones_like(means)
    weights = (1.0 / n_gaussians) * np.ones(n_gaussians)

    gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
    gmm.weights_ = weights
    gmm.covariances_ = covariances
    gmm.means_ = means
    # _compute_precision_cholesky now lives in sklearn.mixture._gaussian_mixture
    # (imported at the top); the old sklearn.mixture.gaussian_mixture path is gone
    gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'diag')
    return gmm

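# Hedged usage sketch (`_demo_grid_gmm` is our name, not the source's): score a
# random point cloud in [-1, 1]^3 against the 125-component grid GMM.
def _demo_grid_gmm():
    points = 2.0 * np.random.rand(1024, 3) - 1.0
    gmm = get_3d_grid_gmm(subdivisions=(5, 5, 5), variance=0.04)
    return gmm.score_samples(points)  # per-point log-likelihood, shape (1024,)
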
def sample_gaussian_mixture(pis, sigmas, mus, num_samples, n_features):
    """
    return: array of size (batch_size, num_samples*n_features) containing
    samples drawn from the Gaussian mixtures parameterized by pis, sigmas, mus,
    laid out per input row as
        input 1 [ s1_f1, s1_f2, s1_f3 | s2_f1, s2_f2, s2_f3 | ... ]
        input 2 [ s1_f1, s1_f2, s1_f3 | s2_f1, s2_f2, s2_f3 | ... ]
        ...
        input n [ s1_f1, s1_f2, s1_f3 | s2_f1, s2_f2, s2_f3 | ... ]
    """
    import torch
    from torch.autograd import Variable

    batch_size = pis.shape[0]
    num_mixtures = pis.shape[1]
    samples = np.zeros((batch_size, num_samples * n_features))
    gmm = GaussianMixture(n_components=num_mixtures, covariance_type='spherical')
    gmm.fit(np.random.rand(10, 1))  # dummy fit; real parameters are set per row below
    for i in range(batch_size):
        gmm.weights_ = pis[i]
        gmm.means_ = mus[i].reshape(num_mixtures, n_features)
        gmm.covariances_ = np.expand_dims(sigmas[i], axis=1) ** 2
        sample = gmm.sample(num_samples)
        samples[i] = np.ravel(sample[0])
    return Variable(torch.from_numpy(samples))

def test_fit(self):
    expected_means = np.array([-55., 0., 7.])
    expected_stds = np.array([3., .5, 1.])
    from sklearn.mixture import GaussianMixture

    # sample observations from a hand-built 1-D mixture
    gmm = GaussianMixture(n_components=3, means_init=expected_means)
    gmm.means_ = expected_means[..., None]
    gmm.covariances_ = np.array(expected_stds[..., None, None])
    gmm.weights_ = np.array([1 / 3, 1 / 3, 1 / 3])
    obs = gmm.sample(100000 + np.random.randint(-3, 3))[0].squeeze()

    init = deeptime.markov.hmm.init.gaussian.from_data(obs, n_hidden_states=3,
                                                       reversible=True)
    hmm_est = deeptime.markov.hmm.MaximumLikelihoodHMM(init)
    hmm = hmm_est.fit(obs).fetch_model()
    np.testing.assert_array_almost_equal(hmm.transition_model.transition_matrix,
                                         np.eye(3), decimal=3)
    m = hmm.output_model
    for mean, sigma in zip(m.means, m.sigmas):
        # find the expected mean closest to this one (order might have changed)
        mean_ix = np.argmin(np.abs(expected_means - mean))
        np.testing.assert_almost_equal(mean, expected_means[mean_ix], decimal=1)
        np.testing.assert_almost_equal(sigma * sigma, expected_stds[mean_ix],
                                       decimal=1)

def toGaussianMixture(self):
    g = GaussianMixture(self.n)
    g.fit(np.random.rand(2 * self.n).reshape((-1, 1)))  # dummy fit
    g.weights_ = np.array(self.weights)
    g.means_ = np.array(self.means)[:, np.newaxis]
    g.covariances_ = np.array(self.covariances)[:, np.newaxis, np.newaxis]
    # keep the precision factors consistent with the injected covariances;
    # otherwise score_samples() would still use the dummy fit's values
    g.precisions_cholesky_ = _compute_precision_cholesky(g.covariances_, 'full')
    return g

def gmm_scale(gmm, shift=None, scale=None, reverse=False, params=None):
    """
    Apply scaling factors to GMM instances.

    Parameters
    ----------
    gmm : GaussianMixture
        GMM instance to be scaled.
    shift : int, float, optional
        Shift for the entire model. Default is None (no shift).
    scale : int, float, optional
        Scale for all components. Default is None (no scale).
    reverse : bool, optional
        Whether the GMM should be reversed.
    params
        GaussianMixture params for initialization of new instance.

    Returns
    -------
    GaussianMixture
        Modified GMM instance.
    """
    # Fetch parameters if not supplied
    if params is None:
        # noinspection PyUnresolvedReferences
        params = gmm.get_params()

    # Instantiate new GMM
    gmm_new = GaussianMixture(**params)

    # Create scaled fitted GMM model
    gmm_new.weights_ = gmm.weights_

    # Apply shift if set
    gmm_new.means_ = gmm.means_ + shift if shift is not None else gmm.means_

    # Apply scale
    if scale is not None:
        gmm_new.means_ /= scale
    gmm_new.covariances_ = (gmm.covariances_ / scale ** 2
                            if scale is not None else gmm.covariances_)
    gmm_new.precisions_ = (np.linalg.inv(gmm_new.covariances_)
                           if scale is not None else gmm.precisions_)
    gmm_new.precisions_cholesky_ = (np.linalg.cholesky(gmm_new.precisions_)
                                    if scale is not None
                                    else gmm.precisions_cholesky_)

    # Reverse if set
    if reverse:
        gmm_new.means_ *= -1

    # Copy converged attribute if available (checking the attribute's presence,
    # not its truth value, so converged_=False is carried over too)
    if hasattr(gmm, 'converged_'):
        gmm_new.converged_ = gmm.converged_

    # Return scaled GMM
    return gmm_new

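# Hedged usage sketch (`_demo_gmm_scale` is our name, not the source's): move a
# fitted model into a standardized frame z = (x + shift) / scale without re-fitting.
def _demo_gmm_scale():
    x = np.random.randn(500, 2) * 3.0 + 10.0
    gmm = GaussianMixture(n_components=2, covariance_type='full').fit(x)
    gmm_norm = gmm_scale(gmm, shift=-x.mean(), scale=x.std())
    return gmm_norm.score_samples((x - x.mean()) / x.std())
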
def return_copy(self):
    """Shallow parameter copy: means, weights and covariances are copied by
    assignment, not deep-copied, so mutate the copy's arrays with care."""
    copy_tmp = GMM(n_components=self.n_components)
    copy_tmp.covariances_ = self.covariances_  # formerly _get_covars()
    copy_tmp.means_ = self.means_
    copy_tmp.weights_ = self.weights_
    return copy_tmp

def multimod_emd_from_gmm(means, sigmas, weights):
    # stack the per-hypothesis mixture parameters into flat arrays
    means_stacked = np.concatenate(means, axis=0)[:, :, 0, 0]
    sigmas_stacked = np.concatenate(sigmas, axis=0)[:, :, 0, 0]
    weights_stacked = np.concatenate(weights, axis=0)[:, 0, 0, 0]
    gmm = GaussianMixture(n_components=4, covariance_type='diag')
    gmm_vars = 2 * sigmas_stacked * sigmas_stacked
    precisions_cholesky = _compute_precision_cholesky(gmm_vars, 'diag')
    gmm.weights_ = weights_stacked
    gmm.means_ = means_stacked
    gmm.precisions_cholesky_ = precisions_cholesky
    gmm.covariances_ = gmm_vars
    y_sampled, _ = gmm.sample(1000)
    return wemd_from_pred_samples(y_sampled)

def get_multimodality_score(means, sigmas, weights):
    means_stacked = np.concatenate(means, axis=0)[:, :, 0, 0]
    sigmas_stacked = np.concatenate(sigmas, axis=0)[:, :, 0, 0]
    weights_stacked = np.concatenate(weights, axis=0)[:, 0, 0, 0]

    # full 4-component mixture
    gmm = GaussianMixture(n_components=4, covariance_type='diag')
    variances = 2 * sigmas_stacked * sigmas_stacked  # renamed from `vars` (shadowed a builtin)
    precisions_cholesky = _compute_precision_cholesky(variances, 'diag')
    gmm.weights_ = weights_stacked
    gmm.means_ = means_stacked
    gmm.precisions_cholesky_ = precisions_cholesky
    gmm.covariances_ = variances

    # unimodal reference: keep only the dominant component
    gmm_uni = GaussianMixture(n_components=1, covariance_type='diag')
    argmax = np.argmax(gmm.weights_)
    gmm_uni.means_ = gmm.means_[argmax, :].reshape([1, 2])
    gmm_uni.covariances_ = gmm.covariances_[argmax, :].reshape([1, 2])
    gmm_uni.precisions_cholesky_ = gmm.precisions_cholesky_[argmax, :].reshape([1, 2])
    gmm_uni.weights_ = np.array([1.0])

    Z_uni = compute_histogram_gmm(gmm_uni)
    Z = compute_histogram_gmm(gmm)
    ratio = computeWEMD(Z, Z_uni)
    return ratio

def create_sklearn_gmm(weights, mean_tensor, cov_tensor, random_state=0):
    # torch tensors in, scoring-ready sklearn GMM out
    n_components = len(weights)
    gmm = GaussianMixture(n_components=n_components, covariance_type='full',
                          random_state=random_state)
    gmm.weights_ = weights.numpy()
    gmm.means_ = mean_tensor.numpy()
    gmm.covariances_ = cov_tensor.numpy()
    gmm.precisions_ = np.array([np.linalg.inv(cov) for cov in gmm.covariances_])
    gmm.precisions_cholesky_ = np.array(
        [np.linalg.cholesky(prec) for prec in gmm.precisions_])
    return gmm

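# Hedged usage sketch (`_demo_create_sklearn_gmm` is our name and assumes torch
# is installed): two 3-D components with identity covariances.
def _demo_create_sklearn_gmm():
    import torch
    weights = torch.tensor([0.3, 0.7])
    means = torch.tensor([[0., 0., 0.], [4., 4., 4.]])
    covs = torch.stack([torch.eye(3), torch.eye(3)])
    gmm = create_sklearn_gmm(weights, means, covs)
    return gmm.score_samples(np.random.randn(5, 3))  # log-likelihoods, shape (5,)
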
def jsd_diss(self, w1, mu1, cov1, w2, mu2, cov2, sample_count=10000):
    """
    Calculates the Jensen-Shannon divergence between two GMMs via Monte Carlo.

    :param w1, mu1, cov1: weights, means and covariances of the first mixture
    :param w2, mu2, cov2: weights, means and covariances of the second mixture
    :param sample_count: number of Monte Carlo samples per mixture (this and
        the component counts were undefined free names in the original; both
        are now derived from the arguments)
    :return: Jensen-Shannon divergence
    """
    gmm_p = GaussianMixture(n_components=len(w1), covariance_type="full")
    gmm_p.weights_ = w1
    gmm_p.covariances_ = cov1
    gmm_p.means_ = mu1
    gmm_p.precisions_cholesky_ = _compute_precision_cholesky(cov1, "full")

    gmm_q = GaussianMixture(n_components=len(w2), covariance_type="full")
    gmm_q.weights_ = w2
    gmm_q.covariances_ = cov2
    gmm_q.means_ = mu2
    gmm_q.precisions_cholesky_ = _compute_precision_cholesky(cov2, "full")

    X = gmm_p.sample(sample_count)[0]
    log_p_X = gmm_p.score_samples(X)
    log_q_X = gmm_q.score_samples(X)
    log_mix_X = np.logaddexp(log_p_X, log_q_X)

    Y = gmm_q.sample(sample_count)[0]
    log_p_Y = gmm_p.score_samples(Y)
    log_q_Y = gmm_q.score_samples(Y)
    log_mix_Y = np.logaddexp(log_p_Y, log_q_Y)

    # JSD = (KL(P || M) + KL(Q || M)) / 2 with M = (P + Q) / 2;
    # log M = logaddexp(log P, log Q) - log 2, hence the log(2) corrections
    return (log_p_X.mean() - (log_mix_X.mean() - np.log(2))
            + log_q_Y.mean() - (log_mix_Y.mean() - np.log(2))) / 2

def generate_params_gmm(weight, mean, cov, use_cdf=False):
    from scipy.special import ndtr

    gmm = GaussianMixture(n_components=weight.size)
    gmm.weights_ = weight
    gmm.means_ = mean
    gmm.covariances_ = cov
    # sample() only checks that this attribute exists, so None passes the fit
    # check; it would not survive score_samples() or predict()
    gmm.precisions_cholesky_ = None
    params = gmm.sample()[0][0]
    if use_cdf:
        # squash through the standard normal CDF instead of clipping
        params = ndtr(params)
    else:
        params = np.clip(params, 0, 1)
    return params.tolist()

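# Hedged usage sketch (`_demo_generate_params` is our name, not the source's):
# one parameter vector in [0, 1]^2 from a two-component, well-separated mixture.
def _demo_generate_params():
    weight = np.array([0.7, 0.3])
    mean = np.array([[0.2, 0.2], [0.8, 0.8]])
    cov = np.stack([np.eye(2) * 0.01, np.eye(2) * 0.01])
    return generate_params_gmm(weight, mean, cov, use_cdf=True)
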
def generate_equal_weight_GMM(H_mu, H_var, covariance_type='diag'):
    n_components = len(H_mu)
    weights_init = n_components * [1. / n_components]
    GMM = GaussianMixture(n_components=n_components,
                          covariance_type=covariance_type,
                          n_init=1,  # the original's n_init=0 is invalid in sklearn
                          warm_start=True)
    GMM.weights_ = weights_init
    GMM.means_ = H_mu
    GMM.covariances_ = H_var
    GMM.precisions_cholesky_ = _compute_precision_cholesky(H_var, covariance_type)
    return GMM

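# Hedged usage sketch (`_demo_equal_weight_gmm` is our name, not the source's):
# two diagonal components; score_samples peaks at the component centers.
def _demo_equal_weight_gmm():
    H_mu = np.array([[0.0, 0.0], [3.0, 3.0]])
    H_var = np.array([[1.0, 1.0], [0.5, 0.5]])
    gmm = generate_equal_weight_GMM(H_mu, H_var, covariance_type='diag')
    return gmm.score_samples(H_mu)  # log-density at each center
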
def gmm_loglik(y, pi, mu, sigma, K):
    # mu, sigma: (N, K, d); pi: (N, K) -- one mixture per parameter draw
    model = GaussianMixture(K, covariance_type='diag')
    model.fit(y)  # dummy fit; parameters are overwritten in the loop
    N = np.shape(mu)[0]
    N_test = np.shape(y)[0]
    ll_test = np.zeros(N)
    for i in range(N):
        model.means_ = mu[i, :]
        model.covariances_ = sigma[i, :] ** 2
        model.precisions_ = 1 / (sigma[i, :] ** 2)
        model.weights_ = pi[i, :]
        model.precisions_cholesky_ = _compute_precision_cholesky(
            model.covariances_, model.covariance_type)
        ll_test[i] = model.score(y)  # mean per-sample log-likelihood
    return ll_test * N_test  # total test log-likelihood per parameter draw

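# Hedged usage sketch (`_demo_gmm_loglik` is our name, not the source's): three
# parameter draws of a 2-component, 2-D diagonal mixture on 200 test points.
def _demo_gmm_loglik():
    y = np.random.randn(200, 2)
    N, K, d = 3, 2, 2
    mu = np.random.randn(N, K, d)
    sigma = np.ones((N, K, d))
    pi = np.full((N, K), 0.5)
    return gmm_loglik(y, pi, mu, sigma, K)  # shape (3,)
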
def _sample_rows_same(self, X):
    """Uses the efficient sklearn implementation to sample from a Gaussian
    mixture -> only works if all rows of X are the same."""
    weights, locs, scales = self._get_mixture_components(
        np.expand_dims(X[0], axis=0))

    # renormalize so the weights sum to exactly 1 in float64
    weights = weights.astype(np.float64)
    weights = weights / np.sum(weights)

    gmm = GaussianMixture(n_components=self.n_centers, covariance_type='diag',
                          max_iter=5, tol=1e-1)
    gmm.fit(np.random.normal(size=(100, self.ndim_y)))  # just pretending a fit

    # overriding the GMM parameters with our own
    gmm.converged_ = True
    gmm.weights_ = weights[0]
    gmm.means_ = locs[0]
    gmm.covariances_ = scales[0]
    y_sample, _ = gmm.sample(X.shape[0])
    assert y_sample.shape == (X.shape[0], self.ndim_y)
    return X, y_sample

def read_pred():
    means = readFloat('%s-mixture_distribution_means.float3' % predition_path)      # shape (4, 2)
    sigmas = readFloat('%s-mixture_distribution_sigmas.float3' % predition_path)    # shape (4, 2)
    weights = readFloat('%s-mixture_distribution_weights.float3' % predition_path)  # shape (4,)

    sigmas = 2 * sigmas * sigmas  # convert sigma to the variance convention used here
    gmm = GaussianMixture(n_components=4, covariance_type='diag')
    precisions_cholesky = _compute_precision_cholesky(sigmas, 'diag')
    gmm.weights_ = weights
    gmm.means_ = means
    gmm.precisions_cholesky_ = precisions_cholesky
    gmm.covariances_ = sigmas
    return gmm

def EM_Process(data, n, covt):
    """
    data: array-shaped data.
    n: the number of components.
    covt: covariance_type, one of {'full', 'tied', 'diag', 'spherical'}.
    """
    GM = GaussianMixture(n_components=n, covariance_type=covt,
                         max_iter=600, random_state=3)
    # note: assigning means_/covariances_ before fit() (as the original did)
    # is a no-op, since fit() re-estimates them from scratch
    GM.fit(data)
    clusters = GM.predict(data)
    return clusters

def train(self, train_data):
    # 1. Create a GMM object and specify the number of components (classes)
    # 2. Fit the model to our training data (reshaped to a column vector)
    # 3. Return None
    data = np.array(train_data).reshape(-1, 1)
    gmm = GaussianMixture(n_components=2)
    gmm.fit(data)

    # sort the components by mean so labels are stable across runs
    order = gmm.means_.argsort(axis=0)[:, 0]
    gmm.means_ = gmm.means_[order, :]
    gmm.covariances_ = gmm.covariances_[order, :]
    gmm.weights_ = gmm.weights_[order]
    # keep the precision factors in the same order; otherwise predict()
    # would still use the pre-sort component order
    gmm.precisions_cholesky_ = gmm.precisions_cholesky_[order, :]
    self.__model = gmm
    return

def test_once_by_random_features():
    Xtrain = np.random.random_sample((5000,)).reshape(-1, 10)
    Xtest = np.random.random_sample((500,)).reshape(-1, 10)
    gmm_orig = GaussianMixture(n_components=8, random_state=1)
    gmm_copy = GaussianMixture()
    gmm_orig.fit(Xtrain)

    # copying these fitted attributes is all score_samples() needs
    gmm_copy.weights_ = gmm_orig.weights_
    gmm_copy.means_ = gmm_orig.means_
    gmm_copy.covariances_ = gmm_orig.covariances_
    gmm_copy.precisions_ = gmm_orig.precisions_
    gmm_copy.precisions_cholesky_ = gmm_orig.precisions_cholesky_
    gmm_copy.converged_ = gmm_orig.converged_
    gmm_copy.n_iter_ = gmm_orig.n_iter_
    gmm_copy.lower_bound_ = gmm_orig.lower_bound_

    y_orig = gmm_orig.score_samples(Xtest)
    y_copy = gmm_copy.score_samples(Xtest)
    return all(y_orig == y_copy)

def __init__(self, n_components, x, gmm_kwargs=dict()):
    import torch
    from torch.distributions import MultivariateNormal

    # fit mixture model
    if isinstance(x, torch.Tensor):
        x = x.detach().numpy()
    bsize = x.shape[0]
    g = GaussianMixture(n_components=n_components, **gmm_kwargs) \
        .fit(x.reshape(bsize, -1))
    # promote 'diag' covariances (n_components, n_features) to full matrices
    if len(g.covariances_.shape) == 2:
        g.covariances_ = np.array([np.diag(var) for var in g.covariances_])

    # extract means and covariances to compute the pdf later
    self.log_weights = torch.Tensor(np.log(g.weights_))
    self.mnormals = [MultivariateNormal(torch.Tensor(mu), torch.Tensor(var))
                     for mu, var in zip(g.means_, g.covariances_)]
    self.means = torch.Tensor(g.means_)
    self.covariances = torch.Tensor(g.covariances_)

def fit_markov_chain(y, plot=False):
    import scipy.stats as stats
    import matplotlib.pyplot as plt

    y_0 = y[:-1]
    y_1 = y[1:]
    grad_0 = np.gradient(y_0)
    grad_1 = np.gradient(y_1)
    state_1 = grad_1[np.where(grad_0 < 0)]  # instances where previous gradient was negative
    state_2 = grad_1[np.where(grad_0 > 0)]  # instances where previous gradient was positive
    mean_1, std_1 = stats.norm.fit(state_1)
    mean_2, std_2 = stats.norm.fit(state_2)

    # Reshaping parameters to be suitable for sklearn GaussianMixture
    means = np.array([mean_1, mean_2]).reshape(2, 1)
    variances = np.array([std_1 ** 2, std_2 ** 2])

    GM = GaussianMixture(n_components=2, covariance_type='spherical')
    GM.weights_ = np.array([0.5, 0.5])
    GM.means_ = means
    # for 'spherical', covariances_ must hold variances, precisions_ their
    # inverses and precisions_cholesky_ their square roots; the original mixed
    # stds, precisions and Cholesky factors inconsistently
    GM.covariances_ = variances
    GM.precisions_ = 1.0 / variances
    GM.precisions_cholesky_ = np.sqrt(GM.precisions_)
    GM.converged_ = True

    if plot:
        samples = GM.sample(5000)[0]
        fig, ax_list = plt.subplots(3, 1)
        fig.set_size_inches(20, 20)
        ax_list[0].hist(state_1, bins=70)
        ax_list[1].hist(state_2, bins=70)
        lnspc_1 = np.linspace(state_1.min(), state_1.max(), y.shape[0])
        gauss_1 = stats.norm.pdf(lnspc_1, mean_1, std_1)
        lnspc_2 = np.linspace(state_2.min(), state_2.max(), y.shape[0])
        gauss_2 = stats.norm.pdf(lnspc_2, mean_2, std_2)
        ax_list[0].plot(lnspc_1, gauss_1)
        ax_list[1].plot(lnspc_2, gauss_2)
        ax_list[0].scatter(mean_1, 30)
        ax_list[1].scatter(mean_2, 30)
        ax_list[2].hist(samples, bins=100)
        plt.show()
    return GM

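# Hedged usage sketch (`_demo_fit_markov_chain` is our name, not the source's):
# a noisy sine wave alternates gradient sign, giving two separated states.
def _demo_fit_markov_chain():
    t = np.linspace(0, 20 * np.pi, 5000)
    y = np.sin(t) + 0.05 * np.random.randn(t.size)
    GM = fit_markov_chain(y)
    return GM.sample(100)[0]  # draws from the two-state gradient mixture
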
def load_ubm(path):
    """
    Load a UBM stored with save_ubm, returning the GMM object and
    normalization vectors.

    Parameters:
        path (str): Where to load the UBM from
    Returns:
        ubm (sklearn.mixture.GaussianMixture): Trained GMM model
        means, stds (ndarray): Means and stds of variables, stored along
            with the UBM for normalization purposes
    """
    data = np.load(path)
    n_components = data["ubm_means"].shape[0]
    cov_type = "diag" if data["ubm_covariances"].ndim == 2 else "full"
    ubm = GaussianMixture(n_components=n_components, covariance_type=cov_type)
    ubm.means_ = data["ubm_means"]
    ubm.weights_ = data["ubm_weights"]
    ubm.covariances_ = data["ubm_covariances"]
    ubm.precisions_ = data["ubm_precisions"]
    ubm.precisions_cholesky_ = data["ubm_precisions_cholesky"]
    means = data["means"]
    stds = data["stds"]
    return ubm, means, stds

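# load_ubm expects an archive written by a save_ubm counterpart; a minimal
# sketch of that writer (ours, not the source's -- only the key names are
# pinned down by the loader above) could be:
def save_ubm(path, ubm, means, stds):
    np.savez(path,
             ubm_means=ubm.means_,
             ubm_weights=ubm.weights_,
             ubm_covariances=ubm.covariances_,
             ubm_precisions=ubm.precisions_,
             ubm_precisions_cholesky=ubm.precisions_cholesky_,
             means=means,
             stds=stds)
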
def _create_gmm(k, means, weights, precisions=None, covariances=None):
    # derive whichever of precisions/covariances was not given
    if covariances is None:
        precisions = np.array(precisions)
        covariances = np.linalg.pinv(precisions)
    elif precisions is None:
        covariances = np.array(covariances)
        precisions = np.linalg.pinv(covariances)

    gmm = GaussianMixture(n_components=k, weights_init=weights,
                          means_init=means, reg_covar=1e-2,
                          precisions_init=precisions, max_iter=1,
                          warm_start=True)
    try:
        gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'full')
    except Exception:
        # singular covariance: regularize and retry once, dumping the
        # offending matrices if that also fails
        c2 = covariances.copy()
        covariances = _singular_prevent_multiple(covariances)
        precisions = np.linalg.pinv(covariances)
        try:
            gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'full')
        except Exception:
            c2.dump('cov.npy')
            raise Exception('Problem with the covariance matrix! Dumped to cov.npy')
    gmm.weights_ = weights
    gmm.means_ = means
    gmm.covariances_ = covariances
    gmm.precisions_ = precisions
    return gmm

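# Hedged usage sketch (`_demo_create_gmm` is our name, not the source's):
# rebuild a scoring-ready 2-component GMM from stored parameters, no EM re-run.
def _demo_create_gmm():
    means = np.array([[0.0, 0.0], [4.0, 4.0]])
    weights = np.array([0.5, 0.5])
    covariances = np.stack([np.eye(2), np.eye(2)])
    gmm = _create_gmm(2, means, weights, covariances=covariances)
    return gmm.predict(np.array([[0.1, -0.1], [3.9, 4.2]]))  # -> array([0, 1])
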
def _estimate_GMM(self, x, n_component_lim=(1, 3)):
    """Find the GMM that best fits x using the Bayesian information criterion."""
    min_comp, max_comp = n_component_lim
    lowest_BIC = np.inf
    best_GMM = None
    for i_comp in range(min_comp, max_comp + 1):
        GMM = GaussianMixture(i_comp)
        GMM.fit(x)
        BIC = GMM.bic(x)
        if BIC < lowest_BIC:
            lowest_BIC = BIC
            best_GMM = GaussianMixture(i_comp)
            best_GMM.weights_ = GMM.weights_
            best_GMM.means_ = GMM.means_
            best_GMM.covariances_ = GMM.covariances_
            # without this, the never-fitted copy fails sklearn's fit check
            best_GMM.precisions_cholesky_ = GMM.precisions_cholesky_
    return best_GMM

# create the 1D GMM profile for mag. susc.
means_init_mag = gmmref.means_[:, 1].reshape(3, 1)
cov_init_mag = np.array([gmmref.covariances_[:, 1]]).reshape((3, 1, 1))
clfmag = GaussianMixture(
    n_components=3,
    means_init=means_init_mag,
    precisions_init=cov_init_mag,
    n_init=1,
    max_iter=2,
    tol=np.inf,
)
# random fit, we set values after
clfmag.fit(np.random.randn(10, 1))
clfmag.means_ = means_init_mag
clfmag.covariances_ = cov_init_mag
clfmag.precisions_cholesky_ = _compute_precision_cholesky(
    clfmag.covariances_, clfmag.covariance_type)
clfmag.weights_ = gmmref.weights_

testXplot_mag = np.linspace(-0.01, 1.2, 1000)[:, np.newaxis]
score_mag = clfmag.score_samples(testXplot_mag)
ax3.plot(np.exp(score_mag), testXplot_mag, linewidth=3.0, c="k")
ax3.set_xlim([0.0, 50])
ax3.set_xlabel("1D Probability Density values", fontsize=labelsize,
               rotation=-45, labelpad=0, x=0.5)
ax2.set_xlabel("Density (g/cc)", fontsize=labelsize)
ax3.set_ylabel("Magnetic Susceptibility (SI)", fontsize=labelsize)

def objective_2():
    # objective 2
    filenames = glob.glob('Lab 7/Objective 2/data/*.csv')
    subjects = get_subjects(filenames)
    trials = [str(x) for x in range(1, 6)]
    for subject in subjects:
        subjects_validate = subject
        subjects_train = [x for x in subjects if x != subject]

        # 2. leave-one-subject-out split
        gmm_validate_t, gmm_validate_ir = load_files(subjects_validate, trials, filenames, 25)
        gmm_train_t, gmm_train_ir = load_files(subjects_train, trials, filenames, 25)

        # 3. plot the training data
        title = 'Training with ' + subjects_validate + ' Left out'
        filename = 'training_data_' + subjects_validate + '.png'
        plot(gmm_train_t, gmm_train_ir, title=title, filename=filename, slice_window=None)

        # 4. histogram of the training data
        title = 'Histogram with ' + subjects_validate + ' Left out'
        filename = 'hist_training_' + subjects_validate + '.png'
        hist(gmm_train_t, gmm_train_ir, title=title, filename=filename)

        # 5. fit the GMM
        data_train = np.array(gmm_train_ir).reshape(-1, 1)
        data_validate = np.array(gmm_validate_ir).reshape(-1, 1)
        gmm = GaussianMixture(n_components=2)
        gmm.fit(data_train)

        # sort the components by mean so labels are stable across folds
        # https://stackoverflow.com/questions/37008588/sklearn-gmm-classification-prediction-component-assignment-order
        order = gmm.means_.argsort(axis=0)[:, 0]
        gmm.means_ = gmm.means_[order, :]
        gmm.covariances_ = gmm.covariances_[order, :]
        gmm.weights_ = gmm.weights_[order]
        # reorder the precision factors too, or predict() keeps the old order
        gmm.precisions_cholesky_ = gmm.precisions_cholesky_[order, :]

        title = 'IR Signal Histogram \n Individual with ' + subjects_validate + ' Left out'
        filename = 'hist_individual_' + subjects_validate + '.png'
        hist_gmm(data_train, gmm, plot_sum=False, title=title, filename=filename)
        title = 'IR Signal Histogram \n Sum with ' + subjects_validate + ' Left out'
        filename = 'hist_sum_' + subjects_validate + '.png'
        hist_gmm(data_train, gmm, plot_sum=True, title=title, filename=filename)

        # 6. predict on both splits and plot (`window` is a module-level slice)
        predictions_train = gmm.predict(data_train)
        predictions_validate = gmm.predict(data_validate)

        plt.ion()
        plt.cla()
        plt.plot(gmm_train_t[window], data_train[window], color='green', label='Voltage')
        plt.plot(gmm_train_t[window], predictions_train[window], color='red', label='Prediction')
        title = 'Prediction of Training Set \n with ' + subjects_validate + ' Left out'
        plt.title(title)
        plt.legend(loc='lower left', fontsize='small')
        plt.xlabel('Time')
        plt.ylabel('Reading')
        plt.pause(0.5)
        plt.show()
        filename = 'gmm_train_labeled_' + subjects_validate + '.png'
        plt.savefig(filename)

        plt.cla()
        plt.plot(gmm_validate_t[window], data_validate[window], color='green', label='Voltage')
        plt.plot(gmm_validate_t[window], predictions_validate[window], color='red', label='Prediction')
        title = 'Prediction of Validation Set \n with ' + subjects_validate + ' Left out'
        plt.title(title)
        plt.legend(loc='lower left', fontsize='small')
        plt.xlabel('Time')
        plt.ylabel('Reading')
        plt.pause(0.5)
        plt.show()
        filename = 'gmm_validate_labeled_' + subjects_validate + '.png'
        plt.savefig(filename)

def fit(self, data):
    """Train on drifted data, merging with the previous model once initialized."""
    best_gmm = self.trainBestModel(data)
    if not self.initialized:
        self.weights_ = best_gmm.weights_
        self.covariances_ = best_gmm.covariances_
        self.means_ = best_gmm.means_
        self.n_components = best_gmm.n_components
        self.precisions_cholesky_ = _compute_precision_cholesky(
            best_gmm.covariances_, "full")

        # backup for later adaptation
        self.backup_weights_ = best_gmm.weights_
        self.backup_covariances_ = best_gmm.covariances_
        self.backup_means_ = best_gmm.means_
        self.backup_n_components = best_gmm.n_components
        self.backup_precisions_cholesky_ = self.precisions_cholesky_
        self.initialized = True
    else:
        # pool the backup components with the newly fitted ones
        w_all = np.concatenate((self.backup_weights_, best_gmm.weights_), axis=None)
        mu_all = np.concatenate((self.backup_means_, best_gmm.means_), axis=0)
        cov_all = np.concatenate((self.backup_covariances_, best_gmm.covariances_), axis=0)

        n_components_range = range(self.max_components + 1, self.min_components, -1)
        bicreduced = []
        lowest_bic = np.inf
        jumlahSample = max(len(data), 100)
        currentSample = self.sample(jumlahSample)[0]
        Xall = np.concatenate((currentSample, data), axis=0)

        # reduce the pooled mixture to each candidate size and keep the
        # reduction with the lowest BIC on the drifted data
        for n_components in n_components_range:
            w, m, c = self.mixture_reduction(w_all, mu_all, cov_all,
                                             n_components, isomorphic=True,
                                             verbose=False, optimization=False)
            gmm_p = GaussianMixture(n_components=n_components, covariance_type="full")
            gmm_p.weights_ = w
            gmm_p.covariances_ = c
            gmm_p.means_ = m
            gmm_p.precisions_cholesky_ = _compute_precision_cholesky(c, "full")
            bic_ = gmm_p.bic(data)
            bicreduced.append(bic_)
            if self.verbose:
                print('REDUCED BIC components {0} = {1}'.format(n_components, bic_))
            if bic_ < lowest_bic:
                lowest_bic = bic_
                best_gmm = gmm_p

        self.weights_ = best_gmm.weights_ / np.sum(best_gmm.weights_)
        self.means_ = best_gmm.means_
        self.covariances_ = best_gmm.covariances_
        self.n_components = best_gmm.n_components
        # Cholesky decomposition of the precisions
        self.precisions_cholesky_ = _compute_precision_cholesky(
            best_gmm.covariances_, self.covariance_type)

def main(args):
    file_name = os.path.join("BetaCostFunction", args.dist + "_" + args.file)
    n_iter = args.n_iter
    n_samples = args.samples

    # define the true distribution
    if args.dist == "beta1":
        dist = st.beta(a=2, b=2, loc=-1, scale=2)
    elif args.dist == "beta2":
        dist = st.beta(a=5, b=5, loc=-1, scale=2)
    elif args.dist == "beta3":
        dist = st.beta(a=2, b=5, loc=-1, scale=2)
    elif "gmm" in args.dist:
        if args.dist == "gmm1":
            mu1, sigma1 = -0.5, 0.15
            mu2, sigma2 = 0.4, 0.3
            p = 0.6
        elif args.dist == "gmm2":
            mu1, sigma1 = -0.1, 0.3
            mu2, sigma2 = 0.4, 0.1
            p = 0.7
        dist = GaussianMixture(n_components=2, covariance_type="spherical")
        dist.weights_ = np.array([p, 1 - p])
        dist.means_ = np.array([[mu1], [mu2]])
        dist.covariances_ = np.array([sigma1 ** 2, sigma2 ** 2])
        # for 'spherical', precisions_cholesky_ is one scalar per component,
        # 1/sigma_k; the original built a 2x2 matrix Cholesky instead
        dist.precisions_cholesky_ = 1.0 / np.sqrt(dist.covariances_)

    chosen_functions = [0, 1, 2, 4, 5, 7, 9, 15, 16, 18]
    parameter_bounds = None  # e.g. ((1, None), (1, None), (None, None), (None, None))
    cost_coefs = pd.read_csv(args.coef_file)
    N = [10, 25, 50]

    try:
        df = pd.read_csv(file_name, index_col=0)
    except Exception:
        df = pd.DataFrame(columns=['w_star', 'method', 'function', 'N', "distribution"])
        df.to_csv(file_name)

    for z in range(len(chosen_functions)):
        coefs = cost_coefs.iloc[chosen_functions[z]].values
        new_f = lambda x, y: f_mean(x, y, coefs[0], coefs[1], coefs[2])
        saa = optimiser.SAA(new_f)
        bagging = optimiser.BaggingSolver(400, objective_function=new_f)
        if "gmm" in args.dist:
            mle = optimiser.GMMSolver(False, 2, objective_function=new_f,
                                      n_to_sample=n_samples)
            bayes = optimiser.GMMSolver(True, 2, objective_function=new_f,
                                        n_to_sample=n_samples)
        else:
            mle = optimiser.MLESolver(st.beta, parameter_bounds=parameter_bounds,
                                      objective_function=new_f,
                                      n_to_sample=n_samples, floc=-1, fscale=2)
            bayes = optimiser.BetaBayesianSolver(objective_function=new_f,
                                                 n_to_sample=n_samples)
        kde = optimiser.KDESolver(n_samples, objective_function=new_f)
        methods = [bagging, mle, saa, kde, bayes]

        for k in range(n_iter):
            for j in range(len(N)):
                if "gmm" in args.dist:
                    samples = dist.sample(N[j])[0].reshape(-1)
                else:
                    samples = dist.rvs(N[j])
                for ele in methods:
                    w_star = ele.solve(samples, initial_conditions=0.0)[0]
                    results = {'w_star': w_star,
                               'method': str(ele),
                               'function': chosen_functions[z] + 1,
                               'N': N[j],
                               "distribution": args.dist}
                    # DataFrame.append was removed in pandas 2; concat is equivalent
                    df = pd.concat([df, pd.DataFrame([results])], ignore_index=True)
                # save the results so far
                df.to_csv(file_name)

def _fit(self, X: np.ndarray) -> np.ndarray:
    pred = self._cluster_and_decide(X)
    self.children: Tuple["DivisiveCluster"] = cast(
        Tuple["DivisiveCluster"], tuple())

    uni_labels = np.unique(pred)
    labels = pred.reshape((-1, 1)).copy()
    if len(uni_labels) > 1:
        for ul in uni_labels:
            inds = pred == ul
            new_X = X[inds]
            dc = DivisiveCluster(
                cluster_method=self.cluster_method,
                max_components=self.max_components,
                min_split=self.min_split,
                max_level=self.max_level,
                cluster_kws=self.cluster_kws,
                delta_criter=self.delta_criter,
            )
            dc.parent = self
            if (len(new_X) > self.max_components
                    and len(new_X) >= self.min_split
                    and self.depth + 1 < self.max_level):
                child_labels = dc._fit(new_X)
                while labels.shape[1] <= child_labels.shape[1]:
                    labels = np.column_stack(
                        (labels, np.zeros((len(X), 1), dtype=int)))
                labels[inds, 1:child_labels.shape[1] + 1] = child_labels
            else:
                # make a "GaussianMixture" model for clusters
                # that were not fitted
                if self.cluster_method == "gmm":
                    cluster_idx = len(dc.parent.children) - 1
                    parent_model = dc.parent.model_
                    model = GaussianMixture()
                    model.weights_ = np.array([1])
                    model.means_ = parent_model.means_[cluster_idx].reshape(1, -1)
                    model.covariance_type = parent_model.covariance_type
                    if model.covariance_type == "tied":
                        model.covariances_ = parent_model.covariances_
                        model.precisions_ = parent_model.precisions_
                        model.precisions_cholesky_ = parent_model.precisions_cholesky_
                    else:
                        cov_types = ["spherical", "diag", "full"]
                        n_features = model.means_.shape[-1]
                        cov_shapes = [
                            (1,),
                            (1, n_features),
                            (1, n_features, n_features),
                        ]
                        cov_shape_idx = cov_types.index(model.covariance_type)
                        model.covariances_ = parent_model.covariances_[
                            cluster_idx].reshape(cov_shapes[cov_shape_idx])
                        model.precisions_ = parent_model.precisions_[
                            cluster_idx].reshape(cov_shapes[cov_shape_idx])
                        model.precisions_cholesky_ = parent_model.precisions_cholesky_[
                            cluster_idx].reshape(cov_shapes[cov_shape_idx])
                    dc.model_ = model
    return labels