def choose(node_info: MixtureGaussianParams,
           pvals: List[Union[str, float]]) -> Optional[float]:
    """
    Draw one value for a MixtureGaussian node.

    node_info: fitted mixture parameters; the keys "mean", "covars" and
        "coef" are read and passed to GMM as means, covariances and priors.
    pvals: parent values; an empty list means the unconditioned mixture
        is sampled.

    Return the sampled value, or ``np.nan`` when the mixture has no
    components or when any parent value is NaN.
    """
    mean = node_info["mean"]
    covariance = node_info["covars"]
    weights = node_info["coef"]
    n_comp = len(weights)

    if n_comp == 0:
        return np.nan
    # A single NaN parent value already makes the conditional mixture
    # undefined, so bail out on *any* NaN (previously only an all-NaN
    # parent vector was caught and partial NaNs reached GMM.condition).
    if pvals and np.isnan(np.array(pvals)).any():
        return np.nan

    gmm = GMM(n_components=n_comp, priors=weights,
              means=mean, covariances=covariance)
    if not pvals:
        return gmm.sample(1)[0][0]

    # Condition on columns 1..len(pvals); column 0 stays free and is the
    # one that gets sampled (assumes the node's own variable was fitted as
    # column 0 -- TODO confirm against the fitting code).
    parent_columns = list(range(1, len(pvals) + 1))
    cond_gmm = gmm.condition(parent_columns, [pvals])
    return cond_gmm.sample(1)[0][0]
def test_kmeanspp_initialization():
    """Test that k-means++ gives more even ellipse widths than random init."""
    random_state = check_random_state(1)
    n_samples = 300
    n_features = 2
    third = n_samples // 3

    X = np.ndarray((n_samples, n_features))
    X[:third, :] = random_state.multivariate_normal(
        [0.0, 1.0], [[0.5, -1.0], [-1.0, 5.0]], size=(third,))
    X[third:-third, :] = random_state.multivariate_normal(
        [-2.0, -2.0], [[3.0, 1.0], [1.0, 1.0]], size=(third,))
    X[-third:, :] = random_state.multivariate_normal(
        [3.0, 1.0], [[3.0, -1.0], [-1.0, 1.0]], size=(third,))

    # Blow up the second feature: without adjusted initial covariances or
    # a normalized dataset the standard implementation fails on this.
    X[:, 1] *= 10000.0

    def average_width_distance(init_params):
        # Fit a fresh 3-component model and return the mean pairwise
        # distance between the widths of its ellipses.
        model = GMM(n_components=3, random_state=random_state)
        model.from_samples(X, init_params=init_params)
        widths = np.array([shape[1] for _, shape in model.to_ellipses()])
        return np.mean(pdist(widths[:, np.newaxis]))

    average_widths_random = average_width_distance("random")
    average_widths_kmeanspp = average_width_distance("kmeans++")

    # Random initialization scales the covariances unevenly.
    assert_less(average_widths_kmeanspp, average_widths_random)
def choose(self, pvalues, method, outcome):
    '''
    Randomly choose state of node from probability distribution conditioned on *pvalues*.

    Arguments:
        1. *pvalues* -- An array containing the assigned states of the node's parents. This must be in the same order as the parents appear in ``self.Vdataentry['parents']``.
        2. *method* -- ``'simple'`` samples from a single Gaussian whose mean is ``mean_base`` plus the parent values weighted by ``mean_scal``; any other value treats ``mean_base``, ``variance`` and ``mean_scal`` as the means, covariances and priors of a Gaussian mixture.
        3. *outcome* -- Not used by this method.

    Returns the sampled (``'simple'`` or parentless mixture) or predicted
    (mixture with parents) value. In the mixture case ``np.nan`` is
    returned when the mixture has no components or when any parent value
    is NaN.

    '''
    # Re-seeds the global ``random`` generator on every call (kept for
    # backward compatibility with existing callers).
    random.seed()

    entry = self.Vdataentry
    parents = entry["parents"]

    if method == 'simple':
        # Linear Gaussian: mean = mean_base + sum(parent * scale).
        mean = entry["mean_base"]
        if parents is not None:
            for idx in range(len(parents)):
                if pvalues[idx] != "default":
                    # Rebind instead of ``+=`` so a mutable mean_base
                    # stored in Vdataentry can never be modified in place.
                    mean = mean + pvalues[idx] * entry["mean_scal"][idx]
                else:
                    print(
                        "Attempted to sample node with unassigned parents."
                    )
        # random.gauss expects the standard deviation, not the variance.
        return random.gauss(mean, math.sqrt(entry["variance"]))

    mean = entry["mean_base"]
    variance = entry["variance"]
    weights = entry["mean_scal"]
    n_comp = len(weights)
    if n_comp == 0:
        return np.nan

    if parents is None:
        gmm = GMM(n_components=n_comp, priors=weights,
                  means=mean, covariances=variance)
        return gmm.sample(1)[0][0]

    # Any NaN among the parent values makes the prediction meaningless;
    # this matches the check used by choose_gmm.
    if np.isnan(np.array(pvalues)).any():
        return np.nan

    # Parents are addressed as columns 1..len(parents) of the mixture.
    indexes = list(range(1, len(parents) + 1))
    gmm = GMM(n_components=n_comp, priors=weights,
              means=mean, covariances=variance)
    return gmm.predict(indexes, [pvalues])[0][0]
def test_regression_with_custom_mean_covar_as_lists():
    """Test prediction when priors, means and covariances are plain lists."""
    component_means = [[0, 1], [1, 2]]
    # One fresh 2x2 identity (as nested lists) per component.
    component_covariances = [[[1, 0], [0, 1]] for _ in range(2)]
    gmm = GMM(
        n_components=2,
        priors=[0.5, 0.5],
        means=component_means,
        covariances=component_covariances,
    )

    prediction = gmm.predict([0], [[0]])

    assert_array_almost_equal(prediction, [[1.37754067]])
def test_ellipses():
    """Test equiprobable ellipses."""
    random_state = check_random_state(0)
    means = np.array([[0.0, 1.0],
                      [2.0, -1.0]])
    covariances = np.array([[[0.5, 0.0], [0.0, 5.0]],
                            [[5.0, 0.0], [0.0, 0.5]]])
    gmm = GMM(n_components=2, priors=np.array([0.5, 0.5]), means=means,
              covariances=covariances, random_state=random_state)

    ellipses = gmm.to_ellipses()

    # Both components share the same axis lengths and differ only in
    # their mean and in the expected angle.
    expected_angles = (0.5 * np.pi, -np.pi)
    for component, expected_angle in enumerate(expected_angles):
        mean, (angle, width, height) = ellipses[component]
        assert_array_almost_equal(means[component], mean)
        assert_equal(angle, expected_angle)
        assert_equal(width, np.sqrt(5.0))
        assert_equal(height, np.sqrt(0.5))
def test_numerically_robust_responsibilities():
    """Test that fitting copes with one feature scaled by a factor of 10000."""
    random_state = check_random_state(0)
    n_samples = 300
    n_features = 2
    third = n_samples // 3

    # (row range, mean, covariance) of the three generating Gaussians.
    clusters = (
        (slice(None, third), np.array([0.0, 1.0]),
         [[0.5, -1.0], [-1.0, 5.0]]),
        (slice(third, -third), np.array([-2.0, -2.0]),
         [[3.0, 1.0], [1.0, 1.0]]),
        (slice(-third, None), np.array([3.0, 1.0]),
         [[3.0, -1.0], [-1.0, 1.0]]),
    )
    X = np.ndarray((n_samples, n_features))
    for rows, mean, covariance in clusters:
        X[rows, :] = random_state.multivariate_normal(
            mean, covariance, size=(third,))

    # Artificial scaling of the second feature; a naive implementation
    # fails on this.
    X[:, 1] *= 10000.0

    gmm = GMM(n_components=3, random_state=random_state)
    gmm.from_samples(X, init_params="random")

    mean_dists = pdist(gmm.means)
    assert_true(all(mean_dists > 1))
    scaled_variances = gmm.covariances[:, 1, 1]
    assert_true(all(1e7 < scaled_variances))
    assert_true(all(scaled_variances < 1e9))
def test_mvn_to_mvn():
    """Test that a one-component GMM converts to an MVN with the same moments."""
    means = np.full((1, 1), 123.0)
    covs = np.full((1, 1, 1), 4.0)
    single = GMM(n_components=1, priors=np.ones(1), means=means,
                 covariances=covs)

    mvn = single.to_mvn()

    assert_array_almost_equal(mvn.mean, means[0])
    assert_array_almost_equal(mvn.covariance, covs[0])
def choose_gmm(self, pvalues, outcome):
    '''
    Choose the state of a node from a Gaussian mixture conditioned on *pvalues*.

    ``self.Vdataentry`` supplies the mixture: ``"mean_base"``,
    ``"variance"`` and ``"mean_scal"`` are passed to GMM as the component
    means, covariances and priors.

    Arguments:
        1. *pvalues* -- An array containing the assigned states of the node's parents. This must be in the same order as the parents appear in ``self.Vdataentry['parents']``.
        2. *outcome* -- Not used by this method.

    Returns a sample from the mixture when the node has no parents, the
    mixture's prediction given *pvalues* otherwise, or ``np.nan`` when the
    mixture has no components or any parent value is NaN.

    '''
    # Re-seeds the global ``random`` generator on every call (kept for
    # backward compatibility with existing callers).
    random.seed()

    entry = self.Vdataentry
    mean = entry["mean_base"]
    variance = entry["variance"]
    weights = entry["mean_scal"]
    parents = entry["parents"]
    n_comp = len(weights)

    # Nothing to sample or predict from an empty mixture (same convention
    # as the mixture branch of ``choose``).
    if n_comp == 0:
        return np.nan

    if parents is None:
        gmm = GMM(n_components=n_comp, priors=weights,
                  means=mean, covariances=variance)
        return gmm.sample(1)[0][0]

    if np.isnan(np.array(pvalues)).any():
        return np.nan

    # Built only after the None check: previously len(parents) was taken
    # unconditionally, which raised TypeError for parentless nodes and made
    # the sampling branch unreachable.
    indexes = list(range(1, len(parents) + 1))
    gmm = GMM(n_components=n_comp, priors=weights,
              means=mean, covariances=variance)
    return gmm.predict(indexes, [pvalues])[0][0]
def test_extract_mvns():
    """Test that each extracted MVN carries the mean of its component."""
    gmm = GMM(n_components=2, priors=0.5 * np.ones(2),
              means=np.array([[1, 2], [3, 4]]),
              covariances=[np.eye(2)] * 2)

    expected_means = ([1, 2], [3, 4])
    for component, expected in enumerate(expected_means):
        extracted = gmm.extract_mvn(component)
        assert_array_almost_equal(extracted.mean, np.array(expected))
def test_float_precision_error():
    """Fit a 10-component GMM to the Boston housing features.

    The test passes when fitting finishes without raising. It is skipped
    when ``load_boston`` cannot be imported, which happens both when
    scikit-learn is missing and when the installed scikit-learn no longer
    ships that loader.
    """
    try:
        from sklearn.datasets import load_boston
    except ImportError:
        # The old message blamed a missing sklearn even when only the
        # loader itself was unavailable.
        raise SkipTest("sklearn.datasets.load_boston is not available")

    # Only the feature matrix is needed; the target was never used.
    X = load_boston().data
    gmm = GMM(n_components=10, random_state=2016)
    gmm.from_samples(X)
def test_2_components_to_mvn():
    """Test that a two-component GMM collapses to the prior-weighted mean."""
    priors = np.array([0.25, 0.75])
    means = np.array([[1.0, 2.0], [3.0, 4.0]])
    covs = np.array([
        [[1.0, 0.0], [0.0, 1.0]],
        [[1.0, 0.0], [0.0, 1.0]],
    ])
    # n_components has to agree with the number of supplied components;
    # it used to be hard-coded to 1 although two components are given.
    gmm = GMM(n_components=len(priors), priors=priors, means=means,
              covariances=covs)

    mvn = gmm.to_mvn()

    # 0.25 * [1, 2] + 0.75 * [3, 4] == [2.5, 3.5]
    assert_array_almost_equal(mvn.mean, np.array([2.5, 3.5]))
def choose(node_info: Dict[str, Dict[str, CondMixtureGaussParams]],
           pvals: List[Union[str, float]]) -> Optional[float]:
    """
    Function to get value from ConditionalMixtureGaussian node

    params:
        node_info: nodes info from distributions; the mixture is looked up
            as ``node_info["hybcprob"][str(discrete_parent_values)]`` and
            its keys "mean", "covars" and "coef" are passed to GMM.
        pvals: parent values; ``str`` and ``int`` entries are treated as
            discrete values (they select the mixture), everything else as
            continuous values (the mixture is conditioned on them).

    Return the sampled value, or ``np.nan`` when the selected mixture has
    no components or when any continuous parent value is NaN.
    """
    dispvals = []
    lgpvals = []
    for pval in pvals:
        # Tuple form of isinstance instead of OR-ing two calls with the
        # bitwise operator.
        if isinstance(pval, (str, int)):
            dispvals.append(pval)
        else:
            lgpvals.append(pval)

    lgdistribution = node_info["hybcprob"][str(dispvals)]
    mean = lgdistribution["mean"]
    covariance = lgdistribution["covars"]
    weights = lgdistribution["coef"]
    n_comp = len(weights)

    if n_comp == 0:
        return np.nan
    # A single NaN among the continuous parents already makes the
    # conditional mixture undefined (previously only an all-NaN vector was
    # caught and partial NaNs reached GMM.condition).
    if lgpvals and np.isnan(np.array(lgpvals)).any():
        return np.nan

    gmm = GMM(n_components=n_comp, priors=weights,
              means=mean, covariances=covariance)
    if not lgpvals:
        return gmm.sample(1)[0][0]

    # Condition on columns 1..len(lgpvals); column 0 stays free and is the
    # one that gets sampled.
    parent_columns = list(range(1, len(lgpvals) + 1))
    cond_gmm = gmm.condition(parent_columns, [lgpvals])
    return cond_gmm.sample(1)[0][0]
def test_gmm_to_mvn_vs_mvn():
    """Test that a GMM collapsed to an MVN matches an MVN fitted directly."""
    random_state = check_random_state(0)

    # Fit the mixture first and collapse it ...
    mixture = GMM(n_components=2, random_state=random_state)
    mixture.from_samples(X)
    collapsed = mixture.to_mvn()

    # ... then fit a single Gaussian to the same samples.
    direct = MVN(random_state=random_state)
    direct.from_samples(X)

    assert_array_almost_equal(collapsed.mean, direct.mean)
    assert_array_almost_equal(
        collapsed.covariance, direct.covariance, decimal=3)
def test_estimate_moments():
    """Test moments estimated from samples and sampling from GMM."""
    global X, random_state

    def check_moments(model, tolerances):
        # tolerances[k] == (mean tolerance, covariance tolerance) for
        # component k, compared against the module-level reference values.
        for k, (mean_tol, cov_tol) in enumerate(tolerances):
            assert_less(np.linalg.norm(model.means[k] - means[k]), mean_tol)
            assert_less(
                np.linalg.norm(model.covariances[k] - covariances[k]),
                cov_tol)

    # First round: estimate from the module-level samples.
    gmm = GMM(n_components=2, random_state=random_state)
    gmm.from_samples(X)
    check_moments(gmm, ((0.005, 0.01), (0.01, 0.03)))

    # Second round: re-estimate from samples drawn from the fitted model.
    # NOTE(review): this rebinds the module-global X, so tests that run
    # afterwards see the resampled data -- confirm that this is intended.
    X = gmm.sample(n_samples=100000)
    gmm = GMM(n_components=2, random_state=random_state)
    gmm.from_samples(X)
    check_moments(gmm, ((0.01, 0.03), (0.01, 0.04)))
def test_probability_density():
    """Test PDF of GMM.

    The density is evaluated on a regular 2D grid and summed up as a
    Riemann sum, which has to be close to 1.
    """
    gmm = GMM(n_components=2, random_state=random_state)
    gmm.from_samples(X)

    x = np.linspace(-100, 100, 201)
    X_grid = np.vstack(list(map(np.ravel, np.meshgrid(x, x)))).T
    p = gmm.to_probability_density(X_grid)

    # 201 grid points span 200 intervals, so the grid spacing is
    # x[1] - x[0]; dividing the range by the number of points (201)
    # underestimated the cell area by about one percent.
    cell_area = (x[1] - x[0]) ** 2
    approx_int = np.sum(p) * cell_area
    assert_less(np.abs(1.0 - approx_int), 0.01)
def fit_parameters(self, data: DataFrame) -> MixtureGaussianParams:
    """
    Train params for Mixture Gaussian Node

    The mixture is fitted to the node's own column when the node has no
    parents, and jointly to the node and its continuous parents when it
    has only continuous parents. The number of components is the integer
    part of the mean of the 'aic' and 'bic' results of ``component``.

    Returns a dict with the keys "mean", "coef" and "covars" holding the
    fitted means, priors and covariances as nested lists.
    """
    parents = self.disc_parents + self.cont_parents
    if not parents:
        nodes = [self.name]
        frame = data
    elif not self.disc_parents and self.cont_parents:
        nodes = [self.name] + self.cont_parents
        # reset_index(drop=True) returns a new frame; resetting the index
        # in place on a column slice of ``data`` is what pandas warns about
        # as chained assignment.
        frame = data[nodes].reset_index(drop=True)
    else:
        # NOTE(review): nodes with discrete parents are not handled here;
        # as before, nothing is fitted and None is returned -- confirm that
        # callers expect this.
        return None

    n_comp = int((component(frame, nodes, 'aic') +
                  component(frame, nodes, 'bic')) / 2)

    if parents:
        samples = frame[nodes].values
    else:
        # Single column turned into an (n_samples, 1) array.
        samples = np.transpose([frame[self.name].values])

    gmm = GMM(n_components=n_comp).from_samples(
        samples, n_iter=500, init_params='kmeans++')
    return {"mean": gmm.means.tolist(),
            "coef": gmm.priors.tolist(),
            "covars": gmm.covariances.tolist()}
def test_kmeanspp_initialization():
    """Test that k-means++ initialization succeeds where random init fails."""
    random_state = check_random_state(0)
    n_samples = 300
    n_features = 2
    third = n_samples // 3

    # (row range, mean, covariance) of the three generating Gaussians.
    clusters = (
        (slice(None, third), np.array([0.0, 1.0]),
         [[0.5, -1.0], [-1.0, 5.0]]),
        (slice(third, -third), np.array([-2.0, -2.0]),
         [[3.0, 1.0], [1.0, 1.0]]),
        (slice(-third, None), np.array([3.0, 1.0]),
         [[3.0, -1.0], [-1.0, 1.0]]),
    )
    X = np.ndarray((n_samples, n_features))
    for rows, mean, covariance in clusters:
        X[rows, :] = random_state.multivariate_normal(
            mean, covariance, size=(third,))

    # Artificial scaling of the second feature: the standard
    # implementation fails unless the initial covariances are adjusted or
    # the dataset is normalized.
    X[:, 1] *= 10000.0

    # Random initialization fails: every variance ends up below machine
    # epsilon.
    gmm = GMM(n_components=3, random_state=random_state)
    gmm.from_samples(X, init_params="random")
    eps = np.finfo(float).eps
    for dim in range(n_features):
        for component in range(3):
            assert_less(gmm.covariances[component, dim, dim], eps)

    # k-means++ initialization keeps the components apart and the
    # variances of the scaled feature in a plausible range.
    gmm = GMM(n_components=3, random_state=random_state)
    gmm.from_samples(X, init_params="kmeans++")
    mean_dists = pdist(gmm.means)
    assert_true(all(mean_dists > 1))
    scaled_variances = gmm.covariances[:, 1, 1]
    assert_true(all(1e7 < scaled_variances))
    assert_true(all(scaled_variances < 1e9))
def test_uninitialized():
    """Test behavior of uninitialized GMM."""
    random_state = check_random_state(0)

    # Increasingly complete, but still insufficient, parameter sets:
    # nothing, priors only, priors and means without covariances.
    incomplete_parameters = (
        {},
        {"priors": np.ones(2)},
        {"priors": np.ones(2), "means": np.zeros((2, 2))},
    )
    for parameters in incomplete_parameters:
        gmm = GMM(n_components=2, random_state=random_state, **parameters)
        assert_raises(ValueError, gmm.sample, 10)
        assert_raises(
            ValueError, gmm.to_probability_density, np.ones((1, 1)))
        assert_raises(ValueError, gmm.condition, np.zeros(0), np.zeros(0))
        assert_raises(ValueError, gmm.predict, np.zeros(0), np.zeros(0))
        assert_raises(ValueError, gmm.to_ellipses)
def test_plot():
    """Test plot of GMM."""
    gmm = GMM(n_components=2, priors=np.array([0.5, 0.5]), means=means,
              covariances=covariances, random_state=0)

    # Once with default colors, once with explicit colors; the stub must
    # count the same number of calls both times.
    for plot_options in ({}, {"colors": ["r", "g"]}):
        ax = AxisStub()
        plot_error_ellipses(ax, gmm, **plot_options)
        assert_equal(ax.count, 16)
def test_verbose_from_samples():
    """Test verbose output."""
    global X
    random_state = check_random_state(0)

    # Swap stdout for an in-memory buffer while the model is fitted.
    original_stdout = sys.stdout
    captured = StringIO()
    sys.stdout = captured
    try:
        verbose_gmm = GMM(n_components=2, verbose=True,
                          random_state=random_state)
        verbose_gmm.from_samples(X)
    finally:
        # Read the buffer before closing it, then restore the real stdout.
        out = captured.getvalue()
        captured.close()
        sys.stdout = original_stdout

    assert "converged" in out
def test_sample_confidence_region():
    """Test sampling from confidence region."""
    random_state = check_random_state(0)
    means = np.array([[0.0, 1.0],
                      [2.0, -1.0]])
    covariances = np.array([[[0.5, 0.0], [0.0, 5.0]],
                            [[5.0, 0.0], [0.0, 0.5]]])
    gmm = GMM(n_components=2, priors=np.array([0.5, 0.5]), means=means,
              covariances=covariances, random_state=random_state)

    # Every sample drawn from the region has to lie inside that region.
    for point in gmm.sample_confidence_region(100, 0.7):
        assert_true(gmm.is_in_confidence_region(point, 0.7))
def test_conditional_distribution():
    """Test moments from conditional GMM."""
    random_state = check_random_state(0)
    gmm = GMM(n_components=2, priors=np.array([0.5, 0.5]), means=means,
              covariances=covariances, random_state=random_state)

    # Condition on dimension 1 and inspect component 0.
    given_dim1 = gmm.condition(np.array([1]), np.array([1.0]))
    assert_array_almost_equal(given_dim1.means[0], np.array([0.0]))
    assert_array_almost_equal(given_dim1.covariances[0], np.array([[0.3]]))

    # Condition on dimension 0 and inspect component 1.
    given_dim0 = gmm.condition(np.array([0]), np.array([2.0]))
    assert_array_almost_equal(given_dim0.means[1], np.array([-1.0]))
    assert_array_almost_equal(given_dim0.covariances[1], np.array([[0.3]]))
def test_estimation_from_previous_initialization():
    """Test that two EM iterations from the reference parameters stay close."""
    # Start from copies so fitting cannot modify the module-level
    # reference parameters.
    gmm = GMM(n_components=2, priors=0.5 * np.ones(2),
              means=np.copy(means), covariances=np.copy(covariances),
              random_state=check_random_state(2))
    gmm.from_samples(X, n_iter=2)

    # (mean tolerance, covariance tolerance) per component.
    tolerances = ((0.01, 0.03), (0.01, 0.04))
    for k, (mean_tol, cov_tol) in enumerate(tolerances):
        assert_less(np.linalg.norm(gmm.means[k] - means[k]), mean_tol)
        assert_less(
            np.linalg.norm(gmm.covariances[k] - covariances[k]), cov_tol)
def test_regression_with_2d_input():
    """Test regression with GMM and two-dimensional input."""
    random_state = check_random_state(0)
    n_samples = 200
    half = n_samples // 2

    # Piecewise linear target: rising on the first half of x, falling on
    # the second half, plus a little Gaussian noise.
    x = np.linspace(0, 2, n_samples)[:, np.newaxis]
    rising = 3 * x[:half] + 1
    falling = -3 * x[half:] + 7
    noise = random_state.randn(n_samples, 1) * 0.01
    y = np.vstack((rising, falling)) + noise

    # The two input features are x and x in reversed order.
    inputs = np.hstack((x, x[::-1]))
    samples = np.hstack((inputs, y))

    gmm = GMM(n_components=2, random_state=random_state)
    gmm.from_samples(samples)

    pred = gmm.predict(np.array([0, 1]), inputs)
    # NOTE(review): the error is computed but never asserted, so this test
    # only checks that fitting and prediction run -- confirm whether a
    # threshold on mse is missing.
    mse = np.sum((y - pred) ** 2) / n_samples
def test_plot():
    """Test plot of GMM."""
    mixture = GMM(n_components=2, priors=np.array([0.5, 0.5]), means=means,
                  covariances=covariances, random_state=0)

    # Default colors; an ImportError raised by this first call skips the
    # test.
    default_ax = AxisStub()
    try:
        plot_error_ellipses(default_ax, mixture)
    except ImportError:
        raise SkipTest("matplotlib is required for this test")
    assert_equal(default_ax.count, 16)

    # Explicit colors must lead to the same number of calls on the stub.
    colored_ax = AxisStub()
    plot_error_ellipses(colored_ax, mixture, colors=["r", "g"])
    assert_equal(colored_ax.count, 16)
def test_from_samples_with_oas():
    """Test that OAS fitting on few samples gives valid conditional covariances."""
    n_samples = 9
    n_features = 2
    third = n_samples // 3

    # Three samples from each of three Gaussians, drawn with the
    # module-level random_state.
    X = np.ndarray((n_samples, n_features))
    X[:third, :] = random_state.multivariate_normal(
        [0.0, 1.0], [[0.5, -1.0], [-1.0, 5.0]], size=(third,))
    X[third:-third, :] = random_state.multivariate_normal(
        [-2.0, -2.0], [[3.0, 1.0], [1.0, 1.0]], size=(third,))
    X[-third:, :] = random_state.multivariate_normal(
        [3.0, 3.0], [[3.0, -1.0], [-1.0, 1.0]], size=(third,))

    gmm = GMM(n_components=3, random_state=random_state)
    gmm.from_samples(X, init_params="kmeans++",
                     oracle_approximating_shrinkage=True)

    cond = gmm.condition(np.array([0]), np.array([1.0]))
    # No conditional covariance may have a negative eigenvalue.
    for component in range(cond.n_components):
        eigenvalues = np.linalg.eigvals(cond.covariances[component])
        assert_true(all(eigenvalues >= 0))
def test_regression_without_noise():
    """Test regression without noise."""
    random_state = check_random_state(0)
    n_samples = 200
    half = n_samples // 2

    # Piecewise linear target: rising on the first half of x, falling on
    # the second half.
    x = np.linspace(0, 2, n_samples)[:, np.newaxis]
    rising = 3 * x[:half] + 1
    falling = -3 * x[half:] + 7
    y = np.vstack((rising, falling))
    samples = np.hstack((x, y))

    gmm = GMM(n_components=2, random_state=random_state)
    gmm.from_samples(samples)

    # Each component should cover one of the two linear segments.
    assert_array_almost_equal(gmm.priors, 0.5 * np.ones(2), decimal=2)
    assert_array_almost_equal(gmm.means[0], np.array([1.5, 2.5]), decimal=2)
    assert_array_almost_equal(gmm.means[1], np.array([0.5, 2.5]), decimal=1)

    pred = gmm.predict(np.array([0]), x)
    mse = np.sum((y - pred) ** 2) / n_samples
    assert_less(mse, 0.01)
# Inputs at which both models are evaluated.
X_test = np.linspace(0, 2 * np.pi, 100)
mean, covariance = mvn.predict(np.array([0]), X_test[:, np.newaxis])

plt.figure(figsize=(10, 5))

# Left panel: prediction of the single Gaussian fitted earlier (mvn).
plt.subplot(1, 2, 1)
# Raw strings: the LaTeX commands contain backslash sequences (\m, \S, \p)
# that are invalid escapes in ordinary string literals. The resulting text
# is unchanged.
plt.title(r"Linear: $p(Y | X) = \mathcal{N}(\mu_{Y|X}, \Sigma_{Y|X})$")
plt.scatter(X[:, 0], X[:, 1])
y = mean.ravel()
s = covariance.ravel()
# Band of +/- the predicted covariance around the predicted mean.
plt.fill_between(X_test, y - s, y + s, alpha=0.2)
plt.plot(X_test, y, lw=2)

# New dataset: a noisy sine wave.
n_samples = 100
X = np.ndarray((n_samples, 2))
X[:, 0] = np.linspace(0, 2 * np.pi, n_samples)
X[:, 1] = np.sin(X[:, 0]) + random_state.randn(n_samples) * 0.1

gmm = GMM(n_components=3, random_state=0)
gmm.from_samples(X)
Y = gmm.predict(np.array([0]), X_test[:, np.newaxis])

# Right panel: prediction of the three-component mixture together with
# its error ellipses.
plt.subplot(1, 2, 2)
plt.title(r"Mixture of Experts: $p(Y | X) = \Sigma_k \pi_{k, Y|X} "
          r"\mathcal{N}_{k, Y|X}$")
plt.scatter(X[:, 0], X[:, 1])
plot_error_ellipses(plt.gca(), gmm, colors=["r", "g", "b"])
plt.plot(X_test, Y.ravel(), c="k", lw=2)

plt.show()
We will cluster the Iris dataset but we will use sklearn to initialize our GMM. sklearn allows restricted covariances such as diagonal covariances. This is just for demonstration purposes and does not represent an example of a particularly good fit. Take a look at `plot_iris.py` for a fit with full covariances. """ print(__doc__) import numpy as np from sklearn.datasets import load_iris from sklearn.decomposition import PCA from sklearn.mixture import GaussianMixture import matplotlib.pyplot as plt from gmr import GMM, plot_error_ellipses X, y = load_iris(return_X_y=True) X_pca = PCA(n_components=2, whiten=True, random_state=1).fit_transform(X) gmm_sklearn = GaussianMixture(n_components=3, covariance_type="diag", random_state=3) gmm_sklearn.fit(X_pca) gmm = GMM( n_components=3, priors=gmm_sklearn.weights_, means=gmm_sklearn.means_, covariances=np.array([np.diag(c) for c in gmm_sklearn.covariances_])) plt.figure() ax = plt.subplot(111) ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y) plot_error_ellipses(ax, gmm, alpha=0.1, colors=["r", "g", "b"]) plt.show()
# Drop some features: df = df.drop([name + ' s1' for name in features_to_drop], axis=1) df = df.drop([name + ' s2' for name in features_to_drop], axis=1) data = df.to_numpy() print('Number of transitions in the data: %d' % data.shape[0]) if sys.argv[1] == 'train': ####################### # Train the GMM model # ####################### n_components = 500 gmm = GMM(n_components=n_components) print('Training the model with %d gaussian units' % n_components) gmm.from_samples(data, init='', plot_title=gmm_id, n_iter=100, savefig=True) print('Model ready') plt.show() # Save the model: with open(gmm_file, 'wb') as f: pickle.dump(gmm, f) elif sys.argv[1] == 'score':