def test_bayesian_mixture_predict_predict_proba(): # this is the same test as test_gaussian_mixture_predict_predict_proba() rng = np.random.RandomState(0) rand_data = RandomData(rng) for prior_type in PRIOR_TYPE: for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] Y = rand_data.Y bgmm = BayesianGaussianMixture( n_components=rand_data.n_components, random_state=rng, weight_concentration_prior_type=prior_type, covariance_type=covar_type) # Check a warning message arrive if we don't do fit assert_raise_message( NotFittedError, "This BayesianGaussianMixture instance" " is not fitted yet. Call 'fit' with " "appropriate arguments before using " "this estimator.", bgmm.predict, X) bgmm.fit(X) Y_pred = bgmm.predict(X) Y_pred_proba = bgmm.predict_proba(X).argmax(axis=1) assert_array_equal(Y_pred, Y_pred_proba) assert adjusted_rand_score(Y, Y_pred) >= .95
def test_invariant_translation(): # We check here that adding a constant in the data change correctly the # parameters of the mixture rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=100) n_components = 2 * rand_data.n_components for prior_type in PRIOR_TYPE: for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] bgmm1 = BayesianGaussianMixture( weight_concentration_prior_type=prior_type, n_components=n_components, max_iter=100, random_state=0, tol=1e-3, reg_covar=0).fit(X) bgmm2 = BayesianGaussianMixture( weight_concentration_prior_type=prior_type, n_components=n_components, max_iter=100, random_state=0, tol=1e-3, reg_covar=0).fit(X + 100) assert_almost_equal(bgmm1.means_, bgmm2.means_ - 100) assert_almost_equal(bgmm1.weights_, bgmm2.weights_) assert_almost_equal(bgmm1.covariances_, bgmm2.covariances_)
def test_check_covariance_precision(): # We check that the dot product of the covariance and the precision # matrices is identity. rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=7) n_components, n_features = 2 * rand_data.n_components, 2 # Computation of the full_covariance bgmm = BayesianGaussianMixture(n_components=n_components, max_iter=100, random_state=rng, tol=1e-3, reg_covar=0) for covar_type in COVARIANCE_TYPE: bgmm.covariance_type = covar_type bgmm.fit(rand_data.X[covar_type]) if covar_type == 'full': for covar, precision in zip(bgmm.covariances_, bgmm.precisions_): assert_almost_equal(np.dot(covar, precision), np.eye(n_features)) elif covar_type == 'tied': assert_almost_equal(np.dot(bgmm.covariances_, bgmm.precisions_), np.eye(n_features)) elif covar_type == 'diag': assert_almost_equal(bgmm.covariances_ * bgmm.precisions_, np.ones((n_components, n_features))) else: assert_almost_equal(bgmm.covariances_ * bgmm.precisions_, np.ones(n_components))
def test_monotonic_likelihood(): # We check that each step of the each step of variational inference without # regularization improve monotonically the training set of the bound rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=20) n_components = rand_data.n_components for prior_type in PRIOR_TYPE: for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] bgmm = BayesianGaussianMixture( weight_concentration_prior_type=prior_type, n_components=2 * n_components, covariance_type=covar_type, warm_start=True, max_iter=1, random_state=rng, tol=1e-3) current_lower_bound = -np.infty # Do one training iteration at a time so we can make sure that the # training log likelihood increases after each iteration. for _ in range(600): prev_lower_bound = current_lower_bound current_lower_bound = bgmm.fit(X).lower_bound_ assert current_lower_bound >= prev_lower_bound if bgmm.converged_: break assert (bgmm.converged_)
def test_compare_covar_type(): # We can compare the 'full' precision with the other cov_type if we apply # 1 iter of the M-step (done during _initialize_parameters). rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=7) X = rand_data.X['full'] n_components = rand_data.n_components for prior_type in PRIOR_TYPE: # Computation of the full_covariance bgmm = BayesianGaussianMixture( weight_concentration_prior_type=prior_type, n_components=2 * n_components, covariance_type='full', max_iter=1, random_state=0, tol=1e-7) bgmm._check_initial_parameters(X) bgmm._initialize_parameters(X, np.random.RandomState(0)) full_covariances = ( bgmm.covariances_ * bgmm.degrees_of_freedom_[:, np.newaxis, np.newaxis]) # Check tied_covariance = mean(full_covariances, 0) bgmm = BayesianGaussianMixture( weight_concentration_prior_type=prior_type, n_components=2 * n_components, covariance_type='tied', max_iter=1, random_state=0, tol=1e-7) bgmm._check_initial_parameters(X) bgmm._initialize_parameters(X, np.random.RandomState(0)) tied_covariance = bgmm.covariances_ * bgmm.degrees_of_freedom_ assert_almost_equal(tied_covariance, np.mean(full_covariances, 0)) # Check diag_covariance = diag(full_covariances) bgmm = BayesianGaussianMixture( weight_concentration_prior_type=prior_type, n_components=2 * n_components, covariance_type='diag', max_iter=1, random_state=0, tol=1e-7) bgmm._check_initial_parameters(X) bgmm._initialize_parameters(X, np.random.RandomState(0)) diag_covariances = (bgmm.covariances_ * bgmm.degrees_of_freedom_[:, np.newaxis]) assert_almost_equal(diag_covariances, np.array([np.diag(cov) for cov in full_covariances])) # Check spherical_covariance = np.mean(diag_covariances, 0) bgmm = BayesianGaussianMixture( weight_concentration_prior_type=prior_type, n_components=2 * n_components, covariance_type='spherical', max_iter=1, random_state=0, tol=1e-7) bgmm._check_initial_parameters(X) bgmm._initialize_parameters(X, np.random.RandomState(0)) spherical_covariances = bgmm.covariances_ * bgmm.degrees_of_freedom_ assert_almost_equal( spherical_covariances, np.mean(diag_covariances, 1))
def test_bayesian_mixture_fit_predict(seed, max_iter, tol): rng = np.random.RandomState(seed) rand_data = RandomData(rng, n_samples=50, scale=7) n_components = 2 * rand_data.n_components for covar_type in COVARIANCE_TYPE: bgmm1 = BayesianGaussianMixture(n_components=n_components, max_iter=max_iter, random_state=rng, tol=tol, reg_covar=0) bgmm1.covariance_type = covar_type bgmm2 = copy.deepcopy(bgmm1) X = rand_data.X[covar_type] Y_pred1 = bgmm1.fit(X).predict(X) Y_pred2 = bgmm2.fit_predict(X) assert_array_equal(Y_pred1, Y_pred2)