def test_score():
    covar_type = 'full'
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components = rand_data.n_components
    X = rand_data.X[covar_type]

    # Check the error message if we don't call fit
    gmm1 = GaussianMixture(n_components=n_components, n_init=1, max_iter=1,
                           reg_covar=0, random_state=rng,
                           covariance_type=covar_type)
    assert_raise_message(NotFittedError,
                         "This GaussianMixture instance is not fitted "
                         "yet. Call 'fit' with appropriate arguments "
                         "before using this method.", gmm1.score, X)

    # Check score value
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        gmm1.fit(X)
    gmm_score = gmm1.score(X)
    gmm_score_proba = gmm1.score_samples(X).mean()
    assert_almost_equal(gmm_score, gmm_score_proba)

    # Check that the score increases with more EM iterations
    gmm2 = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0,
                           random_state=rng,
                           covariance_type=covar_type).fit(X)
    assert_greater(gmm2.score(X), gmm1.score(X))
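
# A minimal standalone sketch (not part of the test suite above) of the
# identity the test checks: GaussianMixture.score(X) is the mean of the
# per-sample log-likelihoods returned by score_samples(X). The synthetic
# data and names below are illustrative.
import numpy as np
from sklearn.mixture import GaussianMixture

_rng = np.random.RandomState(0)
X_demo = np.vstack([_rng.randn(100, 2), _rng.randn(100, 2) + 5])
gmm_demo = GaussianMixture(n_components=2, random_state=0).fit(X_demo)
# score() averages the per-sample log-likelihoods from score_samples()
assert np.allclose(gmm_demo.score(X_demo),
                   gmm_demo.score_samples(X_demo).mean())
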
def test_gaussian_mixture_fit_best_params():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_components = rand_data.n_components
    n_init = 10
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        g = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0,
                            random_state=rng, covariance_type=covar_type)
        ll = []
        for _ in range(n_init):
            g.fit(X)
            ll.append(g.score(X))
        ll = np.array(ll)
        g_best = GaussianMixture(n_components=n_components, n_init=n_init,
                                 reg_covar=0, random_state=rng,
                                 covariance_type=covar_type)
        g_best.fit(X)
        # On this well-separated data every initialization converges to
        # essentially the same optimum, so the best-of-n_init fit matches
        # even the worst single-init score.
        assert_almost_equal(ll.min(), g_best.score(X))
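
# A hedged sketch (not from the test suite) of what n_init does: the
# estimator runs EM from n_init initializations and keeps the one with the
# highest lower bound. With the same integer seed, the single-init fit uses
# the same first initialization as the multi-init fit, so the multi-init
# score can never be worse. Data and seeds here are illustrative.
import numpy as np
from sklearn.mixture import GaussianMixture

_rng = np.random.RandomState(1)
X_init_demo = np.vstack([_rng.randn(200, 2), _rng.randn(200, 2) + 4])
single = GaussianMixture(n_components=2, n_init=1,
                         random_state=0).fit(X_init_demo)
multi = GaussianMixture(n_components=2, n_init=10,
                        random_state=0).fit(X_init_demo)
# best-of-10 is at least as good as the single fit (small numerical slack)
assert multi.score(X_init_demo) >= single.score(X_init_demo) - 1e-10
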
import numpy as np
from sklearn.mixture import GaussianMixture
# Private helper; lives in sklearn.mixture.gaussian_mixture before 0.22
from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky


class ScikitLL(LikelihoodEvaluator):
    """Fastest single-core version so far!"""

    def __init__(self, Xpoints, numMixtures):
        super().__init__(Xpoints, numMixtures)
        self.evaluator = GaussianMixture(n_components=numMixtures,
                                         covariance_type='diag')
        self.Xpoints = Xpoints
        self.evaluator.fit(Xpoints)

    def __str__(self):
        return "scikit-learn implementation"

    def loglikelihood(self, means, diagCovs, weights):
        # Inject the externally supplied parameters into the fitted model,
        # then rebuild the Cholesky factors of the precisions so that
        # score() evaluates the data under *these* parameters.
        self.evaluator.weights_ = weights
        self.evaluator.covariances_ = diagCovs
        self.evaluator.means_ = means
        self.evaluator.precisions_cholesky_ = _compute_precision_cholesky(
            diagCovs, "diag")
        # score() returns the mean per-sample log-likelihood; scale it back
        # up to the total log-likelihood over all points.
        return self.numPoints * self.evaluator.score(self.Xpoints)
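
# Hedged usage sketch for ScikitLL, assuming the LikelihoodEvaluator base
# class from this module sets self.numPoints from Xpoints. The data and
# parameter values below are illustrative, not from the original project.
_rng = np.random.RandomState(0)
X_ll_demo = _rng.randn(500, 3)
ll_eval = ScikitLL(X_ll_demo, numMixtures=4)
# Score the data under arbitrary externally chosen GMM parameters
demo_means = _rng.randn(4, 3)
demo_diag_covs = np.ones((4, 3))
demo_weights = np.full(4, 0.25)
print(ll_eval.loglikelihood(demo_means, demo_diag_covs, demo_weights))
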
import numpy as np
from time import time
from sklearn.mixture import GaussianMixture


def trainGmmEM(x, t, K, cov_type='full', n_max_iter=500,
               n_restarts_random=20, n_restarts_kmeans=20, regularize=1e-2,
               uniform_class_prior=True):
    '''
    Trains a GMM for each class of the given data. If EM is run several
    times, the model with the largest log-likelihood is kept.

    x: The input features
    t: The target values
    K: List containing the number of components per class
    cov_type: Which covariance type should be used ('full' or 'diag')
    n_max_iter: Maximum number of iterations used for EM training
    n_restarts_random: Number of restarts with random initial values
    n_restarts_kmeans: Number of restarts where initial values are computed
        with the k-means algorithm
    regularize: Regularizer for the diagonal of the covariance matrices
    uniform_class_prior: If True, the class prior is set to 1/C for each
        class, otherwise it is computed as the fraction of samples per class

    Returns a dictionary containing all parameters.
    '''
    assert n_restarts_random + n_restarts_kmeans > 0

    C = int(np.max(t) + 1)
    params = {}
    for c in range(C):
        print('EM training for class %d/%d with %d components (N=%d, D=%d)' %
              (c, C, K[c], np.sum(t == c), x.shape[1]))
        t_start = time()
        if n_restarts_random > 0:
            gmm1 = GaussianMixture(n_components=K[c],
                                   covariance_type=cov_type,
                                   reg_covar=regularize, max_iter=n_max_iter,
                                   n_init=n_restarts_random,
                                   init_params='random', verbose=2)
            gmm1.fit(x[t == c, :])
        if n_restarts_kmeans > 0:
            gmm2 = GaussianMixture(n_components=K[c],
                                   covariance_type=cov_type,
                                   reg_covar=regularize, max_iter=n_max_iter,
                                   n_init=n_restarts_kmeans,
                                   init_params='kmeans', verbose=2)
            gmm2.fit(x[t == c, :])
        t_elapsed = time() - t_start
        print('EM training for class %d/%d finished in %f seconds' %
              (c, C, t_elapsed))

        # Select the better of gmm1 and gmm2. Don't use gmm.lower_bound_:
        # it returns the last log-likelihood, not the best one.
        score1 = (gmm1.score(x[t == c, :])
                  if n_restarts_random > 0 else -np.inf)
        score2 = (gmm2.score(x[t == c, :])
                  if n_restarts_kmeans > 0 else -np.inf)
        gmm = gmm1 if score1 > score2 else gmm2

        params['alpha_%d' % c] = gmm.weights_
        params['mu_%d' % c] = gmm.means_
        params['Sigma_%d' % c] = gmm.covariances_
        params['Lambda_%d' % c] = gmm.precisions_

    if uniform_class_prior:
        params['prior'] = np.full((C,), 1. / C, 'float32')
    else:
        _, counts = np.unique(t, return_counts=True)
        counts = np.asarray(counts, 'float32')
        params['prior'] = counts / np.sum(counts)
    return params
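
# Hedged usage sketch for trainGmmEM on synthetic two-class data; the data,
# component counts, and restart numbers are illustrative, not from the
# original project.
_rng = np.random.RandomState(0)
x_demo = np.vstack([_rng.randn(300, 2),
                    _rng.randn(300, 2) + 3]).astype('float32')
t_demo = np.concatenate([np.zeros(300), np.ones(300)])
demo_params = trainGmmEM(x_demo, t_demo, K=[2, 2], cov_type='diag',
                         n_restarts_random=2, n_restarts_kmeans=2)
# One weight vector, mean matrix, and covariance per class, plus the prior
print(demo_params['prior'], demo_params['mu_0'].shape)
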