Example #1
# Excerpt from sklearn/mixture/tests/test_gaussian_mixture.py; RandomData,
# COVARIANCE_TYPE and the assert_* helpers come from that test module.
def test_score():
    covar_type = 'full'
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components = rand_data.n_components
    X = rand_data.X[covar_type]

    # Check the error message if we don't call fit
    gmm1 = GaussianMixture(n_components=n_components,
                           n_init=1,
                           max_iter=1,
                           reg_covar=0,
                           random_state=rng,
                           covariance_type=covar_type)
    assert_raise_message(
        NotFittedError, "This GaussianMixture instance is not fitted "
        "yet. Call 'fit' with appropriate arguments "
        "before using this method.", gmm1.score, X)

    # Check score value
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        gmm1.fit(X)
    gmm_score = gmm1.score(X)
    gmm_score_proba = gmm1.score_samples(X).mean()
    assert_almost_equal(gmm_score, gmm_score_proba)

    # Check that a converged fit scores higher than the one-iteration fit above
    gmm2 = GaussianMixture(n_components=n_components,
                           n_init=1,
                           reg_covar=0,
                           random_state=rng,
                           covariance_type=covar_type).fit(X)
    assert_greater(gmm2.score(X), gmm1.score(X))
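The middle assertion relies on score() being the mean of the per-sample log-likelihoods returned by score_samples(). A minimal standalone sketch of that relationship (the synthetic data is made up for illustration):

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(0)
# Two well-separated 2-D blobs.
X = np.vstack([rng.randn(200, 2), rng.randn(200, 2) + 5.0])

gmm = GaussianMixture(n_components=2, random_state=0).fit(X)
# score() is exactly the mean of the per-sample log-likelihoods.
assert np.isclose(gmm.score(X), gmm.score_samples(X).mean())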
Example #4
def test_gaussian_mixture_fit_best_params():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_components = rand_data.n_components
    n_init = 10
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        g = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0,
                            random_state=rng, covariance_type=covar_type)
        ll = []
        for _ in range(n_init):
            g.fit(X)
            ll.append(g.score(X))
        ll = np.array(ll)
        g_best = GaussianMixture(n_components=n_components,
                                 n_init=n_init, reg_covar=0, random_state=rng,
                                 covariance_type=covar_type)
        g_best.fit(X)
        # RandomData produces well-separated components, so every restart
        # converges to essentially the same optimum and even the worst
        # single-run score matches the best-of-n_init model.
        assert_almost_equal(ll.min(), g_best.score(X))
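What n_init automates is a best-of-n restart loop. A hand-rolled equivalent, sketched on made-up synthetic data:

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(200, 2), rng.randn(200, 2) + 4.0])

# Fit ten independent single-init models and keep the one with the highest
# mean log-likelihood; this is essentially what n_init=10 does internally.
best = max(
    (GaussianMixture(n_components=2, n_init=1, random_state=seed).fit(X)
     for seed in range(10)),
    key=lambda g: g.score(X),
)
print(best.score(X))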
Example #5
from sklearn.mixture import GaussianMixture
# Private sklearn helper; in releases before 0.22 it lives in
# sklearn.mixture.gaussian_mixture instead.
from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky


class ScikitLL(LikelihoodEvaluator):
    """
    Fastest Single Core Version so far!
    """
    def __init__(self, Xpoints, numMixtures):
        super().__init__(Xpoints, numMixtures)
        self.evaluator = GaussianMixture(n_components=numMixtures,
                                         covariance_type='diag')
        self.Xpoints = Xpoints
        self.evaluator.fit(Xpoints)

    def __str__(self):
        return "scikit-learn implementation"

    def loglikelihood(self, means, diagCovs, weights):
        # Inject externally supplied parameters into the fitted estimator.
        self.evaluator.weights_ = weights
        self.evaluator.covariances_ = diagCovs
        self.evaluator.means_ = means
        self.evaluator.precisions_cholesky_ = _compute_precision_cholesky(
            diagCovs, "diag")

        # score() returns the mean per-sample log-likelihood; scaling by the
        # number of points recovers the total log-likelihood.
        return self.numPoints * self.evaluator.score(self.Xpoints)
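A usage sketch, assuming the project-specific LikelihoodEvaluator base class (not shown) simply records the data and sets numPoints to the number of rows; all parameter values below are made up:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(1000, 3)
ll_eval = ScikitLL(X, numMixtures=4)

# Shapes GaussianMixture expects for covariance_type='diag':
weights = np.full(4, 0.25)      # (n_components,)
means = rng.randn(4, 3)         # (n_components, n_features)
diag_covs = np.ones((4, 3))     # (n_components, n_features)
print(ll_eval.loglikelihood(means, diag_covs, weights))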
Example #6
from time import time

import numpy as np
from sklearn.mixture import GaussianMixture


def trainGmmEM(x,
               t,
               K,
               cov_type='full',
               n_max_iter=500,
               n_restarts_random=20,
               n_restarts_kmeans=20,
               regularize=1e-2,
               uniform_class_prior=True):
    '''
    Trains a GMM for each class of the given data. If EM is run several
    times, the model with the largest log-likelihood is kept for each class.
    
    x: The input features
    t: The target values
    K: List containing the number of components per class
    cov_type: Which covariance type should be used ('full' or 'diag')
    n_max_iter: Maximum number of iterations used for EM training
    n_restarts_random: Number of random restarts with random initial values
    n_restarts_kmeans: Number of random restarts where initial values are
      computed with the k-means algorithm.
    regularize: Regularizer for the diagonal of the covariance matrices
    uniform_class_prior: If True, the class prior is set to 1/C for each class,
      otherwise it is computed as the fraction of samples per class.
      
    Returns a dictionary containing all parameters. 
    '''
    assert n_restarts_random + n_restarts_kmeans > 0

    C = int(np.max(t) + 1)
    params = {}
    for c in range(C):
        print('EM training for class %d/%d with %d components (N=%d, D=%d)' % (
            c, C, K[c], np.sum(t == c), x.shape[1]))
        t_start = time()
        if n_restarts_random > 0:
            gmm1 = GaussianMixture(n_components=K[c],
                                   covariance_type=cov_type,
                                   reg_covar=regularize,
                                   max_iter=n_max_iter,
                                   n_init=n_restarts_random,
                                   init_params='random',
                                   verbose=2)
            gmm1.fit(x[t == c, :])
        if n_restarts_kmeans > 0:
            gmm2 = GaussianMixture(n_components=K[c],
                                   covariance_type=cov_type,
                                   reg_covar=regularize,
                                   max_iter=n_max_iter,
                                   n_init=n_restarts_kmeans,
                                   init_params='kmeans',
                                   verbose=2)
            gmm2.fit(x[t == c, :])
        t_elapsed = time() - t_start
        print('EM training for class %d/%d finished in %f seconds' % (
            c, C, t_elapsed))

        # Select the better of gmm1 and gmm2. Don't use gmm.lower_bound_: it
        # returns the log-likelihood of the last init, not of the best one.
        score1 = gmm1.score(x[t == c, :]) if n_restarts_random > 0 else -np.inf
        score2 = gmm2.score(x[t == c, :]) if n_restarts_kmeans > 0 else -np.inf
        gmm = gmm1 if score1 > score2 else gmm2

        params['alpha_%d' % (c)] = gmm.weights_
        params['mu_%d' % (c)] = gmm.means_
        params['Sigma_%d' % (c)] = gmm.covariances_
        params['Lambda_%d' % (c)] = gmm.precisions_

    if uniform_class_prior:
        params['prior'] = np.full((C, ), 1. / C, 'float32')
    else:
        _, counts = np.unique(t, return_counts=True)
        counts = np.asarray(counts, 'float32')
        params['prior'] = counts / np.sum(counts)

    return params
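A small usage sketch on synthetic two-class data (the feature matrix x, the labels t, and the per-class component counts K are made up for illustration):

import numpy as np

rng = np.random.RandomState(0)
x = np.vstack([rng.randn(300, 2), rng.randn(300, 2) + 3.0])
t = np.concatenate([np.zeros(300), np.ones(300)])

params = trainGmmEM(x, t, K=[2, 2], cov_type='diag',
                    n_restarts_random=2, n_restarts_kmeans=2)
print(sorted(params.keys()))  # alpha_*, Lambda_*, Sigma_*, mu_*, prior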