Example No. 1
def test_gaussian_mixture_verbose():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_components = rand_data.n_components
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        g = GaussianMixture(n_components=n_components,
                            n_init=1,
                            reg_covar=0,
                            random_state=rng,
                            covariance_type=covar_type,
                            verbose=1)
        h = GaussianMixture(n_components=n_components,
                            n_init=1,
                            reg_covar=0,
                            random_state=rng,
                            covariance_type=covar_type,
                            verbose=2)
        old_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            g.fit(X)
            h.fit(X)
        finally:
            sys.stdout = old_stdout
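
The stdout swap above is the manual way of capturing GaussianMixture's verbose EM log; a minimal sketch of the same capture using contextlib.redirect_stdout, assuming the X and n_components defined in the test:

# Sketch only: capture the verbose output without swapping sys.stdout by hand
# (X and n_components are assumed from the test above).
from contextlib import redirect_stdout
from io import StringIO

buf = StringIO()
with redirect_stdout(buf):
    GaussianMixture(n_components=n_components, verbose=2).fit(X)
verbose_log = buf.getvalue()  # contains the per-iteration messages
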
Example No. 2
def test_gaussian_mixture_estimate_log_prob_resp():
    # test whether responsibilities are normalized
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=5)
    n_samples = rand_data.n_samples
    n_features = rand_data.n_features
    n_components = rand_data.n_components

    X = rng.rand(n_samples, n_features)
    for covar_type in COVARIANCE_TYPE:
        weights = rand_data.weights
        means = rand_data.means
        precisions = rand_data.precisions[covar_type]
        g = GaussianMixture(n_components=n_components,
                            random_state=rng,
                            weights_init=weights,
                            means_init=means,
                            precisions_init=precisions,
                            covariance_type=covar_type)
        g.fit(X)
        resp = g.predict_proba(X)
        assert_array_almost_equal(resp.sum(axis=1), np.ones(n_samples))
        assert_array_equal(g.weights_init, weights)
        assert_array_equal(g.means_init, means)
        assert_array_equal(g.precisions_init, precisions)
def test_score():
    covar_type = 'full'
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components = rand_data.n_components
    X = rand_data.X[covar_type]

    # Check the error message if we don't call fit
    gmm1 = GaussianMixture(n_components=n_components, n_init=1,
                           max_iter=1, reg_covar=0, random_state=rng,
                           covariance_type=covar_type)
    assert_raise_message(NotFittedError,
                         "This GaussianMixture instance is not fitted "
                         "yet. Call 'fit' with appropriate arguments "
                         "before using this method.", gmm1.score, X)

    # Check score value
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        gmm1.fit(X)
    gmm_score = gmm1.score(X)
    gmm_score_proba = gmm1.score_samples(X).mean()
    assert_almost_equal(gmm_score, gmm_score_proba)

    # Check that the score increases
    gmm2 = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0,
                           random_state=rng,
                           covariance_type=covar_type).fit(X)
    assert_greater(gmm2.score(X), gmm1.score(X))
def test_score():
    covar_type = 'full'
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components = rand_data.n_components
    X = rand_data.X[covar_type]

    # Check the error message if we don't call fit
    gmm1 = GaussianMixture(n_components=n_components,
                           n_init=1,
                           max_iter=1,
                           reg_covar=0,
                           random_state=rng,
                           covariance_type=covar_type)
    assert_raise_message(
        NotFittedError, "This GaussianMixture instance is not fitted "
        "yet. Call 'fit' with appropriate arguments "
        "before using this method.", gmm1.score, X)

    # Check score value
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        gmm1.fit(X)
    gmm_score = gmm1.score(X)
    gmm_score_proba = gmm1.score_samples(X).mean()
    assert_almost_equal(gmm_score, gmm_score_proba)

    # Check that the score increases
    gmm2 = GaussianMixture(n_components=n_components,
                           n_init=1,
                           reg_covar=0,
                           random_state=rng,
                           covariance_type=covar_type).fit(X)
    assert_greater(gmm2.score(X), gmm1.score(X))
Example No. 5
def create_mask(img, background_probability=0.75, use_triangle=False, use_otsu=False):
    test_mask = None
    if use_triangle:
        test_mask = sitk.GetArrayFromImage(sitk.TriangleThreshold(img, 0, 1))
    elif use_otsu:
        test_mask = sitk.GetArrayFromImage(sitk.OtsuThreshold(img, 0, 1))
    else:
        img_arr = img
        if type(img) is sitk.SimpleITK.Image:
            # work on the raw voxel array, but keep the original image so its
            # metadata can be copied onto the mask below
            img_arr = sitk.GetArrayFromImage(img)
        gmix = GaussianMixture(n_components=3, covariance_type='full', init_params='kmeans', verbose=0)
        gmix.fit(img_arr.ravel().reshape(-1, 1))
        covariances = gmix.covariances_
        mean_background = gmix.means_.min()
        covariance_background = covariances[np.where(gmix.means_ == mean_background)][0][0]
        z_score = st.norm.ppf(background_probability)
        threshold = z_score * np.sqrt(covariance_background) + mean_background
        test_mask = (img_arr > threshold)
    eroded_im = morphology.opening(test_mask, selem=morphology.ball(2))
    connected_comp = skimage.measure.label(eroded_im)
    out = skimage.measure.regionprops(connected_comp)
    area_max = 0.0
    idx_max = 0
    for i in range(len(out)):
        if out[i].area > area_max:
            area_max = out[i].area
            idx_max = i + 1
    connected_comp[connected_comp != idx_max] = 0
    mask = connected_comp
    mask_sitk = sitk.GetImageFromArray(mask)
    mask_sitk.CopyInformation(img)
    return mask_sitk
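
A hypothetical call sketch for create_mask; the file names and the 0.95 background probability are illustrative only, and SimpleITK, scikit-image and scipy must be available:

# Hypothetical usage of create_mask (paths and probability are invented).
import SimpleITK as sitk

img = sitk.ReadImage("volume.nii.gz")                 # any scalar 3-D image
mask = create_mask(img, background_probability=0.95)  # GMM-threshold branch
sitk.WriteImage(mask, "volume_mask.nii.gz")
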
Example No. 6
    def macro_clusters(self, n_clusters, max_iters=1000, **kwargs):
        data = np.concatenate([
            m.sample(np.int32(np.ceil(m.n / 100))) for m in self.micro_clusters
        ])

        gmm = GaussianMixture(n_components=n_clusters)
        gmm.fit(data)
        self.macro_centroids = gmm.means_
        self.gmm_model = gmm
Example No. 7
def test_check_precisions():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)

    n_components, n_features = rand_data.n_components, rand_data.n_features

    # Define the bad precisions for each covariance_type
    precisions_bad_shape = {
        'full': np.ones((n_components + 1, n_features, n_features)),
        'tied': np.ones((n_features + 1, n_features + 1)),
        'diag': np.ones((n_components + 1, n_features)),
        'spherical': np.ones((n_components + 1))
    }

    # Define not positive-definite precisions
    precisions_not_pos = np.ones((n_components, n_features, n_features))
    precisions_not_pos[0] = np.eye(n_features)
    precisions_not_pos[0, 0, 0] = -1.

    precisions_not_positive = {
        'full': precisions_not_pos,
        'tied': precisions_not_pos[0],
        'diag': -1. * np.ones((n_components, n_features)),
        'spherical': -1. * np.ones(n_components)
    }

    not_positive_errors = {
        'full': 'symmetric, positive-definite',
        'tied': 'symmetric, positive-definite',
        'diag': 'positive',
        'spherical': 'positive'
    }

    for covar_type in COVARIANCE_TYPE:
        X = RandomData(rng).X[covar_type]
        g = GaussianMixture(n_components=n_components,
                            covariance_type=covar_type,
                            random_state=rng)

        # Check precisions with bad shapes
        g.precisions_init = precisions_bad_shape[covar_type]
        assert_raise_message(
            ValueError, "The parameter '%s precision' should have "
            "the shape of" % covar_type, g.fit, X)

        # Check not positive precisions
        g.precisions_init = precisions_not_positive[covar_type]
        assert_raise_message(
            ValueError, "'%s precision' should be %s" %
            (covar_type, not_positive_errors[covar_type]), g.fit, X)

        # Check the correct init of precisions_init
        g.precisions_init = rand_data.precisions[covar_type]
        g.fit(X)
        assert_array_equal(rand_data.precisions[covar_type], g.precisions_init)
def test_check_precisions():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)

    n_components, n_features = rand_data.n_components, rand_data.n_features

    # Define the bad precisions for each covariance_type
    precisions_bad_shape = {
        'full': np.ones((n_components + 1, n_features, n_features)),
        'tied': np.ones((n_features + 1, n_features + 1)),
        'diag': np.ones((n_components + 1, n_features)),
        'spherical': np.ones((n_components + 1))}

    # Define not positive-definite precisions
    precisions_not_pos = np.ones((n_components, n_features, n_features))
    precisions_not_pos[0] = np.eye(n_features)
    precisions_not_pos[0, 0, 0] = -1.

    precisions_not_positive = {
        'full': precisions_not_pos,
        'tied': precisions_not_pos[0],
        'diag': np.full((n_components, n_features), -1.),
        'spherical': np.full(n_components, -1.)}

    not_positive_errors = {
        'full': 'symmetric, positive-definite',
        'tied': 'symmetric, positive-definite',
        'diag': 'positive',
        'spherical': 'positive'}

    for covar_type in COVARIANCE_TYPE:
        X = RandomData(rng).X[covar_type]
        g = GaussianMixture(n_components=n_components,
                            covariance_type=covar_type,
                            random_state=rng)

        # Check precisions with bad shapes
        g.precisions_init = precisions_bad_shape[covar_type]
        assert_raise_message(ValueError,
                             "The parameter '%s precision' should have "
                             "the shape of" % covar_type,
                             g.fit, X)

        # Check not positive precisions
        g.precisions_init = precisions_not_positive[covar_type]
        assert_raise_message(ValueError,
                             "'%s precision' should be %s"
                             % (covar_type, not_positive_errors[covar_type]),
                             g.fit, X)

        # Check the correct init of precisions_init
        g.precisions_init = rand_data.precisions[covar_type]
        g.fit(X)
        assert_array_equal(rand_data.precisions[covar_type], g.precisions_init)
Example No. 9
def main():
    #Import data from file
    trainData, trainLabel = importTrainData()
    testData, testLabel = importTestData()

    #PCA procedure

    #For train data
    rawlowDivTrainData = []
    pca = PCA(n_components=537)
    #pca = PCA(n_components=537, whiten=True)

    rawlowDivTrainData = pca.fit_transform(trainData)

    #For the 10000 test samples, reuse the PCA basis fitted on the training data
    lowDivTestData = pca.transform(testData)

    #Classify manually, divide the data into 10 parts by labels
    numTrainData = [[], [], [], [], [], [], [], [], [], []]
    for i in range(60000):
        numTrainData[trainLabel[i]].append(rawlowDivTrainData[i])

    #Train a GMM for each mode; each mode represents one digit.
    myGmms = []
    #GMM procedure
    for num in range(10):
        print("GMM", num)
        gmmClassifier = GaussianMixture(n_components=25,
                                        init_params='kmeans',
                                        max_iter=1000)
        gmmClassifier.fit(numTrainData[num])
        myGmms.append(gmmClassifier)

    #Validate
    # We need to calculate the total probability under each GMM and choose the largest one.
    correctCount = 0
    allProb = []
    for num in range(10):
        allProb.append(np.array(myGmms[num].score_samples(lowDivTestData)))

    allProb = np.array(allProb)
    #If the results look strange, print the predicted labels

    for i in range(10000):
        predictLabel = 0
        max_prob = np.max(allProb[:, i])
        for num in range(10):
            if max_prob == allProb[num, i]:
                predictLabel = num
        if predictLabel == testLabel[i]:
            correctCount += 1

    #Output
    print("The correct rate is:", correctCount / 10000)
Example No. 10
def test_gaussian_mixture_fit():
    # recover the ground truth
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_features = rand_data.n_features
    n_components = rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        g = GaussianMixture(n_components=n_components,
                            n_init=20,
                            reg_covar=0,
                            random_state=rng,
                            covariance_type=covar_type)
        g.fit(X)

        # needs more data to pass the test with rtol=1e-7
        assert_allclose(np.sort(g.weights_),
                        np.sort(rand_data.weights),
                        rtol=0.1,
                        atol=1e-2)

        arg_idx1 = g.means_[:, 0].argsort()
        arg_idx2 = rand_data.means[:, 0].argsort()
        assert_allclose(g.means_[arg_idx1],
                        rand_data.means[arg_idx2],
                        rtol=0.1,
                        atol=1e-2)

        if covar_type == 'full':
            prec_pred = g.precisions_
            prec_test = rand_data.precisions['full']
        elif covar_type == 'tied':
            prec_pred = np.array([g.precisions_] * n_components)
            prec_test = np.array([rand_data.precisions['tied']] * n_components)
        elif covar_type == 'spherical':
            prec_pred = np.array(
                [np.eye(n_features) * c for c in g.precisions_])
            prec_test = np.array([
                np.eye(n_features) * c
                for c in rand_data.precisions['spherical']
            ])
        elif covar_type == 'diag':
            prec_pred = np.array([np.diag(d) for d in g.precisions_])
            prec_test = np.array(
                [np.diag(d) for d in rand_data.precisions['diag']])

        arg_idx1 = np.trace(prec_pred, axis1=1, axis2=2).argsort()
        arg_idx2 = np.trace(prec_test, axis1=1, axis2=2).argsort()
        for k, h in zip(arg_idx1, arg_idx2):
            ecov = EmpiricalCovariance()
            ecov.covariance_ = prec_test[h]
            # the accuracy depends on the amount of data and on the randomness of rng
            assert_allclose(ecov.error_norm(prec_pred[k]), 0, atol=0.1)
def test_check_covariances():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)

    n_components, n_features = rand_data.n_components, rand_data.n_features

    # Define the bad covariances for each covariance_type
    covariances_bad_shape = {
        'full': rng.rand(n_components + 1, n_features, n_features),
        'tied': rng.rand(n_features + 1, n_features + 1),
        'diag': rng.rand(n_components + 1, n_features),
        'spherical': rng.rand(n_components + 1)}

    # Define not positive-definite covariances
    covariances_not_pos = rng.rand(n_components, n_features, n_features)
    covariances_not_pos[0] = np.eye(n_features)
    covariances_not_pos[0, 0, 0] = -1.

    covariances_not_positive = {
        'full': covariances_not_pos,
        'tied': covariances_not_pos[0],
        'diag': -1. * np.ones((n_components, n_features)),
        'spherical': -1. * np.ones(n_components)}

    not_positive_errors = {
        'full': 'symmetric, positive-definite',
        'tied': 'symmetric, positive-definite',
        'diag': 'positive',
        'spherical': 'positive'}

    for cov_type in ['full', 'tied', 'diag', 'spherical']:
        X = rand_data.X[cov_type]
        g = GaussianMixture(n_components=n_components,
                            covariance_type=cov_type)

        # Check covariance with bad shapes
        g.covariances_init = covariances_bad_shape[cov_type]
        assert_raise_message(ValueError,
                             "The parameter '%s covariance' should have "
                             "the shape of" % cov_type,
                             g.fit, X)

        # Check not positive covariances
        g.covariances_init = covariances_not_positive[cov_type]
        assert_raise_message(ValueError,
                             "'%s covariance' should be %s"
                             % (cov_type, not_positive_errors[cov_type]),
                             g.fit, X)

        # Check the correct init of covariances_init
        g.covariances_init = rand_data.covariances[cov_type]
        g.fit(X)
        assert_array_equal(rand_data.covariances[cov_type], g.covariances_init)
Example No. 12
def test_sample():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7, n_components=3)
    n_features, n_components = rand_data.n_features, rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]

        gmm = GaussianMixture(n_components=n_components,
                              covariance_type=covar_type,
                              random_state=rng)
        # Sampling requires a fitted GaussianMixture
        assert_raise_message(NotFittedError, "This GaussianMixture instance "
                             "is not fitted", gmm.sample, 0)
        gmm.fit(X)

        assert_raise_message(ValueError, "Invalid value for 'n_samples",
                             gmm.sample, 0)

        # Just to make sure the class samples correctly
        n_samples = 20000
        X_s, y_s = gmm.sample(n_samples)

        for k in range(n_components):
            if covar_type == 'full':
                assert_array_almost_equal(gmm.covariances_[k],
                                          np.cov(X_s[y_s == k].T),
                                          decimal=1)
            elif covar_type == 'tied':
                assert_array_almost_equal(gmm.covariances_,
                                          np.cov(X_s[y_s == k].T),
                                          decimal=1)
            elif covar_type == 'diag':
                assert_array_almost_equal(gmm.covariances_[k],
                                          np.diag(np.cov(X_s[y_s == k].T)),
                                          decimal=1)
            else:
                assert_array_almost_equal(gmm.covariances_[k],
                                          np.var(X_s[y_s == k] -
                                                 gmm.means_[k]),
                                          decimal=1)

        means_s = np.array(
            [np.mean(X_s[y_s == k], 0) for k in range(n_components)])
        assert_array_almost_equal(gmm.means_, means_s, decimal=1)

        # Check shapes of sampled data, see
        # https://github.com/scikit-learn/scikit-learn/issues/7701
        assert_equal(X_s.shape, (n_samples, n_features))

        for sample_size in range(1, 100):
            X_s, _ = gmm.sample(sample_size)
            assert_equal(X_s.shape, (sample_size, n_features))
Example No. 13
def test_warm_start(seed):
    random_state = seed
    rng = np.random.RandomState(random_state)
    n_samples, n_features, n_components = 500, 2, 2
    X = rng.rand(n_samples, n_features)

    # Assert that warm_start gives the same result for the same number of iterations
    g = GaussianMixture(n_components=n_components,
                        n_init=1,
                        max_iter=2,
                        reg_covar=0,
                        random_state=random_state,
                        warm_start=False)
    h = GaussianMixture(n_components=n_components,
                        n_init=1,
                        max_iter=1,
                        reg_covar=0,
                        random_state=random_state,
                        warm_start=True)

    g.fit(X)
    score1 = h.fit(X).score(X)
    score2 = h.fit(X).score(X)

    assert_almost_equal(g.weights_, h.weights_)
    assert_almost_equal(g.means_, h.means_)
    assert_almost_equal(g.precisions_, h.precisions_)
    assert score2 > score1

    # Assert that by using warm_start we can converge to a good solution
    g = GaussianMixture(n_components=n_components,
                        n_init=1,
                        max_iter=5,
                        reg_covar=0,
                        random_state=random_state,
                        warm_start=False,
                        tol=1e-6)
    h = GaussianMixture(n_components=n_components,
                        n_init=1,
                        max_iter=5,
                        reg_covar=0,
                        random_state=random_state,
                        warm_start=True,
                        tol=1e-6)

    g.fit(X)
    assert not g.converged_

    h.fit(X)
    # depending on the data there is large variability in the number of
    # refits needed to converge due to the complete randomness of the data
    for _ in range(1000):
        h.fit(X)
        if h.converged_:
            break
    assert h.converged_
def test_sample():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7, n_components=3)
    n_features, n_components = rand_data.n_features, rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]

        gmm = GaussianMixture(n_components=n_components,
                              covariance_type=covar_type, random_state=rng)
        # Sampling requires a fitted GaussianMixture
        assert_raise_message(NotFittedError, "This GaussianMixture instance "
                             "is not fitted", gmm.sample, 0)
        gmm.fit(X)

        assert_raise_message(ValueError, "Invalid value for 'n_samples",
                             gmm.sample, 0)

        # Just to make sure the class samples correctly
        n_samples = 20000
        X_s, y_s = gmm.sample(n_samples)

        for k in range(n_components):
            if covar_type == 'full':
                assert_array_almost_equal(gmm.covariances_[k],
                                          np.cov(X_s[y_s == k].T), decimal=1)
            elif covar_type == 'tied':
                assert_array_almost_equal(gmm.covariances_,
                                          np.cov(X_s[y_s == k].T), decimal=1)
            elif covar_type == 'diag':
                assert_array_almost_equal(gmm.covariances_[k],
                                          np.diag(np.cov(X_s[y_s == k].T)),
                                          decimal=1)
            else:
                assert_array_almost_equal(
                    gmm.covariances_[k], np.var(X_s[y_s == k] - gmm.means_[k]),
                    decimal=1)

        means_s = np.array([np.mean(X_s[y_s == k], 0)
                           for k in range(n_components)])
        assert_array_almost_equal(gmm.means_, means_s, decimal=1)

        # Check shapes of sampled data, see
        # https://github.com/scikit-learn/scikit-learn/issues/7701
        assert_equal(X_s.shape, (n_samples, n_features))

        for sample_size in range(1, 100):
            X_s, _ = gmm.sample(sample_size)
            assert_equal(X_s.shape, (sample_size, n_features))
Example No. 15
def test_convergence_detected_with_warm_start():
    # We check that convergence is detected when warm_start=True
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_components = rand_data.n_components
    X = rand_data.X['full']

    for max_iter in (1, 2, 50):
        gmm = GaussianMixture(n_components=n_components, warm_start=True,
                              max_iter=max_iter, random_state=rng)
        for _ in range(100):
            gmm.fit(X)
            if gmm.converged_:
                break
        assert gmm.converged_
        assert max_iter >= gmm.n_iter_
Example No. 17
def test_monotonic_likelihood():
    # We check that each step of EM without regularization monotonically
    # improves the training-set likelihood
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components = rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        gmm = GaussianMixture(n_components=n_components,
                              covariance_type=covar_type,
                              reg_covar=0,
                              warm_start=True,
                              max_iter=1,
                              random_state=rng,
                              tol=1e-7)
        current_log_likelihood = -np.infty
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ConvergenceWarning)
            # Do one training iteration at a time so we can make sure that the
            # training log likelihood increases after each iteration.
            for _ in range(600):
                prev_log_likelihood = current_log_likelihood
                try:
                    current_log_likelihood = gmm.fit(X).score(X)
                except ConvergenceWarning:
                    pass
                assert_greater_equal(current_log_likelihood,
                                     prev_log_likelihood)

                if gmm.converged_:
                    break

            assert_true(gmm.converged_)
def test_monotonic_likelihood():
    # We check that each step of EM without regularization monotonically
    # improves the training-set likelihood
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components = rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        gmm = GaussianMixture(n_components=n_components,
                              covariance_type=covar_type, reg_covar=0,
                              warm_start=True, max_iter=1, random_state=rng,
                              tol=1e-7)
        current_log_likelihood = -np.infty
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ConvergenceWarning)
            # Do one training iteration at a time so we can make sure that the
            # training log likelihood increases after each iteration.
            for _ in range(600):
                prev_log_likelihood = current_log_likelihood
                try:
                    current_log_likelihood = gmm.fit(X).score(X)
                except ConvergenceWarning:
                    pass
                assert_greater_equal(current_log_likelihood,
                                     prev_log_likelihood)

                if gmm.converged_:
                    break

            assert gmm.converged_
def test_gaussian_mixture_fit():
    # recover the ground truth
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_features = rand_data.n_features
    n_components = rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        g = GaussianMixture(n_components=n_components, n_init=20,
                            reg_covar=0, random_state=rng,
                            covariance_type=covar_type)
        g.fit(X)

        # needs more data to pass the test with rtol=1e-7
        assert_allclose(np.sort(g.weights_), np.sort(rand_data.weights),
                        rtol=0.1, atol=1e-2)

        arg_idx1 = g.means_[:, 0].argsort()
        arg_idx2 = rand_data.means[:, 0].argsort()
        assert_allclose(g.means_[arg_idx1], rand_data.means[arg_idx2],
                        rtol=0.1, atol=1e-2)

        if covar_type == 'full':
            prec_pred = g.precisions_
            prec_test = rand_data.precisions['full']
        elif covar_type == 'tied':
            prec_pred = np.array([g.precisions_] * n_components)
            prec_test = np.array([rand_data.precisions['tied']] * n_components)
        elif covar_type == 'spherical':
            prec_pred = np.array([np.eye(n_features) * c
                                 for c in g.precisions_])
            prec_test = np.array([np.eye(n_features) * c for c in
                                 rand_data.precisions['spherical']])
        elif covar_type == 'diag':
            prec_pred = np.array([np.diag(d) for d in g.precisions_])
            prec_test = np.array([np.diag(d) for d in
                                 rand_data.precisions['diag']])

        arg_idx1 = np.trace(prec_pred, axis1=1, axis2=2).argsort()
        arg_idx2 = np.trace(prec_test, axis1=1, axis2=2).argsort()
        for k, h in zip(arg_idx1, arg_idx2):
            ecov = EmpiricalCovariance()
            ecov.covariance_ = prec_test[h]
            # the accuracy depends on the amount of data and on the randomness of rng
            assert_allclose(ecov.error_norm(prec_pred[k]), 0, atol=0.1)
def test_check_weights():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)

    n_components = rand_data.n_components
    X = rand_data.X['full']

    g = GaussianMixture(n_components=n_components)

    # Check bad shape
    weights_bad_shape = rng.rand(n_components, 1)
    g.weights_init = weights_bad_shape
    assert_raise_message(ValueError,
                         "The parameter 'weights' should have the shape of "
                         "(%d,), "
                         "but got %s" % (n_components,
                                         str(weights_bad_shape.shape)),
                         g.fit, X)

    # Check bad range
    weights_bad_range = rng.rand(n_components) + 1
    g.weights_init = weights_bad_range
    assert_raise_message(ValueError,
                         "The parameter 'weights' should be in the range "
                         "[0, 1], but got max value %.5f, min value %.5f"
                         % (np.min(weights_bad_range),
                            np.max(weights_bad_range)),
                         g.fit, X)

    # Check bad normalization
    weights_bad_norm = rng.rand(n_components)
    weights_bad_norm = weights_bad_norm / (weights_bad_norm.sum() + 1)
    g.weights_init = weights_bad_norm
    assert_raise_message(ValueError,
                         "The parameter 'weights' should be normalized, "
                         "but got sum(weights) = %.5f"
                         % np.sum(weights_bad_norm),
                         g.fit, X)

    # Check good weights matrix
    weights = rand_data.weights
    g = GaussianMixture(weights_init=weights, n_components=n_components)
    g.fit(X)
    assert_array_equal(weights, g.weights_init)
def test_gaussian_mixture_aic_bic():
    # Test the aic and bic criteria
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 50, 3, 2
    X = rng.randn(n_samples, n_features)
    # standard gaussian entropy
    sgh = 0.5 * (fast_logdet(np.cov(X.T, bias=1)) +
                 n_features * (1 + np.log(2 * np.pi)))
    for cv_type in COVARIANCE_TYPE:
        g = GaussianMixture(
            n_components=n_components, covariance_type=cv_type,
            random_state=rng, max_iter=200)
        g.fit(X)
        aic = 2 * n_samples * sgh + 2 * g._n_parameters()
        bic = (2 * n_samples * sgh +
               np.log(n_samples) * g._n_parameters())
        bound = n_features / np.sqrt(n_samples)
        assert (g.aic(X) - aic) / n_samples < bound
        assert (g.bic(X) - bic) / n_samples < bound
def test_gaussian_mixture_verbose():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_components = rand_data.n_components
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        g = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0,
                            random_state=rng, covariance_type=covar_type,
                            verbose=1)
        h = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0,
                            random_state=rng, covariance_type=covar_type,
                            verbose=2)
        old_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            g.fit(X)
            h.fit(X)
        finally:
            sys.stdout = old_stdout
Example No. 23
def test_gaussian_mixture_aic_bic():
    # Test the aic and bic criteria
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 50, 3, 2
    X = rng.randn(n_samples, n_features)
    # standard gaussian entropy
    sgh = 0.5 * (fast_logdet(np.cov(X.T, bias=1)) + n_features *
                 (1 + np.log(2 * np.pi)))
    for cv_type in COVARIANCE_TYPE:
        g = GaussianMixture(n_components=n_components,
                            covariance_type=cv_type,
                            random_state=rng,
                            max_iter=200)
        g.fit(X)
        aic = 2 * n_samples * sgh + 2 * g._n_parameters()
        bic = (2 * n_samples * sgh + np.log(n_samples) * g._n_parameters())
        bound = n_features / np.sqrt(n_samples)
        assert_true((g.aic(X) - aic) / n_samples < bound)
        assert_true((g.bic(X) - bic) / n_samples < bound)
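
For reference, the quantities the test approximates follow the standard information-criterion definitions; a short sketch, assuming g, X, n_samples and the np import from the test above:

# Sketch: the standard AIC/BIC formulas behind g.aic(X) and g.bic(X)
# (g, X and n_samples are assumed from the test above).
log_lik = g.score(X) * n_samples                  # score() is the mean log-likelihood per sample
aic_check = -2. * log_lik + 2 * g._n_parameters()
bic_check = -2. * log_lik + np.log(n_samples) * g._n_parameters()
assert np.isclose(aic_check, g.aic(X))
assert np.isclose(bic_check, g.bic(X))
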
Example No. 24
def test_gaussian_mixture_fit_best_params():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_components = rand_data.n_components
    n_init = 10
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        g = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0,
                            random_state=rng, covariance_type=covar_type)
        ll = []
        for _ in range(n_init):
            g.fit(X)
            ll.append(g.score(X))
        ll = np.array(ll)
        g_best = GaussianMixture(n_components=n_components,
                                 n_init=n_init, reg_covar=0, random_state=rng,
                                 covariance_type=covar_type)
        g_best.fit(X)
        assert_almost_equal(ll.min(), g_best.score(X))
def test_property():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components = rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        gmm = GaussianMixture(n_components=n_components,
                              covariance_type=covar_type, random_state=rng,
                              n_init=5)
        gmm.fit(X)
        if covar_type == 'full':
            for prec, covar in zip(gmm.precisions_, gmm.covariances_):

                assert_array_almost_equal(linalg.inv(prec), covar)
        elif covar_type == 'tied':
            assert_array_almost_equal(linalg.inv(gmm.precisions_),
                                      gmm.covariances_)
        else:
            assert_array_almost_equal(gmm.precisions_, 1. / gmm.covariances_)
Example No. 28
def test_check_means():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)

    n_components, n_features = rand_data.n_components, rand_data.n_features
    X = rand_data.X['full']

    g = GaussianMixture(n_components=n_components)

    # Check means bad shape
    means_bad_shape = rng.rand(n_components + 1, n_features)
    g.means_init = means_bad_shape
    assert_raise_message(ValueError,
                         "The parameter 'means' should have the shape of ",
                         g.fit, X)

    # Check good means matrix
    means = rand_data.means
    g.means_init = means
    g.fit(X)
    assert_array_equal(means, g.means_init)
Example No. 30
class ScikitLL(LikelihoodEvaluator):
    """
    Fastest Single Core Version so far!
    """
    def __init__(self, Xpoints, numMixtures):
        super().__init__(Xpoints, numMixtures)
        self.evaluator = GaussianMixture(n_components=numMixtures, covariance_type='diag')
        self.Xpoints = Xpoints
        self.evaluator.fit(Xpoints)

    def __str__(self):
        return "SciKit's learn implementation Implementation"

    def loglikelihood(self, means, diagCovs, weights):
        self.evaluator.weights_ = weights
        self.evaluator.covariances_ = diagCovs
        self.evaluator.means_ = means
        self.evaluator.precisions_cholesky_ = _compute_precision_cholesky(
            diagCovs, "diag")

        return self.numPoints * np.sum(self.evaluator.score(self.Xpoints))
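
A hypothetical round trip through ScikitLL; the shapes and values are invented, and it assumes the LikelihoodEvaluator base class simply records Xpoints and numMixtures (e.g. setting numPoints):

# Hypothetical usage of ScikitLL (data and parameter values are illustrative only).
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(200, 3)                        # 200 points with 3 features
evaluator = ScikitLL(X, 2)                   # 2 diagonal-covariance mixtures

means = rng.randn(2, 3)
diag_covs = np.ones((2, 3))                  # per-feature variances for each mixture
weights = np.array([0.4, 0.6])
print(evaluator.loglikelihood(means, diag_covs, weights))
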
Example No. 31
def test_warm_start():

    random_state = 0
    rng = np.random.RandomState(random_state)
    n_samples, n_features, n_components = 500, 2, 2
    X = rng.rand(n_samples, n_features)

    # Assert that warm_start gives the same result for the same number of iterations
    g = GaussianMixture(n_components=n_components,
                        n_init=1,
                        max_iter=2,
                        reg_covar=0,
                        random_state=random_state,
                        warm_start=False)
    h = GaussianMixture(n_components=n_components,
                        n_init=1,
                        max_iter=1,
                        reg_covar=0,
                        random_state=random_state,
                        warm_start=True)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        g.fit(X)
        score1 = h.fit(X).score(X)
        score2 = h.fit(X).score(X)

    assert_almost_equal(g.weights_, h.weights_)
    assert_almost_equal(g.means_, h.means_)
    assert_almost_equal(g.precisions_, h.precisions_)
    assert_greater(score2, score1)

    # Assert that by using warm_start we can converge to a good solution
    g = GaussianMixture(n_components=n_components,
                        n_init=1,
                        max_iter=5,
                        reg_covar=0,
                        random_state=random_state,
                        warm_start=False,
                        tol=1e-6)
    h = GaussianMixture(n_components=n_components,
                        n_init=1,
                        max_iter=5,
                        reg_covar=0,
                        random_state=random_state,
                        warm_start=True,
                        tol=1e-6)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        g.fit(X)
        h.fit(X).fit(X)

    assert_true(not g.converged_)
    assert_true(h.converged_)
Example No. 32
def test_sample():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_features, n_components = rand_data.n_features, rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]

        gmm = GaussianMixture(n_components=n_components,
                              covariance_type=covar_type, random_state=rng)
        # Sampling requires a fitted GaussianMixture
        assert_raise_message(NotFittedError, "This GaussianMixture instance "
                             "is not fitted", gmm.sample, 0)
        gmm.fit(X)

        assert_raise_message(ValueError, "Invalid value for 'n_samples",
                             gmm.sample, 0)

        # Just to make sure the class samples correctly
        X_s, y_s = gmm.sample(20000)
        for k in range(n_components):
            if covar_type == 'full':
                assert_array_almost_equal(gmm.covariances_[k],
                                          np.cov(X_s[y_s == k].T), decimal=1)
            elif covar_type == 'tied':
                assert_array_almost_equal(gmm.covariances_,
                                          np.cov(X_s[y_s == k].T), decimal=1)
            elif covar_type == 'diag':
                assert_array_almost_equal(gmm.covariances_[k],
                                          np.diag(np.cov(X_s[y_s == k].T)),
                                          decimal=1)
            else:
                assert_array_almost_equal(
                    gmm.covariances_[k], np.var(X_s[y_s == k] - gmm.means_[k]),
                    decimal=1)

        means_s = np.array([np.mean(X_s[y_s == k], 0)
                           for k in range(n_components)])
        assert_array_almost_equal(gmm.means_, means_s, decimal=1)
def test_warm_start(seed):
    random_state = seed
    rng = np.random.RandomState(random_state)
    n_samples, n_features, n_components = 500, 2, 2
    X = rng.rand(n_samples, n_features)

    # Assert that warm_start gives the same result for the same number of iterations
    g = GaussianMixture(n_components=n_components, n_init=1, max_iter=2,
                        reg_covar=0, random_state=random_state,
                        warm_start=False)
    h = GaussianMixture(n_components=n_components, n_init=1, max_iter=1,
                        reg_covar=0, random_state=random_state,
                        warm_start=True)

    g.fit(X)
    score1 = h.fit(X).score(X)
    score2 = h.fit(X).score(X)

    assert_almost_equal(g.weights_, h.weights_)
    assert_almost_equal(g.means_, h.means_)
    assert_almost_equal(g.precisions_, h.precisions_)
    assert score2 > score1

    # Assert that by using warm_start we can converge to a good solution
    g = GaussianMixture(n_components=n_components, n_init=1,
                        max_iter=5, reg_covar=0, random_state=random_state,
                        warm_start=False, tol=1e-6)
    h = GaussianMixture(n_components=n_components, n_init=1,
                        max_iter=5, reg_covar=0, random_state=random_state,
                        warm_start=True, tol=1e-6)

    g.fit(X)
    assert not g.converged_

    h.fit(X)
    # depending on the data there is large variability in the number of
    # refits needed to converge due to the complete randomness of the data
    for _ in range(1000):
        h.fit(X)
        if h.converged_:
            break
    assert h.converged_
Example No. 35
def test_gaussian_mixture_predict_predict_proba():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        Y = rand_data.Y
        g = GaussianMixture(n_components=rand_data.n_components,
                            random_state=rng, weights_init=rand_data.weights,
                            means_init=rand_data.means,
                            precisions_init=rand_data.precisions[covar_type],
                            covariance_type=covar_type)

        # Check that a NotFittedError is raised if fit has not been called
        assert_raise_message(NotFittedError,
                             "This GaussianMixture instance is not fitted "
                             "yet. Call 'fit' with appropriate arguments "
                             "before using this method.", g.predict, X)

        g.fit(X)
        Y_pred = g.predict(X)
        Y_pred_proba = g.predict_proba(X).argmax(axis=1)
        assert_array_equal(Y_pred, Y_pred_proba)
        assert_greater(adjusted_rand_score(Y, Y_pred), .95)
def test_gaussian_mixture_estimate_log_prob_resp():
    # test whether responsibilities are normalized
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=5)
    n_samples = rand_data.n_samples
    n_features = rand_data.n_features
    n_components = rand_data.n_components

    X = rng.rand(n_samples, n_features)
    for covar_type in COVARIANCE_TYPE:
        weights = rand_data.weights
        means = rand_data.means
        precisions = rand_data.precisions[covar_type]
        g = GaussianMixture(n_components=n_components, random_state=rng,
                            weights_init=weights, means_init=means,
                            precisions_init=precisions,
                            covariance_type=covar_type)
        g.fit(X)
        resp = g.predict_proba(X)
        assert_array_almost_equal(resp.sum(axis=1), np.ones(n_samples))
        assert_array_equal(g.weights_init, weights)
        assert_array_equal(g.means_init, means)
        assert_array_equal(g.precisions_init, precisions)
Example No. 37
def test_score_samples():
    covar_type = 'full'
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components = rand_data.n_components
    X = rand_data.X[covar_type]

    # Check the error message if we don't call fit
    gmm = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0,
                          random_state=rng, covariance_type=covar_type)
    assert_raise_message(NotFittedError,
                         "This GaussianMixture instance is not fitted "
                         "yet. Call 'fit' with appropriate arguments "
                         "before using this method.", gmm.score_samples, X)

    gmm_score_samples = gmm.fit(X).score_samples(X)
    assert_equal(gmm_score_samples.shape[0], rand_data.n_samples)
def test_warm_start():

    random_state = 0
    rng = np.random.RandomState(random_state)
    n_samples, n_features, n_components = 500, 2, 2
    X = rng.rand(n_samples, n_features)

    # Assert that warm_start gives the same result for the same number of iterations
    g = GaussianMixture(n_components=n_components, n_init=1, max_iter=2,
                        reg_covar=0, random_state=random_state,
                        warm_start=False)
    h = GaussianMixture(n_components=n_components, n_init=1, max_iter=1,
                        reg_covar=0, random_state=random_state,
                        warm_start=True)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        g.fit(X)
        score1 = h.fit(X).score(X)
        score2 = h.fit(X).score(X)

    assert_almost_equal(g.weights_, h.weights_)
    assert_almost_equal(g.means_, h.means_)
    assert_almost_equal(g.precisions_, h.precisions_)
    assert_greater(score2, score1)

    # Assert that by using warm_start we can converge to a good solution
    g = GaussianMixture(n_components=n_components, n_init=1,
                        max_iter=5, reg_covar=0, random_state=random_state,
                        warm_start=False, tol=1e-6)
    h = GaussianMixture(n_components=n_components, n_init=1,
                        max_iter=5, reg_covar=0, random_state=random_state,
                        warm_start=True, tol=1e-6)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        g.fit(X)
        h.fit(X).fit(X)

    assert_true(not g.converged_)
    assert_true(h.converged_)
Example No. 40
def trainGmmEM(x,
               t,
               K,
               cov_type='full',
               n_max_iter=500,
               n_restarts_random=20,
               n_restarts_kmeans=20,
               regularize=1e-2,
               uniform_class_prior=True):
    '''
    Trains a GMM for each class of the given data. If EM is run several
    times, the model with the largest log-likelihood is kept for each class.
    
    x: The input features
    t: The target values
    K: List containing the number of components per class
    cov_type: Which covariance type should be used ('full' or 'diag')
    n_max_iter: Maximum number of iterations used for EM training
    n_restarts_random: Number of random restarts with random initial values
    n_restarts_kmeans: Number of random restarts where initial values are
      computed with the k-means algorithm.
    regularize: Regularizer for the diagonal of the covariance matrices
    uniform_class_prior: If True, the class prior is set to 1/C for each class,
      otherwise it is computed as the fraction of samples per class.
      
    Returns a dictionary containing all parameters. 
    '''
    assert n_restarts_random + n_restarts_kmeans > 0

    C = int(np.max(t) + 1)
    params = {}
    for c in range(C):
        print('EM training for class %d/%d with %d components (N=%d, D=%d)' % (
            c, C, K[c], np.sum(t == c), x.shape[1]))
        t_start = time()
        if n_restarts_random > 0:
            gmm1 = GaussianMixture(n_components=K[c],
                                   covariance_type=cov_type,
                                   reg_covar=regularize,
                                   max_iter=n_max_iter,
                                   n_init=n_restarts_random,
                                   init_params='random',
                                   verbose=2)
            gmm1.fit(x[t == c, :])
        if n_restarts_kmeans > 0:
            gmm2 = GaussianMixture(n_components=K[c],
                                   covariance_type=cov_type,
                                   reg_covar=regularize,
                                   max_iter=n_max_iter,
                                   n_init=n_restarts_kmeans,
                                   init_params='kmeans',
                                   verbose=2)
            gmm2.fit(x[t == c, :])
        t_elapsed = time() - t_start
        print('EM training for class %d/%d finished in %f seconds' % (
            c, C, t_elapsed))

        # Select the better model of gmm1 and gmm2
        # Don't use gmm.lower_bound_, it returns the last logl and not the best
        score1 = gmm1.score(x[t == c, :]) if n_restarts_random > 0 else -np.Inf
        score2 = gmm2.score(x[t == c, :]) if n_restarts_kmeans > 0 else -np.Inf
        gmm = gmm1 if score1 > score2 else gmm2

        params['alpha_%d' % (c)] = gmm.weights_
        params['mu_%d' % (c)] = gmm.means_
        params['Sigma_%d' % (c)] = gmm.covariances_
        params['Lambda_%d' % (c)] = gmm.precisions_

    if uniform_class_prior:
        params['prior'] = np.full((C, ), 1. / C, 'float32')
    else:
        _, counts = np.unique(t, return_counts=True)
        counts = np.asarray(counts, 'float32')
        params['prior'] = counts / np.sum(counts)

    return params
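
A minimal call sketch for trainGmmEM on synthetic two-class data; the component counts and restart numbers are illustrative only, and the snippet assumes GaussianMixture and time are imported as in the original module:

# Sketch: fit one GMM per class on synthetic data (all values are illustrative).
import numpy as np

rng = np.random.RandomState(0)
x = np.vstack([rng.randn(100, 2), rng.randn(100, 2) + 3.0])   # two separated blobs
t = np.concatenate([np.zeros(100), np.ones(100)])             # class labels 0 and 1
params = trainGmmEM(x, t, K=[2, 2], cov_type='diag',
                    n_restarts_random=1, n_restarts_kmeans=1)
print(sorted(params.keys()))   # alpha_*, Lambda_*, Sigma_*, mu_* per class plus 'prior'
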
Example No. 41
estimator = GaussianMixture(n_components=2, max_iter=1000, random_state=0, init_params='random')


plt.figure()
plt.plot(X_train[:, 0], X_train[:, 1], 'k.', markersize=25)
plt.savefig('Figure_6-datapoints.png')
plt.close()

colors = ['r', 'b']


# initial means
estimator.means_init = np.array([[0, 0.5], [0.5, 0]])
# Train the other parameters using the EM algorithm.
estimator.fit(X_train)
classes = estimator.predict(X_train)

plt.figure()
for i in range(2):
    plt.plot(X_train[classes == i, 0], X_train[classes == i, 1], colors[i]+'.', markersize=25, label='class'+str(i+1))
    plt.plot(estimator.means_init[i, 0], estimator.means_init[i, 1], colors[i]+'*', markersize=20, label='mean_init')
    plt.plot(np.mean(X_train[classes == i, 0]), np.mean(X_train[classes == i, 1]), colors[i] + 'P', markersize=15, label='mean_learned')
plt.title('mean_init='+str(estimator.means_init))
plt.legend()
plt.savefig('Figure_6-EM1.png')
plt.close()

# initial means
estimator.means_init = np.array([[0, 1], [0, -1]])
# Train the other parameters using the EM algorithm.
def wemd_from_pred_samples(y_pred, ):
    gmm = GMM(covariance_type="diag")
    gmm = gmm.fit(y_pred)
    y_s, _ = gmm.sample(len(y_pred))
    return wemd_from_samples(y_s, y_pred)
Example No. 43
    if args.plot:
        plot_sample(x, y, pdfpath="temp/gmm_%ssamples.pdf" % sample_type)

        # to plot probability contours
        xx, yy = np.meshgrid(np.linspace(np.min(x[:, 0]), np.max(x[:, 0]), 50),
                             np.linspace(np.min(x[:, 1]), np.max(x[:, 1]), 50))
        x_grid = np.c_[xx.ravel(), yy.ravel()]

    cov_type = 'full'  # 'spherical', 'diag', 'tied', 'full'
    n_classes = 3
    max_iters = 20 if sample_type == "" else 100
    gmm = GMM(n_components=n_classes,
              covariance_type=cov_type,
              max_iter=max_iters,
              random_state=0)
    gmm.fit(x)

    logger.debug("Means:\n%s" % str(gmm.means_))
    logger.debug("Covariances:\n%s" % str(gmm.covariances_))

    scores = gmm.score_samples(x)
    top_anoms = np.argsort(scores)[np.arange(10)]

    if args.plot:
        # colors = ["red", "blue"]
        colors = ['navy', 'turquoise', 'darkorange']

        pdfpath = "temp/gmm_%scontours.pdf" % sample_type
        dp = DataPlotter(pdfpath=pdfpath, rows=1, cols=2)

        pl = dp.get_next_plot()
Example No. 44
    hemispheres_index = np.load(HEMI_INDEXES)
    side_index = np.mod(hemispheres_index, 2)
    labels = np.load(DBSCAN_LABELS)
    g = GaussianMixture(n_components=N, n_init=N_INIT, init_params=INIT_METHOD)
    for j, side in enumerate(SIDES.keys()):

        data_ = data[side_index == j]
        labels_ = labels[side_index == j]

        final_labels = labels_.copy()
        # reencode the labels to take into account the added clusters
        final_labels[labels_ == 2] = 4
        # select central cluster
        data_ = data_[labels_ == 1]

        g.fit(data_)
        w = g.weights_
        m = g.means_
        c = g.covariances_
        pred_labels = g.predict(data_)

        # directly sort the labels according to the precentral coordinates of the cluster
        t = np.argsort(m, axis=0)[:, 0]
        sorted_means = m[t]
        sorted_cov = c[t]
        sorted_weigth = w[t]
        sorted_pred_labels = pred_labels.copy()

        for z, s in enumerate(t):
            # s+1 to account for the existence of ventral cluster
            sorted_pred_labels[pred_labels == s] = z + 1