Example #1
0
def test_kmeanspp_initialization():
    random_state = check_random_state(1)

    n_samples = 300
    n_features = 2
    X = np.ndarray((n_samples, n_features))
    X[:n_samples // 3, :] = random_state.multivariate_normal(
        [0.0, 1.0], [[0.5, -1.0], [-1.0, 5.0]], size=(n_samples // 3, ))
    X[n_samples // 3:-n_samples // 3, :] = random_state.multivariate_normal(
        [-2.0, -2.0], [[3.0, 1.0], [1.0, 1.0]], size=(n_samples // 3, ))
    X[-n_samples // 3:, :] = random_state.multivariate_normal(
        [3.0, 1.0], [[3.0, -1.0], [-1.0, 1.0]], size=(n_samples // 3, ))

    # artificial scaling, makes standard implementation fail
    # either the initial covariances have to be adjusted or we have
    # to normalize the dataset
    X[:, 1] *= 10000.0

    gmm = GMM(n_components=3, random_state=random_state)
    gmm.from_samples(X, init_params="random")
    ellipses = gmm.to_ellipses()
    widths = np.array([ellipsis_params[1]
                       for _, ellipsis_params in ellipses])[:, np.newaxis]
    average_widths_random = np.mean(pdist(widths))

    gmm = GMM(n_components=3, random_state=random_state)
    gmm.from_samples(X, init_params="kmeans++")
    ellipses = gmm.to_ellipses()
    widths = np.array([ellipsis_params[1]
                       for _, ellipsis_params in ellipses])[:, np.newaxis]
    average_widths_kmeanspp = np.mean(pdist(widths))

    # random initialization produces uneven covariance scaling
    assert_less(average_widths_kmeanspp, average_widths_random)
Example #2
0
def test_ellipses():
    """Test equiprobable ellipses."""
    random_state = check_random_state(0)

    means = np.array([[0.0, 1.0], [2.0, -1.0]])
    covariances = np.array([[[0.5, 0.0], [0.0, 5.0]], [[5.0, 0.0], [0.0,
                                                                    0.5]]])

    gmm = GMM(n_components=2,
              priors=np.array([0.5, 0.5]),
              means=means,
              covariances=covariances,
              random_state=random_state)
    ellipses = gmm.to_ellipses()

    mean, (angle, width, height) = ellipses[0]
    assert_array_almost_equal(means[0], mean)
    assert_equal(angle, 0.5 * np.pi)
    assert_equal(width, np.sqrt(5.0))
    assert_equal(height, np.sqrt(0.5))

    mean, (angle, width, height) = ellipses[1]
    assert_array_almost_equal(means[1], mean)
    assert_equal(angle, -np.pi)
    assert_equal(width, np.sqrt(5.0))
    assert_equal(height, np.sqrt(0.5))
Example #3
0
def test_ellipses():
    """Test equiprobable ellipses."""
    random_state = check_random_state(0)

    means = np.array([[0.0, 1.0],
                      [2.0, -1.0]])
    covariances = np.array([[[0.5, 0.0], [0.0, 5.0]],
                            [[5.0, 0.0], [0.0, 0.5]]])

    gmm = GMM(n_components=2, priors=np.array([0.5, 0.5]), means=means,
              covariances=covariances, random_state=random_state)
    ellipses = gmm.to_ellipses()

    mean, (angle, width, height) = ellipses[0]
    assert_array_almost_equal(means[0], mean)
    assert_equal(angle, 0.5 * np.pi)
    assert_equal(width, np.sqrt(5.0))
    assert_equal(height, np.sqrt(0.5))

    mean, (angle, width, height) = ellipses[1]
    assert_array_almost_equal(means[1], mean)
    assert_equal(angle, -np.pi)
    assert_equal(width, np.sqrt(5.0))
    assert_equal(height, np.sqrt(0.5))
Example #4
0
plt.plot(means_over_time[:, 0], means_over_time[:, 1], c="r", lw=2)
plt.fill_between(means_over_time[:, 0],
                 means_over_time[:, 1] - 1.96 * y_stds,
                 means_over_time[:, 1] + 1.96 * y_stds,
                 color="r",
                 alpha=0.5)

if plot_covariances:
    colors = cycle(["r", "g", "b"])
    for factor in np.linspace(0.5, 4.0, 8):
        new_gmm = GMM(n_components=len(gmm.means),
                      priors=gmm.priors,
                      means=gmm.means[:, 1:],
                      covariances=gmm.covariances[:, 1:, 1:],
                      random_state=gmm.random_state)
        for mean, (angle, width, height) in new_gmm.to_ellipses(factor):
            ell = Ellipse(xy=mean,
                          width=width,
                          height=height,
                          angle=np.degrees(angle))
            ell.set_alpha(0.15)
            ell.set_color(next(colors))
            plt.gca().add_artist(ell)

plt.xlabel("$x_1$")
plt.ylabel("$x_2$")

plt.subplot(122)
plt.title("Confidence Interval from Raw Data")
plt.plot(X[:, :, 0].T, X[:, :, 1].T, c="k", alpha=0.1)