Example #1
def test_outputs():
    """
    Easily separable two-Gaussian problem.
    """
    np.random.seed(2)

    n = 100
    d = 3

    num_sims = 10

    for _ in range(num_sims):
        X1 = np.random.normal(2, 0.5, size=(n, d))
        X2 = np.random.normal(-2, 0.5, size=(n, d))
        X = np.vstack((X1, X2))
        y = np.repeat([0, 1], n)

        gclust = GaussianCluster(min_components=5)
        gclust.fit(X, y)
        bics = gclust.bic_
        aris = gclust.ari_

        bic_argmin = bics.iloc[:, 0].values.argmin()

        # Assert that the two-cluster model is the best
        assert_equal(bic_argmin, 1)
        # The plus one is to adjust the index by min_components
        assert_allclose(aris.iloc[:, 0][bic_argmin + 1], 1)
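
The snippets on this page omit their imports. A minimal setup sketch, assuming graspy's public module layout and the NumPy/scikit-learn test helpers these tests rely on (verify the paths against your installed version):

# Assumed imports for the examples on this page; module paths follow
# graspy's public layout and may differ between versions.
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_equal
from sklearn.exceptions import NotFittedError

from graspy.cluster import GaussianCluster
from graspy.embed import AdjacencySpectralEmbed, MultipleASE
from graspy.simulations import sbm
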
Example #2
def test_predict_without_fit():
    # Generate random data
    X = np.random.normal(0, 1, size=(100, 3))

    with pytest.raises(NotFittedError):
        gclust = GaussianCluster(min_components=2)
        gclust.predict(X)
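
For contrast, a minimal sketch of the intended call order; GaussianCluster appears to follow the usual scikit-learn convention that predict is only valid after fit:

# Hypothetical happy-path counterpart to the test above: fit before predict.
X = np.random.normal(0, 1, size=(100, 3))
gclust = GaussianCluster(min_components=2)
gclust.fit(X)                # model selection happens during fit
labels = gclust.predict(X)   # now valid; one cluster label per sample
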
Example #3
def test_bic():
    """
    Expect 3 clusters from a 3-block model
    """
    np.random.seed(3)
    num_sims = 10

    # Generate adjacency and labels
    n = 50
    n_communites = [n, n, n]
    p = np.array([[0.8, 0.3, 0.2], [0.3, 0.8, 0.3], [0.2, 0.3, 0.8]])
    y = np.repeat([1, 2, 3], repeats=n)

    for _ in range(num_sims):
        A = sbm(n=n_communites, p=p)

        # Embed to get latent positions
        ase = AdjacencySpectralEmbed(n_components=5)
        X_hat = ase.fit_transform(A)

        # Compute clusters
        gclust = GaussianCluster(min_components=10)
        gclust.fit(X_hat, y)

        bics = gclust.bic_
        aris = gclust.ari_

        bic_argmin = bics.iloc[:, 0].values.argmin()

        assert_equal(2, bic_argmin)
        # The plus one is to adjust the index by min_components
        assert_allclose(1, aris.iloc[:, 0][bic_argmin + 1])
Example #4
def test_two_class():
    """
    Easily separable two-Gaussian problem.
    """
    np.random.seed(2)

    n = 100
    d = 3

    num_sims = 10

    for _ in range(num_sims):
        X1 = np.random.normal(2, 0.5, size=(n, d))
        X2 = np.random.normal(-2, 0.5, size=(n, d))
        X = np.vstack((X1, X2))
        y = np.repeat([0, 1], n)

        gclust = GaussianCluster(min_components=5)
        gclust.fit(X, y)

        n_components = gclust.n_components_

        # Assert that the two-cluster model is the best
        assert_equal(n_components, 2)

        # Assert that we get perfect clustering
        assert_allclose(gclust.ari_.loc[n_components], 1)
Example #5
def test_ase_three_blocks():
    """
    Expect 3 clusters from a 3-block model
    """
    np.random.seed(3)
    num_sims = 10

    # Generate adjacency and labels
    n = 50
    n_communites = [n, n, n]
    p = np.array([[0.8, 0.3, 0.2], [0.3, 0.8, 0.3], [0.2, 0.3, 0.8]])
    y = np.repeat([1, 2, 3], repeats=n)

    for _ in range(num_sims):
        A = sbm(n=n_communites, p=p)

        # Embed to get latent positions
        ase = AdjacencySpectralEmbed(n_components=5)
        X_hat = ase.fit_transform(A)

        # Compute clusters
        gclust = GaussianCluster(min_components=10)
        gclust.fit(X_hat, y)

        n_components = gclust.n_components_

        # Assert that the three-cluster model is the best
        assert_equal(n_components, 3)

        # Assert that we get perfect clustering
        assert_allclose(gclust.ari_.loc[n_components], 1)
Example #6
def test_no_y():
    np.random.seed(2)

    n = 100
    d = 3

    X1 = np.random.normal(2, 0.5, size=(n, d))
    X2 = np.random.normal(-2, 0.5, size=(n, d))
    X = np.vstack((X1, X2))

    gclust = GaussianCluster(min_components=5)
    gclust.fit(X)

    assert_equal(gclust.n_components_, 2)
Example #7
def test_no_y():
    np.random.seed(2)

    n = 100
    d = 3

    X1 = np.random.normal(2, 0.5, size=(n, d))
    X2 = np.random.normal(-2, 0.5, size=(n, d))
    X = np.vstack((X1, X2))

    gclust = GaussianCluster(min_components=5)
    gclust.fit(X)

    bics = gclust.bic_
    assert_equal(bics.iloc[:, 0].values.argmin(), 1)
Example #8
    def run(diag_aug, scaled):
        # undirected case
        np.random.seed(4 + diag_aug + scaled)
        X = make_train_undirected(n, m)

        res = MultipleASE(n_components=2).fit(X).latent_left_
        gmm = GaussianCluster(10, covariance_type="all").fit(res)
        assert gmm.n_components_ == 2

        # directed case
        np.random.seed(5 + diag_aug + scaled)
        X = make_train_directed(n, m)

        mase = MultipleASE(n_components=2).fit(X)
        res = np.hstack([mase.latent_left_, mase.latent_right_])
        gmm = GaussianCluster(10, covariance_type="all").fit(res)
        assert gmm.n_components_ == 2
Example #9
    def run(diag_aug, scaled):
        # undirected case
        np.random.seed(2 + diag_aug + scaled)
        X = make_train_undirected(n, m)

        mase = MultipleASE(2, diag_aug=diag_aug, scaled=scaled).fit(X)
        res = mase.scores_.reshape((m * 4, -1))
        gmm = GaussianCluster(10, covariance_type="all").fit(res)
        assert gmm.n_components_ == 4

        # directed case
        np.random.seed(3 + diag_aug + scaled)
        X = make_train_directed(n, m)

        mase = MultipleASE(2, diag_aug=diag_aug).fit(X)
        res = mase.scores_.reshape((m * 4, -1))
        gmm = GaussianCluster(10, covariance_type="all").fit(res)
        assert gmm.n_components_ == 4
Example #10
def test_five_class():
    """
    Easily separable five-Gaussian problem.
    """
    np.random.seed(10)

    n = 100
    mus = [[i * 5, 0] for i in range(5)]
    cov = np.eye(2)  # balls

    num_sims = 10

    for _ in range(num_sims):
        X = np.vstack(
            [np.random.multivariate_normal(mu, cov, n) for mu in mus])

        gclust = GaussianCluster(min_components=3,
                                 max_components=10,
                                 covariance_type="all")
        gclust.fit(X)
        assert_equal(gclust.n_components_, 5)
Example #11
File: test_mase.py Project: tathey1/graspy
def test_vertex():
    """
    There should be 2 clusters since each graph is a 2-block model
    """
    # undirected case
    np.random.seed(4)
    n = [128, 128]
    m = 10
    X = make_train_undirected(n, m)

    # undirected case
    res = MultipleASE(n_components=2).fit(X).latent_left_
    gmm = GaussianCluster(10, covariance_type="all").fit(res)
    assert gmm.n_components_ == 2

    # directed case
    np.random.seed(5)
    X = make_train_directed(n, m)

    mase = MultipleASE(n_components=2).fit(X)
    res = np.hstack([mase.latent_left_, mase.latent_right_])
    gmm = GaussianCluster(10, covariance_type="all").fit(res)
    assert gmm.n_components_ == 2

    # Scaled and undirected case
    np.random.seed(4)
    X = make_train_undirected(n, m)

    res = MultipleASE(n_components=2, scaled=True).fit_transform(X)
    gmm = GaussianCluster(10, covariance_type="all").fit(res)
    assert gmm.n_components_ == 2

    # Scaled and directed case
    np.random.seed(5)
    X = make_train_directed(n, m)

    left, right = MultipleASE(n_components=2, scaled=True).fit_transform(X)
    res = np.hstack([left, right])
    gmm = GaussianCluster(10, covariance_type="all").fit(res)
    assert gmm.n_components_ == 2
Example #12
File: test_mase.py Project: tathey1/graspy
def test_graph_clustering():
    """
    There should be 4 clusters in total since this is a 4-class problem.
    n_components = 2
    """
    # undirected case
    np.random.seed(2)
    n = [128, 128]
    m = 10
    X = make_train_undirected(n, m)

    res = MultipleASE(2).fit(X).scores_.reshape((m * 4, -1))
    gmm = GaussianCluster(10, covariance_type="all").fit(res)
    assert gmm.n_components_ == 4

    # directed case
    np.random.seed(3)
    X = make_train_directed(n, m)

    res = MultipleASE(2).fit(X).scores_.reshape((m * 4, -1))
    gmm = GaussianCluster(10, covariance_type="all").fit(res)
    assert gmm.n_components_ == 4

    # Scaled cases
    # undirected case
    np.random.seed(12)
    X = make_train_undirected(n, m)

    res = MultipleASE(2, scaled=True).fit(X).scores_.reshape((m * 4, -1))
    gmm = GaussianCluster(10, covariance_type="all").fit(res)
    assert gmm.n_components_ == 4

    # directed case
    np.random.seed(13)
    X = make_train_directed(n, m)

    res = MultipleASE(2, scaled=True).fit(X).scores_.reshape((m * 4, -1))
    gmm = GaussianCluster(10, covariance_type="all").fit(res)
    assert gmm.n_components_ == 4
Example #13
def test_covariances():
    """
    Easily separable two-Gaussian problem.
    """
    np.random.seed(2)

    n = 100

    mu1 = [-10, 0]
    mu2 = [10, 0]

    # Spherical
    cov1 = 2 * np.eye(2)
    cov2 = 2 * np.eye(2)

    X1 = np.random.multivariate_normal(mu1, cov1, n)
    X2 = np.random.multivariate_normal(mu2, cov2, n)

    X = np.concatenate((X1, X2))

    gclust_object = GaussianCluster(min_components=2, covariance_type="all")
    gclust_object.fit(X)
    assert_equal(gclust_object.bic_.iloc[1, :].values.argmin(), 0)

    # Diagonal
    np.random.seed(10)
    cov1 = np.diag([1, 1])
    cov2 = np.diag([2, 1])

    X1 = np.random.multivariate_normal(mu1, cov1, n)
    X2 = np.random.multivariate_normal(mu2, cov2, n)

    X = np.concatenate((X1, X2))

    gclust_object = GaussianCluster(min_components=2, covariance_type="all")
    gclust_object.fit(X)
    assert_equal(gclust_object.bic_.iloc[1, :].values.argmin(), 1)

    # Tied
    cov1 = np.array([[2, 1], [1, 2]])
    cov2 = np.array([[2, 1], [1, 2]])

    X1 = np.random.multivariate_normal(mu1, cov1, n)
    X2 = np.random.multivariate_normal(mu2, cov2, n)

    X = np.concatenate((X1, X2))

    gclust_object = GaussianCluster(min_components=2, covariance_type="all")
    gclust_object.fit(X)
    assert_equal(gclust_object.bic_.iloc[1, :].values.argmin(), 2)

    # Full
    cov1 = np.array([[2, -1], [-1, 2]])
    cov2 = np.array([[2, 1], [1, 2]])

    X1 = np.random.multivariate_normal(mu1, cov1, n)
    X2 = np.random.multivariate_normal(mu2, cov2, n)

    X = np.concatenate((X1, X2))

    gclust_object = GaussianCluster(min_components=2, covariance_type="all")
    gclust_object.fit(X)
    assert_equal(gclust_object.bic_.iloc[1, :].values.argmin(), 3)
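
Read back from these assertions, bic_ appears to be a table whose rows are indexed by the candidate number of components and whose columns, under covariance_type="all", are ordered spherical, diag, tied, full. A small inspection sketch under that assumption (the data generation mirrors the spherical case above):

# Sketch: inspecting the BIC table; assumes rows = component counts and
# columns = covariance types in the order (spherical, diag, tied, full),
# which is what the argmin checks above rely on.
X = np.concatenate((
    np.random.multivariate_normal([-10, 0], 2 * np.eye(2), 100),
    np.random.multivariate_normal([10, 0], 2 * np.eye(2), 100),
))

gclust = GaussianCluster(min_components=2, covariance_type="all")
gclust.fit(X)

two_component_row = gclust.bic_.iloc[1, :]    # BIC scores for the 2-component fits
best_cov = two_component_row.values.argmin()  # 0..3 -> spherical..full under the assumption
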
Example #14
def test_inputs():
    # Generate random data
    X = np.random.normal(0, 1, size=(100, 3))

    # min_components < 1
    with pytest.raises(ValueError):
        gclust = GaussianCluster(min_components=0)

    # min_components must be an integer
    with pytest.raises(TypeError):
        gclust = GaussianCluster(min_components="1")

    # max_components < min_components
    with pytest.raises(ValueError):
        gclust = GaussianCluster(min_components=1, max_components=0)

    # max_components must be an integer
    with pytest.raises(TypeError):
        gclust = GaussianCluster(min_components=1, max_components="1")

    # covariance_type is not an array, string, or list
    with pytest.raises(TypeError):
        gclust = GaussianCluster(min_components=1, covariance_type=1)

    # covariance_type is not in ['spherical', 'diag', 'tied', 'full']
    with pytest.raises(ValueError):
        gclust = GaussianCluster(min_components=1, covariance_type="graspy")

    # min_components > n_samples when max_components is None
    with pytest.raises(ValueError):
        gclust = GaussianCluster(1000)
        gclust.fit(X)

    with pytest.raises(ValueError):
        gclust = GaussianCluster(1000)
        gclust.fit_predict(X)

    # max_components > n_samples when max_components is not None
    with pytest.raises(ValueError):
        gclust = GaussianCluster(10, 1001)
        gclust.fit(X)

    with pytest.raises(ValueError):
        gclust = GaussianCluster(10, 1001)
        gclust.fit_predict(X)

    # min_components > n_samples when max_components is None
    with pytest.raises(ValueError):
        gclust = GaussianCluster(1000)
        gclust.fit(X)

    with pytest.raises(ValueError):
        gclust = GaussianCluster(10, 1001)
        gclust.fit_predict(X)

    # min_components > n_samples when max_components is not None
    with pytest.raises(ValueError):
        gclust = GaussianCluster(1000, 1001)
        gclust.fit(X)

    with pytest.raises(ValueError):
        gclust = GaussianCluster(1000, 1001)
        gclust.fit_predict(X)
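
Taken together, these checks suggest the constructor contract: min_components and max_components must be positive integers with max_components >= min_components, covariance_type must be a string or list drawn from 'spherical', 'diag', 'tied', 'full' (with "all" used as a shorthand in the earlier examples), and the largest candidate component count may not exceed the number of samples. For contrast, a construction that satisfies every constraint exercised above:

# A valid construction, for contrast with the failing cases above;
# the argument values are taken from the earlier examples on this page.
X = np.random.normal(0, 1, size=(100, 3))
gclust = GaussianCluster(min_components=2, max_components=10,
                         covariance_type="all")
labels = gclust.fit_predict(X)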