Exemple #1
0
def test_ase_three_blocks():
    """
    A three-block SBM should be recovered as exactly three clusters.

    For each of several sampled graphs the adjacency matrix is embedded with
    ASE and the embedding is clustered with AutoGMM. The selected model must
    have three components and an ARI of 1 against the block labels.
    """
    np.random.seed(1)

    # Three equally sized blocks; edges are denser within a block (0.8)
    # than between blocks (0.2-0.3).
    block_size = 50
    block_sizes = [block_size] * 3
    block_probs = np.array([[0.8, 0.3, 0.2], [0.3, 0.8, 0.3], [0.2, 0.3, 0.8]])
    block_labels = np.repeat([1, 2, 3], repeats=block_size)

    n_trials = 10
    for _ in range(n_trials):
        adjacency = sbm(n=block_sizes, p=block_probs)

        # Estimate latent positions from the sampled graph
        embedder = AdjacencySpectralEmbed(n_components=5)
        latent = embedder.fit_transform(adjacency)

        # Fit on the latent positions, supplying the block labels
        model = AutoGMMCluster(max_components=10)
        model.fit(latent, block_labels)

        # The three-component model must be the one selected ...
        assert_equal(model.n_components_, 3)
        # ... and the clustering must match the block labels exactly
        assert_allclose(model.ari_, 1)
Exemple #2
0
def test_predict_without_fit():
    """
    Calling ``predict`` on an estimator that was never fitted must raise
    ``NotFittedError``.
    """
    # Generate random data
    X = np.random.normal(0, 1, size=(100, 3))

    # Construct the estimator outside the ``raises`` block so that only the
    # ``predict`` call itself can satisfy the expectation; an unexpected
    # exception from the constructor then surfaces as a test error instead
    # of being mistaken for the behaviour under test.
    AutoGMM = AutoGMMCluster(min_components=2)

    with pytest.raises(NotFittedError):
        AutoGMM.predict(X)
Exemple #3
0
def test_two_class():
    """
    Two tight, well-separated Gaussian blobs must be recovered exactly.

    The draw-and-fit cycle is repeated several times; on every repetition the
    selected model must have two components and an ARI of 1.
    """
    np.random.seed(1)

    samples_per_class = 100
    n_features = 3
    blob_shape = (samples_per_class, n_features)
    n_trials = 10

    for _ in range(n_trials):
        # One blob centred at +2 and one at -2 in every coordinate
        upper = np.random.normal(2, 0.5, size=blob_shape)
        lower = np.random.normal(-2, 0.5, size=blob_shape)
        X = np.vstack((upper, lower))
        labels = np.repeat([0, 1], samples_per_class)

        model = AutoGMMCluster(max_components=5)
        model.fit(X, labels)

        # The two-component model must be the one selected
        assert_equal(model.n_components_, 2)

        # The clustering must match the labels exactly
        assert_allclose(model.ari_, 1)
Exemple #4
0
def test_two_class_aic():
    """
    Fit the two-blob problem with AIC as the selection criterion.

    AIC does not reliably pick the right number of components here, so the
    test only checks that the reported values lie in their valid ranges.
    """
    np.random.seed(1)

    samples_per_class = 100
    n_features = 3
    blob_shape = (samples_per_class, n_features)

    upper = np.random.normal(2, 0.5, size=blob_shape)
    lower = np.random.normal(-2, 0.5, size=blob_shape)
    X = np.vstack((upper, lower))
    labels = np.repeat([0, 1], samples_per_class)

    model = AutoGMMCluster(max_components=5, selection_criteria="aic")
    model.fit(X, labels)

    # The selected component count must stay within the searched range
    assert_equal(1 <= model.n_components_ <= 5, True)

    # The ARI must be a valid value
    assert_equal(-1 <= model.ari_ <= 1, True)
Exemple #5
0
def test_no_y():
    """
    Fitting without labels must still select two components for two blobs.
    """
    np.random.seed(1)

    samples_per_class = 100
    n_features = 3
    blob_shape = (samples_per_class, n_features)

    # Draw the +2 blob first, then the -2 blob, and stack them row-wise
    upper = np.random.normal(2, 0.5, size=blob_shape)
    lower = np.random.normal(-2, 0.5, size=blob_shape)
    X = np.vstack((upper, lower))

    model = AutoGMMCluster(max_components=5)
    model.fit(X)

    assert_equal(model.n_components_, 2)
Exemple #6
0
def test_five_class():
    """
    Five unit-covariance Gaussians spaced 5 apart along the first axis must
    be recovered as five components.
    """
    np.random.seed(1)

    samples_per_class = 100
    centres = [[5 * k, 0] for k in range(5)]
    unit_cov = np.eye(2)  # isotropic, identical for every class

    # Draw the classes in order of increasing centre
    draws = []
    for centre in centres:
        draws.append(
            np.random.multivariate_normal(centre, unit_cov, samples_per_class)
        )
    X = np.vstack(draws)

    model = AutoGMMCluster(min_components=3, max_components=10, covariance_type="all")
    model.fit(X)

    assert_equal(model.n_components_, 5)
Exemple #7
0
def test_cosine_with_0():
    """
    Fitting with ``affinity="all"`` on data that contains an all-zero row
    must emit a ``UserWarning``.
    """
    rows = [
        [0, 1, 0],
        [1, 0, 1],
        [0, 0, 0],  # the all-zero sample
        [1, 1, 0],
        [0, 0, 1],
        [0, 1, 1],
        [1, 1, 1],
        [1, 0, 0],
        [0, 1, 1],
        [1, 1, 0],
        [0, 1, 0],
    ]
    X = np.array(rows)

    with pytest.warns(UserWarning):
        model = AutoGMMCluster(min_components=2, affinity="all")
        model.fit(X)
Exemple #8
0
def test_five_class_aic():
    """
    Fit the five-Gaussian problem with AIC as the selection criterion.

    AIC often fails to pick five components, so the exact count is not
    asserted; the test only checks that it lies within the searched range.
    """
    np.random.seed(1)

    samples_per_class = 100
    centres = [[5 * k, 0] for k in range(5)]
    unit_cov = np.eye(2)  # isotropic, identical for every class

    # Draw the classes in order of increasing centre
    draws = []
    for centre in centres:
        draws.append(
            np.random.multivariate_normal(centre, unit_cov, samples_per_class)
        )
    X = np.vstack(draws)

    search_options = {
        "min_components": 3,
        "max_components": 10,
        "covariance_type": "all",
        "selection_criteria": "aic",
    }
    model = AutoGMMCluster(**search_options)
    model.fit(X)

    # Only the bounds of the search range are checked
    assert_equal(3 <= model.n_components_ <= 10, True)
Exemple #9
0
# Benchmark AutoGMMCluster over `num_runs` subsamples of the full data set,
# recording the ARI and wall-clock time of every run.
#
# Per-run records are gathered in a plain list and turned into a DataFrame
# once after the loop: DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0, and growing a frame row by row copies it each time.
result_rows = []
x_full = x
c_true_full = c_true
n_full = x.shape[0]

for i in range(num_runs):
    print('Run number: ' + str(i))
    # Column i of idxs_full selects the rows for this run; the -1 shift
    # presumably converts 1-based indices to 0-based -- TODO confirm.
    idxs = idxs_full.iloc[:, i].values - 1
    x = x_full[idxs, ]
    c_true = c_true_full[idxs, ]

    start_time = time.time()
    pyc = AutoGMMCluster(min_components=ks[0],
                         max_components=ks[len(ks) - 1],
                         affinity=affinities,
                         linkage=linkages,
                         covariance_type=covariance_types,
                         random_state=0)
    pyc.fit(x, c_true)
    best_ari_bic = pyc.ari_
    # The elapsed time is taken after the fit and the ARI lookup.
    result_rows.append(
        {
            'Time': time.time() - start_time,
            'ARI': best_ari_bic
        })

# Keep the original column order (ARI, Time) in the written CSV.
results = pd.DataFrame(result_rows, columns=['ARI', 'Time'])
results.to_csv(path_or_buf=output_file)

#%%
#Wisconsin Breast Cancer Diagnostic Data
Exemple #10
0
def test_covariances():
    """
    The covariance type selected by AutoGMM must match the structure of the
    covariances that generated the data.

    Two clusters centred at (-10, 0) and (10, 0) are drawn four times, with
    spherical, diagonal, tied and full covariance structure in turn.
    """
    np.random.seed(1)

    samples_per_class = 100
    left_mean = [-10, 0]
    right_mean = [10, 0]

    def draw(left_cov, right_cov):
        # Sample the left cluster first, then the right one, and stack them.
        left = np.random.multivariate_normal(left_mean, left_cov, samples_per_class)
        right = np.random.multivariate_normal(right_mean, right_cov, samples_per_class)
        return np.concatenate((left, right))

    # Spherical: both clusters share the same isotropic covariance
    X = draw(2 * np.eye(2), 2 * np.eye(2))
    model = AutoGMMCluster(min_components=2, covariance_type="all")
    model.fit(X)
    assert_equal(model.covariance_type_, "spherical")

    # Diagonal: axis-aligned covariances that differ between the clusters
    np.random.seed(10)
    X = draw(np.diag([1, 1]), np.diag([2, 1]))
    model = AutoGMMCluster(max_components=2, covariance_type="all")
    model.fit(X)
    assert_equal(model.covariance_type_, "diag")

    # Tied: both clusters share one correlated covariance
    X = draw(np.array([[2, 1], [1, 2]]), np.array([[2, 1], [1, 2]]))
    model = AutoGMMCluster(max_components=2, covariance_type="all")
    model.fit(X)
    assert_equal(model.covariance_type_, "tied")

    # Full: correlated covariances with opposite sign of correlation
    X = draw(np.array([[2, -1], [-1, 2]]), np.array([[2, 1], [1, 2]]))
    model = AutoGMMCluster(max_components=2, covariance_type="all")
    model.fit(X)
    assert_equal(model.covariance_type_, "full")
Exemple #11
0
def test_cosine_on_0():
    """
    Five samples, one of them all-zero, with ``min_components=3`` and
    ``affinity="all"`` must be rejected with a ``ValueError``.
    """
    rows = [
        [0, 1, 0],
        [1, 0, 1],
        [0, 0, 0],  # the all-zero sample
        [1, 1, 0],
        [0, 0, 1],
    ]
    X = np.array(rows)

    with pytest.raises(ValueError):
        model = AutoGMMCluster(min_components=3, affinity="all")
        model.fit(X)
Exemple #12
0
def test_inputs():
    """
    Invalid arguments must be rejected with the expected exception type.

    Every entry of the case table is run inside ``pytest.raises``: the
    estimator is constructed from the listed arguments and, when a method
    name is given, that method is then called on the sample data.
    """
    # Generate random data
    X = np.random.normal(0, 1, size=(100, 3))

    # (expected exception, positional args, keyword args, method or None)
    cases = [
        # min_components < 1
        (ValueError, (), {"min_components": 0}, None),
        # min_components is not an integer
        (TypeError, (), {"min_components": "1"}, None),
        # max_components < min_components
        (ValueError, (), {"min_components": 1, "max_components": 0}, None),
        # max_components is not an integer
        (TypeError, (), {"min_components": 1, "max_components": "1"}, None),
        # affinity is not an array, string or list
        (TypeError, (), {"min_components": 1, "affinity": 1}, None),
        # affinity is not one of the recognised names
        (ValueError, (), {"min_components": 1, "affinity": "graspy"}, None),
        # linkage is not an array, string or list
        (TypeError, (), {"min_components": 1, "linkage": 1}, None),
        # linkage is not one of the recognised names
        (ValueError, (), {"min_components": 1, "linkage": "graspy"}, None),
        # ward linkage combined with a non-euclidean affinity
        (
            ValueError,
            (),
            {"min_components": 1, "affinity": "manhattan", "linkage": "ward"},
            None,
        ),
        # covariance type is not an array, string or list
        (TypeError, (), {"min_components": 1, "covariance_type": 1}, None),
        # covariance type is not one of the recognised names
        (ValueError, (), {"min_components": 1, "covariance_type": "graspy"}, None),
        # a single positional component count larger than the 100 samples
        (ValueError, (1000,), {}, "fit"),
        (ValueError, (1000,), {}, "fit_predict"),
        # max_components larger than the 100 samples
        (ValueError, (10, 1001), {}, "fit"),
        (ValueError, (10, 1001), {}, "fit_predict"),
        # repeats of two of the cases above (kept from the original test)
        (ValueError, (1000,), {}, "fit"),
        (ValueError, (10, 1001), {}, "fit_predict"),
        # both min_components and max_components larger than the 100 samples
        (ValueError, (1000, 1001), {}, "fit"),
        (ValueError, (1000, 1001), {}, "fit_predict"),
        # label_init is not a 1-D array
        (TypeError, (), {"label_init": np.zeros([100, 2])}, None),
        # label_init is not a 1-D array, a list or None
        (TypeError, (), {"label_init": "label"}, None),
        # label_init length is not equal to n_samples
        (ValueError, (), {"label_init": np.zeros([50, 1])}, "fit"),
        (ValueError, (), {"label_init": np.zeros([50, 1])}, "fit_predict"),
        # 2-D label_init together with a negative max_iter
        (TypeError, (), {"label_init": np.zeros([100, 2]), "max_iter": -2}, None),
    ]

    for expected, args, kwargs, method in cases:
        with pytest.raises(expected):
            model = AutoGMMCluster(*args, **kwargs)
            if method is not None:
                getattr(model, method)(X)
Exemple #13
0
def test_labels_init():
    """
    ``label_init`` with two distinct labels is only accepted when both
    ``min_components`` and ``max_components`` equal two.

    The ranges (1, 1), (1, 2) and (2, 3) must raise ``ValueError``; the range
    (2, 2) must run ``fit_predict`` without error.
    """
    X = np.random.normal(0, 1, size=(5, 3))

    def build(lower, upper):
        # A fresh initial labelling with two distinct labels for every estimator.
        return AutoGMMCluster(min_components=lower,
                              max_components=upper,
                              label_init=np.array([0, 0, 0, 0, 1]))

    rejected_ranges = [(1, 1), (1, 2), (2, 3)]
    for lower, upper in rejected_ranges:
        with pytest.raises(ValueError):
            model = build(lower, upper)
            model.fit_predict(X)

    model = build(2, 2)
    model.fit_predict(X)
Exemple #14
0
            continue
        if affinity == 'none' and linkage != 'complete':
            continue
        # Time one AutoGMMCluster fit per (covariance type, data set size)
        # for the current affinity/linkage combination.
        for covariance_type in covariance_types:
            for i, n in enumerate(ns):
                # Backslashes are escaped explicitly: the previous literal
                # relied on the invalid escape "\d", which newer Python
                # versions warn about. The resulting path is unchanged.
                file = ".\\data\\" + str(n) + ".csv"
                # Column 0 of the CSV is read as the true labels; the
                # remaining columns are the features.
                x = np.genfromtxt(file, delimiter=',', skip_header=0)
                x = x[:, np.arange(1, x.shape[1])]
                c_true = np.genfromtxt(file,
                                       delimiter=',',
                                       usecols=(0),
                                       skip_header=0)

                start_time = time.time()
                pyc = AutoGMMCluster(min_components=ks[0],
                                     max_components=ks[len(ks) - 1],
                                     affinity=affinity,
                                     linkage=linkage,
                                     covariance_type=covariance_type)
                pyc.fit(x, c_true)
                entry = {
                    'N': n,
                    'Affinity': affinity,
                    'Linkage': linkage,
                    'Covariance_Type': covariance_type,
                    'Time': time.time() - start_time
                }
                # NOTE(review): DataFrame.append was removed in pandas 2.0;
                # left as-is because the initialisation of `results` is not
                # visible from this fragment -- migrate both together.
                results = results.append(entry, ignore_index=True)

                print(entry)
results.to_csv(path_or_buf=output_file)