Beispiel #1
0
def test_labels_init():
    """Check how ``label_init`` interacts with the allowed component range.

    The initial labelling used here contains two distinct labels. Each
    ``(min_components, max_components)`` pair other than ``(2, 2)`` is
    expected to raise ``ValueError`` somewhere between construction and
    ``fit_predict``; the ``(2, 2)`` configuration must run without raising.
    """
    X = np.random.normal(0, 1, size=(5, 3))
    init_labels = [0, 0, 0, 0, 1]

    # Component ranges that are expected to be rejected for this labelling.
    rejected_ranges = ((1, 1), (1, 2), (2, 3))
    for lower, upper in rejected_ranges:
        with pytest.raises(ValueError):
            model = AutoGMMCluster(
                min_components=lower,
                max_components=upper,
                label_init=np.array(init_labels),
            )
            model.fit_predict(X)

    # The component range matching the two labels must be accepted.
    model = AutoGMMCluster(
        min_components=2,
        max_components=2,
        label_init=np.array(init_labels),
    )
    model.fit_predict(X)
Beispiel #2
0
def test_inputs():
    """Verify that invalid arguments to ``AutoGMMCluster`` are rejected.

    Checks that only construct the estimator cover argument types and
    values. Checks that additionally call ``fit`` / ``fit_predict`` run the
    estimator against a 100-sample data set, so the expected error may come
    from either the constructor or the fitting call.
    """
    # Generate random data: 100 samples, 3 features
    X = np.random.normal(0, 1, size=(100, 3))

    # min_components < 1
    with pytest.raises(ValueError):
        AutoGMMCluster(min_components=0)

    # min_components is not an integer
    with pytest.raises(TypeError):
        AutoGMMCluster(min_components="1")

    # max_components < min_components
    with pytest.raises(ValueError):
        AutoGMMCluster(min_components=1, max_components=0)

    # max_components is not an integer
    with pytest.raises(TypeError):
        AutoGMMCluster(min_components=1, max_components="1")

    # affinity is not an array, string or list
    with pytest.raises(TypeError):
        AutoGMMCluster(min_components=1, affinity=1)

    # affinity is not in ['euclidean', 'manhattan', 'cosine', 'none']
    with pytest.raises(ValueError):
        AutoGMMCluster(min_components=1, affinity="graspy")

    # linkage is not an array, string or list
    with pytest.raises(TypeError):
        AutoGMMCluster(min_components=1, linkage=1)

    # linkage is not in ['single', 'average', 'complete', 'ward']
    with pytest.raises(ValueError):
        AutoGMMCluster(min_components=1, linkage="graspy")

    # 'ward' linkage combined with a non-euclidean affinity
    with pytest.raises(ValueError):
        AutoGMMCluster(min_components=1, affinity="manhattan", linkage="ward")

    # covariance type is not an array, string or list
    with pytest.raises(TypeError):
        AutoGMMCluster(min_components=1, covariance_type=1)

    # covariance type is not in ['spherical', 'diag', 'tied', 'full']
    with pytest.raises(ValueError):
        AutoGMMCluster(min_components=1, covariance_type="graspy")

    # min_components > n_samples when max_components is None
    with pytest.raises(ValueError):
        AutoGMMCluster(1000).fit(X)

    with pytest.raises(ValueError):
        AutoGMMCluster(1000).fit_predict(X)

    # max_components > n_samples while min_components <= n_samples
    with pytest.raises(ValueError):
        AutoGMMCluster(10, 1001).fit(X)

    with pytest.raises(ValueError):
        AutoGMMCluster(10, 1001).fit_predict(X)

    # both min_components and max_components > n_samples
    with pytest.raises(ValueError):
        AutoGMMCluster(1000, 1001).fit(X)

    with pytest.raises(ValueError):
        AutoGMMCluster(1000, 1001).fit_predict(X)

    # label_init is a 2-D array rather than a 1-D vector
    with pytest.raises(TypeError):
        AutoGMMCluster(label_init=np.zeros([100, 2]))

    # label_init is not a 1-D array, a list or None
    with pytest.raises(TypeError):
        AutoGMMCluster(label_init="label")

    # label_init length (50) is not equal to n_samples (100)
    with pytest.raises(ValueError):
        AutoGMMCluster(label_init=np.zeros([50, 1])).fit(X)

    with pytest.raises(ValueError):
        AutoGMMCluster(label_init=np.zeros([50, 1])).fit_predict(X)

    # 2-D label_init combined with a negative max_iter.
    # NOTE(review): the same 2-D label_init is already expected to raise
    # TypeError on its own (see the label_init check above), so this case may
    # not actually exercise max_iter validation -- confirm the intent.
    with pytest.raises(TypeError):
        AutoGMMCluster(label_init=np.zeros([100, 2]), max_iter=-2)
    # NOTE(review): `dataset` is not defined anywhere in the visible code, and
    # these lines are unrelated to the input-validation checks that precede
    # them. They look like the tail of a plotting helper whose `def` line is
    # missing -- confirm and restore the header before relying on them.

    # Label both axes and title the current figure.
    plt.xlabel('First Dimension', fontsize=24)
    plt.ylabel('Second Dimension', fontsize=24)
    plt.title('AutoGMM Clustering', fontsize=24, fontweight='bold')
    # Enlarge the tick labels on both axes.
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    # Save the figure as a PNG in the current directory, named after `dataset`.
    fname = './autogmm_clustering_dataset' + str(dataset) + '.png'
    plt.savefig(fname)


# Configure the estimator with component bounds taken from the first and last
# entries of `ks`, and a fixed random_state.
pyc = AutoGMMCluster(
    min_components=ks[0],
    max_components=ks[-1],
    affinity=affinities,
    linkage=linkages,
    covariance_type=covariance_types,
    random_state=0,
)

# fit_predict is called with the data and the true labels; its result is
# unpacked into the predicted labels and a score stored as `ari`.
c_hat_autogmm, ari = pyc.fit_predict(x, c_true)
#np.savetxt('autogmm.csv',labels, delimiter=',')

# Pull the attributes of the fitted estimator that are reported below.
combo = [pyc.affinity_, pyc.linkage_, pyc.covariance_type_]
k = pyc.n_components_
reg = pyc.reg_covar_
bic = -pyc.criter_  # criter_ is negated before being reported as BIC
results = pyc.results_

print('Info for table:')
for _caption, _value in (
    ('Best model: ', combo),
    ('Best reg: ', reg),
    ('Best k: ', k),
    ('Best BIC: ', bic),
    ('Best ARI: ', ari),
):
    print(_caption + str(_value))