Exemple #1
0
def test_double_fit_agnes():
    """Fit one Agnes instance twice and check the labels after each fit.

    Two blob datasets that differ only in ``random_state`` are generated.
    The same instance is fitted on the first and its labels are checked,
    then it is fitted on the second and checked again.
    """
    blob_kwargs = dict(n_samples=288, centers=6, cluster_std=0.2)
    first_points, first_expected = datasets.make_blobs(random_state=31, **blob_kwargs)
    second_points, second_expected = datasets.make_blobs(random_state=79, **blob_kwargs)

    model = agnes.Agnes(6, 'wards')

    model.Fit(first_points)
    first_predicted = model.GetLabels(first_points.shape[0])
    first_ok = t.check_clusters(first_expected, first_predicted, 6)

    # Refit the same instance on the second dataset before checking again.
    model.Fit(second_points)
    second_predicted = model.GetLabels(second_points.shape[0])
    second_ok = t.check_clusters(second_expected, second_predicted, 6)

    assert first_ok == True
    assert second_ok == True
Exemple #2
0
def test_duplicate_dual_kmeans():
    """Run KMeans with 2 clusters on seven copies each of (1, 1) and (-1, -1)."""
    copies = 7
    points = np.array([[1, 1]] * copies + [[-1, -1]] * copies)
    expected = np.array([0] * copies + [1] * copies)

    model = kmeans.KMeans(2, max_iter, rstate)
    model.Fit(points)
    predicted = model.GetLabels(points.shape[0])

    assert t.check_clusters(expected, predicted, 2) == True
Exemple #3
0
def test_duplicate_dual_dbscan():
    """Run DBSCAN(0.1, 5) on seven copies each of (1, 1) and (-1, -1).

    The expected labelling puts the first seven points in one cluster and
    the last seven in another.
    """
    copies = 7
    points = np.array([[1, 1]] * copies + [[-1, -1]] * copies)
    expected = np.array([0] * copies + [1] * copies)

    model = DBSCAN(0.1, 5)
    model.Fit(points)
    predicted = model.GetLabels(points.shape[0])

    assert t.check_clusters(expected, predicted, 2) == True
Exemple #4
0
def test_large_kmeans():
    """Run KMeans with a single cluster on a 1000-sample, one-center blob set."""
    points, expected = datasets.make_blobs(centers=1, n_samples=1000)

    model = kmeans.KMeans(1, max_iter, rstate)
    model.Fit(points)
    n_points = points.shape[0]
    predicted = model.GetLabels(n_points)

    assert t.check_clusters(expected, predicted, 1) == True
Exemple #5
0
def test_clear_blobs_kmeans():
    """Run KMeans with 2 clusters on 100 blob samples centred at (-5, -5) and (5, 5)."""
    blob_centers = ((-5, -5), (5, 5))
    points, expected = datasets.make_blobs(centers=blob_centers, n_samples=100)

    model = kmeans.KMeans(2, max_iter, rstate)
    model.Fit(points)
    n_points = points.shape[0]
    predicted = model.GetLabels(n_points)

    assert t.check_clusters(expected, predicted, 2) == True
Exemple #6
0
def test_dimensionality_agnes(triple):
    """Fit the ``triple`` fixture on 16-feature blob data and check for 3 clusters."""
    points, expected = datasets.make_blobs(
        n_samples=60,
        n_features=16,
        cluster_std=0.2,
        random_state=31,
    )

    # The fixture object is used directly as the clustering implementation.
    triple.Fit(points)
    predicted = triple.GetLabels(points.shape[0])

    assert True == t.check_clusters(expected, predicted, 3)

    
Exemple #7
0
def test_clear_blobs_agnes(dual):
    """Fit the ``dual`` fixture on 100 blob samples centred at (-5, -5) and (5, 5)."""
    blob_centers = ((-5, -5), (5, 5))
    points, expected = datasets.make_blobs(centers=blob_centers, n_samples=100)

    # The fixture object is used directly as the clustering implementation.
    dual.Fit(points)
    n_points = points.shape[0]
    predicted = dual.GetLabels(n_points)

    assert t.check_clusters(expected, predicted, 2) == True
Exemple #8
0
def test_dimensionality_kmeans():
    """Run KMeans with 3 clusters on 288 blob samples that have 16 features."""
    blob_kwargs = {
        "n_samples": 288,
        "n_features": 16,
        "cluster_std": 0.2,
        "random_state": 31,
    }
    points, expected = datasets.make_blobs(**blob_kwargs)

    model = kmeans.KMeans(3, max_iter, rstate)
    model.Fit(points)
    predicted = model.GetLabels(points.shape[0])

    assert True == t.check_clusters(expected, predicted, 3)
Exemple #9
0
def test_simple_single_agnes(single):
    """Fit the ``single`` fixture on a dataset containing only the point (1, 1)."""
    points = np.array([[1, 1]])
    expected = np.array([0])

    # The fixture object is used directly as the clustering implementation.
    single.Fit(points)
    predicted = single.GetLabels(points.shape[0])

    assert t.check_clusters(expected, predicted, 1) == True
Exemple #10
0
def test_duplicate_dual_agnes(dual):
    """Fit the ``dual`` fixture on seven copies each of (1, 1) and (-1, -1)."""
    copies = 7
    points = np.array([[1, 1]] * copies + [[-1, -1]] * copies)
    expected = np.array([0] * copies + [1] * copies)

    # The fixture object is used directly as the clustering implementation.
    dual.Fit(points)
    predicted = dual.GetLabels(points.shape[0])

    assert t.check_clusters(expected, predicted, 2) == True
Exemple #11
0
def test_large_dbscan():
    """Compare the local DBSCAN with the reference on 1000 one-center samples.

    Both implementations are built from the values 0.6 and 4 and fitted on
    the same data. The labels produced by the reference implementation are
    the expected labels passed to ``t.check_clusters``.
    """
    # The labels generated by make_blobs are not used; the reference
    # clustering supplies the expected labels instead.
    data, _ = datasets.make_blobs(n_samples=1000, centers=1)

    impl = DBSCAN(0.6, 4)
    # Pass the reference parameters by keyword. Assuming `cluster` is
    # sklearn.cluster, `min_samples` is keyword-only in scikit-learn >= 1.0
    # and the positional form raises TypeError there.
    ref = cluster.DBSCAN(eps=0.6, min_samples=4)
    impl.Fit(data)
    ref.fit(data)
    r_labels = ref.labels_
    i_labels = impl.GetLabels(data.shape[0])

    assert t.check_clusters(r_labels, i_labels, 1) == True
Exemple #12
0
def test_simple_single_kmeans():
    """Run KMeans with a single cluster on a dataset containing only (1, 1)."""
    points = np.array([[1, 1]])
    expected = np.array([0])

    model = kmeans.KMeans(1, max_iter, rstate)
    model.Fit(points)
    predicted = model.GetLabels(points.shape[0])

    assert t.check_clusters(expected, predicted, 1) == True
Exemple #13
0
def test_simple_single_dbscan():
    """Run DBSCAN(1, 1) on a dataset containing only the point (1, 1)."""
    points = np.array([[1, 1]])
    expected = np.array([0])

    model = DBSCAN(1, 1)
    model.Fit(points)
    predicted = model.GetLabels(points.shape[0])

    assert t.check_clusters(expected, predicted, 1) == True
Exemple #14
0
def test_large_values_kmeans():
    """Run KMeans with 3 clusters on blobs centred at +/-2**25 and the origin."""
    magnitude = 2 ** 25
    blob_centers = (
        (magnitude, magnitude),
        (0, 0),
        (-magnitude, -magnitude),
    )
    points, expected = datasets.make_blobs(n_samples=144, centers=blob_centers)

    model = kmeans.KMeans(3, max_iter, rstate)
    model.Fit(points)
    predicted = model.GetLabels(points.shape[0])

    assert True == t.check_clusters(expected, predicted, 3)
Exemple #15
0
def test_dimensionality_dbscan():
    """Compare the local DBSCAN with the reference on 16-feature blob data.

    Both implementations are built from the values 0.6 and 4 and fitted on
    the same 288 samples. The reference labels are the expected labels passed
    to ``t.check_clusters`` together with a cluster count of 3.
    """
    # The labels generated by make_blobs are not used; the reference
    # clustering supplies the expected labels instead.
    data, _ = datasets.make_blobs(n_samples=288,
                                  n_features=16,
                                  cluster_std=0.2,
                                  random_state=31)

    impl = DBSCAN(0.6, 4)
    impl.Fit(data)
    # Pass the reference parameters by keyword. Assuming `cluster` is
    # sklearn.cluster, `min_samples` is keyword-only in scikit-learn >= 1.0
    # and the positional form raises TypeError there.
    ref = cluster.DBSCAN(eps=0.6, min_samples=4)
    ref.fit(data)

    assert True == t.check_clusters(ref.labels_, impl.GetLabels(data.shape[0]),
                                    3)
Exemple #16
0
def test_clear_blobs_dbscan():
    """Compare the local DBSCAN with the reference on two tight blobs.

    100 samples are generated around (-5, -5) and (5, 5) with
    ``cluster_std=0.1``. Both implementations are built from the values 0.6
    and 4 and fitted on the same data. The reference labels are the expected
    labels passed to ``t.check_clusters``.
    """
    centers = ((-5, -5), (5, 5))
    data, _ = datasets.make_blobs(n_samples=100,
                                  centers=centers,
                                  cluster_std=0.1)

    # Pass the reference parameters by keyword. Assuming `cluster` is
    # sklearn.cluster, `min_samples` is keyword-only in scikit-learn >= 1.0
    # and the positional form raises TypeError there.
    ref = cluster.DBSCAN(eps=0.6, min_samples=4)
    impl = DBSCAN(0.6, 4)
    impl.Fit(data)
    ref.fit(data)
    r_labels = ref.labels_
    i_labels = impl.GetLabels(data.shape[0])

    assert t.check_clusters(r_labels, i_labels, 2) == True