Exemple #1
0
 def test_works_with_simplified_dunn(self, _, n_clusters):
     """Check DunnSearch without explicit ``inter``/``intra`` arguments.

     A two-cluster ``KMeans`` created with ``init="kdtree"`` is wrapped in a
     ``DunnSearch`` with ``max_clusters=10`` and fitted on the sample
     returned by ``data(n_clusters)``.  The test passes when the selected
     cluster count equals ``n_clusters`` and the adjusted Rand score against
     the reference labels is above 0.75.

     NOTE(review): ``_`` is unused here; presumably a case label supplied by
     a parameterizing decorator -- confirm against the enclosing class.
     """
     samples, reference = data(n_clusters)
     base_estimator = KMeans(n_clusters=2, init="kdtree")
     search = DunnSearch(base_estimator, max_clusters=10)
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference, fitted.labels_)
     assert n_clusters == fitted.n_clusters_
     assert agreement > 0.75
Exemple #2
0
 def test_works_with_simplified_dunn(self, _, n_clusters):
     """Check DunnSearch without explicit ``inter``/``intra`` arguments.

     Fits a ``DunnSearch`` (``max_clusters=10``) around a two-cluster
     ``KMeans`` created with ``init='kdtree'`` and asserts that the chosen
     cluster count equals ``n_clusters`` and that the adjusted Rand score
     against the reference labels is greater than 0.75.

     NOTE(review): ``_`` is unused here; presumably a case label supplied by
     a parameterizing decorator -- confirm against the enclosing class.
     """
     points, truth = data(n_clusters)
     seed_model = KMeans(n_clusters=2, init='kdtree')
     searcher = DunnSearch(seed_model, max_clusters=10)
     result = searcher.fit(points)
     score = adjusted_rand_score(truth, result.labels_)
     self.assertEqual(n_clusters, result.n_clusters_)
     self.assertGreater(score, 0.75)
Exemple #3
0
 def test_works_with_sampled_gap(self, _, n_clusters):
     """Check GAPSearch with only ``max_clusters`` specified.

     A ``GAPSearch`` (``max_clusters=10``) wrapping a two-cluster ``KMeans``
     is fitted on the sample returned by ``data(n_clusters)``.  The selected
     cluster count may differ from ``n_clusters`` by at most one, and the
     adjusted Rand score against the reference labels must exceed 0.75.

     NOTE(review): ``_`` is unused here; presumably a case label supplied by
     a parameterizing decorator -- confirm against the enclosing class.
     """
     points, truth = data(n_clusters)
     seed_model = KMeans(n_clusters=2)
     searcher = GAPSearch(seed_model, max_clusters=10)
     result = searcher.fit(points)
     score = adjusted_rand_score(truth, result.labels_)
     # Tolerance of one cluster in either direction.
     self.assertGreaterEqual(result.n_clusters_ + 1, n_clusters)
     self.assertLessEqual(result.n_clusters_ - 1, n_clusters)
     self.assertGreater(score, 0.75)
Exemple #4
0
 def test_works_with_sampled_gap(self, _, n_clusters):
     """Check GAPSearch with only ``max_clusters`` specified.

     Fits a ``GAPSearch`` (``max_clusters=10``) around a two-cluster
     ``KMeans`` and asserts that the selected cluster count is within one
     of ``n_clusters`` and that the adjusted Rand score against the
     reference labels is above 0.75.

     NOTE(review): ``_`` is unused here; presumably a case label supplied by
     a parameterizing decorator -- confirm against the enclosing class.
     """
     samples, reference = data(n_clusters)
     base_estimator = KMeans(n_clusters=2)
     search = GAPSearch(base_estimator, max_clusters=10)
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference, fitted.labels_)
     # Tolerance of one cluster in either direction.
     assert fitted.n_clusters_ + 1 >= n_clusters
     assert fitted.n_clusters_ - 1 <= n_clusters
     assert agreement > 0.75
Exemple #5
0
 def test_works_with_unfit_removal(self):
     """Check DunnSearch when ``drop_unfit=True`` is requested.

     The search (``max_clusters=10``) is fitted on the sample returned by
     ``data(3)``.  Besides the usual checks -- selected cluster count equal
     to 3 and adjusted Rand score above 0.75 -- the fitted object's
     ``estimators_`` attribute must be ``None``.
     """
     n_clusters = 3
     samples, reference = data(n_clusters)
     base_estimator = KMeans(n_clusters=2)
     search = DunnSearch(base_estimator, max_clusters=10, drop_unfit=True)
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference, fitted.labels_)
     assert n_clusters == fitted.n_clusters_
     assert agreement > 0.75
     assert fitted.estimators_ is None
Exemple #6
0
 def test_works_with_unfit_removal(self):
     """Check DunnSearch when ``drop_unfit=True`` is requested.

     Fits the search (``max_clusters=10``) on the sample returned by
     ``data(3)`` and asserts that the selected cluster count is 3, that the
     adjusted Rand score exceeds 0.75, and that ``estimators_`` is ``None``
     on the fitted object.
     """
     n_clusters = 3
     points, truth = data(n_clusters)
     seed_model = KMeans(n_clusters=2)
     searcher = DunnSearch(seed_model, max_clusters=10, drop_unfit=True)
     result = searcher.fit(points)
     score = adjusted_rand_score(truth, result.labels_)
     self.assertEqual(n_clusters, result.n_clusters_)
     self.assertGreater(score, 0.75)
     self.assertIsNone(result.estimators_)
Exemple #7
0
 def test_works_with_unfit_removal(self):
     """Check GAPSearch when ``drop_unfit=True`` is requested.

     Fits the search (``max_clusters=10``) on the sample returned by
     ``data(3)``.  The selected cluster count may be off by at most one,
     the adjusted Rand score must exceed 0.75, and ``estimators_`` must be
     ``None`` on the fitted object.
     """
     n_clusters = 3
     points, truth = data(n_clusters)
     seed_model = KMeans(n_clusters=2)
     searcher = GAPSearch(seed_model, max_clusters=10, drop_unfit=True)
     result = searcher.fit(points)
     score = adjusted_rand_score(truth, result.labels_)
     # Tolerance of one cluster in either direction.
     self.assertGreaterEqual(result.n_clusters_ + 1, n_clusters)
     self.assertLessEqual(result.n_clusters_ - 1, n_clusters)
     self.assertGreater(score, 0.75)
     self.assertIsNone(result.estimators_)
Exemple #8
0
 def test_works_with_unfit_removal(self):
     """Check GAPSearch when ``drop_unfit=True`` is requested.

     The search (``max_clusters=10``) is fitted on the sample returned by
     ``data(3)``.  The test passes when the selected cluster count is within
     one of 3, the adjusted Rand score is above 0.75, and the fitted
     object's ``estimators_`` attribute is ``None``.
     """
     n_clusters = 3
     samples, reference = data(n_clusters)
     base_estimator = KMeans(n_clusters=2)
     search = GAPSearch(base_estimator, max_clusters=10, drop_unfit=True)
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference, fitted.labels_)
     # Tolerance of one cluster in either direction.
     assert fitted.n_clusters_ + 1 >= n_clusters
     assert fitted.n_clusters_ - 1 <= n_clusters
     assert agreement > 0.75
     assert fitted.estimators_ is None
Exemple #9
0
 def test_works_with_full_exact_dunn(self, _, n_clusters):
     """Check DunnSearch with ``inter="closest"`` and ``intra="furthest"``.

     A two-cluster ``KMeans`` created with ``init="kdtree"`` is wrapped in a
     ``DunnSearch`` with ``max_clusters=15`` and fitted on the sample
     returned by ``data(n_clusters)``.  The selected cluster count must
     equal ``n_clusters`` and the adjusted Rand score against the reference
     labels must be above 0.75.

     NOTE(review): ``_`` is unused here; presumably a case label supplied by
     a parameterizing decorator -- confirm against the enclosing class.
     """
     samples, reference = data(n_clusters)
     base_estimator = KMeans(n_clusters=2, init="kdtree")
     search = DunnSearch(
         base_estimator, max_clusters=15, inter="closest", intra="furthest"
     )
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference, fitted.labels_)
     assert n_clusters == fitted.n_clusters_
     assert agreement > 0.75
Exemple #10
0
 def test_works_with_full_exact_dunn(self, _, n_clusters):
     """Check DunnSearch with ``inter='closest'`` and ``intra='furthest'``.

     Fits a ``DunnSearch`` (``max_clusters=15``) around a two-cluster
     ``KMeans`` created with ``init='kdtree'`` and asserts that the chosen
     cluster count equals ``n_clusters`` and that the adjusted Rand score
     against the reference labels is greater than 0.75.

     NOTE(review): ``_`` is unused here; presumably a case label supplied by
     a parameterizing decorator -- confirm against the enclosing class.
     """
     points, truth = data(n_clusters)
     seed_model = KMeans(n_clusters=2, init='kdtree')
     searcher = DunnSearch(
         seed_model, max_clusters=15, inter='closest', intra='furthest'
     )
     result = searcher.fit(points)
     score = adjusted_rand_score(truth, result.labels_)
     self.assertEqual(n_clusters, result.n_clusters_)
     self.assertGreater(score, 0.75)