Ejemplo n.º 1
0
 def test_works_with_simplified_dunn(self, _, n_clusters):
     """Check DunnSearch (default options) over a kdtree-initialised KMeans.

     The search is capped at 10 clusters and fitted on the first value
     returned by ``data(n_clusters)``.  The test passes when the selected
     cluster count equals ``n_clusters`` and the adjusted Rand score of
     the produced labels exceeds 0.75.

     NOTE(review): the second value returned by ``data`` is used as the
     reference labelling for the Rand score -- confirm against ``data``.
     """
     samples, reference_labels = data(n_clusters)
     base_estimator = KMeans(n_clusters=2, init="kdtree")
     search = DunnSearch(base_estimator, max_clusters=10)
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference_labels, fitted.labels_)
     # Exact recovery of the cluster count is required here.
     assert n_clusters == fitted.n_clusters_
     assert agreement > 0.75
Ejemplo n.º 2
0
 def test_works_with_simplified_dunn(self, _, n_clusters):
     """Check DunnSearch (default options) over a kdtree-initialised KMeans.

     Fits a search limited to 10 clusters on the first value returned by
     ``data(n_clusters)`` and requires both an exact match on the number
     of clusters and an adjusted Rand score above 0.75.

     NOTE(review): the second value returned by ``data`` is used as the
     reference labelling for the Rand score -- confirm against ``data``.
     """
     samples, reference_labels = data(n_clusters)
     base_estimator = KMeans(n_clusters=2, init='kdtree')
     search = DunnSearch(base_estimator, max_clusters=10)
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference_labels, fitted.labels_)
     # Exact recovery of the cluster count is required here.
     self.assertEqual(n_clusters, fitted.n_clusters_)
     self.assertGreater(agreement, 0.75)
Ejemplo n.º 3
0
 def test_works_with_sampled_gap(self, _, n_clusters):
     """Check GAPSearch (default options) over a plain two-cluster KMeans.

     Fits a search limited to 10 clusters on the first value returned by
     ``data(n_clusters)``.  The selected cluster count may differ from
     ``n_clusters`` by at most one, and the adjusted Rand score of the
     produced labels must exceed 0.75.

     NOTE(review): the second value returned by ``data`` is used as the
     reference labelling for the Rand score -- confirm against ``data``.
     """
     samples, reference_labels = data(n_clusters)
     base_estimator = KMeans(n_clusters=2)
     search = GAPSearch(base_estimator, max_clusters=10)
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference_labels, fitted.labels_)
     # Tolerate an off-by-one cluster count in either direction.
     found = fitted.n_clusters_
     self.assertGreaterEqual(found + 1, n_clusters)
     self.assertLessEqual(found - 1, n_clusters)
     self.assertGreater(agreement, 0.75)
Ejemplo n.º 4
0
 def test_works_with_sampled_gap(self, _, n_clusters):
     """Check GAPSearch (default options) over a plain two-cluster KMeans.

     The search is capped at 10 clusters and fitted on the first value
     returned by ``data(n_clusters)``.  The selected cluster count may be
     off by one from ``n_clusters``; the adjusted Rand score of the
     produced labels must exceed 0.75.

     NOTE(review): the second value returned by ``data`` is used as the
     reference labelling for the Rand score -- confirm against ``data``.
     """
     samples, reference_labels = data(n_clusters)
     base_estimator = KMeans(n_clusters=2)
     search = GAPSearch(base_estimator, max_clusters=10)
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference_labels, fitted.labels_)
     # Tolerate an off-by-one cluster count in either direction.
     found = fitted.n_clusters_
     assert found + 1 >= n_clusters
     assert found - 1 <= n_clusters
     assert agreement > 0.75
Ejemplo n.º 5
0
 def test_works_with_unfit_removal(self):
     """Check DunnSearch with ``drop_unfit=True`` on three-cluster data.

     After fitting (at most 10 clusters) the search must report exactly
     three clusters, reach an adjusted Rand score above 0.75, and expose
     ``estimators_`` as ``None``.

     NOTE(review): the second value returned by ``data`` is used as the
     reference labelling for the Rand score -- confirm against ``data``.
     """
     n_clusters = 3
     samples, reference_labels = data(n_clusters)
     base_estimator = KMeans(n_clusters=2)
     search = DunnSearch(base_estimator, max_clusters=10, drop_unfit=True)
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference_labels, fitted.labels_)
     assert n_clusters == fitted.n_clusters_
     assert agreement > 0.75
     # With drop_unfit enabled, estimators_ is expected to be None after fit.
     assert fitted.estimators_ is None
Ejemplo n.º 6
0
 def test_works_with_unfit_removal(self):
     """Check DunnSearch with ``drop_unfit=True`` on three-cluster data.

     The fitted search (at most 10 clusters) has to select exactly three
     clusters, score above 0.75 on the adjusted Rand index, and leave
     ``estimators_`` set to ``None``.

     NOTE(review): the second value returned by ``data`` is used as the
     reference labelling for the Rand score -- confirm against ``data``.
     """
     n_clusters = 3
     samples, reference_labels = data(n_clusters)
     base_estimator = KMeans(n_clusters=2)
     search = DunnSearch(base_estimator, max_clusters=10, drop_unfit=True)
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference_labels, fitted.labels_)
     self.assertEqual(n_clusters, fitted.n_clusters_)
     self.assertGreater(agreement, 0.75)
     # With drop_unfit enabled, estimators_ is expected to be None after fit.
     self.assertIsNone(fitted.estimators_)
Ejemplo n.º 7
0
 def test_works_with_unfit_removal(self):
     """Check GAPSearch with ``drop_unfit=True`` on three-cluster data.

     The fitted search (at most 10 clusters) may select a cluster count
     that is off by one from three, must score above 0.75 on the adjusted
     Rand index, and must leave ``estimators_`` set to ``None``.

     NOTE(review): the second value returned by ``data`` is used as the
     reference labelling for the Rand score -- confirm against ``data``.
     """
     n_clusters = 3
     samples, reference_labels = data(n_clusters)
     base_estimator = KMeans(n_clusters=2)
     search = GAPSearch(base_estimator, max_clusters=10, drop_unfit=True)
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference_labels, fitted.labels_)
     # Tolerate an off-by-one cluster count in either direction.
     found = fitted.n_clusters_
     self.assertGreaterEqual(found + 1, n_clusters)
     self.assertLessEqual(found - 1, n_clusters)
     self.assertGreater(agreement, 0.75)
     # With drop_unfit enabled, estimators_ is expected to be None after fit.
     self.assertIsNone(fitted.estimators_)
Ejemplo n.º 8
0
 def test_works_with_unfit_removal(self):
     """Check GAPSearch with ``drop_unfit=True`` on three-cluster data.

     After fitting (at most 10 clusters) the selected cluster count may
     differ from three by at most one, the adjusted Rand score must
     exceed 0.75, and ``estimators_`` must be ``None``.

     NOTE(review): the second value returned by ``data`` is used as the
     reference labelling for the Rand score -- confirm against ``data``.
     """
     n_clusters = 3
     samples, reference_labels = data(n_clusters)
     base_estimator = KMeans(n_clusters=2)
     search = GAPSearch(base_estimator, max_clusters=10, drop_unfit=True)
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference_labels, fitted.labels_)
     # Tolerate an off-by-one cluster count in either direction.
     found = fitted.n_clusters_
     assert found + 1 >= n_clusters
     assert found - 1 <= n_clusters
     assert agreement > 0.75
     # With drop_unfit enabled, estimators_ is expected to be None after fit.
     assert fitted.estimators_ is None
Ejemplo n.º 9
0
 def test_works_with_full_exact_dunn(self, _, n_clusters):
     """Check DunnSearch configured with inter="closest", intra="furthest".

     Uses a kdtree-initialised KMeans as the base estimator and allows up
     to 15 clusters.  The fitted search must select exactly ``n_clusters``
     clusters and reach an adjusted Rand score above 0.75.

     NOTE(review): the second value returned by ``data`` is used as the
     reference labelling for the Rand score -- confirm against ``data``.
     """
     samples, reference_labels = data(n_clusters)
     base_estimator = KMeans(n_clusters=2, init="kdtree")
     search = DunnSearch(
         base_estimator, max_clusters=15, inter="closest", intra="furthest"
     )
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference_labels, fitted.labels_)
     # Exact recovery of the cluster count is required here.
     assert n_clusters == fitted.n_clusters_
     assert agreement > 0.75
Ejemplo n.º 10
0
 def test_works_with_full_exact_dunn(self, _, n_clusters):
     """Check DunnSearch configured with inter='closest', intra='furthest'.

     A kdtree-initialised KMeans serves as the base estimator and the
     search may try up to 15 clusters.  The test requires an exact match
     on the cluster count and an adjusted Rand score above 0.75.

     NOTE(review): the second value returned by ``data`` is used as the
     reference labelling for the Rand score -- confirm against ``data``.
     """
     samples, reference_labels = data(n_clusters)
     base_estimator = KMeans(n_clusters=2, init='kdtree')
     search = DunnSearch(
         base_estimator, max_clusters=15, inter='closest', intra='furthest'
     )
     fitted = search.fit(samples)
     agreement = adjusted_rand_score(reference_labels, fitted.labels_)
     # Exact recovery of the cluster count is required here.
     self.assertEqual(n_clusters, fitted.n_clusters_)
     self.assertGreater(agreement, 0.75)