def do_test_kmeans_results(self, representation, algo, dtype):
    """Check a weighted KMeansL1L2 fit on a four-point toy dataset.

    The model is initialised with two explicit centers and fitted with
    sample weights; labels, inertia, centers and iteration count are then
    compared with hard-coded expected values.

    :param representation: ``'dense'`` or ``'sparse'``; selects whether the
        data is built with ``np.array`` or ``sp.csr_matrix`` (any other
        value raises ``KeyError``)
    :param algo: value forwarded as ``algorithm`` to ``KMeansL1L2``
    :param dtype: dtype used for the data, the initial centers and the
        expected centers
    """
    builders = {
        'dense': np.array,
        'sparse': sp.csr_matrix,
    }
    make_array = builders[representation]
    points = make_array([[0, 0], [0.5, 0], [0.5, 1], [1, 1]], dtype=dtype)

    # will be rescaled to [1.5, 0.5, 0.5, 1.5]
    weights = [3, 1, 1, 3]
    start_centers = np.array([[0, 0], [1, 1]], dtype=dtype)
    wanted_centers = np.array([[0.125, 0], [0.875, 1]], dtype=dtype)

    model = KMeansL1L2(n_clusters=2, n_init=1, init=start_centers,
                       algorithm=algo)
    model.fit(points, sample_weight=weights)

    assert_array_equal(model.labels_, [0, 0, 1, 1])
    assert_almost_equal(model.inertia_, 0.1875)
    assert_array_almost_equal(model.cluster_centers_, wanted_centers)
    self.assertEqualArray(model.n_iter_, 2)
def test_kmeans_l2_iris(self):
    """Fit ``KMeansL1L2(4, norm='L2')`` on iris and check the predicted labels.

    The set of labels predicted on the training data must be exactly
    ``{0, 1, 2, 3}``.
    """
    data = datasets.load_iris().data
    model = KMeansL1L2(4, norm='L2')
    model.fit(data)
    predicted = set(model.predict(data))
    self.assertEqual({0, 1, 2, 3}, predicted)
def test_k_means_random_init_not_precomputed(self):
    """Fit with ``init="random"`` and ``precompute_distances=False``.

    Uses the ``n_clusters`` and ``X`` attributes of
    ``TestKMeansL1L2Sklearn`` and hands the object returned by ``fit`` to
    ``self._check_fitted_model``.
    """
    fixture = TestKMeansL1L2Sklearn
    model = KMeansL1L2(
        init="random",
        n_clusters=fixture.n_clusters,
        random_state=42,
        precompute_distances=False,
    )
    # Keep the value returned by fit(): that is what gets checked.
    fitted = model.fit(fixture.X)
    self._check_fitted_model(fitted)
def test_kmeans_l2_random(self):
    """Fit ``KMeansL1L2(4, init="random")`` on iris and check the labels.

    The set of labels predicted on the training data must be exactly
    ``{0, 1, 2, 3}``.
    """
    data = datasets.load_iris().data
    model = KMeansL1L2(4, init="random")
    model.fit(data)
    predicted = set(model.predict(data))
    self.assertEqual({0, 1, 2, 3}, predicted)
def test_kmeans_l1_small(self):
    """Fit ``KMeansL1L2(4, norm='L1', n_jobs=1)`` on the first six iris rows.

    The set of labels predicted on those six rows must be exactly
    ``{0, 1, 2, 3}``.
    """
    first_rows = datasets.load_iris().data[:6]
    model = KMeansL1L2(4, norm='L1', n_jobs=1)
    model.fit(first_rows)
    predicted = set(model.predict(first_rows))
    self.assertEqual({0, 1, 2, 3}, predicted)
def do_test_kmeans_results(self, representation, algo, dtype, norm, sw):
    """Check a KMeansL1L2 fit on a four-point toy dataset.

    The model is initialised with two explicit centers, fitted with or
    without sample weights, and its labels, inertia, centers and iteration
    count are compared with hard-coded expected values.

    The check returns silently when ``norm == 'L1'`` and the estimator
    raises ``NotImplementedError`` with one of the messages recognised
    below; any other ``NotImplementedError`` is re-raised.

    :param representation: ``'dense'`` or ``'sparse'``; selects whether the
        data is built with ``np.array`` or ``sp.csr_matrix`` (any other
        value raises ``KeyError``)
    :param algo: value forwarded as ``algorithm`` to ``KMeansL1L2``
    :param dtype: dtype used for the data, the initial centers and the
        expected centers
    :param norm: value forwarded as ``norm`` to ``KMeansL1L2``; also
        selects the expected values when no weights are used
    :param sw: truthy to fit with sample weights ``[3, 1, 1, 3]``,
        falsy to fit with ``sample_weight=None``
    """
    make_array = {
        'dense': np.array,
        'sparse': sp.csr_matrix,
    }[representation]
    points = make_array([[0, 0], [0.5, 0], [0.5, 1], [1, 1]], dtype=dtype)
    start_centers = np.array([[0, 0], [1, 1]], dtype=dtype)

    if sw:
        # will be rescaled to [1.5, 0.5, 0.5, 1.5]
        weights = [3, 1, 1, 3]
        # The expected inertia depends on the module-level sklearn_023 flag.
        wanted_inertia = 0.375 if sklearn_023 else 0.1875
        wanted_centers = np.array([[0.125, 0], [0.875, 1]], dtype=dtype)
        wanted_n_iter = 2
    else:
        weights = None
        # Both norms expect the same centers; inertia and iterations differ.
        wanted_inertia, wanted_n_iter = (0.25, 2) if norm == 'L2' else (1., 1)
        wanted_centers = np.array([[0.25, 0], [0.75, 1]], dtype=dtype)
    wanted_labels = [0, 0, 1, 1]

    try:
        model = KMeansL1L2(n_clusters=2, n_init=1, init=start_centers,
                           algorithm=algo, norm=norm)
    except NotImplementedError as exc:
        tolerated = ("Only algorithm 'full' is implemented" in str(exc)
                     and norm == 'L1')
        if not tolerated:
            raise exc
        return

    try:
        model.fit(points, sample_weight=weights)
    except NotImplementedError as exc:
        text = str(exc)
        known_gaps = (
            "Non uniform weights are not implemented yet",
            "Sparse matrix is not implemented",
        )
        tolerated = (any(gap in text for gap in known_gaps)
                     and norm == 'L1')
        if not tolerated:
            raise exc
        return

    assert_array_equal(model.labels_, wanted_labels)
    assert_almost_equal(model.inertia_, wanted_inertia)
    assert_array_almost_equal(model.cluster_centers_, wanted_centers)
    self.assertEqualArray(model.n_iter_, wanted_n_iter)
def test_kmeans_l1_check(self):
    """Fit ``KMeansL1L2(2, norm='L1')`` on six 2-D points and check outputs.

    The data has two identical columns. The test checks the set of
    predicted labels, the shape and maximum of ``cluster_centers_``, and
    the output of ``transform`` on the training data and on ``[[3, 3]]``.
    """
    column = [-10, 1, 2, 3, 4, 10]
    points = numpy.array([column, column]).T

    model = KMeansL1L2(2, norm='L1')
    model.fit(points)

    predicted = set(model.predict(points))
    self.assertEqual({0, 1}, predicted)

    centers = model.cluster_centers_
    self.assertEqual(centers.shape, (2, 2))
    self.assertEqualArray(centers.max(), [3, 3])

    distances = model.transform(points)
    self.assertEqual(distances.shape, (points.shape[0], 2))

    distances = model.transform([[3, 3]])
    self.assertEqualArray(distances.min(), [0])
def test_k_means_new_centers(self):
    """Explore the part of the code where a new center is reassigned.

    The same estimator is fitted on a dense array and on its
    ``sp.coo_matrix`` version, starting from ``bad_centers``; after
    relabelling, the labels must equal ``[0, 1, 2, 1, 1, 2]`` in both cases.
    """
    dense = np.array([
        [0, 0, 1, 1],
        [0, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 1, 0, 0],
    ])
    wanted = [0, 1, 2, 1, 1, 2]
    bad_centers = np.array([
        [0, 1, 0, 0],
        [.2, 0, .2, .2],
        [0, 0, 0, 0],
    ])

    model = KMeansL1L2(n_clusters=3, init=bad_centers, n_init=1,
                       max_iter=10, random_state=1)

    # Both inputs are built up front, before the first fit.
    inputs = (dense, sp.coo_matrix(dense))
    for data in inputs:
        model.fit(data)
        found = model.labels_
        # Reorder the labels so that the first instance is in cluster 0,
        # the second in cluster 1, ...
        _, first_seen = np.unique(found, return_index=True)
        relabelled = first_seen[found]
        np.testing.assert_array_equal(relabelled, wanted)
def test_k_means_plus_plus_init_not_precomputed(self):
    """Fit with ``init="k-means++"`` and validate the fitted model.

    Uses the ``n_clusters`` and ``X`` attributes of
    ``TestKMeansL1L2Sklearn`` and hands the object returned by ``fit`` to
    ``self._check_fitted_model``.
    """
    fixture = TestKMeansL1L2Sklearn
    model = KMeansL1L2(
        init="k-means++",
        n_clusters=fixture.n_clusters,
        random_state=42,
    )
    # Keep the value returned by fit(): that is what gets checked.
    fitted = model.fit(fixture.X)
    self._check_fitted_model(fitted)