def test_kmeans_constraint_weights_bigger(self):
    # Fits ConstraintKMeans (strategy='weights', history=True) on two
    # stacked blob sets, then checks the shape of the predictions, the
    # rank of cluster_centers_iter_ and the content of cluster_edges().
    sample_count = 100
    blob_options = dict(n_features=2, centers=2, cluster_std=1.0,
                        shuffle=True, random_state=2)
    # Only the first item returned by make_blobs is kept.
    lower_part = make_blobs(n_samples=sample_count,
                            center_box=(-10.0, 0.0), **blob_options)[0]
    upper_part = make_blobs(n_samples=sample_count // 2,
                            center_box=(0.0, 10.0), **blob_options)[0]
    points = numpy.vstack([lower_part, upper_part])

    model = ConstraintKMeans(n_clusters=4, strategy='weights',
                             history=True)
    model.fit(points)

    # One prediction per input row.
    predicted = model.predict(points)
    self.assertEqual(predicted.shape, (points.shape[0], ))

    # The per-iteration centers are expected to be a 3-dimensional array.
    centers_by_iteration = model.cluster_centers_iter_
    self.assertEqual(len(centers_by_iteration.shape), 3)

    # cluster_edges() must return a set of 5 items, tuples as far as
    # the first iterated element shows.
    edge_set = model.cluster_edges()
    self.assertIsInstance(edge_set, set)
    self.assertEqual(len(edge_set), 5)
    self.assertIsInstance(next(iter(edge_set)), tuple)
def test_kmeans_constraint_weights(self):
    # Fits ConstraintKMeans with strategy='weights' twice on a 4-point
    # dataset (verbose=10, then verbose=5), logging through a
    # BufferedPrint, and checks the second fitted model and the log.
    samples = numpy.array([[0, 0], [0.2, 0.2], [-0.1, -0.1], [1, 1]])
    shared_options = dict(n_clusters=2, kmeans0=False, random_state=1,
                          strategy='weights')
    log = BufferedPrint()

    # First fit: only its logging side effect is used.
    chatty_model = ConstraintKMeans(verbose=10, **shared_options)
    chatty_model.fit(samples, fLOG=log.fprint)

    # Second fit: this is the model the assertions inspect.
    model = ConstraintKMeans(verbose=5, **shared_options)
    model.fit(samples, fLOG=log.fprint)

    expected_centers = numpy.array([[0.6, 0.6], [-0.05, -0.05]])
    expected_labels = numpy.array([1, 0, 1, 0])
    self.assertEqual(model.cluster_centers_.shape, (2, 2))
    self.assertLesser(model.inertia_, 4.55)
    self.assertEqual(model.cluster_centers_, expected_centers)
    self.assertEqual(model.labels_, expected_labels)

    # predict() is expected to differ from labels_ on this dataset.
    predicted = model.predict(samples)
    self.assertEqual(predicted, numpy.array([1, 1, 1, 0]))

    distances = model.transform(samples)
    self.assertEqual(distances.shape, (4, 2))

    scores = model.score(samples)
    self.assertEqual(scores.shape, (4, ))

    # The verbose runs must have written the "CKMeans" marker.
    self.assertIn("CKMeans", str(log))
def test_kmeans_constraint_blobs20(self):
    # Fits ConstraintKMeans (strategy='gain', balanced predictions,
    # history=True) on two stacked blob sets and checks that predict()
    # stays close to labels_ and that cluster_centers_iter_ has rank 3.
    blob_options = dict(n_features=2, centers=2, cluster_std=1.0,
                        shuffle=True, random_state=0)
    # Only the first item returned by make_blobs is kept.
    lower_part = make_blobs(n_samples=20, center_box=(-10.0, 0.0),
                            **blob_options)[0]
    upper_part = make_blobs(n_samples=10, center_box=(0.0, 10.0),
                            **blob_options)[0]
    points = numpy.vstack([lower_part, upper_part])

    model = ConstraintKMeans(
        n_clusters=4, verbose=0, kmeans0=False, random_state=2,
        strategy='gain', balanced_predictions=True, history=True)
    model.fit(points)

    # Sum of absolute label differences between fit and predict.
    predicted = model.predict(points)
    label_gap = numpy.abs(model.labels_ - predicted).sum()
    self.assertLesser(label_gap, 6)

    centers_by_iteration = model.cluster_centers_iter_
    self.assertEqual(len(centers_by_iteration.shape), 3)
def test_kmeans_constraint_gain(self):
    # Fits ConstraintKMeans with strategy='gain' on a 4-point dataset
    # and checks centers, inertia, labels and predictions.
    samples = numpy.array([[0, 0], [0.2, 0.2], [-0.1, -0.1], [1, 1]])
    model = ConstraintKMeans(
        n_clusters=2, verbose=0, kmeans0=False,
        random_state=1, strategy='gain')
    model.fit(samples)

    expected_centers = numpy.array([[0.6, 0.6], [-0.05, -0.05]])
    expected_labels = numpy.array([1, 0, 1, 0])
    self.assertEqual(model.cluster_centers_.shape, (2, 2))
    self.assertEqualFloat(model.inertia_, 0.455)
    self.assertEqual(model.cluster_centers_, expected_centers)
    self.assertEqual(model.labels_, expected_labels)

    # predict() is expected to differ from labels_ on this dataset.
    predicted = model.predict(samples)
    self.assertEqual(predicted, numpy.array([1, 1, 1, 0]))
def test_kmeans_constraint_gain3(self):
    # Fits ConstraintKMeans with 3 clusters (strategy='gain', balanced
    # predictions) on 6 points and checks which points share a label
    # and that predict() reproduces labels_.
    samples = numpy.array([[0, 0], [0.2, 0.2], [-0.1, -0.1],
                           [1, 1], [1.1, 0.9], [-1.1, 1.]])

    # Original note: choose random_state=2 to get the labels
    # [1 1 0 2 2 0]; that configuration can only be modified with a
    # permutation of 3 elements.
    # NOTE(review): the constructor below passes random_state=1, not 2;
    # confirm which value the note above is meant to describe.
    model = ConstraintKMeans(
        n_clusters=3, verbose=0, kmeans0=False, random_state=1,
        strategy='gain', balanced_predictions=True)
    model.fit(samples)
    self.assertEqual(model.cluster_centers_.shape, (3, 2))

    # Pairs of sample indices that must end up in the same cluster.
    labels = model.labels_
    for left, right in ((1, 2), (0, 5), (3, 4)):
        self.assertEqual(labels[left], labels[right])

    predicted = model.predict(samples)
    self.assertEqualArray(predicted, labels)
def test_kmeans_constraint_sparse(self):
    # Fits ConstraintKMeans with strategy='distance' on a CSR sparse
    # matrix. No random_state is given, so the assertions accept either
    # numbering of the two clusters, keyed on the label of sample 0.
    dense = numpy.array([[0, 0], [0.2, 0.2], [-0.1, -0.1], [1, 1]])
    samples = scipy.sparse.csr_matrix(dense)

    model = ConstraintKMeans(n_clusters=2, verbose=0,
                             strategy='distance')
    model.fit(samples)
    self.assertEqual(model.cluster_centers_.shape, (2, 2))
    self.assertEqualFloat(model.inertia_, 0.455)

    if model.labels_[0] != 0:
        expected_labels = numpy.array([1, 0, 1, 0])
        expected_centers = numpy.array([[0.6, 0.6], [-0.05, -0.05]])
    else:
        expected_labels = numpy.array([0, 1, 0, 1])
        expected_centers = numpy.array([[-0.05, -0.05], [0.6, 0.6]])
    self.assertEqual(model.labels_, expected_labels)
    self.assertEqual(model.cluster_centers_, expected_centers)

    # labels_ is read again after predict(), as in the original flow.
    predicted = model.predict(samples)
    if model.labels_[0] != 0:
        expected_prediction = numpy.array([1, 1, 1, 0])
    else:
        expected_prediction = numpy.array([0, 0, 0, 1])
    self.assertEqual(predicted, expected_prediction)
# Fit two 4-cluster models with balanced predictions on X, one per
# strategy ('gain' and 'distance'). X and colors are defined outside
# this excerpt.
km1 = ConstraintKMeans(n_clusters=4, strategy='gain',
                       balanced_predictions=True)
km1.fit(X)
km2 = ConstraintKMeans(n_clusters=4, strategy='distance',
                       balanced_predictions=True)
km2.fit(X)

##########################
# This algorithm tries to exchange points
# between clusters.

# Predicted labels for each model and a tally of points per label
# (Counter is presumably collections.Counter; hist1 and hist2 are not
# used within this excerpt).
cl1 = km1.predict(X)
hist1 = Counter(cl1)

cl2 = km2.predict(X)
hist2 = Counter(cl2)

# Two side-by-side panels: left for km1, right for km2.
fig, ax = plt.subplots(1, 2, figsize=(10, 4))
# The label range comes from cl1 and is reused for cl2; each cluster
# is drawn as dots in its own color taken from colors[i].
for i in range(0, max(cl1) + 1):
    ax[0].plot(X[cl1 == i, 0], X[cl1 == i, 1],
               colors[i] + '.', label='cl%d' % i)
    ax[1].plot(X[cl2 == i, 0], X[cl2 == i, 1],
               colors[i] + '.', label='cl%d' % i)