def grade3():
    """Grade a ``KMeans`` implementation against three fixed scenarios.

    Every scenario seeds ``cluster_centers`` by hand, calls ``train`` with
    an iteration cap, and then checks both the resulting centers and the
    value ``train`` returned (presumably an iteration count -- confirm
    against the ``KMeans`` implementation). The first two scenarios are
    worth 0.5 marks each, the third is worth 1.

    Returns:
        The marks accumulated so far. If anything raises while grading,
        a message is printed, the remaining scenarios are skipped and the
        marks earned up to that point are returned.
    """
    marks = 0
    try:
        # Each row: (data, initial centers, expected centers,
        #            max iterations, expected train() result, marks awarded)
        scenarios = (
            (
                np.array([[i, i] for i in range(5)]),
                np.array([[1., 1.], [2., 2.], [3., 3.]]),
                np.array([[0.5, 0.5], [2.0, 2.0], [3.5, 3.5]]),
                1,
                0,
                0.5,
            ),
            (
                np.array([[i + 1, i * 2.3] for i in range(5)]),
                np.array([[5., 1.], [-1., 2.], [3., 6.]]),
                np.array([[5, 1], [1.5, 1.15], [4.0, 6.8999999999999995]]),
                1,
                0,
                0.5,
            ),
            (
                np.array([[i + 1, i * 2.3] for i in range(3)]),
                np.array([[5, 1], [-1., 2]]),
                np.array([[3.0, 4.6], [1.5, 1.15]]),
                5,
                1,
                1,
            ),
        )
        for data, centers, expected, max_iter, expected_it, award in scenarios:
            # One cluster per seeded center (3, 3 and 2 respectively).
            kmeans = KMeans(D=2, n_clusters=len(centers))
            kmeans.cluster_centers = centers
            it = kmeans.train(data, max_iter)
            if np.allclose(kmeans.cluster_centers, expected) and it == expected_it:
                marks += award
    except Exception:
        # Deliberately best-effort: a broken submission keeps whatever marks
        # it earned so far. Catching Exception (not a bare except) lets
        # KeyboardInterrupt / SystemExit still stop the grader.
        print('Error in k-means')
    return marks
def test_update_centers(self):
    """Tests that update_centers recomputes centers from assignments.

    Points are assigned using the centers returned by
    generate_cluster_samples, the model's centers are then zeroed, and
    update_centers must bring them back so that the scaled error
    against the original centers is below EPS.
    """
    X, _, centers = generate_cluster_samples()
    n_features = X.shape[1]
    k = centers.shape[0]

    kmeans = KMeans(k, N_ITER)

    # Set cluster centers so that assignment is deterministic
    kmeans.cluster_centers = centers
    assignments, distances = kmeans.assign_points(X)
    assignments = kmeans.reinitialize_empty_clusters(
        X, assignments, distances)

    # Clear out the centers so the final values can only come from
    # update_centers
    kmeans.cluster_centers = np.zeros((k, n_features))
    kmeans.update_centers(X, assignments)

    # Norm of the difference between estimated and real centers,
    # divided by the number of clusters
    error = np.linalg.norm(kmeans.cluster_centers - centers) / k
    self.assertLess(error, EPS)
def test_assign_points(self):
    """
    Tests the assign_points method of the KMeans class: output shapes,
    valid index range and the expected label for each block of samples.
    """
    X, _, centers = generate_cluster_samples()
    n_samples = X.shape[0]
    k = centers.shape[0]

    kmeans = KMeans(k, N_ITER)
    # Use the known centers so that assignment is deterministic
    kmeans.cluster_centers = centers
    assignments, distances = kmeans.assign_points(X)

    # Both outputs must be 1-D with one entry per sample
    # (assignments first, then distances)
    for result in (assignments, distances):
        self.assertEqual(result.ndim, 1)
        self.assertEqual(result.shape[0], n_samples)

    # Every assignment must be a valid cluster index (0 <= idx < k)
    in_range = np.logical_and(assignments >= 0, assignments < k)
    self.assertTrue(np.all(in_range))

    # Each block of sample indices must carry its expected label
    expected_blocks = (
        (slice(None, 25), 0),
        (slice(25, 50), 1),
        (slice(50, 75), 2),
        (slice(75, None), 3),
    )
    for block, label in expected_blocks:
        self.assertTrue(np.all(assignments[block] == label))
def test_reinitialize_empty_clusters(self):
    """
    Tests reassignment of points to empty clusters: after one cluster is
    emptied by hand, reinitialize_empty_clusters must return assignments
    that use every cluster index exactly from the set 0..k-1.
    """
    X, _, centers = generate_cluster_samples()
    k = centers.shape[0]

    kmeans = KMeans(k, N_ITER)

    # Set cluster centers so that assignment is deterministic
    kmeans.cluster_centers = centers
    assignments, distances = kmeans.assign_points(X)

    # Reassign all points in cluster 3 to cluster 2 to create an empty
    # cluster. NOTE(review): assumes the fixture places cluster 3 at
    # sample indices 75 and up -- confirm against
    # generate_cluster_samples.
    assignments[75:] = 2

    # Reinitialize empty clusters by reassigning points
    assignments = kmeans.reinitialize_empty_clusters(
        X, assignments, distances)

    # Each cluster must have at least one assigned point, and only valid
    # cluster indices may be used
    self.assertSetEqual(set(assignments), set(range(k)))