def grade3():
    """Grade a KMeans implementation against three fixed scenarios.

    Each scenario seeds ``cluster_centers`` with known values, calls
    ``train`` and awards its marks only when the resulting centers match
    the expected ones (``np.allclose``) AND the value returned by ``train``
    equals the expected one.

    Returns:
        The accumulated marks: 0.5 + 0.5 + 1 when every scenario passes and
        0 when none does. If an exception is raised part-way through,
        'Error in k-means' is printed and the marks earned so far are
        returned.
    """
    marks = 0
    try:
        # Each case is:
        #   (data, initial centers, expected centers,
        #    second argument to train, expected train() result, marks)
        # NOTE(review): the second argument to train looks like an iteration
        # limit and its result like an iteration counter - confirm against
        # KMeans.train.
        cases = (
            (
                np.array([[i, i] for i in range(5)]),
                np.array([[1., 1.], [2., 2.], [3., 3.]]),
                np.array([[0.5, 0.5], [2.0, 2.0], [3.5, 3.5]]),
                1, 0, 0.5,
            ),
            (
                np.array([[i + 1, i * 2.3] for i in range(5)]),
                np.array([[5., 1.], [-1., 2.], [3., 6.]]),
                np.array([[5, 1], [1.5, 1.15], [4.0, 6.8999999999999995]]),
                1, 0, 0.5,
            ),
            (
                np.array([[i + 1, i * 2.3] for i in range(3)]),
                np.array([[5, 1], [-1., 2]]),
                np.array([[3.0, 4.6], [1.5, 1.15]]),
                5, 1, 1,
            ),
        )

        for data, centers, expected, train_arg, expected_it, credit in cases:
            # One cluster per row of the seeded centers.
            kmeans = KMeans(D=2, n_clusters=centers.shape[0])
            kmeans.cluster_centers = centers
            it = kmeans.train(data, train_arg)
            if np.allclose(kmeans.cluster_centers, expected) and it == expected_it:
                marks += credit
    except Exception:
        # Exception rather than a bare except: any failure in the
        # implementation under test is reported and scored with the marks
        # earned so far, while KeyboardInterrupt / SystemExit still propagate.
        print('Error in k-means')
    return marks
Beispiel #2
0
    def test_update_centers(self):
        """
        Tests update centers.

        The centers returned by generate_cluster_samples are installed first
        so that the point assignments are deterministic. The centers are then
        zeroed, and update_centers must bring them back to within EPS of the
        original ones.
        """
        X, _, centers = generate_cluster_samples()
        n_features = X.shape[1]
        k = centers.shape[0]

        kmeans = KMeans(k, N_ITER)

        # Set cluster centers so that assignment is deterministic
        kmeans.cluster_centers = centers
        assignments, distances = kmeans.assign_points(X)
        assignments = kmeans.reinitialize_empty_clusters(
            X, assignments, distances)

        # clear out centers to test method
        kmeans.cluster_centers = np.zeros((k, n_features))
        kmeans.update_centers(X, assignments)

        # norm of the (estimated - real) center difference, scaled by the
        # number of clusters (for a 2-D array np.linalg.norm defaults to the
        # Frobenius norm)
        error = np.linalg.norm(kmeans.cluster_centers - centers) / k
        self.assertLess(error, EPS)
Beispiel #3
0
    def test_assign_points(self):
        """
        Tests KMeans.assign_points: shape of the returned arrays, validity of
        the cluster indices, and the expected cluster for each block of
        samples.
        """
        X, _, centers = generate_cluster_samples()
        n_clusters = centers.shape[0]
        n_points = X.shape[0]

        model = KMeans(n_clusters, N_ITER)

        # With the known centers in place the assignment is deterministic
        model.cluster_centers = centers
        labels, dists = model.assign_points(X)

        # both outputs must be flat arrays with one entry per sample
        for arr in (labels, dists):
            self.assertEqual(arr.ndim, 1)
            self.assertEqual(arr.shape[0], n_points)

        # every label must be a valid cluster index (0 <= idx < k)
        in_range = np.logical_and(labels >= 0, labels < n_clusters)
        self.assertTrue(np.all(in_range))

        # each consecutive block of samples must be assigned to its own cluster
        expected_blocks = (
            (slice(None, 25), 0),
            (slice(25, 50), 1),
            (slice(50, 75), 2),
            (slice(75, None), 3),
        )
        for block, cluster in expected_blocks:
            self.assertTrue(np.all(labels[block] == cluster))
Beispiel #4
0
    def test_reinitialize_empty_clusters(self):
        """
        Tests reassignment of points to empty clusters.

        One cluster is emptied by hand after the initial assignment. The
        assignments returned by reinitialize_empty_clusters must then use
        every cluster index in range(k) at least once and no other index.
        """
        X, _, centers = generate_cluster_samples()
        k = centers.shape[0]

        kmeans = KMeans(k, N_ITER)

        # Set cluster centers so that assignment is deterministic
        kmeans.cluster_centers = centers
        assignments, distances = kmeans.assign_points(X)

        # reassign all points in cluster 3 (samples 75 onward) to cluster 2
        # to create an empty cluster
        assignments[75:] = 2

        # reinitialize empty clusters by reassigning points
        assignments = kmeans.reinitialize_empty_clusters(
            X, assignments, distances)

        # ensure that each cluster has an assigned point
        # and that only valid cluster indices are used
        self.assertSetEqual(set(assignments), set(range(k)))