コード例 #1
0
class TestKMeans(unittest.TestCase):
    def setUp(self):
        self._kmeans = KMeans(2) # n_clusters

    def test_get_intial_centroids(self):
        data = np.array([[1, 1], [0, 0], [-1, -1], [2, 2]])
        data = self._kmeans._get_initial_centroids(data)
        # random seed chooses the same centroids
        exp_data = np.array([[0, 0], [2, 2]])
        self.assertTrue(np.array_equal(data, exp_data))

        cent1, cent2 = [cluster.centroid for cluster in self._kmeans.clusters]
        exp_cent1 = np.array([1, 1])
        exp_cent2 = np.array([-1, -1])

        self.assertTrue(np.array_equal(cent1, exp_cent1))
        self.assertTrue(np.array_equal(cent2, exp_cent2))

    def test_choose_cluster(self):
        self._kmeans.clusters.append(self._kmeans.Cluster
                (np.array([1, 1]), initial=True))
        self._kmeans.clusters.append(self._kmeans.Cluster
                (np.array([-1, -1]), initial=True))
        self._kmeans._choose_cluster(np.array([1, 0]))
        self._kmeans._choose_cluster(np.array([-1, -2]))
 
        data_points1 = self._kmeans.clusters[0].data_points
        exp1 = np.array([[1, 1], [1, 0]])
        data_points2 = self._kmeans.clusters[1].data_points
        exp2 = np.array([[-1, -1], [-1, -2]])

        self.assertTrue(np.array_equal(data_points1, exp1))
        self.assertTrue(np.array_equal(data_points2, exp2))

    def test_squared_euclidian_dist(self):
        x1, y1 = (0, 0) #0
        x2, y2 = (np.array([1, 2, 3]), np.array([3, 2, 1])) #8
        x3, y3 = (np.array([[1, 2], [1, 2]])), np.array([[1, 1], [1, 1]])

        exp1 = 0
        res1 = self._kmeans._squared_euclidian_dist(x1, y1)
        exp2 = 8
        res2 = self._kmeans._squared_euclidian_dist(x2, y2)
        exp3 = 2
        res3 = self._kmeans._squared_euclidian_dist(x3, y3)

        self.assertEqual(res1, exp1)
        self.assertEqual(res2, exp2)
        self.assertEqual(res3, exp3)

    def test_is_finish_success(self):
        self._kmeans.clusters.append(self._kmeans.Cluster(np.array([1, 1])))
        self._kmeans.clusters.append(self._kmeans.Cluster(np.array([-1, -1])))
        self._kmeans.clusters[0].data_points = np.array([[1, 1], [1, 0]])
        self._kmeans.clusters[1].data_points = np.array([[1, 1], [-1, -2]])
        self._kmeans.prev_clusters = deepcopy(self._kmeans.clusters)

        res = self._kmeans._is_finish()
        exp = 1
        self.assertEqual(res, exp)

    def test_is_finish_fail(self):
        self._kmeans.clusters.append(self._kmeans.Cluster(np.array([1, 1])))
        self._kmeans.clusters.append(self._kmeans.Cluster(np.array([-1, -1])))
        self._kmeans.clusters[0].data_points = np.array([[1, 1], [1, 0]])
        self._kmeans.clusters[1].data_points = np.array([[1, 1], [-1, -2]])
        self._kmeans.prev_clusters = deepcopy(self._kmeans.clusters)
        self._kmeans.clusters[0].data_points = np.array([[2, 1], [1, 0]])

        res = self._kmeans._is_finish()
        exp = 0
        self.assertEqual(res, exp)

    def test_update_centroids_and_data(self):
        self._kmeans.clusters.append(self._kmeans.Cluster(np.array([1, 1])))
        self._kmeans.clusters.append(self._kmeans.Cluster(np.array([-1, -1])))
        self._kmeans.clusters[0].data_points = np.array([[1, 1], [1, 0]])
        self._kmeans.clusters[1].data_points = np.array([[1, 1], [-1, -2]])
        self._kmeans.prev_clusters = deepcopy(self._kmeans.clusters)

        data = self._kmeans._update_centroids_and_data()
        res_centroid1 = self._kmeans.clusters[0].centroid
        res_centroid2 = self._kmeans.clusters[1].centroid
        exp_centroid1 = np.array([1., 0.5])
        exp_centroid2 = np.array([0., -0.5])
        exp_data = np.array([[1, 1], [1, 0], [1, 1], [-1, -2]])

        self.assertTrue(np.array_equal(res_centroid1, exp_centroid1))
        self.assertTrue(np.array_equal(res_centroid2, exp_centroid2))
        self.assertTrue(np.array_equal(data, exp_data))