Ejemplo n.º 1
0
    def test_kmean_nonsense_centroid(self):
        """Check that an absurd initial centroid guess does not survive.

        Four starting guesses are handed to ``kmeans.kmeans``; the last one
        sits at 99999 on every axis. The centroids that come back are
        expected to equal the other three guesses exactly.
        """
        # Leave out every point that contains the value 100.
        usable_points = []
        for point in int_points_3d:
            if 100 in point:
                continue
            usable_points.append(point)

        initial_guesses = [[v, v, v] for v in (1.0, 2.0, 3.0, 99999.0)]
        surviving = [[v, v, v] for v in (1.0, 2.0, 3.0)]

        # The 4th guess is completely ridiculous, so the result should only
        # contain 3 members.
        found, _ = kmeans.kmeans(usable_points, initial_guesses)

        self.assertSequenceEqual(surviving, found)
Ejemplo n.º 2
0
    def test_kmean_nonsense_centroid(self):
        """Ensure kmeans discards a wildly out-of-range starting centroid.

        The input guesses are three plausible centroids plus one placed at
        99999 on each axis; only the three plausible ones are expected in
        the returned centroid list.
        """
        plausible = [[1., 1., 1.], [2., 2., 2.], [3., 3., 3.]]
        outlier = [99999., 99999., 99999.]

        # Copy the plausible guesses so the expectation and the kmeans input
        # never share list objects.
        guesses = [list(guess) for guess in plausible] + [outlier]

        # Only keep points that do not contain the value 100.
        filtered = list(filter(lambda pt: 100 not in pt, int_points_3d))

        # With a nonsensical 4th guess, the resulting centroids should hold
        # just 3 members.
        computed, _ = kmeans.kmeans(filtered, guesses)

        self.assertSequenceEqual(plausible, computed)
Ejemplo n.º 3
0
    def test_kmeans(self):
        """Run kmeans on the random 3D fixture and check its outputs.

        Compares the returned distance and centroids against hard-coded
        reference values, then checks that an empty point list and a zero
        centroid argument both raise ``ValueError``.
        """
        # These indices don't really matter since the points are random, but
        # they are fixed here so the test is repeatable.
        centroids = [random_points_3d[i] for i in (125, 500, 875)]

        s_centroids = [[0.44876204, 0.3331773, 0.233552],
                       [0.49838519, 0.29378851, 0.75018887],
                       [0.5225907, 0.80407079, 0.53268326]]
        m_centroids, m_distance = kmeans.kmeans(random_points_3d, centroids)

        # Compare to 10 decimal places rather than requiring bit-for-bit
        # equality: an exact comparison of a computed float is brittle across
        # platforms and library builds.
        self.assertAlmostEqual(0.35944390987038655, m_distance, places=10)

        # I'm getting everything to pass at 10 places except for this where
        # there always seems to be at least one dimension of one centroid
        # about [.001-.005] away from where we expect it to be. This is due to
        # difference in floating point storage between numpy and python. See
        # the following for more info:
        #           https://github.com/cbmi/avocado/issues/34
        self.assertSequenceAlmostEqual(s_centroids, m_centroids, num_places=2)

        # Invalid arguments: no points at all, and a centroid argument of 0.
        self.assertRaises(ValueError, kmeans.kmeans, [], 3)
        self.assertRaises(ValueError, kmeans.kmeans, random_points_3d, 0)
Ejemplo n.º 4
0
    def test_kmeans(self):
        """Exercise ``kmeans.kmeans`` against fixed reference results.

        The random 3D fixture is clustered from three fixed starting points;
        the resulting distance and centroids are compared with hard-coded
        values. Two invalid calls are also expected to raise ``ValueError``.
        """
        # These indices don't really matter since the points are random but
        # I am fixing them here for repeatability of the test.
        centroids = [
            random_points_3d[125], random_points_3d[500], random_points_3d[875]
        ]

        s_centroids = [[0.44876204, 0.3331773, 0.233552],
                       [0.49838519, 0.29378851, 0.75018887],
                       [0.5225907, 0.80407079, 0.53268326]]
        m_centroids, m_distance = kmeans.kmeans(random_points_3d, centroids)

        # Exact equality on a computed float breaks on any last-bit rounding
        # difference, so the distance is checked to 10 decimal places instead.
        self.assertAlmostEqual(0.35944390987038655, m_distance, places=10)

        # I'm getting everything to pass at 10 places except for this where
        # there always seems to be at least one dimension of one centroid
        # about [.001-.005] away from where we expect it to be. This is due to
        # difference in floating point storage between numpy and python. See
        # the following for more info:
        #           https://github.com/cbmi/avocado/issues/34
        self.assertSequenceAlmostEqual(s_centroids, m_centroids, num_places=2)

        # An empty point list and a centroid argument of 0 must be rejected.
        self.assertRaises(ValueError, kmeans.kmeans, [], 3)
        self.assertRaises(ValueError, kmeans.kmeans, random_points_3d, 0)