def test_fit_iris_unsplit(self):
    """Fit ``KMedoids`` on the iris dataset and validate the resulting medoids.

    The dataset is loaded with ``split=0``. Two three-cluster models are
    fitted in turn: one with ``random_state=1`` and the default
    initialisation, one with ``init="kmedoids++"``. For both, the cluster
    centers must be a ``ht.DNDarray`` of shape ``(3, n_features)``. For the
    second model every center must additionally have zero L1 distance to at
    least one row of the input data.
    """
    # NOTE(review): the test name says "unsplit" but the data is loaded with
    # split=0 -- confirm which one is intended.
    n_clusters = 3

    def check_centers(model):
        # Type and shape of the fitted medoids.
        self.assertIsInstance(model.cluster_centers_, ht.DNDarray)
        self.assertEqual(
            model.cluster_centers_.shape, (n_clusters, dataset.shape[1])
        )

    # get some test data
    dataset = ht.load("heat/datasets/iris.csv", sep=";", split=0)
    ht.random.seed(1)

    # default initialisation with a fixed random state
    seeded_model = ht.cluster.KMedoids(n_clusters=n_clusters, random_state=1)
    seeded_model.fit(dataset)
    check_centers(seeded_model)

    # same test with init=kmedoids++
    plusplus_model = ht.cluster.KMedoids(n_clusters=n_clusters, init="kmedoids++")
    plusplus_model.fit(dataset)
    check_centers(plusplus_model)

    # every medoid has to be one of the actual datapoints
    for row in range(plusplus_model.cluster_centers_.shape[0]):
        medoid = plusplus_model.cluster_centers_[row, :]
        l1_distance = ht.sum(ht.abs(medoid - dataset), axis=1)
        self.assertTrue(ht.any(l1_distance == 0))
def test_any(self):
    """Exercise ``ht.any`` / ``DNDarray.any`` on several input flavours.

    Covered cases: float input reduced along axis 1 via the method form,
    integer input reduced along axis 0 into a pre-allocated ``out`` tensor,
    float input reduced without an axis, and an array created with
    ``split=0`` reduced along axis 0. Each result must be a ``ht.DNDarray``
    with the expected shape, dtype ``ht.bool`` and values equal to the
    reference tensor.
    """

    def verify(actual, expected, shape):
        # Shared assertions applied to the result of every reduction.
        self.assertIsInstance(actual, ht.DNDarray)
        self.assertEqual(actual.shape, shape)
        self.assertEqual(actual.dtype, ht.bool)
        self.assertTrue(ht.equal(actual, expected))

    # float values, minor axis
    floats = ht.float32([[2.7, 0, 0], [0, 0, 0], [0, 0.3, 0]], device=ht_device)
    row_wise = floats.any(axis=1)
    verify(row_wise, ht.uint8([1, 0, 1], device=ht_device), (3,))

    # integer values, major axis, result written into an output tensor
    out_buffer = ht.zeros((2,), device=ht_device)
    ints = ht.int32([[0, 0], [0, 0], [0, 1]], device=ht_device)
    ht.any(ints, axis=0, out=out_buffer)
    verify(out_buffer, ht.uint8([0, 1], device=ht_device), (2,))

    # float values, no axis
    all_zero = ht.float64([[0, 0, 0], [0, 0, 0]], device=ht_device)
    expected_none = ht.zeros(1, dtype=ht.uint8, device=ht_device)
    overall = ht.any(all_zero)
    verify(overall, expected_none, (1,))

    # split tensor, along axis
    distributed = ht.arange(10, split=0, device=ht_device)
    along_split = ht.any(distributed, axis=0)
    verify(along_split, ht.uint8([1], device=ht_device), (1,))
def test_spherical_clusters(self):
    """Fit ``KMedoids`` (``init="kmedoids++"``) on generated spherical datasets.

    Datasets come from ``self.create_spherical_dataset`` and are fitted with
    four clusters. Four configurations are run in order: float32 with
    ``20 * ht.MPI_WORLD.size`` samples per cluster, float32 with
    ``100 * ht.MPI_WORLD.size`` samples, float64 with the smaller sample
    count, and int32 with a larger radius and offset. For each one the
    cluster centers must be a ``ht.DNDarray`` of shape ``(4, 3)`` and every
    center must have zero L1 distance to at least one sample.
    """
    rng_seed = 1

    def fit_and_check_shape(samples):
        # Fit a fresh model and validate type and shape of its medoids.
        model = ht.cluster.KMedoids(n_clusters=4, init="kmedoids++")
        model.fit(samples)
        self.assertIsInstance(model.cluster_centers_, ht.DNDarray)
        self.assertEqual(model.cluster_centers_.shape, (4, 3))
        return model

    def coincides_with_sample(medoid, samples):
        # Truthy when at least one row of `samples` has zero L1 distance to `medoid`.
        l1_distance = ht.sum(ht.abs(medoid - samples), axis=1)
        return ht.any(l1_distance == 0)

    # baseline: float32, few samples
    samples_per_cluster = 20 * ht.MPI_WORLD.size
    data = self.create_spherical_dataset(
        num_samples_cluster=samples_per_cluster,
        radius=1.0,
        offset=4.0,
        dtype=ht.float32,
        random_state=rng_seed,
    )
    fitted = fit_and_check_shape(data)
    for row in range(fitted.cluster_centers_.shape[0]):
        self.assertTrue(coincides_with_sample(fitted.cluster_centers_[row, :], data))

    # more samples
    samples_per_cluster = 100 * ht.MPI_WORLD.size
    data = self.create_spherical_dataset(
        num_samples_cluster=samples_per_cluster,
        radius=1.0,
        offset=4.0,
        dtype=ht.float32,
        random_state=rng_seed,
    )
    fitted = fit_and_check_shape(data)
    # check whether result is actually a datapoint
    for row in range(fitted.cluster_centers_.shape[0]):
        self.assertTrue(coincides_with_sample(fitted.cluster_centers_[row, :], data))

    # different datatype
    samples_per_cluster = 20 * ht.MPI_WORLD.size
    data = self.create_spherical_dataset(
        num_samples_cluster=samples_per_cluster,
        radius=1.0,
        offset=4.0,
        dtype=ht.float64,
        random_state=rng_seed,
    )
    fitted = fit_and_check_shape(data)
    # NOTE(review): the samples are cast to float32 before the comparison,
    # presumably because the fitted centers are float32 -- confirm.
    for row in range(fitted.cluster_centers_.shape[0]):
        self.assertTrue(
            coincides_with_sample(
                fitted.cluster_centers_[row, :], data.astype(ht.float32)
            )
        )

    # on ints (different radius, offset and datatype); sample count is
    # carried over from the previous configuration
    data = self.create_spherical_dataset(
        num_samples_cluster=samples_per_cluster,
        radius=10.0,
        offset=40.0,
        dtype=ht.int32,
        random_state=rng_seed,
    )
    fitted = fit_and_check_shape(data)
    for row in range(fitted.cluster_centers_.shape[0]):
        self.assertTrue(coincides_with_sample(fitted.cluster_centers_[row, :], data))