def create_spherical_dataset(
    self, num_samples_cluster, radius=1.0, offset=4.0, dtype=ht.float32, random_state=1
):
    """
    Creates k=4 spherical clusters in 3D space along the space-diagonal.

    Parameters
    ----------
    num_samples_cluster : int
        Number of samples per cluster. Each process will create
        num_samples_cluster // MPI_WORLD.size elements for each cluster.
    radius : float
        Radius of the sphere.
    offset : float
        Shift of the clusters along the axes. The 4 clusters will be positioned
        centered around c1=(offset, offset, offset), c2=(2*offset, 2*offset, 2*offset),
        c3=(-offset, -offset, -offset) and c4=(-2*offset, -2*offset, -2*offset).
    dtype : ht.datatype
        Datatype of the samples.
    random_state : int
        Seed of the random number generator.
    """
    import math

    p = ht.MPI_WORLD.size
    # create k spherical clusters with n elements each; every process creates
    # k * (num_samples_cluster // p) elements in total
    num_ele = num_samples_cluster // p
    ht.random.seed(random_state)
    # radius between 0 and `radius`
    r = ht.random.rand(num_ele, split=0) * radius
    # theta between 0 and pi (math.pi replaces the former truncated 3.1415
    # literal so the stated angular range is actually covered)
    theta = ht.random.rand(num_ele, split=0) * math.pi
    # phi between 0 and 2*pi
    phi = ht.random.rand(num_ele, split=0) * 2 * math.pi
    # spherical -> Cartesian coordinates; astype(..., copy=False) casts in place
    x = r * ht.sin(theta) * ht.cos(phi)
    x.astype(dtype, copy=False)
    y = r * ht.sin(theta) * ht.sin(phi)
    y.astype(dtype, copy=False)
    z = r * ht.cos(theta)
    z.astype(dtype, copy=False)
    # four clusters shifted along the space diagonal
    cluster1 = ht.stack((x + offset, y + offset, z + offset), axis=1)
    cluster2 = ht.stack((x + 2 * offset, y + 2 * offset, z + 2 * offset), axis=1)
    cluster3 = ht.stack((x - offset, y - offset, z - offset), axis=1)
    cluster4 = ht.stack((x - 2 * offset, y - 2 * offset, z - 2 * offset), axis=1)
    data = ht.concatenate((cluster1, cluster2, cluster3, cluster4), axis=0)
    # Note: enhance when shuffle is available
    return data
def test_save(self):
    """
    Round-trip test for ``DNDarray.save`` with the HDF5 and netCDF backends.

    Writes local (unsplit) and distributed (split) arrays to disk and, on
    rank 0 only, reads the files back with the native libraries
    (h5py / netCDF4) to compare the stored values. The netCDF part
    additionally exercises dimension naming, appending to an unlimited
    dimension, indexing/slicing via ``file_slices``, broadcasting on write,
    and differing split/dtype combinations. Each backend is skipped
    silently when its optional dependency is not installed.
    """
    if ht.io.supports_hdf5():
        # local range: write an unsplit arange, then verify the file on rank 0
        local_range = ht.arange(100)
        local_range.save(self.HDF5_OUT_PATH, self.HDF5_DATASET, dtype=local_range.dtype.char())
        if local_range.comm.rank == 0:
            with ht.io.h5py.File(self.HDF5_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.HDF5_DATASET],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((local_range.larray == comparison).all())
        # split range: the distributed write must produce the same file contents
        split_range = ht.arange(100, split=0)
        split_range.save(self.HDF5_OUT_PATH, self.HDF5_DATASET, dtype=split_range.dtype.char())
        if split_range.comm.rank == 0:
            with ht.io.h5py.File(self.HDF5_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.HDF5_DATASET],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            # compares against the unsplit local_range: rank 0 reads the whole
            # dataset while split_range.larray holds only this rank's chunk
            self.assertTrue((local_range.larray == comparison).all())
    if ht.io.supports_netcdf():
        # local range: unsplit write, read back via netCDF4 on rank 0
        local_range = ht.arange(100)
        local_range.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE)
        if local_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][:],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((local_range.larray == comparison).all())
        # split range: distributed write must match the unsplit reference
        split_range = ht.arange(100, split=0)
        split_range.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE)
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][:],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((local_range.larray == comparison).all())
        # naming dimensions: string — a single dimension name as plain str
        local_range = ht.arange(100, device=self.device)
        local_range.save(
            self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, dimension_names=self.NETCDF_DIMENSION
        )
        if local_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = handle[self.NETCDF_VARIABLE].dimensions
            self.assertTrue(self.NETCDF_DIMENSION in comparison)
        # naming dimensions: tuple — same name wrapped in a 1-tuple
        local_range = ht.arange(100, device=self.device)
        local_range.save(
            self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, dimension_names=(self.NETCDF_DIMENSION,)
        )
        if local_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = handle[self.NETCDF_VARIABLE].dimensions
            self.assertTrue(self.NETCDF_DIMENSION in comparison)
        # appending unlimited variable: write once with an unlimited dimension,
        # then append a second copy behind the existing data via file_slices
        split_range.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, is_unlimited=True)
        ht.MPI_WORLD.Barrier()
        split_range.save(
            self.NETCDF_OUT_PATH,
            self.NETCDF_VARIABLE,
            mode="r+",
            file_slices=slice(split_range.size, None, None),
            # debug=True,
        )
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][:],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            # after the append the file holds the range twice
            self.assertTrue(
                (ht.concatenate((local_range, local_range)).larray == comparison).all()
            )
        # indexing netcdf file: single index — write ones into one slice of a
        # larger zeros variable, then read exactly that slice back
        ht.MPI_WORLD.Barrier()
        zeros = ht.zeros((20, 1, 20, 2), device=self.device)
        zeros.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="w")
        ones = ht.ones(20, device=self.device)
        indices = (-1, 0, slice(None), 1)
        ones.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="r+", file_slices=indices)
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][indices],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((ones.larray == comparison).all())
        # indexing netcdf file: multiple indices — fancy (permutation) indexing;
        # reading with the same index list must undo the write permutation
        ht.MPI_WORLD.Barrier()
        small_range_split = ht.arange(10, split=0, device=self.device)
        small_range = ht.arange(10, device=self.device)
        indices = [[0, 9, 5, 2, 1, 3, 7, 4, 8, 6]]
        small_range_split.save(
            self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="w", file_slices=indices
        )
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][indices],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((small_range.larray == comparison).all())
        # slicing netcdf file: negative-step slice write into the existing variable
        sslice = slice(7, 2, -1)
        range_five_split = ht.arange(5, split=0, device=self.device)
        range_five = ht.arange(5, device=self.device)
        range_five_split.save(
            self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="r+", file_slices=sslice
        )
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][sslice],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((range_five.larray == comparison).all())
        # indexing netcdf file: broadcasting array — a 1D array is broadcast
        # against the sliced target region on write
        zeros = ht.zeros((2, 1, 1, 4), device=self.device)
        zeros.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="w")
        ones = ht.ones((4), split=0, device=self.device)
        ones_nosplit = ht.ones((4), split=None, device=self.device)
        indices = (0, slice(None), slice(None))
        ones.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="r+", file_slices=indices)
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][indices],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((ones_nosplit.larray == comparison).all())
        # indexing netcdf file: broadcasting var — the file variable's shape is
        # broadcast against the (1, 2, 1) source on write
        ht.MPI_WORLD.Barrier()
        zeros = ht.zeros((2, 2), device=self.device)
        zeros.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="w")
        ones = ht.ones((1, 2, 1), split=0, device=self.device)
        ones_nosplit = ht.ones((1, 2, 1), device=self.device)
        indices = (0,)
        ones.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="r+", file_slices=indices)
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][indices],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((ones_nosplit.larray == comparison).all())
        # indexing netcdf file: broadcasting ones — size-1 dimensions broadcast
        # without any explicit file_slices
        ht.MPI_WORLD.Barrier()
        zeros = ht.zeros((1, 1, 1, 1), device=self.device)
        zeros.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="w")
        ones = ht.ones((1, 1), device=self.device)
        ones.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="r+")
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                # NOTE(review): `indices` here is still (0,) left over from the
                # "broadcasting var" section above — looks like a stale reuse;
                # confirm whether the full variable was meant to be read instead
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][indices],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((ones.larray == comparison).all())
        # different split and dtype: int32 data split along axis 1
        ht.MPI_WORLD.Barrier()
        zeros = ht.zeros((2, 2), split=1, dtype=ht.int32, device=self.device)
        zeros_nosplit = ht.zeros((2, 2), dtype=ht.int32, device=self.device)
        zeros.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="w")
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][:],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((zeros_nosplit.larray == comparison).all())
def test_concatenate(self):
    """
    Tests ht.concatenate over combinations of operand splits and the
    concatenation axis, for matrices, 3D tensors and vectors, plus the
    documented error cases.

    The section markers "s0 s1 axis" denote: split of the first operand,
    split of the second operand, concatenation axis.
    """

    def check(res, gshape, dtype=ht.float):
        # Verify the global shape and dtype of the result, and that this
        # process holds exactly the chunk the communicator assigns to it.
        self.assertEqual(res.gshape, gshape)
        self.assertEqual(res.dtype, dtype)
        _, _, chk = res.comm.chunk(gshape, res.split)
        lshape = tuple(c.stop - c.start for c in chk)
        self.assertEqual(res.lshape, lshape)

    # Matrices
    # s0 s1 axis
    # None None 0
    x = ht.zeros((16, 15), split=None, device=ht_device)
    y = ht.ones((16, 15), split=None, device=ht_device)
    check(ht.concatenate((x, y), axis=0), (32, 15))
    # None None 1
    check(ht.concatenate((x, y), axis=1), (16, 30))
    # =============================================
    # None 0 0
    x = ht.zeros((16, 15), split=None, device=ht_device)
    y = ht.ones((16, 15), split=0, device=ht_device)
    check(ht.concatenate((x, y), axis=0), (32, 15))
    # None 0 1
    check(ht.concatenate((x, y), axis=1), (16, 30))
    # =============================================
    # None 1 1
    x = ht.zeros((16, 15), split=None, device=ht_device)
    y = ht.ones((16, 15), split=1, device=ht_device)
    check(ht.concatenate((x, y), axis=1), (16, 30))
    # None 1 0
    check(ht.concatenate((x, y), axis=0), (32, 15))
    # =============================================
    # 0 None 0
    x = ht.zeros((16, 15), split=0, device=ht_device)
    y = ht.ones((16, 15), split=None, device=ht_device)
    check(ht.concatenate((x, y), axis=0), (32, 15))
    # 0 None 1
    check(ht.concatenate((x, y), axis=1), (16, 30))
    # =============================================
    # 1 None 0
    x = ht.zeros((16, 15), split=1, device=ht_device)
    y = ht.ones((16, 15), split=None, device=ht_device)
    check(ht.concatenate((x, y), axis=0), (32, 15))
    # 1 None 1
    check(ht.concatenate((x, y), axis=1), (16, 30))
    # =============================================
    # 0 0 0 and 0 0 1
    x = ht.zeros((16, 15), split=0, device=ht_device)
    y = ht.ones((16, 15), split=0, device=ht_device)
    check(ht.concatenate((x, y), axis=0), (32, 15))
    check(ht.concatenate((x, y), axis=1), (16, 30))
    # =============================================
    # 1 1 0 and 1 1 1
    x = ht.zeros((16, 15), split=1, device=ht_device)
    y = ht.ones((16, 15), split=1, device=ht_device)
    check(ht.concatenate((x, y), axis=0), (32, 15))
    check(ht.concatenate((x, y), axis=1), (16, 30))
    # =============================================
    # 3D tensors: 2 2 {0,1,2}
    x = ht.zeros((16, 15, 14), split=2, device=ht_device)
    y = ht.ones((16, 15, 14), split=2, device=ht_device)
    check(ht.concatenate((x, y), axis=0), (32, 15, 14))
    check(ht.concatenate((x, y), axis=1), (16, 30, 14))
    check(ht.concatenate((x, y), axis=2), (16, 15, 28))
    # =============================================
    # 2 None {1,2}
    y = ht.ones((16, 15, 14), split=None, device=ht_device)
    check(ht.concatenate((x, y), axis=1), (16, 30, 14))
    check(ht.concatenate((x, y), axis=2), (16, 15, 28))
    # negative axis resolves to the last dimension
    check(ht.concatenate((x, y), axis=-1), (16, 15, 28))
    # =============================================
    # None 2 0
    x = ht.zeros((16, 15, 14), split=None, device=ht_device)
    y = ht.ones((16, 15, 14), split=2, device=ht_device)
    check(ht.concatenate((x, y), axis=0), (32, 15, 14))
    # None 2 0 with three operands
    x = ht.zeros((16, 15, 14), split=None, device=ht_device)
    y = ht.ones((16, 15, 14), split=2, device=ht_device)
    check(ht.concatenate((x, y, y), axis=0), (32 + 16, 15, 14))
    # None 2 2
    check(ht.concatenate((x, y), axis=2), (16, 15, 28))
    # vectors
    # None None 0 (no chunk verification here, matching the original test)
    x = ht.zeros((16,), split=None, device=ht_device)
    y = ht.ones((16,), split=None, device=ht_device)
    res = ht.concatenate((x, y), axis=0)
    self.assertEqual(res.gshape, (32,))
    self.assertEqual(res.dtype, ht.float)
    # None 0 0
    y = ht.ones((16,), split=0, device=ht_device)
    check(ht.concatenate((x, y), axis=0), (32,))
    # 0 0 0 — float64 operand promotes the result dtype
    x = ht.ones((16,), split=0, dtype=ht.float64, device=ht_device)
    check(ht.concatenate((x, y), axis=0), (32,), dtype=ht.float64)
    # 0 None 0 — float32 + int64 promotes to float64
    x = ht.ones((16,), split=0, device=ht_device)
    y = ht.ones((16,), split=None, dtype=ht.int64, device=ht_device)
    check(ht.concatenate((x, y), axis=0), (32,), dtype=ht.float64)
    # test raises
    with self.assertRaises(ValueError):
        ht.concatenate(
            (ht.zeros((6, 3, 5), device=ht_device), ht.zeros((4, 5, 1), device=ht_device))
        )
    with self.assertRaises(TypeError):
        ht.concatenate((x, "5"))
    with self.assertRaises(TypeError):
        ht.concatenate((x))
    with self.assertRaises(TypeError):
        ht.concatenate((x, x), axis=x)
    with self.assertRaises(RuntimeError):
        ht.concatenate((x, ht.zeros((2, 2), device=ht_device)), axis=0)
    with self.assertRaises(ValueError):
        ht.concatenate(
            (ht.zeros((12, 12), device=ht_device), ht.zeros((2, 2), device=ht_device)), axis=0
        )
    with self.assertRaises(RuntimeError):
        ht.concatenate(
            (
                ht.zeros((2, 2), split=0, device=ht_device),
                ht.zeros((2, 2), split=1, device=ht_device),
            ),
            axis=0,
        )