def test_int_cast(self):
    # simple scalar tensor
    a = ht.ones(1)
    casted_a = int(a)
    self.assertEqual(casted_a, 1)
    self.assertIsInstance(casted_a, int)

    # multi-dimensional scalar tensor
    b = ht.zeros((1, 1, 1, 1))
    casted_b = int(b)
    self.assertEqual(casted_b, 0)
    self.assertIsInstance(casted_b, int)

    # split scalar tensor
    c = ht.full((1,), 5, split=0)
    casted_c = int(c)
    self.assertEqual(casted_c, 5)
    self.assertIsInstance(casted_c, int)

    # exception on non-scalar tensor
    with self.assertRaises(TypeError):
        int(ht.empty(1, 2, 1, 1))
    # exception on empty tensor
    with self.assertRaises(TypeError):
        int(ht.empty((0, 1, 2)))
    # exception on split tensor, where each chunk has size 1
    if ht.MPI_WORLD.size > 1:
        with self.assertRaises(TypeError):
            int(ht.full((ht.MPI_WORLD.size,), 2, split=0))
def test_float_cast(self):
    # simple scalar tensor
    a = ht.ones(1, device=ht_device)
    casted_a = float(a)
    self.assertEqual(casted_a, 1.0)
    self.assertIsInstance(casted_a, float)

    # multi-dimensional scalar tensor
    b = ht.zeros((1, 1, 1, 1), device=ht_device)
    casted_b = float(b)
    self.assertEqual(casted_b, 0.0)
    self.assertIsInstance(casted_b, float)

    # split scalar tensor
    c = ht.full((1,), 5, split=0, device=ht_device)
    casted_c = float(c)
    self.assertEqual(casted_c, 5.0)
    self.assertIsInstance(casted_c, float)

    # exception on non-scalar tensor
    with self.assertRaises(TypeError):
        float(ht.empty(1, 2, 1, 1, device=ht_device))
    # exception on empty tensor
    with self.assertRaises(TypeError):
        float(ht.empty((0, 1, 2), device=ht_device))
    # exception on split tensor, where each chunk has size 1
    if ht.MPI_WORLD.size > 1:
        with self.assertRaises(TypeError):
            float(ht.full((ht.MPI_WORLD.size,), 2, split=0, device=ht_device))
def test_bool_cast(self):
    # simple scalar tensor
    a = ht.ones(1, device=ht_device)
    casted_a = bool(a)
    self.assertEqual(casted_a, True)
    self.assertIsInstance(casted_a, bool)

    # multi-dimensional scalar tensor
    b = ht.zeros((1, 1, 1, 1), device=ht_device)
    casted_b = bool(b)
    self.assertEqual(casted_b, False)
    self.assertIsInstance(casted_b, bool)

    # split scalar tensor
    c = ht.full((1,), 5, split=0, device=ht_device)
    casted_c = bool(c)
    self.assertEqual(casted_c, True)
    self.assertIsInstance(casted_c, bool)

    # exception on non-scalar tensor
    with self.assertRaises(TypeError):
        bool(ht.empty(1, 2, 1, 1, device=ht_device))
    # exception on empty tensor
    with self.assertRaises(TypeError):
        bool(ht.empty((0, 1, 2), device=ht_device))
    # exception on split tensor, where each chunk has size 1
    if ht.MPI_WORLD.size > 1:
        with self.assertRaises(TypeError):
            bool(ht.full((ht.MPI_WORLD.size,), 2, split=0, device=ht_device))
def predict_log_proba(self, X):
    """
    Adapted to HeAT from scikit-learn.

    Return log-probability estimates for the test tensor X.

    Parameters
    ----------
    X : ht.tensor of shape (n_samples, n_features)

    Returns
    -------
    C : ht.tensor of shape (n_samples, n_classes)
        Returns the log-probability of the samples for each class in the
        model. The columns correspond to the classes in sorted order, as
        they appear in the attribute `classes_`.
    """
    # TODO: sanitation/validation module, cf. #468
    # log_prob_x must be 2D (cf. np.atleast_2D)
    jll = self.__joint_log_likelihood(X)
    log_prob_x_shape = (jll.gshape[0], 1)
    log_prob_x = ht.empty(log_prob_x_shape, dtype=jll.dtype, split=jll.split, device=jll.device)
    # normalize by P(x) = P(f_1, ..., f_n)
    log_prob_x._DNDarray__array = self.logsumexp(jll, axis=1)._DNDarray__array.unsqueeze(1)
    return jll - log_prob_x
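# A minimal NumPy sketch (not HeAT code) of the normalization step above: the
# method subtracts log P(x) = logsumexp(jll, axis=1) from each row of the joint
# log-likelihood, i.e. log P(c|x) = jll - log(sum_c exp(jll)). The row-wise
# max-shift below is the standard trick that keeps exp() from overflowing; the
# helper name _logsumexp_rows is hypothetical.
import numpy as np

def _logsumexp_rows(jll):
    m = jll.max(axis=1, keepdims=True)  # row-wise maximum for numerical stability
    return m + np.log(np.exp(jll - m).sum(axis=1, keepdims=True))

jll_example = np.array([[-1000.0, -1001.0], [-2.0, -1.0]])
log_proba = jll_example - _logsumexp_rows(jll_example)
assert np.allclose(np.exp(log_proba).sum(axis=1), 1.0)  # rows exponentiate to proper probabilities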
def test_isposinf(self):
    a = ht.array([1, ht.inf, -ht.inf, ht.nan])
    s = ht.array([False, True, False, False])
    r = ht.isposinf(a)
    self.assertEqual(r.shape, s.shape)
    self.assertEqual(r.dtype, s.dtype)
    self.assertEqual(r.device, s.device)
    self.assertTrue(ht.equal(r, s))

    a = ht.array([1, ht.inf, -ht.inf, ht.nan], split=0)
    out = ht.empty(4, dtype=ht.bool, split=0)
    s = ht.array([False, True, False, False], split=0)
    ht.isposinf(a, out)
    self.assertEqual(out.shape, s.shape)
    self.assertEqual(out.dtype, s.dtype)
    self.assertEqual(out.device, s.device)
    self.assertTrue(ht.equal(out, s))

    a = ht.ones((6, 6), dtype=ht.bool, split=0)
    s = ht.zeros((6, 6), dtype=ht.bool, split=0)
    r = ht.isposinf(a)
    self.assertEqual(r.shape, s.shape)
    self.assertEqual(r.dtype, s.dtype)
    self.assertEqual(r.device, s.device)
    self.assertTrue(ht.equal(r, s))

    a = ht.ones((5, 5), dtype=ht.int, split=1)
    s = ht.zeros((5, 5), dtype=ht.bool, split=1)
    r = ht.isposinf(a)
    self.assertEqual(r.shape, s.shape)
    self.assertEqual(r.dtype, s.dtype)
    self.assertEqual(r.device, s.device)
    self.assertTrue(ht.equal(r, s))
def test_empty(self):
    # scalar input
    simple_empty_float = ht.empty(3, device=ht_device)
    self.assertIsInstance(simple_empty_float, ht.DNDarray)
    self.assertEqual(simple_empty_float.shape, (3,))
    self.assertEqual(simple_empty_float.lshape, (3,))
    self.assertEqual(simple_empty_float.split, None)
    self.assertEqual(simple_empty_float.dtype, ht.float32)

    # different data type
    simple_empty_uint = ht.empty(5, dtype=ht.bool, device=ht_device)
    self.assertIsInstance(simple_empty_uint, ht.DNDarray)
    self.assertEqual(simple_empty_uint.shape, (5,))
    self.assertEqual(simple_empty_uint.lshape, (5,))
    self.assertEqual(simple_empty_uint.split, None)
    self.assertEqual(simple_empty_uint.dtype, ht.bool)

    # multi-dimensional
    elaborate_empty_int = ht.empty((2, 3), dtype=ht.int32, device=ht_device)
    self.assertIsInstance(elaborate_empty_int, ht.DNDarray)
    self.assertEqual(elaborate_empty_int.shape, (2, 3))
    self.assertEqual(elaborate_empty_int.lshape, (2, 3))
    self.assertEqual(elaborate_empty_int.split, None)
    self.assertEqual(elaborate_empty_int.dtype, ht.int32)

    # split axis
    elaborate_empty_split = ht.empty((6, 4), dtype=ht.int32, split=0, device=ht_device)
    self.assertIsInstance(elaborate_empty_split, ht.DNDarray)
    self.assertEqual(elaborate_empty_split.shape, (6, 4))
    self.assertLessEqual(elaborate_empty_split.lshape[0], 6)
    self.assertEqual(elaborate_empty_split.lshape[1], 4)
    self.assertEqual(elaborate_empty_split.split, 0)
    self.assertEqual(elaborate_empty_split.dtype, ht.int32)

    # exceptions
    with self.assertRaises(TypeError):
        ht.empty("(2, 3,)", dtype=ht.float64, device=ht_device)
    with self.assertRaises(ValueError):
        ht.empty((-1, 3), dtype=ht.float64, device=ht_device)
    with self.assertRaises(TypeError):
        ht.empty((2, 3), dtype=ht.float64, split="axis", device=ht_device)
def test_sanitize_out(self):
    output_shape = (4, 5, 6)
    output_split = 1
    output_device = "cpu"

    out_wrong_type = torch.empty(output_shape)
    with self.assertRaises(TypeError):
        ht.sanitize_out(out_wrong_type, output_shape, output_split, output_device)

    out_wrong_shape = ht.empty((4, 7, 6), split=output_split, device=output_device)
    with self.assertRaises(ValueError):
        ht.sanitize_out(out_wrong_shape, output_shape, output_split, output_device)

    out_wrong_split = ht.empty(output_shape, split=2, device=output_device)
    with self.assertRaises(ValueError):
        ht.sanitize_out(out_wrong_split, output_shape, output_split, output_device)
def test_cumprod(self):
    a = ht.full((2, 4), 2, dtype=ht.int32)
    result = ht.array([[2, 4, 8, 16], [2, 4, 8, 16]], dtype=ht.int32)

    # split = None
    cumprod = ht.cumprod(a, 1)
    self.assertTrue(ht.equal(cumprod, result))

    # alias
    cumprod = ht.cumproduct(a, 1)
    self.assertTrue(ht.equal(cumprod, result))

    a = ht.full((4, 2), 2, dtype=ht.int64, split=0)
    result = ht.array([[2, 2], [4, 4], [8, 8], [16, 16]], dtype=ht.int64, split=0)

    cumprod = ht.cumprod(a, 0)
    self.assertTrue(ht.equal(cumprod, result))

    # 3D
    out = ht.empty((2, 2, 2), dtype=ht.float32, split=0)
    a = ht.full((2, 2, 2), 2, split=0)
    result = ht.array([[[2, 2], [2, 2]], [[4, 4], [4, 4]]], dtype=ht.float32, split=0)

    cumprod = ht.cumprod(a, 0, out=out)
    self.assertTrue(ht.equal(cumprod, out))
    self.assertTrue(ht.equal(cumprod, result))

    a = ht.full((2, 2, 2), 2, dtype=ht.int32, split=1)
    result = ht.array([[[2, 2], [4, 4]], [[2, 2], [4, 4]]], dtype=ht.float32, split=1)

    cumprod = ht.cumprod(a, 1, dtype=ht.float64)
    self.assertTrue(ht.equal(cumprod, result))

    a = ht.full((2, 2, 2), 2, dtype=ht.float32, split=2)
    result = ht.array([[[2, 4], [2, 4]], [[2, 4], [2, 4]]], dtype=ht.float32, split=2)

    cumprod = ht.cumprod(a, 2)
    self.assertTrue(ht.equal(cumprod, result))

    with self.assertRaises(NotImplementedError):
        ht.cumprod(ht.ones((2, 2)), axis=None)
    with self.assertRaises(TypeError):
        ht.cumprod(ht.ones((2, 2)), axis="1")
    with self.assertRaises(ValueError):
        ht.cumprod(a, 2, out=out)
    with self.assertRaises(ValueError):
        ht.cumprod(ht.ones((2, 2)), 2)
def test_cumsum(self):
    a = ht.ones((2, 4), dtype=ht.int32)
    result = ht.array([[1, 2, 3, 4], [1, 2, 3, 4]], dtype=ht.int32)

    # split = None
    cumsum = ht.cumsum(a, 1)
    self.assertTrue(ht.equal(cumsum, result))

    a = ht.ones((4, 2), dtype=ht.int64, split=0)
    result = ht.array([[1, 1], [2, 2], [3, 3], [4, 4]], dtype=ht.int64, split=0)

    cumsum = ht.cumsum(a, 0)
    self.assertTrue(ht.equal(cumsum, result))

    # 3D
    out = ht.empty((2, 2, 2), dtype=ht.float32, split=0)
    a = ht.ones((2, 2, 2), split=0)
    result = ht.array([[[1, 1], [1, 1]], [[2, 2], [2, 2]]], dtype=ht.float32, split=0)

    cumsum = ht.cumsum(a, 0, out=out)
    self.assertTrue(ht.equal(cumsum, out))
    self.assertTrue(ht.equal(cumsum, result))

    a = ht.ones((2, 2, 2), dtype=ht.int32, split=1)
    result = ht.array([[[1, 1], [2, 2]], [[1, 1], [2, 2]]], dtype=ht.float32, split=1)

    cumsum = ht.cumsum(a, 1, dtype=ht.float64)
    self.assertTrue(ht.equal(cumsum, result))

    a = ht.ones((2, 2, 2), dtype=ht.float32, split=2)
    result = ht.array([[[1, 2], [1, 2]], [[1, 2], [1, 2]]], dtype=ht.float32, split=2)

    cumsum = ht.cumsum(a, 2)
    self.assertTrue(ht.equal(cumsum, result))

    with self.assertRaises(NotImplementedError):
        ht.cumsum(ht.ones((2, 2)), axis=None)
    with self.assertRaises(TypeError):
        ht.cumsum(ht.ones((2, 2)), axis="1")
    with self.assertRaises(ValueError):
        ht.cumsum(a, 2, out=out)
    with self.assertRaises(ValueError):
        ht.cumsum(ht.ones((2, 2)), 2)
def test_pos(self):
    self.assertTrue(ht.equal(ht.pos(ht.array([-1, 1])), ht.array([-1, 1])))
    self.assertTrue(ht.equal(+ht.array([-1.0, 1.0]), ht.array([-1.0, 1.0])))

    a = ht.array([1 + 1j, 2 - 2j, 3, 4j, 5], split=0)
    b = out = ht.empty(5, dtype=ht.complex64, split=0)
    ht.positive(a, out=out)
    self.assertTrue(ht.equal(out, a))
    self.assertIs(out, b)

    with self.assertRaises(TypeError):
        ht.pos(1)
def test_neg(self):
    self.assertTrue(ht.equal(ht.neg(ht.array([-1, 1])), ht.array([1, -1])))
    self.assertTrue(ht.equal(-ht.array([-1.0, 1.0]), ht.array([1.0, -1.0])))

    a = ht.array([1 + 1j, 2 - 2j, 3, 4j, 5], split=0)
    b = out = ht.empty(5, dtype=ht.complex64, split=0)
    ht.negative(a, out=out)
    self.assertTrue(ht.equal(out, ht.array([-1 - 1j, -2 + 2j, -3, -4j, -5], split=0)))
    self.assertIs(out, b)

    with self.assertRaises(TypeError):
        ht.neg(1)
def __joint_log_likelihood(self, X):
    """
    Adapted to HeAT from scikit-learn.

    Calculates the joint log-likelihood for n_samples to be assigned to each
    class. Returns an ht.DNDarray joint_log_likelihood(n_samples, n_classes).
    """
    jll_size = self.classes_._DNDarray__array.numel()
    jll_shape = (X.shape[0], jll_size)
    joint_log_likelihood = ht.empty(jll_shape, dtype=X.dtype, split=X.split, device=X.device)
    for i in range(jll_size):
        jointi = ht.log(self.class_prior_[i])
        n_ij = -0.5 * ht.sum(ht.log(2.0 * ht.pi * self.sigma_[i, :]))
        n_ij -= 0.5 * ht.sum(((X - self.theta_[i, :]) ** 2) / (self.sigma_[i, :]), 1)
        joint_log_likelihood[:, i] = jointi + n_ij
    return joint_log_likelihood
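# For reference, a minimal single-process NumPy sketch of the Gaussian naive
# Bayes term computed in the loop above (the standalone names theta, sigma and
# prior stand in for the fitted attributes self.theta_, self.sigma_ and
# self.class_prior_; this is an illustration, not part of the class):
#   jll[:, i] = log(prior[i]) - 0.5 * sum(log(2*pi*sigma[i]))
#               - 0.5 * sum((X - theta[i])**2 / sigma[i], axis=1)
import numpy as np

def joint_log_likelihood_numpy(X, theta, sigma, prior):
    jll = np.empty((X.shape[0], theta.shape[0]))
    for i in range(theta.shape[0]):
        # log of the class-conditional Gaussian density, summed over features
        n_ij = -0.5 * np.sum(np.log(2.0 * np.pi * sigma[i, :]))
        n_ij = n_ij - 0.5 * np.sum((X - theta[i, :]) ** 2 / sigma[i, :], axis=1)
        jll[:, i] = np.log(prior[i]) + n_ij
    return jll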
def test_len(self):
    # vector
    a = ht.zeros((10,), device=ht_device)
    a_length = len(a)
    self.assertIsInstance(a_length, int)
    self.assertEqual(a_length, 10)

    # matrix
    b = ht.ones((50, 2), device=ht_device)
    b_length = len(b)
    self.assertIsInstance(b_length, int)
    self.assertEqual(b_length, 50)

    # split 5D array
    c = ht.empty((3, 4, 5, 6, 7), split=-1, device=ht_device)
    c_length = len(c)
    self.assertIsInstance(c_length, int)
    self.assertEqual(c_length, 3)
def test_ndim(self):
    a = ht.empty([2, 3, 3, 2])
    self.assertEqual(a.ndim, 4)
    with self.assertWarns(Warning):
        a.numdims
def test_outer(self):
    # test outer, a and b local, different dtypes
    a = ht.arange(3, dtype=ht.int32)
    b = ht.arange(8, dtype=ht.float32)
    ht_outer = ht.outer(a, b, split=None)
    np_outer = np.outer(a.numpy(), b.numpy())
    t_outer = torch.einsum("i,j->ij", a._DNDarray__array, b._DNDarray__array)
    self.assertTrue((ht_outer.numpy() == np_outer).all())
    self.assertTrue(ht_outer._DNDarray__array.dtype is t_outer.dtype)

    # test outer, a and b distributed, no data on some ranks
    a_split = ht.arange(3, dtype=ht.float32, split=0)
    b_split = ht.arange(8, dtype=ht.float32, split=0)
    ht_outer_split = ht.outer(a_split, b_split, split=None)

    # a and b split 0, outer split 1
    ht_outer_split = ht.outer(a_split, b_split, split=1)
    self.assertTrue((ht_outer_split.numpy() == np_outer).all())
    self.assertTrue(ht_outer_split.split == 1)

    # a and b distributed, outer split unspecified
    ht_outer_split = ht.outer(a_split, b_split, split=None)
    self.assertTrue((ht_outer_split.numpy() == np_outer).all())
    self.assertTrue(ht_outer_split.split == 0)

    # a not distributed, outer.split = 1
    ht_outer_split = ht.outer(a, b_split, split=1)
    self.assertTrue((ht_outer_split.numpy() == np_outer).all())
    self.assertTrue(ht_outer_split.split == 1)

    # b not distributed, outer.split = 0
    ht_outer_split = ht.outer(a_split, b, split=0)
    self.assertTrue((ht_outer_split.numpy() == np_outer).all())
    self.assertTrue(ht_outer_split.split == 0)

    # a_split.ndim > 1 and a.split != 0
    a_split_3d = ht.random.randn(3, 3, 3, dtype=ht.float64, split=2)
    ht_outer_split = ht.outer(a_split_3d, b_split)
    np_outer_3d = np.outer(a_split_3d.numpy(), b_split.numpy())
    self.assertTrue((ht_outer_split.numpy() == np_outer_3d).all())
    self.assertTrue(ht_outer_split.split == 0)

    # write to out buffer
    ht_out = ht.empty((a.gshape[0], b.gshape[0]), dtype=ht.float32)
    ht.outer(a, b, out=ht_out)
    self.assertTrue((ht_out.numpy() == np_outer).all())
    ht_out_split = ht.empty((a_split.gshape[0], b_split.gshape[0]), dtype=ht.float32, split=1)
    ht.outer(a_split, b_split, out=ht_out_split, split=1)
    self.assertTrue((ht_out_split.numpy() == np_outer).all())

    # test exceptions
    t_a = torch.arange(3)
    with self.assertRaises(TypeError):
        ht.outer(t_a, b)
    np_b = np.arange(8)
    with self.assertRaises(TypeError):
        ht.outer(a, np_b)
    a_0d = ht.array(2.3)
    with self.assertRaises(RuntimeError):
        ht.outer(a_0d, b)
    t_out = torch.empty((a.gshape[0], b.gshape[0]), dtype=torch.float32)
    with self.assertRaises(TypeError):
        ht.outer(a, b, out=t_out)
    ht_out_wrong_dtype = ht.empty((a.gshape[0], b.gshape[0]), dtype=ht.float64)
    with self.assertRaises(TypeError):
        ht.outer(a, b, out=ht_out_wrong_dtype)
    ht_out_wrong_shape = ht.empty((7, b.gshape[0]), dtype=ht.float32)
    with self.assertRaises(ValueError):
        ht.outer(a, b, out=ht_out_wrong_shape)
    ht_out_wrong_split = ht.empty((a_split.gshape[0], b_split.gshape[0]), dtype=ht.float32, split=1)
    with self.assertRaises(ValueError):
        ht.outer(a_split, b_split, out=ht_out_wrong_split, split=0)
def test_expand_dims(self):
    # vector data
    a = ht.arange(10, device=ht_device)
    b = ht.expand_dims(a, 0)
    self.assertIsInstance(b, ht.DNDarray)
    self.assertEqual(len(b.shape), 2)
    self.assertEqual(b.shape[0], 1)
    self.assertEqual(b.shape[1], a.shape[0])
    self.assertEqual(b.lshape[0], 1)
    self.assertEqual(b.lshape[1], a.shape[0])
    self.assertIs(b.split, None)

    # vector data, expansion along the trailing axis
    a = ht.arange(12, device=ht_device)
    b = a.expand_dims(1)
    self.assertIsInstance(b, ht.DNDarray)
    self.assertEqual(len(b.shape), 2)
    self.assertEqual(b.shape[0], a.shape[0])
    self.assertEqual(b.shape[1], 1)
    self.assertEqual(b.lshape[0], a.shape[0])
    self.assertEqual(b.lshape[1], 1)
    self.assertIs(b.split, None)

    # volume with intermediate axis
    a = ht.empty((3, 4, 5), device=ht_device)
    b = a.expand_dims(1)
    self.assertIsInstance(b, ht.DNDarray)
    self.assertEqual(len(b.shape), 4)
    self.assertEqual(b.shape[0], a.shape[0])
    self.assertEqual(b.shape[1], 1)
    self.assertEqual(b.shape[2], a.shape[1])
    self.assertEqual(b.shape[3], a.shape[2])
    self.assertEqual(b.lshape[0], a.shape[0])
    self.assertEqual(b.lshape[1], 1)
    self.assertEqual(b.lshape[2], a.shape[1])
    self.assertEqual(b.lshape[3], a.shape[2])
    self.assertIs(b.split, None)

    # volume with negative axis
    a = ht.empty((3, 4, 5), device=ht_device)
    b = a.expand_dims(-4)
    self.assertIsInstance(b, ht.DNDarray)
    self.assertEqual(len(b.shape), 4)
    self.assertEqual(b.shape[0], 1)
    self.assertEqual(b.shape[1], a.shape[0])
    self.assertEqual(b.shape[2], a.shape[1])
    self.assertEqual(b.shape[3], a.shape[2])
    self.assertEqual(b.lshape[0], 1)
    self.assertEqual(b.lshape[1], a.shape[0])
    self.assertEqual(b.lshape[2], a.shape[1])
    self.assertEqual(b.lshape[3], a.shape[2])
    self.assertIs(b.split, None)

    # split volume with negative axis expansion after the split
    a = ht.empty((3, 4, 5), split=1, device=ht_device)
    b = a.expand_dims(-2)
    self.assertIsInstance(b, ht.DNDarray)
    self.assertEqual(len(b.shape), 4)
    self.assertEqual(b.shape[0], a.shape[0])
    self.assertEqual(b.shape[1], a.shape[1])
    self.assertEqual(b.shape[2], 1)
    self.assertEqual(b.shape[3], a.shape[2])
    self.assertEqual(b.lshape[0], a.shape[0])
    self.assertLessEqual(b.lshape[1], a.shape[1])
    self.assertEqual(b.lshape[2], 1)
    self.assertEqual(b.lshape[3], a.shape[2])
    self.assertIs(b.split, 1)

    # split volume with negative axis expansion before the split
    a = ht.empty((3, 4, 5), split=2, device=ht_device)
    b = a.expand_dims(-3)
    self.assertIsInstance(b, ht.DNDarray)
    self.assertEqual(len(b.shape), 4)
    self.assertEqual(b.shape[0], a.shape[0])
    self.assertEqual(b.shape[1], 1)
    self.assertEqual(b.shape[2], a.shape[1])
    self.assertEqual(b.shape[3], a.shape[2])
    self.assertEqual(b.lshape[0], a.shape[0])
    self.assertEqual(b.lshape[1], 1)
    self.assertEqual(b.lshape[2], a.shape[1])
    self.assertLessEqual(b.lshape[3], a.shape[2])
    self.assertIs(b.split, 3)

    # exceptions
    with self.assertRaises(TypeError):
        ht.expand_dims("(3, 4, 5,)", 1)
    with self.assertRaises(TypeError):
        ht.empty((3, 4, 5), device=ht_device).expand_dims("1")
    with self.assertRaises(ValueError):
        ht.empty((3, 4, 5), device=ht_device).expand_dims(4)
    with self.assertRaises(ValueError):
        ht.empty((3, 4, 5), device=ht_device).expand_dims(-5)
def test_diag(self):
    size = ht.MPI_WORLD.size
    rank = ht.MPI_WORLD.rank

    data = torch.arange(size * 2, device=device)
    a = ht.array(data, device=ht_device)
    res = ht.diag(a)
    self.assertTrue(torch.equal(res._DNDarray__array, torch.diag(data)))
    res = ht.diag(a, offset=size)
    self.assertTrue(torch.equal(res._DNDarray__array, torch.diag(data, diagonal=size)))
    res = ht.diag(a, offset=-size)
    self.assertTrue(torch.equal(res._DNDarray__array, torch.diag(data, diagonal=-size)))

    a = ht.array(data, split=0, device=ht_device)
    res = ht.diag(a)
    self.assertEqual(res.split, a.split)
    self.assertEqual(res.shape, (size * 2, size * 2))
    self.assertEqual(res.lshape[res.split], 2)
    exp = torch.diag(data)
    for i in range(rank * 2, (rank + 1) * 2):
        self.assertTrue(torch.equal(res[i, i]._DNDarray__array, exp[i, i]))

    res = ht.diag(a, offset=size)
    self.assertEqual(res.split, a.split)
    self.assertEqual(res.shape, (size * 3, size * 3))
    self.assertEqual(res.lshape[res.split], 3)
    exp = torch.diag(data, diagonal=size)
    for i in range(rank * 3, min((rank + 1) * 3, a.shape[0])):
        self.assertTrue(torch.equal(res[i, i + size]._DNDarray__array, exp[i, i + size]))

    res = ht.diag(a, offset=-size)
    self.assertEqual(res.split, a.split)
    self.assertEqual(res.shape, (size * 3, size * 3))
    self.assertEqual(res.lshape[res.split], 3)
    exp = torch.diag(data, diagonal=-size)
    for i in range(max(size, rank * 3), (rank + 1) * 3):
        self.assertTrue(torch.equal(res[i, i - size]._DNDarray__array, exp[i, i - size]))

    self.assertTrue(ht.equal(ht.diag(ht.diag(a)), a))

    a = ht.random.rand(15, 20, 5, split=1, device=ht_device)
    res_1 = ht.diag(a)
    res_2 = ht.diagonal(a)
    self.assertTrue(ht.equal(res_1, res_2))

    with self.assertRaises(ValueError):
        ht.diag(data)
    with self.assertRaises(ValueError):
        ht.diag(a, offset=None)

    a = ht.arange(size, device=ht_device)
    with self.assertRaises(ValueError):
        ht.diag(a, offset="3")

    a = ht.empty([], device=ht_device)
    with self.assertRaises(ValueError):
        ht.diag(a)

    if rank == 0:
        data = torch.ones(size, dtype=torch.int32, device=device)
    else:
        data = torch.empty(0, dtype=torch.int32, device=device)
    a = ht.array(data, is_split=0, device=ht_device)
    res = ht.diag(a)
    self.assertTrue(
        torch.equal(
            res[rank, rank]._DNDarray__array,
            torch.tensor(1, dtype=torch.int32, device=device),
        )
    )

    self.assert_func_equal_for_tensor(
        np.arange(23),
        heat_func=ht.diag,
        numpy_func=np.diag,
        heat_args={"offset": 2},
        numpy_args={"k": 2},
    )

    self.assert_func_equal(
        (27,),
        heat_func=ht.diag,
        numpy_func=np.diag,
        heat_args={"offset": -3},
        numpy_args={"k": -3},
    )
def test_minimum(self):
    data1 = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
    data2 = [[0, 3, 2], [5, 4, 7], [6, 9, 8], [9, 10, 11]]

    ht_array1 = ht.array(data1)
    ht_array2 = ht.array(data2)
    comparison1 = torch.tensor(data1, device=self.device.torch_device)
    comparison2 = torch.tensor(data2, device=self.device.torch_device)

    # check minimum
    minimum = ht.minimum(ht_array1, ht_array2)
    self.assertIsInstance(minimum, ht.DNDarray)
    self.assertEqual(minimum.shape, (4, 3))
    self.assertEqual(minimum.lshape, (4, 3))
    self.assertEqual(minimum.split, None)
    self.assertEqual(minimum.dtype, ht.int64)
    self.assertEqual(minimum._DNDarray__array.dtype, torch.int64)
    self.assertTrue((minimum._DNDarray__array == torch.min(comparison1, comparison2)).all())

    # check minimum over float elements of split 3d tensors
    # TODO: add check for uneven distribution of dimensions (see Issue #273)
    size = ht.MPI_WORLD.size
    torch.manual_seed(1)
    random_volume_1 = ht.random.randn(12 * size, 3, 3, split=0)
    random_volume_2 = ht.random.randn(12 * size, 1, 3, split=0)
    minimum_volume = ht.minimum(random_volume_1, random_volume_2)
    self.assertIsInstance(minimum_volume, ht.DNDarray)
    self.assertEqual(minimum_volume.shape, (size * 12, 3, 3))
    self.assertEqual(minimum_volume.lshape, (size * 12, 3, 3))
    self.assertEqual(minimum_volume.dtype, ht.float32)
    self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float32)
    self.assertEqual(minimum_volume.split, random_volume_1.split)

    # check minimum over float elements of split 3d tensors with different split axis
    torch.manual_seed(1)
    random_volume_1_splitdiff = ht.random.randn(size * 3, size * 3, 4, split=0)
    random_volume_2_splitdiff = ht.random.randn(size * 3, size * 3, 4, split=1)
    minimum_volume_splitdiff = ht.minimum(random_volume_1_splitdiff, random_volume_2_splitdiff)
    self.assertIsInstance(minimum_volume_splitdiff, ht.DNDarray)
    self.assertEqual(minimum_volume_splitdiff.shape, (size * 3, size * 3, 4))
    self.assertEqual(minimum_volume_splitdiff.lshape, (size * 3, size * 3, 4))
    self.assertEqual(minimum_volume_splitdiff.dtype, ht.float32)
    self.assertEqual(minimum_volume_splitdiff._DNDarray__array.dtype, torch.float32)
    self.assertEqual(minimum_volume_splitdiff.split, 0)

    random_volume_1_splitdiff = ht.random.randn(size * 3, size * 3, 4, split=1)
    random_volume_2_splitdiff = ht.random.randn(size * 3, size * 3, 4, split=0)
    minimum_volume_splitdiff = ht.minimum(random_volume_1_splitdiff, random_volume_2_splitdiff)
    self.assertEqual(minimum_volume_splitdiff.split, 0)

    random_volume_1_split_none = ht.random.randn(size * 3, size * 3, 4, split=None)
    random_volume_2_splitdiff = ht.random.randn(size * 3, size * 3, 4, split=1)
    minimum_volume_splitdiff = ht.minimum(random_volume_1_split_none, random_volume_2_splitdiff)
    self.assertEqual(minimum_volume_splitdiff.split, 1)

    random_volume_1_split_none = ht.random.randn(size * 3, size * 3, 4, split=0)
    random_volume_2_splitdiff = ht.random.randn(size * 3, size * 3, 4, split=None)
    minimum_volume_splitdiff = ht.minimum(random_volume_1_split_none, random_volume_2_splitdiff)
    self.assertEqual(minimum_volume_splitdiff.split, 0)

    # check output buffer
    out_shape = ht.stride_tricks.broadcast_shape(random_volume_1.gshape, random_volume_2.gshape)
    output = ht.empty(out_shape)
    ht.minimum(random_volume_1, random_volume_2, out=output)
    self.assertIsInstance(output, ht.DNDarray)
    self.assertEqual(output.shape, (ht.MPI_WORLD.size * 12, 3, 3))
    self.assertEqual(output.lshape, (ht.MPI_WORLD.size * 12, 3, 3))
    self.assertEqual(output.dtype, ht.float32)
    self.assertEqual(output._DNDarray__array.dtype, torch.float32)
    self.assertEqual(output.split, random_volume_1.split)

    # check exceptions
    random_volume_3 = ht.random.randn(4, 2, 3, split=0)
    with self.assertRaises(ValueError):
        ht.minimum(random_volume_1, random_volume_3)
    random_volume_3 = torch.ones(12, 3, 3, device=self.device.torch_device)
    with self.assertRaises(TypeError):
        ht.minimum(random_volume_1, random_volume_3)
    output = torch.ones(12, 3, 3, device=self.device.torch_device)
    with self.assertRaises(TypeError):
        ht.minimum(random_volume_1, random_volume_2, out=output)
    output = ht.ones((12, 4, 3))
    with self.assertRaises(ValueError):
        ht.minimum(random_volume_1, random_volume_2, out=output)
def _initialize_cluster_centers(self, X):
    """
    Initializes the K-Means centroids.

    Parameters
    ----------
    X : ht.DNDarray, shape=(n_points, n_features)
        The data to initialize the clusters for.
    """
    # always initialize the random state
    if self.random_state is not None:
        ht.random.seed(self.random_state)

    # initialize the centroids by randomly picking some of the points
    if self.init == "random":
        # samples will be drawn equally distributed from all involved processes
        _, displ, _ = X.comm.counts_displs_shape(shape=X.shape, axis=0)
        centroids = ht.empty((self.n_clusters, X.shape[1]), split=None, device=X.device, comm=X.comm)
        if (X.split is None) or (X.split == 0):
            for i in range(self.n_clusters):
                samplerange = (
                    X.gshape[0] // self.n_clusters * i,
                    X.gshape[0] // self.n_clusters * (i + 1),
                )
                sample = ht.random.randint(samplerange[0], samplerange[1]).item()
                proc = 0
                for p in range(X.comm.size):
                    if displ[p] > sample:
                        break
                    proc = p
                xi = ht.zeros(X.shape[1], dtype=X.dtype)
                if X.comm.rank == proc:
                    idx = sample - displ[proc]
                    xi = ht.array(X.lloc[idx, :], device=X.device, comm=X.comm)
                xi.comm.Bcast(xi, root=proc)
                centroids[i, :] = xi
        else:
            raise NotImplementedError("Not implemented for other splitting-axes")
        self._cluster_centers = centroids

    # directly passed centroids
    elif isinstance(self.init, ht.DNDarray):
        if len(self.init.shape) != 2:
            raise ValueError(
                "passed centroids need to be two-dimensional, but are {}-dimensional".format(
                    len(self.init.shape)
                )
            )
        if self.init.shape[0] != self.n_clusters or self.init.shape[1] != X.shape[1]:
            raise ValueError("passed centroids do not match cluster count or data shape")
        self._cluster_centers = self.init.resplit(None)

    # kmeans++, smart centroid guessing
    elif self.init == "kmeans++":
        if (X.split is None) or (X.split == 0):
            centroids = ht.zeros((self.n_clusters, X.shape[1]), split=None, device=X.device, comm=X.comm)
            sample = ht.random.randint(0, X.shape[0] - 1).item()
            _, displ, _ = X.comm.counts_displs_shape(shape=X.shape, axis=0)
            proc = 0
            for p in range(X.comm.size):
                if displ[p] > sample:
                    break
                proc = p
            x0 = ht.zeros(X.shape[1], dtype=X.dtype, device=X.device, comm=X.comm)
            if X.comm.rank == proc:
                idx = sample - displ[proc]
                x0 = ht.array(X.lloc[idx, :], device=X.device, comm=X.comm)
            x0.comm.Bcast(x0, root=proc)
            centroids[0, :] = x0

            for i in range(1, self.n_clusters):
                distances = ht.spatial.distance.cdist(X, centroids, quadratic_expansion=True)
                D2 = distances.min(axis=1)
                D2.resplit_(axis=None)
                prob = D2 / D2.sum()
                # inverse-CDF sampling: pick the index where the cumulative
                # probability first exceeds the uniform draw x
                x = ht.random.rand().item()
                sample = 0
                prob_sum = 0
                for j in range(len(prob)):
                    if prob_sum > x:
                        break
                    prob_sum += prob[j].item()
                    sample = j
                proc = 0
                for p in range(X.comm.size):
                    if displ[p] > sample:
                        break
                    proc = p
                xi = ht.zeros(X.shape[1], dtype=X.dtype)
                if X.comm.rank == proc:
                    idx = sample - displ[proc]
                    xi = ht.array(X.lloc[idx, :], device=X.device, comm=X.comm)
                xi.comm.Bcast(xi, root=proc)
                centroids[i, :] = xi
        else:
            raise NotImplementedError("Not implemented for other splitting-axes")
        self._cluster_centers = centroids

    else:
        raise ValueError(
            'init needs to be one of "random", ht.DNDarray or "kmeans++", but was {}'.format(self.init)
        )
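# A minimal single-process NumPy sketch of the kmeans++ D^2 sampling that the
# distributed branch above implements (the helper kmeans_pp_init is
# hypothetical, for illustration only): each further centroid is drawn with
# probability proportional to the squared distance to the nearest centroid
# chosen so far.
import numpy as np

def kmeans_pp_init(X, n_clusters, seed=None):
    rng = np.random.default_rng(seed)
    centroids = np.empty((n_clusters, X.shape[1]))
    centroids[0] = X[rng.integers(X.shape[0])]  # first centroid: uniform pick
    for i in range(1, n_clusters):
        # squared distance of every point to its nearest already-chosen centroid
        d2 = ((X[:, None, :] - centroids[None, :i, :]) ** 2).sum(axis=-1).min(axis=1)
        centroids[i] = X[rng.choice(X.shape[0], p=d2 / d2.sum())]
    return centroids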
def test_argmax(self):
    torch.manual_seed(1)
    data = ht.random.randn(3, 4, 5)

    # 3D local tensor, major axis
    result = ht.argmax(data, axis=0)
    self.assertIsInstance(result, ht.DNDarray)
    self.assertEqual(result.dtype, ht.int64)
    self.assertEqual(result._DNDarray__array.dtype, torch.int64)
    self.assertEqual(result.shape, (4, 5))
    self.assertEqual(result.lshape, (4, 5))
    self.assertEqual(result.split, None)
    self.assertTrue((result._DNDarray__array == data._DNDarray__array.argmax(0)).all())

    # 3D local tensor, minor axis
    result = ht.argmax(data, axis=-1, keepdim=True)
    self.assertIsInstance(result, ht.DNDarray)
    self.assertEqual(result.dtype, ht.int64)
    self.assertEqual(result._DNDarray__array.dtype, torch.int64)
    self.assertEqual(result.shape, (3, 4, 1))
    self.assertEqual(result.lshape, (3, 4, 1))
    self.assertEqual(result.split, None)
    self.assertTrue(
        (result._DNDarray__array == data._DNDarray__array.argmax(-1, keepdim=True)).all()
    )

    # 1D split tensor, no axis
    data = ht.arange(-10, 10, split=0)
    result = ht.argmax(data)
    self.assertIsInstance(result, ht.DNDarray)
    self.assertEqual(result.dtype, ht.int64)
    self.assertEqual(result._DNDarray__array.dtype, torch.int64)
    self.assertEqual(result.shape, (1,))
    self.assertEqual(result.lshape, (1,))
    self.assertEqual(result.split, None)
    self.assertTrue(
        (result._DNDarray__array == torch.tensor([19], device=self.device.torch_device))
    )

    # 2D split tensor, along the axis
    data = ht.array(ht.random.randn(4, 5), is_split=0)
    result = ht.argmax(data, axis=1)
    expected = torch.argmax(data._DNDarray__array, dim=1)
    self.assertIsInstance(result, ht.DNDarray)
    self.assertEqual(result.dtype, ht.int64)
    self.assertEqual(result._DNDarray__array.dtype, torch.int64)
    self.assertEqual(result.shape, (ht.MPI_WORLD.size * 4,))
    self.assertEqual(result.lshape, (4,))
    self.assertEqual(result.split, 0)
    self.assertTrue((result._DNDarray__array == expected).all())

    # 2D split tensor, across the axis
    size = ht.MPI_WORLD.size * 2
    data = ht.tril(ht.ones((size, size), split=0), k=-1)
    result = ht.argmax(data, axis=0)
    self.assertIsInstance(result, ht.DNDarray)
    self.assertEqual(result.dtype, ht.int64)
    self.assertEqual(result._DNDarray__array.dtype, torch.int64)
    self.assertEqual(result.shape, (size,))
    self.assertEqual(result.lshape, (size,))
    self.assertEqual(result.split, None)
    # skip test on GPU; argmax works differently there
    if not (torch.cuda.is_available() and result.device == ht.gpu):
        self.assertTrue((result._DNDarray__array != 0).all())

    # 2D split tensor, across the axis, output tensor
    size = ht.MPI_WORLD.size * 2
    data = ht.tril(ht.ones((size, size), split=0), k=-1)
    output = ht.empty((size,))
    result = ht.argmax(data, axis=0, out=output)
    self.assertIsInstance(result, ht.DNDarray)
    self.assertEqual(output.dtype, ht.int64)
    self.assertEqual(output._DNDarray__array.dtype, torch.int64)
    self.assertEqual(output.shape, (size,))
    self.assertEqual(output.lshape, (size,))
    self.assertEqual(output.split, None)
    # skip test on GPU; argmax works differently there
    if not (torch.cuda.is_available() and output.device == ht.gpu):
        self.assertTrue((output._DNDarray__array != 0).all())

    # check exceptions
    with self.assertRaises(TypeError):
        data.argmax(axis=(0, 1))
    with self.assertRaises(TypeError):
        data.argmax(axis=1.1)
    with self.assertRaises(TypeError):
        data.argmax(axis="y")
    with self.assertRaises(ValueError):
        ht.argmax(data, axis=-4)