Example #1
0
    def test_int_cast(self):
        """Check ``int()`` conversion of size-1 arrays and its failure modes."""
        # simple scalar tensor
        a = ht.ones(1)
        casted_a = int(a)
        self.assertEqual(casted_a, 1)
        self.assertIsInstance(casted_a, int)

        # multi-dimensional scalar tensor
        b = ht.zeros((1, 1, 1, 1))
        casted_b = int(b)
        self.assertEqual(casted_b, 0)
        self.assertIsInstance(casted_b, int)

        # split scalar tensor
        c = ht.full((1,), 5, split=0)
        casted_c = int(c)
        self.assertEqual(casted_c, 5)
        self.assertIsInstance(casted_c, int)

        # exception on non-scalar tensor
        # The shape has to be handed over as ONE tuple: spread over several
        # positional arguments only the first value would be the shape, so the
        # cast of a two-element array would never be exercised.
        with self.assertRaises(TypeError):
            int(ht.empty((1, 2, 1, 1)))
        # exception on empty tensor
        with self.assertRaises(TypeError):
            int(ht.empty((0, 1, 2)))
        # exception on split tensor, where each chunk has size 1
        if ht.MPI_WORLD.size > 1:
            with self.assertRaises(TypeError):
                int(ht.full((ht.MPI_WORLD.size,), 2, split=0))
Example #2
0
    def test_float_cast(self):
        """Check ``float()`` conversion of size-1 arrays and its failure modes."""
        # simple scalar tensor
        a = ht.ones(1, device=ht_device)
        casted_a = float(a)
        self.assertEqual(casted_a, 1.0)
        self.assertIsInstance(casted_a, float)

        # multi-dimensional scalar tensor
        b = ht.zeros((1, 1, 1, 1), device=ht_device)
        casted_b = float(b)
        self.assertEqual(casted_b, 0.0)
        self.assertIsInstance(casted_b, float)

        # split scalar tensor
        c = ht.full((1,), 5, split=0, device=ht_device)
        casted_c = float(c)
        self.assertEqual(casted_c, 5.0)
        self.assertIsInstance(casted_c, float)

        # exception on non-scalar tensor
        # The shape has to be handed over as ONE tuple: spread over several
        # positional arguments only the first value would be the shape, so the
        # cast of a two-element array would never be exercised.
        with self.assertRaises(TypeError):
            float(ht.empty((1, 2, 1, 1), device=ht_device))
        # exception on empty tensor
        with self.assertRaises(TypeError):
            float(ht.empty((0, 1, 2), device=ht_device))
        # exception on split tensor, where each chunk has size 1
        if ht.MPI_WORLD.size > 1:
            # ``device`` belongs to ht.full(); handing it to float() would
            # raise a TypeError from the builtin's signature and hide whether
            # the cast itself rejects the distributed array.
            with self.assertRaises(TypeError):
                float(ht.full((ht.MPI_WORLD.size,), 2, split=0, device=ht_device))
Example #3
0
    def test_bool_cast(self):
        """Check ``bool()`` conversion of size-1 arrays and its failure modes."""
        # simple scalar tensor
        a = ht.ones(1, device=ht_device)
        casted_a = bool(a)
        self.assertEqual(casted_a, True)
        self.assertIsInstance(casted_a, bool)

        # multi-dimensional scalar tensor
        b = ht.zeros((1, 1, 1, 1), device=ht_device)
        casted_b = bool(b)
        self.assertEqual(casted_b, False)
        self.assertIsInstance(casted_b, bool)

        # split scalar tensor
        c = ht.full((1,), 5, split=0, device=ht_device)
        casted_c = bool(c)
        self.assertEqual(casted_c, True)
        self.assertIsInstance(casted_c, bool)

        # exception on non-scalar tensor
        # The shape has to be handed over as ONE tuple: spread over several
        # positional arguments only the first value would be the shape, so the
        # cast of a two-element array would never be exercised.
        with self.assertRaises(TypeError):
            bool(ht.empty((1, 2, 1, 1), device=ht_device))
        # exception on empty tensor
        with self.assertRaises(TypeError):
            bool(ht.empty((0, 1, 2), device=ht_device))
        # exception on split tensor, where each chunk has size 1
        if ht.MPI_WORLD.size > 1:
            with self.assertRaises(TypeError):
                bool(ht.full((ht.MPI_WORLD.size,), 2, split=0, device=ht_device))
Example #4
0
    def predict_log_proba(self, X):
        """
        Adapted to HeAT from scikit-learn.

        Compute log-probability estimates for the test tensor X.

        Parameters
        ----------
        X : ht.tensor of shape (n_samples, n_features)

        Returns
        -------
        C : ht.tensor of shape (n_samples, n_classes)
            The log-probability of the samples for each class in the model.
            The columns correspond to the classes in sorted order, as they
            appear in the attribute `classes_`.
        """
        # TODO: sanitation/validation module, cf. #468, log_prob_x must be 2D (cf. np.atleast_2D)
        joint = self.__joint_log_likelihood(X)

        # column vector holding the normalizer log P(x) = log P(f_1, ..., f_n)
        normalizer = ht.empty(
            (joint.gshape[0], 1),
            dtype=joint.dtype,
            split=joint.split,
            device=joint.device,
        )
        # take the local logsumexp result and give it a trailing axis of
        # length one before storing it as the normalizer's local data
        local_lse = self.logsumexp(joint, axis=1)._DNDarray__array
        normalizer._DNDarray__array = local_lse.unsqueeze(1)

        return joint - normalizer
Example #5
0
    def test_isposinf(self):
        """Check ``ht.isposinf`` with and without an output buffer."""
        # non-distributed float input, result returned
        a = ht.array([1, ht.inf, -ht.inf, ht.nan])
        s = ht.array([False, True, False, False])
        r = ht.isposinf(a)
        self.assertEqual(r.shape, s.shape)
        self.assertEqual(r.dtype, s.dtype)
        self.assertEqual(r.device, s.device)
        self.assertTrue(ht.equal(r, s))

        # distributed float input, result written into ``out``
        a = ht.array([1, ht.inf, -ht.inf, ht.nan], split=0)
        out = ht.empty(4, dtype=ht.bool, split=0)
        s = ht.array([False, True, False, False], split=0)
        ht.isposinf(a, out)
        self.assertEqual(out.shape, s.shape)
        self.assertEqual(out.dtype, s.dtype)
        self.assertEqual(out.device, s.device)
        # verify the buffer that was just filled, not the result of the
        # previous section
        self.assertTrue(ht.equal(out, s))

        # boolean input can never be +inf
        a = ht.ones((6, 6), dtype=ht.bool, split=0)
        s = ht.zeros((6, 6), dtype=ht.bool, split=0)
        r = ht.isposinf(a)
        self.assertEqual(r.shape, s.shape)
        self.assertEqual(r.dtype, s.dtype)
        self.assertEqual(r.device, s.device)
        self.assertTrue(ht.equal(r, s))

        # integer input can never be +inf
        a = ht.ones((5, 5), dtype=ht.int, split=1)
        s = ht.zeros((5, 5), dtype=ht.bool, split=1)
        r = ht.isposinf(a)
        self.assertEqual(r.shape, s.shape)
        self.assertEqual(r.dtype, s.dtype)
        self.assertEqual(r.device, s.device)
        self.assertTrue(ht.equal(r, s))
Example #6
0
    def test_empty(self):
        """Exercise ``ht.empty`` for several shapes, dtypes and split settings."""
        # an integer shape without dtype: 1-D array, float32 expected
        vec = ht.empty(3, device=ht_device)
        self.assertIsInstance(vec, ht.DNDarray)
        self.assertEqual(vec.shape, (3, ))
        self.assertEqual(vec.lshape, (3, ))
        self.assertEqual(vec.split, None)
        self.assertEqual(vec.dtype, ht.float32)

        # explicit non-default dtype
        vec_bool = ht.empty(5, dtype=ht.bool, device=ht_device)
        self.assertIsInstance(vec_bool, ht.DNDarray)
        self.assertEqual(vec_bool.shape, (5, ))
        self.assertEqual(vec_bool.lshape, (5, ))
        self.assertEqual(vec_bool.split, None)
        self.assertEqual(vec_bool.dtype, ht.bool)

        # tuple shape, two dimensions
        matrix = ht.empty((2, 3), dtype=ht.int32, device=ht_device)
        self.assertIsInstance(matrix, ht.DNDarray)
        self.assertEqual(matrix.shape, (2, 3))
        self.assertEqual(matrix.lshape, (2, 3))
        self.assertEqual(matrix.split, None)
        self.assertEqual(matrix.dtype, ht.int32)

        # distributed along axis 0: the local extent of that axis may shrink
        distributed = ht.empty(
            (6, 4), dtype=ht.int32, split=0, device=ht_device
        )
        self.assertIsInstance(distributed, ht.DNDarray)
        self.assertEqual(distributed.shape, (6, 4))
        self.assertLessEqual(distributed.lshape[0], 6)
        self.assertEqual(distributed.lshape[1], 4)
        self.assertEqual(distributed.split, 0)
        self.assertEqual(distributed.dtype, ht.int32)

        # invalid arguments: string shape, negative extent, string split
        with self.assertRaises(TypeError):
            ht.empty("(2, 3,)", dtype=ht.float64, device=ht_device)
        with self.assertRaises(ValueError):
            ht.empty((-1, 3), dtype=ht.float64, device=ht_device)
        with self.assertRaises(TypeError):
            ht.empty((2, 3), dtype=ht.float64, split="axis", device=ht_device)
Example #7
0
 def test_sanitize_out(self):
     """Check that ``ht.sanitize_out`` rejects unsuitable output buffers."""
     output_shape = (4, 5, 6)
     output_split = 1
     output_device = "cpu"
     # the properties every candidate buffer is validated against
     expected = (output_shape, output_split, output_device)

     # a torch tensor is not an accepted output buffer
     torch_buffer = torch.empty(output_shape)
     with self.assertRaises(TypeError):
         ht.sanitize_out(torch_buffer, *expected)

     # right type and split, wrong shape
     misshaped = ht.empty((4, 7, 6), split=output_split, device=output_device)
     with self.assertRaises(ValueError):
         ht.sanitize_out(misshaped, *expected)

     # right type and shape, wrong split axis
     missplit = ht.empty(output_shape, split=2, device=output_device)
     with self.assertRaises(ValueError):
         ht.sanitize_out(missplit, *expected)
Example #8
0
    def test_cumprod(self):
        """Compare ``ht.cumprod`` with hand-computed products for several splits."""
        data = ht.full((2, 4), 2, dtype=ht.int32)
        expected = ht.array([[2, 4, 8, 16], [2, 4, 8, 16]], dtype=ht.int32)

        # not distributed, cumulate along axis 1
        actual = ht.cumprod(data, 1)
        self.assertTrue(ht.equal(actual, expected))

        # ht.cumproduct is checked against the same expectation
        actual = ht.cumproduct(data, 1)
        self.assertTrue(ht.equal(actual, expected))

        # distributed along the cumulated axis
        data = ht.full((4, 2), 2, dtype=ht.int64, split=0)
        expected = ht.array(
            [[2, 2], [4, 4], [8, 8], [16, 16]], dtype=ht.int64, split=0
        )

        actual = ht.cumprod(data, 0)
        self.assertTrue(ht.equal(actual, expected))

        # three dimensions, result written into a buffer
        out = ht.empty((2, 2, 2), dtype=ht.float32, split=0)

        data = ht.full((2, 2, 2), 2, split=0)
        expected = ht.array(
            [[[2, 2], [2, 2]], [[4, 4], [4, 4]]], dtype=ht.float32, split=0
        )

        actual = ht.cumprod(data, 0, out=out)
        self.assertTrue(ht.equal(actual, out))
        self.assertTrue(ht.equal(actual, expected))

        # integer input with an explicitly requested float64 result
        data = ht.full((2, 2, 2), 2, dtype=ht.int32, split=1)
        expected = ht.array(
            [[[2, 2], [4, 4]], [[2, 2], [4, 4]]], dtype=ht.float32, split=1
        )

        actual = ht.cumprod(data, 1, dtype=ht.float64)
        self.assertTrue(ht.equal(actual, expected))

        # distributed along the last axis
        data = ht.full((2, 2, 2), 2, dtype=ht.float32, split=2)
        expected = ht.array(
            [[[2, 4], [2, 4]], [[2, 4], [2, 4]]], dtype=ht.float32, split=2
        )

        actual = ht.cumprod(data, 2)
        self.assertTrue(ht.equal(actual, expected))

        # invalid axis values and an unsuitable output buffer
        with self.assertRaises(NotImplementedError):
            ht.cumprod(ht.ones((2, 2)), axis=None)
        with self.assertRaises(TypeError):
            ht.cumprod(ht.ones((2, 2)), axis="1")
        with self.assertRaises(ValueError):
            ht.cumprod(data, 2, out=out)
        with self.assertRaises(ValueError):
            ht.cumprod(ht.ones((2, 2)), 2)
Example #9
0
    def test_cumsum(self):
        """Compare ``ht.cumsum`` with hand-computed sums for several splits."""
        data = ht.ones((2, 4), dtype=ht.int32)
        expected = ht.array([[1, 2, 3, 4], [1, 2, 3, 4]], dtype=ht.int32)

        # not distributed, cumulate along axis 1
        actual = ht.cumsum(data, 1)
        self.assertTrue(ht.equal(actual, expected))

        # distributed along the cumulated axis
        data = ht.ones((4, 2), dtype=ht.int64, split=0)
        expected = ht.array(
            [[1, 1], [2, 2], [3, 3], [4, 4]], dtype=ht.int64, split=0
        )

        actual = ht.cumsum(data, 0)
        self.assertTrue(ht.equal(actual, expected))

        # three dimensions, result written into a buffer
        out = ht.empty((2, 2, 2), dtype=ht.float32, split=0)

        data = ht.ones((2, 2, 2), split=0)
        expected = ht.array(
            [[[1, 1], [1, 1]], [[2, 2], [2, 2]]], dtype=ht.float32, split=0
        )

        actual = ht.cumsum(data, 0, out=out)
        self.assertTrue(ht.equal(actual, out))
        self.assertTrue(ht.equal(actual, expected))

        # integer input with an explicitly requested float64 result
        data = ht.ones((2, 2, 2), dtype=ht.int32, split=1)
        expected = ht.array(
            [[[1, 1], [2, 2]], [[1, 1], [2, 2]]], dtype=ht.float32, split=1
        )

        actual = ht.cumsum(data, 1, dtype=ht.float64)
        self.assertTrue(ht.equal(actual, expected))

        # distributed along the last axis
        data = ht.ones((2, 2, 2), dtype=ht.float32, split=2)
        expected = ht.array(
            [[[1, 2], [1, 2]], [[1, 2], [1, 2]]], dtype=ht.float32, split=2
        )

        actual = ht.cumsum(data, 2)
        self.assertTrue(ht.equal(actual, expected))

        # invalid axis values and an unsuitable output buffer
        with self.assertRaises(NotImplementedError):
            ht.cumsum(ht.ones((2, 2)), axis=None)
        with self.assertRaises(TypeError):
            ht.cumsum(ht.ones((2, 2)), axis="1")
        with self.assertRaises(ValueError):
            ht.cumsum(data, 2, out=out)
        with self.assertRaises(ValueError):
            ht.cumsum(ht.ones((2, 2)), 2)
Example #10
0
    def test_pos(self):
        """Check ``ht.pos``, unary ``+`` and ``ht.positive`` with an out buffer."""
        # function form leaves integer values untouched
        int_result = ht.pos(ht.array([-1, 1]))
        self.assertTrue(ht.equal(int_result, ht.array([-1, 1])))
        # operator form leaves float values untouched
        float_result = +ht.array([-1.0, 1.0])
        self.assertTrue(ht.equal(float_result, ht.array([-1.0, 1.0])))

        # complex, distributed input written into a buffer
        a = ht.array([1 + 1j, 2 - 2j, 3, 4j, 5], split=0)
        out = ht.empty(5, dtype=ht.complex64, split=0)
        b = out  # second reference to the buffer, used for the identity check below
        ht.positive(a, out=out)
        self.assertTrue(ht.equal(out, a))
        self.assertIs(out, b)

        # a plain Python scalar is rejected
        with self.assertRaises(TypeError):
            ht.pos(1)
Example #11
0
    def test_neg(self):
        """Check ``ht.neg``, unary ``-`` and ``ht.negative`` with an out buffer."""
        # function form flips the sign of integer values
        int_result = ht.neg(ht.array([-1, 1]))
        self.assertTrue(ht.equal(int_result, ht.array([1, -1])))
        # operator form flips the sign of float values
        float_result = -ht.array([-1.0, 1.0])
        self.assertTrue(ht.equal(float_result, ht.array([1.0, -1.0])))

        # complex, distributed input written into a buffer
        a = ht.array([1 + 1j, 2 - 2j, 3, 4j, 5], split=0)
        out = ht.empty(5, dtype=ht.complex64, split=0)
        b = out  # second reference to the buffer, used for the identity check below
        ht.negative(a, out=out)
        negated = ht.array([-1 - 1j, -2 + 2j, -3, -4j, -5], split=0)
        self.assertTrue(ht.equal(out, negated))
        self.assertIs(out, b)

        # a plain Python scalar is rejected
        with self.assertRaises(TypeError):
            ht.neg(1)
Example #12
0
    def __joint_log_likelihood(self, X):
        """
        Adapted to HeAT from scikit-learn.

        Computes, for each of the n_samples rows of X, the joint log-likelihood
        of belonging to every class.
        Returns an ht.DNDarray joint_log_likelihood(n_samples, n_classes).
        """

        n_classes = self.classes_._DNDarray__array.numel()
        jll = ht.empty(
            (X.shape[0], n_classes), dtype=X.dtype, split=X.split, device=X.device
        )
        # fill the result one class (column) at a time
        for cls in range(n_classes):
            log_prior = ht.log(self.class_prior_[cls])
            # term built from the class's sigma_ row only
            log_density = -0.5 * ht.sum(ht.log(2.0 * ht.pi * self.sigma_[cls, :]))
            # squared deviation from theta_, scaled by sigma_ and summed along axis 1
            log_density -= 0.5 * ht.sum(
                ((X - self.theta_[cls, :]) ** 2) / (self.sigma_[cls, :]), 1
            )
            jll[:, cls] = log_prior + log_density

        return jll
Example #13
0
    def test_len(self):
        """``len()`` of an array is the extent of its first dimension."""
        # one-dimensional array
        vector = ht.zeros((10,), device=ht_device)
        n_vector = len(vector)
        self.assertIsInstance(n_vector, int)
        self.assertEqual(n_vector, 10)

        # two-dimensional array
        matrix = ht.ones((50, 2), device=ht_device)
        n_matrix = len(matrix)
        self.assertIsInstance(n_matrix, int)
        self.assertEqual(n_matrix, 50)

        # five-dimensional array created with split=-1
        volume = ht.empty((3, 4, 5, 6, 7), split=-1, device=ht_device)
        n_volume = len(volume)
        self.assertIsInstance(n_volume, int)
        self.assertEqual(n_volume, 3)
Example #14
0
    def test_ndim(self):
        """``ndim`` reports the number of axes; ``numdims`` must emit a warning."""
        four_d = ht.empty([2, 3, 3, 2])
        self.assertEqual(four_d.ndim, 4)

        # merely reading the attribute has to trigger the warning
        with self.assertWarns(Warning):
            four_d.numdims
Example #15
0
    def test_outer(self):
        """Check ``ht.outer`` against ``np.outer`` for local and distributed operands."""
        # test outer, a and b local, different dtypes
        a = ht.arange(3, dtype=ht.int32)
        b = ht.arange(8, dtype=ht.float32)
        ht_outer = ht.outer(a, b, split=None)
        # reference values (numpy) and reference result dtype (torch)
        np_outer = np.outer(a.numpy(), b.numpy())
        t_outer = torch.einsum("i,j->ij", a._DNDarray__array,
                               b._DNDarray__array)
        self.assertTrue((ht_outer.numpy() == np_outer).all())
        self.assertTrue(ht_outer._DNDarray__array.dtype is t_outer.dtype)

        # test outer, a and b distributed, no data on some ranks
        a_split = ht.arange(3, dtype=ht.float32, split=0)
        b_split = ht.arange(8, dtype=ht.float32, split=0)
        # the result of this call is not inspected (it is overwritten right
        # below); the call only has to complete
        ht_outer_split = ht.outer(a_split, b_split, split=None)

        # a and b split 0, outer split 1
        ht_outer_split = ht.outer(a_split, b_split, split=1)
        self.assertTrue((ht_outer_split.numpy() == np_outer).all())
        self.assertTrue(ht_outer_split.split == 1)

        # a and b distributed, outer split unspecified
        ht_outer_split = ht.outer(a_split, b_split, split=None)
        self.assertTrue((ht_outer_split.numpy() == np_outer).all())
        self.assertTrue(ht_outer_split.split == 0)

        # a not distributed, outer.split = 1
        ht_outer_split = ht.outer(a, b_split, split=1)
        self.assertTrue((ht_outer_split.numpy() == np_outer).all())
        self.assertTrue(ht_outer_split.split == 1)

        # b not distributed, outer.split = 0
        ht_outer_split = ht.outer(a_split, b, split=0)
        self.assertTrue((ht_outer_split.numpy() == np_outer).all())
        self.assertTrue(ht_outer_split.split == 0)

        # a_split.ndim > 1 and a.split != 0
        a_split_3d = ht.random.randn(3, 3, 3, dtype=ht.float64, split=2)
        ht_outer_split = ht.outer(a_split_3d, b_split)
        np_outer_3d = np.outer(a_split_3d.numpy(), b_split.numpy())
        self.assertTrue((ht_outer_split.numpy() == np_outer_3d).all())
        self.assertTrue(ht_outer_split.split == 0)

        # write to out buffer
        ht_out = ht.empty((a.gshape[0], b.gshape[0]), dtype=ht.float32)
        ht.outer(a, b, out=ht_out)
        self.assertTrue((ht_out.numpy() == np_outer).all())
        ht_out_split = ht.empty((a_split.gshape[0], b_split.gshape[0]),
                                dtype=ht.float32,
                                split=1)
        ht.outer(a_split, b_split, out=ht_out_split, split=1)
        self.assertTrue((ht_out_split.numpy() == np_outer).all())

        # test exceptions
        # operands that are torch tensors or numpy arrays
        t_a = torch.arange(3)
        with self.assertRaises(TypeError):
            ht.outer(t_a, b)
        np_b = np.arange(8)
        with self.assertRaises(TypeError):
            ht.outer(a, np_b)
        # zero-dimensional operand
        a_0d = ht.array(2.3)
        with self.assertRaises(RuntimeError):
            ht.outer(a_0d, b)
        # out buffer that is a torch tensor
        t_out = torch.empty((a.gshape[0], b.gshape[0]), dtype=torch.float32)
        with self.assertRaises(TypeError):
            ht.outer(a, b, out=t_out)
        # out buffer whose dtype is float64 instead of float32
        ht_out_wrong_dtype = ht.empty((a.gshape[0], b.gshape[0]),
                                      dtype=ht.float64)
        with self.assertRaises(TypeError):
            ht.outer(a, b, out=ht_out_wrong_dtype)
        # out buffer with 7 rows instead of a.gshape[0]
        ht_out_wrong_shape = ht.empty((7, b.gshape[0]), dtype=ht.float32)
        with self.assertRaises(ValueError):
            ht.outer(a, b, out=ht_out_wrong_shape)
        # out buffer split along axis 1 while split=0 is requested
        ht_out_wrong_split = ht.empty((a_split.gshape[0], b_split.gshape[0]),
                                      dtype=ht.float32,
                                      split=1)
        with self.assertRaises(ValueError):
            ht.outer(a_split, b_split, out=ht_out_wrong_split, split=0)
Example #16
0
    def test_expand_dims(self):
        """Check shape, local shape and split of arrays after ``expand_dims``."""
        # vector data, new leading axis
        a = ht.arange(10, device=ht_device)
        b = ht.expand_dims(a, 0)

        self.assertIsInstance(b, ht.DNDarray)
        self.assertEqual(len(b.shape), 2)

        self.assertEqual(b.shape[0], 1)
        self.assertEqual(b.shape[1], a.shape[0])

        self.assertEqual(b.lshape[0], 1)
        self.assertEqual(b.lshape[1], a.shape[0])

        self.assertIs(b.split, None)

        # vector data, new trailing axis (axis index equal to the input's ndim)
        a = ht.arange(12, device=ht_device)
        b = a.expand_dims(1)

        self.assertIsInstance(b, ht.DNDarray)
        self.assertEqual(len(b.shape), 2)

        self.assertEqual(b.shape[0], a.shape[0])
        self.assertEqual(b.shape[1], 1)

        self.assertEqual(b.lshape[0], a.shape[0])
        self.assertEqual(b.lshape[1], 1)

        self.assertIs(b.split, None)

        # volume with intermediate axis
        a = ht.empty((3, 4, 5), device=ht_device)
        b = a.expand_dims(1)

        self.assertIsInstance(b, ht.DNDarray)
        self.assertEqual(len(b.shape), 4)

        self.assertEqual(b.shape[0], a.shape[0])
        self.assertEqual(b.shape[1], 1)
        self.assertEqual(b.shape[2], a.shape[1])
        self.assertEqual(b.shape[3], a.shape[2])

        self.assertEqual(b.lshape[0], a.shape[0])
        self.assertEqual(b.lshape[1], 1)
        self.assertEqual(b.lshape[2], a.shape[1])
        self.assertEqual(b.lshape[3], a.shape[2])

        self.assertIs(b.split, None)

        # volume with negative axis
        a = ht.empty((3, 4, 5), device=ht_device)
        b = a.expand_dims(-4)

        self.assertIsInstance(b, ht.DNDarray)
        self.assertEqual(len(b.shape), 4)

        self.assertEqual(b.shape[0], 1)
        self.assertEqual(b.shape[1], a.shape[0])
        self.assertEqual(b.shape[2], a.shape[1])
        self.assertEqual(b.shape[3], a.shape[2])

        self.assertEqual(b.lshape[0], 1)
        self.assertEqual(b.lshape[1], a.shape[0])
        self.assertEqual(b.lshape[2], a.shape[1])
        self.assertEqual(b.lshape[3], a.shape[2])

        self.assertIs(b.split, None)

        # split volume with negative axis expansion after the split
        a = ht.empty((3, 4, 5), split=1, device=ht_device)
        b = a.expand_dims(-2)

        self.assertIsInstance(b, ht.DNDarray)
        self.assertEqual(len(b.shape), 4)

        self.assertEqual(b.shape[0], a.shape[0])
        self.assertEqual(b.shape[1], a.shape[1])
        self.assertEqual(b.shape[2], 1)
        self.assertEqual(b.shape[3], a.shape[2])

        self.assertEqual(b.lshape[0], a.shape[0])
        self.assertLessEqual(b.lshape[1], a.shape[1])
        self.assertEqual(b.lshape[2], 1)
        self.assertEqual(b.lshape[3], a.shape[2])

        # compare the split by value: an identity check on an int only works
        # by virtue of the interpreter caching small integers
        self.assertEqual(b.split, 1)

        # split volume with negative axis expansion before the split
        a = ht.empty((3, 4, 5), split=2, device=ht_device)
        b = a.expand_dims(-3)

        self.assertIsInstance(b, ht.DNDarray)
        self.assertEqual(len(b.shape), 4)

        self.assertEqual(b.shape[0], a.shape[0])
        self.assertEqual(b.shape[1], 1)
        self.assertEqual(b.shape[2], a.shape[1])
        self.assertEqual(b.shape[3], a.shape[2])

        self.assertEqual(b.lshape[0], a.shape[0])
        self.assertEqual(b.lshape[1], 1)
        self.assertEqual(b.lshape[2], a.shape[1])
        self.assertLessEqual(b.lshape[3], a.shape[2])

        # the new axis sits in front of the split axis, which therefore moves
        # from 2 to 3; compared by value for the same reason as above
        self.assertEqual(b.split, 3)

        # exceptions
        with self.assertRaises(TypeError):
            ht.expand_dims("(3, 4, 5,)", 1)
        with self.assertRaises(TypeError):
            ht.empty((3, 4, 5), device=ht_device).expand_dims("1")
        with self.assertRaises(ValueError):
            ht.empty((3, 4, 5), device=ht_device).expand_dims(4)
        with self.assertRaises(ValueError):
            ht.empty((3, 4, 5), device=ht_device).expand_dims(-5)
Example #17
0
    def test_diag(self):
        """Check ``ht.diag`` against ``torch.diag`` / ``np.diag`` for local and split input."""
        size = ht.MPI_WORLD.size
        rank = ht.MPI_WORLD.rank

        # non-distributed vector: main diagonal, then positive and negative offsets
        data = torch.arange(size * 2, device=device)
        a = ht.array(data, device=ht_device)
        res = ht.diag(a)
        self.assertTrue(torch.equal(res._DNDarray__array, torch.diag(data)))

        res = ht.diag(a, offset=size)
        self.assertTrue(torch.equal(res._DNDarray__array, torch.diag(data, diagonal=size)))

        res = ht.diag(a, offset=-size)
        self.assertTrue(torch.equal(res._DNDarray__array, torch.diag(data, diagonal=-size)))

        # vector split along axis 0: the result is expected to keep that split
        a = ht.array(data, split=0, device=ht_device)
        res = ht.diag(a)
        self.assertEqual(res.split, a.split)
        self.assertEqual(res.shape, (size * 2, size * 2))
        self.assertEqual(res.lshape[res.split], 2)
        exp = torch.diag(data)
        # compare the diagonal entries of rows rank*2 .. rank*2+1
        for i in range(rank * 2, (rank + 1) * 2):
            self.assertTrue(torch.equal(res[i, i]._DNDarray__array, exp[i, i]))

        # offset=size enlarges the result to (3*size, 3*size)
        res = ht.diag(a, offset=size)
        self.assertEqual(res.split, a.split)
        self.assertEqual(res.shape, (size * 3, size * 3))
        self.assertEqual(res.lshape[res.split], 3)
        exp = torch.diag(data, diagonal=size)
        # the upper bound is capped at a.shape[0], the number of input elements
        for i in range(rank * 3, min((rank + 1) * 3, a.shape[0])):
            self.assertTrue(torch.equal(res[i, i + size]._DNDarray__array, exp[i, i + size]))

        res = ht.diag(a, offset=-size)
        self.assertEqual(res.split, a.split)
        self.assertEqual(res.shape, (size * 3, size * 3))
        self.assertEqual(res.lshape[res.split], 3)
        exp = torch.diag(data, diagonal=-size)
        # start no earlier than row `size` so that the column index i - size is >= 0
        for i in range(max(size, rank * 3), (rank + 1) * 3):
            self.assertTrue(torch.equal(res[i, i - size]._DNDarray__array, exp[i, i - size]))

        # applying diag twice to a vector has to give the vector back
        self.assertTrue(ht.equal(ht.diag(ht.diag(a)), a))

        # for input with more than one dimension, diag and diagonal must agree
        a = ht.random.rand(15, 20, 5, split=1, device=ht_device)
        res_1 = ht.diag(a)
        res_2 = ht.diagonal(a)
        self.assertTrue(ht.equal(res_1, res_2))

        # a raw torch tensor is rejected
        with self.assertRaises(ValueError):
            ht.diag(data)

        # offset must not be None
        with self.assertRaises(ValueError):
            ht.diag(a, offset=None)

        # offset must not be a string
        a = ht.arange(size, device=ht_device)
        with self.assertRaises(ValueError):
            ht.diag(a, offset="3")

        # array created from an empty shape list
        a = ht.empty([], device=ht_device)
        with self.assertRaises(ValueError):
            ht.diag(a)

        # rank 0 contributes `size` ones, every other rank an empty tensor
        # NOTE(review): presumably is_split=0 declares the per-rank tensors to be
        # chunks of one array distributed along axis 0 - confirm against ht.array
        if rank == 0:
            data = torch.ones(size, dtype=torch.int32, device=device)
        else:
            data = torch.empty(0, dtype=torch.int32, device=device)
        a = ht.array(data, is_split=0, device=ht_device)
        res = ht.diag(a)
        self.assertTrue(
            torch.equal(
                res[rank, rank]._DNDarray__array, torch.tensor(1, dtype=torch.int32, device=device)
            )
        )

        # comparisons against np.diag through helpers of the test class
        # (heat's `offset` is paired with numpy's `k`)
        self.assert_func_equal_for_tensor(
            np.arange(23),
            heat_func=ht.diag,
            numpy_func=np.diag,
            heat_args={"offset": 2},
            numpy_args={"k": 2},
        )

        self.assert_func_equal(
            (27,),
            heat_func=ht.diag,
            numpy_func=np.diag,
            heat_args={"offset": -3},
            numpy_args={"k": -3},
        )
Example #18
0
    def test_minimum(self):
        """Check the element-wise ``ht.minimum`` for local, split and buffered use."""
        data1 = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
        data2 = [[0, 3, 2], [5, 4, 7], [6, 9, 8], [9, 10, 11]]

        ht_array1 = ht.array(data1)
        ht_array2 = ht.array(data2)
        # torch copies of the same data serve as reference for torch.min
        comparison1 = torch.tensor(data1, device=self.device.torch_device)
        comparison2 = torch.tensor(data2, device=self.device.torch_device)

        # check minimum
        minimum = ht.minimum(ht_array1, ht_array2)

        self.assertIsInstance(minimum, ht.DNDarray)
        self.assertEqual(minimum.shape, (4, 3))
        self.assertEqual(minimum.lshape, (4, 3))
        self.assertEqual(minimum.split, None)
        self.assertEqual(minimum.dtype, ht.int64)
        self.assertEqual(minimum._DNDarray__array.dtype, torch.int64)
        self.assertTrue(
            (minimum._DNDarray__array == torch.min(comparison1,
                                                   comparison2)).all())

        # check minimum over float elements of split 3d tensors
        # TODO: add check for uneven distribution of dimensions (see Issue #273)
        size = ht.MPI_WORLD.size
        torch.manual_seed(1)
        # the second operand has extent 1 along axis 1, so it has to be broadcast
        random_volume_1 = ht.random.randn(12 * size, 3, 3, split=0)
        random_volume_2 = ht.random.randn(12 * size, 1, 3, split=0)
        minimum_volume = ht.minimum(random_volume_1, random_volume_2)

        self.assertIsInstance(minimum_volume, ht.DNDarray)
        self.assertEqual(minimum_volume.shape, (size * 12, 3, 3))
        # NOTE(review): the local shape is asserted to equal the global shape
        # although the operands are split along axis 0 - confirm this is intended
        # when running on more than one process
        self.assertEqual(minimum_volume.lshape, (size * 12, 3, 3))
        self.assertEqual(minimum_volume.dtype, ht.float32)
        self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float32)
        self.assertEqual(minimum_volume.split, random_volume_1.split)

        # check minimum over float elements of split 3d tensors with different split axis
        torch.manual_seed(1)
        random_volume_1_splitdiff = ht.random.randn(size * 3,
                                                    size * 3,
                                                    4,
                                                    split=0)
        random_volume_2_splitdiff = ht.random.randn(size * 3,
                                                    size * 3,
                                                    4,
                                                    split=1)
        minimum_volume_splitdiff = ht.minimum(random_volume_1_splitdiff,
                                              random_volume_2_splitdiff)
        self.assertIsInstance(minimum_volume_splitdiff, ht.DNDarray)
        self.assertEqual(minimum_volume_splitdiff.shape,
                         (size * 3, size * 3, 4))
        self.assertEqual(minimum_volume_splitdiff.lshape,
                         (size * 3, size * 3, 4))
        self.assertEqual(minimum_volume_splitdiff.dtype, ht.float32)
        self.assertEqual(minimum_volume_splitdiff._DNDarray__array.dtype,
                         torch.float32)
        self.assertEqual(minimum_volume_splitdiff.split, 0)

        # operands swapped (split=1 first, split=0 second): result split is still 0
        random_volume_1_splitdiff = ht.random.randn(size * 3,
                                                    size * 3,
                                                    4,
                                                    split=1)
        random_volume_2_splitdiff = ht.random.randn(size * 3,
                                                    size * 3,
                                                    4,
                                                    split=0)
        minimum_volume_splitdiff = ht.minimum(random_volume_1_splitdiff,
                                              random_volume_2_splitdiff)
        self.assertEqual(minimum_volume_splitdiff.split, 0)

        # first operand not split, second split along axis 1: result split is 1
        random_volume_1_split_none = ht.random.randn(size * 3,
                                                     size * 3,
                                                     4,
                                                     split=None)
        random_volume_2_splitdiff = ht.random.randn(size * 3,
                                                    size * 3,
                                                    4,
                                                    split=1)
        minimum_volume_splitdiff = ht.minimum(random_volume_1_split_none,
                                              random_volume_2_splitdiff)
        self.assertEqual(minimum_volume_splitdiff.split, 1)

        # first operand split along axis 0, second not split: result split is 0
        # NOTE(review): despite its name, random_volume_1_split_none is created
        # with split=0 here, and random_volume_2_splitdiff with split=None
        random_volume_1_split_none = ht.random.randn(size * 3,
                                                     size * 3,
                                                     4,
                                                     split=0)
        random_volume_2_splitdiff = ht.random.randn(size * 3,
                                                    size * 3,
                                                    4,
                                                    split=None)
        minimum_volume_splitdiff = ht.minimum(random_volume_1_split_none,
                                              random_volume_2_splitdiff)
        self.assertEqual(minimum_volume_splitdiff.split, 0)

        # check output buffer
        out_shape = ht.stride_tricks.broadcast_shape(random_volume_1.gshape,
                                                     random_volume_2.gshape)
        output = ht.empty(out_shape)
        ht.minimum(random_volume_1, random_volume_2, out=output)
        self.assertIsInstance(output, ht.DNDarray)
        self.assertEqual(output.shape, (ht.MPI_WORLD.size * 12, 3, 3))
        self.assertEqual(output.lshape, (ht.MPI_WORLD.size * 12, 3, 3))
        self.assertEqual(output.dtype, ht.float32)
        self.assertEqual(output._DNDarray__array.dtype, torch.float32)
        # NOTE(review): `output` is created without a split argument, yet its
        # split is asserted to equal random_volume_1.split; presumably ht.minimum
        # adjusts the split of the buffer - confirm
        self.assertEqual(output.split, random_volume_1.split)

        # check exceptions
        # operand whose shape (4, 2, 3) does not fit random_volume_1
        random_volume_3 = ht.random.randn(4, 2, 3, split=0)
        with self.assertRaises(ValueError):
            ht.minimum(random_volume_1, random_volume_3)
        # operand that is a torch tensor
        random_volume_3 = torch.ones(12, 3, 3, device=self.device.torch_device)
        with self.assertRaises(TypeError):
            ht.minimum(random_volume_1, random_volume_3)
        # out buffer that is a torch tensor
        output = torch.ones(12, 3, 3, device=self.device.torch_device)
        with self.assertRaises(TypeError):
            ht.minimum(random_volume_1, random_volume_2, out=output)
        # out buffer of shape (12, 4, 3), which differs from the result shape
        output = ht.ones((12, 4, 3))
        with self.assertRaises(ValueError):
            ht.minimum(random_volume_1, random_volume_2, out=output)
Example #19
0
    def _initialize_cluster_centers(self, X):
        """
        Initializes the K-Means centroids.

        The strategy is selected by ``self.init``; the resulting centroids are
        stored in ``self._cluster_centers``:

        * ``"random"``: the row indices of ``X`` are cut into ``n_clusters``
          consecutive ranges of equal length and one row is drawn from each.
        * ``ht.DNDarray``: the passed centroids are validated and used directly
          (resplit to ``None``).
        * ``"kmeans++"``: the first centroid is a randomly drawn row of ``X``;
          every further one is drawn with a probability proportional to the
          row's minimal distance to the rows of the centroid array.

        Parameters
        ----------
        X : ht.DNDarray, shape=(n_point, n_features)
            The data to initialize the clusters for.

        Raises
        ------
        NotImplementedError
            If ``init`` is ``"random"`` or ``"kmeans++"`` and ``X`` is split
            along an axis other than 0.
        ValueError
            If passed centroids are not two-dimensional or do not match the
            cluster count / feature count, or if ``init`` is not supported.
        """
        # seed the random number generator if a random state was configured
        if self.random_state is not None:
            ht.random.seed(self.random_state)

        def broadcast_point(sample, displ):
            # Return row `sample` of X on every process.
            # The owner is the last rank whose displacement does not exceed
            # the sampled index.
            proc = 0
            for p in range(X.comm.size):
                if displ[p] > sample:
                    break
                proc = p
            # receive buffer; created on the same device/communicator as the
            # data so that it matches the buffer sent by the owning process
            point = ht.zeros(X.shape[1],
                             dtype=X.dtype,
                             device=X.device,
                             comm=X.comm)
            if X.comm.rank == proc:
                # NOTE(review): the local index is offset by displ even when
                # X.split is None -- confirm this is intended for unsplit data
                idx = sample - displ[proc]
                point = ht.array(X.lloc[idx, :], device=X.device, comm=X.comm)
            point.comm.Bcast(point, root=proc)
            return point

        # initialize the centroids by randomly picking some of the points
        if self.init == "random":
            if X.split is not None and X.split != 0:
                raise NotImplementedError(
                    "Not implemented for other splitting-axes")

            # Samples will be equally distributed drawn from all involved processes
            _, displ, _ = X.comm.counts_displs_shape(shape=X.shape, axis=0)
            centroids = ht.empty((self.n_clusters, X.shape[1]),
                                 split=None,
                                 device=X.device,
                                 comm=X.comm)
            # length of the index range each centroid is drawn from
            stride = X.gshape[0] // self.n_clusters
            for i in range(self.n_clusters):
                sample = ht.random.randint(stride * i,
                                           stride * (i + 1)).item()
                centroids[i, :] = broadcast_point(sample, displ)

            self._cluster_centers = centroids

        # directly passed centroids
        elif isinstance(self.init, ht.DNDarray):
            if len(self.init.shape) != 2:
                # report the number of dimensions, not the length of axis 0
                raise ValueError(
                    "passed centroids need to be two-dimensional, but are {}".
                    format(len(self.init.shape)))
            if self.init.shape[0] != self.n_clusters or self.init.shape[
                    1] != X.shape[1]:
                raise ValueError(
                    "passed centroids do not match cluster count or data shape"
                )
            self._cluster_centers = self.init.resplit(None)

        # kmeans++, smart centroid guessing
        elif self.init == "kmeans++":
            if X.split is not None and X.split != 0:
                raise NotImplementedError(
                    "Not implemented for other splitting-axes")

            centroids = ht.zeros((self.n_clusters, X.shape[1]),
                                 split=None,
                                 device=X.device,
                                 comm=X.comm)
            # the upper bound of randint is exclusive (the "random" strategy
            # above relies on that too), so pass the full number of points
            sample = ht.random.randint(0, X.shape[0]).item()
            _, displ, _ = X.comm.counts_displs_shape(shape=X.shape, axis=0)
            centroids[0, :] = broadcast_point(sample, displ)

            for i in range(1, self.n_clusters):
                # NOTE(review): rows i..n_clusters-1 of `centroids` are still
                # all-zero here and take part in the minimum -- confirm
                distances = ht.spatial.distance.cdist(
                    X, centroids, quadratic_expansion=True)
                D2 = distances.min(axis=1)
                D2.resplit_(axis=None)
                prob = D2 / D2.sum()

                # inverse-CDF sampling: pick the first index at which the
                # running probability mass exceeds the uniform draw
                threshold = ht.random.rand().item()
                sample = 0
                cumulative = 0
                for j in range(len(prob)):
                    if cumulative > threshold:
                        break
                    cumulative += prob[j].item()
                    sample = j

                centroids[i, :] = broadcast_point(sample, displ)

            self._cluster_centers = centroids

        else:
            raise ValueError(
                'init needs to be one of "random", ht.DNDarray or "kmeans++", but was {}'
                .format(self.init))
Example #20
0
    def test_argmax(self):
        """
        Checks ht.argmax on unsplit and split arrays, along and across the
        split axis, with and without an output buffer, and verifies that
        invalid ``axis`` arguments raise.
        """
        torch.manual_seed(1)
        data = ht.random.randn(3, 4, 5)

        # 3D array without split, reduction along the first axis;
        # the result is compared against torch's argmax on the local tensor
        result = ht.argmax(data, axis=0)
        self.assertIsInstance(result, ht.DNDarray)
        self.assertEqual(result.dtype, ht.int64)
        self.assertEqual(result._DNDarray__array.dtype, torch.int64)
        self.assertEqual(result.shape, (4, 5))
        self.assertEqual(result.lshape, (4, 5))
        self.assertEqual(result.split, None)
        self.assertTrue(
            (result._DNDarray__array == data._DNDarray__array.argmax(0)).all())

        # 3D array without split, reduction along the last axis;
        # keepdim=True keeps the reduced axis with length 1
        result = ht.argmax(data, axis=-1, keepdim=True)
        self.assertIsInstance(result, ht.DNDarray)
        self.assertEqual(result.dtype, ht.int64)
        self.assertEqual(result._DNDarray__array.dtype, torch.int64)
        self.assertEqual(result.shape, (3, 4, 1))
        self.assertEqual(result.lshape, (3, 4, 1))
        self.assertEqual(result.split, None)
        self.assertTrue(
            (result._DNDarray__array == data._DNDarray__array.argmax(
                -1, keepdim=True)).all())

        # 1D split array, no axis given: the 20 values -10..9 have their
        # maximum at the last position, index 19
        data = ht.arange(-10, 10, split=0)
        result = ht.argmax(data)
        self.assertIsInstance(result, ht.DNDarray)
        self.assertEqual(result.dtype, ht.int64)
        self.assertEqual(result._DNDarray__array.dtype, torch.int64)
        self.assertEqual(result.shape, (1, ))
        self.assertEqual(result.lshape, (1, ))
        self.assertEqual(result.split, None)
        self.assertTrue((result._DNDarray__array == torch.tensor(
            [19], device=self.device.torch_device)))

        # 2D array built with is_split=0, reduction along the non-split axis:
        # the asserts expect a global length of 4 rows per process, a result
        # that is still split along axis 0, and local values equal to torch's
        # argmax on the local tensor
        data = ht.array(ht.random.randn(4, 5), is_split=0)
        result = ht.argmax(data, axis=1)
        expected = torch.argmax(data._DNDarray__array, dim=1)
        self.assertIsInstance(result, ht.DNDarray)
        self.assertEqual(result.dtype, ht.int64)
        self.assertEqual(result._DNDarray__array.dtype, torch.int64)
        self.assertEqual(result.shape, (ht.MPI_WORLD.size * 4, ))
        self.assertEqual(result.lshape, (4, ))
        self.assertEqual(result.split, 0)
        self.assertTrue((result._DNDarray__array == expected).all())

        # 2D array split along axis 0, reduction along that same axis;
        # the result is expected to be unsplit
        size = ht.MPI_WORLD.size * 2
        data = ht.tril(ht.ones((size, size), split=0), k=-1)

        result = ht.argmax(data, axis=0)
        self.assertIsInstance(result, ht.DNDarray)
        self.assertEqual(result.dtype, ht.int64)
        self.assertEqual(result._DNDarray__array.dtype, torch.int64)
        self.assertEqual(result.shape, (size, ))
        self.assertEqual(result.lshape, (size, ))
        self.assertEqual(result.split, None)
        # the value check (no returned index is 0) is only run when the result
        # does not live on the GPU; argmax works differently there
        if not (torch.cuda.is_available() and result.device == ht.gpu):
            self.assertTrue((result._DNDarray__array != 0).all())

        # same reduction across the split axis, this time writing into a
        # pre-allocated output array; the asserts inspect `output`, which is
        # expected to hold int64 indices afterwards
        size = ht.MPI_WORLD.size * 2
        data = ht.tril(ht.ones((size, size), split=0), k=-1)

        output = ht.empty((size, ))
        result = ht.argmax(data, axis=0, out=output)

        self.assertIsInstance(result, ht.DNDarray)
        self.assertEqual(output.dtype, ht.int64)
        self.assertEqual(output._DNDarray__array.dtype, torch.int64)
        self.assertEqual(output.shape, (size, ))
        self.assertEqual(output.lshape, (size, ))
        self.assertEqual(output.split, None)
        # the value check (no returned index is 0) is only run when the output
        # does not live on the GPU; argmax works differently there
        if not (torch.cuda.is_available() and output.device == ht.gpu):
            self.assertTrue((output._DNDarray__array != 0).all())

        # check exceptions: tuple, float and string axes must raise TypeError,
        # an axis outside the valid range must raise ValueError
        with self.assertRaises(TypeError):
            data.argmax(axis=(0, 1))
        with self.assertRaises(TypeError):
            data.argmax(axis=1.1)
        with self.assertRaises(TypeError):
            data.argmax(axis="y")
        with self.assertRaises(ValueError):
            ht.argmax(data, axis=-4)