Example #1
    def _spectral_embedding(self, X):
        """
        Helper function to embed the dataset X into the eigenvectors of the graph Laplacian matrix
        Returns
        -------
        ht.DNDarray, shape=(m_lanczos):
            Eigenvalues of the graph's Laplacian matrix.
        ht.DNDarray, shape=(n, m_lanczos):
            Eigenvectors of the graph's Laplacian matrix.
        """
        L = self._laplacian.construct(X)
        # eigenvalue and eigenvector calculation via the Lanczos algorithm
        v0 = ht.ones((L.shape[0], ), dtype=L.dtype, split=0,
                     device=L.device) / math.sqrt(L.shape[0])
        V, T = ht.lanczos(L, self.n_lanczos, v0)

        # calculate and sort the eigenvalues and eigenvectors of the tridiagonal matrix T
        # note: torch.eig is deprecated in recent PyTorch in favor of torch.linalg.eig;
        # it returns the eigenvalues as (real, imag) pairs, hence the [:, 0] below
        eig_val, eig_vec = torch.eig(T._DNDarray__array, eigenvectors=True)
        # if x is an eigenvector of T, then y = V @ x is the corresponding (approximate) eigenvector of L
        eig_val, idx = torch.sort(eig_val[:, 0], dim=0)
        eigenvalues = ht.array(eig_val)
        eigenvectors = ht.matmul(V, ht.array(eig_vec))[:, idx]

        return eigenvalues, eigenvectors
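
The comment in the code relies on a standard Lanczos fact: an eigenpair (lam, x) of the small tridiagonal matrix T lifts to a Ritz pair (lam, V @ x) that approximates an eigenpair of L. A self-contained sketch of that identity in plain PyTorch (a textbook Lanczos loop on a random symmetric stand-in, not Heat's ht.lanczos):

import torch

torch.manual_seed(0)
n, m = 20, 8

# small symmetric matrix standing in for the graph Laplacian
A = torch.randn(n, n, dtype=torch.float64)
L = A + A.T

# plain three-term Lanczos recurrence, m steps, no reorthogonalization
V = torch.zeros(n, m, dtype=torch.float64)
alphas, betas = [], []
v = torch.ones(n, dtype=torch.float64) / n ** 0.5
v_prev, beta = torch.zeros(n, dtype=torch.float64), 0.0
for j in range(m):
    V[:, j] = v
    w = L @ v - beta * v_prev
    alpha = torch.dot(w, v)
    w = w - alpha * v
    beta = torch.linalg.norm(w)
    alphas.append(alpha)
    betas.append(beta)
    v_prev, v = v, w / beta

T = (torch.diag(torch.stack(alphas))
     + torch.diag(torch.stack(betas[:-1]), 1)
     + torch.diag(torch.stack(betas[:-1]), -1))

# an eigenpair (lam, x) of T lifts to the Ritz pair (lam, V @ x) of L
lam, X = torch.linalg.eigh(T)
y = V @ X[:, -1]
print(torch.linalg.norm(L @ y - lam[-1] * y))  # small; shrinks as m grows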
Example #2
    def test_eq(self):
        result = ht.array([[False, True], [False, False]])

        self.assertTrue(
            ht.equal(ht.eq(self.a_scalar, self.a_scalar), ht.array(True)))
        self.assertTrue(ht.equal(ht.eq(self.a_tensor, self.a_scalar), result))
        self.assertTrue(ht.equal(ht.eq(self.a_scalar, self.a_tensor), result))
        self.assertTrue(
            ht.equal(ht.eq(self.a_tensor, self.another_tensor), result))
        self.assertTrue(ht.equal(ht.eq(self.a_tensor, self.a_vector), result))
        self.assertTrue(
            ht.equal(ht.eq(self.a_tensor, self.an_int_scalar), result))
        self.assertTrue(
            ht.equal(ht.eq(self.a_split_tensor, self.a_tensor), result))

        self.assertEqual(
            ht.eq(self.a_split_tensor, self.a_tensor).dtype, ht.bool)

        with self.assertRaises(ValueError):
            ht.eq(self.a_tensor, self.another_vector)
        with self.assertRaises(TypeError):
            ht.eq(self.a_tensor, self.erroneous_type)
        with self.assertRaises(TypeError):
            ht.eq("self.a_tensor", "s")
Example #3
 def test_raises(self):
     length = torch.tensor([i + 20 for i in range(2)],
                           device=self.device.torch_device)
     test = torch.arange(torch.prod(length),
                         dtype=torch.float64,
                         device=self.device.torch_device).reshape(
                             [i + 20 for i in range(2)])
     a = ht.array(test, split=1)
     tiles = ht.tiling.SplitTiles(a)
     with self.assertRaises(TypeError):
         tiles["p"]
     with self.assertRaises(TypeError):
         tiles[0] = "p"
     with self.assertRaises(TypeError):
         tiles["p"] = "p"
Example #4
    def test_pow(self):
        result = ht.array([[1.0, 4.0], [9.0, 16.0]])
        commutated_result = ht.array([[2.0, 4.0], [8.0, 16.0]])

        self.assertTrue(
            ht.equal(ht.pow(self.a_scalar, self.a_scalar), ht.array(4.0)))
        self.assertTrue(ht.equal(ht.pow(self.a_tensor, self.a_scalar), result))
        self.assertTrue(
            ht.equal(ht.pow(self.a_scalar, self.a_tensor), commutated_result))
        self.assertTrue(
            ht.equal(ht.pow(self.a_tensor, self.another_tensor), result))
        self.assertTrue(ht.equal(ht.pow(self.a_tensor, self.a_vector), result))
        self.assertTrue(
            ht.equal(ht.pow(self.a_tensor, self.an_int_scalar), result))
        self.assertTrue(
            ht.equal(ht.pow(self.a_split_tensor, self.a_tensor),
                     commutated_result))

        with self.assertRaises(ValueError):
            ht.pow(self.a_tensor, self.another_vector)
        with self.assertRaises(TypeError):
            ht.pow(self.a_tensor, self.erroneous_type)
        with self.assertRaises(TypeError):
            ht.pow("T", "s")
Example #5
    def test_div(self):
        result = ht.array([[0.5, 1.0], [1.5, 2.0]])
        commutated_result = ht.array([[2.0, 1.0], [2.0 / 3.0, 0.5]])

        self.assertTrue(
            ht.equal(ht.div(self.a_scalar, self.a_scalar), ht.float32(1.0)))
        self.assertTrue(ht.equal(ht.div(self.a_tensor, self.a_scalar), result))
        self.assertTrue(
            ht.equal(ht.div(self.a_scalar, self.a_tensor), commutated_result))
        self.assertTrue(
            ht.equal(ht.div(self.a_tensor, self.another_tensor), result))
        self.assertTrue(ht.equal(ht.div(self.a_tensor, self.a_vector), result))
        self.assertTrue(
            ht.equal(ht.div(self.a_tensor, self.an_int_scalar), result))
        self.assertTrue(
            ht.equal(ht.div(self.a_split_tensor, self.a_tensor),
                     commutated_result))

        with self.assertRaises(ValueError):
            ht.div(self.a_tensor, self.another_vector)
        with self.assertRaises(TypeError):
            ht.div(self.a_tensor, self.erroneous_type)
        with self.assertRaises(TypeError):
            ht.div("T", "s")
Example #6
    def test_trunc(self):
        base_array = np.random.randn(20)

        comparison = torch.tensor(base_array, dtype=torch.float64, device=device).trunc()

        # trunc of float32
        float32_tensor = ht.array(base_array, dtype=ht.float32, device=ht_device)
        float32_trunc = float32_tensor.trunc()
        self.assertIsInstance(float32_trunc, ht.DNDarray)
        self.assertEqual(float32_trunc.dtype, ht.float32)
        self.assertTrue((float32_trunc._DNDarray__array == comparison.float()).all())

        # trunc of float64
        float64_tensor = ht.array(base_array, dtype=ht.float64, device=ht_device)
        float64_trunc = float64_tensor.trunc()
        self.assertIsInstance(float64_trunc, ht.DNDarray)
        self.assertEqual(float64_trunc.dtype, ht.float64)
        self.assertTrue((float64_trunc._DNDarray__array == comparison).all())

        # check exceptions
        with self.assertRaises(TypeError):
            ht.trunc([0, 1, 2, 3])
        with self.assertRaises(TypeError):
            ht.trunc(object())
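
As an aside, trunc rounds toward zero, which differs from floor only for negative inputs; a quick sketch:

ht.trunc(ht.array([-1.7, 1.7]))  # -> [-1.,  1.]  (rounds toward zero)
ht.floor(ht.array([-1.7, 1.7]))  # -> [-2.,  1.]  (rounds toward -inf)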
Example #7
    def test_add(self):
        result = ht.array([[3.0, 4.0], [5.0, 6.0]])

        self.assertTrue(ht.equal(ht.add(self.a_scalar, self.a_scalar), ht.float32([4.0])))
        self.assertTrue(ht.equal(ht.add(self.a_tensor, self.a_scalar), result))
        self.assertTrue(ht.equal(ht.add(self.a_scalar, self.a_tensor), result))
        self.assertTrue(ht.equal(ht.add(self.a_tensor, self.another_tensor), result))
        self.assertTrue(ht.equal(ht.add(self.a_tensor, self.a_vector), result))
        self.assertTrue(ht.equal(ht.add(self.a_tensor, self.an_int_scalar), result))
        self.assertTrue(ht.equal(ht.add(self.a_split_tensor, self.a_tensor), result))

        with self.assertRaises(ValueError):
            ht.add(self.a_tensor, self.another_vector)
        with self.assertRaises(TypeError):
            ht.add(self.a_tensor, self.erroneous_type)
        with self.assertRaises(TypeError):
            ht.add("T", "s")
Example #8
    def test_log1p(self):
        elements = 15
        tmp = torch.arange(1,
                           elements,
                           dtype=torch.float64,
                           device=self.device.torch_device).log1p()
        comparison = ht.array(tmp)

        # logarithm of float32
        float32_tensor = ht.arange(1, elements, dtype=ht.float32)
        float32_log1p = ht.log1p(float32_tensor)
        self.assertIsInstance(float32_log1p, ht.DNDarray)
        self.assertEqual(float32_log1p.dtype, ht.float32)
        self.assertTrue(
            ht.allclose(float32_log1p, comparison.astype(ht.float32)))

        # logarithm of float64
        float64_tensor = ht.arange(1, elements, dtype=ht.float64)
        float64_log1p = ht.log1p(float64_tensor)
        self.assertIsInstance(float64_log1p, ht.DNDarray)
        self.assertEqual(float64_log1p.dtype, ht.float64)
        self.assertTrue(ht.allclose(float64_log1p, comparison))

        # logarithm of ints, automatic conversion to intermediate floats
        int32_tensor = ht.arange(1, elements, dtype=ht.int32)
        int32_log1p = ht.log1p(int32_tensor)
        self.assertIsInstance(int32_log1p, ht.DNDarray)
        self.assertEqual(int32_log1p.dtype, ht.float64)
        self.assertTrue(ht.allclose(int32_log1p, comparison))

        # logarithm of longs, automatic conversion to intermediate floats
        int64_tensor = ht.arange(1, elements, dtype=ht.int64)
        int64_log1p = int64_tensor.log1p()
        self.assertIsInstance(int64_log1p, ht.DNDarray)
        self.assertEqual(int64_log1p.dtype, ht.float64)
        self.assertTrue(ht.allclose(int64_log1p, comparison))

        # check exceptions
        with self.assertRaises(TypeError):
            ht.log1p([1, 2, 3])
        with self.assertRaises(TypeError):
            ht.log1p("hello world")
Example #9
    def test_sqrt(self):
        elements = 25
        tmp = torch.arange(elements,
                           dtype=torch.float64,
                           device=self.device.torch_device).sqrt()
        comparison = ht.array(tmp)

        # square roots of float32
        float32_tensor = ht.arange(elements, dtype=ht.float32)
        float32_sqrt = ht.sqrt(float32_tensor)
        self.assertIsInstance(float32_sqrt, ht.DNDarray)
        self.assertEqual(float32_sqrt.dtype, ht.float32)
        self.assertTrue(
            ht.allclose(float32_sqrt, comparison.astype(ht.float32), 1e-06))

        # square roots of float64
        float64_tensor = ht.arange(elements, dtype=ht.float64)
        float64_sqrt = ht.sqrt(float64_tensor)
        self.assertIsInstance(float64_sqrt, ht.DNDarray)
        self.assertEqual(float64_sqrt.dtype, ht.float64)
        self.assertTrue(ht.allclose(float64_sqrt, comparison, 1e-06))

        # square roots of ints, automatic conversion to intermediate floats
        int32_tensor = ht.arange(elements, dtype=ht.int32)
        int32_sqrt = ht.sqrt(int32_tensor)
        self.assertIsInstance(int32_sqrt, ht.DNDarray)
        self.assertEqual(int32_sqrt.dtype, ht.float64)
        self.assertTrue(ht.allclose(int32_sqrt, comparison, 1e-06))

        # square roots of longs, automatic conversion to intermediate floats
        int64_tensor = ht.arange(elements, dtype=ht.int64)
        int64_sqrt = int64_tensor.sqrt()
        self.assertIsInstance(int64_sqrt, ht.DNDarray)
        self.assertEqual(int64_sqrt.dtype, ht.float64)
        self.assertTrue(ht.allclose(int64_sqrt, comparison, 1e-06))

        # check exceptions
        with self.assertRaises(TypeError):
            ht.sqrt([1, 2, 3])
        with self.assertRaises(TypeError):
            ht.sqrt("hello world")
Example #10
    def test_invert(self):
        int8_tensor = ht.array([[0, 1], [2, -2]], dtype=ht.int8)
        uint8_tensor = ht.array([[23, 2], [45, 234]], dtype=ht.uint8)
        bool_tensor = ht.array([[False, True], [True, False]])
        float_tensor = ht.array([[0.4, 1.3], [1.3, -2.1]])
        int8_result = ht.array([[-1, -2], [-3, 1]])
        uint8_result = ht.array([[232, 253], [210, 21]])
        bool_result = ht.array([[True, False], [False, True]])

        self.assertTrue(ht.equal(ht.invert(int8_tensor), int8_result))
        self.assertTrue(ht.equal(ht.invert(int8_tensor.copy().resplit_(0)), int8_result))
        self.assertTrue(ht.equal(ht.invert(uint8_tensor), uint8_result))
        self.assertTrue(ht.equal(ht.invert(bool_tensor), bool_result))

        with self.assertRaises(TypeError):
            ht.invert(float_tensor)
Example #11
    def test_size_gnumel(self):
        a = ht.zeros((10, 10, 10), split=None)
        self.assertEqual(a.size, 10 * 10 * 10)
        self.assertEqual(a.gnumel, 10 * 10 * 10)

        a = ht.zeros((10, 10, 10), split=0)
        self.assertEqual(a.size, 10 * 10 * 10)
        self.assertEqual(a.gnumel, 10 * 10 * 10)

        a = ht.zeros((10, 10, 10), split=1)
        self.assertEqual(a.size, 10 * 10 * 10)
        self.assertEqual(a.gnumel, 10 * 10 * 10)

        a = ht.zeros((10, 10, 10), split=2)
        self.assertEqual(a.size, 10 * 10 * 10)
        self.assertEqual(a.gnumel, 10 * 10 * 10)

        self.assertEqual(ht.array(0).size, 1)
Example #12
    def test_size_gnumel(self):
        a = ht.zeros((10, 10, 10), split=None, device=ht_device)
        self.assertEqual(a.size, 10 * 10 * 10)
        self.assertEqual(a.gnumel, 10 * 10 * 10)

        a = ht.zeros((10, 10, 10), split=0, device=ht_device)
        self.assertEqual(a.size, 10 * 10 * 10)
        self.assertEqual(a.gnumel, 10 * 10 * 10)

        a = ht.zeros((10, 10, 10), split=1, device=ht_device)
        self.assertEqual(a.size, 10 * 10 * 10)
        self.assertEqual(a.gnumel, 10 * 10 * 10)

        a = ht.zeros((10, 10, 10), split=2, device=ht_device)
        self.assertEqual(a.size, 10 * 10 * 10)
        self.assertEqual(a.gnumel, 10 * 10 * 10)

        self.assertEqual(ht.array(0, device=ht_device).size, 1)
Example #13
    def test_logical_not(self):
        first_tensor = ht.array([[True, True], [False, False]])
        second_tensor = ht.array([[True, False], [True, False]])
        int_tensor = ht.array([[-1, 0], [2, 1]])
        float_tensor = ht.array([[-1.4, 0.2], [2.5, 1.3]])

        self.assertTrue(
            ht.equal(ht.logical_not(first_tensor),
                     ht.array([[False, False], [True, True]])))
        self.assertTrue(
            ht.equal(ht.logical_not(second_tensor),
                     ht.array([[False, True], [False, True]])))
        self.assertTrue(
            ht.equal(ht.logical_not(int_tensor),
                     ht.array([[False, True], [False, False]])))
        self.assertTrue(
            ht.equal(
                ht.logical_not(float_tensor.copy().resplit_(0)),
                ht.array([[False, False], [False, False]]),
            ))
Example #14
def create_fold(dataset_x, dataset_y, size, seed=None):
    """
    Randomly splits the dataset into two parts for cross-validation.

    Parameters
    ----------
    dataset_x : ht.DNDarray
        data vectors, required
    dataset_y : ht.DNDarray
        labels for dataset_x, required
    size : int
        the size of the split to create
    seed: int, optional
        seed for the random generator, allows deterministic testing

    Returns
    -------
    fold_x : ht.DNDarray
        DNDarray of shape (size,) containing data vectors from dataset_x
    fold_y : ht.DNDarray
        DNDarray of shape (size,) containing labels from dataset_y
    verification_x : ht.DNDarray
        DNDarray of shape (len(dataset_x) - size,) containing all items from dataset_x not in fold_x
    verification_y : ht.DNDarray
        DNDarray of shape (len(dataset_y) - size,) containing all items from dataset_y not in fold_y
    """
    assert len(dataset_y) == len(dataset_x)
    assert size < len(dataset_x)

    data_length = len(dataset_x)

    if seed is not None:
        random.seed(seed)
    indices = list(range(data_length))
    random.shuffle(indices)

    data_indices = ht.array(indices[0:size], split=0)
    verification_indices = ht.array(indices[size:], split=0)

    fold_x = ht.array(dataset_x[data_indices], is_split=0)
    fold_y = ht.array(dataset_y[data_indices], is_split=0)
    verification_y = ht.array(dataset_y[verification_indices], is_split=0)
    verification_x = ht.array(dataset_x[verification_indices], is_split=0)

    # Balance arrays
    fold_x.balance_()
    fold_y.balance_()
    verification_y.balance_()
    verification_x.balance_()

    return fold_x, fold_y, verification_x, verification_y
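
A hedged usage sketch (the arrays below are synthetic stand-ins, not a real dataset):

import heat as ht

x = ht.random.rand(100, 4, split=0)           # 100 feature vectors
y = ht.random.randint(0, 3, (100,), split=0)  # 100 integer labels
fold_x, fold_y, ver_x, ver_y = create_fold(x, y, size=20, seed=42)
# fold_* hold 20 randomly drawn samples, ver_* the remaining 80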
Example #15
def nonblocking_hook(grad_loc):
    # The PyTorch docs state that :attr:`grad` may not be modified in place, so it has to be cloned
    # (cf. https://pytorch.org/docs/stable/tensors.html#torch.Tensor.register_hook).
    # This appears to hold in practice: a RuntimeError is raised otherwise.
    grad_loc_cpy = grad_loc.clone()

    # counterbalance local gradient averaging
    grad_loc_cpy *= bLoc

    # wrap local gradient into heat tensor
    grad_ht = ht.array(grad_loc_cpy, copy=False)

    # perform MPI IAllreduce to compute global gradient, returns wait handle
    wait_handle = grad_ht.comm.Iallreduce(ht.MPI.IN_PLACE, grad_ht, ht.MPI.SUM)

    # inject wait handle into local gradient
    setattr(grad_loc_cpy, "wait_handle", wait_handle)

    return grad_loc_cpy
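
For context, a hedged sketch of how such a hook might be wired up (model is a placeholder here, and bLoc is assumed to be a local averaging factor defined in the enclosing scope):

# hypothetical wiring: register the hook on every trainable parameter
for param in model.parameters():
    param.register_hook(nonblocking_hook)

# during loss.backward() each hook fires and starts an MPI Iallreduce;
# the training loop is then expected to call .Wait() on the handles
# attached to the gradients before the optimizer consumes them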
Example #16
    def test_exp2(self):
        elements = 10
        tmp = np.exp2(torch.arange(elements, dtype=torch.float64))
        comparison = ht.array(tmp)

        # exponential of float32
        float32_tensor = ht.arange(elements, dtype=ht.float32)
        float32_exp2 = ht.exp2(float32_tensor)
        self.assertIsInstance(float32_exp2, ht.DNDarray)
        self.assertEqual(float32_exp2.dtype, ht.float32)
        self.assertTrue(
            ht.allclose(float32_exp2, comparison.astype(ht.float32)))

        # exponential of float64
        float64_tensor = ht.arange(elements, dtype=ht.float64)
        float64_exp2 = ht.exp2(float64_tensor)
        self.assertIsInstance(float64_exp2, ht.DNDarray)
        self.assertEqual(float64_exp2.dtype, ht.float64)
        self.assertTrue(ht.allclose(float64_exp2, comparison))

        # exponential of ints, automatic conversion to intermediate floats
        int32_tensor = ht.arange(elements, dtype=ht.int32)
        int32_exp2 = ht.exp2(int32_tensor)
        self.assertIsInstance(int32_exp2, ht.DNDarray)
        self.assertEqual(int32_exp2.dtype, ht.float64)
        self.assertTrue(ht.allclose(int32_exp2, comparison))

        # exponential of longs, automatic conversion to intermediate floats
        int64_tensor = ht.arange(elements, dtype=ht.int64)
        int64_exp2 = int64_tensor.exp2()
        self.assertIsInstance(int64_exp2, ht.DNDarray)
        self.assertEqual(int64_exp2.dtype, ht.float64)
        self.assertTrue(ht.allclose(int64_exp2, comparison))

        # check exceptions
        with self.assertRaises(TypeError):
            ht.exp2([1, 2, 3])
        with self.assertRaises(TypeError):
            ht.exp2("hello world")
Example #17
    def test_split_zero(self):
        X = ht.load_hdf5("heat/datasets/iris.h5", dataset="data", split=0)

        # Generate keys for the iris.h5 dataset
        keys = []
        for i in range(50):
            keys.append(0)
        for i in range(50, 100):
            keys.append(1)
        for i in range(100, 150):
            keys.append(2)
        Y = ht.array(keys, split=0)

        knn = KNN(X, Y, 5)

        result = knn.predict(X)

        self.assertIsInstance(result, ht.DNDarray)
        self.assertEqual(result.shape, Y.shape)
Example #18
    def test_neg(self):
        self.assertTrue(ht.equal(ht.neg(ht.array([-1, 1])), ht.array([1, -1])))
        self.assertTrue(ht.equal(-ht.array([-1.0, 1.0]), ht.array([1.0,
                                                                   -1.0])))

        a = ht.array([1 + 1j, 2 - 2j, 3, 4j, 5], split=0)
        b = out = ht.empty(5, dtype=ht.complex64, split=0)
        ht.negative(a, out=out)
        self.assertTrue(
            ht.equal(out, ht.array([-1 - 1j, -2 + 2j, -3, -4j, -5], split=0)))
        self.assertIs(out, b)

        with self.assertRaises(TypeError):
            ht.neg(1)
Example #19
    def test_sign(self):
        # floats 1d
        a = ht.array([-1, -0.5, 0, 0.5, 1])
        signed = ht.sign(a)
        comparison = ht.array([-1.0, -1, 0, 1, 1])

        self.assertEqual(signed.dtype, comparison.dtype)
        self.assertEqual(signed.shape, comparison.shape)
        self.assertEqual(signed.device, a.device)
        self.assertEqual(signed.split, a.split)
        self.assertTrue(ht.equal(signed, comparison))

        # complex + 2d + split
        a = ht.array([[1 - 2j, -0.5 + 1j], [0, 4 + 6j]], split=0)
        signed = ht.sign(a)
        comparison = ht.array([[1 + 0j, -1 + 0j], [0 + 0j, 1 + 0j]], split=0)

        self.assertEqual(signed.dtype, comparison.dtype)
        self.assertEqual(signed.shape, comparison.shape)
        self.assertEqual(signed.device, a.device)
        self.assertEqual(signed.split, a.split)
        self.assertTrue(ht.allclose(signed.real, comparison.real))
        self.assertTrue(ht.allclose(signed.imag, comparison.imag, atol=2e-5))

        # complex + split + out
        a = ht.array([[1 - 2j, -0.5 + 1j], [0, 4 + 6j]], split=1)
        b = ht.empty_like(a)
        signed = ht.sign(a, b)
        comparison = ht.array([[1 + 0j, -1 + 0j], [0 + 0j, 1 + 0j]], split=1)

        self.assertIs(b, signed)
        self.assertEqual(signed.dtype, comparison.dtype)
        self.assertEqual(signed.shape, comparison.shape)
        self.assertEqual(signed.device, a.device)
        self.assertEqual(signed.split, a.split)
        self.assertTrue(ht.allclose(signed.real, comparison.real))
        self.assertTrue(ht.allclose(signed.imag, comparison.imag, atol=2e-5))

        # zeros + 3d + complex + split
        a = ht.zeros((4, 4, 4), dtype=ht.complex128, split=2)
        signed = ht.sign(a)
        comparison = ht.zeros((4, 4, 4), dtype=ht.complex128, split=2)

        self.assertEqual(signed.dtype, comparison.dtype)
        self.assertEqual(signed.shape, comparison.shape)
        self.assertEqual(signed.device, a.device)
        self.assertEqual(signed.split, a.split)
        self.assertTrue(ht.allclose(signed.real, comparison.real))
        self.assertTrue(ht.allclose(signed.imag, comparison.imag, atol=2e-5))
Example #20
 def test_misc_coverage(self):
     length = torch.tensor([i + 5 for i in range(3)], device=self.device.torch_device)
     test = torch.arange(
         torch.prod(length), dtype=torch.float64, device=self.device.torch_device
     ).reshape([i + 5 for i in range(3)])
     a = ht.array(test, split=None)
     tiles = ht.tiling.SplitTiles(a)
     self.assertTrue(torch.all(tiles.tile_locations == a.comm.rank))
     a = ht.resplit(a, 0)
     tiles = ht.tiling.SplitTiles(a)
     if a.comm.size == 3:
          # the adjustment logic in these tests would mirror the code under test,
          #   therefore fixed expected values are asserted for one process configuration
         tile_dims = torch.tensor(
             [[2.0, 2.0, 1.0], [2.0, 2.0, 2.0], [3.0, 2.0, 2.0]], device=self.device.torch_device
         )
         res = tiles.tile_dimensions
         self.assertTrue(torch.equal(tile_dims, res))
         testing_tensor = torch.tensor(
             [
                 [
                     [168.0, 169.0, 170.0, 171.0, 172.0, 173.0, 174.0],
                     [175.0, 176.0, 177.0, 178.0, 179.0, 180.0, 181.0],
                     [182.0, 183.0, 184.0, 185.0, 186.0, 187.0, 188.0],
                     [189.0, 190.0, 191.0, 192.0, 193.0, 194.0, 195.0],
                     [196.0, 197.0, 198.0, 199.0, 200.0, 201.0, 202.0],
                     [203.0, 204.0, 205.0, 206.0, 207.0, 208.0, 209.0],
                 ]
             ],
             dtype=torch.float64,
             device=self.device.torch_device,
         )
         if a.comm.rank == 2:
             self.assertTrue(torch.equal(tiles[2], testing_tensor))
         tiles[2] = 1000
         sl = tiles[2]
         if a.comm.rank == 2:
             self.assertEqual(torch.Size([1, 6, 7]), sl.shape)
             self.assertTrue(torch.all(sl == 1000))
         else:
             self.assertTrue(sl is None)
Example #21
    def test_split_none(self):
        X = ht.load_hdf5("heat/datasets/iris.h5", dataset="data")

        # Generate keys for the iris.h5 dataset
        keys = []
        for i in range(50):
            keys.append(0)
        for i in range(50, 100):
            keys.append(1)
        for i in range(100, 150):
            keys.append(2)
        Y = ht.array(keys)

        knn = KNN(X, Y, 5)

        result = knn.predict(X)

        self.assertTrue(ht.is_estimator(knn))
        self.assertTrue(ht.is_classifier(knn))
        self.assertIsInstance(result, ht.DNDarray)
        self.assertEqual(result.shape, Y.shape)
Example #22
    def test_split_none(self):
        x = ht.load_hdf5("heat/datasets/iris.h5", dataset="data")

        # generate keys for the iris.h5 dataset
        keys = []
        for i in range(50):
            keys.append(0)
        for i in range(50, 100):
            keys.append(1)
        for i in range(100, 150):
            keys.append(2)
        y = ht.array(keys)

        knn = KNeighborsClassifier(n_neighbors=5)
        knn.fit(x, y)
        result = knn.predict(x)

        self.assertTrue(ht.is_estimator(knn))
        self.assertTrue(ht.is_classifier(knn))
        self.assertIsInstance(result, ht.DNDarray)
        self.assertEqual(result.shape, y.shape)
Example #23
    def test_bitwise_and(self):
        an_int_tensor = ht.array([[1, 2], [3, 4]], device=ht_device)
        an_int_vector = ht.array([2, 2], device=ht_device)
        another_int_vector = ht.array([2, 2, 2, 2], device=ht_device)
        int_result = ht.array([[0, 2], [2, 0]], device=ht_device)

        a_boolean_vector = ht.array([False, True, False, True],
                                    device=ht_device)
        another_boolean_vector = ht.array([False, False, True, True],
                                          device=ht_device)
        boolean_result = ht.array([False, False, False, True],
                                  device=ht_device)

        self.assertTrue(
            ht.equal(ht.bitwise_and(an_int_tensor, self.an_int_scalar),
                     int_result))
        self.assertTrue(
            ht.equal(ht.bitwise_and(an_int_tensor, an_int_vector), int_result))
        self.assertTrue(
            ht.equal(ht.bitwise_and(a_boolean_vector, another_boolean_vector),
                     boolean_result))
        self.assertTrue(
            ht.equal(
                ht.bitwise_and(an_int_tensor.copy().resplit_(0),
                               an_int_vector), int_result))

        with self.assertRaises(TypeError):
            ht.bitwise_and(self.a_tensor, self.another_tensor)
        with self.assertRaises(ValueError):
            ht.bitwise_and(an_int_vector, another_int_vector)
        with self.assertRaises(TypeError):
            ht.bitwise_and(self.a_tensor, self.erroneous_type)
        with self.assertRaises(TypeError):
            ht.bitwise_and("T", "s")
        with self.assertRaises(TypeError):
            ht.bitwise_and(an_int_tensor, "s")
        with self.assertRaises(TypeError):
            ht.bitwise_and(self.an_int_scalar, "s")
        with self.assertRaises(TypeError):
            ht.bitwise_and("s", self.an_int_scalar)
        with self.assertRaises(TypeError):
            ht.bitwise_and(self.an_int_scalar, self.a_scalar)
Example #24
def blocking_hook(grad_loc):
    # The PyTorch docs state that :attr:`grad` may not be modified in place, so it has to be cloned
    # (cf. https://pytorch.org/docs/stable/tensors.html#torch.Tensor.register_hook).
    # This appears to hold in practice: a RuntimeError is raised otherwise.
    grad_loc_cpy = grad_loc.clone()

    # counterbalance local gradient averaging
    grad_loc_cpy *= bLoc

    # wrap local gradient into heat tensor
    grad_ht = ht.array(grad_loc_cpy, copy=False)

    # perform MPI Allreduce to compute global gradient
    grad_ht.comm.Allreduce(ht.MPI.IN_PLACE, grad_ht, ht.MPI.SUM)

    # unwrap global gradient from heat tensor
    grad_glo = grad_ht._DNDarray__array

    # global gradient averaging
    grad_glo /= bGlo

    return grad_glo
Example #25
    def test_fmod(self):
        result = ht.array([[1.0, 0.0], [1.0, 0.0]], device=ht_device)
        an_int_tensor = ht.array([[5, 3], [4, 1]], device=ht_device)
        integer_result = ht.array([[1, 1], [0, 1]], device=ht_device)
        commutated_result = ht.array([[0.0, 0.0], [2.0, 2.0]],
                                     device=ht_device)
        zero_tensor = ht.zeros((2, 2), device=ht_device)

        a_float = ht.array([5.3], device=ht_device)
        another_float = ht.array([1.9], device=ht_device)
        result_float = ht.array([1.5], device=ht_device)

        self.assertTrue(
            ht.equal(ht.fmod(self.a_scalar, self.a_scalar), ht.float32([0.0])))
        self.assertTrue(
            ht.equal(ht.fmod(self.a_tensor, self.a_tensor), zero_tensor))
        self.assertTrue(
            ht.equal(ht.fmod(self.a_tensor, self.an_int_scalar), result))
        self.assertTrue(
            ht.equal(ht.fmod(self.a_tensor, self.another_tensor), result))
        self.assertTrue(ht.equal(ht.fmod(self.a_tensor, self.a_vector),
                                 result))
        self.assertTrue(
            ht.equal(ht.fmod(an_int_tensor, self.an_int_scalar),
                     integer_result))
        self.assertTrue(
            ht.equal(ht.fmod(self.a_scalar, self.a_tensor), commutated_result))
        self.assertTrue(
            ht.equal(ht.fmod(self.a_split_tensor, self.a_tensor),
                     commutated_result))
        self.assertTrue(
            ht.allclose(ht.fmod(a_float, another_float), result_float))

        with self.assertRaises(ValueError):
            ht.fmod(self.a_tensor, self.another_vector)
        with self.assertRaises(TypeError):
            ht.fmod(self.a_tensor, self.erroneous_type)
        with self.assertRaises(TypeError):
            ht.fmod("T", "s")
Example #26
    def test_numpy(self):
        # TODO: numpy does not work for distributed tensors due to issue#
        # add additional tests once the issue is solved
        a = np.random.randn(10, 8)
        b = ht.array(a, device=ht_device)
        self.assertIsInstance(b.numpy(), np.ndarray)
        self.assertEqual(b.numpy().shape, a.shape)
        self.assertEqual(b.numpy().tolist(), b._DNDarray__array.cpu().numpy().tolist())

        a = ht.ones((10, 8), dtype=ht.float32, device=ht_device)
        b = np.ones((2, 2)).astype("float32")
        self.assertEqual(a.numpy().dtype, b.dtype)

        a = ht.ones((10, 8), dtype=ht.float64, device=ht_device)
        b = np.ones((2, 2)).astype("float64")
        self.assertEqual(a.numpy().dtype, b.dtype)

        a = ht.ones((10, 8), dtype=ht.int32, device=ht_device)
        b = np.ones((2, 2)).astype("int32")
        self.assertEqual(a.numpy().dtype, b.dtype)

        a = ht.ones((10, 8), dtype=ht.int64, device=ht_device)
        b = np.ones((2, 2)).astype("int64")
        self.assertEqual(a.numpy().dtype, b.dtype)
Example #27
 def test_sanitize_memory_layout(self):
     # non distributed, 2D
     a_torch = torch.arange(12,
                            device=self.device.torch_device).reshape(4, 3)
     a_heat_C = ht.array(a_torch)
     a_heat_F = ht.array(a_torch, order="F")
     self.assertTrue_memory_layout(a_heat_C, "C")
     self.assertTrue_memory_layout(a_heat_F, "F")
     # non distributed, 5D
     a_torch_5d = torch.arange(4 * 3 * 5 * 2 * 1,
                               device=self.device.torch_device).reshape(
                                   4, 3, 1, 2, 5)
     a_heat_5d_C = ht.array(a_torch_5d)
     a_heat_5d_F = ht.array(a_torch_5d, order="F")
     self.assertTrue_memory_layout(a_heat_5d_C, "C")
     self.assertTrue_memory_layout(a_heat_5d_F, "F")
     a_heat_5d_F_sum = a_heat_5d_F.sum(-2)
     a_torch_5d_sum = a_torch_5d.sum(-2)
     self.assert_array_equal(a_heat_5d_F_sum, a_torch_5d_sum)
     # distributed, split, 2D
     size = ht.communication.MPI_WORLD.size
     a_torch_2d = torch.arange(4 * size * 3 * size,
                               device=self.device.torch_device).reshape(
                                   4 * size, 3 * size)
     a_heat_2d_C_split = ht.array(a_torch_2d, split=0)
     a_heat_2d_F_split = ht.array(a_torch_2d, split=1, order="F")
     self.assertTrue_memory_layout(a_heat_2d_C_split, "C")
     self.assertTrue_memory_layout(a_heat_2d_F_split, "F")
     a_heat_2d_F_split_sum = a_heat_2d_F_split.sum(1)
     a_torch_2d_sum = a_torch_2d.sum(1)
     self.assert_array_equal(a_heat_2d_F_split_sum, a_torch_2d_sum)
     # distributed, split, 5D
     a_torch_5d = torch.arange(4 * 3 * 5 * 2 * size * 7,
                               device=self.device.torch_device).reshape(
                                   4, 3, 7, 2 * size, 5)
     a_heat_5d_C_split = ht.array(a_torch_5d, split=-2)
     a_heat_5d_F_split = ht.array(a_torch_5d, split=-2, order="F")
     self.assertTrue_memory_layout(a_heat_5d_C_split, "C")
     self.assertTrue_memory_layout(a_heat_5d_F_split, "F")
     a_heat_5d_F_split_sum = a_heat_5d_F_split.sum(-2)
     a_torch_5d_sum = a_torch_5d.sum(-2)
     self.assert_array_equal(a_heat_5d_F_split_sum, a_torch_5d_sum)
     # distributed, is_split, 2D
     a_heat_2d_C_issplit = ht.array(a_torch_2d, is_split=0)
     a_heat_2d_F_issplit = ht.array(a_torch_2d, is_split=1, order="F")
     self.assertTrue_memory_layout(a_heat_2d_C_issplit, "C")
     self.assertTrue_memory_layout(a_heat_2d_F_issplit, "F")
     a_heat_2d_F_issplit_sum = a_heat_2d_F_issplit.sum(1)
     a_torch_2d_sum = a_torch_2d.sum(1) * size
     self.assert_array_equal(a_heat_2d_F_issplit_sum, a_torch_2d_sum)
     # distributed, is_split, 5D
     a_heat_5d_C_issplit = ht.array(a_torch_5d, is_split=-2)
     a_heat_5d_F_issplit = ht.array(a_torch_5d, is_split=-2, order="F")
     self.assertTrue_memory_layout(a_heat_5d_C_issplit, "C")
     self.assertTrue_memory_layout(a_heat_5d_F_issplit, "F")
     a_heat_5d_F_issplit_sum = a_heat_5d_F_issplit.sum(-2)
     a_torch_5d_sum = a_torch_5d.sum(-2) * size
     self.assert_array_equal(a_heat_5d_F_issplit_sum, a_torch_5d_sum)
     # test exceptions
     with self.assertRaises(NotImplementedError):
         ht.zeros_like(a_heat_5d_C_split, order="K")
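
The layout assertions above boil down to the strides of the process-local torch tensor: C order is row-major, F order column-major. A small sketch, assuming the usual heat/torch imports and recent Heat's larray accessor for the local tensor:

a = ht.array(torch.arange(6).reshape(2, 3))             # C order (default)
b = ht.array(torch.arange(6).reshape(2, 3), order="F")  # F order
print(a.larray.stride())  # (3, 1): row-major
print(b.larray.stride())  # (1, 2): column-major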
Example #28
    def _initialize_cluster_centers(self, X):
        """
        Initializes the K-Means centroids.

        Parameters
        ----------
        X : ht.DNDarray, shape=(n_point, n_features)
            The data to initialize the clusters for.
        """
        # always initialize the random state
        if self.random_state is not None:
            ht.random.seed(self.random_state)

        # initialize the centroids by randomly picking some of the points
        if self.init == "random":
            # Samples will be equally distributed drawn from all involved processes
            _, displ, _ = X.comm.counts_displs_shape(shape=X.shape, axis=0)
            centroids = ht.empty((self.n_clusters, X.shape[1]),
                                 split=None,
                                 device=X.device,
                                 comm=X.comm)
            if (X.split is None) or (X.split == 0):
                for i in range(self.n_clusters):
                    samplerange = (
                        X.gshape[0] // self.n_clusters * i,
                        X.gshape[0] // self.n_clusters * (i + 1),
                    )
                    sample = ht.random.randint(samplerange[0],
                                               samplerange[1]).item()
                    proc = 0
                    for p in range(X.comm.size):
                        if displ[p] > sample:
                            break
                        proc = p
                    xi = ht.zeros(X.shape[1], dtype=X.dtype)
                    if X.comm.rank == proc:
                        idx = sample - displ[proc]
                        xi = ht.array(X.lloc[idx, :],
                                      device=X.device,
                                      comm=X.comm)
                    xi.comm.Bcast(xi, root=proc)
                    centroids[i, :] = xi

            else:
                raise NotImplementedError(
                    "Not implemented for other splitting-axes")

            self._cluster_centers = centroids

        # directly passed centroids
        elif isinstance(self.init, ht.DNDarray):
            if len(self.init.shape) != 2:
                raise ValueError(
                    "passed centroids need to be two-dimensional, but are {}-dimensional".
                    format(len(self.init.shape)))
            if self.init.shape[0] != self.n_clusters or self.init.shape[
                    1] != X.shape[1]:
                raise ValueError(
                    "passed centroids do not match cluster count or data shape"
                )
            self._cluster_centers = self.init.resplit(None)

        # kmeans++, smart centroid guessing
        elif self.init == "kmeans++":
            if (X.split is None) or (X.split == 0):
                centroids = ht.zeros((self.n_clusters, X.shape[1]),
                                     split=None,
                                     device=X.device,
                                     comm=X.comm)
                sample = ht.random.randint(0, X.shape[0] - 1).item()
                _, displ, _ = X.comm.counts_displs_shape(shape=X.shape, axis=0)
                proc = 0
                for p in range(X.comm.size):
                    if displ[p] > sample:
                        break
                    proc = p
                x0 = ht.zeros(X.shape[1],
                              dtype=X.dtype,
                              device=X.device,
                              comm=X.comm)
                if X.comm.rank == proc:
                    idx = sample - displ[proc]
                    x0 = ht.array(X.lloc[idx, :], device=X.device, comm=X.comm)
                x0.comm.Bcast(x0, root=proc)
                centroids[0, :] = x0
                for i in range(1, self.n_clusters):
                    distances = ht.spatial.distance.cdist(
                        X, centroids, quadratic_expansion=True)
                    D2 = distances.min(axis=1)
                    D2.resplit_(axis=None)
                    prob = D2 / D2.sum()
                    x = ht.random.rand().item()
                    sample = 0
                    prob_sum = 0.0  # running total; avoids shadowing the builtin sum
                    for j in range(len(prob)):
                        if prob_sum > x:
                            break
                        prob_sum += prob[j].item()
                        sample = j
                    proc = 0
                    for p in range(X.comm.size):
                        if displ[p] > sample:
                            break
                        proc = p
                    xi = ht.zeros(X.shape[1], dtype=X.dtype)
                    if X.comm.rank == proc:
                        idx = sample - displ[proc]
                        xi = ht.array(X.lloc[idx, :],
                                      device=X.device,
                                      comm=X.comm)
                    xi.comm.Bcast(xi, root=proc)
                    centroids[i, :] = xi

            else:
                raise NotImplementedError(
                    "Not implemented for other splitting-axes")

            self._cluster_centers = centroids

        else:
            raise ValueError(
                'init needs to be one of "random", ht.DNDarray or "kmeans++", but was {}'
                .format(self.init))
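
The kmeans++ branch above implements D² sampling: each subsequent centroid is drawn with probability proportional to the squared distance to the nearest centroid chosen so far. A minimal single-process sketch of that rule in plain PyTorch (synthetic data, not Heat's distributed version):

import torch

torch.manual_seed(0)
X = torch.randn(100, 2)
# first centroid: a uniformly random point
centroids = [X[torch.randint(len(X), (1,)).item()]]

for _ in range(2):  # draw two more centroids
    # squared distance of every point to its nearest current centroid
    d2 = torch.stack([((X - c) ** 2).sum(dim=1) for c in centroids]).min(dim=0).values
    # D² weighting: sample an index with probability proportional to d2
    centroids.append(X[torch.multinomial(d2, 1).item()])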
Example #29
    def test_diff(self):
        ht_array = ht.random.rand(20, 20, 20, split=None)
        arb_slice = [0] * 3
        for dim in range(0, 3):  # loop over 3 dimensions
            arb_slice[dim] = slice(None)
            tup_arb = tuple(arb_slice)
            np_array = ht_array[tup_arb].numpy()
            for ax in range(dim + 1):  # loop over the possible axis values
                for sp in range(dim + 1):  # loop over the possible split values
                    lp_array = ht.manipulations.resplit(ht_array[tup_arb], sp)
                    # apply the diff up to three times (n = 1, 2, 3)
                    for nl in range(1, 4):
                        ht_diff = ht.diff(lp_array, n=nl, axis=ax)
                        np_diff = ht.array(np.diff(np_array, n=nl, axis=ax))

                        self.assertTrue(ht.equal(ht_diff, np_diff))
                        self.assertEqual(ht_diff.split, sp)
                        self.assertEqual(ht_diff.dtype, lp_array.dtype)

                        # test prepend/append. Note heat's intuitive casting vs. numpy's safe casting
                        append_shape = lp_array.gshape[:ax] + (
                            1, ) + lp_array.gshape[ax + 1:]
                        ht_append = ht.ones(append_shape,
                                            dtype=lp_array.dtype,
                                            split=lp_array.split)

                        ht_diff_pend = ht.diff(lp_array,
                                               n=nl,
                                               axis=ax,
                                               prepend=0,
                                               append=ht_append)
                        np_append = np.ones(
                            append_shape,
                            dtype=lp_array.larray.cpu().numpy().dtype)
                        np_diff_pend = ht.array(
                            np.diff(np_array,
                                    n=nl,
                                    axis=ax,
                                    prepend=0,
                                    append=np_append))
                        self.assertTrue(ht.equal(ht_diff_pend, np_diff_pend))
                        self.assertEqual(ht_diff_pend.split, sp)
                        self.assertEqual(ht_diff_pend.dtype, ht.float64)

        np_array = ht_array.numpy()
        ht_diff = ht.diff(ht_array, n=2)
        np_diff = ht.array(np.diff(np_array, n=2))
        self.assertTrue(ht.equal(ht_diff, np_diff))
        self.assertEqual(ht_diff.split, None)
        self.assertEqual(ht_diff.dtype, ht_array.dtype)

        ht_array = ht.random.rand(20, 20, 20, split=1, dtype=ht.float64)
        np_array = ht_array.copy().numpy()
        ht_diff = ht.diff(ht_array, n=2)
        np_diff = ht.array(np.diff(np_array, n=2))
        self.assertTrue(ht.equal(ht_diff, np_diff))
        self.assertEqual(ht_diff.split, 1)
        self.assertEqual(ht_diff.dtype, ht_array.dtype)

        # raises
        with self.assertRaises(ValueError):
            ht.diff(ht_array, n=-2)
        with self.assertRaises(TypeError):
            ht.diff(ht_array, axis="string")
        with self.assertRaises(TypeError):
            ht.diff("string", axis=2)
        t_prepend = torch.zeros(ht_array.gshape)
        with self.assertRaises(TypeError):
            ht.diff(ht_array, prepend=t_prepend)
        append_wrong_shape = ht.ones(ht_array.gshape)
        with self.assertRaises(ValueError):
            ht.diff(ht_array, axis=0, append=append_wrong_shape)
Example #30
    def test_add(self):
        # test basics
        result = ht.array([[3.0, 4.0], [5.0, 6.0]])

        self.assertTrue(
            ht.equal(ht.add(self.a_scalar, self.a_scalar), ht.float32(4.0)))
        self.assertTrue(ht.equal(ht.add(self.a_tensor, self.a_scalar), result))
        self.assertTrue(ht.equal(ht.add(self.a_scalar, self.a_tensor), result))
        self.assertTrue(
            ht.equal(ht.add(self.a_tensor, self.another_tensor), result))
        self.assertTrue(ht.equal(ht.add(self.a_tensor, self.a_vector), result))
        self.assertTrue(
            ht.equal(ht.add(self.a_tensor, self.an_int_scalar), result))
        self.assertTrue(
            ht.equal(ht.add(self.a_split_tensor, self.a_tensor), result))

        # Single element split
        a = ht.array([1], split=0)
        b = ht.array([1, 2], split=0)
        c = ht.add(a, b)
        self.assertTrue(ht.equal(c, ht.array([2, 3])))
        if c.comm.size > 1:
            if c.comm.rank < 2:
                self.assertEqual(c.larray.size()[0], 1)
            else:
                self.assertEqual(c.larray.size()[0], 0)

        # test with differently distributed DNDarrays
        a = ht.ones(10, split=0)
        b = ht.zeros(10, split=0)
        c = a[:-1] + b[1:]
        self.assertTrue((c == 1).all())
        self.assertTrue(c.lshape == a[:-1].lshape)

        c = a[1:-1] + b[1:-1]  # test unbalanced
        self.assertTrue((c == 1).all())
        self.assertTrue(c.lshape == a[1:-1].lshape)

        # test one unsplit
        a = ht.ones(10, split=None)
        b = ht.zeros(10, split=0)
        c = a[:-1] + b[1:]
        self.assertTrue((c == 1).all())
        self.assertEqual(c.lshape, b[1:].lshape)
        c = b[:-1] + a[1:]
        self.assertTrue((c == 1).all())
        self.assertEqual(c.lshape, b[:-1].lshape)

        # broadcast in split dimension
        a = ht.ones((1, 10), split=0)
        b = ht.zeros((2, 10), split=0)
        c = a + b
        self.assertTrue((c == 1).all())
        self.assertTrue(c.lshape == b.lshape)
        c = b + a
        self.assertTrue((c == 1).all())
        self.assertTrue(c.lshape == b.lshape)

        with self.assertRaises(ValueError):
            ht.add(self.a_tensor, self.another_vector)
        with self.assertRaises(TypeError):
            ht.add(self.a_tensor, self.erroneous_type)
        with self.assertRaises(TypeError):
            ht.add("T", "s")