import heat as ht import timeit import numpy as np import cupy as cp import time from print0 import print0 rank = ht.get_comm().rank N = 50 a = np.random.rand(10000, 10000) b = np.random.rand(10000, 10000) c = np.random.rand(10000, 10000) # print(type(a[0,0])) t1 = time.time() for i in range(N): d = c + a * b t2 = time.time() print0(rank, "Using numpy, time taken is ", t2-t1, "s.") a = ht.random.rand(10000, 10000, dtype=ht.float64, split=0, device='cpu') b = ht.random.rand(10000, 10000, dtype=ht.float64, split=0, device='cpu') c = ht.random.rand(10000, 10000, dtype=ht.float64, split=0, device='cpu') t1 = time.time() for i in range(N): d = c + a * b t2 = time.time() print0(rank, "Using HeAT on cpus, time taken is ", t2-t1, "s.") a = cp.random.rand(10000, 2500) b = cp.random.rand(10000, 2500)
def test_sort(self): size = ht.MPI_WORLD.size rank = ht.MPI_WORLD.rank tensor = torch.arange(size, device=device).repeat(size).reshape(size, size) data = ht.array(tensor, split=None, device=ht_device) result, result_indices = ht.sort(data, axis=0, descending=True) expected, exp_indices = torch.sort(tensor, dim=0, descending=True) self.assertTrue(torch.equal(result._DNDarray__array, expected)) self.assertTrue(torch.equal(result_indices._DNDarray__array, exp_indices)) result, result_indices = ht.sort(data, axis=1, descending=True) expected, exp_indices = torch.sort(tensor, dim=1, descending=True) self.assertTrue(torch.equal(result._DNDarray__array, expected)) self.assertTrue(torch.equal(result_indices._DNDarray__array, exp_indices)) data = ht.array(tensor, split=0, device=ht_device) exp_axis_zero = torch.arange(size, device=device).reshape(1, size) exp_indices = torch.tensor([[rank] * size], device=device) result, result_indices = ht.sort(data, descending=True, axis=0) self.assertTrue(torch.equal(result._DNDarray__array, exp_axis_zero)) self.assertTrue(torch.equal(result_indices._DNDarray__array, exp_indices)) exp_axis_one, exp_indices = ( torch.arange(size, device=device).reshape(1, size).sort(dim=1, descending=True) ) result, result_indices = ht.sort(data, descending=True, axis=1) self.assertTrue(torch.equal(result._DNDarray__array, exp_axis_one)) self.assertTrue(torch.equal(result_indices._DNDarray__array, exp_indices)) result1 = ht.sort(data, axis=1, descending=True) result2 = ht.sort(data, descending=True) self.assertTrue(ht.equal(result1[0], result2[0])) self.assertTrue(ht.equal(result1[1], result2[1])) data = ht.array(tensor, split=1, device=ht_device) exp_axis_zero = torch.tensor(rank, device=device).repeat(size).reshape(size, 1) indices_axis_zero = torch.arange(size, dtype=torch.int64, device=device).reshape(size, 1) result, result_indices = ht.sort(data, axis=0, descending=True) self.assertTrue(torch.equal(result._DNDarray__array, exp_axis_zero)) # comparison value is only true on CPU if result_indices._DNDarray__array.is_cuda is False: self.assertTrue(torch.equal(result_indices._DNDarray__array, indices_axis_zero)) exp_axis_one = torch.tensor(size - rank - 1, device=device).repeat(size).reshape(size, 1) result, result_indices = ht.sort(data, descending=True, axis=1) self.assertTrue(torch.equal(result._DNDarray__array, exp_axis_one)) self.assertTrue(torch.equal(result_indices._DNDarray__array, exp_axis_one)) tensor = torch.tensor( [ [[2, 8, 5], [7, 2, 3]], [[6, 5, 2], [1, 8, 7]], [[9, 3, 0], [1, 2, 4]], [[8, 4, 7], [0, 8, 9]], ], dtype=torch.int32, device=device, ) data = ht.array(tensor, split=0, device=ht_device) exp_axis_zero = torch.tensor([[2, 3, 0], [0, 2, 3]], dtype=torch.int32, device=device) if torch.cuda.is_available() and data.device == ht.gpu and size < 4: indices_axis_zero = torch.tensor( [[0, 2, 2], [3, 2, 0]], dtype=torch.int32, device=device ) else: indices_axis_zero = torch.tensor( [[0, 2, 2], [3, 0, 0]], dtype=torch.int32, device=device ) result, result_indices = ht.sort(data, axis=0) first = result[0]._DNDarray__array first_indices = result_indices[0]._DNDarray__array if rank == 0: self.assertTrue(torch.equal(first, exp_axis_zero)) self.assertTrue(torch.equal(first_indices, indices_axis_zero)) data = ht.array(tensor, split=1, device=ht_device) exp_axis_one = torch.tensor([[2, 2, 3]], dtype=torch.int32, device=device) indices_axis_one = torch.tensor([[0, 1, 1]], dtype=torch.int32, device=device) result, result_indices = ht.sort(data, axis=1) first = result[0]._DNDarray__array[:1] first_indices = result_indices[0]._DNDarray__array[:1] if rank == 0: self.assertTrue(torch.equal(first, exp_axis_one)) self.assertTrue(torch.equal(first_indices, indices_axis_one)) data = ht.array(tensor, split=2, device=ht_device) exp_axis_two = torch.tensor([[2], [2]], dtype=torch.int32, device=device) indices_axis_two = torch.tensor([[0], [1]], dtype=torch.int32, device=device) result, result_indices = ht.sort(data, axis=2) first = result[0]._DNDarray__array[:, :1] first_indices = result_indices[0]._DNDarray__array[:, :1] if rank == 0: self.assertTrue(torch.equal(first, exp_axis_two)) self.assertTrue(torch.equal(first_indices, indices_axis_two)) # out = ht.empty_like(data, device=ht_device) indices = ht.sort(data, axis=2, out=out) self.assertTrue(ht.equal(out, result)) self.assertTrue(ht.equal(indices, result_indices)) with self.assertRaises(ValueError): ht.sort(data, axis=3) with self.assertRaises(TypeError): ht.sort(data, axis="1") rank = ht.MPI_WORLD.rank data = ht.random.randn(100, 1, split=0, device=ht_device) result, _ = ht.sort(data, axis=0) counts, _, _ = ht.get_comm().counts_displs_shape(data.gshape, axis=0) for i, c in enumerate(counts): for idx in range(c - 1): if rank == i: self.assertTrue( torch.lt( result._DNDarray__array[idx], result._DNDarray__array[idx + 1] ).all() )