def test_process_grid_bcast():
    P = dace.symbol('P', dace.int32)

    @dace.program
    def pgrid_bcast(A: dace.int32[10]):
        pgrid = dace.comm.Cart_create([1, P])
        dace.comm.Bcast(A, grid=pgrid)

    from mpi4py import MPI
    commworld = MPI.COMM_WORLD
    rank = commworld.Get_rank()
    size = commworld.Get_size()

    if size < 2:
        raise ValueError("Please run this test with at least two processes.")

    sdfg = None
    if rank == 0:
        sdfg = pgrid_bcast.to_sdfg()
    func = utils.distributed_compile(sdfg, commworld)

    if rank == 0:
        A = np.arange(10, dtype=np.int32)
    else:
        A = np.zeros((10, ), dtype=np.int32)

    func(A=A, P=size)

    assert (np.array_equal(A, np.arange(10, dtype=np.int32)))
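
# NOTE: Like all tests in this file, the test above must be launched through an
# MPI launcher with multiple ranks. An illustrative invocation (the launcher
# name and flags depend on the local MPI installation and are assumptions, not
# part of this repository):
#
#   mpirun -np 4 python -m pytest <this_test_file>.py
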
def test_redistribute_vector_2d_2d():
    """
    The numbers are example tile IDs, NOT MPI ranks. "(r)" means that the tile is a replica.
     ____________________       _______________________       ___________
    |____________________| ->  |0____|1____|2____|3____| ->  |0____|zero_|
                               |0(r)_|1(r)_|2(r)_|3(r)_|     |1____|zero_|
                                                             |2____|zero_|
                                                             |3____|zero_|
    """
    P = dace.symbol('P', dace.int32)

    @dace.program
    def vector_2d_2d(A: dace.int32[8 * P]):
        a_grid = dace.comm.Cart_create([2, P // 2])
        a_scatter_grid = dace.comm.Cart_sub(a_grid, [False, True], exact_grid=0)
        a_bcast_grid = dace.comm.Cart_sub(a_grid, [True, False])
        b_grid = dace.comm.Cart_create([P // 2, 2])
        b_scatter_grid = dace.comm.Cart_sub(b_grid, [True, False], exact_grid=0)
        b_bcast_grid = dace.comm.Cart_sub(b_grid, [False, True])
        lA = np.empty_like(A, shape=(16, ))
        a_subarr = dace.comm.BlockScatter(A, lA, a_scatter_grid, a_bcast_grid)
        lB = np.zeros_like(A, shape=(16, ))
        b_subarr = dace.comm.Subarray((8 * P, ), lB, process_grid=b_scatter_grid)
        redistr = dace.comm.Redistribute(lA, a_subarr, lB, b_subarr)
        return lB

    from mpi4py import MPI
    commworld = MPI.COMM_WORLD
    rank = commworld.Get_rank()
    size = commworld.Get_size()
    even_size = (size // 2) * 2

    if size < 2:
        raise ValueError("Please run this test with at least two processes.")

    sdfg = None
    if rank == 0:
        sdfg = vector_2d_2d.to_sdfg()
    func = utils.distributed_compile(sdfg, commworld)

    A = np.arange(8 * even_size, dtype=np.int32)
    lB_ref = A.reshape(even_size // 2, 16)

    if rank < even_size:
        lB = func(A=A, P=even_size)
    else:
        lB = func(A=np.zeros((1, ), dtype=np.int32), P=even_size)

    if rank < even_size:
        if rank % 2 == 0:
            assert (np.array_equal(lB, lB_ref[rank // 2]))
        else:
            assert (np.array_equal(lB, np.zeros_like(lB)))
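
# Worked example for the test above (illustrative, assuming 4 MPI ranks):
# `a_grid` is 2 x 2, so the 32-element A is scattered in 16-element tiles
# across the first row {0, 1} and replicated down the columns. `b_scatter_grid`
# is the first column {0, 2} of `b_grid`, so after the redistribution rank 0
# holds A[0:16], rank 2 holds A[16:32], and the odd ranks keep zeros, matching
# the assertions at the end of the test.
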
def test_redistribute_matrix_2d_2d():
    """
     _______________________         _______________________
    |     |     |     |     |       |           |           |
    |     |     |     |     |       |___________|___________|
    |     |     |     |     |       |           |           |
    |_____|_____|_____|_____|  ->   |___________|___________|
    |     |     |     |     |       |           |           |
    |     |     |     |     |       |___________|___________|
    |     |     |     |     |       |           |           |
    |_____|_____|_____|_____|       |___________|___________|
    """
    P = dace.symbol('P', dace.int32)

    @dace.program
    def matrix_2d_2d(A: dace.int32[4 * P, 16]):
        a_grid = dace.comm.Cart_create([2, P // 2])
        b_grid = dace.comm.Cart_create([P // 2, 2])
        B = np.empty_like(A, shape=(16, 4 * P))
        a_arr = dace.comm.Subarray((8 * P, 8 * P), A, process_grid=a_grid)
        b_arr = dace.comm.Subarray((8 * P, 8 * P), B, process_grid=b_grid)
        rdistr = dace.comm.Redistribute(A, a_arr, B, b_arr)
        return B

    from mpi4py import MPI
    commworld = MPI.COMM_WORLD
    rank = commworld.Get_rank()
    size = commworld.Get_size()
    even_size = (size // 2) * 2

    if size < 2:
        raise ValueError("Please run this test with at least two processes.")

    sdfg = None
    if rank == 0:
        sdfg = matrix_2d_2d.to_sdfg()
    func = utils.distributed_compile(sdfg, commworld)

    A = np.arange(64 * even_size * even_size, dtype=np.int32).reshape(8 * even_size, 8 * even_size)
    lA = A.reshape(2, 4 * even_size, even_size // 2, 16).transpose(0, 2, 1, 3)
    lB = A.reshape(even_size // 2, 16, 2, 4 * even_size).transpose(0, 2, 1, 3)

    if rank < even_size:
        B = func(A=lA[rank // (even_size // 2), rank % (even_size // 2)].copy(), P=even_size)
    else:
        B = func(A=np.zeros((1, ), dtype=np.int32), P=even_size)

    if rank < even_size:
        assert (np.array_equal(B, lB[rank // 2, rank % 2]))
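
# The reshape/transpose pattern used above to build `lA` and `lB` is the
# standard NumPy idiom for viewing an (R*BR, C*BC) matrix as an R x C grid of
# BR x BC tiles. A minimal, self-contained sketch (the helper name is
# illustrative and not used by the tests):
def _block_view(A, grid_rows, grid_cols):
    """Return a (grid_rows, grid_cols, block_rows, block_cols) view of A."""
    block_rows = A.shape[0] // grid_rows
    block_cols = A.shape[1] // grid_cols
    return A.reshape(grid_rows, block_rows, grid_cols, block_cols).transpose(0, 2, 1, 3)


# With these names, `lA` above is `_block_view(A, 2, even_size // 2)` and `lB`
# is `_block_view(A, even_size // 2, 2)`.
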
def test_sub_grid():
    P = dace.symbol('P', dace.int32)

    sdfg = dace.SDFG("sub_grid_test")
    sdfg.add_symbol('P', dace.int32)
    _, darr = sdfg.add_array("dims", (1, ), dtype=dace.int32)
    _, parr = sdfg.add_array("periods", (1, ), dtype=dace.int32)
    _, carr = sdfg.add_array("coords", (1, ), dtype=dace.int32)
    _, varr = sdfg.add_array("valid", (1, ), dtype=dace.bool_)

    state = sdfg.add_state("start")
    parent_pgrid_name = comm._cart_create(None, sdfg, state, [1, P])
    pgrid_name = comm._cart_sub(None, sdfg, state, parent_pgrid_name, [False, True])

    state2 = sdfg.add_state("main")
    sdfg.add_edge(state, state2, dace.InterstateEdge())
    tasklet = state2.add_tasklet(
        "MPI_Cart_get", {}, {'d', 'p', 'c', 'v'},
        f"MPI_Cart_get(__state->{pgrid_name}_comm, P, &d, &p, &c);\nv = __state->{pgrid_name}_valid;",
        dtypes.Language.CPP)
    dims = state2.add_write("dims")
    periods = state2.add_write("periods")
    coords = state2.add_write("coords")
    valid = state2.add_write("valid")
    state2.add_edge(tasklet, 'd', dims, None, dace.Memlet.from_array("dims", darr))
    state2.add_edge(tasklet, 'p', periods, None, dace.Memlet.from_array("periods", parr))
    state2.add_edge(tasklet, 'c', coords, None, dace.Memlet.from_array("coords", carr))
    state2.add_edge(tasklet, 'v', valid, None, dace.Memlet("valid[0]"))

    from mpi4py import MPI
    commworld = MPI.COMM_WORLD
    rank = commworld.Get_rank()
    size = commworld.Get_size()

    if size < 2:
        raise ValueError("Please run this test with at least two processes.")

    func = utils.distributed_compile(sdfg, commworld)

    dims = np.zeros((1, ), dtype=np.int32)
    periods = np.zeros((1, ), dtype=np.int32)
    coords = np.zeros((1, ), dtype=np.int32)
    valid = np.zeros((1, ), dtype=np.bool_)

    func(dims=dims, periods=periods, coords=coords, valid=valid, P=size)

    assert (np.array_equal(dims, [size]))
    assert (np.array_equal(periods, [0]))
    assert (np.array_equal(coords, [rank]))
    assert (valid[0])
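
# For comparison, the same sub-grid check written directly against mpi4py
# (an illustrative sketch, not invoked by the test above):
def _mpi4py_sub_grid_reference():
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    # A [1, P] parent grid reduced to its second dimension yields a single
    # sub-communicator containing all ranks, with coordinate == rank.
    cart = comm.Create_cart([1, comm.Get_size()], periods=[False, False])
    sub = cart.Sub([False, True])
    dims, periods, coords = sub.Get_topo()
    assert dims == [comm.Get_size()]
    assert not any(periods)
    assert coords == [comm.Get_rank()]
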
def test_sub_grid_bcast():
    P = dace.symbol('P', dace.int32)

    @dace.program
    def subgrid_bcast(A: dace.int32[10], rank: dace.int32):
        pgrid = dace.comm.Cart_create([2, P // 2])
        sgrid = dace.comm.Cart_sub(pgrid, [False, True])
        dace.comm.Bcast(A, grid=pgrid)
        B = np.empty_like(A)
        B[:] = rank % 10
        dace.comm.Bcast(B, grid=sgrid)
        A[:] = B

    from mpi4py import MPI
    commworld = MPI.COMM_WORLD
    rank = commworld.Get_rank()
    size = commworld.Get_size()
    last_rank = (size // 2) * 2

    if size < 2:
        raise ValueError("Please run this test with at least two processes.")

    sdfg = None
    if rank == 0:
        sdfg = subgrid_bcast.to_sdfg()
    func = utils.distributed_compile(sdfg, commworld)

    if rank == 0:
        A = np.arange(10, dtype=np.int32)
    else:
        A = np.ones((10, ), dtype=np.int32)

    func(A=A, rank=rank, P=size)

    if rank < size // 2:
        assert (np.array_equal(A, np.zeros((10, ), dtype=np.int32)))
    elif rank < last_rank:
        assert (np.array_equal(A, np.full_like(A, fill_value=(size // 2) % 10)))
    else:
        assert (np.array_equal(A, np.full_like(A, fill_value=rank % 10)))
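
# Derivation of the expected values in the test above: the full-grid Bcast
# first overwrites A everywhere with rank 0's data. The row-wise Bcast over
# `sgrid` then overwrites it with B from each row's root: global rank 0
# (B = 0) for the first row, and global rank size // 2 (B = (size // 2) % 10)
# for the second. Ranks outside the 2 x (P // 2) grid do not participate in
# either broadcast and keep B = rank % 10.
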
def test_subarray_gather():
    P = dace.symbol('P', dace.int32)

    @dace.program
    def block_gather(lA: dace.int32[4 * P, 16]):
        gather_grid = dace.comm.Cart_create([2, P // 2])
        A = np.empty_like(lA, shape=(8 * P, 8 * P))
        subarray = dace.comm.BlockGather(lA, A, gather_grid)
        return A

    from mpi4py import MPI
    commworld = MPI.COMM_WORLD
    rank = commworld.Get_rank()
    size = commworld.Get_size()
    even_size = (size // 2) * 2

    if size < 2:
        raise ValueError("Please run this test with at least two processes.")

    sdfg = None
    if rank == 0:
        sdfg = block_gather.to_sdfg()
    func = utils.distributed_compile(sdfg, commworld)

    A_ref = np.arange(64 * even_size * even_size, dtype=np.int32).reshape(8 * even_size, 8 * even_size)
    lA = A_ref.reshape(2, 4 * even_size, even_size // 2, 16).transpose(0, 2, 1, 3)

    if rank < even_size:
        A = func(lA=lA[rank // (even_size // 2), rank % (even_size // 2)].copy(), P=even_size)
    else:
        A = func(lA=np.zeros((1, ), dtype=np.int32), P=even_size)

    if rank == 0:
        assert (np.array_equal(A, A_ref))
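
# `lA` above is the same 2 x (even_size // 2) block decomposition of A_ref as
# `_block_view(A_ref, 2, even_size // 2)` (see the sketch after
# test_redistribute_matrix_2d_2d): rank (i, j) of the gather grid contributes
# tile lA[i, j], and BlockGather reassembles the tiles into A on the grid root.
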
def test_subarray_scatter_bcast():
    P = dace.symbol('P', dace.int32)

    @dace.program
    def block_scatter_bcast(A: dace.int32[8 * P]):
        pgrid = dace.comm.Cart_create([2, P // 2])
        scatter_grid = dace.comm.Cart_sub(pgrid, [False, True], exact_grid=0)
        bcast_grid = dace.comm.Cart_sub(pgrid, [True, False])
        lA = np.empty_like(A, shape=(16, ))
        subarray = dace.comm.BlockScatter(A, lA, scatter_grid, bcast_grid)
        return lA

    from mpi4py import MPI
    commworld = MPI.COMM_WORLD
    rank = commworld.Get_rank()
    size = commworld.Get_size()
    even_size = (size // 2) * 2

    if size < 2:
        raise ValueError("Please run this test with at least two processes.")

    sdfg = None
    if rank == 0:
        sdfg = block_scatter_bcast.to_sdfg()
    func = utils.distributed_compile(sdfg, commworld)

    A = np.arange(8 * even_size, dtype=np.int32)
    if rank == 0:
        lA = func(A=A, P=even_size)
    else:
        lA = func(A=np.zeros((1, ), dtype=np.int32), P=even_size)

    if rank < even_size:
        lbound = (rank % (even_size // 2)) * 16
        ubound = (rank % (even_size // 2) + 1) * 16
        assert (np.array_equal(lA, A[lbound:ubound]))
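
# Only a rank's column coordinate in the 2 x (even_size // 2) grid selects its
# 16-element tile, hence the `rank % (even_size // 2)` indexing above: the
# scatter happens along the first row (`scatter_grid`), and `bcast_grid` then
# replicates each tile down the columns, so ranks in the same column hold
# identical copies.
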
def test_subarray_gather_reduce():
    P = dace.symbol('P', dace.int32)

    @dace.program
    def block_gather_reduce(lA: dace.int32[16]):
        pgrid = dace.comm.Cart_create([2, P // 2])
        gather_grid = dace.comm.Cart_sub(pgrid, [False, True], exact_grid=0)
        reduce_grid = dace.comm.Cart_sub(pgrid, [True, False])
        A = np.empty_like(lA, shape=(8 * P, ))
        subarray = dace.comm.BlockGather(lA, A, gather_grid, reduce_grid)
        return A

    from mpi4py import MPI
    commworld = MPI.COMM_WORLD
    rank = commworld.Get_rank()
    size = commworld.Get_size()
    even_size = (size // 2) * 2

    if size < 2:
        raise ValueError("Please run this test with at least two processes.")

    sdfg = None
    if rank == 0:
        sdfg = block_gather_reduce.to_sdfg()
    func = utils.distributed_compile(sdfg, commworld)

    A_ref = np.arange(8 * even_size, dtype=np.int32)

    if rank < even_size:
        lbound = (rank % (even_size // 2)) * 16
        ubound = (rank % (even_size // 2) + 1) * 16
        A = func(lA=A_ref[lbound:ubound].copy(), P=even_size)
    else:
        A = func(lA=np.zeros((1, ), dtype=np.int32), P=even_size)

    if rank == 0:
        assert (np.array_equal(A, 2 * A_ref))
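
# The test above is the inverse of test_subarray_scatter_bcast: each tile of
# A_ref exists once per row of the 2 x (P // 2) grid, and BlockGather with a
# `reduce_grid` sums the replicas while gathering, so the root sees every
# element exactly twice, i.e. 2 * A_ref.
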