def main():
    """Drive an iterative cluster-center computation over distributed data.

    Allocates ``NUM_NODE`` blocks of shape ``(DATA_PER_NODE, DIM)`` in global
    memory, has ``generate_data`` fill every block, seeds the centers with the
    first ``K`` rows of block 0, and then runs ``MAX_ITERATION`` rounds. Each
    round launches ``calculate_centers`` on every block, collects per-center
    counts and summed positions through two Reduce LCOs, and replaces the
    centers with ``positions / counts``.

    Ends by calling ``hpx.exit()``.
    """
    # np.float / np.int were plain aliases of the builtins and no longer exist
    # in NumPy >= 1.24; the builtin types give the same dtypes.
    data = hpx.GlobalMemory.alloc_cyclic(NUM_NODE, (DATA_PER_NODE, DIM),
                                         np.dtype(float))

    generate_data_complete = hpx.And(NUM_NODE)
    for i in range(NUM_NODE):
        generate_data(data[i], node_size(i), rsync_lco=generate_data_complete)
    generate_data_complete.wait()

    data_this_block = data[0].try_pin()
    # Copy while the block is still pinned so `centers` does not keep
    # referring to the block's memory after unpin().
    centers = data_this_block[:K].copy()
    data[0].unpin()

    iterations = 0
    while iterations < MAX_ITERATION:
        count_lco = hpx.Reduce(NUM_NODE, (K, ), np.dtype(int),
                               initialize_count, sum_count)
        position_lco = hpx.Reduce(NUM_NODE, (K, DIM), np.dtype(float),
                                  initialize_position, sum_position)
        and_lco = hpx.And(NUM_NODE)
        for i in range(NUM_NODE):
            calculate_centers(data[i], node_size(i), centers, count_lco,
                              position_lco, and_lco)

        counts = count_lco.get()
        positions = position_lco.get()
        # NOTE(review): a center with a count of 0 makes this division yield
        # nan/inf for that row -- confirm whether that can happen here.
        centers = positions / counts.reshape((K, 1))

        # Wait for every action before tearing the LCOs down.
        and_lco.wait()
        count_lco.delete_sync()
        position_lco.delete_sync()
        and_lco.delete_sync()
        iterations += 1

    hpx.exit()
def main():
    """Time one `worker` invocation per rank and print the elapsed milliseconds.

    The clock starts before the rank count is queried. One `worker` call is
    issued per rank index, each addressed with ``hpx.THERE(rank)`` and wired
    to a shared And LCO via ``rsync_lco``; the elapsed time is printed once
    that LCO has fired. Ends by calling ``hpx.exit()``.
    """
    t0 = hpx.time_now()

    rank_count = hpx.get_num_ranks()
    all_done = hpx.And(rank_count)
    for rank in range(rank_count):
        worker(hpx.THERE(rank), rank, rank_count, rsync_lco=all_done)

    # Block until every worker has reported completion.
    all_done.wait()

    print(hpx.time_elapsed_ms(t0))
    hpx.exit()
def main(num_action):
    """Split a fixed amount of work over `num_action` local actions and time it.

    Args:
        num_action: number of `calculate` actions to launch on this locality;
            each one receives ``5765760 // num_action`` as its argument.

    Prints the elapsed milliseconds once all actions have signalled the shared
    And LCO, then calls ``hpx.exit()``.
    """
    t0 = hpx.time_now()
    all_done = hpx.And(num_action)

    for _ in range(num_action):
        # 5765760 == 5040 * 1144, and 5040 is
        # lcm(2,3,4,5,6,7,8,9,10,12,14,15,16), so the total divides evenly
        # for each of those action counts.
        calculate(hpx.HERE(), 5765760 // num_action, rsync_lco=all_done)

    all_done.wait()
    print(hpx.time_elapsed_ms(t0))
    hpx.exit()
def main():
    """Time `copy_from_array` running once per rank against a cyclic array.

    Prints the rank count, allocates ``num_ranks`` blocks of shape
    ``(64, 1024, 1024)`` of floats with ``alloc_cyclic``, and only then starts
    the clock, so the allocation is not part of the measurement. One
    `copy_from_array` call is issued per rank; the elapsed milliseconds are
    printed after all of them have signalled the And LCO. Ends by calling
    ``hpx.exit()``.
    """
    rank_count = hpx.get_num_ranks()
    print(rank_count)

    block_shape = (64, 1024, 1024)
    array = hpx.GlobalMemory.alloc_cyclic(rank_count, block_shape,
                                          dtype=np.dtype(float))

    t0 = hpx.time_now()
    all_done = hpx.And(rank_count)
    for rank in range(rank_count):
        copy_from_array(hpx.THERE(rank), array, rank, rank_count,
                        rsync_lco=all_done)
    all_done.wait()

    print(hpx.time_elapsed_ms(t0))
    hpx.exit()
def main_action():
    """Integrate over ``[low, high]`` by giving each rank a run of cells.

    The interval is cut into ``total_cells`` cells of equal width. Every rank
    gets ``total_cells // num_ranks`` consecutive cells and contributes its
    partial result to a single-element Reduce LCO. The reduced value is
    printed, followed by ``fint(high) - fint(low)`` for comparison. Ends by
    calling ``hpx.exit()``.
    """
    node_count = hpx.get_num_ranks()
    print("program runs on {0} nodes".format(node_count))

    cell_width = (high - low) / total_cells
    # NOTE(review): floor division -- when total_cells is not a multiple of
    # the rank count, the trailing remainder cells are assigned to nobody.
    cells_each = total_cells // node_count

    total = hpx.Reduce(node_count, (1, ), np.dtype(float), result_init,
                       result_op)
    for node in range(node_count):
        node_low = low + node * cell_width * cells_each
        calculate_integral(hpx.THERE(node), node_low, cell_width, cells_each,
                           total)

    print(total.get())
    print(fint(high) - fint(low))
    hpx.exit()
def main_action():
    """Check integer arithmetic on the address of a GlobalMemory allocation.

    Allocates local global memory of int elements and asserts that:

    * ``addr + n`` and ``n + addr`` give the same address,
    * that address matches the one obtained by indexing ``[0, 2]``,
    * ``addr - addr`` gives back the integer byte offset,
    * ``addr - n`` undoes ``addr + n``.

    Ends by calling ``hpx.exit()``.
    """
    # np.int was an alias of builtin int and no longer exists in
    # NumPy >= 1.24; np.dtype(int) is the same dtype.
    int_dtype = np.dtype(int)
    two_items = 2 * int_dtype.itemsize  # byte offset of element index 2

    # test addr arithmetic
    global_memory = hpx.GlobalMemory.alloc_local_at(3, 4, int_dtype,
                                                    hpx.HERE())
    global_addr = global_memory.addr + two_items
    global_addr_2 = two_items + global_memory.addr

    assert global_addr.addr == global_memory[0, 2].addr.addr
    assert global_addr.addr == global_addr_2.addr
    assert global_addr - global_memory.addr == two_items
    assert (global_addr - two_items).addr == global_memory.addr.addr

    hpx.exit()
def main():
    """Time `calculate` for 18 doubling values of its `num_gil` argument.

    For each exponent ``j`` in ``0..17``, launches ``num_action`` local
    `calculate` actions with ``num_gil = 2**j``, waits for all of them, and
    prints and records the elapsed milliseconds. The full array of timings is
    then printed and written to ``time.bin`` with ``ndarray.dump``. Ends by
    calling ``hpx.exit()``.
    """
    trial_count = 18
    elapsed_ms = np.zeros((trial_count, ))

    for exponent in range(trial_count):
        num_gil = 2**exponent

        t0 = hpx.time_now()
        all_done = hpx.And(num_action)
        for _ in range(num_action):
            calculate(hpx.HERE(), num_gil, rsync_lco=all_done)
        all_done.wait()

        trial_ms = hpx.time_elapsed_ms(t0)
        print(trial_ms)
        elapsed_ms[exponent] = trial_ms

    print(elapsed_ms)
    elapsed_ms.dump("time.bin")
    hpx.exit()
def main(n_parts, n_partition, theta_c, domain_size):
    """Build the root node, partition the generated parts, then run the computation.

    Args:
        n_parts: number of parts to generate; also the input count of the
            And LCO that signals the end of the computation.
        n_partition: forwarded to `partition_node`.
        theta_c: forwarded to `spawn_computation`.
        domain_size: forwarded to `set_domain_size`, `create_node` and
            `generate_parts`.

    The steps run strictly in sequence: the partition phase is awaited through
    a Future before the computation is spawned, and each LCO is deleted as
    soon as it has been waited on. Ends by calling ``hpx.exit()``.
    """
    set_domain_size(hpx.NULL(), domain_size, sync='rsync')

    tree_root = create_node(0.0, domain_size)
    particles = generate_parts(n_parts, domain_size, tree_root.addr)

    # Phase 1: partition, completion reported through a single-element Future.
    partition_done = hpx.Future(shape=(1, ), dtype=moment_type)
    partition_node(
        tree_root[0],
        tree_root[0],
        particles,
        n_parts,
        n_partition,
        sync='lsync',
        rsync_lco=partition_done,
    )
    partition_done.wait()
    partition_done.delete()

    # Phase 2: computation, completion reported through an And LCO.
    compute_done = hpx.And(n_parts)
    spawn_computation(tree_root[0], tree_root[0].addr.addr, tree_root,
                      compute_done, theta_c)
    compute_done.wait()
    compute_done.delete()

    hpx.exit()
def main():
    """Exercise ``Reduce.set`` with ``sync='lsync'`` and with ``sync='async'``.

    Each half creates a 5-input Reduce LCO over ``(3, 4, 5)`` int arrays,
    feeds it five arrays of ones, and asserts that the reduced result is an
    array filled with 6. In the async half each ``set`` is given its own
    Future as ``lsync_lco`` and that Future is waited on before the next
    ``set``. Ends by calling ``hpx.exit()``.
    """
    # np.int was an alias of builtin int and no longer exists in
    # NumPy >= 1.24; builtin int selects the same dtype.
    shape = (3, 4, 5)
    int_dtype = np.dtype(int)
    # The expected value 6 is kept from the original test; it depends on what
    # set_zero/add (defined elsewhere) do -- TODO confirm.
    expect_array = np.full(shape, 6, dtype=int)

    # test lsync
    reduce_lco = hpx.Reduce(5, shape, int_dtype, set_zero, add)
    for _ in range(5):
        array = np.ones(shape, dtype=int)
        reduce_lco.set(array, sync='lsync')
    return_array = reduce_lco.get()
    assert np.array_equal(return_array, expect_array)

    # test async
    reduce_lco = hpx.Reduce(5, shape, int_dtype, set_zero, add)
    for _ in range(5):
        set_done = hpx.Future()
        array = np.ones(shape, dtype=int)
        reduce_lco.set(array, sync='async', lsync_lco=set_done)
        set_done.wait()
    return_array = reduce_lco.get()
    assert np.array_equal(return_array, expect_array)

    hpx.exit()
def main():
    """Exercise the action calling interface: sync modes, out arrays and gates.

    In order:

    1. `set_lco` is called with a plain Future, which is then waited on.
    2. `return_an_array` is called with ``sync='rsync'`` and must fill the
       ``(5, 6)`` out array with ``arange(30)``.
    3. `call_cc` is called with ``sync='lsync'``; its ``rsync_lco`` Future
       must yield ``arange(12)`` shaped ``(3, 4)``.
    4. `set_funture_2` is called with a gate Future that is only triggered
       afterwards through `set_lco`.
    5. `set_future` is called with ``sync='rsync'`` and a gate that has
       already been handed to `set_lco`.

    Ends by calling ``hpx.exit`` with a ``(2, 3)`` array.
    """
    expected_5x6 = np.arange(30).reshape((5, 6))
    expected_3x4 = np.arange(12).reshape((3, 4))

    plain = hpx.Future()
    set_lco(hpx.HERE(), plain, 2)
    plain.wait()
    plain.delete()

    out_array = np.zeros((5, 6), dtype=int)
    return_an_array(hpx.HERE(), sync='rsync', out_array=out_array)
    assert np.array_equal(out_array, expected_5x6)

    result = hpx.Future((3, 4), dtype=np.dtype(int))
    call_cc(hpx.HERE(), sync='lsync', rsync_lco=result)
    out_array = result.get()
    assert np.array_equal(out_array, expected_3x4)
    result.delete()

    # test lsync calling interface with gate
    gate = hpx.Future()
    gated_result = hpx.Future((3, 4), dtype=np.dtype(int))
    set_funture_2(hpx.HERE(), gated_result, gate=gate)
    set_lco(hpx.HERE(), gate, 2)
    out_array = gated_result.get()
    assert np.array_equal(out_array, expected_3x4)
    gate.delete()
    gated_result.delete()

    # test rsync calling interface with gate
    gate = hpx.Future()
    out_array = np.zeros((3, 4), dtype=int)
    set_lco(hpx.HERE(), gate, 2)
    set_future(hpx.HERE(), sync='rsync', gate=gate, out_array=out_array)
    assert np.array_equal(out_array, expected_3x4)
    gate.delete()

    rtv = np.arange(6).reshape((2, 3))
    hpx.exit(rtv)
def _expect_runtime_error(operation):
    """Call `operation()` and raise RuntimeError unless it raised RuntimeError."""
    try:
        operation()
    except RuntimeError:
        return
    raise RuntimeError("Runtime error not raised!")


def main():
    """Test GlobalMemory / GlobalAddressBlock indexing, pinning, get, set and free.

    Works on a local allocation of 3 blocks of shape ``(4, 5)`` of ints and
    checks, in order: the reported block count, block shape and byte strides;
    the type, address, shape and strides produced by integer and slice
    indexing (including indexing an already-sliced view); that out-of-range
    indices raise RuntimeError; that writes made through a pinned array are
    visible through later pins and through ``get``; that ``set`` writes into a
    contiguous slice; that ``get``/``set`` on a non-contiguous selection raise
    RuntimeError; and ``get``/``set`` on a five-dimensional block. Finally
    frees the allocations and calls ``hpx.exit()``.
    """
    # np.int was an alias of builtin int and no longer exists in
    # NumPy >= 1.24; np.dtype(int) is the same dtype.
    int_dtype = np.dtype(int)
    test_memory = hpx.GlobalMemory.alloc_local_at(3, (4, 5), int_dtype,
                                                  hpx.HERE())
    base = test_memory.addr.addr

    # test strides and offset initialization
    itemsize = int_dtype.itemsize
    assert test_memory.numBlock == (3, )
    assert test_memory.blockShape == (4, 5)
    assert test_memory.strides == (20 * itemsize, 5 * itemsize, itemsize)

    # test indexing
    # Exact type checks are intentional: an integer block index must give a
    # GlobalAddressBlock, a block slice must give a GlobalMemory.
    assert type(test_memory[2]) is hpx.GlobalAddressBlock
    assert test_memory[2].addr.addr == base + itemsize * 20 * 2
    assert test_memory[2].shape == (4, 5)
    assert test_memory[2].strides == (5 * itemsize, itemsize)

    assert type(test_memory[1:]) is hpx.GlobalMemory
    assert test_memory[1:].addr.addr == base + itemsize * 20
    assert test_memory[1:].numBlock == (2, )
    assert test_memory[1:].blockShape == (4, 5)
    assert test_memory[1:].strides == (20 * itemsize, 5 * itemsize, itemsize)

    assert type(test_memory[1:, 2, 1:3]) is hpx.GlobalMemory
    assert test_memory[1:, 2, 1:3].addr.addr == (
        base + itemsize * 20 + itemsize * 5 * 2 + itemsize * 1)
    assert test_memory[1:, 2, 1:3].numBlock == (2, )
    assert test_memory[1:, 2, 1:3].blockShape == (2, )
    assert test_memory[1:, 2, 1:3].strides == (20 * itemsize, itemsize)

    assert type(test_memory[1:, 1]) is hpx.GlobalMemory
    assert test_memory[1:, 1].addr.addr == (
        base + itemsize * 20 + itemsize * 5)
    assert test_memory[1:, 1].numBlock == (2, )
    assert test_memory[1:, 1].blockShape == (5, )
    assert test_memory[1:, 1].strides == (20 * itemsize, itemsize)

    # indexing a view that is itself the result of slicing
    sub_memory = test_memory[1:, 2:, :3]
    assert type(sub_memory[1]) is hpx.GlobalAddressBlock
    assert sub_memory[1].addr.addr == (
        base + itemsize * 20 * 2 + itemsize * 5 * 2)
    assert sub_memory[1].shape == (2, 3)
    assert sub_memory[1].strides == (5 * itemsize, itemsize)
    assert type(sub_memory[1:, 1]) is hpx.GlobalMemory
    assert sub_memory[1:, 1].addr.addr == (
        base + itemsize * 20 * 2 + itemsize * 5 * 3)
    assert sub_memory[1:, 1].numBlock == (1, )
    assert sub_memory[1:, 1].blockShape == (3, )

    sub_memory = test_memory[1]
    assert type(sub_memory[1:]) is hpx.GlobalAddressBlock
    assert sub_memory[1:].addr.addr == base + 20 * itemsize + 5 * itemsize
    assert sub_memory[1:].shape == (3, 5)

    # out-of-range indices must raise RuntimeError
    _expect_runtime_error(lambda: test_memory[3])
    _expect_runtime_error(lambda: sub_memory[4])
    _expect_runtime_error(lambda: sub_memory[2:5])

    # test try_pin and unpin
    sub_block = test_memory[1, :2]
    array = sub_block.try_pin()
    assert isinstance(array, np.ndarray)
    array[0, 0] = 5  # test_memory[1, 0, 0] = 5
    array[1, 1] = 10  # test_memory[1, 1, 1] = 10
    sub_block.unpin()

    array = test_memory[1].try_pin()
    assert array[0, 0] == 5
    assert array[1, 1] == 10
    test_memory[1].unpin()

    sub_block = test_memory[1, :3, 1:]
    array = sub_block.try_pin()
    assert array[1, 0] == 10
    # Release the pin; the original left this block pinned.
    sub_block.unpin()

    # test get
    array = test_memory[1].get(sync='sync')
    assert array[0, 0] == 5
    assert array[1, 1] == 10

    # test set
    from_array = np.array([6, 11])
    test_memory[2, 0, 2:4].set(from_array, sync='rsync')  # test_memory[2,0,2:4] = [6,11]
    array = test_memory[2].try_pin()
    assert array[0, 2] == 6
    assert array[0, 3] == 11
    # Release the pin; the original left this block pinned.
    test_memory[2].unpin()

    # when get and set on not contiguous gas, RuntimeError should be raised
    _expect_runtime_error(lambda: test_memory[1, :2, 1:].get(sync='sync'))
    _expect_runtime_error(
        lambda: test_memory[1, :2, 1].set(from_array, sync='rsync'))

    # test get and set on array with some dimension of size 1
    test_memory_2 = hpx.GlobalMemory.alloc_local_at(2, (2, 2, 2, 2, 2),
                                                    int_dtype, hpx.HERE())
    from_array = np.array([[1, 2], [3, 4]])
    test_memory_2[1, 1, 0, 1, :, :].set(from_array, sync='rsync')
    get_array = test_memory_2[1, 1, 0, :, :, :].get(sync='sync')
    assert np.array_equal(get_array[1], from_array)

    # test free
    test_memory.free_sync()
    # The second allocation was never released in the original.
    test_memory_2.free_sync()
    hpx.exit()