async def talk_to_server(ip, port):
    """Exchange one object with the server over each of two endpoints.

    Two endpoints are opened to the same ``ip``/``port``. On each, one
    message is received (into a pre-built object, or via ``recv_future``
    when ``args.blind_recv`` is set) and one message is sent back. The
    traffic is then printed, or, when ``args.validate`` is set, the
    received objects are compared against a payload of ``'a'`` characters.

    Args:
        ip: Server address, passed unchanged to ``ucp.get_endpoint``.
        port: Server port, passed unchanged to ``ucp.get_endpoint``.

    Raises:
        AssertionError: In validate mode, if a received message differs
            from ``get_msg('a' * 2 ** max_msg_log, args.object_type)``.
    """
    global args
    global max_msg_log

    start_string = "in talk_to_server using " + args.object_type
    if args.blind_recv:
        start_string += " + blind recv"
    print(start_string)

    ep1 = ucp.get_endpoint(ip, port)
    ep2 = ucp.get_endpoint(ip, port)

    if not args.blind_recv:
        # Pre-build receive objects; in validate mode they are sized to the
        # full 2 ** max_msg_log payload.
        recv_string1 = "hello from ucx client @" + socket.gethostname()
        if args.validate:
            recv_string1 = 'c' * (2 ** max_msg_log)
        recv_string2 = "hello from ucx client @" + socket.gethostname()
        if args.validate:
            recv_string2 = 'c' * (2 ** max_msg_log)
        recv_msg1 = get_msg(recv_string1, args.object_type)
        recv_msg2 = get_msg(recv_string2, args.object_type)
        recv_req1 = await ep1.recv_obj(recv_msg1, sys.getsizeof(recv_msg1))
        recv_req2 = await ep2.recv_obj(recv_msg2, sys.getsizeof(recv_msg2))
    else:
        # Blind receive: the received object is extracted from the request.
        recv_req1 = await ep1.recv_future()
        recv_req2 = await ep2.recv_future()
        recv_msg1 = ucp.get_obj_from_msg(recv_req1)
        recv_msg2 = ucp.get_obj_from_msg(recv_req2)

    print("about to send")
    send_string1 = "hello from ucx client ep2 @" + socket.gethostname()
    if args.validate:
        # Previously assigned to an unused `send_string`, so the validate
        # payload was never actually sent.
        send_string1 = 'd' * (2 ** max_msg_log)
    send_string2 = "hello from ucx client ep1 @" + socket.gethostname()
    if args.validate:
        send_string2 = 'd' * (2 ** max_msg_log)
    send_msg1 = get_msg(send_string1, args.object_type)
    send_msg2 = get_msg(send_string2, args.object_type)
    send_req1 = await ep1.send_obj(send_msg1, sys.getsizeof(send_msg1))
    send_req2 = await ep2.send_obj(send_msg2, sys.getsizeof(send_msg2))

    if not args.validate:
        print_msg("client sent: ", send_msg1, args.object_type)
        print_msg("client sent: ", send_msg2, args.object_type)
        print_msg("client received: ", recv_msg1, args.object_type)
        print_msg("client received: ", recv_msg2, args.object_type)
    else:
        expected = get_msg('a' * (2 ** max_msg_log), args.object_type)
        assert recv_msg1 == expected
        assert recv_msg2 == expected

    ucp.destroy_ep(ep1)
    ucp.destroy_ep(ep2)
    print('talk_to_server done')
async def tmp():
    """Send ``b'hi'`` to two local listeners and wait for a reply from each.

    Endpoints are opened to this host's own UCX address on ports 13337 and
    13338. Each endpoint sends one message and then awaits one blind
    receive; progress markers are printed along the way.
    """
    local_addr = ucp.get_address().encode('utf-8')
    first_ep = ucp.get_endpoint(local_addr, 13337)
    second_ep = ucp.get_endpoint(local_addr, 13338)

    # First endpoint: send, then wait for whatever comes back.
    await first_ep.send_obj(b'hi')
    print("past send1")
    await first_ep.recv_future()
    print("past recv1")

    # Second endpoint: same exchange.
    await second_ep.send_obj(b'hi')
    await second_ep.recv_future()
    print("past recv2")
async def talk_to_server_async(ip, port):
    """Run a recv/send ping-pong benchmark against the server.

    For every power-of-two message size below ``2 ** msg_log`` the client
    runs ``warmup_iters`` untimed round trips followed by ``max_iters``
    timed ones, then prints one row: size, average latency (from
    ``get_avg_us``) and one-way bandwidth.

    Args:
        ip: Server address, passed unchanged to ``ucp.get_endpoint``.
        port: Server port, passed unchanged to ``ucp.get_endpoint``.
    """
    # The header lists exactly the three columns each row prints. The old
    # format string had four placeholders for five names, silently dropping
    # one and advertising columns that never carry data.
    print("{}\t{}\t{}".format("Size (bytes)", "Latency (us)", "BW (GB/s)"))

    ep = ucp.get_endpoint(ip, port)

    for i in range(msg_log):
        msg_len = 2 ** i
        send_obj = b'0' * msg_len

        # Untimed warm-up round trips.
        for j in range(warmup_iters):
            await recv(ep, msg_len)
            await ep.send_obj(send_obj)

        start = time.time()
        for j in range(max_iters):
            await recv(ep, msg_len)
            await ep.send_obj(send_obj)
        end = time.time()
        lat = end - start

        # `lat` covers max_iters round trips, so lat / 2 is the one-way time
        # for max_iters messages. The previous expression ignored max_iters
        # and divided by 1e6, which equals GB/s only when max_iters == 1000.
        bw_gb_per_s = (msg_len * max_iters / (lat / 2)) / 1e9
        print("{}\t\t{:.2f}\t\t{:.2f}".format(msg_len,
                                              get_avg_us(lat, max_iters),
                                              bw_gb_per_s))

    print("past iters")
    ucp.destroy_ep(ep)
    print("past ep destroy")
async def talk_to_server(ip, port):
    """Windowed bidirectional benchmark client using awaitable transfers.

    One send buffer and one receive buffer of ``1 << max_msg_log`` bytes
    are allocated (CUDA or host, depending on ``args.mem_type``). For each
    power-of-two message size, warm-up and timed iterations each post
    ``window_size`` send/recv pairs and wait for the whole window.

    Args:
        ip: Server address, passed unchanged to ``ucp.get_endpoint``.
        port: Server port, passed unchanged to ``ucp.get_endpoint``.
    """
    global args

    msg_log = max_msg_log
    iters = max_iters
    use_cuda = args.mem_type == 'cuda'

    server_ep = ucp.get_endpoint(ip, port)
    comm_ep = server_ep

    send_buffer_region = ucp.buffer_region()
    recv_buffer_region = ucp.buffer_region()
    if use_cuda:
        send_buffer_region.alloc_cuda(1 << msg_log)
        recv_buffer_region.alloc_cuda(1 << msg_log)
    else:
        send_buffer_region.alloc_host(1 << msg_log)
        recv_buffer_region.alloc_host(1 << msg_log)

    async def run_window(nbytes):
        """Post one window of send/recv pairs and wait on all of them."""
        in_flight = []
        for _ in range(window_size):
            outgoing = ucp.ucp_msg(send_buffer_region)
            incoming = ucp.ucp_msg(recv_buffer_region)
            in_flight.append(comm_ep.send(outgoing, nbytes))
            in_flight.append(comm_ep.recv(incoming, nbytes))
        await asyncio.wait(in_flight)

    for exponent in range(msg_log):
        msg_len = 2 ** exponent
        warmup_iters = int((0.1 * iters))

        for _ in range(warmup_iters):
            await run_window(msg_len)

        start = time.time()
        for _ in range(iters):
            await run_window(msg_len)
        elapsed = time.time() - start
        # Half round-trip time per iteration in microseconds; computed but
        # not reported, exactly as before.
        lat = ((elapsed / 2) / iters) * 1000000

    if use_cuda:
        send_buffer_region.free_cuda()
        recv_buffer_region.free_cuda()
    else:
        send_buffer_region.free_host()
        recv_buffer_region.free_host()

    ucp.destroy_ep(server_ep)
def talk_to_server(ip, port):
    """Blocking recv-then-send ping-pong benchmark client.

    One send and one receive buffer of ``1 << max_msg_log`` bytes are
    allocated (CUDA or host, depending on ``args.mem_type``). For each
    power-of-two message size a warm-up phase and a timed phase each
    perform a receive followed by a send, blocking on ``result()`` after
    every operation.

    Args:
        ip: Server address, passed unchanged to ``ucp.get_endpoint``.
        port: Server port, passed unchanged to ``ucp.get_endpoint``.
    """
    global args

    msg_log = max_msg_log
    iters = max_iters
    use_cuda = args.mem_type == 'cuda'

    server_ep = ucp.get_endpoint(ip, port)
    comm_ep = server_ep

    send_buffer_region = ucp.buffer_region()
    recv_buffer_region = ucp.buffer_region()
    if use_cuda:
        send_buffer_region.alloc_cuda(1 << msg_log)
        recv_buffer_region.alloc_cuda(1 << msg_log)
    else:
        send_buffer_region.alloc_host(1 << msg_log)
        recv_buffer_region.alloc_host(1 << msg_log)

    def ping_pong(outgoing, incoming, nbytes):
        """Block on one receive, then block on one send."""
        comm_ep.recv(incoming, nbytes).result()
        comm_ep.send(outgoing, nbytes).result()

    for exponent in range(msg_log):
        msg_len = 2 ** exponent
        warmup_iters = int((0.1 * iters))

        for _ in range(warmup_iters):
            outgoing = ucp.ucp_msg(send_buffer_region)
            incoming = ucp.ucp_msg(recv_buffer_region)
            ping_pong(outgoing, incoming, msg_len)

        # Build every message object before the clock starts so that
        # construction cost is kept out of the timed region.
        msg_pairs = [
            (ucp.ucp_msg(send_buffer_region), ucp.ucp_msg(recv_buffer_region))
            for _ in range(iters)
        ]

        start = time.time()
        for outgoing, incoming in msg_pairs:
            ping_pong(outgoing, incoming, msg_len)
        elapsed = time.time() - start
        # Half round-trip time per iteration in microseconds; computed but
        # not reported, exactly as before.
        lat = ((elapsed / 2) / iters) * 1000000

    if use_cuda:
        send_buffer_region.free_cuda()
        recv_buffer_region.free_cuda()
    else:
        send_buffer_region.free_host()
        recv_buffer_region.free_host()

    ucp.destroy_ep(server_ep)
async def echo_pair(cuda_info=None):
    """Yield a ``(listener, client)`` pair and tear both down afterwards.

    A listener is started with the handler from ``ucp.make_server`` and its
    coroutine is scheduled on the event loop. A client endpoint is then
    opened to the module-level ``address`` on the listener's port. When the
    generator is resumed or closed, the listener task is cancelled and the
    client endpoint destroyed.

    NOTE(review): this is an async generator; it is presumably wrapped by a
    context-manager or fixture decorator outside this view. Confirm.

    Args:
        cuda_info: Passed unchanged to ``ucp.make_server``.

    Yields:
        Tuple of the listener object and the connected client endpoint.
    """
    listener = ucp.start_listener(ucp.make_server(cuda_info),
                                  is_coroutine=True)
    serve_task = asyncio.get_event_loop().create_task(listener.coroutine)
    client = ucp.get_endpoint(address.encode(), listener.port)

    try:
        yield listener, client
    finally:
        serve_task.cancel()
        ucp.destroy_ep(client)
async def talk_to_server(ip, port):
    """Receive one host-memory message from the server, then send one back.

    Buffers are ``1 << max_msg_log`` bytes. With ``args.blind_recv`` the
    receive uses ``recv_future`` and no receive buffer is allocated. With
    ``args.check_data`` the buffers are pre-filled via ``set_mem(1, ...)``
    and the received request is checked with ``check_mem(0, ...)``, whose
    result is printed as the error count.

    Args:
        ip: Server address, passed unchanged to ``ucp.get_endpoint``.
        port: Server port, passed unchanged to ``ucp.get_endpoint``.
    """
    global max_msg_log
    global args

    banner_parts = ["in talk_to_server"]
    if args.blind_recv:
        banner_parts.append(" + blind recv")
    if args.check_data:
        banner_parts.append(" + data validity check")
    print("".join(banner_parts))

    msg_log = max_msg_log
    nbytes = 1 << msg_log
    ep = ucp.get_endpoint(ip, port)

    send_buffer_region = ucp.BufferRegion()
    send_buffer_region.alloc_host(nbytes)
    send_msg = ucp.Message(send_buffer_region)

    # A receive buffer is only needed when the receive is not blind.
    recv_msg = None
    recv_buffer_region = None
    if not args.blind_recv:
        recv_buffer_region = ucp.BufferRegion()
        recv_buffer_region.alloc_host(nbytes)
        recv_msg = ucp.Message(recv_buffer_region)

    if args.check_data:
        send_msg.set_mem(1, nbytes)
        if not args.blind_recv:
            recv_msg.set_mem(1, nbytes)

    if args.blind_recv:
        recv_req = await ep.recv_future()
    else:
        recv_req = await ep.recv(recv_msg, nbytes)

    send_req = await ep.send(send_msg, nbytes)

    if args.check_data:
        errs = recv_req.check_mem(0, nbytes)
        print("num errs: " + str(errs))

    send_buffer_region.free_host()
    if not args.blind_recv:
        recv_buffer_region.free_host()

    ucp.destroy_ep(ep)
    print("done with talk_to_server")
async def talk_to_server(ip, port):
    """Receive one CUDA-memory message from the server, then send one back.

    Buffers are ``1 << max_msg_log`` bytes allocated with ``alloc_cuda``.
    With ``args.blind_recv`` the receive uses ``recv_future`` and no
    receive buffer is allocated. With ``args.use_fast`` the ``recv_fast``
    and ``send_fast`` endpoint methods are used instead of ``recv`` and
    ``send``.

    Args:
        ip: Server address, passed unchanged to ``ucp.get_endpoint``.
        port: Server port, passed unchanged to ``ucp.get_endpoint``.
    """
    global args
    global max_msg_log

    msg_log = max_msg_log
    nbytes = 1 << msg_log

    banner_parts = ["in talk_to_server"]
    if args.blind_recv:
        banner_parts.append(" + blind recv")
    if args.use_fast:
        banner_parts.append(" + using fast ops")
    print("".join(banner_parts))

    ep = ucp.get_endpoint(ip, port)
    print("got endpoint")

    send_buffer_region = ucp.buffer_region()
    send_buffer_region.alloc_cuda(nbytes)

    # A receive buffer is only needed when the receive is not blind.
    recv_msg = None
    recv_buffer_region = None
    if not args.blind_recv:
        recv_buffer_region = ucp.buffer_region()
        recv_buffer_region.alloc_cuda(nbytes)
        recv_msg = ucp.ucp_msg(recv_buffer_region)

    send_msg = ucp.ucp_msg(send_buffer_region)

    if args.blind_recv:
        recv_req = await ep.recv_future()
    else:
        receive = ep.recv_fast if args.use_fast else ep.recv
        recv_req = await receive(recv_msg, nbytes)

    transmit = ep.send_fast if args.use_fast else ep.send
    send_req = await transmit(send_msg, nbytes)

    send_buffer_region.free_cuda()
    if not args.blind_recv:
        recv_buffer_region.free_cuda()

    ucp.destroy_ep(ep)
    print("passed talk_to_server")
def talk_to_server(ip, port):
    """Blocking windowed receive benchmark client.

    A receive buffer of ``1 << max_msg_log`` bytes is allocated (CUDA or
    host, depending on ``args.mem_type``). For each power-of-two message
    size, warm-up and main iterations each post ``window_size`` receives
    and poll their futures until all report done.

    Args:
        ip: Server address, passed unchanged to ``ucp.get_endpoint``.
        port: Server port, passed unchanged to ``ucp.get_endpoint``.
    """
    global args

    msg_log = max_msg_log
    iters = max_iters

    server_ep = ucp.get_endpoint(ip, port)
    comm_ep = server_ep

    recv_buffer_region = ucp.buffer_region()
    if args.mem_type == 'cuda':
        recv_buffer_region.alloc_cuda(1 << msg_log)
    else:
        recv_buffer_region.alloc_host(1 << msg_log)

    def recv_window(nbytes):
        """Post one window of receives and poll until every one is done."""
        pending = []
        for _ in range(window_size):
            recv_msg = ucp.ucp_msg(recv_buffer_region)
            pending.append(comm_ep.recv(recv_msg, nbytes))
        # Rebuild the list each pass rather than removing items while
        # iterating it: removal during iteration skips the element after
        # each removed one.
        while pending:
            pending = [ft for ft in pending if not ft.done()]

    for i in range(msg_log):
        msg_len = 2 ** i
        warmup_iters = int((0.1 * iters))

        for _ in range(warmup_iters):
            recv_window(msg_len)

        for _ in range(iters):
            recv_window(msg_len)

    if args.mem_type == 'cuda':
        recv_buffer_region.free_cuda()
    else:
        recv_buffer_region.free_host()

    ucp.destroy_ep(server_ep)
async def talk_to_server(ip, port):
    """Receive one message from the server and echo its buffer region back.

    The receive is sized by ``args.n_bytes`` unless ``args.blind_recv`` is
    set, in which case ``recv_future`` is used. The buffer region of the
    received request is then sent back on the same endpoint.

    Args:
        ip: Server address, passed unchanged to ``ucp.get_endpoint``.
        port: Server port, passed unchanged to ``ucp.get_endpoint``.
    """
    endpoint = ucp.get_endpoint(ip, port)

    # Step 1: receive (blind or sized).
    incoming = await (
        endpoint.recv_future()
        if args.blind_recv
        else endpoint.recv_obj(args.n_bytes)
    )

    # Step 2: reply with the buffer that was just received.
    reply_region = incoming.get_buffer_region()
    print("about to reply")
    await endpoint.send_obj(reply_region)

    ucp.destroy_ep(endpoint)
    print("talk_to_server done")
async def talk_to_server(ip, port):
    """Blind-receive / send ping-pong benchmark client.

    A single send buffer of ``1 << max_msg_log`` bytes is allocated (CUDA
    or host, depending on ``args.mem_type``) and wrapped in one message
    object that is reused for every send. For each power-of-two message
    size, warm-up and timed iterations each await a ``recv_future`` and
    then a send of ``msg_len`` bytes.

    Args:
        ip: Server address, passed unchanged to ``ucp.get_endpoint``.
        port: Server port, passed unchanged to ``ucp.get_endpoint``.
    """
    global args

    msg_log = max_msg_log
    iters = max_iters
    use_cuda = args.mem_type == 'cuda'

    server_ep = ucp.get_endpoint(ip, port)

    send_buffer_region = ucp.buffer_region()
    if use_cuda:
        send_buffer_region.alloc_cuda(1 << msg_log)
    else:
        send_buffer_region.alloc_host(1 << msg_log)
    send_msg = ucp.ucp_msg(send_buffer_region)

    async def round_trip(nbytes):
        """Await one blind receive, then one send of ``nbytes`` bytes."""
        incoming = server_ep.recv_future()
        await incoming
        reply_req = await server_ep.send(send_msg, nbytes)

    for exponent in range(msg_log):
        msg_len = 2 ** exponent
        warmup_iters = int((0.1 * iters))

        for _ in range(warmup_iters):
            await round_trip(msg_len)

        start = time.time()
        for _ in range(iters):
            await round_trip(msg_len)
        elapsed = time.time() - start
        # Half round-trip time per iteration in microseconds; computed but
        # not reported, exactly as before.
        lat = ((elapsed / 2) / iters) * 1000000

    if use_cuda:
        send_buffer_region.free_cuda()
    else:
        send_buffer_region.free_host()

    ucp.destroy_ep(server_ep)
async def connect(host, port=13337, message=True, type_='bytes'):
    """Connect to a server and optionally exchange one message.

    When ``message`` is truthy, the module-level ``client_msg`` is wrapped
    (as ``memoryview`` or ``bytes``) and sent, then a reply of
    ``len(client_msg)`` bytes is received, decoded and printed. The
    endpoint is destroyed before returning.

    Args:
        host: Server address, passed unchanged to ``ucp.get_endpoint``.
        port: Server port.
        message: Whether to perform the send/receive exchange.
        type_: ``'memoryview'`` wraps the payload in a memoryview; any other
            value sends it as ``bytes``.
    """
    wrap = memoryview if type_ == 'memoryview' else bytes

    print("3. Starting connect")
    ep = ucp.get_endpoint(host, port)

    if message:
        print("4. Client send")
        payload = wrap(client_msg)
        await ep.send_obj(payload, name='connect-send')

        # The reply is received with an explicit size matching what was
        # sent, rather than through a blind recv_future().
        reply = await ep.recv_obj(len(client_msg))
        reply_obj = ucp.get_obj_from_msg(reply)
        print("8. Client got message: {}".format(bytes(reply_obj).decode()))

    print("9. Stopping client")
    ucp.destroy_ep(ep)
async def connect(host, port, n_bytes, n_iter, recv, np, verbose, increment):
    """Run a timed round-trip benchmark against the server and print a report.

    An array of ``n_bytes`` zeros is sent and received back ``n_iter``
    times. The final array is checked against ``int(increment) * n_iter``
    in every element, a throughput report is printed, and a closing
    receive/send exchange is performed before the endpoint is closed.

    Args:
        host: Server host name; encoded and passed to ``ucp.get_endpoint``.
        port: Server port.
        n_bytes: Number of ``u1`` elements in the payload array.
        n_iter: Number of timed round trips.
        recv: ``'recv_into'`` receives into the existing array; any other
            value receives a new object with ``recv_obj``.
        np: Array module used to build and check the payload (its
            ``__name__`` is compared with ``'cupy'`` to set the ``cuda``
            flag).
        verbose: Accepted but not used in this function.
        increment: Converted with ``int()`` to scale the expected value.

    Raises:
        AssertionError: From ``np.testing.assert_array_equal`` when the
            final array differs from the expected one.
    """
    ep = ucp.get_endpoint(host.encode(), port)
    arr = np.zeros(n_bytes, dtype='u1')
    receive_in_place = recv == 'recv_into'

    start = clock()
    for _ in range(n_iter):
        await ep.send_obj(arr)
        if receive_in_place:
            await ep.recv_into(arr, arr.nbytes)
            continue
        # Note carried over from the original: this path is failing right now.
        msg = await ep.recv_obj(arr.nbytes, cuda=np.__name__ == 'cupy')
        arr = np.asarray(msg.get_obj())
    stop = clock()

    # Every element should be 0 (no increment) or n_iter.
    # NOTE(review): the array dtype is 'u1', so the multiplier presumably
    # has to fit in one byte; confirm n_iter < 256 when increment is set.
    expected = np.ones(n_bytes, dtype='u1')
    expected *= (int(increment) * n_iter)
    np.testing.assert_array_equal(arr, expected)

    elapsed = stop - start

    report = (
        "Roundtrip benchmark",
        "-------------------",
        f"n_iter | {n_iter}",
        f"n_bytes | {format_bytes(n_bytes)}",
        f"recv | {recv}",
        f"object | {np.__name__}",
        f"inc | {increment}",
        "\n===================",
    )
    for line in report:
        print(line)
    # Factor 2 for the round trip, n_iter for the number of trips.
    print(format_bytes(2 * n_iter * arr.nbytes / elapsed), '/ s')
    print("===================")

    # Closing exchange with the server before the endpoint is closed.
    await ep.recv_future()
    await ep.send_obj(np.ones(1))
    ep.close()
async def talk_to_server(ip, port):
    """Connect to the server, send the payload ``b"42"``, and disconnect.

    Args:
        ip: Server address, passed unchanged to ``ucp.get_endpoint``.
        port: Server port, passed unchanged to ``ucp.get_endpoint``.
    """
    endpoint = ucp.get_endpoint(ip, port)
    payload = bytes(b"42")
    await endpoint.send_obj(payload)
    ucp.destroy_ep(endpoint)