Example #1
0
def talk_to_client(client_ep):
    """Send windows of messages to a client and print uni-directional bandwidth.

    For every message size ``2**i`` with ``i`` in ``range(max_msg_log)`` this
    runs ``int(0.1 * max_iters)`` warm-up windows followed by ``max_iters``
    timed windows. A window posts ``window_size`` sends from the shared send
    buffer and then polls the returned futures until every one reports
    ``done()``. One "size, GB/s" row is printed per message size.

    When the sweep finishes the send buffer is freed, the endpoint is
    destroyed, the module-level ``cb_not_done`` flag is set to ``False`` and
    ``ucp.stop_server()`` is called.

    Args:
        client_ep: Endpoint used for ``send`` and later handed to
            ``ucp.destroy_ep``.
    """
    global cb_not_done

    msg_log = max_msg_log
    iters = max_iters

    send_buffer_region = ucp.buffer_region()
    use_cuda = args.mem_type == 'cuda'
    if use_cuda:
        send_buffer_region.alloc_cuda(1 << msg_log)
    else:
        send_buffer_region.alloc_host(1 << msg_log)

    def send_window(msg_len):
        """Post ``window_size`` sends of ``msg_len`` and wait for all of them."""
        pending = []
        for _ in range(window_size):
            send_msg = ucp.ucp_msg(send_buffer_region)
            pending.append(client_ep.send(send_msg, msg_len))
        # Rebuild the list on every poll rather than calling remove() while
        # iterating over it: removing during iteration skips the element that
        # follows each removed one.
        while pending:
            pending = [ft for ft in pending if not ft.done()]

    print("{}\t\t{}".format("Size (bytes)", "Uni-Bandwidth (GB/s)"))

    for i in range(msg_log):
        msg_len = 2**i

        # Warm-up windows are not included in the measurement.
        for _ in range(int(0.1 * iters)):
            send_window(msg_len)

        # perf_counter() is monotonic, so the interval cannot be distorted by
        # wall-clock adjustments the way time.time() can.
        start = time.perf_counter()
        for _ in range(iters):
            send_window(msg_len)
        elapsed = time.perf_counter() - start

        bw = (iters * window_size * msg_len) / elapsed
        bw = bw / 1e9  # GB/s
        print("{}\t\t{}".format(msg_len, bw))

    if use_cuda:
        send_buffer_region.free_cuda()
    else:
        send_buffer_region.free_host()

    ucp.destroy_ep(client_ep)
    cb_not_done = False
    ucp.stop_server()
Example #2
0
async def talk_to_server(ip, port):
    """Connect to a server and exchange windows of paired sends and receives.

    Obtains an endpoint for ``ip``/``port`` and allocates one send and one
    receive buffer of size ``1 << max_msg_log`` (CUDA or host memory,
    depending on ``args.mem_type``). For each message size ``2**i`` with
    ``i`` in ``range(max_msg_log)`` it runs ``int(0.1 * max_iters)`` warm-up
    windows and then ``max_iters`` timed windows. Each window posts
    ``window_size`` send/recv pairs and awaits ``asyncio.wait`` on all the
    resulting futures. The per-size timing figure is computed but not
    printed. Finally both buffers are freed and the endpoint is destroyed.

    Args:
        ip: Address passed to ``ucp.get_endpoint``.
        port: Port passed to ``ucp.get_endpoint``.
    """
    size_exp = max_msg_log
    n_iters = max_iters

    endpoint = ucp.get_endpoint(ip, port)

    tx_region = ucp.buffer_region()
    rx_region = ucp.buffer_region()

    on_gpu = args.mem_type == 'cuda'
    if on_gpu:
        tx_region.alloc_cuda(1 << size_exp)
        rx_region.alloc_cuda(1 << size_exp)
    else:
        tx_region.alloc_host(1 << size_exp)
        rx_region.alloc_host(1 << size_exp)

    async def exchange_windows(count, nbytes):
        """Run ``count`` windows of ``window_size`` send/recv pairs."""
        for _ in range(count):
            futures = []
            for _slot in range(window_size):
                outgoing = ucp.ucp_msg(tx_region)
                incoming = ucp.ucp_msg(rx_region)
                # Post the send before the receive, then queue both futures.
                send_ft = endpoint.send(outgoing, nbytes)
                recv_ft = endpoint.recv(incoming, nbytes)
                futures += [send_ft, recv_ft]
            await asyncio.wait(futures)

    for exponent in range(size_exp):
        nbytes = 2**exponent

        await exchange_windows(int(0.1 * n_iters), nbytes)

        began = time.time()
        await exchange_windows(n_iters, nbytes)
        elapsed = time.time() - began
        # Half the elapsed time per iteration, scaled by 1e6; not reported.
        lat = ((elapsed / 2) / n_iters) * 1000000

    if on_gpu:
        tx_region.free_cuda()
        rx_region.free_cuda()
    else:
        tx_region.free_host()
        rx_region.free_host()

    ucp.destroy_ep(endpoint)
Example #3
0
def talk_to_server(ip, port):
    """Run a blocking receive-then-send exchange with a server for each size.

    Obtains an endpoint for ``ip``/``port`` and allocates one send and one
    receive buffer of size ``1 << max_msg_log`` (CUDA or host memory,
    depending on ``args.mem_type``). For each message size ``2**i`` with
    ``i`` in ``range(max_msg_log)`` it performs ``int(0.1 * max_iters)``
    warm-up exchanges, pre-builds ``max_iters`` message pairs, and then times
    ``max_iters`` exchanges. Every exchange posts a receive, waits on its
    ``result()``, then posts a send and waits on its ``result()``. The
    per-size timing figure is computed but not printed. Finally both buffers
    are freed and the endpoint is destroyed.

    Args:
        ip: Address passed to ``ucp.get_endpoint``.
        port: Port passed to ``ucp.get_endpoint``.
    """
    size_exp = max_msg_log
    n_iters = max_iters

    endpoint = ucp.get_endpoint(ip, port)

    tx_region = ucp.buffer_region()
    rx_region = ucp.buffer_region()

    on_gpu = args.mem_type == 'cuda'
    if on_gpu:
        tx_region.alloc_cuda(1 << size_exp)
        rx_region.alloc_cuda(1 << size_exp)
    else:
        tx_region.alloc_host(1 << size_exp)
        rx_region.alloc_host(1 << size_exp)

    for exponent in range(size_exp):
        nbytes = 2 ** exponent

        for _ in range(int(0.1 * n_iters)):
            outgoing = ucp.ucp_msg(tx_region)
            incoming = ucp.ucp_msg(rx_region)
            recv_req = endpoint.recv(incoming, nbytes)
            recv_req.result()
            send_req = endpoint.send(outgoing, nbytes)
            send_req.result()

        # Build the message objects up front so their construction is kept
        # out of the timed loop. Each tuple creates the send message first.
        msg_pairs = [
            (ucp.ucp_msg(tx_region), ucp.ucp_msg(rx_region))
            for _ in range(n_iters)
        ]

        began = time.time()
        for outgoing, incoming in msg_pairs:
            recv_req = endpoint.recv(incoming, nbytes)
            recv_req.result()
            send_req = endpoint.send(outgoing, nbytes)
            send_req.result()
        elapsed = time.time() - began
        # Half the elapsed time per iteration, scaled by 1e6; not reported.
        lat = ((elapsed / 2) / n_iters) * 1000000

    if on_gpu:
        tx_region.free_cuda()
        rx_region.free_cuda()
    else:
        tx_region.free_host()
        rx_region.free_host()

    ucp.destroy_ep(endpoint)
Example #4
0
async def talk_to_server(ip, port):
    """Perform one receive followed by one send against a server (host memory).

    Prints a banner describing the active options, obtains an endpoint for
    ``ip``/``port`` and allocates a host send buffer of size
    ``1 << max_msg_log``. Unless ``args.blind_recv`` is set, a host receive
    buffer of the same size is allocated as well and the receive is posted
    with ``ep.recv``; otherwise ``ep.recv_future()`` is awaited instead.
    With ``args.check_data`` the buffers are filled via ``set_mem(1, ...)``
    before the exchange and the received object's ``check_mem(0, ...)``
    result is printed afterwards. Buffers are then freed and the endpoint
    destroyed.

    Args:
        ip: Address passed to ``ucp.get_endpoint``.
        port: Port passed to ``ucp.get_endpoint``.
    """
    blind = args.blind_recv
    validate = args.check_data

    banner = "".join([
        "in talk_to_server",
        " + blind recv" if blind else "",
        " + data validity check" if validate else "",
    ])
    print(banner)

    size_exp = max_msg_log

    endpoint = ucp.get_endpoint(ip, port)

    tx_region = ucp.buffer_region()
    nbytes = 1 << size_exp
    tx_region.alloc_host(nbytes)
    outgoing = ucp.ucp_msg(tx_region)

    # The receive side only gets its own buffer when not receiving blind.
    rx_region = None
    incoming = None
    if not blind:
        rx_region = ucp.buffer_region()
        rx_region.alloc_host(nbytes)
        incoming = ucp.ucp_msg(rx_region)

    if validate:
        outgoing.set_mem(1, nbytes)
        if not blind:
            incoming.set_mem(1, nbytes)

    if blind:
        received = await endpoint.recv_future()
    else:
        received = await endpoint.recv(incoming, nbytes)

    sent = await endpoint.send(outgoing, nbytes)

    if validate:
        mismatches = received.check_mem(0, nbytes)
        print("num errs: " + str(mismatches))

    tx_region.free_host()
    if not blind:
        rx_region.free_host()

    ucp.destroy_ep(endpoint)
    print("done with talk_to_server")
0
async def talk_to_client(client_ep):
    """Send windows of messages to a client and print the measured bandwidth.

    Allocates one send buffer of size ``1 << max_msg_log`` (CUDA or host
    memory, depending on ``args.mem_type``). For each message size ``2**i``
    with ``i`` in ``range(max_msg_log)`` it runs ``int(0.1 * max_iters)``
    warm-up windows and then ``max_iters`` timed windows. Each window posts
    ``window_size`` sends and awaits ``asyncio.wait`` on the returned
    futures. One row per size is printed under the "Bi-Bandwidth (GB/s)"
    header. Finally the buffer is freed, the endpoint destroyed and
    ``ucp.stop_server()`` called.

    Args:
        client_ep: Endpoint used for ``send`` and later handed to
            ``ucp.destroy_ep``.
    """
    size_exp = max_msg_log
    n_iters = max_iters

    tx_region = ucp.buffer_region()

    on_gpu = args.mem_type == 'cuda'
    if on_gpu:
        tx_region.alloc_cuda(1 << size_exp)
    else:
        tx_region.alloc_host(1 << size_exp)

    async def send_windows(count, nbytes):
        """Run ``count`` windows of ``window_size`` sends each."""
        for _ in range(count):
            futures = []
            for _slot in range(window_size):
                outgoing = ucp.ucp_msg(tx_region)
                futures.append(client_ep.send(outgoing, nbytes))
            await asyncio.wait(futures)

    print("{}\t\t{}".format("Size (bytes)", "Bi-Bandwidth (GB/s)"))

    for exponent in range(size_exp):
        nbytes = 2**exponent

        await send_windows(int(0.1 * n_iters), nbytes)

        began = time.time()
        await send_windows(n_iters, nbytes)
        elapsed = time.time() - began

        # Only the bytes sent from this side are counted.
        throughput = (n_iters * window_size * nbytes) / elapsed
        throughput = throughput / 1e9  # GB/s
        print("{}\t\t{}".format(nbytes, throughput))

    if on_gpu:
        tx_region.free_cuda()
    else:
        tx_region.free_host()

    ucp.destroy_ep(client_ep)
    ucp.stop_server()
Example #6
0
async def talk_to_server(ip, port):
    """Perform one receive followed by one send against a server (CUDA memory).

    Prints a banner describing the active options, obtains an endpoint for
    ``ip``/``port`` and allocates a CUDA send buffer of size
    ``1 << max_msg_log``. Unless ``args.blind_recv`` is set, a CUDA receive
    buffer of the same size is allocated and the receive is posted with
    ``recv_fast`` or ``recv`` according to ``args.use_fast``; with blind
    receive ``ep.recv_future()`` is awaited instead. The send uses
    ``send_fast`` or ``send`` according to ``args.use_fast``. Buffers are
    then freed and the endpoint destroyed.

    Args:
        ip: Address passed to ``ucp.get_endpoint``.
        port: Port passed to ``ucp.get_endpoint``.
    """
    size_exp = max_msg_log
    blind = args.blind_recv
    fast = args.use_fast

    banner = "".join([
        "in talk_to_server",
        " + blind recv" if blind else "",
        " + using fast ops" if fast else "",
    ])
    print(banner)

    endpoint = ucp.get_endpoint(ip, port)
    print("got endpoint")

    tx_region = ucp.buffer_region()
    nbytes = 1 << size_exp
    tx_region.alloc_cuda(nbytes)

    # The receive side only gets its own buffer when not receiving blind.
    rx_region = None
    incoming = None
    if not blind:
        rx_region = ucp.buffer_region()
        rx_region.alloc_cuda(nbytes)
        incoming = ucp.ucp_msg(rx_region)

    outgoing = ucp.ucp_msg(tx_region)

    if blind:
        received = await endpoint.recv_future()
    elif fast:
        received = await endpoint.recv_fast(incoming, nbytes)
    else:
        received = await endpoint.recv(incoming, nbytes)

    if fast:
        sent = await endpoint.send_fast(outgoing, nbytes)
    else:
        sent = await endpoint.send(outgoing, nbytes)

    tx_region.free_cuda()
    if not blind:
        rx_region.free_cuda()
    ucp.destroy_ep(endpoint)

    print("passed talk_to_server")
Example #7
0
def talk_to_server(ip, port):
    """Receive windows of messages from a server for every message size.

    Obtains an endpoint for ``ip``/``port`` and allocates one receive buffer
    of size ``1 << max_msg_log`` (CUDA or host memory, depending on
    ``args.mem_type``). For each message size ``2**i`` with ``i`` in
    ``range(max_msg_log)`` it runs ``int(0.1 * max_iters)`` warm-up windows
    followed by ``max_iters`` further windows. A window posts ``window_size``
    receives and then polls the returned futures until every one reports
    ``done()``. Finally the buffer is freed and the endpoint destroyed.

    Args:
        ip: Address passed to ``ucp.get_endpoint``.
        port: Port passed to ``ucp.get_endpoint``.
    """
    msg_log = max_msg_log
    iters = max_iters

    server_ep = ucp.get_endpoint(ip, port)

    recv_buffer_region = ucp.buffer_region()
    use_cuda = args.mem_type == 'cuda'
    if use_cuda:
        recv_buffer_region.alloc_cuda(1 << msg_log)
    else:
        recv_buffer_region.alloc_host(1 << msg_log)

    def recv_window(msg_len):
        """Post ``window_size`` receives of ``msg_len`` and wait for all."""
        pending = []
        for _ in range(window_size):
            recv_msg = ucp.ucp_msg(recv_buffer_region)
            pending.append(server_ep.recv(recv_msg, msg_len))
        # Rebuild the list on every poll rather than calling remove() while
        # iterating over it: removing during iteration skips the element that
        # follows each removed one.
        while pending:
            pending = [ft for ft in pending if not ft.done()]

    for i in range(msg_log):
        msg_len = 2**i

        # No timing is taken on this side, so the warm-up windows and the
        # main windows simply run back to back.
        warmup_iters = int(0.1 * iters)
        for _ in range(warmup_iters + iters):
            recv_window(msg_len)

    if use_cuda:
        recv_buffer_region.free_cuda()
    else:
        recv_buffer_region.free_host()

    ucp.destroy_ep(server_ep)
Example #8
0
async def talk_to_client(client_ep):
    """Run a send/receive ping-pong with a client and print latency per size.

    Allocates one send and one receive buffer of size ``1 << max_msg_log``
    (CUDA or host memory, depending on ``args.mem_type``) and wraps each in a
    single message object that is reused for every exchange. For each message
    size ``2**i`` with ``i`` in ``range(max_msg_log)`` it performs
    ``int(0.1 * max_iters)`` warm-up exchanges and then ``max_iters`` timed
    exchanges. Every exchange awaits ``send_fast`` and then ``recv_fast``.
    One row per size is printed under the "Latency (us)" header. Finally both
    buffers are freed, the endpoint destroyed and ``ucp.stop_server()``
    called.

    Args:
        client_ep: Endpoint used for the exchanges and later handed to
            ``ucp.destroy_ep``.
    """
    size_exp = max_msg_log
    n_iters = max_iters

    tx_region = ucp.buffer_region()
    rx_region = ucp.buffer_region()

    on_gpu = args.mem_type == 'cuda'
    if on_gpu:
        tx_region.alloc_cuda(1 << size_exp)
        rx_region.alloc_cuda(1 << size_exp)
    else:
        tx_region.alloc_host(1 << size_exp)
        rx_region.alloc_host(1 << size_exp)

    outgoing = ucp.ucp_msg(tx_region)
    incoming = ucp.ucp_msg(rx_region)

    async def ping_pong(count, nbytes):
        """Await ``count`` send-then-receive exchanges of ``nbytes``."""
        for _ in range(count):
            sent = await client_ep.send_fast(outgoing, nbytes)
            received = await client_ep.recv_fast(incoming, nbytes)

    print("{}\t\t{}".format("Size (bytes)", "Latency (us)"))

    for exponent in range(size_exp):
        nbytes = 2**exponent

        await ping_pong(int(0.1 * n_iters), nbytes)

        began = time.time()
        await ping_pong(n_iters, nbytes)
        elapsed = time.time() - began

        # Halve the round-trip time per iteration and scale by 1e6 for the
        # "Latency (us)" column.
        latency = ((elapsed / 2) / n_iters) * 1000000
        print("{}\t\t{}".format(nbytes, latency))

    if on_gpu:
        tx_region.free_cuda()
        rx_region.free_cuda()
    else:
        tx_region.free_host()
        rx_region.free_host()

    ucp.destroy_ep(client_ep)
    ucp.stop_server()
Example #9
0
async def talk_to_server(ip, port):
    """Answer a server's messages: await a blind receive, then send a reply.

    Obtains an endpoint for ``ip``/``port`` and allocates one send buffer of
    size ``1 << max_msg_log`` (CUDA or host memory, depending on
    ``args.mem_type``), wrapped in a single message object that is reused for
    every send. For each message size ``2**i`` with ``i`` in
    ``range(max_msg_log)`` it performs ``int(0.1 * max_iters)`` warm-up
    exchanges and then ``max_iters`` timed exchanges. Every exchange awaits
    ``recv_future()`` and then awaits ``send``. The per-size timing figure is
    computed but not printed. Finally the buffer is freed and the endpoint
    destroyed.

    Args:
        ip: Address passed to ``ucp.get_endpoint``.
        port: Port passed to ``ucp.get_endpoint``.
    """
    size_exp = max_msg_log
    n_iters = max_iters

    endpoint = ucp.get_endpoint(ip, port)

    tx_region = ucp.buffer_region()

    on_gpu = args.mem_type == 'cuda'
    if on_gpu:
        tx_region.alloc_cuda(1 << size_exp)
    else:
        tx_region.alloc_host(1 << size_exp)

    outgoing = ucp.ucp_msg(tx_region)

    async def recv_then_send(count, nbytes):
        """Await ``count`` blind receives, each followed by a send."""
        for _ in range(count):
            incoming_ft = endpoint.recv_future()
            await incoming_ft
            sent = await endpoint.send(outgoing, nbytes)

    for exponent in range(size_exp):
        nbytes = 2**exponent

        await recv_then_send(int(0.1 * n_iters), nbytes)

        began = time.time()
        await recv_then_send(n_iters, nbytes)
        elapsed = time.time() - began
        # Half the elapsed time per iteration, scaled by 1e6; not reported.
        lat = ((elapsed / 2) / n_iters) * 1000000

    if on_gpu:
        tx_region.free_cuda()
    else:
        tx_region.free_host()

    ucp.destroy_ep(endpoint)
Example #10
0
def talk_to_client(client_ep):
    """Run a blocking ping-pong with a client and print a latency breakdown.

    Allocates one send and one receive buffer of size ``1 << max_msg_log``
    (CUDA or host memory, depending on ``args.mem_type``). For each message
    size ``2**i`` with ``i`` in ``range(max_msg_log)`` it performs
    ``int(0.1 * max_iters)`` warm-up exchanges, pre-builds ``max_iters``
    message pairs, and then times ``max_iters`` exchanges. Within the timed
    loop the time spent posting each send/recv ("issue") and the time spent
    in each ``result()`` call ("progress") are accumulated separately. One
    row per size is printed with the overall, issue and progress figures.
    Finally both buffers are freed, the endpoint destroyed and the
    module-level ``cb_not_done`` flag set to ``False``.

    Args:
        client_ep: Endpoint used for the exchanges and later handed to
            ``ucp.destroy_ep``.
    """
    global cb_not_done

    size_exp = max_msg_log
    n_iters = max_iters

    tx_region = ucp.buffer_region()
    rx_region = ucp.buffer_region()

    on_gpu = args.mem_type == 'cuda'
    if on_gpu:
        tx_region.alloc_cuda(1 << size_exp)
        rx_region.alloc_cuda(1 << size_exp)
    else:
        tx_region.alloc_host(1 << size_exp)
        rx_region.alloc_host(1 << size_exp)

    row = "{}\t\t{}\t\t{}\t\t{}"
    print(row.format("Size (bytes)", "Latency (us)",
                     "Issue (us)", "Progress (us)"))

    for exponent in range(size_exp):
        nbytes = 2 ** exponent

        # Warm-up: post both operations first, then wait on each result.
        for _ in range(int(0.1 * n_iters)):
            outgoing = ucp.ucp_msg(tx_region)
            incoming = ucp.ucp_msg(rx_region)
            send_req = client_ep.send(outgoing, nbytes)
            recv_req = client_ep.recv(incoming, nbytes)
            send_req.result()
            recv_req.result()

        # Build the message objects up front so their construction is kept
        # out of the timed loop. Each tuple creates the send message first.
        msg_pairs = [
            (ucp.ucp_msg(tx_region), ucp.ucp_msg(rx_region))
            for _ in range(n_iters)
        ]

        began = time.time()
        issue_total = 0
        progress_total = 0

        for outgoing, incoming in msg_pairs:
            tick = time.time()
            send_req = client_ep.send(outgoing, nbytes)
            issue_total += time.time() - tick

            tick = time.time()
            send_req.result()
            progress_total += time.time() - tick

            tick = time.time()
            recv_req = client_ep.recv(incoming, nbytes)
            issue_total += time.time() - tick

            tick = time.time()
            recv_req.result()
            progress_total += time.time() - tick

        elapsed = time.time() - began

        # Each figure is halved, averaged over the iterations and scaled by
        # 1e6 for the "(us)" columns.
        latency = ((elapsed / 2) / n_iters) * 1000000
        issue_latency = ((issue_total / 2) / n_iters) * 1000000
        progress_latency = ((progress_total / 2) / n_iters) * 1000000
        print(row.format(nbytes, latency, issue_latency, progress_latency))

    if on_gpu:
        tx_region.free_cuda()
        rx_region.free_cuda()
    else:
        tx_region.free_host()
        rx_region.free_host()

    ucp.destroy_ep(client_ep)
    cb_not_done = False