def test_comm_split(client):
    """Run ``func_test_comm_split`` on every worker of a p2p Comms session.

    A ``Comms`` session is created with ``comms_p2p=True``, one task is
    submitted per address in ``cb.worker_addresses`` (each pinned to that
    worker), and the test passes only if every task result is truthy.

    Parameters
    ----------
    client
        Dask client used to submit the per-worker tasks (presumably a
        pytest fixture -- its definition is not visible here).
    """
    cb = None
    try:
        cb = Comms(comms_p2p=True, verbose=True)
        cb.init()

        # The literal 3 is forwarded unchanged to func_test_comm_split;
        # NOTE(review): presumably a trial/split count -- confirm against
        # the helper's signature.
        dfs = [
            client.submit(
                func_test_comm_split,
                cb.sessionId,
                3,
                pure=False,
                workers=[w],
            )
            for w in cb.worker_addresses
        ]

        wait(dfs, timeout=5)
        assert all(x.result() for x in dfs)
    finally:
        # Tear the session down deterministically, matching the sibling
        # tests that call destroy() in a finally clause.
        if cb is not None:
            cb.destroy()
def test_send_recv(n_trials, client):
    """Run ``func_test_send_recv`` on every worker of a p2p Comms session.

    One task per address in ``cb.worker_addresses`` is submitted (pinned to
    that worker) and the test passes only if at least one task ran and every
    task result is truthy.

    Parameters
    ----------
    n_trials
        Value forwarded unchanged to ``func_test_send_recv``.
    client
        Dask client used to submit the per-worker tasks.
    """
    cb = None
    try:
        cb = Comms(comms_p2p=True, verbose=True)
        cb.init()

        dfs = [
            client.submit(
                func_test_send_recv,
                cb.sessionId,
                n_trials,
                pure=False,
                workers=[w],
            )
            for w in cb.worker_addresses
        ]

        wait(dfs, timeout=5)

        results = [x.result() for x in dfs]
        # The previous assertion only checked that this list was non-empty,
        # so a worker returning False still passed. Keep the non-empty check
        # and additionally require every individual result to be truthy.
        assert results and all(results)
    finally:
        # Tear the session down deterministically, matching the sibling
        # tests that call destroy() in a finally clause.
        if cb is not None:
            cb.destroy()
def test_collectives(client, func, root_location):
    """Run a collective test function once per worker rank.

    For every entry returned by ``cb.worker_info(cb.worker_addresses)``, the
    entry's ``"rank"`` is passed to ``func_test_collective`` on every worker,
    and each round must return a truthy result from all workers.

    NOTE(review): another function named ``test_collectives`` is defined
    later in the visible source; the later definition rebinds the name, which
    would hide this test -- confirm whether one of them should be renamed.

    Parameters
    ----------
    client
        Dask client; passed to ``Comms`` and used to submit tasks.
    func
        Forwarded unchanged as the first argument of
        ``func_test_collective``.
    root_location
        Forwarded to ``Comms`` as ``nccl_root_location``.
    """
    # Bind before the try so the finally clause never sees an unbound name
    # when Comms(...) itself raises (which would mask the real error).
    cb = None
    try:
        cb = Comms(
            verbose=True, client=client, nccl_root_location=root_location
        )
        cb.init()

        # Only the per-worker info is needed; the address keys are unused.
        for info in cb.worker_info(cb.worker_addresses).values():
            dfs = [
                client.submit(
                    func_test_collective,
                    func,
                    cb.sessionId,
                    info["rank"],
                    pure=False,
                    workers=[w],
                )
                for w in cb.worker_addresses
            ]
            wait(dfs, timeout=5)

            assert all(x.result() for x in dfs)
    finally:
        if cb is not None:
            cb.destroy()
def test_collectives(client, func):
    """Run a collective test function once per worker rank.

    For every entry returned by ``cb.worker_info(cb.worker_addresses)``, the
    entry's ``"rank"`` is passed to ``func_test_collective`` on every worker,
    and each round must return a truthy result from all workers.

    NOTE(review): an earlier function in the visible source is also named
    ``test_collectives``; this later definition rebinds the name -- confirm
    whether this one was meant to carry a distinct name.

    Parameters
    ----------
    client
        Dask client used to submit the per-worker tasks.
    func
        Forwarded unchanged as the first argument of
        ``func_test_collective``.
    """
    cb = None
    try:
        cb = Comms(verbose=True)
        cb.init()

        # Only the per-worker info is needed; the address keys are unused.
        for info in cb.worker_info(cb.worker_addresses).values():
            dfs = [
                client.submit(
                    func_test_collective,
                    func,
                    cb.sessionId,
                    info["rank"],
                    pure=False,
                    workers=[w],
                )
                for w in cb.worker_addresses
            ]
            wait(dfs, timeout=5)

            assert all(x.result() for x in dfs)
    finally:
        # Tear the session down deterministically, matching the sibling
        # tests that call destroy() in a finally clause.
        if cb is not None:
            cb.destroy()
def test_nccl_root_placement(client, root_location):
    """Check the session's unique id for the given ``root_location``.

    * ``"worker"``: ``func_check_uid_on_worker`` is run on the first worker
      address reported by the scheduler and its return value is checked.
    * ``"scheduler"``: ``func_check_uid_on_scheduler`` is run on the
      scheduler and its return value is checked.
    * anything else: the check is simply that ``cb.uniqueId`` is not
      ``None``.

    In every case the checked value must equal ``0``. The ``Comms`` session
    is destroyed afterwards if it was created.
    """
    cb = None
    try:
        cb = Comms(
            verbose=True,
            client=client,
            nccl_root_location=root_location,
        )
        cb.init()

        # De-duplicate the scheduler's worker keys while keeping their order.
        scheduler_workers = client.scheduler_info()["workers"]
        worker_addresses = list(OrderedDict.fromkeys(scheduler_workers.keys()))

        if root_location == "worker":
            first_worker = worker_addresses[0]
            per_worker = client.run(
                func_check_uid_on_worker,
                cb.sessionId,
                cb.uniqueId,
                workers=[first_worker],
            )
            # client.run's return value is indexed by worker address here.
            result = per_worker[first_worker]
        elif root_location == "scheduler":
            result = client.run_on_scheduler(
                func_check_uid_on_scheduler,
                cb.sessionId,
                cb.uniqueId,
            )
        else:
            # 0 when a unique id exists, 1 when it is missing.
            result = int(cb.uniqueId is None)

        assert result == 0
    finally:
        if cb:
            cb.destroy()
def test_comms_init_no_p2p(cluster):
    """Check the initialization flags of a default (non-p2p) Comms session.

    After ``init()`` on a ``Comms`` created without ``comms_p2p``,
    ``nccl_initialized`` must be ``True`` and ``ucx_initialized`` must be
    ``False``.

    Parameters
    ----------
    cluster
        Cluster object handed to ``Client``.
    """
    client = Client(cluster)
    # Bind before the try so cleanup never touches an unbound name when
    # Comms(...) itself raises (which would mask the real error).
    cb = None
    try:
        cb = Comms(verbose=True)
        cb.init()

        assert cb.nccl_initialized is True
        assert cb.ucx_initialized is False
    finally:
        # Nested finally: the client is closed even if destroy() raises.
        try:
            if cb is not None:
                cb.destroy()
        finally:
            client.close()
def test_handles(cluster):
    """Check that ``local_handle`` returns a handle on every worker.

    After ``Comms.init()``, a task is submitted to each address in
    ``cb.worker_addresses`` that calls ``local_handle(sessionId)``; the test
    passes only if none of those calls returns ``None``.

    Parameters
    ----------
    cluster
        Cluster object handed to ``Client``.
    """
    client = Client(cluster)

    def _has_handle(sessionId):
        """Return True when ``local_handle`` yields a handle for the session."""
        return local_handle(sessionId) is not None

    # Bind before the try so cleanup never touches an unbound name when
    # Comms(...) itself raises (which would mask the real error).
    cb = None
    try:
        cb = Comms(verbose=True)
        cb.init()

        dfs = [
            client.submit(_has_handle, cb.sessionId, pure=False, workers=[w])
            for w in cb.worker_addresses
        ]
        wait(dfs, timeout=5)

        assert all(client.compute(dfs, sync=True))
    finally:
        # Nested finally: the client is closed even if destroy() raises.
        try:
            if cb is not None:
                cb.destroy()
        finally:
            client.close()