def _init_process_group_wrapper(world_size, rank, dev, backend, q):
    """Initialize the process group for one rank, sharing the master port via ``q``.

    Rank 0 calls ``dist.init_process_group`` with port ``0`` and then publishes
    the value reported by ``dist.get_master_port()`` on ``q``. Every other rank
    blocks on ``q.get()`` and passes the received port to its own
    ``dist.init_process_group`` call.

    Args:
        world_size: Total number of ranks; forwarded to
            ``dist.init_process_group`` and used to decide how many copies of
            the port rank 0 publishes.
        rank: Rank of the calling process; rank 0 is the publisher.
        dev: Device argument, forwarded unchanged.
        backend: Backend argument, forwarded unchanged.
        q: Object with ``put``/``get`` shared by all ranks (presumably a
            ``multiprocessing.Queue`` -- confirm against the caller).
    """
    if rank == 0:
        # NOTE(review): port 0 presumably lets the master pick a free port,
        # which is then read back below -- confirm against the dist API.
        dist.init_process_group(_LOCALHOST, 0, world_size, rank, dev, backend)
        port = dist.get_master_port()
        # With a standard queue each get() consumes one item, so publish one
        # copy per non-zero rank; a single put would leave all but one peer
        # blocked when world_size > 2. At least one copy is always published
        # so the behavior for world_size <= 2 is unchanged.
        for _ in range(max(world_size - 1, 1)):
            q.put(port)
    else:
        # Blocks until rank 0 has initialized and published its port.
        port = q.get()
        dist.init_process_group(_LOCALHOST, port, world_size, rank, dev, backend)
def worker(rank, backend, q):
    """Initialize the process group for ``rank`` and verify the reported state.

    Returns immediately without checking anything when
    ``mge.is_cuda_available()`` is false. Otherwise it initializes the group
    through ``_init_process_group_wrapper`` (using ``rank`` as the device
    argument as well) and asserts that the ``dist`` getters match the values
    that were passed in.

    NOTE(review): ``world_size`` is not a parameter; it is resolved from an
    enclosing or module scope that is not visible in this block.

    Args:
        rank: Rank of this worker; also passed as the device argument.
        backend: Backend name; compared against ``dist.get_backend()``.
        q: Queue-like object handed to ``_init_process_group_wrapper`` for
            exchanging the master port between ranks.

    Raises:
        AssertionError: If any reported distributed state differs from the
            expected value.
    """
    if not mge.is_cuda_available():
        return
    _init_process_group_wrapper(world_size, rank, rank, backend, q)
    # Plain truthiness check instead of comparing a boolean to True with ==.
    assert dist.is_distributed()
    assert dist.get_master_ip() == _LOCALHOST
    assert dist.get_master_port() > 0
    assert dist.get_world_size() == world_size
    assert dist.get_rank() == rank
    assert dist.get_backend() == backend