def pytorch_ring_init(ring):
    """
    Import necessary modules and set up ``torch.distributed`` for a ring node.

    Ranks other than 0 wait until ``ring.members[0]`` stops reporting
    ``connected is False``. Every rank then exports ``MASTER_ADDR``,
    ``MASTER_PORT`` and ``GLOO_SOCKET_IFNAME`` into ``os.environ`` and calls
    ``dist.init_process_group`` with the ``"gloo"`` backend.

    :param ring: object exposing ``rank``, ``size`` and ``members``; entry 0
        of ``members`` must expose ``connected``, ``ip`` and ``port``.
    """
    import os
    import time
    from fiber.backend import get_backend
    import torch.distributed as dist

    # Upper bound for a single sleep so the exponential backoff below cannot
    # grow into multi-minute pauses when the master is slow to come up.
    max_wait = 5.0

    backend = get_backend()
    rank = ring.rank
    master = ring.members[0]
    print("pytorch ring init, rank", rank)

    if rank != 0:
        wait = 0.1
        while master.connected is False:
            print("ring.memebers[0].connected != True, wait", wait)
            time.sleep(wait)
            wait = min(wait * 2, max_wait)
            # Fetch the entry again on every iteration; presumably
            # ``ring.members`` hands out snapshots rather than live
            # objects -- TODO confirm.
            master = ring.members[0]

    # Only the interface name (third element) of the listen address is used.
    _, _, ifce = backend.get_listen_addr()

    os.environ["MASTER_ADDR"] = master.ip
    os.environ["MASTER_PORT"] = str(master.port)
    os.environ["GLOO_SOCKET_IFNAME"] = ifce
    print(
        ring.size,
        ring.rank,
        ifce,
        os.environ["MASTER_ADDR"],
        os.environ["MASTER_PORT"],
    )

    dist.init_process_group("gloo", rank=ring.rank, world_size=ring.size)
def __init__(self, address=None, family=None, backlog=500, authkey=None):
    """
    Create the underlying ``SocketListener`` and remember address/authkey.

    :param address: address stored in ``self._address``; when falsy, the
        value returned by ``backend.get_listen_addr()`` is stored instead.
    :param family: address family name. When falsy it is derived from
        ``address`` via ``address_type`` or falls back to
        ``default_family``. Only ``'AF_INET'`` is accepted.
    :param backlog: forwarded to ``SocketListener``.
    :param authkey: ``None`` or a ``bytes`` object.
    :raises NotImplementedError: if the resolved family is not ``'AF_INET'``.
    :raises TypeError: if ``authkey`` is neither ``None`` nor ``bytes``.
    """
    family = (
        family
        or (address and address_type(address))
        or default_family
    )
    # Every family other than 'AF_INET' is rejected here (this includes
    # 'AF_PIPE', which cannot be used across machines), so no later
    # per-family branching is needed.
    if family != 'AF_INET':
        raise NotImplementedError

    backend = get_backend()
    # TODO(jiale) Add support for other address family for
    # backend.get_listen_addr
    address = address or backend.get_listen_addr()
    self._address = address
    _validate_family(family)

    # Validate authkey *before* creating the listener so that a bad argument
    # does not leave a listening socket behind.
    if authkey is not None and not isinstance(authkey, bytes):
        raise TypeError('authkey should be a byte string')

    # Listens on '0.0.0.0' so that it accepts connection regardless of
    # net interfaces. When connect connects, it uses a specific IP
    # address.
    self._listener = SocketListener(('0.0.0.0', 0), family, backlog)
    self._authkey = authkey
def _target(self):
    """
    Register this member in ``self.members`` and run the user callables.

    The member entry at ``self.rank`` is marked connected and given the
    backend's listen IP plus a port drawn at random from 30000..50000
    (inclusive). Afterwards ``self.initializer(self)`` and
    ``self.func(rank, self.size)`` are invoked, in that order.
    """
    my_rank = self.rank
    member = self.members[my_rank]

    listen_ip, _, _ = get_backend().get_listen_addr()
    chosen_port = random.randint(30000, 50000)

    member.connected = True
    member.ip = listen_ip
    member.port = chosen_port
    # Store the updated entry back explicitly; presumably ``members`` is a
    # shared container that does not observe in-place attribute changes --
    # TODO confirm.
    self.members[my_rank] = member

    self.initializer(self)
    self.func(my_rank, self.size)
def device(self, s1_mode, s2_mode):
    """
    Build and bind a ``NanomsgDevice`` and return its external addresses.

    :param s1_mode: mode stored in ``self.s1_mode`` and passed to the device.
    :param s2_mode: mode stored in ``self.s2_mode`` and passed to the device.
    :returns: ``(device, in_addr, out_addr)`` where both addresses are
        ``tcp://<ip>:<port>`` strings built from the backend's listen IP and
        the two ports returned by ``device.bind()``.
    """
    self.s1_mode, self.s2_mode = s1_mode, s2_mode

    nn_device = NanomsgDevice(
        self,
        s1_mode,
        s2_mode,
        default_addr=NanomsgContext.default_addr,
    )
    port_in, port_out = nn_device.bind()

    external_ip, _, _ = get_backend().get_listen_addr()
    make_addr = "tcp://{}:{}".format

    return (
        nn_device,
        make_addr(external_ip, port_in),
        make_addr(external_ip, port_out),
    )
def __init__(self):
    """
    Start a ``ProcessDevice("r", "w")`` and create lazy connections to it.

    The device's ``out_addr`` becomes the reader address and its ``in_addr``
    the writer address. ``self.reader`` and ``self.writer`` are
    ``LazyZConnection`` objects created from those addresses.
    """
    self.done = False

    # The listen address is queried but its value is not used in this
    # method; the call is kept so that any effect of obtaining the backend
    # is preserved.
    _, _, _ = get_backend().get_listen_addr()

    # Listens on '0.0.0.0' so that it accepts connection regardless of
    # net interfaces. When connect connects, it should use the address
    # obtained from backend.get_listen_addr().
    proc_device = ProcessDevice("r", "w")
    proc_device.start()

    self._reader_addr = proc_device.out_addr
    self._writer_addr = proc_device.in_addr

    # Client side.
    # NOTE(review): an earlier comment said the reader should be None
    # because a connected reader gets messages queued to it even when it is
    # not reading. The code creates a lazy reader connection instead;
    # presumably the laziness avoids that problem -- confirm.
    reader_spec = ("r", self._reader_addr)
    writer_spec = ("w", self._writer_addr)
    self.reader = LazyZConnection(reader_spec)
    self.writer = LazyZConnection(writer_spec)
def __init__(self, process_obj, backend=None, launch=False):
    """
    Initialise bookkeeping for a process and optionally launch it.

    :param process_obj: process object stored in ``self.process_obj`` and
        handed to ``self._launch`` when ``launch`` is true.
    :param backend: accepted but not read by this method; ``self.backend``
        always comes from ``get_backend()``.
    :param launch: when truthy, ``self._launch(process_obj)`` is called
        after all attributes have been set.
    """
    self.returncode = None

    self.backend = get_backend()
    listen_ip, _, _ = self.backend.get_listen_addr()

    # Admin channel endpoints: host from the backend, ports from config.
    self.master_host = listen_ip
    self.master_port = config.ipc_admin_master_port
    self.worker_port = config.ipc_admin_worker_port

    # Connection / job state, filled in later.
    self.sock = None
    self.host = ""
    self.job = None
    self.pid = None

    self.process_obj = process_obj
    self._exiting = None
    self.sentinel = None
    self.ident = None

    if not launch:
        return
    self._launch(process_obj)
def device(self, s1_mode, s2_mode):
    """
    Create a ``zmq.devices.ThreadDevice`` bound to two random ports.

    Both sides are bound on ``tcp://0.0.0.0`` with ports picked between
    ``MIN_PORT`` and ``MAX_PORT`` (at most 100 tries each). The returned
    addresses use the backend's listen IP rather than the bind IP.

    :param s1_mode: key into ``self._mode_to_type`` for the device's in side.
    :param s2_mode: key into ``self._mode_to_type`` for the device's out side.
    :returns: ``(device, reader_addr, writer_addr)`` where the reader address
        carries the out-side port and the writer address the in-side port.
    """
    external_ip, _, _ = get_backend().get_listen_addr()
    bind_addr = "tcp://{}".format("0.0.0.0")

    in_type = self._mode_to_type[s1_mode]
    out_type = self._mode_to_type[s2_mode]
    thread_device = zmq.devices.ThreadDevice(in_type=in_type, out_type=out_type)

    port_search = {
        "min_port": MIN_PORT,
        "max_port": MAX_PORT,
        "max_tries": 100,
    }
    # Bind the in side first, then the out side.
    writer_port = thread_device.bind_in_to_random_port(bind_addr, **port_search)
    reader_port = thread_device.bind_out_to_random_port(bind_addr, **port_search)

    make_addr = "tcp://{}:{}".format
    reader_addr = make_addr(external_ip, reader_port)
    writer_addr = make_addr(external_ip, writer_port)
    return thread_device, reader_addr, writer_addr