Beispiel #1
0
def pytorch_ring_init(ring):
    """Set up a ``torch.distributed`` "gloo" process group for a ring member.

    Every dependency is imported inside the function body, so the function
    does not rely on names imported by the enclosing module.

    Non-zero ranks first poll ``ring.members[0]`` until it reports
    ``connected``; afterwards ``MASTER_ADDR``, ``MASTER_PORT`` and
    ``GLOO_SOCKET_IFNAME`` are exported through ``os.environ`` and
    ``dist.init_process_group`` is called.

    Args:
        ring: object exposing ``rank``, ``size`` and ``members``; the entry
            ``members[0]`` must provide ``connected``, ``ip`` and ``port``.
    """
    import os
    import time
    from fiber.backend import get_backend
    import torch.distributed as dist

    # Upper bound (seconds) for the polling back-off. Without a cap the
    # doubling delay quickly grows to minutes, so a worker could keep sleeping
    # long after the master became reachable.
    max_wait = 5.0

    backend = get_backend()
    rank = ring.rank
    master = ring.members[0]
    print("pytorch ring init, rank", rank)

    if rank != 0:
        wait = 0.1
        while master.connected is False:
            print("ring.memebers[0].connected != True, wait", wait)
            time.sleep(wait)
            wait = min(wait * 2, max_wait)
            # Re-read the member entry on every iteration so that updates
            # written by the master are observed.
            master = ring.members[0]

    # Only the interface name of the listen address is needed here.
    _, _, ifce = backend.get_listen_addr()

    os.environ["MASTER_ADDR"] = master.ip
    os.environ["MASTER_PORT"] = str(master.port)
    os.environ["GLOO_SOCKET_IFNAME"] = ifce

    print(
        ring.size,
        ring.rank,
        ifce,
        os.environ["MASTER_ADDR"],
        os.environ["MASTER_PORT"],
    )
    dist.init_process_group("gloo", rank=ring.rank, world_size=ring.size)
Beispiel #2
0
    def __init__(self, address=None, family=None, backlog=500, authkey=None):
        """Create a TCP listener bound to all interfaces on an ephemeral port.

        Args:
            address: address stored as ``self._address``. When falsy, the
                value returned by ``backend.get_listen_addr()`` is stored
                instead. The socket itself is always bound to
                ``('0.0.0.0', 0)`` regardless of this value.
            family: address family name. When falsy it is derived from
                ``address`` via ``address_type`` or falls back to
                ``default_family``. Only ``'AF_INET'`` is supported.
            backlog: passed through to ``SocketListener``.
            authkey: ``None`` or a ``bytes`` object, stored as
                ``self._authkey``.

        Raises:
            NotImplementedError: if the resolved family is not ``'AF_INET'``.
            TypeError: if ``authkey`` is neither ``None`` nor ``bytes``.
        """
        family = (family
                  or (address and address_type(address))
                  or default_family)
        if family != 'AF_INET':
            # Other families (including 'AF_PIPE', which cannot be used
            # across machines) are not supported.
            raise NotImplementedError

        backend = get_backend()
        # TODO(jiale) Add support for other address family for
        #  backend.get_listen_addr
        address = address or backend.get_listen_addr()
        self._address = address

        _validate_family(family)

        # Validate the key before a socket is created so that a bad argument
        # does not leave a bound listener behind.
        if authkey is not None and not isinstance(authkey, bytes):
            raise TypeError('authkey should be a byte string')

        # Listens on '0.0.0.0' so that it accepts connection regardless of
        # net interfaces. When connect connects, it uses a specific IP
        # address.
        self._listener = SocketListener(('0.0.0.0', 0), family, backlog)
        self._authkey = authkey
Beispiel #3
0
    def _target(self):
        """Publish this member's endpoint, then run the initializer and func.

        The member record at index ``self.rank`` is marked as connected and
        given the backend's listen IP plus a port drawn at random from
        30000-50000. ``self.initializer`` is then called with the ring
        itself, followed by ``self.func(rank, size)``.
        """
        my_rank = self.rank
        member = self.members[my_rank]

        listen_ip, _, _ = get_backend().get_listen_addr()
        # NOTE(review): the port is picked at random and is not checked for
        # availability here.
        chosen_port = random.randint(30000, 50000)

        member.connected = True
        member.ip = listen_ip
        member.port = chosen_port
        # Store the updated record back into the member list (presumably a
        # shared container that does not see in-place attribute changes;
        # TODO confirm).
        self.members[my_rank] = member

        self.initializer(self)
        self.func(my_rank, self.size)
Beispiel #4
0
    def device(self, s1_mode, s2_mode):
        """Create and bind a ``NanomsgDevice`` and report its TCP addresses.

        Args:
            s1_mode: mode of the first socket; also stored on ``self.s1_mode``.
            s2_mode: mode of the second socket; also stored on
                ``self.s2_mode``.

        Returns:
            A tuple ``(device, in_addr, out_addr)`` where both addresses are
            ``tcp://<ip>:<port>`` strings built from the backend's listen IP
            and the two ports returned by ``device.bind()``.
        """
        self.s1_mode, self.s2_mode = s1_mode, s2_mode

        nn_device = NanomsgDevice(
            self, s1_mode, s2_mode, default_addr=NanomsgContext.default_addr
        )
        port_in, port_out = nn_device.bind()

        external_ip, _, _ = get_backend().get_listen_addr()

        endpoint = "tcp://{}:{}".format
        return (
            nn_device,
            endpoint(external_ip, port_in),
            endpoint(external_ip, port_out),
        )
Beispiel #5
0
    def __init__(self):
        """Start a ``ProcessDevice`` and create lazy reader/writer connections.

        The device is created with modes ``"r"`` and ``"w"`` and started
        immediately. Its ``out_addr`` becomes the reader address and its
        ``in_addr`` the writer address.
        """
        self.done = False

        # The listen address is queried but not used further in this method.
        # Per the original author's note, the device side listens on
        # '0.0.0.0' so that connections are accepted regardless of network
        # interface, and connecting peers should use the address obtained
        # from backend.get_listen_addr() (not visible from this block).
        _listen_ip, _, _ = get_backend().get_listen_addr()

        proc_device = ProcessDevice("r", "w")
        proc_device.start()

        self._reader_addr = proc_device.out_addr
        self._writer_addr = proc_device.in_addr

        # Client side.
        # Original author's note: once a reader is connected, the Fiber
        # socket fairly queues messages to all readers, even to one that is
        # not reading. Presumably this is why lazy connections are used
        # here; verify against LazyZConnection.
        reader_spec = ("r", self._reader_addr)
        writer_spec = ("w", self._writer_addr)
        self.reader = LazyZConnection(reader_spec)
        self.writer = LazyZConnection(writer_spec)
Beispiel #6
0
    def __init__(self, process_obj, backend=None, launch=False):
        """Initialise bookkeeping state and optionally launch the process.

        Args:
            process_obj: the process object to manage; stored on
                ``self.process_obj`` and passed to ``self._launch``.
            backend: accepted but not consulted by this method.
                NOTE(review): ``get_backend()`` is always used instead;
                confirm whether that is intended.
            launch: when truthy, ``self._launch(process_obj)`` is called at
                the end of initialisation.
        """
        self.returncode = None
        self.backend = get_backend()

        # The admin master host is the IP this backend listens on; the ports
        # come from the global config.
        listen_ip, _, _ = self.backend.get_listen_addr()
        self.master_host = listen_ip
        self.master_port = config.ipc_admin_master_port
        self.worker_port = config.ipc_admin_worker_port

        self.process_obj = process_obj
        self.host = ""

        # Everything below starts out unset.
        self.sock = self.job = self.pid = None
        self._exiting = self.sentinel = self.ident = None

        if launch:
            self._launch(process_obj)
Beispiel #7
0
    def device(self, s1_mode, s2_mode):
        """Create a zmq ``ThreadDevice`` bound to two random TCP ports.

        Both sides are bound on ``0.0.0.0`` to a port between ``MIN_PORT``
        and ``MAX_PORT`` (at most 100 attempts each). The returned addresses
        use the backend's listen IP instead of the wildcard address.

        Args:
            s1_mode: key into ``self._mode_to_type`` for the device's in side.
            s2_mode: key into ``self._mode_to_type`` for the device's out
                side.

        Returns:
            A tuple ``(device, reader_addr, writer_addr)``: the reader
            address points at the out-side port, the writer address at the
            in-side port.
        """
        external_ip, _, _ = get_backend().get_listen_addr()
        wildcard_ip = "0.0.0.0"
        bind_url = "tcp://{}".format(wildcard_ip)

        in_type = self._mode_to_type[s1_mode]
        out_type = self._mode_to_type[s2_mode]
        thread_device = zmq.devices.ThreadDevice(in_type=in_type,
                                                 out_type=out_type)

        # Same port search settings for both sides of the device.
        port_search = {
            "min_port": MIN_PORT,
            "max_port": MAX_PORT,
            "max_tries": 100,
        }
        writer_port = thread_device.bind_in_to_random_port(bind_url,
                                                           **port_search)
        reader_port = thread_device.bind_out_to_random_port(bind_url,
                                                            **port_search)

        external = "tcp://{}:{}".format
        return (
            thread_device,
            external(external_ip, reader_port),
            external(external_ip, writer_port),
        )