def _tcp_rendezvous_handler(url: str, timeout: timedelta = default_pg_timeout, **kwargs):
    """Rendezvous generator for ``tcp://`` URLs.

    The URL must carry a hostname, a port, and the ``rank`` and
    ``world_size`` query parameters. A ``TCPStore`` is created from those
    values; its daemon is started only by the process whose rank is 0.

    Args:
        url: Rendezvous URL to parse.
        timeout: Passed through unchanged to ``TCPStore``.
        **kwargs: Accepted for interface compatibility; not used here.

    Yields:
        A single ``(store, rank, world_size)`` tuple.

    Raises:
        Exception built by ``_rendezvous_error``: if the port (or a port of
            0), the hostname, ``rank`` or ``world_size`` is missing.
        ValueError: if ``rank`` or ``world_size`` is not an integer literal,
            or a query item has no ``=`` separator.
        RuntimeError: if the generator is resumed after its only yield.
    """

    def _error(msg):
        return _rendezvous_error("tcp:// rendezvous: " + msg)

    result = urlparse(url)
    if not result.port:
        raise _error("port number missing")
    # Explicit check instead of ``assert``: assertions are stripped under -O,
    # which would let a None hostname reach TCPStore.
    if result.hostname is None:
        raise _error("hostname missing")

    query: Dict[str, Union[int, str]]
    # mypy doesn't allow dict() to accept List of values (#257)
    # Split on the first "=" only so values that contain "=" still parse.
    query = dict(pair.split("=", 1) for pair in filter(None, result.query.split("&")))  # type: ignore[misc, arg-type]
    if "rank" not in query:
        raise _error("rank parameter missing")
    if "world_size" not in query:
        raise _error("world size parameter missing")

    rank = int(query["rank"])
    world_size = int(query["world_size"])
    # Only rank 0 starts the store daemon.
    start_daemon = rank == 0
    store = TCPStore(result.hostname, result.port, world_size, start_daemon, timeout)
    yield (store, rank, world_size)

    # If this configuration is invalidated, there is nothing we can do about it
    raise RuntimeError("Unable to perform rerendezvous using tcp:// method")
def _env_rendezvous_handler(url: str, timeout: timedelta = default_pg_timeout, **kwargs):
    """Rendezvous generator for ``env://`` URLs.

    ``rank`` and ``world_size`` are read from the URL query string when
    present, otherwise from the ``RANK`` and ``WORLD_SIZE`` environment
    variables. The store address always comes from ``MASTER_ADDR`` and
    ``MASTER_PORT``. A ``TCPStore`` is created from those values; its daemon
    is started only by the process whose rank is 0.

    Args:
        url: Rendezvous URL; only its query string is inspected.
        timeout: Passed through unchanged to ``TCPStore``.
        **kwargs: Accepted for interface compatibility; not used here.

    Yields:
        A single ``(store, rank, world_size)`` tuple.

    Raises:
        Exception built by ``_rendezvous_error``: if a required environment
            variable is not set.
        ValueError: if rank, world size or master port is not an integer
            literal, or a query item has no ``=`` separator.
        RuntimeError: if the generator is resumed after its only yield.
    """

    def _error(msg):
        return _rendezvous_error("env:// rendezvous: " + msg)

    def _env_error(var):
        return _error("environment variable %s expected, but not set" % var)

    def _require_env(var: str) -> str:
        # Return the value of environment variable ``var`` or raise if unset.
        value = os.environ.get(var)
        if value is None:
            raise _env_error(var)
        return value

    result = urlparse(url)
    query: Dict[str, Union[int, str]]
    # mypy doesn't allow dict() to accept List of values (#257)
    # Split on the first "=" only so values that contain "=" still parse.
    query = dict(pair.split("=", 1) for pair in filter(None, result.query.split("&")))  # type: ignore[misc, arg-type]

    rank: Union[str, int]
    world_size: Union[str, int]
    master_port: Union[str, int]

    # Query-string values take precedence over the environment.
    if "rank" in query:
        rank = int(query["rank"])
    else:
        rank = _require_env("RANK")

    if "world_size" in query:
        world_size = int(query["world_size"])
    else:
        world_size = _require_env("WORLD_SIZE")

    master_addr = _require_env("MASTER_ADDR")
    master_port = _require_env("MASTER_PORT")

    # Converting before creating the store. This is done only after every
    # presence check so a missing variable is reported before a malformed one.
    rank = int(rank)
    world_size = int(world_size)
    master_port = int(master_port)

    # Now start the TCP store daemon on the rank 0
    start_daemon = rank == 0
    store = TCPStore(master_addr, master_port, world_size, start_daemon, timeout)
    yield (store, rank, world_size)

    # If this configuration is invalidated, there is nothing we can do about it
    raise RuntimeError("Unable to perform rerendezvous using env:// method")
def _tcp_rendezvous_handler(url: str, timeout: timedelta = default_pg_timeout, **kwargs):
    """Rendezvous generator for ``tcp://`` URLs.

    Parses the hostname, port, ``rank`` and ``world_size`` out of ``url``
    and builds a ``TCPStore`` from them. The store daemon is started only
    by the process whose rank is 0.

    Args:
        url: Rendezvous URL to parse.
        timeout: Passed through unchanged to ``TCPStore``.
        **kwargs: Accepted for interface compatibility; not used here.

    Yields:
        A single ``(store, rank, world_size)`` tuple.

    Raises:
        Exception built by ``_rendezvous_error``: if the port (or a port of
            0), the hostname, ``rank`` or ``world_size`` is missing.
        ValueError: if ``rank`` or ``world_size`` is not an integer literal,
            or a query item has no ``=`` separator.
        RuntimeError: if the generator is resumed after its only yield.
    """

    def _error(msg):
        return _rendezvous_error("tcp:// rendezvous: " + msg)

    result = urlparse(url)
    if not result.port:
        raise _error("port number missing")
    # Reject a URL without a host up front rather than handing None to
    # TCPStore.
    if result.hostname is None:
        raise _error("hostname missing")

    query: Dict[str, Union[int, str]]
    # Split on the first "=" only so values that contain "=" still parse.
    query = dict(pair.split("=", 1) for pair in filter(None, result.query.split("&")))  # type: ignore[misc, arg-type]
    if "rank" not in query:
        raise _error("rank parameter missing")
    if "world_size" not in query:
        raise _error("world size parameter missing")

    rank = int(query["rank"])
    world_size = int(query["world_size"])
    # Only rank 0 starts the store daemon.
    start_daemon = rank == 0
    store = TCPStore(result.hostname, result.port, world_size, start_daemon, timeout)
    yield (store, rank, world_size)

    # If this configuration is invalidated, there is nothing we can do about it
    raise RuntimeError("Unable to perform rerendezvous using tcp:// method")