コード例 #1
0
ファイル: rendezvous.py プロジェクト: btxuyenHCMUS/MTCNN-GPU
def _tcp_rendezvous_handler(url: str,
                            timeout: timedelta = default_pg_timeout,
                            **kwargs):
    def _error(msg):
        return _rendezvous_error("tcp:// rendezvous: " + msg)

    result = urlparse(url)
    if not result.port:
        raise _error("port number missing")
    query: Dict[str, Union[int, str]]
    # mypy doesn't allow dict() to accept List of values (#257)
    query = dict(
        pair.split("=") for pair in filter(None, result.query.split(
            "&")))  # type: ignore[misc, arg-type]
    if "rank" not in query:
        raise _error("rank parameter missing")
    if "world_size" not in query:
        raise _error("world size parameter missing")

    rank = int(query["rank"])
    world_size = int(query["world_size"])
    start_daemon = rank == 0
    assert result.hostname is not None
    store = TCPStore(result.hostname, result.port, world_size, start_daemon,
                     timeout)
    yield (store, rank, world_size)

    # If this configuration is invalidated, there is nothing we can do about it
    raise RuntimeError("Unable to perform rerendezvous using tcp:// method")
コード例 #2
0
ファイル: rendezvous.py プロジェクト: btxuyenHCMUS/MTCNN-GPU
def _env_rendezvous_handler(url: str,
                            timeout: timedelta = default_pg_timeout,
                            **kwargs):
    def _error(msg):
        return _rendezvous_error("env:// rendezvous: " + msg)

    def _env_error(var):
        return _error("environment variable %s expected, but not set" % var)

    result = urlparse(url)
    query: Dict[str, Union[int, str]]
    # mypy doesn't allow dict() to accept List of values (#257)
    query = dict(
        pair.split("=") for pair in filter(None, result.query.split(
            "&")))  # type: ignore[misc, arg-type]

    rank: Optional[Union[str, int]]
    world_size: Optional[Union[str, int]]
    master_port: Optional[Union[str, int]]

    if "rank" in query:
        rank = int(query["rank"])
    else:
        rank = os.environ.get("RANK", None)
        if rank is None:
            raise _env_error("RANK")

    if "world_size" in query:
        world_size = int(query["world_size"])
    else:
        world_size = os.environ.get("WORLD_SIZE", None)
        if world_size is None:
            raise _env_error("WORLD_SIZE")

    master_addr = os.environ.get("MASTER_ADDR", None)
    if master_addr is None:
        raise _env_error("MASTER_ADDR")

    master_port = os.environ.get("MASTER_PORT", None)
    if master_port is None:
        raise _env_error("MASTER_PORT")

    # Converting before creating the store
    rank = int(rank)
    world_size = int(world_size)
    master_port = int(master_port)

    # Now start the TCP store daemon on the rank 0
    start_daemon = rank == 0
    store = TCPStore(master_addr, master_port, world_size, start_daemon,
                     timeout)
    yield (store, rank, world_size)

    # If this configuration is invalidated, there is nothing we can do about it
    raise RuntimeError("Unable to perform rerendezvous using env:// method")
コード例 #3
0
def _tcp_rendezvous_handler(url, timeout=default_pg_timeout, **kwargs):
    def _error(msg):
        return _rendezvous_error("tcp:// rendezvous: " + msg)

    result = urlparse(url)
    if not result.port:
        raise _error("port number missing")
    query = dict(pair.split("=") for pair in filter(None, result.query.split("&")))
    if "rank" not in query:
        raise _error("rank parameter missing")
    if "world_size" not in query:
        raise _error("world size parameter missing")

    rank = int(query["rank"])
    world_size = int(query["world_size"])
    start_daemon = rank == 0
    store = TCPStore(result.hostname, result.port, world_size, start_daemon, timeout)
    yield (store, rank, world_size)

    # If this configuration is invalidated, there is nothing we can do about it
    raise RuntimeError("Unable to perform rerendezvous using tcp:// method")