Esempio n. 1
0
def test_native_distrib_single_node_spawn_nccl():
    """Spawn one NCCL worker per visible CUDA device on a single node."""
    nprocs = torch.cuda.device_count()
    idist.spawn(
        "nccl",
        _test_distrib_config,
        args=("nccl", nprocs, "cuda"),
        nproc_per_node=nprocs,
    )
Esempio n. 2
0
def test_hvd_distrib_single_node_spawn():
    """Spawn 4 Horovod CPU workers on a single node."""
    nprocs = 4
    idist.spawn(
        "horovod",
        _test_distrib_config,
        args=("horovod", nprocs, "cpu"),
        nproc_per_node=nprocs,
    )
Esempio n. 3
0
def test_hvd_distrib_single_node_spawn_cuda():
    """Spawn one Horovod CUDA worker per visible device on a single node."""
    nprocs = torch.cuda.device_count()
    idist.spawn(
        "horovod",
        _test_distrib_config,
        args=("horovod", nprocs, "cuda"),
        nproc_per_node=nprocs,
    )
Esempio n. 4
0
def test_hvd_distrib_multi_node_spawn_raise_error():
    """Requesting nnodes > 1 without 'hosts' must raise a RuntimeError."""
    nprocs = 4
    expected = r"For multi-node configuration, please set 'hosts' argument instead"
    with pytest.raises(RuntimeError, match=expected):
        idist.spawn(
            "horovod",
            _test_distrib_config,
            args=("horovod", nprocs, "cpu"),
            nproc_per_node=nprocs,
            nnodes=2,
        )
Esempio n. 5
0
def test_xla_distrib_single_node_spawn_one_proc():
    """Spawn a single XLA/TPU worker; tolerate the SystemExit the launcher may raise."""
    try:
        idist.spawn(
            "xla-tpu",
            _test_distrib_config,
            args=("xla-tpu", 1, "xla"),
            nproc_per_node=1,
        )
    except SystemExit:
        # The XLA multiprocessing launcher can terminate via sys.exit();
        # that is not a test failure here.
        pass
Esempio n. 6
0
def test_xla_distrib_single_node_spawn_n_procs():
    """Spawn NUM_TPU_WORKERS XLA workers; tolerate the SystemExit the launcher may raise."""
    nprocs = int(os.environ["NUM_TPU_WORKERS"])
    try:
        idist.spawn(
            "xla-tpu",
            _test_distrib_config,
            args=("xla-tpu", nprocs, "xla"),
            nproc_per_node=nprocs,
        )
    except SystemExit:
        # The XLA multiprocessing launcher can terminate via sys.exit();
        # that is not a test failure here.
        pass
Esempio n. 7
0
def test_native_distrib_single_node_spawn_gloo():
    """Spawn 4 gloo CPU workers on a single node with a 20-second init timeout."""
    from datetime import timedelta

    nprocs = 4
    idist.spawn(
        "gloo",
        _test_distrib_config,
        args=("gloo", nprocs, "cpu"),
        nproc_per_node=nprocs,
        timeout=timedelta(seconds=20),
    )
Esempio n. 8
0
def _test_native_distrib_single_node_spawn(init_method, backend, device, **kwargs):
    """Spawn single-node workers for `backend` on `device` using `init_method`.

    CPU runs use 4 workers; otherwise one worker per visible CUDA device.
    Extra keyword arguments are forwarded to ``idist.spawn``.
    """
    nprocs = torch.cuda.device_count() if device != "cpu" else 4
    idist.spawn(
        backend,
        _test_distrib_config,
        args=(backend, nprocs, device),
        nproc_per_node=nprocs,
        init_method=init_method,
        **kwargs,
    )
Esempio n. 9
0
def test_hvd_distrib_spawn_no_hvd_support():
    """Without Horovod installed, spawning the 'horovod' backend must raise ValueError."""
    with pytest.raises(ValueError, match=r"Backend should be one of"):
        idist.spawn(
            "horovod",
            _test_distrib_config,
            args=("horovod", 1, "cpu"),
            nproc_per_node=1,
        )
Esempio n. 10
0
def test_xla_distrib_spawn_no_xla_support():
    """Without XLA installed, spawning the 'xla-tpu' backend must raise ValueError."""
    with pytest.raises(ValueError, match=r"Backend should be one of"):
        idist.spawn(
            "xla-tpu",
            _test_distrib_config,
            args=("xla-tpu", 1, "xla"),
            nproc_per_node=1,
        )