예제 #1
0
def test_xla_distrib_single_node_no_spawn():
    """Check the distributed configuration after initializing "xla-tpu" in-process.

    The backend is initialized directly in the current process (no workers
    are spawned here), the configuration is validated by
    ``_test_distrib_config`` with ``local_rank=0`` and ``ws=1``, and the
    backend is finalized afterwards.
    """
    idist.initialize("xla-tpu")
    try:
        _test_distrib_config(local_rank=0,
                             backend="xla-tpu",
                             ws=1,
                             true_device="xla")
    finally:
        # Always tear down, so a failing check does not leave an initialized
        # distributed context behind for the tests that run next.
        idist.finalize()
예제 #2
0
def _test_idist_methods_in_hvd_context(backend, device):
    """Validate idist methods inside an externally created Horovod context.

    Horovod is initialized directly through ``hvd.init()``, the computation
    model is explicitly set to ``_SerialModel`` and the values reported by
    Horovod (size, rank, local rank) are then passed to
    ``_test_distrib_config`` as the expected configuration.

    Args:
        backend: backend name forwarded to ``_test_distrib_config``.
        device: expected device forwarded as ``true_device``.
    """
    import horovod.torch as hvd

    from ignite.distributed.utils import _SerialModel, _set_model

    hvd.init()
    try:
        # We explicitly set _model as _SerialModel
        # then call idist.* methods and check that they give correct values
        _set_model(_SerialModel())

        ws = hvd.size()
        rank = hvd.rank()
        local_rank = hvd.local_rank()

        if torch.cuda.is_available():
            torch.cuda.set_device(local_rank)

        _test_distrib_config(local_rank,
                             backend=backend,
                             ws=ws,
                             true_device=device,
                             rank=rank)
    finally:
        # Shut Horovod down even when a check above fails, so the context
        # created by hvd.init() is not leaked into later tests.
        hvd.shutdown()
예제 #3
0
def test_idist_methods_in_xla_context():
    """Validate idist methods for a single "xla-tpu" process.

    The computation model is forced to ``_SerialModel`` before the
    configuration is checked by ``_test_distrib_config``.
    """
    from ignite.distributed.utils import _SerialModel, _set_model

    # Install the serial model explicitly; the idist.* calls made by the
    # checker are expected to give the correct values afterwards.
    serial_model = _SerialModel()
    _set_model(serial_model)

    expected_config = {
        "local_rank": 0,
        "backend": "xla-tpu",
        "ws": 1,
        "true_device": "xla",
        "rank": 0,
    }
    _test_distrib_config(**expected_config)
예제 #4
0
def test_native_distrib_single_node_launch_tool_nccl(local_rank, world_size):
    """Check the distributed configuration for the "nccl" backend on one node.

    The global rank is taken to be equal to ``local_rank`` and exported
    through the ``RANK`` environment variable before the backend is
    initialized.

    Args:
        local_rank: local rank of the current process; also used as rank.
        world_size: expected world size passed to ``_test_distrib_config``.
    """
    import os

    rank = local_rank
    os.environ["RANK"] = str(rank)

    idist.initialize("nccl")
    try:
        _test_distrib_config(local_rank, "nccl", world_size, "cuda", rank)
    finally:
        # Finalize even if the configuration check fails, so the process
        # group does not stay initialized for subsequent tests.
        idist.finalize()
예제 #5
0
def _test_idist_methods_in_native_context(backend, device, local_rank):
    """Validate idist methods inside an existing native distributed context.

    The computation model is forced to ``_SerialModel``; the world size and
    rank reported by ``dist`` are then used as the expected values for
    ``_test_distrib_config``.

    Args:
        backend: backend name forwarded to ``_test_distrib_config``.
        device: expected device forwarded as ``true_device``.
        local_rank: local rank of the current process.
    """
    from ignite.distributed.utils import _SerialModel, _set_model

    # Explicitly install the serial model before calling any idist.* method.
    serial_model = _SerialModel()
    _set_model(serial_model)

    _test_distrib_config(
        local_rank,
        backend=backend,
        ws=dist.get_world_size(),
        true_device=device,
        rank=dist.get_rank(),
    )
예제 #6
0
def test_native_distrib_single_node_launch_tool_gloo(local_rank, world_size):
    """Check the distributed configuration for the "gloo" backend on one node.

    The global rank is taken to be equal to ``local_rank`` and exported
    through the ``RANK`` environment variable. The backend is initialized
    with a 20 second timeout.

    Args:
        local_rank: local rank of the current process; also used as rank.
        world_size: expected world size passed to ``_test_distrib_config``.
    """
    import os
    from datetime import timedelta

    timeout = timedelta(seconds=20)
    rank = local_rank
    os.environ["RANK"] = str(rank)

    idist.initialize("gloo", timeout=timeout)
    try:
        _test_distrib_config(local_rank, "gloo", world_size, "cpu", rank)
    finally:
        # Finalize even if the configuration check fails, so the process
        # group does not stay initialized for subsequent tests.
        idist.finalize()
예제 #7
0
def test_hvd_distrib_single_node_single_device():
    """Check the distributed configuration after initializing "horovod".

    The expected device is "cuda" when at least one CUDA device is visible
    and "cpu" otherwise; expected ranks and world size are read from
    Horovod itself.
    """
    import horovod.torch as hvd

    idist.initialize("horovod")
    try:
        device = "cpu" if torch.cuda.device_count() < 1 else "cuda"
        local_rank = hvd.local_rank()
        world_size = hvd.size()
        rank = hvd.rank()
        _test_distrib_config(local_rank, "horovod", world_size, device, rank)
    finally:
        # Finalize even if a query or the check above fails, so the backend
        # is not left initialized for subsequent tests.
        idist.finalize()
예제 #8
0
def _test_idist_methods_in_xla_context_in_child_proc(index):
    """Validate idist methods for "xla-tpu" using values reported by torch_xla.

    The computation model is forced to ``_SerialModel``; the expected world
    size and rank are read from ``xm`` and ``index`` is used as the expected
    local rank.

    Args:
        index: value passed to ``_test_distrib_config`` as ``local_rank``.
    """
    from ignite.distributed.utils import _SerialModel, _set_model

    # Explicitly install the serial model before calling any idist.* method.
    _set_model(_SerialModel())

    import torch_xla.core.xla_model as xm

    world_size = xm.xrt_world_size()
    ordinal = xm.get_ordinal()
    _test_distrib_config(
        local_rank=index,
        backend="xla-tpu",
        ws=world_size,
        true_device="xla",
        rank=ordinal,
    )
예제 #9
0
def _test_idist_methods_in_native_context_set_local_rank(backend, device, local_rank):
    """Validate idist methods when the local rank is given via ``idist.set_local_rank``.

    ``LOCAL_RANK`` is temporarily removed from the environment, the local
    rank is provided explicitly through ``idist.set_local_rank`` and the
    configuration is then checked by ``_test_distrib_config``. The original
    ``LOCAL_RANK`` value is put back before returning.

    Args:
        backend: backend name forwarded to ``_test_distrib_config``.
        device: expected device forwarded as ``true_device``.
        local_rank: local rank to set and to expect.

    Raises:
        KeyError: if ``LOCAL_RANK`` is not present in the environment.
    """
    # We explicitly set _model as _SerialModel
    # then call idist.* methods and check that they give correct values
    from ignite.distributed.utils import _SerialModel, _set_model

    _set_model(_SerialModel())

    lrank = int(os.environ["LOCAL_RANK"])
    del os.environ["LOCAL_RANK"]
    try:
        ws = dist.get_world_size()
        rank = dist.get_rank()

        idist.set_local_rank(local_rank)

        _test_distrib_config(local_rank=local_rank, backend=backend, ws=ws, true_device=device, rank=rank)
    finally:
        # Restore the variable even when a check fails; otherwise the
        # mutated environment leaks into whatever runs next in this process.
        os.environ["LOCAL_RANK"] = str(lrank)
예제 #10
0
def _test_native_distrib_single_node_launch_tool(backend,
                                                 device,
                                                 local_rank,
                                                 world_size,
                                                 init_method=None,
                                                 **kwargs):
    """Check the distributed configuration for a native backend on one node.

    The global rank is taken to be equal to ``local_rank`` and exported
    through the ``RANK`` environment variable before the backend is
    initialized.

    Args:
        backend: backend name passed to ``idist.initialize``.
        device: expected device passed to ``_test_distrib_config``.
        local_rank: local rank of the current process; also used as rank.
        world_size: expected world size.
        init_method: forwarded to ``idist.initialize`` and expected back as
            ``true_init_method``.
        **kwargs: extra keyword arguments forwarded to ``idist.initialize``.
    """
    import os

    rank = local_rank
    os.environ["RANK"] = f"{rank}"

    idist.initialize(backend, init_method=init_method, **kwargs)
    try:
        _test_distrib_config(local_rank,
                             backend,
                             world_size,
                             device,
                             rank,
                             true_init_method=init_method)
    finally:
        # Finalize even if the configuration check fails, so the process
        # group does not stay initialized for subsequent tests.
        idist.finalize()