Example #1
        def _dist_worker_task_fn(backend: str, fn: Callable, args: Tuple, kwargs_dict: Mapping) -> None:
            from ignite.distributed.utils import _set_model, finalize

            # Initialize Horovod for this worker, publish it as the global computation
            # model, run the user function with the local rank, then shut everything down
            model = _HorovodDistModel.create_from_backend(backend)
            _set_model(model)
            fn(model.get_local_rank(), *args, **kwargs_dict)
            finalize()
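A worker function like this is not meant to be called directly; it is handed to the backend's process launcher. As a rough, assumption-laden sketch, the same machinery is normally reached through the public ignite.distributed.spawn API (the training function and process count below are illustrative placeholders, and Horovod must be installed for this to run):

    import ignite.distributed as idist

    def training(local_rank):
        # local_rank is forwarded by a worker function like the one above
        print(idist.get_rank(), idist.get_world_size(), idist.device())

    if __name__ == "__main__":
        # "horovod" is expected to route to _HorovodDistModel and its worker fn
        idist.spawn("horovod", training, args=(), nproc_per_node=2)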
Example #2
        def _dist_worker_task_fn(backend, fn, args, kwargs_dict):
            from ignite.distributed.utils import _set_model, finalize

            model = _HorovodDistModel.create_from_backend(backend)
            _set_model(model)
            fn(model.get_local_rank(), *args, **kwargs_dict)
            finalize()
Example #3
        def _dist_worker_task_fn(
            local_rank: int,
            backend: str,
            fn: Callable,
            args: Tuple,
            kw_dict: Mapping,
            world_size: int,
            nprocs_per_node: int,
            node_rank: int,
            master_addr: str,
            master_port: str,
            kw: Any,
        ) -> None:
            from ignite.distributed.utils import _set_model, finalize

            # Snapshot the environment so it can be restored after the worker finishes
            copy_env_vars = os.environ.copy()

            # Configure the torch.distributed env:// rendezvous for this worker
            os.environ["LOCAL_RANK"] = str(local_rank)
            os.environ["RANK"] = str(node_rank * nprocs_per_node + local_rank)
            os.environ["WORLD_SIZE"] = str(world_size)
            os.environ["MASTER_ADDR"] = str(master_addr)
            os.environ["MASTER_PORT"] = str(master_port)

            # Create the native computation model, expose it as the global model,
            # run the user function with the local rank, then tear down the group
            model = _NativeDistModel.create_from_backend(backend, **kw)
            _set_model(model)
            fn(local_rank, *args, **kw_dict)
            finalize()

            os.environ.clear()
            os.environ.update(copy_env_vars)
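The signature above matches what torch.multiprocessing.spawn expects: the process index arrives as the first positional argument (local_rank) and everything passed via args= follows. A minimal single-node sketch, treating the nested worker as if it were a free function and with every concrete value (backend, address, port, process count) an assumption:

    import torch.multiprocessing as mp

    def user_fn(local_rank, message):
        print(f"worker {local_rank}: {message}")

    if __name__ == "__main__":
        nprocs = 2
        # args supplies every parameter after local_rank, in signature order
        mp.spawn(
            _dist_worker_task_fn,
            args=("gloo", user_fn, ("hello",), {}, nprocs, nprocs, 0, "127.0.0.1", "29500", {}),
            nprocs=nprocs,
        )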
Example #4
def _test_idist_methods_in_hvd_context(backend, device):
    # We explicitly set _model as _SerialModel
    # then call idist.* methods and check that they give correct values
    import horovod.torch as hvd

    from ignite.distributed.utils import _SerialModel, _set_model

    hvd.init()

    _set_model(_SerialModel())

    ws = hvd.size()
    rank = hvd.rank()
    local_rank = hvd.local_rank()

    if torch.cuda.is_available():
        torch.cuda.set_device(local_rank)

    _test_distrib_config(local_rank,
                         backend=backend,
                         ws=ws,
                         true_device=device,
                         rank=rank)

    hvd.shutdown()
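The _test_distrib_config helper is referenced but not shown in these examples. A hypothetical stand-in, built only on the public idist API, suggests the kind of assertions such a checker runs (the function name and exact checks are assumptions):

    import ignite.distributed as idist

    def check_distrib_config(local_rank, backend, ws, true_device, rank):
        # Hypothetical reconstruction of the checks performed by _test_distrib_config
        assert idist.backend() == backend
        assert idist.get_world_size() == ws
        assert idist.get_rank() == rank
        assert idist.get_local_rank() == local_rank
        assert true_device in str(idist.device())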
Example #5
def _destroy_mnodes_dist_context():
    dist.barrier()
    dist.destroy_process_group()

    from ignite.distributed.utils import _SerialModel, _set_model

    # We need to set synced model to initial state
    _set_model(_SerialModel())
Example #6
def test_idist_methods_in_xla_context():
    # We explicitly set _model as _SerialModel
    # then call idist.* methods and check that they give correct values
    from ignite.distributed.utils import _set_model, _SerialModel

    _set_model(_SerialModel())

    _test_distrib_config(local_rank=0, backend="xla-tpu", ws=1, true_device="xla", rank=0)
Example #7
def _test_sync(cls):
    from ignite.distributed.utils import _SerialModel, _set_model

    _set_model(_SerialModel())

    sync()

    from ignite.distributed.utils import _model

    assert isinstance(_model, cls), f"{type(_model)} vs {cls}"
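Here sync() re-inspects the current environment and replaces the explicitly installed _SerialModel with whichever computation model matches it; the assertion verifies that the swap happened. A hedged sketch of calling this helper inside a process where torch.distributed has already been initialized (the comp-model import path is an assumption):

    import torch.distributed as dist
    from ignite.distributed.comp_models.native import _NativeDistModel

    if dist.is_available() and dist.is_initialized():
        # After init_process_group, sync() should detect the native backend
        _test_sync(_NativeDistModel)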
Example #8
def _test_idist_methods_in_native_context(backend, device, local_rank):
    # We explicitly set _model as _SerialModel
    # then call idist.* methods and check that they give correct values
    from ignite.distributed.utils import _SerialModel, _set_model

    _set_model(_SerialModel())

    ws = dist.get_world_size()
    rank = dist.get_rank()
    _test_distrib_config(local_rank, backend=backend, ws=ws, true_device=device, rank=rank)
Example #9
def _test_sync(cls):
    from ignite.distributed.utils import _set_model, _SerialModel

    _set_model(_SerialModel())

    sync()

    from ignite.distributed.utils import _model

    assert isinstance(_model, cls), "{} vs {}".format(type(_model), cls)
Example #10
def _test_idist_methods_in_xla_context_in_child_proc(index):
    # We explicitly set _model as _SerialModel
    # then call idist.* methods and check that they give correct values
    from ignite.distributed.utils import _set_model, _SerialModel

    _set_model(_SerialModel())

    import torch_xla.core.xla_model as xm

    _test_distrib_config(
        local_rank=index, backend="xla-tpu", ws=xm.xrt_world_size(), true_device="xla", rank=xm.get_ordinal()
    )
Example #11
def _test_idist_methods_in_native_context_set_local_rank(backend, device, local_rank):
    # We explicitly set _model as _SerialModel
    # then call idist.* methods and check that they give correct values
    from ignite.distributed.utils import _SerialModel, _set_model

    _set_model(_SerialModel())

    lrank = int(os.environ["LOCAL_RANK"])
    del os.environ["LOCAL_RANK"]

    ws = dist.get_world_size()
    rank = dist.get_rank()

    idist.set_local_rank(local_rank)

    _test_distrib_config(local_rank=local_rank, backend=backend, ws=ws, true_device=device, rank=rank)

    os.environ["LOCAL_RANK"] = str(lrank)
Example #12
        def _dist_worker_task_fn(
            local_rank: int,
            backend: str,
            fn: Callable,
            args: Tuple,
            kw_dict: Mapping,
            world_size: int,
            nprocs_per_node: int,
            node_rank: int,
            master_addr: Optional[str],
            master_port: Optional[str],
            init_method: str,
            kw: Any,
        ) -> None:
            from ignite.distributed.utils import _set_model, finalize

            # Snapshot the environment so it can be restored after the worker finishes
            copy_env_vars = os.environ.copy()

            rank = node_rank * nprocs_per_node + local_rank
            os.environ["LOCAL_RANK"] = str(local_rank)
            os.environ["RANK"] = str(rank)
            os.environ["WORLD_SIZE"] = str(world_size)

            arg_world_size = world_size  # type: Optional[int]
            arg_rank = rank  # type: Optional[int]
            if init_method == "env://":
                # With the env:// rendezvous, world size and rank are read from the
                # environment, so they are not passed explicitly below
                os.environ["MASTER_ADDR"] = str(master_addr)
                os.environ["MASTER_PORT"] = str(master_port)
                arg_world_size = None
                arg_rank = None

            model = _NativeDistModel.create_from_backend(
                backend, init_method=init_method, world_size=arg_world_size, rank=arg_rank, **kw
            )
            _set_model(model)
            fn(local_rank, *args, **kw_dict)
            finalize()

            os.environ.clear()
            os.environ.update(copy_env_vars)
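Compared with Examples #3 and #13, this variant also supports rendezvous methods other than "env://": with a file- or TCP-based init_method, world size and rank are passed straight to the process group instead of through environment variables. A rough single-node launch sketch, again treating the nested worker as a free function and with every concrete value (backend, file path, process count) an assumption:

    import tempfile
    import torch.multiprocessing as mp

    def user_fn(local_rank):
        print(f"worker {local_rank} started")

    if __name__ == "__main__":
        nprocs = 2
        init_method = f"file://{tempfile.gettempdir()}/ignite_shared_init_file"
        mp.spawn(
            _dist_worker_task_fn,
            args=("gloo", user_fn, (), {}, nprocs, nprocs, 0, None, None, init_method, {}),
            nprocs=nprocs,
        )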
Example #13
        def _dist_worker_task_fn(local_rank, backend, fn, args, kw_dict,
                                 world_size, nprocs_per_node, node_rank,
                                 master_addr, master_port, kw):
            from ignite.distributed.utils import _set_model, finalize

            copy_env_vars = dict(os.environ)

            os.environ["LOCAL_RANK"] = str(local_rank)
            os.environ["RANK"] = str(node_rank * nprocs_per_node + local_rank)
            os.environ["WORLD_SIZE"] = str(world_size)
            os.environ["MASTER_ADDR"] = str(master_addr)
            os.environ["MASTER_PORT"] = str(master_port)

            model = _NativeDistModel.create_from_backend(backend, **kw)
            _set_model(model)
            fn(local_rank, *args, **kw_dict)
            finalize()

            os.environ.clear()
            os.environ.update(copy_env_vars)