def _test_idist_methods_in_hvd_context(backend, device):
    """Check ``idist.*`` methods inside an initialized Horovod context.

    The ignite computation model is explicitly set to ``_SerialModel``; the
    ``idist.*`` methods are then expected to report the values of the live
    Horovod context (verified by ``_test_distrib_config``).

    Args:
        backend: forwarded to ``_test_distrib_config`` as ``backend``.
        device: forwarded to ``_test_distrib_config`` as ``true_device``.
    """
    import horovod.torch as hvd

    from ignite.distributed.utils import _SerialModel, _set_model

    hvd.init()
    try:
        # We explicitly set _model as _SerialModel, then call idist.* methods
        # and check that they give correct values.
        _set_model(_SerialModel())

        ws = hvd.size()
        rank = hvd.rank()
        local_rank = hvd.local_rank()

        if torch.cuda.is_available():
            torch.cuda.set_device(local_rank)

        _test_distrib_config(local_rank, backend=backend, ws=ws, true_device=device, rank=rank)
    finally:
        # Always shut Horovod down, even when a check above fails, so a
        # failure here does not leave Horovod initialized in this process.
        hvd.shutdown()
def _restart_hvd(self, rank_response):
    """Shut Horovod down and re-initialize it from ``rank_response``.

    The rendezvous port, rank and world size carried by ``rank_response``
    are exported through the ``HorovodEnv`` environment variables before
    Horovod is restarted. Afterwards the rendezvous id is recorded on the
    instance and ``need_broadcast`` is set to ``True``.

    Args:
        rank_response: object providing ``rendezvous_port``, ``rank_id``,
            ``world_size`` and ``rendezvous_id``.
    """
    env = os.environ

    env[HorovodEnv.RENDEZVOUS_PORT] = str(rank_response.rendezvous_port)
    env[HorovodEnv.RANK] = str(rank_response.rank_id)
    env[HorovodEnv.SIZE] = str(rank_response.world_size)

    # Not using Horovod elastic feature in init, but need it for
    # allreduce to call allreduce op when size=1.
    env[HorovodEnv.ELASTIC] = "0"
    hvd.shutdown()
    hvd.init()
    env[HorovodEnv.ELASTIC] = "1"

    self._rendezvous_id = rank_response.rendezvous_id
    self.need_broadcast = True
def _hvd_task_with_init(func, args):
    """Run ``func(*args)`` between ``hvd.init()`` and ``hvd.shutdown()``.

    Args:
        func: callable to invoke once Horovod has been initialized.
        args: iterable of positional arguments unpacked into ``func``.

    The return value of ``func`` is discarded. Any exception raised by
    ``func`` propagates to the caller after Horovod has been shut down.
    """
    import horovod.torch as hvd

    hvd.init()
    try:
        func(*args)
    finally:
        # Shut down even when func raises, so a failing task does not leave
        # Horovod initialized in this process.
        hvd.shutdown()
def hvd_wrapper(request):
    """Generator that brackets its consumer with Horovod init and shutdown.

    ``hvd.init()`` runs before the single ``yield``; ``hvd.shutdown()`` runs
    when the generator is resumed afterwards. ``request`` is accepted but not
    used in the body.

    NOTE(review): presumably registered as a pytest yield-fixture (no
    decorator is visible here) -- confirm.
    """
    # Setup phase.
    hvd.init()

    yield

    # Teardown phase.
    hvd.shutdown()