Example #1
0
 def _init_deepspeed_distributed(self) -> None:
     """Set up node environment variables (non-Windows only) and initialize
     the DeepSpeed distributed backend on the cluster's main port.
     """
     on_windows = platform.system() == "Windows"
     if not on_windows:
         # do not set env variables on windows, allow deepspeed to control setup
         self._set_node_environment_variables()
         log.info(
             "initializing deepspeed distributed: "
             f"GLOBAL_RANK: {self.global_rank}, "
             f"MEMBER: {self.global_rank + 1}/{self.world_size}"
         )
     deepspeed.init_distributed(
         self.torch_distributed_backend,
         distributed_port=self.cluster_environment.main_port,
     )
Example #2
0
 def init_ddp_connection(self,
                         global_rank: Optional[int] = None,
                         world_size: Optional[int] = None) -> None:
     """Initialize the DeepSpeed distributed process group.

     ``global_rank`` and ``world_size`` default to the values reported by
     the cluster environment when not given explicitly.
     """
     if platform.system() != "Windows":
         # do not set env variables on windows, allow deepspeed to control setup
         if global_rank is None:
             global_rank = self.cluster_environment.global_rank()
         if world_size is None:
             world_size = self.cluster_environment.world_size()
         self._set_node_environment_variables(global_rank, world_size)
         message = ("initializing deepspeed distributed: "
                    f"GLOBAL_RANK: {global_rank}, "
                    f"MEMBER: {global_rank + 1}/{world_size}")
         log.info(message)
     deepspeed.init_distributed(
         self.torch_distributed_backend,
         distributed_port=self.cluster_environment.master_port())