def _init_bagua_distributed(self) -> None:
    """Prepare this process for Bagua distributed execution.

    Steps, in order: set the node environment variables, log this
    process's global rank and its 1-based position within the world
    size, select the CUDA device given by ``self.local_rank``, and call
    ``bagua.init_process_group()`` unless ``is_initialized()`` already
    reports true.
    """
    self._set_node_environment_variables()

    message = (
        "Initializing Bagua Distributed: "
        f"GLOBAL_RANK: {self.global_rank}, "
        f"MEMBER: {self.global_rank + 1}/{self.world_size}"
    )
    log.info(message)

    # The device must be selected before the Bagua distributed
    # environment is initialized.
    # Note: setup_environment calls super().setup_distributed after
    # calling init_distributed().
    torch.cuda.set_device(self.local_rank)

    if is_initialized():
        # Already initialized; nothing further to do.
        return
    bagua.init_process_group()
def barrier(self, *args, **kwargs) -> None:  # type: ignore[no-untyped-def]
    """Call the module-level ``barrier()`` when ``is_initialized()`` is true.

    Does nothing when ``is_initialized()`` returns a falsy value. Any
    positional or keyword arguments are accepted but not forwarded.
    """
    if not is_initialized():
        return
    # The bare name resolves to the module-level function, not this method.
    barrier()