Esempio n. 1
0
    def _init_bagua_distributed(self) -> None:
        """Set up this process for Bagua distributed training.

        Steps, in order: export the node environment variables, log this
        process's global rank and its position within the world, select the
        CUDA device for the local rank, and create the Bagua process group
        unless ``is_initialized()`` reports that one already exists.
        """
        self._set_node_environment_variables()

        rank = self.global_rank
        message = (
            "Initializing Bagua Distributed: "
            f"GLOBAL_RANK: {rank}, "
            f"MEMBER: {rank + 1}/{self.world_size}"
        )
        log.info(message)

        # The CUDA device has to be chosen before the Bagua distributed
        # environment is initialized.
        # Note: setup_environment calls super().setup_distributed after
        # calling init_distributed().
        torch.cuda.set_device(self.local_rank)

        # Nothing more to do when a process group is already up.
        if is_initialized():
            return
        bagua.init_process_group()
Esempio n. 2
0
 def barrier(self, *args, **kwargs) -> None:  # type: ignore[no-untyped-def]
     """Synchronize with the other processes, if a process group exists.

     Positional and keyword arguments are accepted but not used. When
     ``is_initialized()`` returns false the call is a no-op.
     """
     if not is_initialized():
         return
     # Inside the method body, ``barrier`` resolves to the module-level
     # function, not to this method (class scope is not searched here).
     # NOTE(review): presumably the distributed backend's barrier - confirm
     # against the file's imports.
     barrier()