import os


def configure_distributed_args(args):
    """Configures distributed training arguments from the environment variables
    set by the launcher."""
    if args.deepspeed_mpi:
        # When launched via MPI, let DeepSpeed discover rank/world size first.
        from deepspeed.utils.distributed import mpi_discovery

        mpi_discovery()

    args.local_rank = int(os.getenv('LOCAL_RANK', '0'))
    args.rank = int(os.getenv('RANK', '0'))
    args.world_size = int(os.getenv('WORLD_SIZE', '1'))
    # Model parallelism cannot span more GPUs than there are ranks in total.
    args.model_parallel_size = min(args.model_parallel_size, args.world_size)

    if args.rank == 0:
        print('using world size: {} and model-parallel size: {} '.format(
            args.world_size, args.model_parallel_size))
# Class-method variant: the same configuration implemented as a method on the
# arguments object, storing values via update_value() rather than plain attribute
# assignment.
def configure_distributed_args(self):
    """
    Configures distributed training arguments from the environment variables
    set by the DeepSpeed launcher.
    """
    if self.deepspeed_mpi:
        # When launched via MPI, let DeepSpeed discover rank/world size first.
        from deepspeed.utils.distributed import mpi_discovery

        mpi_discovery()

    self.update_value("local_rank", int(os.getenv('LOCAL_RANK', '0')))
    self.update_value("rank", int(os.getenv('RANK', '0')))
    self.update_value("world_size", int(os.getenv('WORLD_SIZE', '1')))

    if self.rank == 0:
        print(
            self.__class__.__name__
            + ".configure_distributed_args() using world size: {} and model-parallel size: {} ".format(
                self.world_size, self.model_parallel_size),
            flush=True)
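

# A minimal, hypothetical usage sketch of the method-based variant above. The
# `ExampleArgs` container and its `update_value` helper are stand-ins invented
# for illustration, not the real arguments class; the environment variables
# mimic what a distributed launcher would export.
import os


class ExampleArgs:
    deepspeed_mpi = False       # skip MPI discovery in this sketch
    model_parallel_size = 2

    def update_value(self, key, value):
        # Assumed behavior: store the configured value as an attribute.
        setattr(self, key, value)


# Bind the `self`-based function above as a method of the stand-in class.
ExampleArgs.configure_distributed_args = configure_distributed_args

if __name__ == "__main__":
    os.environ.setdefault("RANK", "0")
    os.environ.setdefault("WORLD_SIZE", "4")
    os.environ.setdefault("LOCAL_RANK", "0")

    args = ExampleArgs()
    args.configure_distributed_args()
    # On rank 0 this prints something like:
    # ExampleArgs.configure_distributed_args() using world size: 4 and model-parallel size: 2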