def _endpoint_to_world_rank_id(self, endpoint):
    """Map a worker endpoint string to its global (world) rank index.

    Args:
        endpoint: an "ip:port" string expected to appear in
            ``fleet.worker_endpoints()``.

    Returns:
        int: the position of ``endpoint`` in the world endpoint list,
        which serves as the worker's world rank id.

    Raises:
        AssertionError: if ``endpoint`` is not a known worker endpoint.
    """
    world_endpoints = fleet.worker_endpoints()
    # BUG FIX: the original message printed fleet.world_device_ids() while
    # claiming to show worker_endpoints, which made failures misleading.
    assert endpoint in world_endpoints, \
        "endpoint (%s) not in worker_endpoints (%s) " % (
            endpoint, world_endpoints)
    return world_endpoints.index(endpoint)
def test_worker_endpoints(self):
    """Bring up fleet in collective mode and print the worker endpoints.

    Smoke test: exercises PaddleCloudRoleMaker initialization and the
    to_string form of worker_endpoints(); output is printed, not asserted.
    """
    collective_role = role_maker.PaddleCloudRoleMaker(is_collective=True)
    fleet.init(collective_role)
    endpoints_as_string = fleet.worker_endpoints(to_string=True)
    print(endpoints_as_string)
# Query the fleet topology FIRST so that `local_rank` is defined before it is
# used to index into main_programs/startup_programs.
# BUG FIX: the original statement order read `local_rank` (and the program
# lists indexed by it) before `local_rank = int(fleet.local_rank())` was
# assigned, which raises NameError at runtime. All original statements are
# preserved; only the order is corrected.
worker_endpoints = fleet.worker_endpoints()
world_device_ids = fleet.world_device_ids()
local_device_ids = fleet.local_device_ids()
local_rank = int(fleet.local_rank())
print("worker_endpoints:", worker_endpoints)
print("world_device_ids:", world_device_ids)
print("local_device_ids:", local_device_ids)
print("local_rank:", local_rank)

# Select this rank's programs (main_programs/startup_programs are assumed to
# be rank-indexed lists built elsewhere in the file — TODO confirm).
print(main_programs[local_rank])
print("local rank: ", local_rank)
print("local startup program: ", startup_programs[local_rank])
startup_program = startup_programs[local_rank]
main_program = main_programs[local_rank]

# Wrap the rank-local main program and let the Ascend optimizer rewrite it;
# RANK_TABLE_FILE comes from the environment (None if unset).
loss = Loss(Block(main_program))
optimizer = ascend_optimizer.AscendOptimizer(None, fetch_list=[])
optimizer.minimize(loss, startup_program, auto_dp=True,
                   rank_table_file=os.getenv("RANK_TABLE_FILE", None))

# Execute startup then main on CPU.
exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(startup_program)
exe.run(main_program)

# Hand the discovered topology to the training entry point defined elsewhere.
train(worker_endpoints, world_device_ids, local_device_ids, local_rank)
def test_worker_endpoints():
    """test_worker_endpoints"""
    # Before fleet initialization the endpoint list is expected to be empty.
    endpoints = fleet.worker_endpoints()
    assert endpoints == []
    current_test = sys._getframe().f_code.co_name
    print("{} ... ok".format(current_test))