def _endpoint_to_world_rank_id(self, endpoint):
    world_endpoints = fleet.worker_endpoints()
    assert endpoint in world_endpoints, "endpoint (%s) not in worker_endpoints (%s) " % (
        endpoint, world_endpoints)
    return world_endpoints.index(endpoint)
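
# Illustrative standalone sketch of the same lookup outside the class: the
# world rank of a worker is simply its position in the list returned by
# fleet.worker_endpoints(). The sample "ip:port" values below are made up.
def endpoint_to_world_rank_id(endpoint, world_endpoints):
    assert endpoint in world_endpoints, "endpoint (%s) not in worker_endpoints (%s)" % (
        endpoint, world_endpoints)
    return world_endpoints.index(endpoint)

print(endpoint_to_world_rank_id("127.0.0.1:6171",
                                ["127.0.0.1:6170", "127.0.0.1:6171"]))  # prints 1
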
def test_worker_endpoints(self):
    role = role_maker.PaddleCloudRoleMaker(is_collective=True)
    fleet.init(role)
    print(fleet.worker_endpoints(to_string=True))
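
# A hedged sketch of where these endpoints usually come from: in collective
# mode the role maker reads environment variables normally exported by the
# distributed launcher. The single-process values below are assumptions for a
# local dry run, not part of the original example.
import os
import paddle.distributed.fleet as fleet
import paddle.distributed.fleet.base.role_maker as role_maker

os.environ.setdefault("PADDLE_TRAINER_ENDPOINTS", "127.0.0.1:6170")
os.environ.setdefault("PADDLE_CURRENT_ENDPOINT", "127.0.0.1:6170")
os.environ.setdefault("PADDLE_TRAINER_ID", "0")

role = role_maker.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role)
print(fleet.worker_endpoints())  # expected: ["127.0.0.1:6170"]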

# The original snippet starts mid-function here: the imports below, the def
# header (reconstructed from the call at the bottom of the example), and the
# code that builds one startup/main program pair per rank are not shown in full.
import os

import paddle
import paddle.distributed.fleet as fleet


def train(worker_endpoints, world_device_ids, local_device_ids, local_rank):
    print(main_programs[local_rank])

    print("local rank: ", local_rank)
    print("local startup program: ", startup_programs[local_rank])

    # Select this rank's programs; Loss and Block are helper wrappers, and
    # ascend_optimizer refers to Paddle's AscendOptimizer module (import not shown).
    startup_program = startup_programs[local_rank]
    main_program = main_programs[local_rank]
    loss = Loss(Block(main_program))
    optimizer = ascend_optimizer.AscendOptimizer(None, fetch_list=[])
    optimizer.minimize(loss,
                       startup_program,
                       auto_dp=True,
                       rank_table_file=os.getenv("RANK_TABLE_FILE", None))

    # Run the startup program once, then the main program, on a CPU-place executor.
    exe = paddle.static.Executor(paddle.CPUPlace())
    exe.run(startup_program)
    exe.run(main_program)


worker_endpoints = fleet.worker_endpoints()
world_device_ids = fleet.world_device_ids()
local_device_ids = fleet.local_device_ids()
local_rank = int(fleet.local_rank())

print("worker_endpoints:", worker_endpoints)
print("world_device_ids:", world_device_ids)
print("local_device_ids:", local_device_ids)
print("local_rank:", local_rank)

train(worker_endpoints, world_device_ids, local_device_ids, local_rank)
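
# Illustrative follow-up (not part of the original snippet): the world rank of
# this process can be recovered from its endpoint, mirroring
# _endpoint_to_world_rank_id above; PADDLE_CURRENT_ENDPOINT is assumed to be
# exported by the launcher.
current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
if current_endpoint in worker_endpoints:
    print("world rank:", worker_endpoints.index(current_endpoint))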
Example #4
import sys

def test_worker_endpoints():
    """test_worker_endpoints"""
    # Expects an environment where no trainer endpoints are configured.
    assert fleet.worker_endpoints() == []
    print("{} ... ok".format(sys._getframe().f_code.co_name))