Esempio n. 1
0
def test_train(ray_start_4_cpus):
    """Run ``_train`` on three workers via ``execute`` and ``run_remote``.

    Both dispatch paths must produce the value set ``{0, 1, 2}`` across the
    workers.

    Args:
        ray_start_4_cpus: Fixture argument; it is not referenced in the body
            (presumably it provides the Ray cluster -- confirm in conftest).
    """
    def simple_fn(worker):
        # ``worker`` is unused: ``execute`` is called without explicit args
        # and so must supply it, while ``run_remote`` is given ``None``.
        return _train()

    setting = RayExecutor.create_settings(timeout_s=30)
    hjob = RayExecutor(
        setting, num_workers=3, use_gpu=torch.cuda.is_available())
    hjob.start()
    try:
        result = hjob.execute(simple_fn)
        assert set(result) == {0, 1, 2}
        result = ray.get(hjob.run_remote(simple_fn, args=[None]))
        assert set(result) == {0, 1, 2}
    finally:
        # Always release the workers, even when an assertion or a remote
        # call fails, so they do not outlive this test.
        hjob.shutdown()
Esempio n. 2
0
def test_ray_exec_remote_func(ray_start_4_cpus):
    """Dispatch a function with ``run_remote`` and check the workers agree.

    The remote function returns ``hvd.rank() * num_epochs``. It is invoked
    with ``num_epochs=0``, so every worker returns 0 and the collected
    results must collapse to a single distinct value.

    Args:
        ray_start_4_cpus: Fixture argument; it is not referenced in the body
            (presumably it provides the Ray cluster -- confirm in conftest).
    """
    def simple_fn(num_epochs):
        # Imported inside the function so the import happens wherever the
        # function is actually executed.
        import horovod.torch as hvd
        hvd.init()
        return hvd.rank() * num_epochs

    setting = RayExecutor.create_settings(timeout_s=30)
    hjob = RayExecutor(
        setting, num_hosts=1, num_slots=4, use_gpu=torch.cuda.is_available())
    hjob.start()
    try:
        object_refs = hjob.run_remote(simple_fn, args=[0])
        result = ray.get(object_refs)
        assert len(set(result)) == 1
    finally:
        # Always release the workers, even when the remote call or the
        # assertion fails, so they do not outlive this test.
        hjob.shutdown()