Beispiel #1
0
def test_actor_recursive(ray_start_regular_shared):
    """Chain three actors so every call is forwarded to the innermost one.

    The outer actor delegates to the middle one, which delegates to the
    inner one; only an actor without a delegate doubles its input.
    """

    @ray.remote
    class Actor:
        def __init__(self, delegate=None):
            self.delegate = delegate

        def f(self, x):
            # An actor without a delegate is the end of the chain.
            if not self.delegate:
                return x * 2
            return ray.get(self.delegate.f.remote(x))

    inner = Actor.remote()
    middle = Actor.remote(inner)
    outer = Actor.remote(middle)

    inputs = range(100)
    expected = [value * 2 for value in inputs]

    # Fetch every result directly.
    direct = ray.get([outer.f.remote(i) for i in inputs])
    assert direct == expected

    # Wait for all references first, then fetch them.
    ready_refs, _ = ray.wait(
        [outer.f.remote(i) for i in inputs], num_returns=100)
    waited = ray.get(ready_refs)
    assert waited == expected
Beispiel #2
0
    def __dir__(self) -> List[str]:
        """Return this object's attribute names, caching a remote lookup.

        A previously cached ``self._dir`` is returned as-is. Otherwise, when
        ``ray.is_connected()`` is true, ``dir()`` is evaluated on this object
        inside a zero-CPU remote task and the result is cached in
        ``self._dir``. When not connected, the superclass implementation is
        used and nothing is cached.
        """
        cached = self._dir
        if cached is not None:
            return cached
        if not ray.is_connected():
            return super().__dir__()

        @ray.remote(num_cpus=0)
        def get_dir(x):
            return dir(x)

        self._dir = ray.get(get_dir.remote(self))
        return self._dir
Beispiel #3
0
 def sum():
     """Add up the values behind the refs produced by ``f.remote()``.

     For each ref, also asserts that
     ``object_exists(ref, memory_store_only=True)`` is truthy for the first
     two refs and falsy for all later ones.
     """
     refs = f.remote()
     total = 0
     for position, ref in enumerate(refs):
         total += ray.get(ref)
         in_memory = ray.worker.global_worker.core_worker.object_exists(
             ref, memory_store_only=True)
         if position >= 2:
             assert not in_memory
         else:
             assert in_memory
     return total
Beispiel #4
0
    def bar():
        """Call ``foo`` remotely with three fresh 1024-element random arrays.

        Returns the fetched result of that remote call.
        """
        # if the refs are inlined, the test fails.
        # refs = [ray.put(np.random.rand(1024)) for _ in range(3)]
        # return ray.get(
        #     foo.remote(refs[0], refs[1], refs[2]))
        first = np.random.rand(1024)  # 8k
        second = np.random.rand(1024)  # 8k
        third = np.random.rand(1024)  # 8k
        pending = foo.remote(first, second, third)
        return ray.get(pending)
Beispiel #5
0
def test_call_actors_indirect_through_tasks(ray_start_regular_shared):
    """Call an actor method from inside tasks that receive the actor handle.

    The handle is handed to the tasks both directly and wrapped in a list.
    """

    @ray.remote
    class Counter:
        def __init__(self, value):
            self.value = int(value)

        def increase(self, delta):
            self.value += int(delta)
            return self.value

    @ray.remote
    def foo(handle):
        pending = handle.increase.remote(1)
        return ray.get(pending)

    @ray.remote
    def bar(handle):
        pending = handle.increase.remote(1)
        return ray.get(pending)

    @ray.remote
    def zoo(handles):
        # The handle arrives wrapped in a one-element list.
        pending = handles[0].increase.remote(1)
        return ray.get(pending)

    counter = Counter.remote(0)
    for _ in range(100):
        ray.get(foo.remote(counter))
        ray.get(bar.remote(counter))
        ray.get(zoo.remote([counter]))
Beispiel #6
0
def test_create_remote_before_start(ray_start_regular_shared):
    """Define remote objects (as a library would) before the client starts.

    The remote class and function are declared first and only used once the
    client server context is active.
    """
    from ray.util.client import ray

    @ray.remote
    class Returner:
        def doit(self):
            return "foo"

    @ray.remote
    def f(x):
        return x + 20

    # Prints in verbose tests
    print("Created remote functions")

    with ray_start_client_server() as ray:
        task_result = ray.get(f.remote(3))
        assert task_result == 23

        returner = Returner.remote()
        method_result = ray.get(returner.doit.remote())
        assert method_result == "foo"
Beispiel #7
0
def test_redefining_remote_functions(shutdown_only):
    """Check that remote functions can be redefined under the same name."""
    ray.init(num_cpus=1)

    # Test that we can define a remote function in the shell.
    @ray.remote
    def f(x):
        return x + 1

    first_result = ray.get(f.remote(0))
    assert first_result == 1

    # Test that we can redefine the remote function.
    @ray.remote
    def f(x):
        return x + 10

    # Keep calling until the new definition answers; until then only the old
    # definition's result is accepted.
    while True:
        val = ray.get(f.remote(0))
        assert val in (1, 10)
        if val == 10:
            break
        logger.info("Still using old definition of f, trying again.")

    # Check that we can redefine functions even when the remote function source
    # doesn't change (see https://github.com/ray-project/ray/issues/6130).
    @ray.remote
    def g():
        return nonexistent()

    # `nonexistent` has not been defined yet, so the task must fail.
    with pytest.raises(RayTaskError, match="nonexistent"):
        ray.get(g.remote())

    def nonexistent():
        return 1

    # Redefine the function and make sure it succeeds.
    @ray.remote
    def g():
        return nonexistent()

    redefined_result = ray.get(g.remote())
    assert redefined_result == 1

    # Check the same thing but when the redefined function is inside of another
    # task.
    @ray.remote
    def h(i):
        @ray.remote
        def j():
            return i

        return j.remote()

    for expected in range(20):
        inner_ref = ray.get(h.remote(expected))
        assert ray.get(inner_ref) == expected
Beispiel #8
0
    def _init_class_info(self):
        """Fetch per-method return counts and signatures via a remote task.

        Sets ``self._method_num_returns`` to the fetched
        ``_ray_method_num_returns`` and builds ``self._method_signatures`` as
        a mapping from method name to ``inspect.Signature``.
        """
        # TODO: fetch Ray method decorators
        @ray.remote(num_cpus=0)
        def get_class_info(x):
            return x._ray_method_num_returns, x._ray_method_signatures

        class_info = ray.get(get_class_info.remote(self))
        self._method_num_returns, raw_signatures = class_info

        # The remote side returns parameter lists; rebuild Signature objects.
        self._method_signatures = {
            name: inspect.Signature(parameters=params)
            for name, params in raw_signatures.items()
        }
Beispiel #9
0
def test_object_transfer_dump(ray_start_cluster):
    """Check the object transfer timeline after cross-node broadcasts.

    Three nodes are started, an object is produced on each, and each object
    is then passed to a task on every node. The resulting timeline must be
    JSON-serializable and contain send and receive events from all nodes.
    """
    cluster = ray_start_cluster

    num_nodes = 3
    for node_index in range(num_nodes):
        cluster.add_node(
            resources={str(node_index): 1}, object_store_memory=10**9)
    ray.init(address=cluster.address)

    @ray.remote
    def f(x):
        return

    def submit_on_each_node(arg):
        # One task per node, pinned through the node's custom resource.
        return [
            f._remote(args=[arg], resources={str(i): 1})
            for i in range(num_nodes)
        ]

    # These objects will live on different nodes.
    object_refs = submit_on_each_node(1)

    # Broadcast each object from each machine to each other machine.
    for object_ref in object_refs:
        ray.get(submit_on_each_node(object_ref))

    # The profiling information only flushes once every second.
    time.sleep(1.1)

    transfer_dump = ray.state.object_transfer_timeline()
    # Make sure the transfer dump can be serialized with JSON.
    json.loads(json.dumps(transfer_dump))
    assert len(transfer_dump) >= num_nodes**2

    def pids_for(event_name):
        return {
            event["pid"]
            for event in transfer_dump if event["name"] == event_name
        }

    assert len(pids_for("transfer_receive")) == num_nodes
    assert len(pids_for("transfer_send")) == num_nodes
Beispiel #10
0
def test_basic_named_actor(ray_start_regular_shared):
    """Look up named actors through ray.get_actor().

    Covers a regular named actor and one created with lifetime="detached".
    """
    with ray_start_client_server() as ray:

        @ray.remote
        class Accumulator:
            def __init__(self):
                self.x = 0

            def inc(self):
                self.x += 1

            def get(self):
                return self.x

        # Create the actor
        handle = Accumulator.options(name="test_acc").remote()
        for _ in range(2):
            handle.inc.remote()

        # Make sure the get_actor call works
        looked_up = ray.get_actor("test_acc")
        looked_up.inc.remote()
        total = ray.get(looked_up.get.remote())
        assert total == 3

        del handle

        # Create a detached actor and drop the creating handle right away.
        detached_options = Accumulator.options(
            name="test_acc2", lifetime="detached")
        handle = detached_options.remote()
        handle.inc.remote()
        del handle

        detached_actor = ray.get_actor("test_acc2")
        for _ in range(5):
            detached_actor.inc.remote()

        detached_total = ray.get(detached_actor.get.remote())
        assert detached_total == 6
Beispiel #11
0
def test_worker_lease_reply_with_resources(ray_start_cluster_enabled):
    """Check that an actor lands on the second node while the first is busy.

    A long-running task takes most of the first node's memory; the actor
    created afterwards must report the second node's id.
    """
    mib = 1024**2
    cluster = ray_start_cluster_enabled
    first_node_config = {
        "gcs_resource_report_poll_period_ms": 1000000,
        "gcs_actor_scheduling_enabled": True,
    }
    cluster.add_node(
        memory=2000 * mib,
        num_cpus=1,
        _system_config=first_node_config,
    )
    node2 = cluster.add_node(memory=1000 * mib, num_cpus=1)
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    @ray.remote(memory=1500 * mib, num_cpus=0.01)
    def fun(signal):
        signal.send.remote()
        time.sleep(30)
        return 0

    signal = SignalActor.remote()
    fun.remote(signal)
    # Make sure that the `fun` is running.
    ray.get(signal.wait.remote())

    @ray.remote(memory=800 * mib, num_cpus=0.01)
    class Foo:
        def method(self):
            return ray.worker.global_worker.node.unique_id

    actor = Foo.remote()
    node_id_ref = actor.method.remote()
    ready, pending = ray.wait([node_id_ref], timeout=10)
    # If RequestWorkerLeaseReply carries normal task resources,
    # GCS will then schedule foo1 to node2. Otherwise,
    # GCS would keep trying to schedule foo1 to
    # node1 and getting rejected.
    assert len(ready) == 1
    assert len(pending) == 0
    assert ray.get(node_id_ref) == node2.unique_id
Beispiel #12
0
def test_system_config_when_connecting(ray_start_cluster):
    """Check system config handling when connecting to an existing cluster.

    Passing ``_system_config`` while connecting must raise ``ValueError``,
    and the config given to the node must be in effect after connecting.
    """
    mib = 1024 * 1024
    config = {"object_pinning_enabled": 0, "object_timeout_milliseconds": 200}

    def make_payload():
        return np.zeros(40 * mib, dtype=np.uint8)

    cluster = ray.cluster_utils.Cluster()
    cluster.add_node(_system_config=config, object_store_memory=100 * mib)
    cluster.wait_for_nodes()

    # Specifying _system_config when connecting to a cluster is disallowed.
    with pytest.raises(ValueError):
        ray.init(address=cluster.address, _system_config=config)

    # Check that the config was picked up (object pinning is disabled).
    ray.init(address=cluster.address)
    obj_ref = ray.put(make_payload())

    # Put more objects of the same size, keeping only the latest ref alive.
    for _ in range(5):
        filler_ref = ray.put(make_payload())
    del filler_ref

    # This would not raise an exception if object pinning was enabled.
    with pytest.raises(ray.exceptions.ObjectLostError):
        ray.get(obj_ref)
Beispiel #13
0
def test_schedule_many_actors_and_normal_tasks(ray_start_cluster):
    """Submit many normal tasks and actor method calls on a 10-node cluster.

    Every actor method call must return 2 and every normal task must
    return 1.
    """
    cluster = ray_start_cluster

    node_count = 10
    actor_count = 50
    each_actor_task_count = 50
    normal_task_count = 1000
    node_memory = 2 * 1024**3

    for node_index in range(node_count):
        # Only the first node added carries the GCS actor scheduling config.
        system_config = {}
        if node_index == 0:
            system_config = {"gcs_actor_scheduling_enabled": True}
        cluster.add_node(memory=node_memory, _system_config=system_config)
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    @ray.remote(memory=100 * 1024**2, num_cpus=0.01)
    class Foo:
        def method(self):
            return 2

    @ray.remote(memory=100 * 1024**2, num_cpus=0.01)
    def fun():
        return 1

    task_refs = [fun.remote() for _ in range(normal_task_count)]
    actors = [Foo.remote() for _ in range(actor_count)]

    # Round-robin over the actors, one call per actor per round.
    method_refs = []
    for _ in range(each_actor_task_count):
        for actor in actors:
            method_refs.append(actor.method.remote())

    assert all(value == 2 for value in ray.get(method_refs))
    assert all(value == 1 for value in ray.get(task_refs))
Beispiel #14
0
def test_defining_remote_functions(shutdown_only):
    """Check what remote functions can close over and call.

    Covers closing over plain data, closing over imported modules, and
    remote functions that call other remote functions.
    """
    ray.init(num_cpus=3)

    # Test that we can close over plain old data.
    data = [
        np.zeros([3, 5]),
        (1, 2, "a"),
        [0.0, 1.0, 1 << 62],
        1 << 60,
        {"a": np.zeros(3)},
    ]

    @ray.remote
    def g():
        return data

    ray.get(g.remote())

    # Test that we can close over modules.
    @ray.remote
    def h():
        return np.zeros([3, 5])

    # np.all replaces np.alltrue: the latter is a deprecated alias that was
    # removed in NumPy 2.0, while np.all works on every NumPy version.
    assert np.all(ray.get(h.remote()) == np.zeros([3, 5]))

    @ray.remote
    def j():
        return time.time()

    ray.get(j.remote())

    # Test that we can define remote functions that call other remote
    # functions.
    @ray.remote
    def k(x):
        return x + 1

    @ray.remote
    def k2(x):
        return ray.get(k.remote(x))

    @ray.remote
    def m(x):
        return ray.get(k2.remote(x))

    assert ray.get(k.remote(1)) == 2
    assert ray.get(k2.remote(1)) == 2
    assert ray.get(m.remote(1)) == 2
Beispiel #15
0
def test_skip_plasma(ray_start_regular_shared):
    """Check that a small actor result is not reported by object_exists()
    right after submission, yet can still be fetched with ray.get().
    """

    @ray.remote
    class Actor:
        def __init__(self):
            pass

        def f(self, x):
            return x * 2

    doubler = Actor.remote()
    result_ref = doubler.f.remote(1)
    # it is not stored in plasma
    exists = ray.worker.global_worker.core_worker.object_exists(result_ref)
    assert not exists
    value = ray.get(result_ref)
    assert value == 2
Beispiel #16
0
 def background_thread(self, wait_objects):
     """Exercise wait, remote calls, and put/get, then record the outcome.

     Appends ``"ok"`` to ``self.thread_results`` when every check passes, or
     the raised exception otherwise. The append happens under ``self.lock``.
     """
     outcome = "ok"
     try:
         # Test wait
         expected_ready = len(wait_objects)
         ready, _ = ray.wait(
             wait_objects,
             num_returns=expected_ready,
             timeout=1000.0,
         )
         assert len(ready) == expected_ready

         num = 10
         expected = list(range(num))
         for _ in range(20):
             # Test remote call
             echoed = ray.get([echo.remote(i) for i in range(num)])
             assert echoed == expected
             # Test put and get
             stored = ray.get([ray.put(i) for i in range(num)])
             assert stored == expected
             # Pause for a random 0-10 ms between rounds.
             time.sleep(random.randint(0, 10) / 1000.0)
     except Exception as e:
         # Failures are reported through thread_results, not raised.
         outcome = e
     with self.lock:
         self.thread_results.append(outcome)
Beispiel #17
0
def test_wait_cluster(ray_start_cluster):
    """Check that ray.wait with timeout=0 reports finished remote tasks.

    A first batch is timed; a second batch is then given twice that long
    before being polled without blocking.
    """
    cluster = ray_start_cluster
    for _ in range(2):
        cluster.add_node(num_cpus=1, resources={"RemoteResource": 1})
    ray.init(address=cluster.address)

    @ray.remote(resources={"RemoteResource": 1})
    def f():
        return

    def submit_batch():
        return [f.remote() for _ in range(10)]

    # Make sure we have enough workers on the remote nodes to execute some
    # tasks.
    tasks = submit_batch()
    started = time.time()
    ray.get(tasks)
    elapsed = time.time() - started

    # Submit some more tasks that can only be executed on the remote nodes.
    tasks = submit_batch()
    # Sleep for a bit to let the tasks finish.
    time.sleep(elapsed * 2)
    _, unready = ray.wait(tasks, num_returns=len(tasks), timeout=0)
    # All remote tasks should have finished.
    assert not unready
Beispiel #18
0
def test_get_with_timeout(ray_start_regular_shared):
    """Check ray.get() with a timeout for ready and not-yet-ready objects."""
    SignalActor = create_remote_signal_actor(ray)
    signal = SignalActor.remote()

    # Check that get() returns early if object is ready.
    began = time.time()
    ray.get(signal.wait.remote(should_wait=False), timeout=30)
    elapsed = time.time() - began
    assert elapsed < 30

    # Check that get() raises a TimeoutError after the timeout if the object
    # is not ready yet.
    pending_ref = signal.wait.remote()
    with pytest.raises(GetTimeoutError):
        ray.get(pending_ref, timeout=0.1)

    # The Ray-specific error must also be catchable as the builtin one.
    assert issubclass(GetTimeoutError, TimeoutError)
    with pytest.raises(TimeoutError):
        ray.get(pending_ref, timeout=0.1)

    # Check that a subsequent get() returns early.
    ray.get(signal.send.remote())
    began = time.time()
    ray.get(pending_ref, timeout=30)
    elapsed = time.time() - began
    assert elapsed < 30
Beispiel #19
0
def test_actor_concurrent(ray_start_regular_shared):
    """Check that an actor with max_concurrency=3 runs three calls together.

    Each call blocks until three items have been added, so all three calls
    must return the same complete batch.
    """

    @ray.remote
    class Batcher:
        def __init__(self):
            self.batch = []
            self.event = threading.Event()

        def add(self, x):
            self.batch.append(x)
            if len(self.batch) < 3:
                # Wait for the call that completes the batch.
                self.event.wait()
            else:
                self.event.set()
            return sorted(self.batch)

    batcher = Batcher.options(max_concurrency=3).remote()
    refs = [batcher.add.remote(item) for item in (1, 2, 3)]
    first, second, third = [ray.get(ref) for ref in refs]
    assert first == [1, 2, 3]
    assert first == second == third
Beispiel #20
0
def test_task_arguments_inline_bytes_limit(ray_start_cluster):
    """Pass three 1024-element arrays by value from a task on the head node
    to a task on the worker node, with inlining limits configured on the
    head node.
    """
    kib = 1024
    cluster = ray_start_cluster
    head_system_config = {
        "max_direct_call_object_size": 100 * kib,
        # if task_rpc_inlined_bytes_limit is greater than
        # max_grpc_message_size, this test fails.
        "task_rpc_inlined_bytes_limit": 18 * kib,
        "max_grpc_message_size": 20 * kib,
    }
    cluster.add_node(
        num_cpus=1,
        resources={"pin_head": 1},
        _system_config=head_system_config,
    )
    cluster.add_node(num_cpus=1, resources={"pin_worker": 1})
    ray.init(address=cluster.address)

    @ray.remote(resources={"pin_worker": 1})
    def foo(ref1, ref2, ref3):
        return ref1 == ref2 + ref3

    @ray.remote(resources={"pin_head": 1})
    def bar():
        # if the refs are inlined, the test fails.
        # refs = [ray.put(np.random.rand(1024)) for _ in range(3)]
        # return ray.get(
        #     foo.remote(refs[0], refs[1], refs[2]))
        first = np.random.rand(1024)  # 8k
        second = np.random.rand(1024)  # 8k
        third = np.random.rand(1024)  # 8k
        return ray.get(foo.remote(first, second, third))

    ray.get(bar.remote())
Beispiel #21
0
def test_actor_large_objects(ray_start_regular_shared):
    """Check object_exists() before and after a large actor result is ready."""

    @ray.remote
    class Actor:
        def __init__(self):
            pass

        def f(self):
            time.sleep(1)
            return np.zeros(10**7)

    def exists(ref):
        return ray.worker.global_worker.core_worker.object_exists(ref)

    producer = Actor.remote()
    array_ref = producer.f.remote()
    # The method sleeps first, so the result cannot exist yet.
    assert not exists(array_ref)

    done, _ = ray.wait([array_ref])
    assert len(done) == 1
    assert exists(array_ref)

    fetched = ray.get(array_ref)
    assert isinstance(fetched, np.ndarray)
Beispiel #22
0
def test_variable_number_of_args(shutdown_only):
    """Check remote functions taking ``*args``, with and without leading
    positional parameters, plus a remote function taking no arguments.
    """
    ray.init(num_cpus=1)

    @ray.remote
    def varargs_fct1(*a):
        return " ".join(map(str, a))

    @ray.remote
    def varargs_fct2(a, *b):
        return " ".join(map(str, b))

    x = varargs_fct1.remote(0, 1, 2)
    assert ray.get(x) == "0 1 2"
    x = varargs_fct2.remote(0, 1, 2)
    assert ray.get(x) == "1 2"

    @ray.remote
    def f1(*args):
        return args

    @ray.remote
    def f2(x, y, *args):
        return x, y, args

    assert ray.get(f1.remote()) == ()
    assert ray.get(f1.remote(1)) == (1, )
    assert ray.get(f1.remote(1, 2, 3)) == (1, 2, 3)
    # Omitting required positional arguments must be rejected.
    with pytest.raises(Exception):
        f2.remote()
    with pytest.raises(Exception):
        f2.remote(1)
    assert ray.get(f2.remote(1, 2)) == (1, 2, ())
    assert ray.get(f2.remote(1, 2, 3)) == (1, 2, (3, ))
    assert ray.get(f2.remote(1, 2, 3, 4)) == (1, 2, (3, 4))

    # This check used to sit inside a nested `testNoArgs(self)` function that
    # was never called (and referenced a non-existent `self`), so it never
    # ran. It now runs inline against the session started above.
    @ray.remote
    def no_op():
        pass

    assert ray.get(no_op.remote()) is None
Beispiel #23
0
def test_actor_call_order(shutdown_only):
    """Check that actor calls run in submission order even though each call
    takes the result of a task with a random short delay as an argument.
    """
    ray.init(num_cpus=4)

    @ray.remote
    def small_value():
        time.sleep(0.01 * np.random.randint(0, 10))
        return 0

    @ray.remote
    class Actor:
        def __init__(self):
            self.count = 0

        def inc(self, count, dependency):
            # `dependency` is not read; it is only passed in as an argument.
            assert count == self.count
            self.count += 1
            return count

    actor = Actor.remote()
    call_refs = []
    for sequence_number in range(100):
        dependency_ref = small_value.remote()
        call_refs.append(actor.inc.remote(sequence_number, dependency_ref))

    assert ray.get(call_refs) == list(range(100))
Beispiel #24
0
def test_task_output_inline_bytes_limit(ray_start_cluster):
    """Check which of a task's five return values exist in the memory store
    of the consuming worker when the inlined output size is limited.
    """
    cluster = ray_start_cluster
    # Disable worker caching so worker leases are not reused; set object
    # inlining size threshold and enable storing of small objects in in-memory
    # object store so the borrowed ref is inlined.
    # set task_output_inlined_bytes_limit which only allows inline 20 bytes.
    head_system_config = {
        "worker_lease_timeout_milliseconds": 0,
        "max_direct_call_object_size": 100 * 1024,
        "task_output_inlined_bytes_limit": 20,
        "put_small_object_in_memory_store": True,
    }
    cluster.add_node(
        num_cpus=1,
        resources={"pin_head": 1},
        _system_config=head_system_config,
    )
    cluster.add_node(num_cpus=1, resources={"pin_worker": 1})
    ray.init(address=cluster.address)

    @ray.remote(num_returns=5, resources={"pin_head": 1})
    def f():
        return list(range(5))

    @ray.remote(resources={"pin_worker": 1})
    def sum(numbers):
        total = 0
        for position, ref in enumerate(numbers):
            total += ray.get(ref)
            in_memory = ray.worker.global_worker.core_worker.object_exists(
                ref, memory_store_only=True)
            # Only the first two refs are expected in the memory store.
            if position >= 2:
                assert not in_memory
            else:
                assert in_memory
        return total

    return_refs = f.remote()
    total_ref = sum.remote(return_refs)
    assert ray.get(total_ref) == 10
Beispiel #25
0
def test_actor_distribution_balance(ray_start_cluster_enabled, args):
    """Check that actors created with the "SPREAD" strategy are balanced.

    Args:
        ray_start_cluster_enabled: Cluster fixture to which nodes are added.
        args: Indexable pair where ``args[0]`` is the number of nodes and
            ``args[1]`` is the number of actors to create.
    """
    cluster = ray_start_cluster_enabled

    node_count = args[0]
    actor_count = args[1]

    for i in range(node_count):
        # Only the first node added carries the GCS actor scheduling config.
        cluster.add_node(
            memory=1024**3,
            _system_config={"gcs_actor_scheduling_enabled": True}
            if i == 0 else {},
        )
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    @ray.remote(memory=100 * 1024**2,
                num_cpus=0.01,
                scheduling_strategy="SPREAD")
    class Foo:
        def method(self):
            return ray.worker.global_worker.node.unique_id

    # Group the actors by the id of the node each one reports.
    actor_distribution = {}
    actor_list = [Foo.remote() for _ in range(actor_count)]
    for actor in actor_list:
        node_id = ray.get(actor.method.remote())
        actor_distribution.setdefault(node_id, []).append(actor)

    if node_count >= actor_count:
        # Enough nodes for every actor to have one to itself.
        assert len(actor_distribution) == actor_count
        for actors in actor_distribution.values():
            assert len(actors) == 1
    else:
        assert len(actor_distribution) == node_count
        # Ceiling division: when actor_count is not a multiple of node_count,
        # at least one node must host more than the floored average, so a
        # floor-based bound could never be satisfied. For exact multiples
        # this equals the previous bound.
        max_actors_per_node = -(-actor_count // node_count)
        for actors in actor_distribution.values():
            assert len(actors) <= max_actors_per_node
Beispiel #26
0
def test_duplicate_args(ray_start_regular_shared):
    """Check that the same argument can be passed several times in one call,
    both positionally and by keyword, to a task and to an actor method.
    """

    @ray.remote
    def f(
        arg1,
        arg2,
        arg1_duplicate,
        kwarg1=None,
        kwarg2=None,
        kwarg1_duplicate=None,
    ):
        assert arg1 == kwarg1
        assert arg1 != arg2
        assert arg1 == arg1_duplicate
        assert kwarg1 != kwarg2
        assert kwarg1 == kwarg1_duplicate

    def call_with_duplicates(remote_callable, first, second):
        # `first` is deliberately repeated positionally and by keyword.
        return remote_callable.remote(
            first,
            second,
            first,
            kwarg1=first,
            kwarg2=second,
            kwarg1_duplicate=first,
        )

    # Test by-value arguments.
    ray.get(call_with_duplicates(f, [1], [2]))

    # Test by-reference arguments.
    first_ref = ray.put([1])
    second_ref = ray.put([2])
    ray.get(call_with_duplicates(f, first_ref, second_ref))

    # Test by-reference arguments on an actor task.
    @ray.remote
    class Actor:
        def f(self, arg1, arg2, arg1_duplicate, kwarg1=None, kwarg2=None,
              kwarg1_duplicate=None):
            assert arg1 == kwarg1
            assert arg1 != arg2
            assert arg1 == arg1_duplicate
            assert kwarg1 != kwarg2
            assert kwarg1 == kwarg1_duplicate

    actor = Actor.remote()
    ray.get(call_with_duplicates(actor.f, first_ref, second_ref))
Beispiel #27
0
def test_schedule_actor_and_normal_task(ray_start_cluster):
    """Check that an actor is not created while a normal task holds the
    memory it needs, and is created once that task finishes.
    """
    mib = 1024 ** 2
    cluster = ray_start_cluster
    cluster.add_node(
        memory=1024 ** 3, _system_config={"gcs_actor_scheduling_enabled": True}
    )
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    @ray.remote(memory=600 * mib, num_cpus=0.01)
    class Foo:
        def method(self):
            return 2

    @ray.remote(memory=600 * mib, num_cpus=0.01)
    def fun(release_signal, started_signal):
        # Report that the task is running, then block until released.
        started_signal.send.remote()
        ray.get(release_signal.wait.remote())
        return 1

    release_signal = SignalActor.remote()
    started_signal = SignalActor.remote()

    task_ref = fun.remote(release_signal, started_signal)
    # Make sure the normal task is executing.
    ray.get(started_signal.wait.remote())

    # The normal task is blocked now.
    # Try to create actor and make sure this actor is not created for the time
    # being.
    foo = Foo.remote()
    method_ref = foo.method.remote()
    ready, pending = ray.wait([method_ref], timeout=2)
    assert len(ready) == 0
    assert len(pending) == 1

    # Send a signal to unblock the normal task execution.
    ray.get(release_signal.send.remote())

    # Check the result of normal task.
    assert ray.get(task_ref) == 1

    # Make sure the actor is created.
    assert ray.get(method_ref) == 2
Beispiel #28
0
 def f(self, x):
     """Return ``x * 2``, or forward the call to ``self.delegate`` if set.

     With a truthy delegate, the result of the delegate's remote ``f`` is
     fetched and returned instead.
     """
     downstream = self.delegate
     if not downstream:
         return x * 2
     return ray.get(downstream.f.remote(x))
Beispiel #29
0
 def zoo(object):
     """Call ``increase(1)`` remotely on the first element of ``object`` and
     return the fetched result.
     """
     first = object[0]
     pending = first.increase.remote(1)
     return ray.get(pending)
Beispiel #30
0
 def bar(object):
     """Call ``increase(1)`` remotely on ``object`` and return the fetched
     result.
     """
     pending = object.increase.remote(1)
     return ray.get(pending)