Example #1
def benchmark_get_calls(ray):
    value = ray.put(0)

    def get_small():
        ray.get(value)

    timeit("client: get calls", get_small)

def benchmark_simple_actor(ray, results):
    @ray.remote(num_cpus=0)
    class Actor:
        def small_value(self):
            return b"ok"

        def small_value_arg(self, x):
            return b"ok"

        def small_value_batch(self, n):
            ray.get([self.small_value.remote() for _ in range(n)])

    a = Actor.remote()

    def actor_sync():
        ray.get(a.small_value.remote())

    results += timeit("client: 1:1 actor calls sync", actor_sync)

    def actor_async():
        ray.get([a.small_value.remote() for _ in range(1000)])

    results += timeit("client: 1:1 actor calls async", actor_async, 1000)

    a = Actor.options(max_concurrency=16).remote()

    def actor_concurrent():
        ray.get([a.small_value.remote() for _ in range(1000)])

    results += timeit("client: 1:1 actor calls concurrent", actor_concurrent,
                      1000)
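Every example on this page reports its numbers through a timeit helper that lives elsewhere in the benchmark module and is not part of these excerpts (the same is true of np, which is numpy). The sketch below is only an illustration of the interface the excerpts appear to assume: a benchmark name, a zero-argument callable, and an optional multiplier that scales the reported operations per second, returning a single (name, mean, stddev) tuple in a list so callers can accumulate it with results +=. The warm-up and measurement windows here are placeholders, not the benchmark's actual values.

import time

import numpy as np


def timeit(name, fn, multiplier=1):
    # Warm up for roughly one second before measuring.
    start = time.time()
    while time.time() - start < 1:
        fn()

    # Measure a few timed windows and report operations per second.
    stats = []
    for _ in range(4):
        start = time.time()
        count = 0
        while time.time() - start < 2:
            fn()
            count += 1
        stats.append(multiplier * count / (time.time() - start))
    print(name, "per second", round(np.mean(stats), 2), "+-",
          round(np.std(stats), 2))
    return [(name, np.mean(stats), np.std(stats))]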
Example #3
def benchmark_remote_put_calls(ray):
    @ray.remote
    def do_put_small():
        for _ in range(100):
            ray.put(0)

    def put_multi_small():
        ray.get([do_put_small.remote() for _ in range(10)])

    timeit("client: remote put calls", put_multi_small, 1000)

def benchmark_put_large(ray, results):
    arr = np.zeros(100 * 1024 * 1024, dtype=np.int64)

    def put_large():
        ray.put(arr)

    results += timeit("client: put gigabytes", put_large, 8 * 0.1)

def benchmark_tasks_and_put_batch(ray, results):
    @ray.remote
    def do_put_small():
        for _ in range(100):
            ray.put(0)

    def put_multi_small():
        ray.get([do_put_small.remote() for _ in range(10)])

    results += timeit("client: tasks and put batch", put_multi_small, 1000)

def benchmark_tasks_and_get_batch(ray, results):
    @ray.remote
    def small_value():
        return b"ok"

    def small_value_batch():
        submitted = [small_value.remote() for _ in range(1000)]
        ray.get(submitted)
        return 0

    results += timeit("client: tasks and get batch", small_value_batch)

def benchmark_put_calls(ray, results):
    def put_small():
        ray.put(0)

    results += timeit("client: put calls", put_small)
Example #8
def main(results=None):
    results = results or []

    check_optimized_build()

    print("Tip: set TESTS_TO_RUN='pattern' to run a subset of benchmarks")

    ray.init(_system_config={"put_small_object_in_memory_store": True})

    value = ray.put(0)

    def get_small():
        ray.get(value)

    results += timeit("single client get calls", get_small)

    def put_small():
        ray.put(0)

    results += timeit("single client put calls", put_small)

    @ray.remote
    def do_put_small():
        for _ in range(100):
            ray.put(0)

    def put_multi_small():
        ray.get([do_put_small.remote() for _ in range(10)])

    results += timeit("multi client put calls", put_multi_small, 1000)

    ray.shutdown()
    ray.init(_system_config={"put_small_object_in_memory_store": False})

    value = ray.put(0)
    arr = np.zeros(100 * 1024 * 1024, dtype=np.int64)

    results += timeit("single client get calls (Plasma Store)", get_small)

    results += timeit("single client put calls (Plasma Store)", put_small)

    results += timeit("multi client put calls (Plasma Store)", put_multi_small,
                      1000)

    def put_large():
        ray.put(arr)

    results += timeit("single client put gigabytes", put_large, 8 * 0.1)

    @ray.remote
    def do_put():
        for _ in range(10):
            ray.put(np.zeros(10 * 1024 * 1024, dtype=np.int64))

    def put_multi():
        ray.get([do_put.remote() for _ in range(10)])

    results += timeit("multi client put gigabytes", put_multi, 10 * 8 * 0.1)

    def small_task():
        ray.get(small_value.remote())

    results += timeit("single client tasks sync", small_task)

    def small_task_async():
        ray.get([small_value.remote() for _ in range(1000)])

    results += timeit("single client tasks async", small_task_async, 1000)

    n = 10000
    m = 4
    actors = [Actor.remote() for _ in range(m)]

    def multi_task():
        submitted = [a.small_value_batch.remote(n) for a in actors]
        ray.get(submitted)

    results += timeit("multi client tasks async", multi_task, n * m)

    a = Actor.remote()

    def actor_sync():
        ray.get(a.small_value.remote())

    results += timeit("1:1 actor calls sync", actor_sync)

    a = Actor.remote()

    def actor_async():
        ray.get([a.small_value.remote() for _ in range(1000)])

    results += timeit("1:1 actor calls async", actor_async, 1000)

    a = Actor.options(max_concurrency=16).remote()

    def actor_concurrent():
        ray.get([a.small_value.remote() for _ in range(1000)])

    results += timeit("1:1 actor calls concurrent", actor_concurrent, 1000)

    n = 5000
    n_cpu = multiprocessing.cpu_count() // 2
    actors = [Actor._remote() for _ in range(n_cpu)]
    client = Client.remote(actors)

    def actor_async_direct():
        ray.get(client.small_value_batch.remote(n))

    results += timeit("1:n actor calls async", actor_async_direct,
                      n * len(actors))

    n_cpu = multiprocessing.cpu_count() // 2
    a = [Actor.remote() for _ in range(n_cpu)]

    @ray.remote
    def work(actors):
        ray.get([actors[i % n_cpu].small_value.remote() for i in range(n)])

    def actor_multi2():
        ray.get([work.remote(a) for _ in range(m)])

    results += timeit("n:n actor calls async", actor_multi2, m * n)

    n = 1000
    actors = [Actor._remote() for _ in range(n_cpu)]
    clients = [Client.remote(a) for a in actors]

    def actor_multi2_direct_arg():
        ray.get([c.small_value_batch_arg.remote(n) for c in clients])

    results += timeit("n:n actor calls with arg async",
                      actor_multi2_direct_arg, n * len(clients))

    a = AsyncActor.remote()

    def actor_sync():
        ray.get(a.small_value.remote())

    results += timeit("1:1 async-actor calls sync", actor_sync)

    a = AsyncActor.remote()

    def async_actor():
        ray.get([a.small_value.remote() for _ in range(1000)])

    results += timeit("1:1 async-actor calls async", async_actor, 1000)

    a = AsyncActor.remote()

    def async_actor():
        ray.get([a.small_value_with_arg.remote(i) for i in range(1000)])

    results += timeit("1:1 async-actor calls with args async", async_actor,
                      1000)

    n = 5000
    n_cpu = multiprocessing.cpu_count() // 2
    actors = [AsyncActor.remote() for _ in range(n_cpu)]
    client = Client.remote(actors)

    def async_actor_async():
        ray.get(client.small_value_batch.remote(n))

    results += timeit("1:n async-actor calls async", async_actor_async,
                      n * len(actors))

    n = 5000
    m = 4
    n_cpu = multiprocessing.cpu_count() // 2
    a = [AsyncActor.remote() for _ in range(n_cpu)]

    @ray.remote
    def async_actor_work(actors):
        ray.get([actors[i % n_cpu].small_value.remote() for i in range(n)])

    def async_actor_multi():
        ray.get([async_actor_work.remote(a) for _ in range(m)])

    results += timeit("n:n async-actor calls async", async_actor_multi, m * n)
    ray.shutdown()

    client_microbenchmark_main(results)

    return results
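The main() drivers in this and the following example reference several module-level helpers that the excerpts do not show: small_value, Actor, AsyncActor, Client, plus check_optimized_build and client_microbenchmark_main. Below is a minimal sketch of what the task and actor helpers could look like, inferred purely from how they are called above; the method bodies and the num_cpus=0 options are assumptions, not the benchmark's actual definitions.

import asyncio

import ray


@ray.remote
def small_value():
    return b"ok"


@ray.remote(num_cpus=0)
class Actor:
    def small_value(self):
        return b"ok"

    def small_value_arg(self, x):
        return b"ok"

    def small_value_batch(self, n):
        # Submit n tiny tasks and wait for all of them.
        ray.get([small_value.remote() for _ in range(n)])


@ray.remote(num_cpus=0)
class AsyncActor:
    async def small_value(self):
        return b"ok"

    async def small_value_with_arg(self, x):
        return b"ok"

    async def small_value_batch(self, n):
        # ObjectRefs are awaitable inside async actors.
        await asyncio.gather(*[small_value.remote() for _ in range(n)])


@ray.remote(num_cpus=0)
class Client:
    def __init__(self, servers):
        # Accept either a single actor handle or a list of them.
        if not isinstance(servers, list):
            servers = [servers]
        self.servers = servers

    def small_value_batch(self, n):
        submitted = []
        for s in self.servers:
            submitted.extend([s.small_value.remote() for _ in range(n)])
        ray.get(submitted)

    def small_value_batch_arg(self, n):
        x = ray.put(0)
        submitted = []
        for s in self.servers:
            submitted.extend([s.small_value_arg.remote(x) for _ in range(n)])
        ray.get(submitted)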
Example #9
def main(results=None):
    results = results or []

    check_optimized_build()

    print("Tip: set TESTS_TO_RUN='pattern' to run a subset of benchmarks")

    ray.init()

    value = ray.put(0)

    def get_small():
        ray.get(value)

    def put_small():
        ray.put(0)

    @ray.remote
    def do_put_small():
        for _ in range(100):
            ray.put(0)

    def put_multi_small():
        ray.get([do_put_small.remote() for _ in range(10)])

    arr = np.zeros(100 * 1024 * 1024, dtype=np.int64)

    results += timeit("single client get calls (Plasma Store)", get_small)

    results += timeit("single client put calls (Plasma Store)", put_small)

    results += timeit("multi client put calls (Plasma Store)", put_multi_small,
                      1000)

    def put_large():
        ray.put(arr)

    results += timeit("single client put gigabytes", put_large, 8 * 0.1)

    def small_value_batch():
        submitted = [small_value.remote() for _ in range(1000)]
        ray.get(submitted)
        return 0

    results += timeit("single client tasks and get batch", small_value_batch)

    @ray.remote
    def do_put():
        for _ in range(10):
            ray.put(np.zeros(10 * 1024 * 1024, dtype=np.int64))

    def put_multi():
        ray.get([do_put.remote() for _ in range(10)])

    results += timeit("multi client put gigabytes", put_multi, 10 * 8 * 0.1)

    obj_containing_ref = create_object_containing_ref.remote()

    def get_containing_object_ref():
        ray.get(obj_containing_ref)

    results += timeit("single client get object containing 10k refs",
                      get_containing_object_ref)

    def small_task():
        ray.get(small_value.remote())

    results += timeit("single client tasks sync", small_task)

    def small_task_async():
        ray.get([small_value.remote() for _ in range(1000)])

    results += timeit("single client tasks async", small_task_async, 1000)

    n = 10000
    m = 4
    actors = [Actor.remote() for _ in range(m)]

    def multi_task():
        submitted = [a.small_value_batch.remote(n) for a in actors]
        ray.get(submitted)

    results += timeit("multi client tasks async", multi_task, n * m)

    a = Actor.remote()

    def actor_sync():
        ray.get(a.small_value.remote())

    results += timeit("1:1 actor calls sync", actor_sync)

    a = Actor.remote()

    def actor_async():
        ray.get([a.small_value.remote() for _ in range(1000)])

    results += timeit("1:1 actor calls async", actor_async, 1000)

    a = Actor.options(max_concurrency=16).remote()

    def actor_concurrent():
        ray.get([a.small_value.remote() for _ in range(1000)])

    results += timeit("1:1 actor calls concurrent", actor_concurrent, 1000)

    n = 5000
    n_cpu = multiprocessing.cpu_count() // 2
    actors = [Actor._remote() for _ in range(n_cpu)]
    client = Client.remote(actors)

    def actor_async_direct():
        ray.get(client.small_value_batch.remote(n))

    results += timeit("1:n actor calls async", actor_async_direct,
                      n * len(actors))

    n_cpu = multiprocessing.cpu_count() // 2
    a = [Actor.remote() for _ in range(n_cpu)]

    @ray.remote
    def work(actors):
        ray.get([actors[i % n_cpu].small_value.remote() for i in range(n)])

    def actor_multi2():
        ray.get([work.remote(a) for _ in range(m)])

    results += timeit("n:n actor calls async", actor_multi2, m * n)

    n = 1000
    actors = [Actor._remote() for _ in range(n_cpu)]
    clients = [Client.remote(a) for a in actors]

    def actor_multi2_direct_arg():
        ray.get([c.small_value_batch_arg.remote(n) for c in clients])

    results += timeit("n:n actor calls with arg async",
                      actor_multi2_direct_arg, n * len(clients))

    a = AsyncActor.remote()

    def actor_sync():
        ray.get(a.small_value.remote())

    results += timeit("1:1 async-actor calls sync", actor_sync)

    a = AsyncActor.remote()

    def async_actor():
        ray.get([a.small_value.remote() for _ in range(1000)])

    results += timeit("1:1 async-actor calls async", async_actor, 1000)

    a = AsyncActor.remote()

    def async_actor():
        ray.get([a.small_value_with_arg.remote(i) for i in range(1000)])

    results += timeit("1:1 async-actor calls with args async", async_actor,
                      1000)

    n = 5000
    n_cpu = multiprocessing.cpu_count() // 2
    actors = [AsyncActor.remote() for _ in range(n_cpu)]
    client = Client.remote(actors)

    def async_actor_async():
        ray.get(client.small_value_batch.remote(n))

    results += timeit("1:n async-actor calls async", async_actor_async,
                      n * len(actors))

    n = 5000
    m = 4
    n_cpu = multiprocessing.cpu_count() // 2
    a = [AsyncActor.remote() for _ in range(n_cpu)]

    @ray.remote
    def async_actor_work(actors):
        ray.get([actors[i % n_cpu].small_value.remote() for i in range(n)])

    def async_actor_multi():
        ray.get([async_actor_work.remote(a) for _ in range(m)])

    results += timeit("n:n async-actor calls async", async_actor_multi, m * n)
    ray.shutdown()

    NUM_PGS = 100
    NUM_BUNDLES = 1
    ray.init(resources={"custom": 100})

    def placement_group_create_removal(num_pgs):
        pgs = [
            ray.util.placement_group(bundles=[{
                "custom": 0.001
            } for _ in range(NUM_BUNDLES)]) for _ in range(num_pgs)
        ]
        [pg.wait(timeout_seconds=30) for pg in pgs]
        # Include placement group removal here to clean up.
        # If we don't clean up placement groups, overall performance
        # degrades as the benchmark runs longer.
        # Since the timeit function runs this workload multiple times
        # without any cleanup logic, the removal has to happen in this method.
        for pg in pgs:
            ray.util.remove_placement_group(pg)

    results += timeit("placement group create/removal",
                      lambda: placement_group_create_removal(NUM_PGS), NUM_PGS)
    ray.shutdown()

    client_microbenchmark_main(results)

    return results
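The "single client get object containing 10k refs" benchmark above relies on a module-level create_object_containing_ref task that the excerpt omits. The sketch below is an assumption based only on the benchmark's name and usage: a remote task that builds and returns an object holding 10,000 small ObjectRefs, so that fetching it also exercises reference handling.

import ray


@ray.remote
def create_object_containing_ref():
    # The returned payload is a list of 10k ObjectRefs.
    obj_refs = []
    for _ in range(10000):
        obj_refs.append(ray.put(1))
    return obj_refs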
def test_placement_group_perf(num_pgs, num_bundles, num_pending_pgs):
    # Run the placement group performance benchmark given arguments.
    assert ray.cluster_resources()["custom"] >= (RESOURCES_VALUE * num_pgs *
                                                 num_bundles)

    def placement_group_create(num_pgs):
        pgs = [
            ray.util.placement_group(bundles=[{
                "custom": 0.001
            } for _ in range(num_bundles)],
                                     strategy="SPREAD") for _ in range(num_pgs)
        ]
        [pg.wait(timeout_seconds=30) for pg in pgs]
        for pg in pgs:
            ray.util.remove_placement_group(pg)

    print(f"Num pending pgs: {num_pending_pgs}, "
          f"Num pgs: {num_pgs}, "
          f"Num bundles {num_bundles}")

    # Get the throughput.
    throughput = timeit("placement group create per second",
                        lambda: placement_group_create(num_pgs), num_pgs)

    # Get fine-grained scheduling stats.
    latencies = []
    e2e_latencies = []
    scheduling_attempts = []
    for entry in ray.util.placement_group_table().values():
        latency = entry["stats"]["scheduling_latency_ms"]
        e2e_latency = entry["stats"]["end_to_end_creation_latency_ms"]
        scheduling_attempt = entry["stats"]["scheduling_attempt"]
        latencies.append(latency)
        e2e_latencies.append(e2e_latency)
        scheduling_attempts.append(scheduling_attempt)
    latencies = sorted(latencies)
    e2e_latencies = sorted(e2e_latencies)
    scheduling_attempts = sorted(scheduling_attempts)

    # Pure scheduling latency without queuing time.
    print("P50 scheduling latency ms: "
          f"{latencies[int(len(latencies) * 0.5)]}")
    print("P95 scheduling latency ms: "
          f"{latencies[int(len(latencies) * 0.95)]}")
    print("P99 scheduling latency ms: "
          f"{latencies[int(len(latencies) * 0.99)]}")

    # Scheduling latency including queueing time.
    print("P50 e2e scheduling latency ms: "
          f"{e2e_latencies[int(len(e2e_latencies) * 0.5)]}")
    print("P95 e2e scheduling latency ms: "
          f"{e2e_latencies[int(len(e2e_latencies) * 0.95)]}")
    print("P99 e2e scheduling latency ms: "
          f"{e2e_latencies[int(len(e2e_latencies) * 0.99)]}")

    # Number of times scheduling was retried before it succeeded.
    print("P50 scheduling attempts: "
          f"{scheduling_attempts[int(len(scheduling_attempts) * 0.5)]}")
    print("P95 scheduling attempts: "
          f"{scheduling_attempts[int(len(scheduling_attempts) * 0.95)]}")
    print("P99 scheduling attempts: "
          f"{scheduling_attempts[int(len(scheduling_attempts) * 0.99)]}")

    return {
        "pg_creation_per_second":
        throughput[0][1],
        "p50_scheduling_latency_ms":
        latencies[int(len(latencies) * 0.5)],
        "p50_e2e_pg_creation_latency_ms":
        e2e_latencies[int(len(e2e_latencies) * 0.5)]
    }
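test_placement_group_perf expects a cluster that already exposes enough of the "custom" resource and, judging by the assertion at its top, a module-level RESOURCES_VALUE constant that the excerpt does not show. A hypothetical driver under those assumptions is sketched below; the resource total and the argument values are illustrative only.

import ray

RESOURCES_VALUE = 0.001  # assumed "custom" requirement per bundle

if __name__ == "__main__":
    # Expose enough "custom" resource for num_pgs * num_bundles bundles.
    ray.init(resources={"custom": 100})
    perf = test_placement_group_perf(num_pgs=100, num_bundles=1,
                                     num_pending_pgs=0)
    print(perf)
    ray.shutdown()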
Example #11
def benchmark_put_calls(ray):
    def put_small():
        ray.put(0)

    timeit("client: put calls", put_small)