Beispiel #1
0
def test_maxtasksperchild(shutdown_only):
    """Check that ``maxtasksperchild=1`` gives every task its own process.

    Twenty tasks are mapped over a five-worker pool; each task reports the
    PID it ran in, and the test expects twenty distinct PIDs.
    """

    def worker_pid(_):
        # The mapped value is irrelevant; only the executing PID matters.
        return os.getpid()

    num_tasks = 20
    workers = Pool(5, maxtasksperchild=1)
    distinct_pids = set(workers.map(worker_pid, range(num_tasks)))
    assert len(distinct_pids) == num_tasks
    workers.terminate()
    workers.join()
def launch_long_running_tasks(num_pool=5):
    """Run ``task`` over a fixed set of inputs on a worker pool.

    Args:
        num_pool: Number of workers to create in the pool.

    Returns:
        A new list holding the result of ``task`` for each value in
        ``range(1, 500, 10)``, in the order ``pool.map`` yields them.
    """
    # Create a pool of ``num_pool`` workers to do the work.
    pool = Pool(num_pool)
    try:
        # Copy the mapped results into a fresh list in one step.
        return list(pool.map(task, range(1, 500, 10)))
    finally:
        # Release the workers even when a task raises, so a failed run
        # does not leave the pool behind.
        pool.terminate()
Beispiel #3
0
def test_initializer(shutdown_only):
    """Check that the pool ``initializer`` is executed by every worker.

    The initializer writes a file named after the current PID into a
    temporary directory, so after the pool is created the directory is
    expected to hold one file per worker process.
    """

    def init(dirname):
        # Leave one marker file per process, keyed by its PID.
        with open(os.path.join(dirname, str(os.getpid())), "w") as f:
            print("hello", file=f)

    with tempfile.TemporaryDirectory() as dirname:
        num_processes = 4
        pool = Pool(processes=num_processes, initializer=init, initargs=(dirname,))
        try:
            # Compare against num_processes rather than repeating the
            # literal, so changing the pool size keeps the check in sync.
            assert len(os.listdir(dirname)) == num_processes
        finally:
            # Always stop the workers, including when the assertion fails,
            # before the temporary directory is removed.
            pool.terminate()
            pool.join()
Beispiel #4
0
def test_ray_init(monkeypatch, shutdown_only):
    """Exercise how creating a ``Pool`` interacts with ray start-up.

    Three scenarios are covered in sequence: no ray running, a local
    cluster with enough CPUs, and a local cluster with too few CPUs.
    """

    def worker_pid(_):
        return os.getpid()

    def cluster_cpus():
        return int(ray.cluster_resources()["CPU"])

    def assert_distinct_workers(pool, expected):
        # One empty-tuple argument per expected worker; every call must
        # come back with a different PID.
        inputs = [()] * expected
        seen_pids = set(pool.map(worker_pid, inputs))
        assert len(seen_pids) == expected

    # Scenario 1: creating a pool starts ray when it is not initialized.
    workers = Pool(processes=2)
    assert ray.is_initialized()
    assert cluster_cpus() == 2
    assert_distinct_workers(workers, 2)
    workers.terminate()
    workers.join()
    ray.shutdown()

    # Switch the cluster id so that gcs talks to a different storage prefix.
    monkeypatch.setenv("RAY_external_storage_namespace", "new_cluster")
    ray._raylet.Config.initialize("")

    # Scenario 2: with a local ray cluster already running, creating a pool
    # must leave that cluster untouched.
    ray.init(num_cpus=3)
    assert ray.is_initialized()
    workers = Pool(processes=2)
    assert cluster_cpus() == 3
    assert_distinct_workers(workers, 2)
    workers.terminate()
    workers.join()
    ray.shutdown()

    # Scenario 3: asking for more processes than the running cluster has
    # CPUs must raise, and the cluster must keep its single CPU.
    ray.init(num_cpus=1)
    assert ray.is_initialized()
    with pytest.raises(ValueError):
        Pool(processes=2)
    assert cluster_cpus() == 1
    ray.shutdown()
Beispiel #5
0
def test_connect_to_ray(monkeypatch, ray_start_cluster):
    """Check which ray cluster a ``Pool`` ends up using.

    The fixture cluster and the locally started cluster are given different
    CPU counts, so the reported CPU total tells which one the pool is
    attached to. Covered in sequence: no address configured, ``ray_address``
    passed explicitly, ``RAY_ADDRESS`` set in the environment, and a request
    for more processes than the connected cluster has CPUs.
    """

    def getpid(args):
        return os.getpid()

    def check_pool_size(pool, size):
        # One task per expected worker; each must report a distinct PID.
        args = [tuple() for _ in range(size)]
        assert len(set(pool.map(getpid, args))) == size

    address = ray_start_cluster.address
    # Use different numbers of CPUs to distinguish between starting a local
    # ray cluster and connecting to an existing one.
    start_cpus = 1  # Set in fixture.
    init_cpus = 2

    # Set up the cluster id so that gcs is talking with a different
    # storage prefix
    monkeypatch.setenv("RAY_external_storage_namespace", "new_cluster")
    ray._raylet.Config.initialize("")

    # Check that starting a pool still starts ray if RAY_ADDRESS not set.
    pool = Pool(processes=init_cpus)
    assert ray.is_initialized()
    assert int(ray.cluster_resources()["CPU"]) == init_cpus
    check_pool_size(pool, init_cpus)
    pool.terminate()
    pool.join()
    ray.shutdown()

    # Check that starting a pool connects to a running ray cluster if
    # ray_address is passed in.
    pool = Pool(ray_address=address)
    assert ray.is_initialized()
    assert int(ray.cluster_resources()["CPU"]) == start_cpus
    check_pool_size(pool, start_cpus)
    pool.terminate()
    pool.join()
    ray.shutdown()

    monkeypatch.setenv("RAY_external_storage_namespace", "new_cluster2")
    ray._raylet.Config.initialize("")

    # Set RAY_ADDRESS, so pools should connect to the running ray cluster.
    # Going through monkeypatch (instead of writing os.environ directly)
    # restores the variable at teardown so it cannot leak into other tests.
    monkeypatch.setenv("RAY_ADDRESS", address)

    # Check that starting a pool connects to a running ray cluster if
    # RAY_ADDRESS is set.
    pool = Pool()
    assert ray.is_initialized()
    assert int(ray.cluster_resources()["CPU"]) == start_cpus
    check_pool_size(pool, start_cpus)
    pool.terminate()
    pool.join()
    ray.shutdown()

    # Check that trying to start a pool on an existing ray cluster throws an
    # error if there aren't enough CPUs for the number of processes.
    # ValueError matches what test_ray_init expects for the same condition;
    # a bare Exception would also hide unrelated failures.
    with pytest.raises(ValueError):
        Pool(processes=start_cpus + 1)
    assert int(ray.cluster_resources()["CPU"]) == start_cpus
    ray.shutdown()
Beispiel #6
0
def test_ray_init(shutdown_only):
    """Exercise how creating a ``Pool`` interacts with ray start-up.

    Three scenarios are covered in sequence: no ray running, a local
    cluster with enough CPUs, and a local cluster with too few CPUs.
    """

    def worker_pid(_):
        return os.getpid()

    def cluster_cpus():
        return int(ray.cluster_resources()["CPU"])

    def assert_distinct_workers(pool, expected):
        # One empty-tuple argument per expected worker; every call must
        # come back with a different PID.
        inputs = [()] * expected
        seen_pids = set(pool.map(worker_pid, inputs))
        assert len(seen_pids) == expected

    # Scenario 1: creating a pool starts ray when it is not initialized.
    workers = Pool(processes=2)
    assert ray.is_initialized()
    assert cluster_cpus() == 2
    assert_distinct_workers(workers, 2)
    workers.terminate()
    workers.join()
    ray.shutdown()

    # Scenario 2: with a local ray cluster already running, creating a pool
    # must leave that cluster untouched.
    ray.init(num_cpus=3)
    assert ray.is_initialized()
    workers = Pool(processes=2)
    assert cluster_cpus() == 3
    assert_distinct_workers(workers, 2)
    workers.terminate()
    workers.join()
    ray.shutdown()

    # Scenario 3: asking for more processes than the running cluster has
    # CPUs must raise, and the cluster must keep its single CPU.
    ray.init(num_cpus=1)
    assert ray.is_initialized()
    with pytest.raises(ValueError):
        Pool(processes=2)
    assert cluster_cpus() == 1
    ray.shutdown()
Beispiel #7
0
def pool_4_processes():
    """Yield a ``Pool`` with four processes, then tear everything down.

    After the consumer resumes the generator, the pool is terminated and
    joined and ray is shut down.
    """
    four_worker_pool = Pool(processes=4)
    yield four_worker_pool
    # Teardown: stop the workers first, then shut ray down.
    four_worker_pool.terminate()
    four_worker_pool.join()
    ray.shutdown()
Beispiel #8
0
def pool():
    """Yield a single-process ``Pool``, then tear everything down.

    After the consumer resumes the generator, the pool is terminated and
    joined and ray is shut down.
    """
    pool = Pool(processes=1)
    yield pool
    pool.terminate()
    # Wait for the workers to exit before shutting ray down, matching the
    # terminate()/join() teardown used by pool_4_processes and the tests.
    pool.join()
    ray.shutdown()
Beispiel #9
0
    # Run is_prime over range(num) on a process pool sized by
    # get_cpu_count(). The original author's expectation is that this
    # performs better than the serial and threading variants (presumably
    # timed earlier in the enclosing function, which is outside this view).
    #
    # NOTE(review): this timed region includes creating the pool and
    # building the input list; the Ray timing below excludes both, so the
    # two elapsed times do not cover the same work.
    start = time.time()
    mp_pool = mp.Pool(get_cpu_count())
    with mp_pool as p:
        prime_numbers = p.map(is_prime, list(range(num)))
    end = time.time()
    # NOTE(review): if `mp` is the stdlib multiprocessing module, leaving the
    # with-block above already terminates the pool, which would make this
    # call redundant -- confirm.
    mp_pool.terminate()

    # Report the elapsed time and the sum of the is_prime results.
    print(
        f"Multi Process access: Time elapsed: {end - start:4.2f} sec to compute all primes in {num} are {sum(list(prime_numbers))}"
    )

    # Repeat the same computation with a `Pool` created after ray.init()
    # (presumably Ray's multiprocessing pool, per the original comment).
    ray.init()
    ray_pool = Pool(get_cpu_count())
    # The input list and the pool are created before the timer starts.
    lst = list(range(num))
    results = []
    start = time.time()
    # Collect the mapped results one by one into `results`.
    for result in ray_pool.map(is_prime, lst):
        results.append(result)
    end = time.time()
    ray_pool.terminate()
    # Report the elapsed time and the sum of the is_prime results.
    print(
        f"Ray Distributed Multi Process access: Time elapsed: {end - start:4.2f} sec to compute all primes in {num} are {sum(results)}"
    )
    ray.shutdown()