예제 #1
0
def start_connected_cluster():
    """Yield a running, driver-connected Ray cluster; tear it down after."""
    cluster = Cluster(initialize_head=True, connect=True)
    yield cluster
    # Teardown: disconnect the driver, then stop all cluster processes.
    ray.shutdown()
    cluster.shutdown()
예제 #2
0
def create_cluster(num_nodes):
    """Build a `num_nodes`-node cluster and connect the driver to it.

    Each node advertises a custom resource named after its index.
    """
    cluster = Cluster()
    for node_index in range(num_nodes):
        cluster.add_node(
            resources={str(node_index): 100}, object_store_memory=10**9)

    ray.init(redis_address=cluster.redis_address)
    return cluster
예제 #3
0
def ray_start_reconstruction(request):
    """Start a `request.param`-node cluster whose 1 GB object store is split
    evenly across nodes; yield (total memory, node count, cluster)."""
    num_nodes = request.param

    plasma_store_memory = 10**9

    # Per-node settings shared by the head node and every worker node.
    per_node_store = plasma_store_memory // num_nodes
    internal_config = json.dumps({
        "initial_reconstruction_timeout_milliseconds": 200
    })

    cluster = Cluster(
        initialize_head=True,
        head_node_args={
            "num_cpus": 1,
            "object_store_memory": per_node_store,
            "redis_max_memory": 10**7,
            "redirect_output": True,
            "_internal_config": internal_config,
        })
    for _ in range(num_nodes - 1):
        cluster.add_node(
            num_cpus=1,
            object_store_memory=per_node_store,
            redirect_output=True,
            _internal_config=internal_config)
    ray.init(redis_address=cluster.redis_address)

    yield plasma_store_memory, num_nodes, cluster

    # Clean up the Ray cluster.
    ray.shutdown()
    cluster.shutdown()
예제 #4
0
def ray_start_empty_cluster():
    """Yield a cluster with no nodes; the caller adds nodes as needed."""
    empty_cluster = Cluster()
    yield empty_cluster

    # Teardown: disconnect any driver, then stop all cluster processes.
    ray.shutdown()
    empty_cluster.shutdown()
예제 #5
0
def ray_start_empty_cluster():
    """Fixture-style generator: yield a node-less cluster, clean up after."""
    c = Cluster()
    yield c

    # Runs once the consuming test has finished.
    ray.shutdown()
    c.shutdown()
예제 #6
0
    def test_redis_password_cluster(self, password, shutdown_only):
        """A remote task should run on a password-protected multi-node cluster."""

        @ray.remote
        def f():
            return 1

        password_args = {"redis_password": password}
        # One head node plus one worker, both sharing the same password.
        cluster = Cluster(
            initialize_head=True, connect=True, head_node_args=password_args)
        cluster.add_node(**password_args)

        ray.get(f.remote())
예제 #7
0
def cluster_start():
    """Yield a connected one-CPU cluster with a short heartbeat timeout."""
    head_args = {
        "resources": dict(CPU=1),
        "_internal_config": json.dumps({"num_heartbeats_timeout": 10}),
    }
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=head_args)
    yield cluster
    # Teardown after the test body finishes.
    ray.shutdown()
    cluster.shutdown()
예제 #8
0
def start_connected_cluster():
    """Yield a connected single-CPU cluster (heartbeat timeout = 10)."""
    head_args = {
        "resources": dict(CPU=1),
        "_internal_config": json.dumps({"num_heartbeats_timeout": 10}),
    }
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=head_args)
    yield cluster
    # The code after the yield runs as teardown.
    ray.shutdown()
    cluster.shutdown()
예제 #9
0
def ray_start_workers_separate_multinode(request):
    """Start `request.param[0]` nodes with `request.param[1]` CPUs each."""
    node_count = request.param[0]
    workers_per_node = request.param[1]
    # Bring up the cluster and connect the driver.
    cluster = Cluster()
    for _ in range(node_count):
        cluster.add_node(num_cpus=workers_per_node)
    ray.init(redis_address=cluster.redis_address)

    yield node_count, workers_per_node
    # Teardown: disconnect the driver and stop every node.
    ray.shutdown()
    cluster.shutdown()
예제 #10
0
def start_connected_longer_cluster():
    """Creates a cluster with a longer timeout."""
    head_args = {
        "resources": dict(CPU=1),
        "_internal_config": json.dumps({"num_heartbeats_timeout": 20}),
    }
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=head_args)
    yield cluster
    # Teardown runs once the consuming test is done.
    ray.shutdown()
    cluster.shutdown()
예제 #11
0
    def test_redis_password_cluster(self, password, shutdown_only):
        """Tasks must still execute when Redis requires a password."""

        @ray.remote
        def f():
            return 1

        shared_args = {"redis_password": password}
        cluster = Cluster(
            initialize_head=True, connect=True, head_node_args=shared_args)
        # Add a worker node using the same password settings.
        cluster.add_node(**shared_args)

        ray.get(f.remote())
예제 #12
0
def run(args, parser):
    """Build the experiment spec, start/connect to Ray, and run experiments.

    The spec comes either from a YAML config file (``args.config_file``) or
    from the individual CLI arguments. After validation, Ray is initialized —
    either as an in-process simulated multi-node cluster
    (``args.ray_num_nodes``) or as a regular single instance.

    Args:
        args: Parsed command-line namespace.
        parser: The argument parser, used only to report validation errors.
    """
    if args.config_file:
        with open(args.config_file) as f:
            # NOTE(review): yaml.load without an explicit Loader can execute
            # arbitrary Python tags from an untrusted file; consider
            # yaml.safe_load if the config is not fully trusted.
            experiments = yaml.load(f)
    else:
        # Note: keep this in sync with tune/config_parser.py
        experiments = {
            args.experiment_name: {  # i.e. log to ~/ray_results/default
                "run": args.run,
                "checkpoint_freq": args.checkpoint_freq,
                "local_dir": args.local_dir,
                # Only convert when resources were actually given (the `and`
                # short-circuits to None/falsy otherwise).
                "resources_per_trial": (
                    args.resources_per_trial and
                    resources_to_json(args.resources_per_trial)),
                "stop": args.stop,
                "config": dict(args.config, env=args.env),
                "restore": args.restore,
                "num_samples": args.num_samples,
                "upload_dir": args.upload_dir,
            }
        }

    # Every experiment must name an algorithm ("run") and an environment.
    for exp in experiments.values():
        if not exp.get("run"):
            parser.error("the following arguments are required: --run")
        if not exp.get("env") and not exp.get("config", {}).get("env"):
            parser.error("the following arguments are required: --env")

    if args.ray_num_nodes:
        # Simulate a multi-node cluster in a single process.
        cluster = Cluster()
        for _ in range(args.ray_num_nodes):
            # NOTE(review): "num_cpus"/"num_gpus" are passed inside the
            # `resources` dict here; confirm add_node interprets these as
            # intended rather than as custom resource names.
            cluster.add_node(
                resources={
                    "num_cpus": args.ray_num_cpus or 1,
                    "num_gpus": args.ray_num_gpus or 0,
                },
                object_store_memory=args.ray_object_store_memory,
                redis_max_memory=args.ray_redis_max_memory)
        ray.init(redis_address=cluster.redis_address)
    else:
        # Connect to an existing cluster (if redis_address is set) or start
        # a fresh local Ray instance.
        ray.init(
            redis_address=args.redis_address,
            object_store_memory=args.ray_object_store_memory,
            redis_max_memory=args.ray_redis_max_memory,
            num_cpus=args.ray_num_cpus,
            num_gpus=args.ray_num_gpus)
    run_experiments(
        experiments,
        scheduler=_make_scheduler(args),
        queue_trials=args.queue_trials,
        resume=args.resume)
예제 #13
0
def test_cluster():
    """Nodes can be added to, and removed from, a head-less cluster."""
    cluster = Cluster(initialize_head=False)
    first = cluster.add_node()
    second = cluster.add_node()
    assert first.all_processes_alive()
    assert second.all_processes_alive()
    # Remove in reverse order of addition.
    cluster.remove_node(second)
    cluster.remove_node(first)
    assert not any(
        node.any_processes_alive() for node in cluster.list_all_nodes())
예제 #14
0
def cluster_start():
    """Yield a connected single-node cluster (1 CPU, heartbeat timeout 10)."""
    head_args = {
        "num_cpus": 1,
        "_internal_config": json.dumps({"num_heartbeats_timeout": 10}),
    }
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=head_args)
    yield cluster
    # Teardown after the consuming test completes.
    ray.shutdown()
    cluster.shutdown()
예제 #15
0
def start_connected_cluster():
    """Yield a connected 1-CPU cluster with a short heartbeat timeout."""
    head_args = {
        "num_cpus": 1,
        "_internal_config": json.dumps({"num_heartbeats_timeout": 10}),
    }
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=head_args)
    yield cluster
    # The code after the yield runs as teardown.
    ray.shutdown()
    cluster.shutdown()
예제 #16
0
def start_connected_longer_cluster():
    """Creates a cluster with a longer timeout."""
    head_args = {
        "num_cpus": 1,
        "_internal_config": json.dumps({"num_heartbeats_timeout": 20}),
    }
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=head_args)
    yield cluster
    # The code after the yield runs as teardown.
    ray.shutdown()
    cluster.shutdown()
예제 #17
0
def start_connected_emptyhead_cluster():
    """Starts head with no resources."""

    head_args = {
        "resources": dict(CPU=0),
        "_internal_config": json.dumps({"num_heartbeats_timeout": 10}),
    }
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=head_args)
    # Pytest doesn't play nicely with imports
    _register_all()
    yield cluster
    # Teardown runs after the consuming test.
    ray.shutdown()
    cluster.shutdown()
예제 #18
0
def test_cluster():
    """Basic test for adding and removing nodes in cluster."""
    cluster = Cluster(initialize_head=False)
    first, second = cluster.add_node(), cluster.add_node()
    assert first.remaining_processes_alive()
    assert second.remaining_processes_alive()
    # Remove in reverse order of addition.
    cluster.remove_node(second)
    cluster.remove_node(first)
    assert not any(n.any_processes_alive() for n in (first, second))
예제 #19
0
def start_connected_emptyhead_cluster():
    """Starts head with no resources."""

    head_args = {
        "num_cpus": 0,
        "_internal_config": json.dumps({"num_heartbeats_timeout": 10}),
    }
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=head_args)
    # Pytest doesn't play nicely with imports
    _register_all()
    yield cluster
    # The code after the yield runs as teardown.
    ray.shutdown()
    cluster.shutdown()
예제 #20
0
def ray_start_combination(request):
    """Start `request.param[0]` nodes with 10 CPUs each; yield the params."""
    node_count = request.param[0]
    workers_per_scheduler = request.param[1]
    # Head node first, then the remaining worker nodes.
    cluster = Cluster(
        initialize_head=True,
        head_node_args={"num_cpus": 10, "redis_max_memory": 10**7})
    for _ in range(node_count - 1):
        cluster.add_node(num_cpus=10)
    ray.init(redis_address=cluster.redis_address)

    yield node_count, workers_per_scheduler
    # Teardown: disconnect the driver and stop the cluster.
    ray.shutdown()
    cluster.shutdown()
예제 #21
0
def ray_initialize_cluster():
    """Bring up a 4-node cluster with 8 CPUs per node; yield None."""
    num_nodes = 4
    num_workers_per_scheduler = 8

    internal_config = json.dumps({
        "initial_reconstruction_timeout_milliseconds": 1000,
        "num_heartbeats_timeout": 10,
    })
    cluster = Cluster()
    for _ in range(num_nodes):
        cluster.add_node(
            num_cpus=num_workers_per_scheduler,
            _internal_config=internal_config)
    ray.init(redis_address=cluster.redis_address)

    yield None

    # Teardown: disconnect and stop every node.
    ray.shutdown()
    cluster.shutdown()
예제 #22
0
def _start_new_cluster():
    """Create and return a connected 1-CPU cluster (heartbeat timeout 10)."""
    head_args = {
        "resources": dict(CPU=1),
        "_internal_config": json.dumps({"num_heartbeats_timeout": 10}),
    }
    new_cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=head_args)
    # Pytest doesn't play nicely with imports
    _register_all()
    return new_cluster
예제 #23
0
def test_cluster():
    """Nodes added to a head-less cluster can be removed cleanly."""
    cluster = Cluster(initialize_head=False)
    node_a = cluster.add_node()
    node_b = cluster.add_node()
    assert node_a.all_processes_alive()
    assert node_b.all_processes_alive()
    # Remove in reverse order of addition.
    cluster.remove_node(node_b)
    cluster.remove_node(node_a)
    assert not any(node.any_processes_alive() for node in (node_a, node_b))
예제 #24
0
def ray_start_reconstruction(request):
    """Split a 1 GB object store evenly over `request.param` nodes and
    yield (total plasma memory, node count, cluster)."""
    num_nodes = request.param

    plasma_store_memory = 10**9

    # Identical settings for the head node and each worker node.
    store_per_node = plasma_store_memory // num_nodes
    reconstruction_config = json.dumps({
        "initial_reconstruction_timeout_milliseconds": 200
    })

    cluster = Cluster(
        initialize_head=True,
        head_node_args={
            "num_cpus": 1,
            "object_store_memory": store_per_node,
            "redis_max_memory": 10**7,
            "redirect_output": True,
            "_internal_config": reconstruction_config,
        })
    for _ in range(num_nodes - 1):
        cluster.add_node(
            num_cpus=1,
            object_store_memory=store_per_node,
            redirect_output=True,
            _internal_config=reconstruction_config)
    ray.init(redis_address=cluster.redis_address)

    yield plasma_store_memory, num_nodes, cluster

    # Clean up the Ray cluster.
    ray.shutdown()
    cluster.shutdown()
예제 #25
0
def ray_start_workers_separate_multinode(request):
    """Start a multi-node cluster; yield (node count, workers per node)."""
    num_nodes = request.param[0]
    num_initial_workers = request.param[1]
    # Launch every node, then connect the driver.
    cluster = Cluster()
    for _ in range(num_nodes):
        cluster.add_node(num_cpus=num_initial_workers)
    ray.init(redis_address=cluster.redis_address)

    yield num_nodes, num_initial_workers
    # Teardown code runs after the yield.
    ray.shutdown()
    cluster.shutdown()
예제 #26
0
def ray_start_two_nodes():
    """Yield a two-node, zero-CPU cluster with a long heartbeat timeout."""
    internal_config = json.dumps({"num_heartbeats_timeout": 40})
    cluster = Cluster()
    for _ in range(2):
        cluster.add_node(num_cpus=0, _internal_config=internal_config)
    ray.init(redis_address=cluster.redis_address)

    yield cluster
    # The code after the yield runs as teardown.
    ray.shutdown()
    cluster.shutdown()
예제 #27
0
def ray_start_cluster():
    """Yield a connected cluster of 1 head + 4 workers, 8 CPUs per node."""
    node_args = {
        "resources": dict(CPU=8),
        "_internal_config": json.dumps({
            "initial_reconstruction_timeout_milliseconds": 1000,
            "num_heartbeats_timeout": 10
        })
    }
    # Start with 4 worker nodes and 8 cores each.
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=node_args)
    workers = [cluster.add_node(**node_args) for _ in range(4)]
    # Block until every node has registered.
    cluster.wait_for_nodes()
    yield cluster
    ray.shutdown()
    cluster.shutdown()
예제 #28
0
def ray_start_combination(request):
    """Start `request.param[0]` 10-CPU nodes; yield params plus the cluster."""
    num_nodes = request.param[0]
    num_workers_per_scheduler = request.param[1]
    # Head node first, then the remaining worker nodes.
    cluster = Cluster(
        initialize_head=True,
        head_node_args={"num_cpus": 10, "redis_max_memory": 10**7})
    for _ in range(num_nodes - 1):
        cluster.add_node(num_cpus=10)
    ray.init(redis_address=cluster.redis_address)

    yield num_nodes, num_workers_per_scheduler, cluster
    # Teardown code runs after the yield.
    ray.shutdown()
    cluster.shutdown()
예제 #29
0
def ray_start_cluster():
    """Yield a connected cluster: head + 3 workers, 4 CPUs per node."""
    node_args = {
        "num_cpus": 4,
        "_internal_config": json.dumps({
            "initial_reconstruction_timeout_milliseconds": 1000,
            "num_heartbeats_timeout": 10
        })
    }
    # Start with 3 worker nodes and 4 cores each.
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=node_args)
    workers = [cluster.add_node(**node_args) for _ in range(3)]
    # Wait until every node has registered before handing off.
    cluster.wait_for_nodes()
    yield cluster
    ray.shutdown()
    cluster.shutdown()
예제 #30
0
def ray_start_cluster():
    """Yield a connected 5-node cluster (head + 4 workers), 8 CPUs each."""
    internal_config = json.dumps({
        "initial_reconstruction_timeout_milliseconds": 1000,
        "num_heartbeats_timeout": 10
    })
    node_args = {"num_cpus": 8, "_internal_config": internal_config}
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=node_args)
    worker_nodes = []
    for _ in range(4):
        worker_nodes.append(cluster.add_node(**node_args))
    # Block until every node has registered.
    cluster.wait_for_nodes()
    yield cluster
    ray.shutdown()
    cluster.shutdown()
예제 #31
0
def ray_initialize_cluster():
    """Yield a 4-node cluster with 8 CPUs per node; clean up afterwards."""
    num_nodes = 4
    num_workers_per_scheduler = 8

    internal_config = json.dumps({
        "initial_reconstruction_timeout_milliseconds": 1000,
        "num_heartbeats_timeout": 10,
    })
    cluster = Cluster()
    for _ in range(num_nodes):
        cluster.add_node(
            num_cpus=num_workers_per_scheduler,
            _internal_config=internal_config)
    ray.init(redis_address=cluster.redis_address)

    yield cluster

    # Teardown: disconnect the driver and stop every node.
    ray.shutdown()
    cluster.shutdown()
예제 #32
0
def test_shutdown():
    """Cluster.shutdown must stop the processes of every node."""
    cluster = Cluster(initialize_head=False)
    nodes = [cluster.add_node(), cluster.add_node()]
    cluster.shutdown()
    assert not any(node.any_processes_alive() for node in nodes)
예제 #33
0
def run(args, parser):
    """Build the experiment spec, attach an episode-end callback, start or
    connect to Ray, and run the experiments.

    The spec comes either from a YAML config file (``args.config_file``)
    or from the individual CLI arguments.

    Args:
        args: Parsed command-line namespace.
        parser: The argument parser, used only to report validation errors.
    """
    if args.config_file:
        with open(args.config_file) as f:
            # NOTE(review): yaml.load without an explicit Loader can execute
            # arbitrary Python tags; consider yaml.safe_load for untrusted
            # config files.
            experiments = yaml.load(f)
    else:
        # Note: keep this in sync with tune/config_parser.py
        experiments = {
            args.experiment_name: {  # i.e. log to ~/ray_results/default
                "run": args.run,
                "checkpoint_freq": args.checkpoint_freq,
                "local_dir": args.local_dir,
                # Only convert when resources were actually given (the `and`
                # short-circuits to a falsy value otherwise).
                "resources_per_trial": (
                    args.resources_per_trial and
                    resources_to_json(args.resources_per_trial)),
                "stop": args.stop,
                "config": dict(args.config, env=args.env),
                "restore": args.restore,
                "num_samples": args.num_samples,
                "upload_dir": args.upload_dir,
            }
        }

    # NOTE(review): the four constants below appear unused in this function;
    # confirm whether they are dead code.
    # The default maximum number of bytes to allocate to the object store unless
    # overridden by the user.
    DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES = 20 * 10**9
    # The smallest cap on the memory used by the object store that we allow.
    OBJECT_STORE_MINIMUM_MEMORY_BYTES = 10**7
    # The default maximum number of bytes that the non-primary Redis shards are
    # allowed to use unless overridden by the user.
    DEFAULT_REDIS_MAX_MEMORY_BYTES = 10**10
    # The smallest cap on the memory used by Redis that we allow.
    REDIS_MINIMUM_MEMORY_BYTES = 10**7

    def on_episode_end(info):
        # Record the episode's capital return as a custom metric when the
        # unwrapped env exposes `capital`/`initial_funds` attributes.
        episode = info["episode"]
        env = info['env'].get_unwrapped()[0]
        if hasattr(env, 'capital'):
            capital_return = (env.capital -
                              env.initial_funds) / env.initial_funds
            episode.custom_metrics['capital_return'] = capital_return

    # Attach the callback to the first (and presumably only) experiment.
    key = list(experiments.keys())[0]
    experiments[key]["config"]["callbacks"] = {
        "on_episode_end": tune.function(on_episode_end)
    }

    # Every experiment must name an algorithm ("run") and an environment.
    for exp in experiments.values():
        if not exp.get("run"):
            parser.error("the following arguments are required: --run")
        if not exp.get("env") and not exp.get("config", {}).get("env"):
            parser.error("the following arguments are required: --env")

    if args.ray_num_nodes:
        # Simulate a multi-node cluster in a single process.
        cluster = Cluster()
        for _ in range(args.ray_num_nodes):
            # NOTE(review): "num_cpus"/"num_gpus" are passed inside the
            # `resources` dict; verify add_node treats these as intended
            # rather than as custom resource names.
            cluster.add_node(resources={
                "num_cpus": args.ray_num_cpus or 1,
                "num_gpus": args.ray_num_gpus or 0,
            },
                             object_store_memory=args.ray_object_store_memory,
                             redis_max_memory=args.ray_redis_max_memory)
        ray.init(redis_address=cluster.redis_address)
    else:
        print('init')
        # Connect to an existing cluster or start a local instance with
        # hard-coded 0.5 GB object-store and Redis memory caps.
        ray.init(redis_address=args.redis_address,
                 object_store_memory=int(0.5 * 10**9),
                 redis_max_memory=int(0.5 * 10**9),
                 num_cpus=args.ray_num_cpus,
                 num_gpus=args.ray_num_gpus)
    run_experiments(experiments,
                    scheduler=_make_scheduler(args),
                    queue_trials=args.queue_trials,
                    resume=args.resume)
예제 #34
0
def test_shutdown():
    """After shutdown, no node may have any processes left alive."""
    cluster = Cluster(initialize_head=False)
    node_a = cluster.add_node()
    node_b = cluster.add_node()
    cluster.shutdown()
    assert not any(n.any_processes_alive() for n in (node_a, node_b))