コード例 #1
0
def ray_start_empty_cluster():
    """Yield a fresh, empty Cluster; tear everything down afterwards."""
    empty_cluster = Cluster()
    yield empty_cluster

    # Teardown: runs once the consumer resumes the generator.
    ray.shutdown()
    empty_cluster.shutdown()
コード例 #2
0
ファイル: test_stress.py プロジェクト: robertnishihara/ray
def ray_start_reconstruction(request):
    """Start a cluster of ``request.param`` nodes that splits a fixed plasma
    store budget evenly across nodes, with a short reconstruction timeout.

    Yields (total plasma memory, node count, cluster); tears down after.
    """
    num_nodes = request.param

    # Total plasma budget shared evenly by all nodes.
    plasma_store_memory = int(0.5 * 10**9)
    per_node_memory = plasma_store_memory // num_nodes
    internal_config = json.dumps({
        "initial_reconstruction_timeout_milliseconds": 200
    })

    cluster = Cluster(
        initialize_head=True,
        head_node_args={
            "num_cpus": 1,
            "object_store_memory": per_node_memory,
            "redis_max_memory": 10**7,
            "_internal_config": internal_config,
        })
    for _ in range(num_nodes - 1):
        cluster.add_node(
            num_cpus=1,
            object_store_memory=per_node_memory,
            _internal_config=internal_config)
    ray.init(redis_address=cluster.redis_address)

    yield plasma_store_memory, num_nodes, cluster

    # Clean up the Ray cluster.
    ray.shutdown()
    cluster.shutdown()
コード例 #3
0
def ray_start_workers_separate_multinode(request):
    """Start ``request.param[0]`` nodes, each with ``request.param[1]`` CPUs.

    Yields (num_nodes, num_initial_workers); tears the cluster down after.
    """
    params = request.param
    num_nodes = params[0]
    num_initial_workers = params[1]

    # Bring up the Ray processes.
    cluster = Cluster()
    for _ in range(num_nodes):
        cluster.add_node(num_cpus=num_initial_workers)
    ray.init(redis_address=cluster.redis_address)

    yield num_nodes, num_initial_workers

    # Teardown: everything after the yield.
    ray.shutdown()
    cluster.shutdown()
コード例 #4
0
def cluster_start():
    """Yield a connected single-head cluster (1 CPU, short heartbeat timeout)."""
    head_args = {
        "num_cpus": 1,
        "_internal_config": json.dumps({"num_heartbeats_timeout": 10}),
    }
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=head_args)
    yield cluster
    # Teardown.
    ray.shutdown()
    cluster.shutdown()
コード例 #5
0
def start_connected_cluster():
    """Yield a connected head-only cluster with one CPU."""
    # Bring up the Ray processes.
    head_args = {
        "num_cpus": 1,
        "_internal_config": json.dumps({"num_heartbeats_timeout": 10}),
    }
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=head_args)
    yield cluster
    # Everything after the yield runs as teardown.
    ray.shutdown()
    cluster.shutdown()
コード例 #6
0
ファイル: test_failure.py プロジェクト: robertnishihara/ray
def ray_start_two_nodes():
    """Yield a two-node cluster whose nodes have zero CPUs and a long
    heartbeat timeout."""
    cluster = Cluster()
    node_config = json.dumps({"num_heartbeats_timeout": 40})
    for _ in range(2):
        cluster.add_node(num_cpus=0, _internal_config=node_config)
    ray.init(redis_address=cluster.redis_address)

    yield cluster

    # Teardown: everything after the yield.
    ray.shutdown()
    cluster.shutdown()
コード例 #7
0
def start_connected_longer_cluster():
    """Yield a connected one-CPU head cluster with a longer heartbeat
    timeout (20 instead of the usual 10)."""
    head_args = {
        "num_cpus": 1,
        "_internal_config": json.dumps({"num_heartbeats_timeout": 20}),
    }
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=head_args)
    yield cluster
    # Teardown.
    ray.shutdown()
    cluster.shutdown()
コード例 #8
0
def ray_start_cluster():
    """Yield a 4-node cluster (head + 3 workers), each node with 4 CPUs."""
    node_args = {
        "num_cpus": 4,
        "_internal_config": json.dumps({
            "initial_reconstruction_timeout_milliseconds": 1000,
            "num_heartbeats_timeout": 10
        })
    }
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=node_args)
    # Keep references to the three worker nodes alive for the test's duration.
    workers = [cluster.add_node(**node_args) for _ in range(3)]
    cluster.wait_for_nodes()
    yield cluster
    ray.shutdown()
    cluster.shutdown()
コード例 #9
0
ファイル: test_cluster.py プロジェクト: zsiqxx/ray
def start_connected_emptyhead_cluster():
    """Yield a connected cluster whose head node has no CPU resources."""
    head_args = {
        "num_cpus": 0,
        "_internal_config": json.dumps({"num_heartbeats_timeout": 10}),
    }
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=head_args)
    # Pytest doesn't play nicely with imports
    _register_all()
    yield cluster
    # Everything after the yield runs as teardown.
    ray.shutdown()
    cluster.shutdown()
コード例 #10
0
ファイル: test_stress.py プロジェクト: robertnishihara/ray
def ray_start_combination(request):
    """Start a cluster sized by ``request.param = (num_nodes, workers_per_scheduler)``.

    Yields (num_nodes, num_workers_per_scheduler, cluster); tears down after.
    """
    num_nodes = request.param[0]
    num_workers_per_scheduler = request.param[1]

    # Bring up the Ray processes: one head plus num_nodes - 1 workers,
    # 10 CPUs apiece.
    cluster = Cluster(
        initialize_head=True,
        head_node_args={"num_cpus": 10, "redis_max_memory": 10**7})
    for _ in range(num_nodes - 1):
        cluster.add_node(num_cpus=10)
    ray.init(redis_address=cluster.redis_address)

    yield num_nodes, num_workers_per_scheduler, cluster

    # Teardown.
    ray.shutdown()
    cluster.shutdown()
コード例 #11
0
ファイル: test_stress.py プロジェクト: mkhlz/ray
def ray_start_combination(request):
    """Variant of ray_start_combination that connects via ``cluster.address``
    (the newer ray.init keyword) instead of ``redis_address``.

    Yields (num_nodes, num_workers_per_scheduler, cluster).
    """
    num_nodes = request.param[0]
    num_workers_per_scheduler = request.param[1]

    # One head node plus (num_nodes - 1) workers, 10 CPUs apiece.
    cluster = Cluster(
        initialize_head=True,
        head_node_args={"num_cpus": 10, "redis_max_memory": 10**7})
    for _ in range(num_nodes - 1):
        cluster.add_node(num_cpus=10)
    ray.init(address=cluster.address)

    yield num_nodes, num_workers_per_scheduler, cluster

    # Teardown.
    ray.shutdown()
    cluster.shutdown()
コード例 #12
0
ファイル: test_cluster.py プロジェクト: robertnishihara/ray
def start_connected_emptyhead_cluster():
    """Yield a connected cluster where the head advertises zero CPUs, so no
    work can be scheduled on it."""
    internal_config = json.dumps({"num_heartbeats_timeout": 10})
    cluster = Cluster(
        initialize_head=True,
        connect=True,
        head_node_args={
            "num_cpus": 0,
            "_internal_config": internal_config,
        })
    # Pytest doesn't play nicely with imports
    _register_all()
    yield cluster
    # Teardown: everything after the yield.
    ray.shutdown()
    cluster.shutdown()
コード例 #13
0
ファイル: test_tempfile.py プロジェクト: ray1201/ray-1
def test_temp_plasma_store_socket():
    """Verify that user-specified plasma store socket paths are created,
    both via ``ray.init`` and via ``Cluster.add_node``.

    Fix: the identical try/except-OSError-pass cleanup appeared twice;
    factor it into one helper using ``contextlib.suppress``.
    """
    from contextlib import suppress

    def _remove_quietly(path):
        # The socket file may already have been removed by Ray itself.
        with suppress(OSError):
            os.remove(path)

    socket_path = "/tmp/i_am_a_temp_socket"
    ray.init(plasma_store_socket_name=socket_path)
    assert os.path.exists(socket_path), "Specified socket path not found."
    ray.shutdown()
    _remove_quietly(socket_path)

    cluster_socket_path = "/tmp/i_am_a_temp_socket_2"
    cluster = Cluster(True)
    cluster.add_node(plasma_store_socket_name=cluster_socket_path)
    assert os.path.exists(
        cluster_socket_path), "Specified socket path not found."
    cluster.shutdown()
    _remove_quietly(cluster_socket_path)
コード例 #14
0
def ray_initialize_cluster():
    """Yield a fixed 4-node cluster with 8 worker CPUs per node."""
    num_nodes = 4
    num_workers_per_scheduler = 8
    internal_config = json.dumps({
        "initial_reconstruction_timeout_milliseconds": 1000,
        "num_heartbeats_timeout": 10,
    })

    cluster = Cluster()
    for _ in range(num_nodes):
        cluster.add_node(
            num_cpus=num_workers_per_scheduler,
            _internal_config=internal_config)
    ray.init(redis_address=cluster.redis_address)

    yield cluster

    # Teardown.
    ray.shutdown()
    cluster.shutdown()
コード例 #15
0
def _ray_start_cluster(**kwargs):
    """Start a cluster and yield it; tear it down afterwards.

    Keyword args:
        num_nodes: number of nodes to add (default 0).
        do_init: whether to call ``ray.init`` on the cluster; defaults to
            True whenever at least one node was requested.
        Any remaining kwargs override the default node arguments.

    Fix: use ``dict.pop`` instead of the ``in``-check-then-``del`` pattern
    to strip the fixture-only keys out of kwargs.
    """
    init_kwargs = get_default_fixture_ray_kwargs()
    # num_nodes & do_init configure the fixture itself and are not valid
    # add_node/ray.init arguments, so pop them out of kwargs.
    num_nodes = kwargs.pop("num_nodes", 0)
    if "do_init" in kwargs:
        do_init = kwargs.pop("do_init")
    else:
        # Default: connect the driver whenever nodes were requested.
        do_init = num_nodes > 0
    init_kwargs.update(kwargs)
    cluster = Cluster()
    # Keep references to the nodes alive for the fixture's lifetime.
    remote_nodes = [cluster.add_node(**init_kwargs) for _ in range(num_nodes)]
    if do_init:
        ray.init(redis_address=cluster.redis_address)
    yield cluster
    # The code after the yield will run as teardown code.
    ray.shutdown()
    cluster.shutdown()
コード例 #16
0
ファイル: test_multi_node_2.py プロジェクト: ray1201/ray-1
def test_shutdown():
    """Cluster.shutdown must terminate the processes of every added node."""
    cluster = Cluster(initialize_head=False)
    nodes = [cluster.add_node(), cluster.add_node()]
    cluster.shutdown()
    assert all(not n.any_processes_alive() for n in nodes)
コード例 #17
0
class RayExecutor(Executor):
    """Executor that runs operator DAG stages as Ray actors.

    In ``local`` mode it simulates a multi-node cluster in-process, giving
    each simulated node a unique custom ``Node_<i>`` resource so actors can
    be pinned to specific nodes.  In any other mode it attaches to an
    existing cluster via a redis address and discovers the node resources
    from the global state client table.
    """

    def __init__(self, **kwargs):
        super(RayExecutor, self).__init__()
        mode = kwargs.get('ray_mode', 'local')
        self.resource_idx = 0  # round-robin cursor over self.resources
        if mode == 'local':
            node_kwargs = {
                'num_cpus': 4,
                'object_store_memory': 10**9,
                'resources': {
                    'Node_0': 100
                }
            }
            self.cluster = Cluster(initialize_head=True,
                                   head_node_args=node_kwargs)
            self.num_nodes = kwargs.get('ray_num_nodes', 4)
            self.nodes = []
            self.resources = []
            # BUG FIX: the node index was never incremented, so every
            # worker node advertised the same 'Node_1' resource, and a
            # stray extra (untracked) node was created after the loop.
            # Use distinct indices 1..num_nodes and create exactly
            # num_nodes worker nodes.
            for i in range(1, self.num_nodes + 1):
                node, resource = self._create_local_node(i, node_kwargs)
                self.nodes.append(node)
                self.resources.append(resource)
            redis_address = self.cluster.redis_address
            ray.init(redis_address=redis_address)
        else:
            redis_address = kwargs.get('redis_address', '127.0.0.1')
            ray.init(redis_address=redis_address)

            self.resources = []
            self.nodes = ray.global_state.client_table()
            for node in self.nodes:
                for resource in node['Resources']:
                    if 'Node' in resource and resource != 'Node_0':
                        self.resources.append(resource)
            # BUG FIX: num_nodes was previously unset in this branch,
            # which broke get_next_resource().
            self.num_nodes = len(self.resources)

    def __del__(self):
        # BUG FIX: in non-local mode there is no self.cluster; guard the
        # shutdown so interpreter teardown does not raise AttributeError.
        cluster = getattr(self, 'cluster', None)
        if cluster is not None:
            cluster.shutdown()
        ray.disconnect()

    def _create_local_node(self, i, node_kwargs):
        """Add one simulated node exposing a unique 'Node_<i>' resource."""
        resource = 'Node_{}'.format(i)
        node_kwargs['resources'] = {resource: 100}
        node = self.cluster.add_node(**node_kwargs)
        return node, resource

    def get_next_resource(self):
        """Return the next node resource label, round-robin."""
        resource = self.resources[self.resource_idx % self.num_nodes]
        self.resource_idx += 1
        return resource

    def exec(self, dag):
        """Run every operator of every stage as an actor pinned round-robin
        to node resources, then block until all actors finish."""
        num_stages = len(dag)
        actors = []
        task_handles = []
        for _ in range(num_stages):
            stage = dag.pop()
            for operator in stage:
                actor = OperatorActor._remote(
                    args=[operator],
                    kwargs={},
                    resources={self.get_next_resource(): 1})
                actors.append(actor)
                task_handles.append(actor.run.remote())

        ray.get(task_handles)
コード例 #18
0
class RayExecutorQueueTest(unittest.TestCase):
    """Exercise RayTrialExecutor trial queueing on a one-CPU, zero-GPU head."""

    def setUp(self):
        self.trial_executor = RayTrialExecutor(queue_trials=True,
                                               refresh_period=0)
        head_args = {
            "num_cpus": 1,
            "_internal_config": json.dumps({"num_heartbeats_timeout": 10}),
        }
        self.cluster = Cluster(initialize_head=True,
                               connect=True,
                               head_node_args=head_args)
        # Pytest doesn't play nicely with imports
        _register_all()

    def tearDown(self):
        ray.shutdown()
        self.cluster.shutdown()
        _register_all()  # re-register the evicted objects

    def testQueueTrial(self):
        """With queue_trials=True, has_resources reports True even for a
        GPU trial although the cluster has no GPUs."""
        def make_trial(cpu, gpu=0):
            return Trial("__fake", resources=Resources(cpu=cpu, gpu=gpu))

        cpu_trial = make_trial(1, 0)
        self.assertTrue(self.trial_executor.has_resources(cpu_trial.resources))
        self.trial_executor.start_trial(cpu_trial)

        gpu_trial = make_trial(0, 1)
        self.assertTrue(self.trial_executor.has_resources(gpu_trial.resources))

    def testHeadBlocking(self):
        """A queued GPU trial blocks CPU-only trials until capacity is added."""
        def make_trial(cpu, gpu=0):
            return Trial("__fake", resources=Resources(cpu=cpu, gpu=gpu))

        gpu_trial = make_trial(1, 1)
        self.assertTrue(self.trial_executor.has_resources(gpu_trial.resources))
        self.trial_executor.start_trial(gpu_trial)

        # TODO(rliaw): This behavior is probably undesirable, but right now
        # trials with different resource requirements is not often used.
        first_cpu_trial = make_trial(1, 0)
        self.assertFalse(
            self.trial_executor.has_resources(first_cpu_trial.resources))

        # Adding a node with a CPU and a GPU unblocks the queued GPU trial.
        self.cluster.add_node(num_cpus=1, num_gpus=1)
        self.cluster.wait_for_nodes()

        self.assertTrue(
            self.trial_executor.has_resources(first_cpu_trial.resources))
        self.trial_executor.start_trial(first_cpu_trial)

        second_cpu_trial = make_trial(1, 0)
        self.assertTrue(
            self.trial_executor.has_resources(second_cpu_trial.resources))
        self.trial_executor.start_trial(second_cpu_trial)

        third_cpu_trial = make_trial(1, 0)
        self.assertFalse(
            self.trial_executor.has_resources(third_cpu_trial.resources))
コード例 #19
0
def test_shutdown():
    """After shutdown(), no process of any previously added node survives."""
    cluster = Cluster(initialize_head=False)
    first = cluster.add_node()
    second = cluster.add_node()
    cluster.shutdown()
    for node in (first, second):
        assert not node.any_processes_alive()