Ejemplo n.º 1
0
    def add_node(self, **override_kwargs):
        """Start one more Ray node and register it with this cluster.

        The first node added becomes the head node (started with
        ``include_webui=False``); every later node is started as a worker
        node that is given the head's Redis address.

        Defaults applied before ``override_kwargs``:
            num_cpus=1,
            object_store_memory=100 * (2**20)  # 100 MB

        Both kinds of node are started with ``cleanup=True``.

        Args:
            override_kwargs: Keyword arguments forwarded to ``RayParams``;
                a key given here replaces the matching default.

        Returns:
            Node object of the added Ray node.
        """
        params_kwargs = dict(
            num_cpus=1,
            object_store_memory=100 * (2**20),  # 100 MB
        )
        params_kwargs.update(override_kwargs)
        ray_params = RayParams(
            node_ip_address=services.get_node_ip_address(), **params_kwargs)

        starting_head = self.head_node is None
        if starting_head:
            ray_params.update(include_webui=False)
            address_info = services.start_ray_head(ray_params, cleanup=True)
            self.redis_address = address_info["redis_address"]
        else:
            ray_params.update(redis_address=self.redis_address)
            address_info = services.start_ray_node(ray_params, cleanup=True)

        # TODO(rliaw): Find a more stable way than modifying global state.
        # Snapshot the processes that were just started, then reset the
        # global registry so the next node's snapshot holds only its own.
        started_processes = services.all_processes.copy()
        for process_type in services.all_processes:
            services.all_processes[process_type] = []
        node = Node(address_info, started_processes)

        if starting_head:
            self.head_node = node
        else:
            self.worker_nodes[node] = address_info

        raylet_socket = address_info["raylet_socket_name"]
        logger.info("Starting Node with raylet socket {}".format(raylet_socket))

        return node
Ejemplo n.º 2
0
def ray_start_workers_separate():
    """Start a local single-CPU Ray instance, yield, then shut Ray down.

    Output redirection is enabled (``redirect_output=True``).

    Yields:
        None. Everything after the ``yield`` runs as teardown code.
    """
    startup_options = dict(num_cpus=1,
                           start_ray_local=True,
                           redirect_output=True)
    ray.worker._init(RayParams(**startup_options))
    yield
    # Teardown: stop the Ray processes started above.
    ray.shutdown()
Ejemplo n.º 3
0
def _test_component_failed(component_type):
    """Kill a component on all worker nodes and check workload succeeds.

    Starts a local cluster with 4 local schedulers and 8 CPUs per scheduler.
    Then, for every process of ``component_type`` except the first one (the
    head node's), a round of dependent tasks is submitted, the process is
    killed, and the task results are fetched to prove they are still
    obtainable.

    Args:
        component_type: Key into ``ray.services.all_processes`` selecting
            the kind of process to kill.
    """
    scheduler_count = 4
    cpus_per_scheduler = 8
    internal_config = json.dumps({
        "initial_reconstruction_timeout_milliseconds": 1000,
        "num_heartbeats_timeout": 10,
    })
    ray.worker._init(
        RayParams(num_local_schedulers=scheduler_count,
                  start_ray_local=True,
                  num_cpus=[cpus_per_scheduler] * scheduler_count,
                  redirect_output=True,
                  _internal_config=internal_config))

    # Remote functions used to build the workload.
    @ray.remote
    def f(x):
        return x

    @ray.remote
    def g(*xs):
        return 1

    time.sleep(0.1)
    # Skip index 0 so the head node's component is left running.
    victims = ray.services.all_processes[component_type][1:]
    for victim in victims:
        # A chain of 1000 tasks, each depending on the previous one.
        chain_tail = 1
        for _ in range(1000):
            chain_tail = f.remote(chain_tail)

        # Tasks that depend on every result submitted so far, interleaved
        # with independent tasks.
        fan_in = [g.remote(1)]
        for _ in range(100):
            fan_in.append(g.remote(*fan_in))
            fan_in.append(g.remote(1))

        # Ask the component to exit, then force-kill it and reap it.
        victim.terminate()
        time.sleep(1)
        victim.kill()
        victim.wait()
        assert victim.poll() is not None

        # The results must still be retrievable after the executing
        # tasks' component died.
        ray.get(chain_tail)
        ray.get(fan_in)
Ejemplo n.º 4
0
def ray_start_combination(request):
    """Start local Ray for one (schedulers, workers) parameter combination.

    ``request.param[0]`` is the number of local schedulers to start.
    ``request.param[1]`` is only passed through to the consumer; the cluster
    itself is always started with ``num_cpus=10``.

    Yields:
        Tuple ``(num_local_schedulers, num_workers_per_scheduler)``.
        Everything after the ``yield`` runs as teardown code.
    """
    scheduler_count, workers_per_scheduler = (request.param[0],
                                              request.param[1])
    ray.worker._init(
        RayParams(start_ray_local=True,
                  num_local_schedulers=scheduler_count,
                  num_cpus=10))
    yield scheduler_count, workers_per_scheduler
    # Teardown: stop the Ray processes started above.
    ray.shutdown()
Ejemplo n.º 5
0
def ray_start_two_nodes():
    """Start local Ray with two local schedulers and zero CPUs, then stop it.

    The internal config sets ``num_heartbeats_timeout`` to 40.

    Yields:
        None. Everything after the ``yield`` runs as teardown code.
    """
    heartbeat_config = json.dumps({"num_heartbeats_timeout": 40})
    ray.worker._init(
        RayParams(start_ray_local=True,
                  num_local_schedulers=2,
                  num_cpus=0,
                  _internal_config=heartbeat_config))
    yield
    # Teardown: stop the Ray processes started above.
    ray.shutdown()
Ejemplo n.º 6
0
def ray_start_workers_separate_multinode(request):
    """Start a local multi-scheduler Ray cluster for one parameter pair.

    ``request.param[0]`` is the number of local schedulers and
    ``request.param[1]`` is the CPU count given to each of them. Output
    redirection is enabled.

    Yields:
        Tuple ``(num_local_schedulers, num_initial_workers)``. Everything
        after the ``yield`` runs as teardown code.
    """
    scheduler_count = request.param[0]
    workers_per_scheduler = request.param[1]
    # One CPU-count entry per local scheduler.
    cpus_per_scheduler = [workers_per_scheduler] * scheduler_count
    ray.worker._init(
        RayParams(num_local_schedulers=scheduler_count,
                  start_ray_local=True,
                  num_cpus=cpus_per_scheduler,
                  redirect_output=True))
    yield scheduler_count, workers_per_scheduler
    # Teardown: stop the Ray processes started above.
    ray.shutdown()
Ejemplo n.º 7
0
    def add_node(self, **override_kwargs):
        """Start one more Ray node and register it with this cluster.

        The first node added becomes the head node (started with
        ``include_webui=False``); every later node is started as a worker
        node that is given the head's Redis address.

        Defaults applied before ``override_kwargs``:
            resources={"CPU": 1},
            object_store_memory=100 * (2**20)  # 100 MB

        Both kinds of node are started with ``cleanup=True``.

        Args:
            override_kwargs: Keyword arguments forwarded to ``RayParams``;
                a key given here replaces the matching default.

        Returns:
            Node object of the added Ray node.
        """
        params_kwargs = dict(
            resources={"CPU": 1},
            object_store_memory=100 * (2**20),  # 100 MB
        )
        params_kwargs.update(override_kwargs)
        ray_params = RayParams(node_ip_address=services.get_node_ip_address(),
                               **params_kwargs)

        starting_head = self.head_node is None
        if starting_head:
            ray_params.update(include_webui=False)
            address_info = services.start_ray_head(ray_params, cleanup=True)
            self.redis_address = address_info["redis_address"]
        else:
            ray_params.update(redis_address=self.redis_address)
            address_info = services.start_ray_node(ray_params, cleanup=True)

        # TODO(rliaw): Find a more stable way than modifying global state.
        # Snapshot the processes that were just started, then reset the
        # global registry so the next node's snapshot holds only its own.
        started_processes = services.all_processes.copy()
        for process_type in services.all_processes:
            services.all_processes[process_type] = []
        node = Node(address_info, started_processes)

        if starting_head:
            self.head_node = node
        else:
            self.worker_nodes[node] = address_info

        raylet_sockets = address_info["raylet_socket_names"]
        logger.info(
            "Starting Node with raylet socket {}".format(raylet_sockets))

        return node
Ejemplo n.º 8
0
def start(node_ip_address, redis_address, redis_port, num_redis_shards,
          redis_max_clients, redis_password, redis_shard_ports,
          object_manager_port, node_manager_port, object_store_memory,
          redis_max_memory, num_workers, num_cpus, num_gpus, resources, head,
          no_ui, block, plasma_directory, huge_pages, autoscaling_config,
          no_redirect_worker_output, no_redirect_output,
          plasma_store_socket_name, raylet_socket_name, temp_dir,
          internal_config):
    """Start the Ray processes on this machine as a head or non-head node.

    With ``head`` set, a head node is started and instructions for joining
    the new cluster are logged. Otherwise this node waits for the Redis
    server at ``redis_address``, checks version information against it,
    and starts a non-head node attached to it.

    Args:
        node_ip_address: Address of this node, or None to detect it. A
            given address is converted to a numerical IP address.
        redis_address: Address of the existing Redis server in ``ip:port``
            form. Required without ``head``; must be None with ``head``.
        redis_port: Redis port for the head node. Rejected without ``head``.
        num_redis_shards: Number of Redis shards. Inferred from
            ``redis_shard_ports`` when None. Rejected without ``head``.
        redis_max_clients: Forwarded to the head node's parameters.
            Rejected without ``head``.
        redis_password: Password used for Redis connections, or None.
        redis_shard_ports: Comma-separated port list such as
            ``"6380,6381,6382"``. Rejected without ``head``.
        object_manager_port, node_manager_port, object_store_memory,
        num_workers, num_cpus, num_gpus, plasma_directory, huge_pages,
        plasma_store_socket_name, raylet_socket_name, temp_dir: Forwarded
            unchanged to ``RayParams``.
        redis_max_memory, autoscaling_config: Forwarded to the head node's
            parameters; unused without ``head``.
        resources: JSON string describing resources; parsed with
            ``json.loads`` before being forwarded.
        head: Truthy to start a head node.
        no_ui: Truthy to start the head without the web UI. Rejected
            without ``head``.
        block: Truthy to sleep forever after start-up instead of returning.
        no_redirect_worker_output: Negated into ``redirect_worker_output``.
        no_redirect_output: Negated into ``redirect_output``.
        internal_config: Forwarded as ``_internal_config``.

    Raises:
        Exception: If ``resources`` cannot be parsed as JSON, or if the
            arguments are not a valid combination for the chosen mode.
    """
    # Convert hostnames to numerical IP address.
    if node_ip_address is not None:
        node_ip_address = services.address_to_ip(node_ip_address)
    if redis_address is not None:
        redis_address = services.address_to_ip(redis_address)

    try:
        resources = json.loads(resources)
    except (TypeError, ValueError):
        # json.loads raises TypeError for non-string input and ValueError
        # (JSONDecodeError) for malformed JSON; report both uniformly.
        raise Exception("Unable to parse the --resources argument using "
                        "json.loads. Try using a format like\n\n"
                        "    --resources='{\"CustomResource1\": 3, "
                        "\"CustomResource2\": 2}'")

    ray_params = RayParams(
        node_ip_address=node_ip_address,
        object_manager_port=object_manager_port,
        node_manager_port=node_manager_port,
        num_workers=num_workers,
        object_store_memory=object_store_memory,
        redis_password=redis_password,
        redirect_worker_output=not no_redirect_worker_output,
        redirect_output=not no_redirect_output,
        num_cpus=num_cpus,
        num_gpus=num_gpus,
        resources=resources,
        plasma_directory=plasma_directory,
        huge_pages=huge_pages,
        plasma_store_socket_name=plasma_store_socket_name,
        raylet_socket_name=raylet_socket_name,
        temp_dir=temp_dir,
        _internal_config=internal_config)

    if head:
        # Start Ray on the head node.
        if redis_shard_ports is not None:
            redis_shard_ports = redis_shard_ports.split(",")
            # Infer the number of Redis shards from the ports if the number is
            # not provided.
            if num_redis_shards is None:
                num_redis_shards = len(redis_shard_ports)
            # Check that the arguments match.
            if len(redis_shard_ports) != num_redis_shards:
                raise Exception("If --redis-shard-ports is provided, it must "
                                "have the form '6380,6381,6382', and the "
                                "number of ports provided must equal "
                                "--num-redis-shards (which is 1 if not "
                                "provided)")

        if redis_address is not None:
            raise Exception("If --head is passed in, a Redis server will be "
                            "started, so a Redis address should not be "
                            "provided.")

        # Get the node IP address if one is not provided.
        ray_params.update_if_absent(
            node_ip_address=services.get_node_ip_address())
        logger.info("Using IP address {} for this node.".format(
            ray_params.node_ip_address))
        ray_params.update_if_absent(redis_port=redis_port,
                                    redis_shard_ports=redis_shard_ports,
                                    redis_max_memory=redis_max_memory,
                                    num_redis_shards=num_redis_shards,
                                    redis_max_clients=redis_max_clients,
                                    include_webui=(not no_ui),
                                    autoscaling_config=autoscaling_config)

        address_info = services.start_ray_head(ray_params, cleanup=False)
        logger.info(address_info)
        # The password fragments are only rendered when a password is set.
        logger.info(
            "\nStarted Ray on this node. You can add additional nodes to "
            "the cluster by calling\n\n"
            "    ray start --redis-address {}{}{}\n\n"
            "from the node you wish to add. You can connect a driver to the "
            "cluster from Python by running\n\n"
            "    import ray\n"
            "    ray.init(redis_address=\"{}{}{}\")\n\n"
            "If you have trouble connecting from a different machine, check "
            "that your firewall is configured properly. If you wish to "
            "terminate the processes that have been started, run\n\n"
            "    ray stop".format(
                address_info["redis_address"],
                " --redis-password " if redis_password else "",
                redis_password if redis_password else "",
                address_info["redis_address"],
                "\", redis_password=\"" if redis_password else "",
                redis_password if redis_password else ""))
    else:
        # Start Ray on a non-head node.
        if redis_port is not None:
            raise Exception("If --head is not passed in, --redis-port is not "
                            "allowed")
        if redis_shard_ports is not None:
            raise Exception("If --head is not passed in, --redis-shard-ports "
                            "is not allowed")
        if redis_address is None:
            raise Exception("If --head is not passed in, --redis-address must "
                            "be provided.")
        if num_redis_shards is not None:
            raise Exception("If --head is not passed in, --num-redis-shards "
                            "must not be provided.")
        if redis_max_clients is not None:
            raise Exception("If --head is not passed in, --redis-max-clients "
                            "must not be provided.")
        if no_ui:
            raise Exception("If --head is not passed in, the --no-ui flag is "
                            "not relevant.")
        redis_ip_address, redis_port = redis_address.split(":")

        # Wait for the Redis server to be started. And throw an exception if we
        # can't connect to it.
        services.wait_for_redis_to_start(redis_ip_address,
                                         int(redis_port),
                                         password=redis_password)

        # Create a Redis client.
        redis_client = services.create_redis_client(redis_address,
                                                    password=redis_password)

        # Check that the version information on this node matches the version
        # information that the cluster was started with.
        services.check_version_info(redis_client)

        # Get the node IP address if one is not provided.
        ray_params.update_if_absent(
            node_ip_address=services.get_node_ip_address(redis_address))
        logger.info("Using IP address {} for this node.".format(
            ray_params.node_ip_address))
        # Check that there aren't already Redis clients with the same IP
        # address connected with this Redis instance. This raises an exception
        # if the Redis server already has clients on this node.
        check_no_existing_redis_clients(ray_params.node_ip_address,
                                        redis_client)
        ray_params.redis_address = redis_address
        address_info = services.start_ray_node(ray_params, cleanup=False)
        logger.info(address_info)
        logger.info("\nStarted Ray on this node. If you wish to terminate the "
                    "processes that have been started, run\n\n"
                    "    ray stop")

    if block:
        # Keep this process alive indefinitely.
        import time
        while True:
            time.sleep(30)
Ejemplo n.º 9
0
    args = parser.parse_args()

    # Connection details handed to ``ray.worker.connect`` below.
    info = {
        "node_ip_address": args.node_ip_address,
        "redis_address": args.redis_address,
        "redis_password": args.redis_password,
        "store_socket_name": args.object_store_name,
        "raylet_socket_name": args.raylet_name,
    }

    ray.utils.setup_logger(args.logging_level, args.logging_format)

    # The same command-line settings, packaged as RayParams for the Node
    # object created next.
    ray_params = RayParams(node_ip_address=args.node_ip_address,
                           redis_address=args.redis_address,
                           redis_password=args.redis_password,
                           plasma_store_socket_name=args.object_store_name,
                           raylet_socket_name=args.raylet_name,
                           temp_dir=args.temp_dir,
                           load_code_from_local=args.load_code_from_local)

    # NOTE(review): connect_only=True with head=False presumably attaches to
    # an already-running node instead of starting new processes — confirm
    # against ray.node.Node.
    node = ray.node.Node(ray_params,
                         head=False,
                         shutdown_at_exit=False,
                         connect_only=True)
    # Publish the node object through the worker module's global.
    ray.worker._global_node = node

    # TODO(suquark): Use "node" as the input of "connect".
    ray.worker.connect(info,
                       redis_password=args.redis_password,
                       mode=ray.WORKER_MODE,
                       load_code_from_local=args.load_code_from_local)
Ejemplo n.º 10
0
        if len(config_list) > 1:
            i = 0
            while i < len(config_list):
                internal_config[config_list[i]] = config_list[i + 1]
                i += 2

    # Fall back to this node's own IP address when no separate raylet
    # address was given on the command line.
    raylet_ip_address = args.raylet_ip_address
    if raylet_ip_address is None:
        raylet_ip_address = args.node_ip_address

    # Package the command-line settings for the Node object; the internal
    # config dict is passed along serialized as JSON.
    ray_params = RayParams(
        node_ip_address=args.node_ip_address,
        raylet_ip_address=raylet_ip_address,
        node_manager_port=args.node_manager_port,
        redis_address=args.redis_address,
        redis_password=args.redis_password,
        plasma_store_socket_name=args.object_store_name,
        raylet_socket_name=args.raylet_name,
        temp_dir=args.temp_dir,
        load_code_from_local=args.load_code_from_local,
        _internal_config=json.dumps(internal_config),
    )

    # NOTE(review): connect_only=True with head=False presumably attaches to
    # an already-running node instead of starting new processes — confirm
    # against ray.node.Node.
    node = ray.node.Node(ray_params,
                         head=False,
                         shutdown_at_exit=False,
                         spawn_reaper=False,
                         connect_only=True)
    # Publish the node object through the worker module's global, then
    # connect this process as a worker; connect() receives the internal
    # config as the original dict rather than the JSON string.
    ray.worker._global_node = node
    ray.worker.connect(node,
                       mode=ray.WORKER_MODE,
                       internal_config=internal_config)
Ejemplo n.º 11
0
    # Fall back to this node's own IP address when no separate raylet
    # address is available.
    if raylet_ip_address is None:
        raylet_ip_address = args.node_ip_address

    # Extend sys.path with every ":"-separated entry of the code search
    # path. An entry that is a file contributes its containing directory.
    code_search_path = args.code_search_path
    if code_search_path is not None:
        for p in code_search_path.split(":"):
            if os.path.isfile(p):
                p = os.path.dirname(p)
            sys.path.append(p)

    # Package the command-line settings for the Node object.
    ray_params = RayParams(
        node_ip_address=args.node_ip_address,
        raylet_ip_address=raylet_ip_address,
        node_manager_port=args.node_manager_port,
        redis_address=args.redis_address,
        redis_password=args.redis_password,
        plasma_store_socket_name=args.object_store_name,
        raylet_socket_name=args.raylet_name,
        temp_dir=args.temp_dir,
        load_code_from_local=args.load_code_from_local,
        metrics_agent_port=args.metrics_agent_port,
    )

    # NOTE(review): connect_only=True with head=False presumably attaches to
    # an already-running node instead of starting new processes — confirm
    # against ray.node.Node.
    node = ray.node.Node(ray_params,
                         head=False,
                         shutdown_at_exit=False,
                         spawn_reaper=False,
                         connect_only=True)
    # Publish the node object through the worker module's global, then
    # connect this process in the mode chosen earlier.
    ray.worker._global_node = node
    ray.worker.connect(node, mode=mode)

    # Setup log file.
Ejemplo n.º 12
0
def main(args):
    """Run this process as a Ray worker of the type named in ``args``.

    Sets up logging, initializes external storage for spill/restore
    workers, connects to the node described by ``args``, and then runs with
    stdout and stderr redirected to the worker interceptor loggers. A
    regular worker runs the worker main loop; spill and restore workers
    just stay alive.

    Args:
        args: Parsed command-line arguments. Attributes read here:
            ``logging_level``, ``logging_format``, ``worker_type``,
            ``object_spilling_config``, ``raylet_ip_address``,
            ``node_ip_address``, ``code_search_path``,
            ``node_manager_port``, ``redis_address``, ``redis_password``,
            ``object_store_name``, ``raylet_name``, ``temp_dir``,
            ``load_code_from_local`` and ``metrics_agent_port``.

    Raises:
        ValueError: If ``args.worker_type`` is not ``"WORKER"``,
            ``"SPILL_WORKER"`` or ``"RESTORE_WORKER"``.
    """
    ray.ray_logging.setup_logger(args.logging_level, args.logging_format)

    if args.worker_type == "WORKER":
        mode = ray.WORKER_MODE
    elif args.worker_type == "SPILL_WORKER":
        mode = ray.SPILL_WORKER_MODE
    elif args.worker_type == "RESTORE_WORKER":
        mode = ray.RESTORE_WORKER_MODE
    else:
        raise ValueError("Unknown worker type: " + args.worker_type)

    is_io_worker = mode in (ray.RESTORE_WORKER_MODE, ray.SPILL_WORKER_MODE)

    # NOTE(suquark): We must initialize the external storage before we
    # connect to raylet. Otherwise we may receive requests before the
    # external storage is initialized.
    if is_io_worker:
        from ray import external_storage
        if args.object_spilling_config:
            # The config arrives base64-encoded JSON.
            object_spilling_config = base64.b64decode(
                args.object_spilling_config)
            object_spilling_config = json.loads(object_spilling_config)
        else:
            object_spilling_config = {}
        external_storage.setup_external_storage(object_spilling_config)

    # Fall back to this node's own IP address when no separate raylet
    # address was given.
    raylet_ip_address = args.raylet_ip_address
    if raylet_ip_address is None:
        raylet_ip_address = args.node_ip_address

    # Extend sys.path with every ":"-separated entry of the code search
    # path. An entry that is a file contributes its containing directory.
    code_search_path = args.code_search_path
    if code_search_path is not None:
        for p in code_search_path.split(":"):
            if os.path.isfile(p):
                p = os.path.dirname(p)
            sys.path.append(p)

    ray_params = RayParams(
        node_ip_address=args.node_ip_address,
        raylet_ip_address=raylet_ip_address,
        node_manager_port=args.node_manager_port,
        redis_address=args.redis_address,
        redis_password=args.redis_password,
        plasma_store_socket_name=args.object_store_name,
        raylet_socket_name=args.raylet_name,
        temp_dir=args.temp_dir,
        load_code_from_local=args.load_code_from_local,
        metrics_agent_port=args.metrics_agent_port,
    )

    node = ray.node.Node(ray_params,
                         head=False,
                         shutdown_at_exit=False,
                         spawn_reaper=False,
                         connect_only=True)
    ray.worker._global_node = node
    ray.worker.connect(node, mode=mode)

    # Redirect stdout and stderr to the default worker interceptor logger.
    # NOTE: We deprecated redirect_worker_output arg,
    # so we don't need to handle here.
    stdout_interceptor = StandardStreamInterceptor(
        setup_and_get_worker_interceptor_logger(is_for_stdout=True),
        intercept_stdout=True)
    stderr_interceptor = StandardStreamInterceptor(
        setup_and_get_worker_interceptor_logger(is_for_stdout=False),
        intercept_stdout=False)
    with redirect_stdout(stdout_interceptor), \
            redirect_stderr(stderr_interceptor):
        if mode == ray.WORKER_MODE:
            ray.worker.global_worker.main_loop()
        elif is_io_worker:
            # It is handled by another thread in the C++ core worker.
            # We just need to keep the worker alive.
            while True:
                time.sleep(100000)
        else:
            raise ValueError(f"Unexpected worker mode: {mode}")
Ejemplo n.º 13
0
def start(node_ip_address, redis_address, redis_port, num_redis_shards,
          redis_max_clients, redis_password, redis_shard_ports,
          object_manager_port, node_manager_port, object_store_memory,
          redis_max_memory, num_workers, num_cpus, num_gpus, resources, head,
          no_ui, block, plasma_directory, huge_pages, autoscaling_config,
          no_redirect_worker_output, no_redirect_output,
          plasma_store_socket_name, raylet_socket_name, temp_dir,
          internal_config):
    """Start the Ray processes on this machine as a head or non-head node.

    With ``head`` set, a head node is started and instructions for joining
    the new cluster are logged. Otherwise this node waits for the Redis
    server at ``redis_address``, checks version information against it,
    and starts a non-head node attached to it.

    Args:
        node_ip_address: Address of this node, or None to detect it. A
            given address is converted to a numerical IP address.
        redis_address: Address of the existing Redis server in ``ip:port``
            form. Required without ``head``; must be None with ``head``.
        redis_port: Redis port for the head node. Rejected without ``head``.
        num_redis_shards: Number of Redis shards. Inferred from
            ``redis_shard_ports`` when None. Rejected without ``head``.
        redis_max_clients: Forwarded to the head node's parameters.
            Rejected without ``head``.
        redis_password: Password used for Redis connections, or None.
        redis_shard_ports: Comma-separated port list such as
            ``"6380,6381,6382"``. Rejected without ``head``.
        object_manager_port, node_manager_port, object_store_memory,
        num_workers, num_cpus, num_gpus, plasma_directory, huge_pages,
        plasma_store_socket_name, raylet_socket_name, temp_dir: Forwarded
            unchanged to ``RayParams``.
        redis_max_memory, autoscaling_config: Forwarded to the head node's
            parameters; unused without ``head``.
        resources: JSON string describing resources; parsed with
            ``json.loads`` before being forwarded.
        head: Truthy to start a head node.
        no_ui: Truthy to start the head without the web UI. Rejected
            without ``head``.
        block: Truthy to sleep forever after start-up instead of returning.
        no_redirect_worker_output: Negated into ``redirect_worker_output``.
        no_redirect_output: Negated into ``redirect_output``.
        internal_config: Forwarded as ``_internal_config``.

    Raises:
        Exception: If ``resources`` cannot be parsed as JSON, or if the
            arguments are not a valid combination for the chosen mode.
    """
    # Convert hostnames to numerical IP address.
    if node_ip_address is not None:
        node_ip_address = services.address_to_ip(node_ip_address)
    if redis_address is not None:
        redis_address = services.address_to_ip(redis_address)

    try:
        resources = json.loads(resources)
    except (TypeError, ValueError):
        # json.loads raises TypeError for non-string input and ValueError
        # (JSONDecodeError) for malformed JSON; report both uniformly.
        raise Exception("Unable to parse the --resources argument using "
                        "json.loads. Try using a format like\n\n"
                        "    --resources='{\"CustomResource1\": 3, "
                        "\"CustomResource2\": 2}'")

    ray_params = RayParams(
        node_ip_address=node_ip_address,
        object_manager_port=object_manager_port,
        node_manager_port=node_manager_port,
        num_workers=num_workers,
        object_store_memory=object_store_memory,
        redis_password=redis_password,
        redirect_worker_output=not no_redirect_worker_output,
        redirect_output=not no_redirect_output,
        num_cpus=num_cpus,
        num_gpus=num_gpus,
        resources=resources,
        plasma_directory=plasma_directory,
        huge_pages=huge_pages,
        plasma_store_socket_name=plasma_store_socket_name,
        raylet_socket_name=raylet_socket_name,
        temp_dir=temp_dir,
        _internal_config=internal_config)

    if head:
        # Start Ray on the head node.
        if redis_shard_ports is not None:
            redis_shard_ports = redis_shard_ports.split(",")
            # Infer the number of Redis shards from the ports if the number is
            # not provided.
            if num_redis_shards is None:
                num_redis_shards = len(redis_shard_ports)
            # Check that the arguments match.
            if len(redis_shard_ports) != num_redis_shards:
                raise Exception("If --redis-shard-ports is provided, it must "
                                "have the form '6380,6381,6382', and the "
                                "number of ports provided must equal "
                                "--num-redis-shards (which is 1 if not "
                                "provided)")

        if redis_address is not None:
            raise Exception("If --head is passed in, a Redis server will be "
                            "started, so a Redis address should not be "
                            "provided.")

        # Get the node IP address if one is not provided.
        ray_params.update_if_absent(
            node_ip_address=services.get_node_ip_address())
        logger.info("Using IP address {} for this node.".format(
            ray_params.node_ip_address))
        ray_params.update_if_absent(
            redis_port=redis_port,
            redis_shard_ports=redis_shard_ports,
            redis_max_memory=redis_max_memory,
            num_redis_shards=num_redis_shards,
            redis_max_clients=redis_max_clients,
            include_webui=(not no_ui),
            autoscaling_config=autoscaling_config)

        address_info = services.start_ray_head(ray_params, cleanup=False)
        logger.info(address_info)
        # The password fragments are only rendered when a password is set.
        logger.info(
            "\nStarted Ray on this node. You can add additional nodes to "
            "the cluster by calling\n\n"
            "    ray start --redis-address {}{}{}\n\n"
            "from the node you wish to add. You can connect a driver to the "
            "cluster from Python by running\n\n"
            "    import ray\n"
            "    ray.init(redis_address=\"{}{}{}\")\n\n"
            "If you have trouble connecting from a different machine, check "
            "that your firewall is configured properly. If you wish to "
            "terminate the processes that have been started, run\n\n"
            "    ray stop".format(
                address_info["redis_address"],
                " --redis-password " if redis_password else "",
                redis_password if redis_password else "",
                address_info["redis_address"],
                "\", redis_password=\"" if redis_password else "",
                redis_password if redis_password else ""))
    else:
        # Start Ray on a non-head node.
        if redis_port is not None:
            raise Exception("If --head is not passed in, --redis-port is not "
                            "allowed")
        if redis_shard_ports is not None:
            raise Exception("If --head is not passed in, --redis-shard-ports "
                            "is not allowed")
        if redis_address is None:
            raise Exception("If --head is not passed in, --redis-address must "
                            "be provided.")
        if num_redis_shards is not None:
            raise Exception("If --head is not passed in, --num-redis-shards "
                            "must not be provided.")
        if redis_max_clients is not None:
            raise Exception("If --head is not passed in, --redis-max-clients "
                            "must not be provided.")
        if no_ui:
            raise Exception("If --head is not passed in, the --no-ui flag is "
                            "not relevant.")
        redis_ip_address, redis_port = redis_address.split(":")

        # Wait for the Redis server to be started. And throw an exception if we
        # can't connect to it.
        services.wait_for_redis_to_start(
            redis_ip_address, int(redis_port), password=redis_password)

        # Create a Redis client.
        redis_client = services.create_redis_client(
            redis_address, password=redis_password)

        # Check that the version information on this node matches the version
        # information that the cluster was started with.
        services.check_version_info(redis_client)

        # Get the node IP address if one is not provided.
        ray_params.update_if_absent(
            node_ip_address=services.get_node_ip_address(redis_address))
        logger.info("Using IP address {} for this node.".format(
            ray_params.node_ip_address))
        # Check that there aren't already Redis clients with the same IP
        # address connected with this Redis instance. This raises an exception
        # if the Redis server already has clients on this node.
        check_no_existing_redis_clients(ray_params.node_ip_address,
                                        redis_client)
        ray_params.redis_address = redis_address
        address_info = services.start_ray_node(ray_params, cleanup=False)
        logger.info(address_info)
        logger.info("\nStarted Ray on this node. If you wish to terminate the "
                    "processes that have been started, run\n\n"
                    "    ray stop")

    if block:
        # Keep this process alive indefinitely.
        import time
        while True:
            time.sleep(30)
Ejemplo n.º 14
0
def ray_start_reconstruction(request):
    """Start a local Ray cluster with hand-started Redis and Plasma stores.

    ``request.param`` is the number of local schedulers. One Plasma store is
    started per local scheduler, sharing 10**9 bytes of object store memory
    between them, and the remaining services are started through
    ``ray.worker._init``.

    Yields:
        Tuple ``(redis_ip_address, redis_port, plasma_store_memory,
        num_local_schedulers)``.

    After the ``yield`` the teardown checks that all processes are still
    alive and, when ``RAY_USE_NEW_GCS`` is ``"on"``, that every local
    scheduler had a task scheduled or submitted. ``ray.shutdown()`` always
    runs, even if one of those checks fails.
    """
    num_local_schedulers = request.param

    # Start the Redis global state store.
    node_ip_address = "127.0.0.1"
    redis_address, redis_shards = ray.services.start_redis(node_ip_address)
    redis_ip_address = ray.services.get_ip_address(redis_address)
    redis_port = ray.services.get_port(redis_address)
    time.sleep(0.1)

    # Start the Plasma store instances with a total of 1GB memory.
    plasma_store_memory = 10**9
    plasma_addresses = []
    object_store_memory = plasma_store_memory // num_local_schedulers
    for i in range(num_local_schedulers):
        store_stdout_file, store_stderr_file = (
            ray.tempfile_services.new_plasma_store_log_file(i, True))
        plasma_addresses.append(
            ray.services.start_plasma_store(
                node_ip_address,
                redis_address,
                object_store_memory=object_store_memory,
                store_stdout_file=store_stdout_file,
                store_stderr_file=store_stderr_file))

    # Start the rest of the services in the Ray cluster.
    address_info = {
        "redis_address": redis_address,
        "redis_shards": redis_shards,
        "object_store_addresses": plasma_addresses
    }
    ray_params = RayParams(address_info=address_info,
                           start_ray_local=True,
                           num_local_schedulers=num_local_schedulers,
                           num_cpus=[1] * num_local_schedulers,
                           redirect_output=True,
                           _internal_config=json.dumps({
                               "initial_reconstruction_timeout_milliseconds":
                               200
                           }))
    ray.worker._init(ray_params)

    # Everything from here on is guarded so that the cluster is shut down
    # even when a teardown assertion fails or the generator is closed early.
    try:
        yield (redis_ip_address, redis_port, plasma_store_memory,
               num_local_schedulers)

        # The code after the yield will run as teardown code.
        assert ray.services.all_processes_alive()

        state = ray.experimental.state.GlobalState()
        state._initialize_global_state(redis_ip_address, redis_port)
        if os.environ.get("RAY_USE_NEW_GCS") == "on":
            # Determine the IDs of all local schedulers that had a task
            # scheduled or submitted.
            tasks = state.task_table()
            local_scheduler_ids = {
                task["LocalSchedulerID"]
                for task in tasks.values()
            }

            # Make sure that all nodes in the cluster were used by checking
            # that the set of local scheduler IDs that had a task scheduled
            # or submitted is equal to the total number of local schedulers
            # started. We add one to the total number of local schedulers to
            # account for NIL_LOCAL_SCHEDULER_ID. This is the local scheduler
            # ID associated with the driver task, since it is not scheduled
            # by a particular local scheduler.
            assert len(local_scheduler_ids) == num_local_schedulers + 1
    finally:
        # Clean up the Ray cluster.
        ray.shutdown()