Example #1
def get_head_updater(config):
    provider = get_provider(config)
    head_node_tags = {
        TAG_RAY_NODE_TYPE: "Head",
    }
    nodes = provider.nodes(head_node_tags)
    if len(nodes) > 0:
        head_node = nodes[0]
    else:
        print("Head node of cluster ({}) not found!".format(
            config["cluster_name"]))
        sys.exit(1)

    runtime_hash = hash_runtime_conf(config["file_mounts"], config)
    return NodeUpdaterProcess(head_node,
                              config["provider"],
                              config["auth"],
                              config["cluster_name"],
                              config["file_mounts"], [],
                              runtime_hash,
                              redirect_output=False)
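
Example #1 only constructs the updater. The later examples drive it by starting the process, waiting for it to finish, and checking its exit code; a minimal usage sketch of that same pattern (assuming the NodeUpdaterProcess interface seen below) would be:

# Usage sketch only: assumes NodeUpdaterProcess exposes the
# start()/join()/exitcode interface used in the examples that follow.
import sys

updater = get_head_updater(config)
updater.start()    # run the file sync / setup commands in a separate process
updater.join()     # block until the update finishes
if updater.exitcode != 0:
    print("Updating the head node failed")
    sys.exit(1)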
Example #2
def file_sync(config_file):
    """Returns head node IP for given configuration file if exists."""
    config = yaml.load(open(config_file).read())
    validate_config(config)
    config = fillout_defaults(config)
    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))

    bootstrap_config, provider_cls = importer()
    config = bootstrap_config(config)

    provider = provider_cls(config["provider"], config["cluster_name"])
    head_node_tags = {
        TAG_RAY_NODE_TYPE: "Head",
    }
    nodes = provider.nodes(head_node_tags)
    if len(nodes) > 0:
        head_node = nodes[0]
    else:
        print("Head node of cluster ({}) not found!".format(
            config["cluster_name"]))
        sys.exit(1)

    runtime_hash = hash_runtime_conf(config["file_mounts"], config)

    updater = NodeUpdaterProcess(
        head_node,
        config["provider"],
        config["auth"],
        config["cluster_name"],
        config["file_mounts"],
        [],
        runtime_hash,
        redirect_output=False)
    updater.sync_files(config["file_mounts"])
Example #3
def get_or_create_head_node(config, config_file, no_restart, restart_only, yes,
                            override_cluster_name):
    """Create the cluster head node, which in turn creates the workers."""

    provider = get_node_provider(config["provider"], config["cluster_name"])
    head_node_tags = {
        TAG_RAY_NODE_TYPE: "head",
    }
    nodes = provider.nodes(head_node_tags)
    if len(nodes) > 0:
        head_node = nodes[0]
    else:
        head_node = None

    if not head_node:
        confirm("This will create a new cluster", yes)
    elif not no_restart:
        confirm("This will restart cluster services", yes)

    launch_hash = hash_launch_conf(config["head_node"], config["auth"])
    if head_node is None or provider.node_tags(head_node).get(
            TAG_RAY_LAUNCH_CONFIG) != launch_hash:
        if head_node is not None:
            confirm("Head node config out-of-date. It will be terminated", yes)
            logger.info("Terminating outdated head node {}".format(head_node))
            provider.terminate_node(head_node)
        logger.info("Launching new head node...")
        head_node_tags[TAG_RAY_LAUNCH_CONFIG] = launch_hash
        head_node_tags[TAG_RAY_NODE_NAME] = "ray-{}-head".format(
            config["cluster_name"])
        provider.create_node(config["head_node"], head_node_tags, 1)

    nodes = provider.nodes(head_node_tags)
    assert len(nodes) == 1, "Failed to create head node."
    head_node = nodes[0]

    # TODO(ekl) right now we always update the head node even if the hash
    # matches. We could prompt the user for what they want to do in this case.
    runtime_hash = hash_runtime_conf(config["file_mounts"], config)
    logger.info("Updating files on head node...")

    # Rewrite the auth config so that the head node can update the workers
    remote_key_path = "~/ray_bootstrap_key.pem"
    remote_config = copy.deepcopy(config)
    remote_config["auth"]["ssh_private_key"] = remote_key_path

    # Adjust for new file locations
    new_mounts = {}
    for remote_path in config["file_mounts"]:
        new_mounts[remote_path] = remote_path
    remote_config["file_mounts"] = new_mounts
    remote_config["no_restart"] = no_restart

    # Now inject the rewritten config and SSH key into the head node
    remote_config_file = tempfile.NamedTemporaryFile("w",
                                                     prefix="ray-bootstrap-")
    remote_config_file.write(json.dumps(remote_config))
    remote_config_file.flush()
    config["file_mounts"].update({
        remote_key_path:
        config["auth"]["ssh_private_key"],
        "~/ray_bootstrap_config.yaml":
        remote_config_file.name
    })

    if restart_only:
        init_commands = config["head_start_ray_commands"]
    elif no_restart:
        init_commands = (config["setup_commands"] +
                         config["head_setup_commands"])
    else:
        init_commands = (config["setup_commands"] +
                         config["head_setup_commands"] +
                         config["head_start_ray_commands"])

    updater = NodeUpdaterProcess(head_node,
                                 config["provider"],
                                 config["auth"],
                                 config["cluster_name"],
                                 config["file_mounts"],
                                 init_commands,
                                 runtime_hash,
                                 redirect_output=False)
    updater.start()
    updater.join()

    # Refresh the node cache so we see the external ip if available
    provider.nodes(head_node_tags)

    if updater.exitcode != 0:
        logger.error("Updating {} failed".format(
            provider.external_ip(head_node)))
        sys.exit(1)
    logger.info("Head node up-to-date, IP address is: {}".format(
        provider.external_ip(head_node)))

    monitor_str = "tail -n 100 -f /tmp/ray/session_*/logs/monitor*"
    for s in init_commands:
        if ("ray start" in s and "docker exec" in s
                and "--autoscaling-config" in s):
            monitor_str = "docker exec {} /bin/sh -c {}".format(
                config["docker"]["container_name"], quote(monitor_str))
    if override_cluster_name:
        modifiers = " --cluster-name={}".format(quote(override_cluster_name))
    else:
        modifiers = ""
    print("To monitor auto-scaling activity, you can run:\n\n"
          "  ray exec {} {}{}\n".format(config_file, quote(monitor_str),
                                        modifiers))
    print("To open a console on the cluster:\n\n"
          "  ray attach {}{}\n".format(config_file, modifiers))
    print("To ssh manually to the cluster, run:\n\n"
          "  ssh -i {} {}@{}\n".format(config["auth"]["ssh_private_key"],
                                       config["auth"]["ssh_user"],
                                       provider.external_ip(head_node)))
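
Examples #3 through #8 all decide whether to relaunch the head node by comparing the node's TAG_RAY_LAUNCH_CONFIG tag against hash_launch_conf(config["head_node"], config["auth"]). That helper is not shown in these snippets; as a rough illustration of the idea (not the library's actual implementation), a deterministic hash over the launch-relevant config could look like this:

import hashlib
import json

def example_launch_conf_hash(node_conf, auth):
    # Illustrative stand-in (not Ray's hash_launch_conf): hash the serialized
    # head-node config together with the auth section, so any change produces
    # a new TAG_RAY_LAUNCH_CONFIG value and triggers the terminate-and-relaunch
    # branch seen above.
    payload = json.dumps([node_conf, auth], sort_keys=True).encode("utf-8")
    return hashlib.sha1(payload).hexdigest()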
Example #4
def get_or_create_head_node(config, config_file, no_restart, restart_only, yes,
                            override_cluster_name):
    """Create the cluster head node, which in turn creates the workers."""
    provider = get_node_provider(config["provider"], config["cluster_name"])
    try:
        head_node_tags = {
            TAG_RAY_NODE_TYPE: "head",
        }
        nodes = provider.non_terminated_nodes(head_node_tags)
        if len(nodes) > 0:
            head_node = nodes[0]
        else:
            head_node = None

        if not head_node:
            confirm("This will create a new cluster", yes)
        elif not no_restart:
            confirm("This will restart cluster services", yes)

        launch_hash = hash_launch_conf(config["head_node"], config["auth"])
        if head_node is None or provider.node_tags(head_node).get(
                TAG_RAY_LAUNCH_CONFIG) != launch_hash:
            if head_node is not None:
                confirm("Head node config out-of-date. It will be terminated",
                        yes)
                logger.info(
                    "get_or_create_head_node: "
                    "Terminating outdated head node {}".format(head_node))
                provider.terminate_node(head_node)
            logger.info("get_or_create_head_node: Launching new head node...")
            head_node_tags[TAG_RAY_LAUNCH_CONFIG] = launch_hash
            head_node_tags[TAG_RAY_NODE_NAME] = "ray-{}-head".format(
                config["cluster_name"])
            provider.create_node(config["head_node"], head_node_tags, 1)

        nodes = provider.non_terminated_nodes(head_node_tags)
        assert len(nodes) == 1, "Failed to create head node."
        head_node = nodes[0]

        # TODO(ekl) right now we always update the head node even if the hash
        # matches. We could prompt the user for what they want to do here.
        runtime_hash = hash_runtime_conf(config["file_mounts"], config)
        logger.info("get_or_create_head_node: Updating files on head node...")

        # Rewrite the auth config so that the head node can update the workers
        remote_key_path = "~/ray_bootstrap_key.pem"
        remote_config = copy.deepcopy(config)
        remote_config["auth"]["ssh_private_key"] = remote_key_path

        # Adjust for new file locations
        new_mounts = {}
        for remote_path in config["file_mounts"]:
            new_mounts[remote_path] = remote_path
        remote_config["file_mounts"] = new_mounts
        remote_config["no_restart"] = no_restart

        # Now inject the rewritten config and SSH key into the head node
        remote_config_file = tempfile.NamedTemporaryFile(
            "w", prefix="ray-bootstrap-")
        remote_config_file.write(json.dumps(remote_config))
        remote_config_file.flush()
        config["file_mounts"].update({
            remote_key_path: config["auth"]["ssh_private_key"],
            "~/ray_bootstrap_config.yaml": remote_config_file.name
        })

        if restart_only:
            init_commands = config["head_start_ray_commands"]
        elif no_restart:
            init_commands = (
                config["setup_commands"] + config["head_setup_commands"])
        else:
            init_commands = (
                config["setup_commands"] + config["head_setup_commands"] +
                config["head_start_ray_commands"])

        updater = NodeUpdaterThread(
            node_id=head_node,
            provider_config=config["provider"],
            provider=provider,
            auth_config=config["auth"],
            cluster_name=config["cluster_name"],
            file_mounts=config["file_mounts"],
            initialization_commands=config["initialization_commands"],
            setup_commands=init_commands,
            runtime_hash=runtime_hash,
        )
        updater.start()
        updater.join()

        # Refresh the node cache so we see the external ip if available
        provider.non_terminated_nodes(head_node_tags)

        if config.get("provider", {}).get("use_internal_ips", False) is True:
            head_node_ip = provider.internal_ip(head_node)
        else:
            head_node_ip = provider.external_ip(head_node)

        if updater.exitcode != 0:
            logger.error("get_or_create_head_node: "
                         "Updating {} failed".format(head_node_ip))
            sys.exit(1)
        logger.info(
            "get_or_create_head_node: "
            "Head node up-to-date, IP address is: {}".format(head_node_ip))

        monitor_str = "tail -n 100 -f /tmp/ray/session_*/logs/monitor*"
        use_docker = bool(config["docker"]["container_name"])
        if override_cluster_name:
            modifiers = " --cluster-name={}".format(
                quote(override_cluster_name))
        else:
            modifiers = ""
        print("To monitor auto-scaling activity, you can run:\n\n"
              "  ray exec {} {}{}{}\n".format(
                  config_file, "--docker " if use_docker else " ",
                  quote(monitor_str), modifiers))
        print("To open a console on the cluster:\n\n"
              "  ray attach {}{}\n".format(config_file, modifiers))

        print("To ssh manually to the cluster, run:\n\n"
              "  ssh -i {} {}@{}\n".format(config["auth"]["ssh_private_key"],
                                           config["auth"]["ssh_user"],
                                           head_node_ip))
    finally:
        provider.cleanup()
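
Several of these examples call confirm(msg, yes) before destructive actions such as terminating an outdated head node. The helper itself is not included in the snippets; a hypothetical stand-in with the behavior implied by the call sites (skip the prompt when yes was passed, otherwise ask and abort) might be:

import sys

def confirm(msg, yes):
    # Hypothetical stand-in for the confirm() helper used in these examples,
    # not the library's implementation: bypass the prompt when --yes was
    # given, otherwise abort on anything other than an explicit "y".
    if yes:
        return
    answer = input("{} [y/N]: ".format(msg)).strip().lower()
    if answer != "y":
        sys.exit(1)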
Example #5
def get_or_create_head_node(config, no_restart):
    """Create the cluster head node, which in turn creates the workers."""

    provider = get_node_provider(config["provider"], config["cluster_name"])
    head_node_tags = {
        TAG_RAY_NODE_TYPE: "Head",
    }
    nodes = provider.nodes(head_node_tags)
    if len(nodes) > 0:
        head_node = nodes[0]
    else:
        head_node = None

    if not head_node:
        confirm("This will create a new cluster")
    elif not no_restart:
        confirm("This will restart your cluster")

    launch_hash = hash_launch_conf(config["head_node"], config["auth"])
    if head_node is None or provider.node_tags(head_node).get(
            TAG_RAY_LAUNCH_CONFIG) != launch_hash:
        if head_node is not None:
            print("Terminating outdated head node {}".format(head_node))
            provider.terminate_node(head_node)
        print("Launching new head node...")
        head_node_tags[TAG_RAY_LAUNCH_CONFIG] = launch_hash
        head_node_tags[TAG_NAME] = "ray-{}-head".format(config["cluster_name"])
        provider.create_node(config["head_node"], head_node_tags, 1)

    nodes = provider.nodes(head_node_tags)
    assert len(nodes) == 1, "Failed to create head node."
    head_node = nodes[0]

    # TODO(ekl) right now we always update the head node even if the hash
    # matches. We could prompt the user for what they want to do in this case.
    runtime_hash = hash_runtime_conf(config["file_mounts"], config)
    print("Updating files on head node...")

    # Rewrite the auth config so that the head node can update the workers
    remote_key_path = "~/ray_bootstrap_key.pem"
    remote_config = copy.deepcopy(config)
    remote_config["auth"]["ssh_private_key"] = remote_key_path

    # Adjust for new file locations
    new_mounts = {}
    for remote_path in config["file_mounts"]:
        new_mounts[remote_path] = remote_path
    remote_config["file_mounts"] = new_mounts
    remote_config["no_restart"] = no_restart

    # Now inject the rewritten config and SSH key into the head node
    remote_config_file = tempfile.NamedTemporaryFile("w",
                                                     prefix="ray-bootstrap-")
    remote_config_file.write(json.dumps(remote_config))
    remote_config_file.flush()
    config["file_mounts"].update({
        remote_key_path:
        config["auth"]["ssh_private_key"],
        "~/ray_bootstrap_config.yaml":
        remote_config_file.name
    })

    if no_restart:
        init_commands = (config["setup_commands"] +
                         config["head_setup_commands"])
    else:
        init_commands = (config["setup_commands"] +
                         config["head_setup_commands"] +
                         config["head_start_ray_commands"])

    updater = NodeUpdaterProcess(head_node,
                                 config["provider"],
                                 config["auth"],
                                 config["cluster_name"],
                                 config["file_mounts"],
                                 init_commands,
                                 runtime_hash,
                                 redirect_output=False)
    updater.start()
    updater.join()

    # Refresh the node cache so we see the external ip if available
    provider.nodes(head_node_tags)

    if updater.exitcode != 0:
        print("Error: updating {} failed".format(
            provider.external_ip(head_node)))
        sys.exit(1)
    print("Head node up-to-date, IP address is: {}".format(
        provider.external_ip(head_node)))
    print("To monitor auto-scaling activity, you can run:\n\n"
          "  ssh -i {} {}@{} 'tail -f /tmp/raylogs/monitor-*'\n".format(
              config["auth"]["ssh_private_key"], config["auth"]["ssh_user"],
              provider.external_ip(head_node)))
    print("To login to the cluster, run:\n\n"
          "  ssh -i {} {}@{}\n".format(config["auth"]["ssh_private_key"],
                                       config["auth"]["ssh_user"],
                                       provider.external_ip(head_node)))
Example #6
def get_or_create_head_node(config, config_file, no_restart, restart_only, yes,
                            override_cluster_name):
    """Create the cluster head node, which in turn creates the workers."""
    provider = get_node_provider(config["provider"], config["cluster_name"])
    config_file = os.path.abspath(config_file)
    try:
        head_node_tags = {
            TAG_RAY_NODE_TYPE: NODE_TYPE_HEAD,
        }
        nodes = provider.non_terminated_nodes(head_node_tags)
        if len(nodes) > 0:
            head_node = nodes[0]
        else:
            head_node = None

        if not head_node:
            confirm("This will create a new cluster", yes)
        elif not no_restart:
            confirm("This will restart cluster services", yes)

        launch_hash = hash_launch_conf(config["head_node"], config["auth"])
        if head_node is None or provider.node_tags(head_node).get(
                TAG_RAY_LAUNCH_CONFIG) != launch_hash:
            if head_node is not None:
                confirm("Head node config out-of-date. It will be terminated",
                        yes)
                logger.info(
                    "get_or_create_head_node: "
                    "Shutting down outdated head node {}".format(head_node))
                provider.terminate_node(head_node)
            logger.info("get_or_create_head_node: Launching new head node...")
            head_node_tags[TAG_RAY_LAUNCH_CONFIG] = launch_hash
            head_node_tags[TAG_RAY_NODE_NAME] = "ray-{}-head".format(
                config["cluster_name"])
            provider.create_node(config["head_node"], head_node_tags, 1)

        start = time.time()
        head_node = None
        while True:
            if time.time() - start > 5:
                raise RuntimeError("Failed to create head node.")
            nodes = provider.non_terminated_nodes(head_node_tags)
            if len(nodes) == 1:
                head_node = nodes[0]
                break
            time.sleep(1)

        # TODO(ekl) right now we always update the head node even if the hash
        # matches. We could prompt the user for what they want to do here.
        runtime_hash = hash_runtime_conf(config["file_mounts"], config)
        logger.info("get_or_create_head_node: Updating files on head node...")

        # Rewrite the auth config so that the head node can update the workers
        remote_config = copy.deepcopy(config)
        if config["provider"]["type"] != "kubernetes":
            remote_key_path = "~/ray_bootstrap_key.pem"
            remote_config["auth"]["ssh_private_key"] = remote_key_path

        # Adjust for new file locations
        new_mounts = {}
        for remote_path in config["file_mounts"]:
            new_mounts[remote_path] = remote_path
        remote_config["file_mounts"] = new_mounts
        remote_config["no_restart"] = no_restart

        # Now inject the rewritten config and SSH key into the head node
        remote_config_file = tempfile.NamedTemporaryFile(
            "w", prefix="ray-bootstrap-")
        remote_config_file.write(json.dumps(remote_config))
        remote_config_file.flush()
        config["file_mounts"].update(
            {"~/ray_bootstrap_config.yaml": remote_config_file.name})
        if config["provider"]["type"] != "kubernetes":
            config["file_mounts"].update({
                remote_key_path:
                config["auth"]["ssh_private_key"],
            })

        if restart_only:
            init_commands = []
            ray_start_commands = config["head_start_ray_commands"]
        elif no_restart:
            init_commands = config["head_setup_commands"]
            ray_start_commands = []
        else:
            init_commands = config["head_setup_commands"]
            ray_start_commands = config["head_start_ray_commands"]

        updater = NodeUpdaterThread(
            node_id=head_node,
            provider_config=config["provider"],
            provider=provider,
            auth_config=config["auth"],
            cluster_name=config["cluster_name"],
            file_mounts=config["file_mounts"],
            initialization_commands=config["initialization_commands"],
            setup_commands=init_commands,
            ray_start_commands=ray_start_commands,
            runtime_hash=runtime_hash,
        )
        updater.start()
        updater.join()

        # Refresh the node cache so we see the external ip if available
        provider.non_terminated_nodes(head_node_tags)

        if config.get("provider", {}).get("use_internal_ips", False) is True:
            head_node_ip = provider.internal_ip(head_node)
        else:
            head_node_ip = provider.external_ip(head_node)

        if updater.exitcode != 0:
            logger.error("get_or_create_head_node: "
                         "Updating {} failed".format(head_node_ip))
            sys.exit(1)
        logger.info(
            "get_or_create_head_node: "
            "Head node up-to-date, IP address is: {}".format(head_node_ip))

        monitor_str = "tail -n 100 -f /tmp/ray/session_*/logs/monitor*"
        use_docker = "docker" in config and bool(
            config["docker"]["container_name"])
        if override_cluster_name:
            modifiers = " --cluster-name={}".format(
                quote(override_cluster_name))
        else:
            modifiers = ""
        print("To monitor auto-scaling activity, you can run:\n\n"
              "  ray exec {} {}{}{}\n".format(
                  config_file, "--docker " if use_docker else "",
                  quote(monitor_str), modifiers))
        print("To open a console on the cluster:\n\n"
              "  ray attach {}{}\n".format(config_file, modifiers))

        print("To get a remote shell to the cluster manually, run:\n\n"
              "  {}\n".format(updater.cmd_runner.remote_shell_command_str()))
    finally:
        provider.cleanup()
Example #7
def get_or_create_head_node(config):
    """Create the cluster head node, which in turn creates the workers."""

    provider = get_node_provider(config["provider"], config["cluster_name"])
    head_node_tags = {
        TAG_RAY_NODE_TYPE: "Head",
    }
    nodes = provider.nodes(head_node_tags)
    if len(nodes) > 0:
        head_node = nodes[0]
    else:
        head_node = None

    launch_hash = hash_launch_conf(config["head_node"], config["auth"])
    if head_node is None or provider.node_tags(head_node).get(
            TAG_RAY_LAUNCH_CONFIG) != launch_hash:
        if head_node is not None:
            print("Terminating outdated head node {}".format(head_node))
            provider.terminate_node(head_node)
        print("Launching new head node...")
        head_node_tags[TAG_RAY_LAUNCH_CONFIG] = launch_hash
        head_node_tags[TAG_NAME] = "ray-{}-head".format(config["cluster_name"])
        provider.create_node(config["head_node"], head_node_tags, 1)

    nodes = provider.nodes(head_node_tags)
    assert len(nodes) == 1, "Failed to create head node."
    head_node = nodes[0]

    runtime_hash = hash_runtime_conf(config["file_mounts"], config)

    if provider.node_tags(head_node).get(
            TAG_RAY_RUNTIME_CONFIG) != runtime_hash:
        print("Updating files on head node...")

        # Rewrite the auth config so that the head node can update the workers
        remote_key_path = "~/ray_bootstrap_key.pem"
        remote_config = copy.deepcopy(config)
        remote_config["auth"]["ssh_private_key"] = remote_key_path

        # Adjust for new file locations
        new_mounts = {}
        for remote_path in config["file_mounts"]:
            new_mounts[remote_path] = remote_path
        remote_config["file_mounts"] = new_mounts

        # Now inject the rewritten config and SSH key into the head node
        remote_config_file = tempfile.NamedTemporaryFile(
            "w", prefix="ray-bootstrap-")
        remote_config_file.write(json.dumps(remote_config))
        remote_config_file.flush()
        config["file_mounts"].update({
            remote_key_path: config["auth"]["ssh_private_key"],
            "~/ray_bootstrap_config.yaml": remote_config_file.name
        })

        updater = NodeUpdaterProcess(
            head_node,
            config["provider"],
            config["auth"],
            config["cluster_name"],
            config["file_mounts"],
            config["head_init_commands"],
            runtime_hash,
            redirect_output=False)
        updater.start()
        updater.join()
        if updater.exitcode != 0:
            print("Error: updating {} failed".format(
                provider.external_ip(head_node)))
            sys.exit(1)
    print(
        "Head node up-to-date, IP address is: {}".format(
            provider.external_ip(head_node)))
    print(
        "To monitor auto-scaling activity, you can run:\n\n"
        "  ssh -i {} {}@{} 'tail -f /tmp/raylogs/monitor-*'\n".format(
            config["auth"]["ssh_private_key"],
            config["auth"]["ssh_user"],
            provider.external_ip(head_node)))
Example #8
def get_or_create_head_node(config, no_restart, yes):
    """Create the cluster head node, which in turn creates the workers."""

    provider = get_node_provider(config["provider"], config["cluster_name"])
    head_node_tags = {
        TAG_RAY_NODE_TYPE: "Head",
    }
    nodes = provider.nodes(head_node_tags)
    if len(nodes) > 0:
        head_node = nodes[0]
    else:
        head_node = None

    if not head_node:
        confirm("This will create a new cluster", yes)
    elif not no_restart:
        confirm("This will restart cluster services", yes)

    launch_hash = hash_launch_conf(config["head_node"], config["auth"])
    if head_node is None or provider.node_tags(head_node).get(
            TAG_RAY_LAUNCH_CONFIG) != launch_hash:
        if head_node is not None:
            confirm("Head node config out-of-date. It will be terminated", yes)
            print("Terminating outdated head node {}".format(head_node))
            provider.terminate_node(head_node)
        print("Launching new head node...")
        head_node_tags[TAG_RAY_LAUNCH_CONFIG] = launch_hash
        head_node_tags[TAG_NAME] = "ray-{}-head".format(config["cluster_name"])
        provider.create_node(config["head_node"], head_node_tags, 1)

    nodes = provider.nodes(head_node_tags)
    assert len(nodes) == 1, "Failed to create head node."
    head_node = nodes[0]

    # TODO(ekl) right now we always update the head node even if the hash
    # matches. We could prompt the user for what they want to do in this case.
    runtime_hash = hash_runtime_conf(config["file_mounts"], config)
    print("Updating files on head node...")

    # Rewrite the auth config so that the head node can update the workers
    remote_key_path = "~/ray_bootstrap_key.pem"
    remote_config = copy.deepcopy(config)
    remote_config["auth"]["ssh_private_key"] = remote_key_path

    # Adjust for new file locations
    new_mounts = {}
    for remote_path in config["file_mounts"]:
        new_mounts[remote_path] = remote_path
    remote_config["file_mounts"] = new_mounts
    remote_config["no_restart"] = no_restart

    # Now inject the rewritten config and SSH key into the head node
    remote_config_file = tempfile.NamedTemporaryFile(
        "w", prefix="ray-bootstrap-")
    remote_config_file.write(json.dumps(remote_config))
    remote_config_file.flush()
    config["file_mounts"].update({
        remote_key_path: config["auth"]["ssh_private_key"],
        "~/ray_bootstrap_config.yaml": remote_config_file.name
    })

    if no_restart:
        init_commands = (
            config["setup_commands"] + config["head_setup_commands"])
    else:
        init_commands = (
            config["setup_commands"] + config["head_setup_commands"] +
            config["head_start_ray_commands"])

    updater = NodeUpdaterProcess(
        head_node,
        config["provider"],
        config["auth"],
        config["cluster_name"],
        config["file_mounts"],
        init_commands,
        runtime_hash,
        redirect_output=False)
    updater.start()
    updater.join()

    # Refresh the node cache so we see the external ip if available
    provider.nodes(head_node_tags)

    if updater.exitcode != 0:
        print("Error: updating {} failed".format(
            provider.external_ip(head_node)))
        sys.exit(1)
    print(
        "Head node up-to-date, IP address is: {}".format(
            provider.external_ip(head_node)))

    monitor_str = "tail -f /tmp/raylogs/monitor-*"
    for s in init_commands:
        if ("ray start" in s and "docker exec" in s and
                "--autoscaling-config" in s):
            monitor_str = "docker exec {} /bin/sh -c {}".format(
                        config["docker"]["container_name"],
                        quote(monitor_str))
    print(
        "To monitor auto-scaling activity, you can run:\n\n"
        "  ssh -i {} {}@{} {}\n".format(
            config["auth"]["ssh_private_key"],
            config["auth"]["ssh_user"],
            provider.external_ip(head_node),
            quote(monitor_str)))
    print(
        "To login to the cluster, run:\n\n"
        "  ssh -i {} {}@{}\n".format(
            config["auth"]["ssh_private_key"],
            config["auth"]["ssh_user"],
            provider.external_ip(head_node)))
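
All of the examples above read the same handful of top-level keys from the cluster config. A hypothetical minimal config dict covering only the fields accessed in these snippets (placeholder values, not a complete or validated Ray cluster config) could look like:

# Placeholder values for illustration; only keys actually read by the
# snippets above are included.
config = {
    "cluster_name": "example",
    "provider": {"type": "aws", "use_internal_ips": False},
    "auth": {"ssh_user": "ubuntu", "ssh_private_key": "~/.ssh/example.pem"},
    "head_node": {},                    # provider-specific instance settings
    "docker": {"container_name": ""},   # empty name disables the docker hints
    "file_mounts": {},                  # remote path -> local path
    "initialization_commands": [],
    "setup_commands": [],
    "head_setup_commands": [],
    "head_start_ray_commands": [],
}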