Example #1
def teardown_cluster(config_file, yes, workers_only, override_cluster_name):
    """Destroys all nodes of a Ray cluster described by a config json."""

    config = yaml.load(open(config_file).read())
    if override_cluster_name is not None:
        config["cluster_name"] = override_cluster_name
    validate_config(config)
    config = fillout_defaults(config)

    confirm("This will destroy your cluster", yes)

    provider = get_node_provider(config["provider"], config["cluster_name"])

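    # Terminate the head node first unless only the workers are being removed.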
    if not workers_only:
        for node in provider.nodes({TAG_RAY_NODE_TYPE: "head"}):
            logger.info("Terminating head node {}".format(node))
            provider.terminate_node(node)

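    # Keep polling the provider until no worker nodes remain.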
    nodes = provider.nodes({TAG_RAY_NODE_TYPE: "worker"})
    while nodes:
        for node in nodes:
            logger.info("Terminating worker {}".format(node))
            provider.terminate_node(node)
        time.sleep(5)
        nodes = provider.nodes({TAG_RAY_NODE_TYPE: "worker"})
Example #2
 def _test_invalid_config(self, config_path):
     with open(os.path.join(RAY_PATH, config_path)) as f:
         config = yaml.safe_load(f)
     try:
         validate_config(config)
         self.fail("Expected validation to fail for {}".format(config_path))
     except jsonschema.ValidationError:
         pass
Example #3
 def testValidateDefaultConfig(self):
     for config_path in CONFIG_PATHS:
         with open(config_path) as f:
             config = yaml.safe_load(f)
         config = fillout_defaults(config)
         try:
             validate_config(config)
         except Exception:
             self.fail("Config did not pass validation test!")
Example #4
 def testValidateDefaultConfig(self):
     config = {}
     config["provider"] = {
         "type": "aws",
         "region": "us-east-1",
         "availability_zone": "us-east-1a",
     }
     config = fillout_defaults(config)
     try:
         validate_config(config)
     except Exception:
         self.fail("Default config did not pass validation test!")
Example #5
 def testValidateNetworkConfig(self):
     web_yaml = "https://raw.githubusercontent.com/ray-project/ray/" \
         "master/python/ray/autoscaler/aws/example-full.yaml"
     response = urllib.request.urlopen(web_yaml, timeout=5)
     content = response.read()
     with tempfile.TemporaryFile() as f:
         f.write(content)
         f.seek(0)
         config = yaml.safe_load(f)
     config = fillout_defaults(config)
     try:
         validate_config(config)
     except Exception:
         self.fail("Config did not pass validation test!")
Example #6
def teardown_cluster(config_file, yes, workers_only, override_cluster_name,
                     keep_min_workers):
    """Destroys all nodes of a Ray cluster described by a config json."""

    config = yaml.safe_load(open(config_file).read())
    if override_cluster_name is not None:
        config["cluster_name"] = override_cluster_name
    config = fillout_defaults(config)
    validate_config(config)

    confirm("This will destroy your cluster", yes)

    provider = get_node_provider(config["provider"], config["cluster_name"])
    try:

        def remaining_nodes():

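            # Workers the provider still reports as non-terminated.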
            workers = provider.non_terminated_nodes({
                TAG_RAY_NODE_TYPE: NODE_TYPE_WORKER
            })

            if keep_min_workers:
                min_workers = config.get("min_workers", 0)
                logger.info("teardown_cluster: "
                            "Keeping {} nodes...".format(min_workers))
                workers = random.sample(workers, len(workers) - min_workers)

            if workers_only:
                return workers

            head = provider.non_terminated_nodes({
                TAG_RAY_NODE_TYPE: NODE_TYPE_HEAD
            })

            return head + workers

        # Loop here to confirm that both the head and worker nodes are
        # actually gone.
        A = remaining_nodes()
        with LogTimer("teardown_cluster: done."):
            while A:
                logger.info("teardown_cluster: "
                            "Shutting down {} nodes...".format(len(A)))
                provider.terminate_nodes(A)
                time.sleep(1)
                A = remaining_nodes()
    finally:
        provider.cleanup()
Example #7
def teardown_cluster(config_file, yes, workers_only, override_cluster_name):
    """Destroys all nodes of a Ray cluster described by a config json."""

    config = yaml.load(open(config_file).read())
    if override_cluster_name is not None:
        config["cluster_name"] = override_cluster_name
    validate_config(config)
    config = fillout_defaults(config)

    confirm("This will destroy your cluster", yes)

    provider = get_node_provider(config["provider"], config["cluster_name"])

    try:

        def remaining_nodes():
            if workers_only:
                A = []
            else:
                A = [
                    node_id for node_id in provider.non_terminated_nodes({
                        TAG_RAY_NODE_TYPE: "head"
                    })
                ]

            A += [
                node_id for node_id in provider.non_terminated_nodes({
                    TAG_RAY_NODE_TYPE: "worker"
                })
            ]
            return A

        # Loop here to confirm that both the head and worker nodes are
        # actually gone.
        A = remaining_nodes()
        with LogTimer("teardown_cluster: Termination done."):
            while A:
                logger.info("teardown_cluster: "
                            "Terminating {} nodes...".format(len(A)))
                provider.terminate_nodes(A)
                time.sleep(1)
                A = remaining_nodes()
    finally:
        provider.cleanup()
Example #8
def create_or_update_cluster(config_file, override_min_workers,
                             override_max_workers, no_restart):
    """Create or updates an autoscaling Ray cluster from a config json."""

    config = yaml.load(open(config_file).read())

    validate_config(config)
    if override_min_workers is not None:
        config["min_workers"] = override_min_workers
    if override_max_workers is not None:
        config["max_workers"] = override_max_workers

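    # Resolve the provider-specific bootstrap function for this cloud.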
    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))

    bootstrap_config, _ = importer()
    config = bootstrap_config(config)
    get_or_create_head_node(config, no_restart)
Example #9
def teardown_cluster(config_file):
    """Destroys all nodes of a Ray cluster described by a config json."""

    config = yaml.load(open(config_file).read())

    validate_config(config)
    provider = get_node_provider(config["provider"], config["cluster_name"])
    head_node_tags = {
        TAG_RAY_NODE_TYPE: "Head",
    }
    for node in provider.nodes(head_node_tags):
        print("Terminating head node {}".format(node))
        provider.terminate_node(node)
    nodes = provider.nodes({})
    while nodes:
        for node in nodes:
            print("Terminating worker {}".format(node))
            provider.terminate_node(node)
        time.sleep(5)
        nodes = provider.nodes({})
Example #10
def _bootstrap_config(config):
    config = fillout_defaults(config)

    hasher = hashlib.sha1()
    hasher.update(json.dumps([config], sort_keys=True).encode("utf-8"))
    cache_key = os.path.join(tempfile.gettempdir(),
                             "ray-config-{}".format(hasher.hexdigest()))
    if os.path.exists(cache_key):
        return json.loads(open(cache_key).read())
    validate_config(config)

    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))

    bootstrap_config, _ = importer()
    resolved_config = bootstrap_config(config)
    with open(cache_key, "w") as f:
        f.write(json.dumps(resolved_config))
    return resolved_config
Example #11
def create_or_update_cluster(
        config_file, override_min_workers, override_max_workers,
        no_restart, yes):
    """Create or updates an autoscaling Ray cluster from a config json."""

    config = yaml.load(open(config_file).read())
    validate_config(config)
    config = fillout_defaults(config)

    if override_min_workers is not None:
        config["min_workers"] = override_min_workers
    if override_max_workers is not None:
        config["max_workers"] = override_max_workers

    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError(
            "Unsupported provider {}".format(config["provider"]))

    bootstrap_config, _ = importer()
    config = bootstrap_config(config)
    get_or_create_head_node(config, no_restart, yes)
Example #12
def teardown_cluster(config_file, yes):
    """Destroys all nodes of a Ray cluster described by a config json."""

    config = yaml.load(open(config_file).read())
    validate_config(config)
    config = fillout_defaults(config)

    confirm("This will destroy your cluster", yes)

    provider = get_node_provider(config["provider"], config["cluster_name"])
    head_node_tags = {
        TAG_RAY_NODE_TYPE: "Head",
    }
    for node in provider.nodes(head_node_tags):
        print("Terminating head node {}".format(node))
        provider.terminate_node(node)
    nodes = provider.nodes({})
    while nodes:
        for node in nodes:
            print("Terminating worker {}".format(node))
            provider.terminate_node(node)
        time.sleep(5)
        nodes = provider.nodes({})
Example #13
File: commands.py Project: nskh/ray
def file_sync(config_file):
    """Returns head node IP for given configuration file if exists."""
    config = yaml.load(open(config_file).read())
    validate_config(config)
    config = fillout_defaults(config)
    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))

    bootstrap_config, provider_cls = importer()
    config = bootstrap_config(config)

    provider = provider_cls(config["provider"], config["cluster_name"])
    head_node_tags = {
        TAG_RAY_NODE_TYPE: "Head",
    }
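    # Find the cluster's running head node; exit if none is found.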
    nodes = provider.nodes(head_node_tags)
    if len(nodes) > 0:
        head_node = nodes[0]
    else:
        print("Head node of cluster ({}) not found!".format(
            config["cluster_name"]))
        sys.exit(1)

    runtime_hash = hash_runtime_conf(config["file_mounts"], config)

    updater = NodeUpdaterProcess(
        head_node,
        config["provider"],
        config["auth"],
        config["cluster_name"],
        config["file_mounts"],
        [],
        runtime_hash,
        redirect_output=False)
    updater.sync_files(config["file_mounts"])
Example #14
    def testValidation(self):
        """Ensures that schema validation is working."""
        config = copy.deepcopy(SMALL_CLUSTER)
        try:
            validate_config(config)
        except Exception:
            self.fail("Test config did not pass validation test!")

        config["blah"] = "blah"
        with pytest.raises(ValueError):
            validate_config(config)
        del config["blah"]

        config["provider"]["blah"] = "blah"
        with pytest.raises(ValueError):
            validate_config(config)
        del config["provider"]["blah"]

        del config["provider"]
        with pytest.raises(ValueError):
            validate_config(config)
Example #15
    def testValidation(self):
        """Ensures that schema validation is working."""
        config = copy.deepcopy(SMALL_CLUSTER)
        try:
            validate_config(config)
        except Exception:
            self.fail("Test config did not pass validation test!")

        config["blah"] = "blah"
        with self.assertRaises(ValueError):
            validate_config(config)
        del config["blah"]

        config["provider"]["blah"] = "blah"
        with self.assertRaises(ValueError):
            validate_config(config)
        del config["provider"]["blah"]

        del config["provider"]
        with self.assertRaises(ValueError):
            validate_config(config)
Example #16
def load_config(yaml_file_name):
    config = yaml.load(open(yaml_file_name).read())
    validate_config(config)
    return fillout_defaults(config)
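Most of the snippets above follow the same pattern as the load_config helper directly above: parse the cluster YAML, check it with validate_config, then fill in optional fields with fillout_defaults. The standalone sketch below restates that pattern; the ray.autoscaler.autoscaler import path and the config file path are assumptions based on older Ray releases and may differ in yours.

import yaml

# Assumed import path for older Ray releases; newer releases have moved these helpers.
from ray.autoscaler.autoscaler import fillout_defaults, validate_config


def load_cluster_config(path):
    """Parses, validates, and fills in defaults for a cluster config YAML."""
    with open(path) as f:
        config = yaml.safe_load(f)
    validate_config(config)          # raises if the config violates the schema
    return fillout_defaults(config)  # add defaults for omitted optional fields


if __name__ == "__main__":
    # The path is illustrative only.
    config = load_cluster_config("example-full.yaml")
    print(config["cluster_name"])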