コード例 #1
0
 def testValidateDefaultConfig(self):
     # Every config listed in CONFIG_PATHS must pass validate_config after
     # being run through fillout_defaults.
     for config_path in CONFIG_PATHS:
         with open(config_path) as f:
             config = yaml.safe_load(f)
         config = fillout_defaults(config)
         try:
             validate_config(config)
         except Exception as e:
             # Several files are checked in one test, so report which one
             # failed and why instead of a generic message.
             self.fail("Config {} did not pass validation test: {}".format(
                 config_path, e))
コード例 #2
0
 def testValidateDefaultConfig(self):
     # A config holding only an AWS provider section is passed through
     # fillout_defaults and must then be accepted by validate_config.
     provider_section = {
         "type": "aws",
         "region": "us-east-1",
         "availability_zone": "us-east-1a",
     }
     filled = fillout_defaults({"provider": provider_section})
     try:
         validate_config(filled)
     except Exception:
         self.fail("Default config did not pass validation test!")
コード例 #3
0
ファイル: autoscaler_test.py プロジェクト: adgirish/ray
 def testValidateDefaultConfig(self):
     # Only the provider section is given explicitly; the result of
     # fillout_defaults on that config has to pass validate_config.
     minimal_config = {
         "provider": {
             "type": "aws",
             "region": "us-east-1",
             "availability_zone": "us-east-1a",
         },
     }
     completed = fillout_defaults(minimal_config)
     try:
         validate_config(completed)
     except Exception:
         self.fail("Default config did not pass validation test!")
コード例 #4
0
 def testValidateNetworkConfig(self):
     # Downloads the example AWS config from GitHub and checks that it
     # passes validate_config after fillout_defaults. Needs network access.
     web_yaml = "https://raw.githubusercontent.com/ray-project/ray/" \
         "master/python/ray/autoscaler/aws/example-full.yaml"
     # Use the response as a context manager so the connection is closed
     # even if reading fails.
     with urllib.request.urlopen(web_yaml, timeout=5) as response:
         content = response.read()
     # yaml.safe_load accepts the downloaded bytes directly, so there is no
     # need to round-trip them through a temporary file.
     config = yaml.safe_load(content)
     config = fillout_defaults(config)
     try:
         validate_config(config)
     except Exception:
         self.fail("Config did not pass validation test!")
コード例 #5
0
ファイル: commands.py プロジェクト: zqxyz73/ray
def teardown_cluster(config_file, yes, workers_only, override_cluster_name,
                     keep_min_workers):
    """Destroys all nodes of a Ray cluster described by a config file.

    Args:
        config_file: Path to the cluster config file (parsed as YAML).
        yes: Passed to ``confirm`` along with the destroy prompt.
        workers_only: If truthy, only worker nodes are terminated and the
            head node is not queried.
        override_cluster_name: If not None, replaces ``cluster_name`` in
            the loaded config.
        keep_min_workers: If truthy, ``min_workers`` workers (default 0)
            are excluded from termination.
    """

    # Read via a context manager so the file handle is not leaked.
    with open(config_file) as f:
        config = yaml.safe_load(f.read())
    if override_cluster_name is not None:
        config["cluster_name"] = override_cluster_name
    config = fillout_defaults(config)
    validate_config(config)

    confirm("This will destroy your cluster", yes)

    provider = get_node_provider(config["provider"], config["cluster_name"])
    try:

        def remaining_nodes():
            """Return the nodes that still have to be terminated."""
            workers = provider.non_terminated_nodes({
                TAG_RAY_NODE_TYPE: NODE_TYPE_WORKER
            })

            if keep_min_workers:
                min_workers = config.get("min_workers", 0)
                logger.info("teardown_cluster: "
                            "Keeping {} nodes...".format(min_workers))
                # random.sample raises ValueError on a negative sample
                # size, which happened whenever fewer than min_workers
                # workers were alive. Clamp to zero in that case.
                num_to_terminate = max(0, len(workers) - min_workers)
                workers = random.sample(workers, num_to_terminate)

            if workers_only:
                return workers

            head = provider.non_terminated_nodes({
                TAG_RAY_NODE_TYPE: NODE_TYPE_HEAD
            })

            return head + workers

        # Loop here to check that both the head and worker nodes are actually
        #   really gone
        A = remaining_nodes()
        with LogTimer("teardown_cluster: done."):
            while A:
                logger.info("teardown_cluster: "
                            "Shutting down {} nodes...".format(len(A)))
                provider.terminate_nodes(A)
                time.sleep(1)
                A = remaining_nodes()
    finally:
        # Always give the provider a chance to release its resources.
        provider.cleanup()
コード例 #6
0
ファイル: commands.py プロジェクト: robertnishihara/ray
def teardown_cluster(config_file, yes, workers_only, override_cluster_name):
    """Destroys all nodes of a Ray cluster described by a config file.

    Args:
        config_file: Path to the cluster config file (parsed as YAML).
        yes: Passed to ``confirm`` along with the destroy prompt.
        workers_only: If truthy, head nodes are left out and only worker
            nodes are terminated.
        override_cluster_name: If not None, replaces ``cluster_name`` in
            the loaded config.
    """

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects; consider yaml.safe_load if config files
    # may come from an untrusted source.
    # Read via a context manager so the file handle is not leaked.
    with open(config_file) as f:
        config = yaml.load(f.read())
    if override_cluster_name is not None:
        config["cluster_name"] = override_cluster_name
    validate_config(config)
    config = fillout_defaults(config)

    confirm("This will destroy your cluster", yes)

    provider = get_node_provider(config["provider"], config["cluster_name"])

    try:

        def remaining_nodes():
            """Return the ids of nodes that still have to be terminated."""
            nodes = []
            # Head nodes are only targeted when not in workers-only mode.
            if not workers_only:
                nodes.extend(
                    provider.non_terminated_nodes({
                        TAG_RAY_NODE_TYPE: "head"
                    }))
            nodes.extend(
                provider.non_terminated_nodes({
                    TAG_RAY_NODE_TYPE: "worker"
                }))
            return nodes

        # Loop here to check that both the head and worker nodes are actually
        #   really gone
        A = remaining_nodes()
        with LogTimer("teardown_cluster: Termination done."):
            while A:
                logger.info("teardown_cluster: "
                            "Terminating {} nodes...".format(len(A)))
                provider.terminate_nodes(A)
                time.sleep(1)
                A = remaining_nodes()
    finally:
        # Always give the provider a chance to release its resources.
        provider.cleanup()
コード例 #7
0
ファイル: commands.py プロジェクト: wanghuimu/ray
def teardown_cluster(config_file, yes, workers_only, override_cluster_name):
    """Destroys all nodes of a Ray cluster described by a config file.

    Args:
        config_file: Path to the cluster config file (parsed as YAML).
        yes: Passed to ``confirm`` along with the destroy prompt.
        workers_only: If truthy, head nodes are left out and only worker
            nodes are terminated.
        override_cluster_name: If not None, replaces ``cluster_name`` in
            the loaded config.
    """

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects; consider yaml.safe_load if config files
    # may come from an untrusted source.
    # The context manager closes the config file once it has been read.
    with open(config_file) as config_fh:
        config = yaml.load(config_fh.read())
    if override_cluster_name is not None:
        config["cluster_name"] = override_cluster_name
    validate_config(config)
    config = fillout_defaults(config)

    confirm("This will destroy your cluster", yes)

    provider = get_node_provider(config["provider"], config["cluster_name"])

    try:

        def remaining_nodes():
            """Return the ids of nodes that still have to be terminated."""
            # In workers-only mode the head node query is skipped entirely.
            if workers_only:
                A = []
            else:
                A = list(
                    provider.non_terminated_nodes({
                        TAG_RAY_NODE_TYPE: "head"
                    }))

            A += list(
                provider.non_terminated_nodes({
                    TAG_RAY_NODE_TYPE: "worker"
                }))
            return A

        # Loop here to check that both the head and worker nodes are actually
        #   really gone
        A = remaining_nodes()
        with LogTimer("teardown_cluster: Termination done."):
            while A:
                logger.info("teardown_cluster: "
                            "Terminating {} nodes...".format(len(A)))
                provider.terminate_nodes(A)
                time.sleep(1)
                A = remaining_nodes()
    finally:
        # Runs whether or not termination succeeded.
        provider.cleanup()
コード例 #8
0
 def testReportsConfigFailures(self):
     # The provider type is "external" only while defaults are filled in;
     # it is switched to "mock" before the config is written out.
     cluster_config = copy.deepcopy(SMALL_CLUSTER)
     cluster_config["provider"]["type"] = "external"
     cluster_config = fillout_defaults(cluster_config)
     cluster_config["provider"]["type"] = "mock"
     path = self.write_config(cluster_config)
     self.provider = MockProvider()
     # The process runner is set up with "setup_cmd" as a failing command.
     failing_runner = MockProcessRunner(fail_cmds=["setup_cmd"])
     scaler = StandardAutoscaler(
         path,
         LoadMetrics(),
         max_failures=0,
         process_runner=failing_runner,
         update_interval_s=0)
     for _ in range(2):
         scaler.update()
     self.waitForNodes(2)
     self.provider.finish_starting_nodes()
     scaler.update()
     # Both nodes are expected to end up tagged as update-failed.
     failed_filter = {TAG_RAY_NODE_STATUS: STATUS_UPDATE_FAILED}
     self.waitForNodes(2, tag_filters=failed_filter)
コード例 #9
0
ファイル: commands.py プロジェクト: robertnishihara/ray
def _bootstrap_config(config):
    """Resolve a cluster config via its provider, caching the result.

    The config is first passed through ``fillout_defaults``. A SHA-1 of its
    JSON serialization names a cache file in the system temp directory; if
    that file exists its contents are returned without validation or
    bootstrapping.

    Args:
        config: The cluster config dict.

    Returns:
        The resolved config, either loaded from the cache file or produced
        by the provider's ``bootstrap_config`` function.

    Raises:
        NotImplementedError: If no importer is registered in
            ``NODE_PROVIDERS`` for ``config["provider"]["type"]``.
    """
    config = fillout_defaults(config)

    hasher = hashlib.sha1()
    # sort_keys makes the serialization, and hence the cache key,
    # independent of dict ordering.
    hasher.update(json.dumps([config], sort_keys=True).encode("utf-8"))
    cache_key = os.path.join(tempfile.gettempdir(),
                             "ray-config-{}".format(hasher.hexdigest()))
    if os.path.exists(cache_key):
        # Read via a context manager so the cache file handle is not leaked.
        with open(cache_key) as f:
            return json.loads(f.read())
    validate_config(config)

    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))

    bootstrap_config, _ = importer()
    resolved_config = bootstrap_config(config)
    with open(cache_key, "w") as f:
        f.write(json.dumps(resolved_config))
    return resolved_config
コード例 #10
0
 def testReportsConfigFailures(self):
     # Defaults are filled in while the provider type is "external"; the
     # type is then set to "mock" before the config is written out.
     cluster_config = copy.deepcopy(SMALL_CLUSTER)
     cluster_config["provider"]["type"] = "external"
     cluster_config = fillout_defaults(cluster_config)
     cluster_config["provider"]["type"] = "mock"
     path = self.write_config(cluster_config)
     self.provider = MockProvider()
     # The process runner is set up with "cmd1" as a failing command.
     failing_runner = MockProcessRunner(fail_cmds=["cmd1"])
     scaler = StandardAutoscaler(
         path,
         LoadMetrics(),
         max_failures=0,
         process_runner=failing_runner,
         update_interval_s=0)
     for _ in range(2):
         scaler.update()
     self.waitForNodes(2)
     # Flip every mock node to "running" before the next update.
     for mock_node in self.provider.mock_nodes.values():
         mock_node.state = "running"
     scaler.update()
     failed_filter = {TAG_RAY_NODE_STATUS: "update-failed"}
     self.waitForNodes(2, tag_filters=failed_filter)
コード例 #11
0
ファイル: commands.py プロジェクト: zionzheng/ray
def create_or_update_cluster(config_file, override_min_workers,
                             override_max_workers, no_restart, yes):
    """Create or updates an autoscaling Ray cluster from a config file.

    Args:
        config_file: Path to the cluster config file (parsed as YAML).
        override_min_workers: If not None, replaces ``min_workers`` in the
            loaded config.
        override_max_workers: If not None, replaces ``max_workers`` in the
            loaded config.
        no_restart: Forwarded to ``get_or_create_head_node``.
        yes: Forwarded to ``get_or_create_head_node``.

    Raises:
        NotImplementedError: If no importer is registered in
            ``NODE_PROVIDERS`` for the configured provider type.
    """

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects; consider yaml.safe_load if config files
    # may come from an untrusted source.
    # Read via a context manager so the file handle is not leaked.
    with open(config_file) as f:
        config = yaml.load(f.read())
    validate_config(config)
    config = fillout_defaults(config)

    # Overrides are applied after defaults so they always win.
    if override_min_workers is not None:
        config["min_workers"] = override_min_workers
    if override_max_workers is not None:
        config["max_workers"] = override_max_workers

    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))

    bootstrap_config, _ = importer()
    config = bootstrap_config(config)
    get_or_create_head_node(config, no_restart, yes)
コード例 #12
0
ファイル: commands.py プロジェクト: adgirish/ray
def create_or_update_cluster(
        config_file, override_min_workers, override_max_workers,
        no_restart, yes):
    """Create or updates an autoscaling Ray cluster from a config file.

    Args:
        config_file: Path to the cluster config file (parsed as YAML).
        override_min_workers: If not None, replaces ``min_workers`` in the
            loaded config.
        override_max_workers: If not None, replaces ``max_workers`` in the
            loaded config.
        no_restart: Forwarded to ``get_or_create_head_node``.
        yes: Forwarded to ``get_or_create_head_node``.

    Raises:
        NotImplementedError: If no importer is registered in
            ``NODE_PROVIDERS`` for the configured provider type.
    """

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects; consider yaml.safe_load if config files
    # may come from an untrusted source.
    # The context manager closes the config file once it has been read.
    with open(config_file) as config_fh:
        config = yaml.load(config_fh.read())
    validate_config(config)
    config = fillout_defaults(config)

    # Overrides are applied after defaults so they always win.
    if override_min_workers is not None:
        config["min_workers"] = override_min_workers
    if override_max_workers is not None:
        config["max_workers"] = override_max_workers

    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError(
            "Unsupported provider {}".format(config["provider"]))

    bootstrap_config, _ = importer()
    config = bootstrap_config(config)
    get_or_create_head_node(config, no_restart, yes)
コード例 #13
0
ファイル: commands.py プロジェクト: zionzheng/ray
def teardown_cluster(config_file, yes):
    """Destroys all nodes of a Ray cluster described by a config file.

    Nodes tagged as head are terminated first; afterwards every node the
    provider still reports is terminated repeatedly until none remain.

    Args:
        config_file: Path to the cluster config file (parsed as YAML).
        yes: Passed to ``confirm`` along with the destroy prompt.
    """

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects; consider yaml.safe_load if config files
    # may come from an untrusted source.
    # Read via a context manager so the file handle is not leaked.
    with open(config_file) as f:
        config = yaml.load(f.read())
    validate_config(config)
    config = fillout_defaults(config)

    confirm("This will destroy your cluster", yes)

    provider = get_node_provider(config["provider"], config["cluster_name"])
    head_node_tags = {
        TAG_RAY_NODE_TYPE: "Head",
    }
    for node in provider.nodes(head_node_tags):
        print("Terminating head node {}".format(node))
        provider.terminate_node(node)
    # An empty tag filter is used to list every remaining node; poll until
    # the provider reports none.
    nodes = provider.nodes({})
    while nodes:
        for node in nodes:
            print("Terminating worker {}".format(node))
            provider.terminate_node(node)
        time.sleep(5)
        nodes = provider.nodes({})
コード例 #14
0
ファイル: commands.py プロジェクト: adgirish/ray
def teardown_cluster(config_file, yes):
    """Destroys all nodes of a Ray cluster described by a config file.

    Head-tagged nodes are terminated first, then whatever nodes the
    provider still lists are terminated in a polling loop until the list
    is empty.

    Args:
        config_file: Path to the cluster config file (parsed as YAML).
        yes: Passed to ``confirm`` along with the destroy prompt.
    """

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects; consider yaml.safe_load if config files
    # may come from an untrusted source.
    # The context manager closes the config file once it has been read.
    with open(config_file) as config_fh:
        config = yaml.load(config_fh.read())
    validate_config(config)
    config = fillout_defaults(config)

    confirm("This will destroy your cluster", yes)

    provider = get_node_provider(config["provider"], config["cluster_name"])
    head_node_tags = {
        TAG_RAY_NODE_TYPE: "Head",
    }
    for node in provider.nodes(head_node_tags):
        print("Terminating head node {}".format(node))
        provider.terminate_node(node)
    # Re-query with an empty tag filter after each pass and stop once the
    # provider returns no nodes.
    nodes = provider.nodes({})
    while nodes:
        for node in nodes:
            print("Terminating worker {}".format(node))
            provider.terminate_node(node)
        time.sleep(5)
        nodes = provider.nodes({})
コード例 #15
0
ファイル: commands.py プロジェクト: nskh/ray
def file_sync(config_file):
    """Sync the configured file mounts to the cluster's head node.

    Loads and bootstraps the config, looks up the first node tagged as
    head and calls ``sync_files`` on a ``NodeUpdaterProcess`` for it.
    Exits the process with status 1 if no head node is found.

    Args:
        config_file: Path to the cluster config file (parsed as YAML).

    Raises:
        NotImplementedError: If no importer is registered in
            ``NODE_PROVIDERS`` for the configured provider type.
    """
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects; consider yaml.safe_load if config files
    # may come from an untrusted source.
    # Read via a context manager so the file handle is not leaked.
    with open(config_file) as f:
        config = yaml.load(f.read())
    validate_config(config)
    config = fillout_defaults(config)
    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))

    bootstrap_config, provider_cls = importer()
    config = bootstrap_config(config)

    provider = provider_cls(config["provider"], config["cluster_name"])
    head_node_tags = {
        TAG_RAY_NODE_TYPE: "Head",
    }
    nodes = provider.nodes(head_node_tags)
    if len(nodes) > 0:
        head_node = nodes[0]
    else:
        print("Head node of cluster ({}) not found!".format(
            config["cluster_name"]))
        sys.exit(1)

    runtime_hash = hash_runtime_conf(config["file_mounts"], config)

    # An empty list is passed in the updater's sixth positional slot; only
    # sync_files is invoked on it here.
    updater = NodeUpdaterProcess(
        head_node,
        config["provider"],
        config["auth"],
        config["cluster_name"],
        config["file_mounts"],
        [],
        runtime_hash,
        redirect_output=False)
    updater.sync_files(config["file_mounts"])
コード例 #16
0
def load_config(yaml_file_name):
    """Load a YAML config file, validate it and fill in defaults.

    Args:
        yaml_file_name: Path to the YAML config file.

    Returns:
        The result of ``fillout_defaults`` applied to the validated config.
    """
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects; consider yaml.safe_load if config files
    # may come from an untrusted source.
    # Read via a context manager so the file handle is not leaked.
    with open(yaml_file_name) as f:
        config = yaml.load(f.read())
    validate_config(config)
    return fillout_defaults(config)