def testImportingCorrectClass(self):
    """Check correct import when coordinator_address is in config yaml."""
    provider_config = {"coordinator_address": "fake_address:1234"}
    coordinator_node_provider = NODE_PROVIDERS.get("local")(provider_config)
    assert coordinator_node_provider is CoordinatorSenderNodeProvider
    local_node_provider = NODE_PROVIDERS.get("local")({})
    assert local_node_provider is LocalNodeProvider
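# A self-contained sketch (not from the original code) of the registry
# dispatch the test above exercises: NODE_PROVIDERS maps a provider-type
# string to an importer that picks a concrete provider class based on the
# provider config. The class bodies here are stand-ins for illustration.
class LocalNodeProvider:
    pass


class CoordinatorSenderNodeProvider:
    pass


def _import_local(provider_config):
    # A coordinator address in the config selects the coordinator-backed
    # provider; otherwise the plain local provider is returned.
    if "coordinator_address" in provider_config:
        return CoordinatorSenderNodeProvider
    return LocalNodeProvider


NODE_PROVIDERS = {"local": _import_local}

assert NODE_PROVIDERS.get("local")(
    {"coordinator_address": "host:1234"}) is CoordinatorSenderNodeProvider
assert NODE_PROVIDERS.get("local")({}) is LocalNodeProvider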
def _bootstrap_config(config: Dict[str, Any],
                      no_config_cache: bool = False) -> Dict[str, Any]:
    config = prepare_config(config)

    hasher = hashlib.sha1()
    hasher.update(json.dumps([config], sort_keys=True).encode("utf-8"))
    cache_key = os.path.join(tempfile.gettempdir(),
                             "ray-config-{}".format(hasher.hexdigest()))

    if os.path.exists(cache_key) and not no_config_cache:
        cli_logger.old_info(logger, "Using cached config at {}", cache_key)

        config_cache = json.loads(open(cache_key).read())
        if config_cache.get("_version", -1) == CONFIG_CACHE_VERSION:
            # todo: is it fine to re-resolve? afaik it should be.
            # we can have migrations otherwise or something
            # but this seems overcomplicated given that resolving is
            # relatively cheap
            try_reload_log_state(config_cache["config"]["provider"],
                                 config_cache.get("provider_log_info"))
            cli_logger.verbose("Loaded cached config from " + cf.bold("{}"),
                               cache_key)

            return config_cache["config"]
        else:
            cli_logger.warning(
                "Found cached cluster config "
                "but the version " + cf.bold("{}") + " "
                "(expected " + cf.bold("{}") + ") does not match.\n"
                "This is normal if the cluster launcher was updated.\n"
                "Config will be re-resolved.",
                config_cache.get("_version", "none"), CONFIG_CACHE_VERSION)

    validate_config(config)

    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))

    provider_cls = importer(config["provider"])

    with cli_logger.timed(  # todo: better message
            "Bootstrapping {} config",
            PROVIDER_PRETTY_NAMES.get(config["provider"]["type"])):
        resolved_config = provider_cls.bootstrap_config(config)

    if not no_config_cache:
        with open(cache_key, "w") as f:
            config_cache = {
                "_version": CONFIG_CACHE_VERSION,
                "provider_log_info": try_get_log_state(config["provider"]),
                "config": resolved_config
            }
            f.write(json.dumps(config_cache))

    return resolved_config
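# A minimal, runnable sketch (standard library only, not from the original
# code) isolating how the cache key above is derived: the prepared config is
# hashed with SHA-1 and the digest names a temp file, so any change to the
# config yields a fresh cache entry while equivalent configs share one.
import hashlib
import json
import os
import tempfile


def _config_cache_key(config: dict) -> str:
    hasher = hashlib.sha1()
    # sort_keys=True keeps the hash stable across dict key orderings.
    hasher.update(json.dumps([config], sort_keys=True).encode("utf-8"))
    return os.path.join(tempfile.gettempdir(),
                        "ray-config-{}".format(hasher.hexdigest()))


# Two semantically identical configs map to the same cache file.
assert _config_cache_key({"a": 1, "b": 2}) == _config_cache_key({"b": 2, "a": 1})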
def get_provider(config):
    """Returns a node provider instance. Modifies the config."""
    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))
    bootstrap_config, provider_cls = importer()
    config = bootstrap_config(config)
    return provider_cls(config["provider"], config["cluster_name"])
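# A hypothetical usage sketch for get_provider above. The provider type,
# region, and cluster name are made-up values; a real config would come from
# a cluster YAML. nodes() with an empty tag filter (the same API used in
# file_sync below) would list all nodes known to the provider.
config = {
    "provider": {"type": "aws", "region": "us-west-2"},
    "cluster_name": "example-cluster",
}
provider = get_provider(config)  # bootstraps the config, then instantiates
all_nodes = provider.nodes({})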
def create_or_update_cluster(config_file, override_min_workers,
                             override_max_workers, no_restart):
    """Creates or updates an autoscaling Ray cluster from a config json."""
    config = yaml.load(open(config_file).read())
    validate_config(config)
    if override_min_workers is not None:
        config["min_workers"] = override_min_workers
    if override_max_workers is not None:
        config["max_workers"] = override_max_workers
    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))
    bootstrap_config, _ = importer()
    config = bootstrap_config(config)
    get_or_create_head_node(config, no_restart)
def _bootstrap_config(config):
    config = fillout_defaults(config)

    hasher = hashlib.sha1()
    hasher.update(json.dumps([config], sort_keys=True).encode("utf-8"))
    cache_key = os.path.join(tempfile.gettempdir(),
                             "ray-config-{}".format(hasher.hexdigest()))
    if os.path.exists(cache_key):
        return json.loads(open(cache_key).read())
    validate_config(config)
    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))
    bootstrap_config, _ = importer()
    resolved_config = bootstrap_config(config)
    with open(cache_key, "w") as f:
        f.write(json.dumps(resolved_config))
    return resolved_config
def create_or_update_cluster(config_file, override_min_workers,
                             override_max_workers, no_restart, yes):
    """Creates or updates an autoscaling Ray cluster from a config json."""
    config = yaml.load(open(config_file).read())
    validate_config(config)
    config = fillout_defaults(config)
    if override_min_workers is not None:
        config["min_workers"] = override_min_workers
    if override_max_workers is not None:
        config["max_workers"] = override_max_workers
    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError(
            "Unsupported provider {}".format(config["provider"]))
    bootstrap_config, _ = importer()
    config = bootstrap_config(config)
    get_or_create_head_node(config, no_restart, yes)
def _bootstrap_config(config, no_config_cache=False):
    config = prepare_config(config)

    hasher = hashlib.sha1()
    hasher.update(json.dumps([config], sort_keys=True).encode("utf-8"))
    cache_key = os.path.join(tempfile.gettempdir(),
                             "ray-config-{}".format(hasher.hexdigest()))
    if os.path.exists(cache_key) and not no_config_cache:
        logger.info("Using cached config at {}".format(cache_key))
        return json.loads(open(cache_key).read())
    validate_config(config)

    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))

    provider_cls = importer(config["provider"])
    resolved_config = provider_cls.bootstrap_config(config)
    if not no_config_cache:
        with open(cache_key, "w") as f:
            f.write(json.dumps(resolved_config))
    return resolved_config
def file_sync(config_file):
    """Syncs the configured file mounts to the cluster's head node, if one exists."""
    config = yaml.load(open(config_file).read())
    validate_config(config)
    config = fillout_defaults(config)

    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))

    bootstrap_config, provider_cls = importer()
    config = bootstrap_config(config)
    provider = provider_cls(config["provider"], config["cluster_name"])
    head_node_tags = {
        TAG_RAY_NODE_TYPE: "Head",
    }
    nodes = provider.nodes(head_node_tags)
    if len(nodes) > 0:
        head_node = nodes[0]
    else:
        print("Head node of cluster ({}) not found!".format(
            config["cluster_name"]))
        sys.exit(1)

    runtime_hash = hash_runtime_conf(config["file_mounts"], config)
    updater = NodeUpdaterProcess(
        head_node,
        config["provider"],
        config["auth"],
        config["cluster_name"],
        config["file_mounts"], [],
        runtime_hash,
        redirect_output=False)
    updater.sync_files(config["file_mounts"])
def create_or_update_cluster(config_file: str,
                             override_min_workers: Optional[int],
                             override_max_workers: Optional[int],
                             no_restart: bool,
                             restart_only: bool,
                             yes: bool,
                             override_cluster_name: Optional[str],
                             no_config_cache: bool = False,
                             redirect_command_output: bool = False,
                             use_login_shells: bool = True) -> None:
    """Creates or updates an autoscaling Ray cluster from a config json."""
    set_using_login_shells(use_login_shells)
    if not use_login_shells:
        cmd_output_util.set_allow_interactive(False)
    if redirect_command_output is None:
        # Do not redirect by default.
        cmd_output_util.set_output_redirected(False)
    else:
        cmd_output_util.set_output_redirected(redirect_command_output)

    if use_login_shells:
        cli_logger.warning(
            "Commands running under a login shell can produce more "
            "output than special processing can handle.")
        cli_logger.warning(
            "Thus, the output from subcommands will be logged as is.")
        cli_logger.warning(
            "Consider using {}, {}.", cf.bold("--use-normal-shells"),
            cf.underlined("if you tested your workflow and it is compatible"))
        cli_logger.newline()

    def handle_yaml_error(e):
        cli_logger.error("Cluster config invalid")
        cli_logger.newline()
        cli_logger.error("Failed to load YAML file " + cf.bold("{}"),
                         config_file)
        cli_logger.newline()
        with cli_logger.verbatim_error_ctx("PyYAML error:"):
            cli_logger.error(e)
        cli_logger.abort()

    try:
        config = yaml.safe_load(open(config_file).read())
    except FileNotFoundError:
        cli_logger.abort(
            "Provided cluster configuration file ({}) does not exist",
            cf.bold(config_file))
        raise
    except yaml.parser.ParserError as e:
        handle_yaml_error(e)
        raise
    except yaml.scanner.ScannerError as e:
        handle_yaml_error(e)
        raise

    # todo: validate file_mounts, ssh keys, etc.

    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        cli_logger.abort(
            "Unknown provider type " + cf.bold("{}") + "\n"
            "Available providers are: {}", config["provider"]["type"],
            cli_logger.render_list([
                k for k in NODE_PROVIDERS.keys()
                if NODE_PROVIDERS[k] is not None
            ]))
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))

    cli_logger.success("Cluster configuration valid")

    printed_overrides = False

    def handle_cli_override(key, override):
        if override is not None:
            if key in config:
                nonlocal printed_overrides
                printed_overrides = True
                cli_logger.warning(
                    "`{}` override provided on the command line.\n"
                    "  Using " + cf.bold("{}") + cf.dimmed(
                        " [configuration file has " + cf.bold("{}") + "]"),
                    key, override, config[key])
            config[key] = override

    handle_cli_override("min_workers", override_min_workers)
    handle_cli_override("max_workers", override_max_workers)
    handle_cli_override("cluster_name", override_cluster_name)

    if printed_overrides:
        cli_logger.newline()

    cli_logger.labeled_value("Cluster", config["cluster_name"])

    # Disable the cli_logger here if needed
    # because it only supports AWS.
    if config["provider"]["type"] != "aws":
        cli_logger.old_style = True
    cli_logger.newline()

    config = _bootstrap_config(config, no_config_cache=no_config_cache)

    try_logging_config(config)
    get_or_create_head_node(config, config_file, no_restart, restart_only,
                            yes, override_cluster_name)
def create_or_update_cluster(config_file: str,
                             override_min_workers: Optional[int],
                             override_max_workers: Optional[int],
                             no_restart: bool,
                             restart_only: bool,
                             yes: bool,
                             override_cluster_name: Optional[str],
                             no_config_cache: bool,
                             log_old_style: bool,
                             log_color: str,
                             verbose: int) -> None:
    """Creates or updates an autoscaling Ray cluster from a config json."""
    cli_logger.old_style = log_old_style
    cli_logger.color_mode = log_color
    cli_logger.verbosity = verbose
    # todo: disable by default when the command output handling PR makes it in
    cli_logger.dump_command_output = True

    cli_logger.detect_colors()

    def handle_yaml_error(e):
        cli_logger.error(
            "Cluster config invalid.\n"
            "Failed to load YAML file " + cf.bold("{}"), config_file)
        cli_logger.newline()
        with cli_logger.verbatim_error_ctx("PyYAML error:"):
            cli_logger.error(e)
        cli_logger.abort()

    try:
        config = yaml.safe_load(open(config_file).read())
    except FileNotFoundError:
        cli_logger.abort(
            "Provided cluster configuration file ({}) does not exist.",
            cf.bold(config_file))
    except yaml.parser.ParserError as e:
        handle_yaml_error(e)
    except yaml.scanner.ScannerError as e:
        handle_yaml_error(e)

    # todo: validate file_mounts, ssh keys, etc.

    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        cli_logger.abort(
            "Unknown provider type " + cf.bold("{}") + "\n"
            "Available providers are: {}", config["provider"]["type"],
            cli_logger.render_list([
                k for k in NODE_PROVIDERS.keys()
                if NODE_PROVIDERS[k] is not None
            ]))
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))

    cli_logger.success("Cluster configuration valid.\n")

    printed_overrides = False

    def handle_cli_override(key, override):
        if override is not None:
            if key in config:
                nonlocal printed_overrides
                printed_overrides = True
                cli_logger.warning(
                    "`{}` override provided on the command line.\n"
                    "  Using " + cf.bold("{}") + cf.dimmed(
                        " [configuration file has " + cf.bold("{}") + "]"),
                    key, override, config[key])
            config[key] = override

    handle_cli_override("min_workers", override_min_workers)
    handle_cli_override("max_workers", override_max_workers)
    handle_cli_override("cluster_name", override_cluster_name)

    if printed_overrides:
        cli_logger.newline()

    cli_logger.labeled_value("Cluster", config["cluster_name"])

    # Disable the cli_logger here if needed
    # because it only supports AWS.
    if config["provider"]["type"] != "aws":
        cli_logger.old_style = True

    config = _bootstrap_config(config, no_config_cache)

    if config["provider"]["type"] != "aws":
        cli_logger.old_style = False

    try_logging_config(config)
    get_or_create_head_node(config, config_file, no_restart, restart_only,
                            yes, override_cluster_name)
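# A minimal, self-contained sketch (not from the original code) of the
# closure-with-nonlocal pattern used by handle_cli_override above: the
# closure mutates the config and records whether any key was overridden so
# the caller can add spacing afterwards. apply_overrides is a hypothetical
# name introduced here for illustration.
def apply_overrides(config: dict, overrides: dict) -> bool:
    printed_overrides = False

    def handle(key, override):
        nonlocal printed_overrides
        if override is not None:
            if key in config:
                printed_overrides = True
                print("`{}` override: using {} instead of {}".format(
                    key, override, config[key]))
            config[key] = override

    for key, value in overrides.items():
        handle(key, value)
    return printed_overrides


cfg = {"min_workers": 1, "max_workers": 10, "cluster_name": "demo"}
apply_overrides(cfg, {"max_workers": 4, "cluster_name": None})
# None means "no override given", so cluster_name keeps its file value.
assert cfg["max_workers"] == 4 and cfg["cluster_name"] == "demo"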