def run_docker_stop(node, container_name):
    """Best-effort ``docker stop`` of ``container_name`` via a node updater.

    A ``NodeUpdaterThread`` is built for ``node`` and handed to ``_exec``
    together with the stop command and ``run_env="host"``. ``config`` and
    ``provider`` are not parameters; they are resolved from the surrounding
    scope.

    Any ``Exception`` raised while building the updater or running the
    command is swallowed and reported as a warning, so this never raises
    for a failed stop and always returns ``None``.

    Args:
        node: Node identifier, passed to the updater as ``node_id``.
        container_name: Name interpolated into the ``docker stop`` command.
    """
    try:
        # Every command list is empty: the updater is only used as a
        # handle for running the single command below.
        updater_kwargs = dict(
            node_id=node,
            provider_config=config["provider"],
            provider=provider,
            auth_config=config["auth"],
            cluster_name=config["cluster_name"],
            file_mounts=config["file_mounts"],
            initialization_commands=[],
            setup_commands=[],
            ray_start_commands=[],
            runtime_hash="",
            file_mounts_contents_hash="",
            is_head_node=False,
            docker_config=config.get("docker"),
        )
        updater = NodeUpdaterThread(**updater_kwargs)
        stop_cmd = f"docker stop {container_name}"
        _exec(updater, stop_cmd, False, False, run_env="host")
    except Exception:
        # Deliberately broad: a failed stop must not abort the caller.
        failure_msg = f"Docker stop failed on {node}"
        cli_logger.warning(failure_msg)
        cli_logger.old_warning(logger, failure_msg)
def run_docker_stop(node, container_name):
    """Best-effort ``docker stop`` of ``container_name`` via ``exec_cluster``.

    ``config_file`` and ``override_cluster_name`` are not parameters; they
    are resolved from the surrounding scope. Any ``Exception`` raised by
    ``exec_cluster`` is swallowed and reported as a warning, so this never
    raises for a failed stop and always returns ``None``.

    Args:
        node: Node identifier; used only in the failure warning text.
        container_name: Name interpolated into the ``docker stop`` command.
    """
    # NOTE(review): `node` is never passed to exec_cluster, so the command
    # target is whatever exec_cluster picks by default -- confirm that this
    # is the intended node.
    try:
        stop_cmd = f"docker stop {container_name}"
        exec_cluster(
            config_file,
            cmd=stop_cmd,
            run_env="host",
            screen=False,
            tmux=False,
            stop=False,
            start=False,
            override_cluster_name=override_cluster_name,
            port_forward=None,
            with_output=False,
        )
    except Exception:
        # Deliberately broad: a failed stop must not abort the caller.
        failure_msg = f"Docker stop failed on {node}"
        cli_logger.warning(failure_msg)
        cli_logger.old_warning(logger, failure_msg)
def warn_about_bad_start_command(start_commands):
    """Warn when the head start commands look misconfigured.

    Two independent checks run over the commands that contain the
    substring ``"ray start"``:

    * if there is no such command, warn that the Ray runtime will not be
      started;
    * if none of them contains ``"autoscaling-config"``, warn that the head
      node will not launch workers.

    When there is no ``ray start`` command at all, both warnings are
    emitted, because ``any()`` over an empty list is ``False``. Each
    warning goes through both ``cli_logger.warning`` and
    ``cli_logger.old_warning``.

    Args:
        start_commands: Iterable of command strings to inspect.

    Returns:
        None. The function only logs.
    """
    ray_start_cmds = [cmd for cmd in start_commands if "ray start" in cmd]

    if not ray_start_cmds:
        cli_logger.warning(
            "Ray runtime will not be started because `{}` is not in `{}`.",
            cf.bold("ray start"), cf.bold("head_start_ray_commands"))
        cli_logger.old_warning(
            logger,
            "Ray start is not included in the head_start_ray_commands section."
        )

    if not any("autoscaling-config" in cmd for cmd in ray_start_cmds):
        cli_logger.warning(
            "The head node will not launch any workers because "
            "`{}` does not have `{}` set.\n"
            "Potential fix: add `{}` to the `{}` command under `{}`.",
            cf.bold("ray start"), cf.bold("--autoscaling-config"),
            cf.bold("--autoscaling-config=~/ray_bootstrap_config.yaml"),
            cf.bold("ray start"), cf.bold("head_start_ray_commands"))
        # Adjacent literals are concatenated with no separator, so each
        # fragment needs its own trailing space to keep the words apart.
        cli_logger.old_warning(
            logger,
            "Ray start on the head node does not have the flag "
            "--autoscaling-config set. The head node will not launch "
            "workers. Add --autoscaling-config=~/ray_bootstrap_config.yaml "
            "to ray start in the head_start_ray_commands section.")
def _set_ssh_ip_if_required(self): if self.ssh_ip is not None: return # We assume that this never changes. # I think that's reasonable. deadline = time.time() + NODE_START_WAIT_S with LogTimer(self.log_prefix + "Got IP"): ip = self._wait_for_ip(deadline) cli_logger.doassert(ip is not None, "Could not get node IP.") # todo: msg assert ip is not None, "Unable to find IP of node" self.ssh_ip = ip # This should run before any SSH commands and therefore ensure that # the ControlPath directory exists, allowing SSH to maintain # persistent sessions later on. try: os.makedirs(self.ssh_control_path, mode=0o700, exist_ok=True) except OSError as e: cli_logger.warning("{}", str(e)) # todo: msg cli_logger.old_warning(logger, "{}", str(e))
def submit(cluster_config_file, screen, tmux, stop, start, cluster_name,
           no_config_cache, port_forward, script, args, script_args,
           log_style, log_color, verbose):
    """Uploads and runs a script on the specified cluster.

    The script is automatically synced to the following location:

        os.path.join("~", os.path.basename(script))

    Example:
        >>> ray submit [CLUSTER.YAML] experiment.py -- --smoke-test
    """
    # Flow: configure logging, validate flag combinations, optionally
    # create/update the cluster, rsync the script up, then run it with
    # `python` through exec_cluster.
    cli_logger.configure(log_style, log_color, verbose)

    # Each check is made twice on purpose: once through cli_logger.doassert
    # and once through a plain assert with the legacy message.
    multiplexers_clash = screen and tmux
    arg_styles_clash = script_args and args
    cli_logger.doassert(not multiplexers_clash,
                        "`{}` and `{}` are incompatible.",
                        cf.bold("--screen"), cf.bold("--tmux"))
    cli_logger.doassert(
        not arg_styles_clash,
        "`{0}` and `{1}` are incompatible. Use only `{1}`.\n"
        "Example: `{2}`", cf.bold("--args"), cf.bold("-- <args ...>"),
        cf.bold("ray submit script.py -- --arg=123 --flag"))
    assert not multiplexers_clash, \
        "Can specify only one of `screen` or `tmux`."
    assert not arg_styles_clash, "Use -- --arg1 --arg2 for script args."

    if args:
        # `--args` still works but is flagged as deprecated.
        cli_logger.warning(
            "`{}` is deprecated and will be removed in the future.",
            cf.bold("--args"))
        cli_logger.warning(
            "Use `{}` instead. Example: `{}`.",
            cf.bold("-- <args ...>"),
            cf.bold("ray submit script.py -- --arg=123 --flag"))
        cli_logger.newline()
        cli_logger.old_warning(
            logger,
            "ray submit [yaml] [script.py] --args=... is deprecated and "
            "will be removed in a future version of Ray. Use "
            "`ray submit [yaml] script.py -- --arg1 --arg2` instead.")

    if start:
        create_or_update_cluster(
            config_file=cluster_config_file,
            override_min_workers=None,
            override_max_workers=None,
            no_restart=False,
            restart_only=False,
            yes=True,
            override_cluster_name=cluster_name,
            no_config_cache=no_config_cache,
            redirect_command_output=False,
            use_login_shells=True)

    # Remote destination: "~/<basename of the local script>".
    remote_script = os.path.join("~", os.path.basename(script))
    rsync(
        cluster_config_file,
        script,
        remote_script,
        cluster_name,
        no_config_cache=no_config_cache,
        down=False)

    # `script_args` takes precedence; `args` is appended as a single token.
    if script_args:
        extra_args = list(script_args)
    elif args is not None:
        extra_args = [args]
    else:
        extra_args = []

    # Each requested port is forwarded to the same port number.
    forwarded_ports = [(port, port) for port in port_forward]
    command = " ".join(["python", remote_script, *extra_args])

    exec_cluster(
        cluster_config_file,
        cmd=command,
        run_env="docker",
        screen=screen,
        tmux=tmux,
        stop=stop,
        start=False,
        override_cluster_name=cluster_name,
        no_config_cache=no_config_cache,
        port_forward=forwarded_ports)