Exemple #1
0
    def terminate_node(self, node_id):
        node = self._get_cached_node(node_id)
        if self.cache_stopped_nodes:
            if node.spot_instance_request_id:
                cli_logger.print(
                    "Terminating instance {} " +
                    cf.dimmed("(cannot stop spot instances, only terminate)"),
                    node_id)  # todo: show node name?

                cli_logger.old_info(
                    logger,
                    "AWSNodeProvider: terminating node {} (spot nodes cannot "
                    "be stopped, only terminated)", node_id)
                node.terminate()
            else:
                cli_logger.print("Stopping instance {} " + cf.dimmed(
                    "(to terminate instead, "
                    "set `cache_stopped_nodes: False` "
                    "under `provider` in the cluster configuration)"),
                                 node_id)  # todo: show node name?

                cli_logger.old_info(
                    logger,
                    "AWSNodeProvider: stopping node {}. To terminate nodes "
                    "on stop, set 'cache_stopped_nodes: False' in the "
                    "provider config.".format(node_id))
                node.stop()
        else:
            node.terminate()

        self.tag_cache.pop(node_id, None)
        self.tag_cache_pending.pop(node_id, None)
Exemple #2
0
        def remaining_nodes():
            workers = provider.non_terminated_nodes(
                {TAG_RAY_NODE_KIND: NODE_KIND_WORKER})

            if keep_min_workers:
                min_workers = config.get("min_workers", 0)

                cli_logger.print(
                    "{} random worker nodes will not be shut down. " +
                    cf.dimmed("(due to {})"), cf.bold(min_workers),
                    cf.bold("--keep-min-workers"))
                cli_logger.old_info(logger,
                                    "teardown_cluster: Keeping {} nodes...",
                                    min_workers)

                workers = random.sample(workers, len(workers) - min_workers)

            # todo: it's weird to kill the head node but not all workers
            if workers_only:
                cli_logger.print(
                    "The head node will not be shut down. " +
                    cf.dimmed("(due to {})"), cf.bold("--workers-only"))

                return workers

            head = provider.non_terminated_nodes(
                {TAG_RAY_NODE_KIND: NODE_KIND_HEAD})

            return head + workers
Exemple #3
0
def show():
    """
    Show the modifiers and colors
    """
    # modifiers
    sys.stdout.write(colorful.bold('bold') + ' ')
    sys.stdout.write(colorful.dimmed('dimmed') + ' ')
    sys.stdout.write(colorful.italic('italic') + ' ')
    sys.stdout.write(colorful.underlined('underlined') + ' ')
    sys.stdout.write(colorful.inversed('inversed') + ' ')
    sys.stdout.write(colorful.concealed('concealed') + ' ')
    sys.stdout.write(colorful.struckthrough('struckthrough') + '\n')

    # foreground colors
    sys.stdout.write(colorful.red('red') + ' ')
    sys.stdout.write(colorful.green('green') + ' ')
    sys.stdout.write(colorful.yellow('yellow') + ' ')
    sys.stdout.write(colorful.blue('blue') + ' ')
    sys.stdout.write(colorful.magenta('magenta') + ' ')
    sys.stdout.write(colorful.cyan('cyan') + ' ')
    sys.stdout.write(colorful.white('white') + '\n')

    # background colors
    sys.stdout.write(colorful.on_red('red') + ' ')
    sys.stdout.write(colorful.on_green('green') + ' ')
    sys.stdout.write(colorful.on_yellow('yellow') + ' ')
    sys.stdout.write(colorful.on_blue('blue') + ' ')
    sys.stdout.write(colorful.on_magenta('magenta') + ' ')
    sys.stdout.write(colorful.on_cyan('cyan') + ' ')
    sys.stdout.write(colorful.on_white('white') + '\n')
Exemple #4
0
    def terminate_nodes(self, node_ids):
        if not node_ids:
            return
        if self.cache_stopped_nodes:
            spot_ids = []
            on_demand_ids = []

            for node_id in node_ids:
                if self._get_cached_node(node_id).spot_instance_request_id:
                    spot_ids += [node_id]
                else:
                    on_demand_ids += [node_id]

            if on_demand_ids:
                # todo: show node names?
                cli_logger.print(
                    "Stopping instances {} " + cf.dimmed(
                        "(to terminate instead, "
                        "set `cache_stopped_nodes: False` "
                        "under `provider` in the cluster configuration)"),
                    cli_logger.render_list(on_demand_ids))
                cli_logger.old_info(
                    logger,
                    "AWSNodeProvider: stopping nodes {}. To terminate nodes "
                    "on stop, set 'cache_stopped_nodes: False' in the "
                    "provider config.", on_demand_ids)

                self.ec2.meta.client.stop_instances(InstanceIds=on_demand_ids)
            if spot_ids:
                cli_logger.print(
                    "Terminating instances {} " +
                    cf.dimmed("(cannot stop spot instances, only terminate)"),
                    cli_logger.render_list(spot_ids))
                cli_logger.old_info(
                    logger,
                    "AWSNodeProvider: terminating nodes {} (spot nodes cannot "
                    "be stopped, only terminated)", spot_ids)

                self.ec2.meta.client.terminate_instances(InstanceIds=spot_ids)
        else:
            self.ec2.meta.client.terminate_instances(InstanceIds=node_ids)

        for node_id in node_ids:
            self.tag_cache.pop(node_id, None)
            self.tag_cache_pending.pop(node_id, None)
Exemple #5
0
 def handle_cli_override(key, override):
     if override is not None:
         if key in config:
             nonlocal printed_overrides
             printed_overrides = True
             cli_logger.warning(
                 "`{}` override provided on the command line.\n"
                 "  Using " + cf.bold("{}") +
                 cf.dimmed(" [configuration file has " + cf.bold("{}") +
                           "]"), key, override, config[key])
         config[key] = override
Exemple #6
0
    def wait_ready(self, deadline):
        with cli_logger.group("Waiting for SSH to become available",
                              _numbered=("[]", 1, 6)):
            with LogTimer(self.log_prefix + "Got remote shell"):
                cli_logger.old_info(logger, "{}Waiting for remote shell...",
                                    self.log_prefix)

                cli_logger.print("Running `{}` as a test.", cf.bold("uptime"))
                first_conn_refused_time = None
                while time.time() < deadline and \
                        not self.provider.is_terminated(self.node_id):
                    try:
                        cli_logger.old_debug(logger,
                                             "{}Waiting for remote shell...",
                                             self.log_prefix)

                        # Run outside of the container
                        self.cmd_runner.run("uptime", run_env="host")
                        cli_logger.old_debug(logger, "Uptime succeeded.")
                        cli_logger.success("Success.")
                        return True
                    except ProcessRunnerError as e:
                        first_conn_refused_time = \
                            cmd_output_util.handle_ssh_fails(
                                e, first_conn_refused_time,
                                retry_interval=READY_CHECK_INTERVAL)
                        time.sleep(READY_CHECK_INTERVAL)
                    except Exception as e:
                        # TODO(maximsmol): we should not be ignoring
                        # exceptions if they get filtered properly
                        # (new style log + non-interactive shells)
                        #
                        # however threading this configuration state
                        # is a pain and I'm leaving it for later

                        retry_str = str(e)
                        if hasattr(e, "cmd"):
                            retry_str = "(Exit Status {}): {}".format(
                                e.returncode, " ".join(e.cmd))

                        cli_logger.print(
                            "SSH still not available {}, "
                            "retrying in {} seconds.", cf.dimmed(retry_str),
                            cf.bold(str(READY_CHECK_INTERVAL)))
                        cli_logger.old_debug(logger,
                                             "{}Node not up, retrying: {}",
                                             self.log_prefix, retry_str)

                        time.sleep(READY_CHECK_INTERVAL)

        assert False, "Unable to connect to node"
def run_cmd_redirected(cmd, silent=False, use_login_shells=False):
    """Run a command and optionally redirect output to a file.

    Args:
        cmd (List[str]): Command to run.
        silent (bool): If true, the command output will be silenced completely
                       (redirected to /dev/null), unless verbose logging
                       is enabled. Use this for runnign utility commands like
                       rsync.
    """
    if silent and cli_logger.verbosity < 1:
        return _run_and_process_output(
            cmd, stdout_file=None, use_login_shells=use_login_shells)

    if not is_output_redirected():
        return _run_and_process_output(
            cmd, stdout_file=sys.stdout, use_login_shells=use_login_shells)
    else:
        tmpfile_path = os.path.join(
            tempfile.gettempdir(), "ray-up-{}-{}.txt".format(
                cmd[0], time.time()))
        with open(
                tmpfile_path,
                mode="w",
                # line buffering
                buffering=1) as tmp:
            cli_logger.verbose("Command stdout is redirected to {}",
                               cf.bold(tmp.name))
            cli_logger.verbose(
                cf.dimmed("Use --dump-command-output to "
                          "dump to terminal instead."))

            return _run_and_process_output(
                cmd,
                stdout_file=tmp,
                stderr_file=tmp,
                use_login_shells=use_login_shells)
Exemple #8
0
def _format_msg(msg: str,
                *args: Any,
                _tags: Dict[str, Any] = None,
                _numbered: Tuple[str, int, int] = None,
                _no_format: bool = None,
                **kwargs: Any):
    """Formats a message for printing.

    Renders `msg` using the built-in `str.format` and the passed-in
    `*args` and `**kwargs`.

    Args:
        *args (Any): `.format` arguments for `msg`.
        _tags (Dict[str, Any]):
            key-value pairs to display at the end of
            the message in square brackets.

            If a tag is set to `True`, it is printed without the value,
            the presence of the tag treated as a "flag".

            E.g. `_format_msg("hello", _tags=dict(from=mom, signed=True))`
                 `hello [from=Mom, signed]`
        _numbered (Tuple[str, int, int]):
            `(brackets, i, n)`

            The `brackets` string is composed of two "bracket" characters,
            `i` is the index, `n` is the total.

            The string `{i}/{n}` surrounded by the "brackets" is
            prepended to the message.

            This is used to number steps in a procedure, with different
            brackets specifying different major tasks.

            E.g. `_format_msg("hello", _numbered=("[]", 0, 5))`
                 `[0/5] hello`
        _no_format (bool):
            If `_no_format` is `True`,
            `.format` will not be called on the message.

            Useful if the output is user-provided or may otherwise
            contain an unexpected formatting string (e.g. "{}").

    Returns:
        The formatted message.
    """

    if isinstance(msg, str) or isinstance(msg, ColorfulString):
        tags_str = ""
        if _tags is not None:
            tags_list = []
            for k, v in _tags.items():
                if v is True:
                    tags_list += [k]
                    continue
                if v is False:
                    continue

                tags_list += [k + "=" + v]
            if tags_list:
                tags_str = cf.reset(
                    cf.dimmed(" [{}]".format(", ".join(tags_list))))

        numbering_str = ""
        if _numbered is not None:
            chars, i, n = _numbered
            numbering_str = cf.dimmed(chars[0] + str(i) + "/" + str(n) +
                                      chars[1]) + " "

        if _no_format:
            # todo: throw if given args/kwargs?
            return numbering_str + msg + tags_str
        return numbering_str + cf.format(msg, *args, **kwargs) + tags_str

    if kwargs:
        raise ValueError("We do not support printing kwargs yet.")

    res = [msg, *args]
    res = [str(x) for x in res]
    return ", ".join(res)
Exemple #9
0
    def confirm(self,
                yes: bool,
                msg: str,
                *args: Any,
                _abort: bool = False,
                _default: bool = False,
                **kwargs: Any):
        """Display a confirmation dialog.

        Valid answers are "y/yes/true/1" and "n/no/false/0".

        Args:
            yes (bool): If `yes` is `True` the dialog will default to "yes"
                        and continue without waiting for user input.
            _abort (bool):
                If `_abort` is `True`,
                "no" means aborting the program.
            _default (bool):
                The default action to take if the user just presses enter
                with no input.
        """
        if self.old_style:
            return

        should_abort = _abort
        default = _default

        if not self.interactive and not yes:
            # no formatting around --yes here since this is non-interactive
            self.error("This command requires user confirmation. "
                       "When running non-interactively, supply --yes to skip.")
            raise ValueError("Non-interactive confirm without --yes.")

        if default:
            yn_str = "Y/n"
        else:
            yn_str = "y/N"

        confirm_str = cf.underlined("Confirm [" + yn_str + "]:") + " "

        rendered_message = _format_msg(msg, *args, **kwargs)
        # the rendered message ends with ascii coding
        if rendered_message and not msg.endswith("\n"):
            rendered_message += " "

        msg_len = len(rendered_message.split("\n")[-1])
        complete_str = rendered_message + confirm_str

        if yes:
            self._print(complete_str + "y " +
                        cf.dimmed("[automatic, due to --yes]"))
            return True

        self._print(complete_str, _linefeed=False)

        res = None
        yes_answers = ["y", "yes", "true", "1"]
        no_answers = ["n", "no", "false", "0"]
        try:
            while True:
                ans = sys.stdin.readline()
                ans = ans.lower()

                if ans == "\n":
                    res = default
                    break

                ans = ans.strip()
                if ans in yes_answers:
                    res = True
                    break
                if ans in no_answers:
                    res = False
                    break

                indent = " " * msg_len
                self.error("{}Invalid answer: {}. "
                           "Expected {} or {}", indent, cf.bold(ans.strip()),
                           self.render_list(yes_answers, "/"),
                           self.render_list(no_answers, "/"))
                self._print(indent + confirm_str, _linefeed=False)
        except KeyboardInterrupt:
            self.newline()
            res = default

        if not res and should_abort:
            # todo: make sure we tell the user if they
            # need to do cleanup
            self._print("Exiting...")
            raise SilentClickException(
                "Exiting due to the response to confirm(should_abort=True).")

        return res
Exemple #10
0
def stop(force, verbose, log_style, log_color):
    """Stop Ray processes manually on the local machine."""

    cli_logger.configure(log_style, log_color, verbose)

    # Note that raylet needs to exit before object store, otherwise
    # it cannot exit gracefully.
    is_linux = sys.platform.startswith("linux")
    processes_to_kill = [
        # The first element is the substring to filter.
        # The second element, if True, is to filter ps results by command name
        # (only the first 15 charactors of the executable name on Linux);
        # if False, is to filter ps results by command with all its arguments.
        # See STANDARD FORMAT SPECIFIERS section of
        # http://man7.org/linux/man-pages/man1/ps.1.html
        # about comm and args. This can help avoid killing non-ray processes.
        # Format:
        # Keyword to filter, filter by command (True)/filter by args (False)
        ["raylet", True],
        ["plasma_store", True],
        ["gcs_server", True],
        ["monitor.py", False],
        ["redis-server", False],
        ["default_worker.py", False],  # Python worker.
        ["ray::", True],  # Python worker. TODO(mehrdadn): Fix for Windows
        ["io.ray.runtime.runner.worker.DefaultWorker", False],  # Java worker.
        ["log_monitor.py", False],
        ["reporter.py", False],
        ["dashboard.py", False],
        ["ray_process_reaper.py", False],
    ]

    process_infos = []
    for proc in psutil.process_iter(["name", "cmdline"]):
        try:
            process_infos.append((proc, proc.name(), proc.cmdline()))
        except psutil.Error:
            pass

    total_found = 0
    total_stopped = 0
    for keyword, filter_by_cmd in processes_to_kill:
        if filter_by_cmd and is_linux and len(keyword) > 15:
            # getting here is an internal bug, so we do not use cli_logger
            msg = ("The filter string should not be more than {} "
                   "characters. Actual length: {}. Filter: {}").format(
                       15, len(keyword), keyword)
            raise ValueError(msg)

        found = []
        for candidate in process_infos:
            proc, proc_cmd, proc_args = candidate
            corpus = (proc_cmd
                      if filter_by_cmd else subprocess.list2cmdline(proc_args))
            if keyword in corpus:
                found.append(candidate)

        for proc, proc_cmd, proc_args in found:
            total_found += 1

            proc_string = str(subprocess.list2cmdline(proc_args))
            if verbose:
                operation = "Terminating" if force else "Killing"
                cli_logger.old_info(logger, "%s process %s: %s", operation,
                                    proc.pid, proc_string)
            try:
                if force:
                    proc.kill()
                else:
                    # TODO(mehrdadn): On Windows, this is forceful termination.
                    # We don't want CTRL_BREAK_EVENT, because that would
                    # terminate the entire process group. What to do?
                    proc.terminate()

                if force:
                    cli_logger.verbose("Killed `{}` {} ", cf.bold(proc_string),
                                       cf.dimmed("(via SIGKILL)"))
                else:
                    cli_logger.verbose("Send termination request to `{}` {}",
                                       cf.bold(proc_string),
                                       cf.dimmed("(via SIGTERM)"))

                total_stopped += 1
            except psutil.NoSuchProcess:
                cli_logger.verbose(
                    "Attempted to stop `{}`, but process was already dead.",
                    cf.bold(proc_string))
                pass
            except (psutil.Error, OSError) as ex:
                cli_logger.error("Could not terminate `{}` due to {}",
                                 cf.bold(proc_string), str(ex))
                cli_logger.old_error(logger, "Error: %s", ex)

    if total_found == 0:
        cli_logger.print("Did not find any active Ray processes.")
    else:
        if total_stopped == total_found:
            cli_logger.success("Stopped all {} Ray processes.", total_stopped)
        else:
            cli_logger.warning(
                "Stopped only {} out of {} Ray processes. "
                "Set `{}` to see more details.", total_stopped, total_found,
                cf.bold("-v"))
            cli_logger.warning("Try running the command again, or use `{}`.",
                               cf.bold("--force"))
Exemple #11
0
    results = []
    for module in all_modules:
        print(colorful.bold("\n###### Check module '{}' ######\n".format(module)))

        result = Result()
        result.module = module
        result.modules = lbuild_query_modules(module)

        # Check previous results
        for previous in results:
            if result.modules == previous.modules:
                print("Similar to {}".format(previous.module))
                result.similar = previous.module
                result.success = previous.success
                break
        else:
            # Unknown combination of modules, build the source code
            result.success = build(module)

        results.append(result)

    print(colorful.bold("\n\nResults:\n"))
    for result in results:
        result_output = colorful.green("SUCCESS") if result.success else colorful.red("FAIL")
        print("- {} - {}".format(result.module, result_output))
        if result.similar is not None:
            print(colorful.dimmed("    Same modules as {}".format(result.similar)))
        else:
            print(colorful.dimmed("    {}".format(
                ", ".join([x.replace("outpost", "") for x in result.modules]))))