Example #1
0
def dashboard(cluster_config_file, cluster_name, port, remote_port):
    """Port-forward a Ray cluster's dashboard to the local machine."""
    # NOTE(review): the docstring is left short on purpose -- if this function
    # is registered as a CLI command, the docstring is probably its help text.
    #
    # A single (local, remote) pair: local `port` is tunnelled to
    # `remote_port` on the cluster.
    forwards = [(port, remote_port)]
    try:
        click.echo(
            "Attempting to establish dashboard locally at"
            " localhost:{} connected to"
            " remote port {}".format(port, remote_port))
        # No `cmd` is passed, so exec_cluster runs with whatever its default
        # command is; only the port forwarding is requested explicitly here.
        # NOTE(review): how long this call blocks depends on exec_cluster --
        # confirm before relying on when the success message below is printed.
        exec_cluster(
            cluster_config_file,
            override_cluster_name=cluster_name,
            port_forward=forwards)
        click.echo("Successfully established connection.")
    except Exception as err:
        # Every failure is reported as one ClickException. `from None` drops
        # the original traceback, so the exception text is embedded in the
        # message instead.
        failure = (
            "Failed to forward dashboard from remote port {1} to local port "
            "{0}. There are a couple possibilities: \n 1. The remote port is "
            "incorrectly specified \n 2. The local port {0} is already in "
            "use.\n The exception is: {2}").format(port, remote_port, err)
        raise click.ClickException(failure) from None
Example #2
0
def run_on_cluster(cluster_config: Union[dict, str],
                   *,
                   cmd: Optional[str] = None,
                   run_env: str = "auto",
                   no_config_cache: bool = False,
                   port_forward: Optional[commands.Port_forward] = None,
                   with_output: bool = False) -> Optional[str]:
    """Run a command on a cluster through ``commands.exec_cluster``.

    Everything except ``cluster_config`` is keyword-only.

    Args:
        cluster_config (Union[str, dict]): The cluster's config dict, or a
            path to a file that contains the config.
        cmd (str): Command to run; ``None`` means a no-op command.
        run_env (str): Where the command runs: one of "auto", "host" or
            "docker".
        no_config_cache (bool): If true, disable the config cache and
            resolve all environment settings from the Cloud provider again.
        port_forward ( (int,int) or list[(int,int)]): Port(s) to forward.
        with_output (bool): Whether to capture the command output.

    Returns:
        Whatever ``commands.exec_cluster`` returns -- the command output as
        a string according to the annotation (presumably ``None`` when
        output is not captured; confirm against ``exec_cluster``).
    """
    # These exec_cluster options are not exposed by this API and are always
    # pinned to the same values.
    pinned_options = dict(
        screen=False,
        tmux=False,
        stop=False,
        start=False,
        override_cluster_name=None,
    )
    with _as_config_file(cluster_config) as config_path:
        # Return from inside the `with` so the context manager still wraps
        # the whole call.
        return commands.exec_cluster(
            config_path,
            cmd=cmd,
            run_env=run_env,
            no_config_cache=no_config_cache,
            port_forward=port_forward,
            with_output=with_output,
            **pinned_options)
Example #3
0
def exec(cluster_config_file, cmd, run_env, screen, tmux, stop, start,
         cluster_name, no_config_cache, port_forward, log_style, log_color,
         verbose):
    """Execute a command via SSH on a Ray cluster."""
    # NOTE(review): the docstring is left unchanged -- if this function is
    # registered as a CLI command, the docstring is probably its help text.
    cli_logger.configure(log_style, log_color, verbose)

    # Each requested port is forwarded to the same port number on the other
    # side, so every entry becomes a (port, port) pair.
    forwards = [(each_port, each_port) for each_port in port_forward]

    exec_cluster(
        cluster_config_file,
        cmd=cmd,
        run_env=run_env,
        screen=screen,
        tmux=tmux,
        stop=stop,
        start=start,
        override_cluster_name=cluster_name,
        no_config_cache=no_config_cache,
        port_forward=forwards,
    )
Example #4
0
def launch_example_cluster(example_module_name, example_argv, config_file,
                           screen, tmux, stop, start, override_cluster_name,
                           port_forward):
    """Validate an example run and launch it on a cluster via exec_cluster.

    The example module is imported by name and `example_argv` is parsed with
    the module's own parser. The user is asked to confirm the run: once if no
    upload directory is set, and once for the total number of trials. The
    `softlearning run_example_cluster` command is then executed on the
    cluster. If no cluster name override is given, one is generated from the
    parsed arguments.

    Returns:
        Whatever `exec_cluster` returns.
    """
    module = importlib.import_module(example_module_name)
    parsed_args = module.get_parser().parse_args(example_argv)

    # Only the trial count is needed from the experiment info; it is shown to
    # the user in the confirmation prompt below.
    experiment_kwargs = generate_experiment_kwargs(
        module.get_variant_spec(parsed_args), parsed_args)
    total_number_of_trials = get_experiments_info(
        [experiment_kwargs])['total_number_of_trials']

    if not parsed_args.upload_dir:
        confirm_yes_no(
            "`upload_dir` is empty. No results will be uploaded to cloud"
            " storage. Use `--upload-dir` argument to set upload dir."
            " Continue without upload directory?\n(yes/no) ")

    confirm_yes_no(f"Launch {total_number_of_trials} trials?\n(yes/no) ")

    if not override_cluster_name:
        override_cluster_name = unique_cluster_name(parsed_args)

    # NOTE(review): arguments are joined with plain spaces and are not shell
    # quoted, so an argument containing whitespace would be split remotely.
    cluster_command = ' '.join(
        ['softlearning', 'run_example_cluster', example_module_name,
         *example_argv])

    return exec_cluster(
        config_file=config_file,
        cmd=cluster_command,
        docker=False,
        screen=screen,
        tmux=tmux,
        stop=stop,
        start=start,
        override_cluster_name=override_cluster_name,
        port_forward=port_forward,
    )
Example #5
0
def submit(cluster_config_file, screen, tmux, stop, start, cluster_name,
           no_config_cache, port_forward, script, args, script_args, log_style,
           log_color, verbose):
    """Uploads and runs a script on the specified cluster.

    The script is automatically synced to the following location:

        os.path.join("~", os.path.basename(script))

    Example:
        >>> ray submit [CLUSTER.YAML] experiment.py -- --smoke-test
    """
    # NOTE(review): the docstring is left unchanged -- if this function is
    # registered as a CLI command, the docstring is probably its help text.
    cli_logger.configure(log_style, log_color, verbose)

    # Option validation. Each conflict is checked through the CLI logger and
    # again with a plain assert; both are kept because doassert's behaviour
    # is not visible from here.
    cli_logger.doassert(
        not (screen and tmux),
        "`{}` and `{}` are incompatible.",
        cf.bold("--screen"),
        cf.bold("--tmux"))
    cli_logger.doassert(
        not (script_args and args),
        "`{0}` and `{1}` are incompatible. Use only `{1}`.\n"
        "Example: `{2}`",
        cf.bold("--args"),
        cf.bold("-- <args ...>"),
        cf.bold("ray submit script.py -- --arg=123 --flag"))

    assert not (screen and tmux), "Can specify only one of `screen` or `tmux`."
    assert not (script_args and args), "Use -- --arg1 --arg2 for script args."

    if args:
        # `--args` still works but is deprecated; point the user at the
        # `-- <args ...>` form.
        cli_logger.warning(
            "`{}` is deprecated and will be removed in the future.",
            cf.bold("--args"))
        cli_logger.warning(
            "Use `{}` instead. Example: `{}`.",
            cf.bold("-- <args ...>"),
            cf.bold("ray submit script.py -- --arg=123 --flag"))
        cli_logger.newline()

    if start:
        # Create or update the cluster here; the exec_cluster call at the end
        # is therefore always made with start=False.
        create_or_update_cluster(
            config_file=cluster_config_file,
            override_min_workers=None,
            override_max_workers=None,
            no_restart=False,
            restart_only=False,
            yes=True,
            override_cluster_name=cluster_name,
            no_config_cache=no_config_cache,
            redirect_command_output=False,
            use_login_shells=True)

    # The script is copied to "~/<basename>" (down=False on the rsync call).
    remote_script = os.path.join("~", os.path.basename(script))
    rsync(
        cluster_config_file,
        script,
        remote_script,
        cluster_name,
        no_config_cache=no_config_cache,
        down=False)

    # Build `python <script> [arguments]`. `script_args` takes precedence;
    # the deprecated single `args` string is used only when no `script_args`
    # were given.
    run_parts = ["python", remote_script]
    if script_args:
        run_parts.extend(script_args)
    elif args is not None:
        run_parts.append(args)

    # Each requested port is forwarded to the same port number.
    forwards = [(each_port, each_port) for each_port in port_forward]
    remote_command = " ".join(run_parts)
    exec_cluster(
        cluster_config_file,
        cmd=remote_command,
        run_env="docker",
        screen=screen,
        tmux=tmux,
        stop=stop,
        start=False,
        override_cluster_name=cluster_name,
        no_config_cache=no_config_cache,
        port_forward=forwards)