Beispiel #1
0
def get_local_dump_archive(stream: bool = False,
                           output: Optional[str] = None,
                           logs: bool = True,
                           debug_state: bool = True,
                           pip: bool = True,
                           processes: bool = True,
                           processes_verbose: bool = False) -> Optional[str]:
    if stream and output:
        raise ValueError(
            "You can only use either `--output` or `--stream`, but not both.")

    parameters = GetParameters(
        logs=logs,
        debug_state=debug_state,
        pip=pip,
        processes=processes,
        processes_verbose=processes_verbose)

    with Archive() as archive:
        get_all_local_data(archive, parameters)

    tmp = archive.file

    if stream:
        with open(tmp, "rb") as fp:
            os.write(1, fp.read())
        os.remove(tmp)
        return None

    target = output or os.path.join(os.getcwd(), os.path.basename(tmp))
    os.rename(tmp, target)
    cli_logger.print(f"Created local data archive at {target}")

    return target
Beispiel #2
0
def get_cluster_dump_archive(cluster_config_file: Optional[str] = None,
                             host: Optional[str] = None,
                             ssh_user: Optional[str] = None,
                             ssh_key: Optional[str] = None,
                             docker: Optional[str] = None,
                             local: Optional[bool] = None,
                             output: Optional[str] = None,
                             logs: bool = True,
                             debug_state: bool = True,
                             pip: bool = True,
                             processes: bool = True,
                             processes_verbose: bool = False) -> Optional[str]:

    # Inform the user what kind of logs are collected (before actually
    # collecting, so they can abort)
    content_str = ""
    if logs:
        content_str += \
            "  - The logfiles of your Ray session\n" \
            "    This usually includes Python outputs (stdout/stderr)\n"

    if debug_state:
        content_str += \
            "  - Debug state information on your Ray cluster \n" \
            "    e.g. number of workers, drivers, objects, etc.\n"

    if pip:
        content_str += "  - Your installed Python packages (`pip freeze`)\n"

    if processes:
        content_str += \
            "  - Information on your running Ray processes\n" \
            "    This includes command line arguments\n"

    cli_logger.warning(
        "You are about to create a cluster dump. This will collect data from "
        "cluster nodes.\n\n"
        "The dump will contain this information:\n\n"
        f"{content_str}\n"
        f"If you are concerned about leaking private information, extract "
        f"the archive and inspect its contents before sharing it with "
        f"anyone.")

    # Parse arguments (e.g. fetch info from cluster config)
    cluster_config_file, hosts, ssh_user, ssh_key, docker, cluster_name = \
        _info_from_params(cluster_config_file, host, ssh_user, ssh_key, docker)

    nodes = [
        Node(
            host=h,
            ssh_user=ssh_user,
            ssh_key=ssh_key,
            docker_container=docker) for h in hosts
    ]

    if not nodes:
        cli_logger.error(
            "No nodes found. Specify with `--host` or by passing a ray "
            "cluster config to `--cluster`.")
        return None

    if cluster_config_file:
        nodes[0].is_head = True

    if local is None:
        # If called with a cluster config, this was probably started
        # from a laptop
        local = not bool(cluster_config_file)

    parameters = GetParameters(
        logs=logs,
        debug_state=debug_state,
        pip=pip,
        processes=processes,
        processes_verbose=processes_verbose)

    with Archive() as archive:
        if local:
            create_archive_for_local_and_remote_nodes(
                archive, remote_nodes=nodes, parameters=parameters)
        else:
            create_archive_for_remote_nodes(
                archive, remote_nodes=nodes, parameters=parameters)

    if not output:
        if cluster_name:
            filename = f"{cluster_name}_" \
                       f"{datetime.datetime.now():%Y-%m-%d_%H-%M-%S}.tar.gz"
        else:
            filename = f"collected_logs_" \
                       f"{datetime.datetime.now():%Y-%m-%d_%H-%M-%S}.tar.gz"
        output = os.path.join(os.getcwd(), filename)
    else:
        output = os.path.expanduser(output)

    os.rename(archive.file, output)
    return output