import argparse import warnings from typing import List, Union from determined import __version__ from determined.common.declarative_argparse import Arg, Cmd, add_args from .aws.cli import args_description as aws_args_description from .gcp.cli import args_description as gcp_args_description from .local.cli import args_description as local_args_description args_subs: List[Union[Arg, Cmd]] = [ # TODO(DET-5171): Remove --version flag when det-deploy is deprecated. Arg("--version", action="version", version="%(prog)s {}".format(__version__)), Arg("--no-preflight-checks", action="store_true", help="Disable preflight checks"), Arg( "--no-wait-for-master", action="store_true", help="Do not wait for master to come up after AWS or GCP clusters are deployed", ), local_args_description, aws_args_description, gcp_args_description, ] DEPLOY_CMD_NAME = "d|eploy" args_description = Cmd( DEPLOY_CMD_NAME, None, "manage deployments", args_subs,
args_description = Cmd( "e|xperiment", None, "manage experiments", [ # Inspection commands. Cmd( "list", list_experiments, "list experiments", [ Arg( "--all", "-a", action="store_true", help="show all experiments (including archived and other users')", ), Arg("--csv", action="store_true", help="print as CSV"), ], is_default=True, ), Cmd("config", config, "display experiment config", [experiment_id_arg("experiment ID")]), Cmd( "describe", describe, "describe experiment", [ Arg("experiment_ids", help="comma-separated list of experiment IDs to describe"), Arg("--metrics", action="store_true", help="display full metrics"), Group(
@authentication.required def remove_templates(args: Namespace) -> None: api.delete(args.master, path="templates/" + args.template_name) print(colored("Removed template {}".format(args.template_name), "green")) # fmt: off args_description = [ Cmd("template tpl", None, "manage config templates", [ Cmd("list ls", list_template, "list config templates", [ Arg("-d", "--details", action="store_true", help="show the configs of the templates"), ], is_default=True), Cmd("describe", describe_template, "describe config template", [ Arg("template_name", type=str, help="template name"), ]), Cmd("set", set_template, "set config template", [ Arg("template_name", help="template name"), Arg("template_file", type=FileType("r"), help="config template file (.yaml)") ]), Cmd("remove rm", remove_templates, "remove config template", [Arg("template_name", help="template name")]), ])
"gcp", None, "GCP help", [ Cmd( "down", handle_down, "delete gcp cluster", [ ArgGroup( "optional named arguments", None, [ Arg( "--local-state-path", type=str, default=os.getcwd(), help="local directory for storing cluster state", ), Arg( "--yes", action="store_true", help="no prompt when deleting resources", ), Arg( "--no-prompt", dest="yes", action="store_true", help=argparse.SUPPRESS, ), ], ),
str(port), "{}@{}".format(username, shell["id"]), *additional_opts, ] subprocess.run(cmd) print(colored("To reconnect, run: det shell open {}".format(shell["id"]), "green")) # fmt: off args_description = [ Cmd("shell", None, "manage shells", [ Cmd("list", command.list, "list shells", [ Arg("-q", "--quiet", action="store_true", help="only display the IDs"), Arg("--all", "-a", action="store_true", help="show all shells (including other users')") ], is_default=True), Cmd("config", command.config, "display shell config", [ Arg("id", type=str, help="shell ID"), ]), Cmd("start", start_shell, "start a new shell", [ Arg("ssh_opts", nargs="*", help="additional SSH options when connecting to the shell"), Arg("--config-file", default=None, type=FileType("r"), help="command config file (.yaml)"), Arg("-v", "--volume", action="append", default=[], help=VOLUME_DESC), Arg("-c", "--context", default=None, type=Path, help=CONTEXT_DESC), Arg("--config", action="append", default=[], help=CONFIG_DESC),
print(colored("Using search configuration:", "green")) yml = yaml.YAML() yml.indent(mapping=2, sequence=4, offset=2) yml.dump(experiment_config["searcher"], sys.stdout) print() print("This search will create a total of {} trial(s).".format( sum(j["results"].values()))) print(tabulate.tabulate(values, headers, tablefmt="presto"), flush=False) # fmt: off args_description = [ Arg("-u", "--user", help="run as the given user", metavar="username", default=None), Arg("-m", "--master", help="master address", metavar="address", default=get_default_master_address()), Arg("-v", "--version", action="version", help="print CLI version and exit", version="%(prog)s {}".format(determined.__version__)), experiment.args_description, checkpoint.args_description, Cmd(
args_description = [ Cmd( "m|odel", None, "manage models", [ Cmd( "list", list_models, "list all models in the registry", [ Arg( "--sort-by", type=str, choices=["name", "description", "creation_time", "last_updated_time"], default="last_updated_time", help="sort models by the given field", ), Arg( "--order-by", type=str, choices=["asc", "desc"], default="asc", help="order models in either ascending or descending order", ), Arg("--json", action="store_true", help="print as JSON"), ], is_default=True, ), Cmd(
None, "GKE help", [ Cmd( "up", handle_up, "create gke cluster", [ ArgGroup( "required named arguments", None, [ Arg( "--cluster-id", type=str, default=None, required=True, help="a unique name for the gke cluster", ), ], ), ArgGroup( "optional named arguments", None, [ Arg( "--agent-node-pool-name", "--gpu-node-pool-name", type=str, default=None, help="a unique name for the GPU node pool",
render_event_stream(msg) @authentication_required def open_notebook(args: Namespace) -> None: resp = api.get(args.master, "api/v1/notebooks/{}".format(args.notebook_id)).json()["notebook"] check_eq(resp["state"], "STATE_RUNNING", "Notebook must be in a running state") api.open(args.master, resp["serviceAddress"]) # fmt: off args_description = [ Cmd("notebook", None, "manage notebooks", [ Cmd("list ls", command.list, "list notebooks", [ Arg("-q", "--quiet", action="store_true", help="only display the IDs"), Arg("--all", "-a", action="store_true", help="show all notebooks (including other users')") ], is_default=True), Cmd("config", command.config, "display notebook config", [ Arg("id", type=str, help="notebook ID"), ]), Cmd("start", start_notebook, "start a new notebook", [ Arg("--config-file", default=None, type=FileType("r"), help="command config file (.yaml)"), Arg("-v", "--volume", action="append", default=[], help=VOLUME_DESC), Arg("-c", "--context", default=None, type=Path, help=CONTEXT_DESC), Arg("--config", action="append", default=[], help=CONFIG_DESC), Arg("--template", type=str,
return deploy_aws("down", args) args_description = Cmd( "aws", None, "AWS help", [ Cmd( "list", handle_list, "list CloudFormation stacks", [ Arg( "--region", type=str, default=None, help="AWS region", ), Arg("--profile", type=str, default=None, help="AWS profile"), ], ), Cmd( "down", handle_down, "delete CloudFormation stack", [ ArgGroup( "required named arguments", None, [ Arg(
args.master, "api/v1/notebooks/{}".format(notebook_id)).json()["notebook"] check_eq(resp["state"], "STATE_RUNNING", "Notebook must be in a running state") api.browser_open(args.master, resp["serviceAddress"]) # fmt: off args_description = [ Cmd("notebook", None, "manage notebooks", [ Cmd("list ls", command.list_tasks, "list notebooks", [ Arg("-q", "--quiet", action="store_true", help="only display the IDs"), Arg("--all", "-a", action="store_true", help="show all notebooks (including other users')") ], is_default=True), Cmd("config", command.config, "display notebook config", [ Arg("notebook_id", type=str, help="notebook ID"), ]), Cmd("start", start_notebook, "start a new notebook", [ Arg("--config-file", default=None, type=FileType("r"), help="command config file (.yaml)"),
def agent_id_completer(_1: str, parsed_args: argparse.Namespace, _2: Any) -> List[str]: r = api.get(parsed_args.master, "agents") return list(r.json().keys()) # fmt: off args_description = [ Cmd("a|gent", None, "manage agents", [ Cmd("list", list_agents, "list agents", [ Group( Arg("--csv", action="store_true", help="print as CSV"), Arg("--json", action="store_true", help="print as JSON"), ), ], is_default=True), Cmd("enable", patch_agent(True), "enable agent", [ Group( Arg("agent_id", help="agent ID", nargs="?", completer=agent_id_completer), Arg("--all", action="store_true", help="enable all agents"), ) ]), Cmd("disable", patch_agent(False), "disable agent", [ Group(
print("Deletion of checkpoints {} is in progress".format(args.checkpoints_uuids)) else: print("Aborting deletion of checkpoints.") main_cmd = Cmd( "c|heckpoint", None, "manage checkpoints", [ Cmd( "download", download, "download checkpoint from persistent storage", [ Arg("uuid", type=str, help="Download a checkpoint by specifying its UUID."), Arg( "-o", "--output-dir", type=str, help="Desired output directory for the checkpoint.", ), Arg( "-q", "--quiet", action="store_true", help="Only print the path to the checkpoint.", ), ], ), Cmd(
follow=args.follow, agent_ids=args.agent_ids, container_ids=args.container_ids, rank_ids=args.rank_ids, sources=args.sources, stdtypes=args.stdtypes, level_above=args.level, timestamp_before=args.timestamp_before, timestamp_after=args.timestamp_after, ) common_log_options = [ Arg( "-f", "--follow", action="store_true", help="follow the logs of a running task, similar to tail -f", ), Group( Arg( "--head", type=int, help= "number of lines to show, counting from the beginning of the log", ), Arg( "--tail", type=int, help="number of lines to show, counting from the end of the log", ), ),
# fetch logs. response = api.get( args.master, "logs", params={"greater_than_id": str(latest_log_id)} ) latest_log_id = process_response(response, latest_log_id) except KeyboardInterrupt: break # fmt: off args_description = [ Cmd("master", None, "manage master", [ Cmd("config", config, "fetch master config", [ Group(format_args["json"], format_args["yaml"]) ]), Cmd("info", get_master, "fetch master info", [ Group(format_args["json"], format_args["yaml"]) ]), Cmd("logs", logs, "fetch master logs", [ Arg("-f", "--follow", action="store_true", help="follow the logs of master, similar to tail -f"), Arg("--tail", type=int, help="number of lines to show, counting from the end " "of the log (default is all)") ]), ]) ] # type: List[Any] # fmt: on
def experiment_id_arg(help: str) -> Arg:  # noqa: A002
    """Declare the ``experiment_id`` argument with caller-supplied help text.

    The parameter is named ``help`` (shadowing the builtin, hence the
    ``noqa``) so that it mirrors the ``help=`` keyword it is forwarded to.

    Args:
        help: Text forwarded unchanged as the ``help`` keyword of ``Arg``.

    Returns:
        An ``Arg`` named ``experiment_id`` declared with ``type=int``.
    """
    id_arg = Arg("experiment_id", type=int, help=help)
    return id_arg
timestamp_after=args.timestamp_after, ) args_description = [ Cmd( "t|rial", None, "manage trials", [ Cmd( "describe", describe_trial, "describe trial", [ Arg("trial_id", type=int, help="trial ID"), Arg("--metrics", action="store_true", help="display full metrics"), Group( Arg("--csv", action="store_true", help="print as CSV"), Arg("--json", action="store_true", help="print JSON"), ), ], ), Cmd( "download", download, "download checkpoint for trial", [ Arg("trial_id", type=int, help="trial ID"), Group( Arg(
print(f"Successfully un-archived project {args.project_name}.") args_description = [ Cmd( "p|roject", None, "manage projects", [ Cmd( "list", list_workspace_projects, "list the projects associated with a workspace", [ Arg("workspace_name", type=str, help="name of the workspace"), Arg( "--sort-by", type=str, choices=["id", "name"], default="id", help="sort workspaces by the given field", ), Arg( "--order-by", type=str, choices=["asc", "desc"], default="asc", help= "order workspaces in either ascending or descending order",
context_path=args.context, )["command"] if args.detach: print(resp["id"]) return api.pprint_task_logs(args.master, resp["id"], follow=True) # fmt: off args_description = [ Cmd("command cmd", None, "manage commands", [ Cmd("list ls", command.list_tasks, "list commands", [ Arg("-q", "--quiet", action="store_true", help="only display the IDs"), Arg("--all", "-a", action="store_true", help="show all commands (including other users')"), Group( Arg("--csv", action="store_true", help="print as CSV"), Arg("--json", action="store_true", help="print as JSON"), ), ], is_default=True), Cmd("config", command.config, "display command config", [ Arg("command_id", type=str, help="command ID"), ]), Cmd("run", run_command, "create command", [ Arg("entrypoint", type=str, nargs=REMAINDER, help="entrypoint command and arguments to execute"), Arg("--config-file", default=None, type=FileType("r"),
import argparse import warnings from typing import List, Union from determined import __version__ from determined.common.declarative_argparse import Arg, Cmd, add_args from .aws.cli import args_description as aws_args_description from .gcp.cli import args_description as gcp_args_description from .local.cli import args_description as local_args_description args_subs: List[Union[Arg, Cmd]] = [ # TODO(DET-5171): Remove --version flag when det-deploy is deprecated. Arg("--version", action="version", version="%(prog)s {}".format(__version__)), Arg("--no-preflight-checks", action="store_true", help="Disable preflight checks"), Arg( "--no-wait-for-master", action="store_true", help= "Do not wait for master to come up after AWS or GCP clusters are deployed", ), Arg( "--image-repo-prefix", type=str, default="determinedai", help= "Docker image repository to use for determined-master and determined-agent images",
@authentication_required
def remove_client(parsed_args: Namespace) -> None:
    """Delete the OAuth client named by ``parsed_args.client_id``.

    Calls ``api.delete`` against ``parsed_args.master`` with the path
    ``oauth2/clients/<client_id>``.

    Args:
        parsed_args: Parsed CLI namespace; ``master`` and ``client_id`` are read.

    Raises:
        EnterpriseOnlyError: If the delete call raises ``NotFoundException``.
    """
    client_path = "oauth2/clients/{}".format(parsed_args.client_id)
    try:
        api.delete(parsed_args.master, client_path)
    except NotFoundException:
        # A not-found response is reported to the user as the oauth2 API
        # being unavailable.
        raise EnterpriseOnlyError("API not found: oauth2/clients")


# Declarative command tree: oauth -> client -> {list (default), add, remove}.
# fmt: off

args_description = [
    Cmd("oauth", None, "manage OAuth", [
        Cmd("client", None, "manage clients", [
            # Default subcommand; takes no arguments.
            Cmd(
                "list", list_clients, "list OAuth client applications", [],
                is_default=True,
            ),
            Cmd("add", add_client, "add OAuth client application", [
                Arg("name", type=str, help="descriptive name"),
                Arg("domain", type=str, help="redirect domain"),
            ]),
            Cmd("remove", remove_client, "remove OAuth client application", [
                Arg("client_id", help="OAuth client ID to remove"),
            ]),
        ])
    ])
]  # type: List[Any]

# fmt: on
print(default_template) args_description = Cmd( "aws", None, "AWS help", [ Cmd( "list", handle_list, "list CloudFormation stacks", [ Arg( "--region", type=str, default=None, help="AWS region", ), Arg("--profile", type=str, default=None, help="AWS profile"), ], ), Cmd( "down", handle_down, "delete CloudFormation stack", [ ArgGroup( "required named arguments", None, [ Arg(
checkpoint = Determined(args.master, None).get_checkpoint(args.uuid) render_checkpoint(checkpoint) args_description = Cmd( "c|heckpoint", None, "manage checkpoints", [ Cmd( "download", download, "download checkpoint from persistent storage", [ Arg("uuid", type=str, help="Download a checkpoint by specifying its UUID."), Arg( "-o", "--output-dir", type=str, help="Desired output directory for the checkpoint.", ), Arg( "-q", "--quiet", action="store_true", help="Only print the path to the checkpoint.", ), ], ),
if args.json else "resources/allocation/aggregated") print_response(api.get(args.master, path, params=params)) args_description = [ Cmd( "res|ources", None, "query historical resource allocation", [ Cmd( "raw", raw, "get raw allocation information", [ Arg("timestamp_after"), Arg("timestamp_before"), Arg("--json", action="store_true", help="output JSON rather than CSV"), ], ), Cmd( "agg|regated", aggregated, "get aggregated allocation information", [ Arg("start_date", help="first date to include"), Arg("end_date", help="last date to include"), Arg("--json", action="store_true",
args_description = Cmd( "local", None, "local help", [ Cmd( "cluster-up", handle_cluster_up, "Create a Determined cluster", [ Group( Arg( "--master-config-path", type=Path, default=None, help="path to master configuration", ), Arg( "--storage-host-path", type=Path, default=DEFAULT_STORAGE_HOST_PATH, help= "Storage location for cluster data (e.g. checkpoints)", ), ), Arg( "--agents", type=int, default=1, help="number of agents to start (on this machine)",
url = "commands/{}/events".format(resp["id"]) with api.ws(args.master, url) as ws: for msg in ws: render_event_stream(msg) # fmt: off args_description = [ Cmd("command cmd", None, "manage commands", [ Cmd("list ls", command.list, "list commands", [ Arg("-q", "--quiet", action="store_true", help="only display the IDs"), Arg("--all", "-a", action="store_true", help="show all commands (including other users')"), ], is_default=True), Cmd("config", command.config, "display command config", [ Arg("id", type=str, help="command ID"), ]), Cmd("run", run_command, "create command", [ Arg("entrypoint", type=str, nargs=REMAINDER, help="entrypoint command and arguments to execute"),
def experiment_id_arg(help: str) -> Arg:
    """Declare the ``experiment_id`` argument with caller-supplied help text.

    Args:
        help: Text forwarded unchanged as the ``help`` keyword of ``Arg``.

    Returns:
        An ``Arg`` named ``experiment_id`` declared with ``type=int`` and
        ``experiment_id_completer`` as its ``completer``.
    """
    id_arg = Arg(
        "experiment_id",
        type=int,
        help=help,
        completer=experiment_id_completer,
    )
    return id_arg
subprocess.run(cmd) print( colored(f"To reconnect, run: det shell open {shell['id']}", "green")) # fmt: off args_description = [ Cmd("shell", None, "manage shells", [ Cmd("list", partial(command.list_tasks), "list shells", [ Arg("-q", "--quiet", action="store_true", help="only display the IDs"), Arg("--all", "-a", action="store_true", help="show all shells (including other users')"), Group(format_args["json"], format_args["csv"]), ], is_default=True), Cmd("config", partial(command.config), "display shell config", [ Arg("shell_id", type=str, help="shell ID"), ]), Cmd("start", start_shell, "start a new shell", [ Arg("ssh_opts", nargs="*", help="additional SSH options when connecting to the shell"),
args.master, "api/v1/tensorboards/{}".format(tensorboard_id)).json()["tensorboard"] check_eq(resp["state"], "STATE_RUNNING", "TensorBoard must be in a running state") api.browser_open(args.master, resp["serviceAddress"]) # fmt: off args_description = [ Cmd("tensorboard", None, "manage TensorBoard instances", [ Cmd("list ls", partial(command.list_tasks), "list TensorBoard instances", [ Arg("-q", "--quiet", action="store_true", help="only display the IDs"), Arg("--all", "-a", action="store_true", help="show all TensorBoards (including other users')"), Group(format_args["json"], format_args["csv"]), ], is_default=True), Cmd("start", start_tensorboard, "start new TensorBoard instance", [ Arg("experiment_ids", type=int, nargs="*", help= "experiment IDs to load into TensorBoard. At most 100 trials from " "the specified experiment will be loaded into TensorBoard. If the "
OPERATION_TO_FN[args.command](args) args_description = Cmd( "local", None, "local help", [ Cmd( "cluster-up", handle_cluster_up, "Create a Determined cluster", [ Arg( "--master-config-path", type=str, default=None, help="path to master configuration", ), Arg( "--agents", type=int, default=1, help="number of agents to start (on this machine)", ), Arg("--master-port", type=int, default=8080, help="port to expose master on"), Arg( "--cluster-name", type=str,