コード例 #1
0
# Declarative CLI tree for the checkpoint command group. Every Cmd receives,
# positionally: a name, a handler (None for the top-level group), a help
# string, and a list of nested Cmd/Arg entries.
# NOTE(review): the "|" in "c|heckpoint" presumably marks "c" as a short
# alias -- confirm against the Cmd implementation.
main_cmd = Cmd(
    "c|heckpoint",
    None,
    "manage checkpoints",
    [
        # download: positional checkpoint UUID, optional output directory,
        # and a quiet flag.
        Cmd(
            "download",
            download,
            "download checkpoint from persistent storage",
            [
                Arg("uuid", type=str, help="Download a checkpoint by specifying its UUID."),
                Arg(
                    "-o",
                    "--output-dir",
                    type=str,
                    help="Desired output directory for the checkpoint.",
                ),
                Arg(
                    "-q",
                    "--quiet",
                    action="store_true",
                    help="Only print the path to the checkpoint.",
                ),
            ],
        ),
        # describe: single positional checkpoint UUID.
        Cmd(
            "describe",
            describe,
            "describe checkpoint",
            [Arg("uuid", type=str, help="checkpoint uuid to describe")],
        ),
        # delete: one positional string holding a comma-separated list of
        # UUIDs (per its help text); --yes answers prompts automatically.
        Cmd(
            "delete",
            delete_checkpoints,
            "delete checkpoints",
            [
                Arg("checkpoints_uuids", help="comma-separated list of checkpoints to delete"),
                Arg(
                    "--yes",
                    action="store_true",
                    default=False,
                    help="automatically answer yes to prompts",
                ),
            ],
        ),
    ],
)
コード例 #2
0
# Declarative CLI tree for the "local" command group. Every Cmd receives,
# positionally: a name, a handler (None for the top-level group), a help
# string, and a list of nested Cmd/Arg/Group entries.
args_description = Cmd(
    "local",
    None,
    "local help",
    [
        # cluster-up: options for creating a cluster (master, database and
        # agents, judging by the flags below).
        Cmd(
            "cluster-up",
            handle_cluster_up,
            "Create a Determined cluster",
            [
                # NOTE(review): Group presumably makes these two options
                # mutually exclusive -- confirm against the Group implementation.
                Group(
                    Arg(
                        "--master-config-path",
                        type=Path,
                        default=None,
                        help="path to master configuration",
                    ),
                    Arg(
                        "--storage-host-path",
                        type=Path,
                        default=DEFAULT_STORAGE_HOST_PATH,
                        help=
                        "Storage location for cluster data (e.g. checkpoints)",
                    ),
                ),
                Arg(
                    "--agents",
                    type=int,
                    default=1,
                    help="number of agents to start (on this machine)",
                ),
                Arg("--master-port",
                    type=int,
                    default=8080,
                    help="port to expose master on"),
                Arg(
                    "--cluster-name",
                    type=str,
                    default="determined",
                    help="name for the cluster resources",
                ),
                Arg("--det-version",
                    type=str,
                    default=None,
                    help="version or commit to use"),
                Arg(
                    "--db-password",
                    type=str,
                    default="postgres",
                    help="password for master database",
                ),
                Arg(
                    "--delete-db",
                    action="store_true",
                    help="remove current master database",
                ),
                # Paired --gpu/--no-gpu flags stored under "gpu"; the default
                # is True unless sys.platform contains "darwin".
                BoolOptArg(
                    "--gpu",
                    "--no-gpu",
                    dest="gpu",
                    default=("darwin" not in sys.platform),
                    true_help="enable GPU support for agent",
                    false_help="disable GPU support for agent",
                ),
                Arg(
                    "--no-autorestart",
                    help=
                    "disable container auto-restart (recommended for local development)",
                    action="store_true",
                ),
                # NOTE(review): the default here is None, while master-up
                # below defaults the same flag to str(Path.home()).
                Arg(
                    "--auto-bind-mount",
                    type=str,
                    default=None,
                    help=
                    "directory to mount into task containers (default: user's home directory)",
                ),
                Arg(
                    "--no-auto-bind-mount",
                    help=
                    "disable mounting user's home directory into task containers",
                    action="store_true",
                ),
            ],
        ),
        # cluster-down: counterpart of cluster-up.
        Cmd(
            "cluster-down",
            handle_cluster_down,
            "Stop a Determined cluster",
            [
                Arg(
                    "--cluster-name",
                    type=str,
                    default="determined",
                    help="name for the cluster resources",
                ),
                Arg(
                    "--delete-db",
                    action="store_true",
                    help="remove current master database",
                ),
            ],
        ),
        # master-up: options for starting only a master.
        Cmd(
            "master-up",
            handle_master_up,
            "Start a Determined master",
            [
                # NOTE(review): these paths use type=str here but type=Path
                # in cluster-up above.
                Group(
                    Arg(
                        "--master-config-path",
                        type=str,
                        default=None,
                        help="path to master configuration",
                    ),
                    Arg(
                        "--storage-host-path",
                        type=str,
                        default=DEFAULT_STORAGE_HOST_PATH,
                        help=
                        "Storage location for cluster data (e.g. checkpoints)",
                    ),
                ),
                Arg("--master-port",
                    type=int,
                    default=8080,
                    help="port to expose master on"),
                # NOTE(review): --master-name shares its help text and default
                # with --cluster-name further down in this command.
                Arg(
                    "--master-name",
                    type=str,
                    default="determined",
                    help="name for the cluster resources",
                ),
                Arg("--det-version",
                    type=str,
                    default=None,
                    help="version or commit to use"),
                Arg(
                    "--db-password",
                    type=str,
                    default="postgres",
                    help="password for master database",
                ),
                Arg(
                    "--delete-db",
                    action="store_true",
                    help="remove current master database",
                ),
                Arg(
                    "--no-autorestart",
                    help=
                    "disable container auto-restart (recommended for local development)",
                    action="store_true",
                ),
                # The default is computed once, when this module is loaded.
                Arg(
                    "--auto-bind-mount",
                    type=str,
                    default=str(Path.home()),
                    help=
                    "directory to mount into task containers (default: user's home directory)",
                ),
                Arg(
                    "--no-auto-bind-mount",
                    help=
                    "disable mounting user's home directory into task containers",
                    action="store_true",
                ),
                Arg(
                    "--cluster-name",
                    type=str,
                    default="determined",
                    help="name for the cluster resources",
                ),
            ],
        ),
        # master-down: counterpart of master-up.
        Cmd(
            "master-down",
            handle_master_down,
            "Stop a Determined master",
            [
                Arg(
                    "--master-name",
                    type=str,
                    default="determined",
                    help="name for the cluster resources",
                ),
                Arg(
                    "--delete-db",
                    action="store_true",
                    help="remove current master database",
                ),
                Arg(
                    "--cluster-name",
                    type=str,
                    default="determined",
                    help="name for the cluster resources",
                ),
            ],
        ),
        # logs: show cluster logs; --no-follow disables following.
        Cmd(
            "logs",
            handle_logs,
            "Show the logs of a Determined cluster",
            [
                Arg(
                    "--cluster-name",
                    type=str,
                    default="determined",
                    help="name for the cluster resources",
                ),
                Arg("--no-follow",
                    help="disable following logs",
                    action="store_true"),
            ],
        ),
        # agent-up: the master hostname is the only positional argument.
        Cmd(
            "agent-up",
            handle_agent_up,
            "Start a Determined agent",
            [
                Arg("master_host", type=str, help="master hostname"),
                Arg("--master-port",
                    type=int,
                    default=8080,
                    help="master port"),
                Arg("--det-version",
                    type=str,
                    default=None,
                    help="version or commit to use"),
                Arg("--agent-name",
                    type=str,
                    default="det-agent",
                    help="agent name"),
                Arg("--agent-label",
                    type=str,
                    default=None,
                    help="agent label"),
                Arg("--agent-resource-pool",
                    type=str,
                    default=None,
                    help="agent resource pool"),
                # Same --gpu/--no-gpu pair as in cluster-up.
                BoolOptArg(
                    "--gpu",
                    "--no-gpu",
                    dest="gpu",
                    default=("darwin" not in sys.platform),
                    true_help="enable GPU support for agent",
                    false_help="disable GPU support for agent",
                ),
                Arg(
                    "--no-autorestart",
                    help=
                    "disable container auto-restart (recommended for local development)",
                    action="store_true",
                ),
                Arg(
                    "--cluster-name",
                    type=str,
                    default="determined",
                    help="name for the cluster resources",
                ),
            ],
        ),
        # agent-down: stop one agent by name, or every running agent with --all.
        Cmd(
            "agent-down",
            handle_agent_down,
            "Stop a Determined agent",
            [
                Arg("--agent-name",
                    type=str,
                    default="det-agent",
                    help="agent name"),
                Arg("--all",
                    help="stop all running agents",
                    action="store_true"),
                Arg(
                    "--cluster-name",
                    type=str,
                    default="determined",
                    help="name for the cluster resources",
                ),
            ],
        ),
    ],
)
コード例 #3
0

# fmt: off

# Declarative CLI tree for config templates. Every Cmd receives, positionally:
# a name, a handler (None for the group itself), a help string, and a list of
# nested Cmd/Arg entries.
# NOTE(review): space-separated names such as "template tpl" or "list ls"
# presumably declare aliases -- confirm against the Cmd implementation.
args_description = [
    Cmd("template tpl", None, "manage config templates", [
        # list is flagged is_default=True.
        Cmd("list ls",
            list_template,
            "list config templates", [
                Arg("-d",
                    "--details",
                    action="store_true",
                    help="show the configs of the templates"),
            ],
            is_default=True),
        Cmd("describe", describe_template, "describe config template", [
            Arg("template_name", type=str, help="template name"),
        ]),
        # set: template_file is converted with FileType("r").
        Cmd("set", set_template, "set config template", [
            Arg("template_name", help="template name"),
            Arg("template_file",
                type=FileType("r"),
                help="config template file (.yaml)")
        ]),
        Cmd("remove rm", remove_templates, "remove config template",
            [Arg("template_name", help="template name")]),
    ])
]  # type: List[Any]

# fmt: on
コード例 #4
0
ファイル: oauth.py プロジェクト: wbwatkinson/determined
@authentication_required
def remove_client(parsed_args: Namespace) -> None:
    """Delete the OAuth client named by ``parsed_args.client_id``.

    Sends a delete for ``oauth2/clients/<client_id>`` to the master given in
    ``parsed_args.master``.

    Raises:
        EnterpriseOnlyError: if the delete call raises ``NotFoundException``.
    """
    endpoint = "oauth2/clients/{}".format(parsed_args.client_id)
    try:
        api.delete(parsed_args.master, endpoint)
    except NotFoundException:
        raise EnterpriseOnlyError("API not found: oauth2/clients")


# fmt: off

# Declarative CLI tree for OAuth management: the "oauth" group holds a
# "client" group, which holds the list/add/remove commands. Every Cmd
# receives, positionally: a name, a handler (None for pure groups), a help
# string, and a list of nested Cmd/Arg entries.
args_description = [
    Cmd("oauth", None, "manage OAuth", [
        Cmd("client", None, "manage clients", [
            # list takes no arguments and is flagged is_default=True.
            Cmd("list",
                list_clients,
                "list OAuth client applications", [],
                is_default=True),
            Cmd("add", add_client, "add OAuth client application", [
                Arg("name", type=str, help="descriptive name"),
                Arg("domain", type=str, help="redirect domain"),
            ]),
            Cmd("remove", remove_client, "remove OAuth client application", [
                Arg("client_id", help="OAuth client ID to remove"),
            ]),
        ])
    ])
]  # type: List[Any]

# fmt: on
コード例 #5
0
ファイル: checkpoint.py プロジェクト: wbwatkinson/determined
# Declarative CLI tree for the checkpoint command group. Every Cmd receives,
# positionally: a name, a handler (None for the top-level group), a help
# string, and a list of nested Cmd/Arg entries.
# NOTE(review): the "|" in "c|heckpoint" presumably marks "c" as a short
# alias -- confirm against the Cmd implementation.
args_description = Cmd(
    "c|heckpoint",
    None,
    "manage checkpoints",
    [
        # download: positional checkpoint UUID, optional output directory,
        # and a quiet flag.
        Cmd(
            "download",
            download,
            "download checkpoint from persistent storage",
            [
                Arg("uuid",
                    type=str,
                    help="Download a checkpoint by specifying its UUID."),
                Arg(
                    "-o",
                    "--output-dir",
                    type=str,
                    help="Desired output directory for the checkpoint.",
                ),
                Arg(
                    "-q",
                    "--quiet",
                    action="store_true",
                    help="Only print the path to the checkpoint.",
                ),
            ],
        ),
        # describe: single positional checkpoint UUID.
        Cmd(
            "describe",
            describe,
            "describe checkpoint",
            [Arg("uuid", type=str, help="checkpoint uuid to describe")],
        ),
    ],
)
コード例 #6
0
 # Declarative CLI tree for the trial command group. Every Cmd receives,
 # positionally: a name, a handler (None for the group itself), a help string,
 # and a list of nested Cmd/Arg/Group entries.
 # NOTE(review): the "|" in "t|rial" presumably marks "t" as a short alias --
 # confirm against the Cmd implementation.
 Cmd(
     "t|rial",
     None,
     "manage trials",
     [
         # describe: trial ID plus output-format switches.
         Cmd(
             "describe",
             describe_trial,
             "describe trial",
             [
                 Arg("trial_id", type=int, help="trial ID"),
                 Arg("--metrics", action="store_true", help="display full metrics"),
                 # NOTE(review): Group presumably makes --csv and --json
                 # mutually exclusive -- confirm against the Group implementation.
                 Group(
                     Arg("--csv", action="store_true", help="print as CSV"),
                     Arg("--json", action="store_true", help="print JSON"),
                 ),
             ],
         ),
         # download: the checkpoint is selected through the required
         # --best / --latest / --uuid group.
         Cmd(
             "download",
             download,
             "download checkpoint for trial",
             [
                 Arg("trial_id", type=int, help="trial ID"),
                 Group(
                     Arg(
                         "--best",
                         action="store_true",
                         help="download the checkpoint with the best validation metric",
                     ),
                     Arg(
                         "--latest",
                         action="store_true",
                         help="download the most recent checkpoint",
                     ),
                     Arg(
                         "--uuid",
                         type=str,
                         help="download a checkpoint by specifying its UUID",
                     ),
                     required=True,
                 ),
                 Arg(
                     "-o",
                     "--output-dir",
                     type=str,
                     default=None,
                     help="Desired output directory for the checkpoint",
                 ),
                 Arg(
                     "--sort-by",
                     type=str,
                     default=None,
                     help="The name of the validation metric to sort on. This argument is only "
                     "used with --best. If --best is passed without --sort-by, the "
                     "experiment's searcher metric is assumed. If this argument is specified, "
                     "--smaller-is-better must also be specified.",
                 ),
                 # The string value is converted with strtobool and coerced to
                 # bool; None means the flag was not given.
                 # NOTE(review): distutils is deprecated (PEP 632) and removed
                 # in Python 3.12, so this converter needs a replacement there.
                 Arg(
                     "--smaller-is-better",
                     type=lambda s: bool(distutils.util.strtobool(s)),
                     default=None,
                     help="The sort order for metrics when using --best with --sort-by. For "
                     "example, 'accuracy' would require passing '--smaller-is-better false'. If "
                     "--sort-by is specified, this argument must be specified.",
                 ),
                 Arg(
                     "-q",
                     "--quiet",
                     action="store_true",
                     help="only print the path to the checkpoint",
                 ),
             ],
         ),
         # logs: fetch trial logs; the append-style options collect repeated
         # values into lists under their dest names.
         Cmd(
             "logs",
             trial_logs,
             "fetch trial logs",
             [
                 Arg("trial_id", type=int, help="trial ID"),
                 Arg(
                     "-f",
                     "--follow",
                     action="store_true",
                     help="follow the logs of a running trial, similar to tail -f",
                 ),
                 Group(
                     Arg(
                         "--head",
                         type=int,
                         help="number of lines to show, counting from the beginning "
                         "of the log (default is all)",
                     ),
                     Arg(
                         "--tail",
                         type=int,
                         help="number of lines to show, counting from the end "
                         "of the log (default is all)",
                     ),
                 ),
                 Arg(
                     "--agent-id",
                     dest="agent_ids",
                     action="append",
                     help="agents to show logs from (repeat for multiple values)",
                 ),
                 Arg(
                     "--container-id",
                     dest="container_ids",
                     action="append",
                     help="containers to show logs from (repeat for multiple values)",
                 ),
                 # Help text previously duplicated the --container-id wording.
                 Arg(
                     "--rank-id",
                     dest="rank_ids",
                     type=int,
                     action="append",
                     help="ranks to show logs from (repeat for multiple values)",
                 ),
                 Arg(
                     "--timestamp-before",
                     help="show logs only from before (RFC 3339 format)",
                 ),
                 Arg(
                     "--timestamp-after",
                     help="show logs only from after (RFC 3339 format)",
                 ),
                 Arg(
                     "--level",
                     dest="level",
                     help="show logs with this level or higher "
                     + "(TRACE, DEBUG, INFO, WARNING, ERROR, CRITICAL)",
                 ),
                 Arg(
                     "--source",
                     dest="sources",
                     action="append",
                     help="sources to show logs from (repeat for multiple values)",
                 ),
                 Arg(
                     "--stdtype",
                     dest="stdtypes",
                     action="append",
                     help="output stream to show logs from (repeat for multiple values)",
                 ),
             ],
         ),
         # kill: unlike the commands above, trial_id has no type= converter.
         Cmd(
             "kill", kill_trial, "forcibly terminate a trial", [Arg("trial_id", help="trial ID")]
         ),
     ],
 ),
コード例 #7
0
 # Declarative CLI tree for the command task group. Every Cmd receives,
 # positionally: a name, a handler (None for pure groups), a help string, and
 # a list of nested Cmd/Arg/Group entries.
 # NOTE(review): space-separated names such as "command cmd" or "list ls"
 # presumably declare aliases -- confirm against the Cmd implementation.
 Cmd("command cmd", None, "manage commands", [
     # list is flagged is_default=True.
     Cmd("list ls", command.list_tasks, "list commands", [
         Arg("-q", "--quiet", action="store_true",
             help="only display the IDs"),
         Arg("--all", "-a", action="store_true",
             help="show all commands (including other users')"),
         # NOTE(review): Group presumably makes --csv and --json mutually
         # exclusive -- confirm against the Group implementation.
         Group(
             Arg("--csv", action="store_true", help="print as CSV"),
             Arg("--json", action="store_true", help="print as JSON"),
         ),
     ], is_default=True),
     Cmd("config", command.config,
         "display command config", [
             Arg("command_id", type=str, help="command ID"),
         ]),
     # run: entrypoint uses nargs=REMAINDER, so it takes the rest of the
     # command line.
     Cmd("run", run_command, "create command", [
         Arg("entrypoint", type=str, nargs=REMAINDER,
             help="entrypoint command and arguments to execute"),
         Arg("--config-file", default=None, type=FileType("r"),
             help="command config file (.yaml)"),
         Arg("-v", "--volume", action="append", default=[],
             help=VOLUME_DESC),
         Arg("-c", "--context", default=None, type=Path, help=CONTEXT_DESC),
         Arg("--config", action="append", default=[], help=CONFIG_DESC),
         Arg("--template", type=str,
             help="name of template to apply to the command configuration"),
         Arg("-d", "--detach", action="store_true",
             help="run in the background and print the ID")
     ]),
     # The lambda defers the task.logs attribute lookup until the handler is
     # actually invoked. The positional is stored as task_id but displayed as
     # command_id via metavar; the shared log options from the task module
     # are spliced in after it.
     Cmd("logs", lambda *args, **kwargs: task.logs(*args, **kwargs), "fetch command logs", [
         Arg("task_id", help="command ID", metavar="command_id"),
         *task.common_log_options,
     ]),
     # kill accepts one or more command IDs.
     Cmd("kill", command.kill, "forcibly terminate a command", [
         Arg("command_id", help="command ID", nargs=ONE_OR_MORE),
         Arg("-f", "--force", action="store_true", help="ignore errors"),
     ]),
     # set: sub-group that currently holds only "priority".
     Cmd("set", None, "set command attributes", [
         Cmd("priority", command.set_priority, "set command priority", [
             Arg("command_id", help="command ID"),
             Arg("priority", type=int, help="priority"),
         ]),
     ]),
 ])
コード例 #8
0
ファイル: project.py プロジェクト: determined-ai/determined
 # Declarative CLI tree for the project command group. Every Cmd receives,
 # positionally: a name, a handler (None for the group itself), a help string,
 # and a list of nested Cmd/Arg entries.
 # NOTE(review): the "|" in "p|roject" presumably marks "p" as a short alias --
 # confirm against the Cmd implementation.
 Cmd(
     "p|roject",
     None,
     "manage projects",
     [
         # list: projects of one workspace. The sort/order help texts used to
         # say "workspaces"; they now name what this command actually lists.
         Cmd(
             "list",
             list_workspace_projects,
             "list the projects associated with a workspace",
             [
                 Arg("workspace_name",
                     type=str,
                     help="name of the workspace"),
                 Arg(
                     "--sort-by",
                     type=str,
                     choices=["id", "name"],
                     default="id",
                     help="sort projects by the given field",
                 ),
                 Arg(
                     "--order-by",
                     type=str,
                     choices=["asc", "desc"],
                     default="asc",
                     help=
                     "order projects in either ascending or descending order",
                 ),
                 # Shared pagination options defined elsewhere.
                 *pagination_args,
                 Arg("--json", action="store_true", help="print as JSON"),
             ],
         ),
         # list-experiments: experiments of one project; the help texts are
         # corrected the same way as for "list" above.
         Cmd(
             "list-experiments",
             list_project_experiments,
             "list the experiments associated with a project",
             [
                 Arg("workspace_name",
                     type=str,
                     help="name of the workspace"),
                 Arg("project_name", type=str, help="name of the project"),
                 Arg(
                     "--all",
                     "-a",
                     action="store_true",
                     default=False,
                     help=
                     "show all experiments (including archived and other users')",
                 ),
                 Arg(
                     "--sort-by",
                     type=str,
                     choices=["id", "name"],
                     default="id",
                     help="sort experiments by the given field",
                 ),
                 Arg(
                     "--order-by",
                     type=str,
                     choices=["asc", "desc"],
                     default="asc",
                     help=
                     "order experiments in either ascending or descending order",
                 ),
                 *pagination_args,
                 Arg("--json", action="store_true", help="print as JSON"),
             ],
         ),
         Cmd(
             "create",
             create_project,
             "create project",
             [
                 Arg("workspace_name",
                     type=str,
                     help="name of the workspace"),
                 Arg("name", type=str, help="name of the project"),
                 Arg("--description",
                     type=str,
                     help="description of the project"),
                 Arg("--json", action="store_true", help="print as JSON"),
             ],
         ),
         # delete: --yes answers confirmation prompts automatically.
         Cmd(
             "delete",
             delete_project,
             "delete project",
             [
                 Arg("workspace_name",
                     type=str,
                     help="name of the workspace"),
                 Arg("project_name", type=str, help="name of the project"),
                 Arg(
                     "--yes",
                     action="store_true",
                     default=False,
                     help="automatically answer yes to prompts",
                 ),
             ],
         ),
         Cmd(
             "archive",
             archive_project,
             "archive project",
             [
                 Arg("workspace_name",
                     type=str,
                     help="name of the workspace"),
                 Arg("project_name", type=str, help="name of the project"),
             ],
         ),
         Cmd(
             "unarchive",
             unarchive_project,
             "unarchive project",
             [
                 Arg("workspace_name",
                     type=str,
                     help="name of the workspace"),
                 Arg("project_name", type=str, help="name of the project"),
             ],
         ),
         Cmd(
             "describe",
             describe_project,
             "describe project",
             [
                 Arg("workspace_name",
                     type=str,
                     help="name of the workspace"),
                 Arg("project_name", type=str, help="name of the project"),
                 Arg(
                     "--all",
                     "-a",
                     action="store_true",
                     default=False,
                     help=
                     "show all experiments (including archived and other users')",
                 ),
                 Arg("--json", action="store_true", help="print as JSON"),
             ],
         ),
         # edit: --new_name keeps its underscore spelling because renaming the
         # flag would break existing invocations.
         Cmd(
             "edit",
             edit_project,
             "edit project",
             [
                 Arg("workspace_name",
                     type=str,
                     help="current name of the workspace"),
                 Arg("project_name", type=str, help="name of the project"),
                 Arg("--new_name", type=str,
                     help="new name of the project"),
                 Arg("--description",
                     type=str,
                     help="description of the project"),
                 Arg("--json", action="store_true", help="print as JSON"),
             ],
         ),
     ],
 )
コード例 #9
0
ファイル: cli.py プロジェクト: shiyuann/determined
    Arg(
        "--image-repo-prefix",
        type=str,
        default="determinedai",
        help=
        "Docker image repository to use for determined-master and determined-agent images",
    ),
    local_args_description,
    aws_args_description,
    gcp_args_description,
]

# Name string passed to the deploy Cmd below.
# NOTE(review): the "|" presumably marks "d" as a short alias -- confirm
# against the Cmd implementation.
DEPLOY_CMD_NAME = "d|eploy"
# Top-level deploy command group: it has no handler of its own (None) and
# takes its arguments and subcommands from args_subs.
args_description = Cmd(
    DEPLOY_CMD_NAME,
    None,
    "manage deployments",
    args_subs,
)


def main() -> None:
    """Deprecated entry point for standalone `det-deploy`."""
    parser = argparse.ArgumentParser(
        description="Manage Determined deployments.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    add_args(parser, args_subs)
    parsed_args = parser.parse_args()

    v = vars(parsed_args)
    if not v.get("func"):
コード例 #10
0
# Declarative CLI tree for the experimental GKE deployment commands.
# `up` dispatches to handle_up and `down` to handle_down; the parent command
# itself is given no handler (None).
args_description = Cmd(
    "gke-experimental",
    None,
    "GKE help",
    [
        Cmd(
            "up",
            handle_up,
            "create gke cluster",
            [
                ArgGroup(
                    "required named arguments",
                    None,
                    [
                        Arg(
                            "--cluster-id",
                            type=str,
                            default=None,
                            required=True,
                            help="a unique name for the gke cluster",
                        ),
                    ],
                ),
                ArgGroup(
                    "optional named arguments",
                    None,
                    [
                        Arg(
                            "--agent-node-pool-name",
                            "--gpu-node-pool-name",
                            type=str,
                            default=None,
                            help="a unique name for the GPU node pool",
                        ),
                        Arg(
                            "--gcs-bucket-name",
                            type=str,
                            default=None,
                            help="a unique name for the GCS bucket that will store your"
                            " checkpoints",
                        ),
                        Arg(
                            "--gpu-type",
                            type=str,
                            default=defaults.GPU_TYPE,
                            required=False,
                            help="accelerator type to use for agents",
                        ),
                        Arg(
                            "--cpu-only",
                            required=False,
                            help="Flag to create a CPU Only Determined Instance.",
                            action="store_true",
                        ),
                        Arg(
                            "--gpus-per-node",
                            type=int,
                            default=defaults.GPUS_PER_NODE,
                            required=False,
                            help="number of GPUs per node",
                        ),
                        Arg(
                            "--helm-dir",
                            type=str,
                            default="helm/charts/determined",
                            required=False,
                            help="directory containing Helm Chart, values.yaml and templates.",
                        ),
                        # Hidden option: argparse.SUPPRESS keeps it out of --help output.
                        Arg(
                            "--det-version",
                            type=str,
                            default=None,
                            help=argparse.SUPPRESS,
                        ),
                        Arg(
                            "--no-managed-bucket",
                            required=False,
                            help="flag that indicates GCS checkpointing bucket already exists",
                            action="store_true",
                        ),
                        Arg(
                            "--zone",
                            type=str,
                            default=defaults.ZONE,
                            help="zone to create cluster in",
                        ),
                        Arg(
                            "--master-machine-type",
                            type=str,
                            default=defaults.MASTER_MACHINE_TYPE,
                            help="machine type to use for master node group",
                        ),
                        Arg(
                            "--agent-machine-type",
                            "--machine-type",
                            type=str,
                            default=defaults.AGENT_MACHINE_TYPE,
                            help="machine type to use for agent node group",
                        ),
                        Arg(
                            "--max-gpu-nodes",
                            "--max-nodes",
                            type=int,
                            default=defaults.MAX_GPU_NODES,
                            help="maximum number of nodes for the GPU node group",
                        ),
                        Arg(
                            "--max-cpu-nodes",
                            type=int,
                            default=defaults.MAX_CPU_NODES,
                            help="maximum number of nodes for the CPU node group",
                        ),
                        # Help text names the CPU pool (it previously repeated the GPU wording).
                        Arg(
                            "--cpu-node-pool-name",
                            type=str,
                            default=None,
                            help="a unique name for the CPU node pool",
                        ),
                        Arg(
                            "--multiple-node-pools",
                            required=False,
                            help="flag that indicates multiple node pools should be used - one"
                            " for CPU only tasks and one for GPU tasks",
                            action="store_true",
                        ),
                        # Adjacent literals are concatenated verbatim, so each continuation
                        # piece carries its own leading space.
                        Arg(
                            "--gpu-coscheduler",
                            "--coscheduler",
                            required=False,
                            help="Enables the lightweight coscheduling plugin for Kubernetes that"
                            " provides priority-based gang scheduling for the GPU Agent Nodepool."
                            " If this argument is set, cluster autoscaling is disabled, and"
                            " --max-gpu-nodes nodes are statically allocated for the GPU Agent Node"
                            " pool at creation time.",
                            action="store_true",
                        ),
                        Arg(
                            "--preemption",
                            "--preemptive-scheduler",
                            required=False,
                            help="Enables the priority-based scheduler with preemption on the GPU"
                            " Agent Nodepool. If this argument is set, cluster autoscaling is"
                            " disabled, and --max-gpu-nodes nodes are statically allocated for the"
                            " GPU Agent Node pool at creation time.",
                            action="store_true",
                        ),
                    ],
                ),
            ],
        ),
        Cmd(
            "down",
            handle_down,
            "delete gke cluster",
            [
                ArgGroup(
                    "required named arguments",
                    None,
                    [
                        Arg(
                            "--cluster-id",
                            type=str,
                            default=None,
                            required=True,
                            help="the gke cluster to delete",
                        ),
                    ],
                ),
                ArgGroup(
                    "optional named arguments",
                    None,
                    [
                        Arg(
                            "--region",
                            type=str,
                            default="us-west1",
                            help="region containing cluster to delete",
                        ),
                        Arg(
                            "--no-managed-bucket",
                            required=False,
                            help="GCS checkpointing bucket is managed externally",
                            action="store_true",
                        ),
                        Arg(
                            "--gcs-bucket-name",
                            type=str,
                            default=None,
                            help="a unique name for the GCS bucket that will store your"
                            " checkpoints",
                        ),
                    ],
                ),
            ],
        ),
    ],
)
コード例 #11
0
 # Command tree for managing notebooks; the `list ls` entry is marked is_default=True.
 Cmd(
     "notebook",
     None,
     "manage notebooks",
     [
         Cmd(
             "list ls",
             command.list,
             "list notebooks",
             [
                 Arg("-q", "--quiet", action="store_true", help="only display the IDs"),
                 Arg(
                     "--all",
                     "-a",
                     action="store_true",
                     help="show all notebooks (including other users')",
                 ),
             ],
             is_default=True,
         ),
         Cmd(
             "config",
             command.config,
             "display notebook config",
             [
                 Arg("id", type=str, help="notebook ID"),
             ],
         ),
         Cmd(
             "start",
             start_notebook,
             "start a new notebook",
             [
                 Arg(
                     "--config-file",
                     default=None,
                     type=FileType("r"),
                     help="command config file (.yaml)",
                 ),
                 # Repeatable options: each occurrence is appended to a list.
                 Arg("-v", "--volume", action="append", default=[], help=VOLUME_DESC),
                 Arg("-c", "--context", default=None, type=Path, help=CONTEXT_DESC),
                 Arg("--config", action="append", default=[], help=CONFIG_DESC),
                 Arg(
                     "--template",
                     type=str,
                     help="name of template to apply to the notebook configuration",
                 ),
                 Arg(
                     "--no-browser",
                     action="store_true",
                     help="don't open the notebook in a browser after startup",
                 ),
                 Arg(
                     "-d",
                     "--detach",
                     action="store_true",
                     help="run in the background and print the ID",
                 ),
                 Arg(
                     "--preview",
                     action="store_true",
                     help="preview the notebook configuration",
                 ),
             ],
         ),
         Cmd(
             "open",
             open_notebook,
             "open an existing notebook",
             [
                 Arg("notebook_id", help="notebook ID"),
             ],
         ),
         Cmd(
             "logs",
             command.tail_logs,
             "fetch notebook logs",
             [
                 Arg("notebook_id", help="notebook ID"),
                 Arg(
                     "-f",
                     "--follow",
                     action="store_true",
                     help="follow the logs of a notebook, similar to tail -f",
                 ),
                 Arg(
                     "--tail",
                     type=int,
                     default=200,
                     help="number of lines to show, counting from the end of the log",
                 ),
             ],
         ),
         Cmd(
             "kill",
             command.kill,
             "kill a notebook",
             [
                 Arg("notebook_id", help="notebook ID", nargs=ONE_OR_MORE),
                 Arg("-f", "--force", action="store_true", help="ignore errors"),
             ],
         ),
     ],
 )
コード例 #12
0
 # Command tree for managing notebooks; the `list ls` entry is marked is_default=True.
 Cmd(
     "notebook",
     None,
     "manage notebooks",
     [
         Cmd(
             "list ls",
             command.list_tasks,
             "list notebooks",
             [
                 Arg("-q", "--quiet", action="store_true", help="only display the IDs"),
                 Arg(
                     "--all",
                     "-a",
                     action="store_true",
                     help="show all notebooks (including other users')",
                 ),
             ],
             is_default=True,
         ),
         Cmd(
             "config",
             command.config,
             "display notebook config",
             [
                 Arg("notebook_id", type=str, help="notebook ID"),
             ],
         ),
         Cmd(
             "start",
             start_notebook,
             "start a new notebook",
             [
                 Arg(
                     "--config-file",
                     default=None,
                     type=FileType("r"),
                     help="command config file (.yaml)",
                 ),
                 # Repeatable options: each occurrence is appended to a list.
                 Arg("-v", "--volume", action="append", default=[], help=VOLUME_DESC),
                 Arg("-c", "--context", default=None, type=Path, help=CONTEXT_DESC),
                 Arg("--config", action="append", default=[], help=CONFIG_DESC),
                 Arg(
                     "--template",
                     type=str,
                     help="name of template to apply to the notebook configuration",
                 ),
                 Arg(
                     "--no-browser",
                     action="store_true",
                     help="don't open the notebook in a browser after startup",
                 ),
                 Arg(
                     "-d",
                     "--detach",
                     action="store_true",
                     help="run in the background and print the ID",
                 ),
                 Arg(
                     "--preview",
                     action="store_true",
                     help="preview the notebook configuration",
                 ),
             ],
         ),
         Cmd(
             "open",
             open_notebook,
             "open an existing notebook",
             [
                 Arg("notebook_id", help="notebook ID"),
             ],
         ),
         Cmd(
             "logs",
             # Wrapper lambda: the `task.logs` attribute is looked up when the command runs.
             lambda *args, **kwargs: task.logs(*args, **kwargs),
             "fetch notebook logs",
             [
                 # Stored under `task_id` but displayed as `notebook_id` via metavar.
                 Arg("task_id", help="notebook ID", metavar="notebook_id"),
                 *task.common_log_options,
             ],
         ),
         Cmd(
             "kill",
             command.kill,
             "kill a notebook",
             [
                 Arg("notebook_id", help="notebook ID", nargs=ONE_OR_MORE),
                 Arg("-f", "--force", action="store_true", help="ignore errors"),
             ],
         ),
         Cmd(
             "set",
             None,
             "set notebook attributes",
             [
                 Cmd(
                     "priority",
                     command.set_priority,
                     "set notebook priority",
                     [
                         Arg("notebook_id", help="notebook ID"),
                         Arg("priority", type=int, help="priority"),
                     ],
                 ),
             ],
         ),
     ],
 )
コード例 #13
0
ファイル: cli.py プロジェクト: hoanghphan/determined
# Declarative CLI tree for AWS (CloudFormation) deployments.
# Subcommands: `list` -> handle_list, `down` -> handle_down, `up` -> handle_up.
args_description = Cmd(
    "aws",
    None,
    "AWS help",
    [
        Cmd(
            "list",
            handle_list,
            "list CloudFormation stacks",
            [
                Arg("--region", type=str, default=None, help="AWS region"),
                Arg("--profile", type=str, default=None, help="AWS profile"),
            ],
        ),
        Cmd(
            "down",
            handle_down,
            "delete CloudFormation stack",
            [
                ArgGroup(
                    "required named arguments",
                    None,
                    [
                        Arg(
                            "--cluster-id",
                            type=str,
                            help="stack name for CloudFormation cluster",
                            required=True,
                        ),
                    ],
                ),
                Arg("--region", type=str, default=None, help="AWS region"),
                Arg("--profile", type=str, default=None, help="AWS profile"),
            ],
        ),
        Cmd(
            "up",
            handle_up,
            "deploy/update CloudFormation stack",
            [
                ArgGroup(
                    "required named arguments",
                    None,
                    [
                        Arg(
                            "--cluster-id",
                            type=str,
                            help="stack name for CloudFormation cluster",
                            required=True,
                        ),
                        Arg(
                            "--keypair",
                            type=str,
                            help="aws ec2 keypair for master and agent",
                            required=True,
                        ),
                    ],
                ),
                Arg("--region", type=str, default=None, help="AWS region"),
                Arg("--profile", type=str, default=None, help="AWS profile"),
                Arg("--master-instance-type", type=str, help="instance type for master"),
                # store_true flag: it accepts no value, so the help does not offer "true/false".
                Arg("--enable-cors", action="store_true", help="allow CORS requests"),
                Arg("--master-tls-cert"),
                Arg("--master-tls-key"),
                Arg("--master-cert-name"),
                Arg(
                    "--gpu-agent-instance-type",
                    type=str,
                    help="instance type for agent in the GPU resource pool",
                ),
                Arg(
                    "--cpu-agent-instance-type",
                    type=str,
                    help="instance type for agent in the CPU resource pool",
                ),
                Arg(
                    "--deployment-type",
                    type=str,
                    choices=constants.deployment_types.DEPLOYMENT_TYPES,
                    default=constants.defaults.DEPLOYMENT_TYPE,
                    # Only the second literal interpolates, so only it is an f-string.
                    help="deployment type - "
                    f'must be one of [{", ".join(constants.deployment_types.DEPLOYMENT_TYPES)}]',
                ),
                Arg("--inbound-cidr", type=str, help="inbound IP Range in CIDR format"),
                Arg(
                    "--agent-subnet-id",
                    type=str,
                    help="subnet to deploy agents into. Optional. "
                    "Only used with simple deployment type",
                ),
                # Hidden option: argparse.SUPPRESS keeps it out of --help output.
                Arg("--det-version", type=str, help=argparse.SUPPRESS),
                Arg(
                    "--db-password",
                    type=str,
                    default=constants.defaults.DB_PASSWORD,
                    help="password for master database",
                ),
                Arg("--max-idle-agent-period", type=str, help="max agent idle time"),
                Arg("--max-agent-starting-period", type=str, help="max agent starting time"),
                Arg(
                    "--max-cpu-containers-per-agent",
                    type=int,
                    help="maximum number of cpu containers on agent in the CPU resource pool",
                ),
                Arg(
                    "--min-dynamic-agents",
                    type=int,
                    help="minimum number of dynamic agent instances at one time",
                ),
                Arg(
                    "--max-dynamic-agents",
                    type=int,
                    help="maximum number of dynamic agent instances at one time",
                ),
                Arg("--spot", action="store_true", help="whether to use spot instances or not"),
                # The validate_*() factories are called here; their return value is the `type`.
                Arg(
                    "--spot-max-price",
                    type=validate_spot_max_price(),
                    help="maximum hourly price for the spot instance "
                    "(do not include the dollar sign)",
                ),
                Arg(
                    "--scheduler-type",
                    type=validate_scheduler_type(),
                    default="fair_share",
                    help="scheduler to use (defaults to fair_share).",
                ),
                Arg(
                    "--preemption-enabled",
                    type=str,
                    default="false",
                    help="whether task preemption is supported in the scheduler "
                    "(only configurable for priority scheduler).",
                ),
                Arg("--dry-run", action="store_true", help="print deployment template"),
                Arg("--cpu-env-image", type=str, help="Docker image for CPU tasks"),
                Arg("--gpu-env-image", type=str, help="Docker image for GPU tasks"),
                Arg(
                    "--log-group-prefix",
                    type=str,
                    help="prefix for output CloudWatch log group",
                ),
                # store_const: the string "true" is stored when the flag is present.
                Arg(
                    "--retain-log-group",
                    action="store_const",
                    const="true",
                    help="whether to retain CloudWatch log group after the stack is deleted"
                    " (only available for the simple template)",
                ),
            ],
        ),
    ],
)
コード例 #14
0
 # Command tree for managing agents; the `list` entry is marked is_default=True.
 # NOTE(review): Group(...) presumably makes its members mutually exclusive - confirm
 # against the Group implementation.
 Cmd(
     "a|gent",
     None,
     "manage agents",
     [
         Cmd(
             "list",
             list_agents,
             "list agents",
             [
                 Group(
                     Arg("--csv", action="store_true", help="print as CSV"),
                     Arg("--json", action="store_true", help="print as JSON"),
                 ),
             ],
             is_default=True,
         ),
         # enable/disable both use patch_agent(...), called with True / False respectively.
         Cmd(
             "enable",
             patch_agent(True),
             "enable agent",
             [
                 Group(
                     Arg(
                         "agent_id",
                         help="agent ID",
                         nargs="?",
                         completer=agent_id_completer,
                     ),
                     Arg("--all", action="store_true", help="enable all agents"),
                 ),
             ],
         ),
         Cmd(
             "disable",
             patch_agent(False),
             "disable agent",
             [
                 Group(
                     Arg(
                         "agent_id",
                         help="agent ID",
                         nargs="?",
                         completer=agent_id_completer,
                     ),
                     Arg("--all", action="store_true", help="disable all agents"),
                 ),
                 Arg(
                     "--drain",
                     action="store_true",
                     help="enter drain mode, allowing the tasks currently running on "
                     "the disabled agents to finish. will also print these tasks, if any",
                 ),
                 Group(
                     Arg("--csv", action="store_true", help="print as CSV"),
                     Arg("--json", action="store_true", help="print as JSON"),
                 ),
             ],
         ),
     ],
 ),
コード例 #15
0
ファイル: shell.py プロジェクト: determined-ai/determined
 # Command tree for managing shells; the `list` entry is marked is_default=True.
 # Several handlers are wrapped in functools.partial with no bound arguments.
 Cmd(
     "shell",
     None,
     "manage shells",
     [
         Cmd(
             "list",
             partial(command.list_tasks),
             "list shells",
             [
                 Arg("-q", "--quiet", action="store_true", help="only display the IDs"),
                 Arg(
                     "--all",
                     "-a",
                     action="store_true",
                     help="show all shells (including other users')",
                 ),
                 Group(format_args["json"], format_args["csv"]),
             ],
             is_default=True,
         ),
         Cmd(
             "config",
             partial(command.config),
             "display shell config",
             [
                 Arg("shell_id", type=str, help="shell ID"),
             ],
         ),
         Cmd(
             "start",
             start_shell,
             "start a new shell",
             [
                 Arg(
                     "ssh_opts",
                     nargs="*",
                     help="additional SSH options when connecting to the shell",
                 ),
                 Arg(
                     "--config-file",
                     default=None,
                     type=FileType("r"),
                     help="command config file (.yaml)",
                 ),
                 # Repeatable options: each occurrence is appended to a list.
                 Arg("-v", "--volume", action="append", default=[], help=VOLUME_DESC),
                 Arg("-c", "--context", default=None, type=Path, help=CONTEXT_DESC),
                 Arg("--config", action="append", default=[], help=CONFIG_DESC),
                 Arg(
                     "-p",
                     "--passphrase",
                     action="store_true",
                     help="passphrase to encrypt the shell private key",
                 ),
                 Arg(
                     "--template",
                     type=str,
                     help="name of template to apply to the shell configuration",
                 ),
                 Arg(
                     "-d",
                     "--detach",
                     action="store_true",
                     help="run in the background and print the ID",
                 ),
                 Arg(
                     "--show-ssh-command",
                     action="store_true",
                     help="show ssh command (e.g. for use in IDE) when starting the shell",
                 ),
             ],
         ),
         Cmd(
             "open",
             open_shell,
             "open an existing shell",
             [
                 Arg("shell_id", help="shell ID"),
                 Arg(
                     "ssh_opts",
                     nargs="*",
                     help="additional SSH options when connecting to the shell",
                 ),
                 Arg(
                     "--show-ssh-command",
                     action="store_true",
                     help="show ssh command (e.g. for use in IDE) when starting the shell",
                 ),
             ],
         ),
         Cmd(
             "show_ssh_command",
             show_ssh_command,
             "print the ssh command",
             [
                 Arg("shell_id", help="shell ID"),
                 Arg(
                     "ssh_opts",
                     nargs="*",
                     help="additional SSH options when connecting to the shell",
                 ),
             ],
         ),
         Cmd(
             "logs",
             partial(task.logs),
             "fetch shell logs",
             [
                 # Stored under `task_id` but displayed as `shell_id` via metavar.
                 Arg("task_id", help="shell ID", metavar="shell_id"),
                 *task.common_log_options,
             ],
         ),
         Cmd(
             "kill",
             partial(command.kill),
             "kill a shell",
             [
                 Arg("shell_id", help="shell ID", nargs=ONE_OR_MORE),
                 Arg("-f", "--force", action="store_true", help="ignore errors"),
             ],
         ),
         Cmd(
             "set",
             None,
             "set shell attributes",
             [
                 Cmd(
                     "priority",
                     partial(command.set_priority),
                     "set shell priority",
                     [
                         Arg("shell_id", help="shell ID"),
                         Arg("priority", type=int, help="priority"),
                     ],
                 ),
             ],
         ),
     ],
 )
コード例 #16
0
# Declarative CLI tree for AWS (CloudFormation) deployments.
# Subcommands: `list` -> handle_list, `down` -> handle_down, `up` -> handle_up,
# `dump-master-config-template` -> handle_dump_master_config_template.
args_description = Cmd(
    "aws",
    None,
    "AWS help",
    [
        Cmd(
            "list",
            handle_list,
            "list CloudFormation stacks",
            [
                Arg("--region", type=str, default=None, help="AWS region"),
                Arg("--profile", type=str, default=None, help="AWS profile"),
            ],
        ),
        Cmd(
            "down",
            handle_down,
            "delete CloudFormation stack",
            [
                ArgGroup(
                    "required named arguments",
                    None,
                    [
                        Arg(
                            "--cluster-id",
                            type=str,
                            help="stack name for CloudFormation cluster",
                            required=True,
                        ),
                    ],
                ),
                Arg("--region", type=str, default=None, help="AWS region"),
                Arg("--profile", type=str, default=None, help="AWS profile"),
                Arg(
                    "--no-prompt",
                    action="store_true",
                    help="no prompt when deleting resources",
                ),
            ],
        ),
        Cmd(
            "up",
            handle_up,
            "deploy/update CloudFormation stack",
            [
                ArgGroup(
                    "required named arguments",
                    None,
                    [
                        Arg(
                            "--cluster-id",
                            type=str,
                            help="stack name for CloudFormation cluster",
                            required=True,
                        ),
                        Arg(
                            "--keypair",
                            type=str,
                            help="aws ec2 keypair for master and agent",
                            required=True,
                        ),
                    ],
                ),
                Arg("--region", type=str, default=None, help="AWS region"),
                Arg("--profile", type=str, default=None, help="AWS profile"),
                Arg("--master-instance-type", type=str, help="instance type for master"),
                # store_true flag: it accepts no value, so the help does not offer "true/false".
                Arg("--enable-cors", action="store_true", help="allow CORS requests"),
                Arg("--master-tls-cert"),
                Arg("--master-tls-key"),
                Arg("--master-cert-name"),
                # Each of the next two options is registered under two option strings.
                Arg(
                    "--compute-agent-instance-type",
                    "--gpu-agent-instance-type",
                    type=str,
                    help="instance type for agents in the compute resource pool",
                ),
                Arg(
                    "--aux-agent-instance-type",
                    "--cpu-agent-instance-type",
                    type=str,
                    help="instance type for agents in the auxiliary resource pool",
                ),
                Arg(
                    "--deployment-type",
                    type=str,
                    choices=constants.deployment_types.DEPLOYMENT_TYPES,
                    default=constants.defaults.DEPLOYMENT_TYPE,
                    help="deployment type",
                ),
                Arg("--inbound-cidr", type=str, help="inbound IP Range in CIDR format"),
                Arg(
                    "--agent-subnet-id",
                    type=str,
                    help="subnet to deploy agents into. Optional. "
                    "Only used with simple deployment type",
                ),
                # Hidden option: argparse.SUPPRESS keeps it out of --help output.
                Arg("--det-version", type=str, help=argparse.SUPPRESS),
                Arg(
                    "--db-password",
                    type=str,
                    default=constants.defaults.DB_PASSWORD,
                    help="password for master database",
                ),
                Arg("--max-idle-agent-period", type=str, help="max agent idle time"),
                Arg("--max-agent-starting-period", type=str, help="max agent starting time"),
                Arg(
                    "--max-aux-containers-per-agent",
                    "--max-cpu-containers-per-agent",
                    type=int,
                    help="maximum number of containers on agents in the auxiliary resource pool",
                ),
                Arg(
                    "--min-dynamic-agents",
                    type=int,
                    help="minimum number of dynamic agent instances at one time",
                ),
                Arg(
                    "--max-dynamic-agents",
                    type=int,
                    help="maximum number of dynamic agent instances at one time",
                ),
                Arg("--spot", action="store_true", help="whether to use spot instances or not"),
                # validate_spot_max_price() is called here; its return value is the `type`.
                Arg(
                    "--spot-max-price",
                    type=validate_spot_max_price(),
                    help="maximum hourly price for spot instances "
                    "(do not include the dollar sign)",
                ),
                Arg(
                    "--scheduler-type",
                    type=str,
                    choices=["fair_share", "priority", "round_robin"],
                    default="fair_share",
                    help="scheduler to use",
                ),
                Arg(
                    "--preemption-enabled",
                    type=str,
                    default="false",
                    help="whether task preemption is supported in the scheduler "
                    "(only configurable for priority scheduler).",
                ),
                Arg("--dry-run", action="store_true", help="print deployment template"),
                Arg("--cpu-env-image", type=str, help="Docker image for CPU tasks"),
                Arg("--gpu-env-image", type=str, help="Docker image for GPU tasks"),
                Arg(
                    "--log-group-prefix",
                    type=str,
                    help="prefix for output CloudWatch log group",
                ),
                # store_const: the string "true" is stored when the flag is present.
                Arg(
                    "--retain-log-group",
                    action="store_const",
                    const="true",
                    help="whether to retain CloudWatch log group after the stack is deleted"
                    " (only available for the simple template)",
                ),
                Arg(
                    "--master-config-template-path",
                    type=Path,
                    default=None,
                    help="path to master yaml template",
                ),
                Arg(
                    "--efs-id",
                    type=str,
                    help="preexisting EFS that will be mounted into the task containers; "
                    "if not provided, a new EFS instance will be created. The agents must be "
                    "able to connect to the EFS instance.",
                ),
                Arg(
                    "--fsx-id",
                    type=str,
                    help="preexisting FSx that will be mounted into the task containers; "
                    "if not provided, a new FSx instance will be created. The agents must be "
                    "able to connect to the FSx instance.",
                ),
                Arg(
                    "--no-prompt",
                    action="store_true",
                    help="no prompt when deployment would delete existing database",
                ),
            ],
        ),
        Cmd(
            "dump-master-config-template",
            handle_dump_master_config_template,
            "dump default master config template",
            [
                Arg(
                    "--deployment-type",
                    type=str,
                    choices=constants.deployment_types.DEPLOYMENT_TYPES,
                    default=constants.defaults.DEPLOYMENT_TYPE,
                    help="deployment type",
                ),
            ],
        ),
    ],
)
コード例 #17
0
                # fetch logs.
                response = api.get(
                    args.master, "logs", params={"greater_than_id": str(latest_log_id)}
                )
                latest_log_id = process_response(response, latest_log_id)
            except KeyboardInterrupt:
                break


# fmt: off

# CLI tree for the `master` command: `config`, `info` and `logs` subcommands.
args_description = [
    Cmd(
        "master",
        None,
        "manage master",
        [
            Cmd(
                "config",
                config,
                "fetch master config",
                [Group(format_args["json"], format_args["yaml"])],
            ),
            Cmd(
                "info",
                get_master,
                "fetch master info",
                [Group(format_args["json"], format_args["yaml"])],
            ),
            Cmd(
                "logs",
                logs,
                "fetch master logs",
                [
                    Arg(
                        "-f",
                        "--follow",
                        action="store_true",
                        help="follow the logs of master, similar to tail -f",
                    ),
                    # No default is given for --tail; the help text says "default is all".
                    Arg(
                        "--tail",
                        type=int,
                        help="number of lines to show, counting from the end "
                        "of the log (default is all)",
                    ),
                ],
            ),
        ],
    ),
]  # type: List[Any]

# fmt: on
コード例 #18
0
from determined.common.declarative_argparse import Cmd

# Top-level "deploy" command: no handler (None) and an empty child list here.
deploy_cmd = Cmd("d|eploy", None, "manage deployments", [])
コード例 #19
0
ファイル: cli.py プロジェクト: wbwatkinson/determined
        help="master address",
        metavar="address",
        default=get_default_master_address()),
    Arg("-v",
        "--version",
        action="version",
        help="print CLI version and exit",
        version="%(prog)s {}".format(determined.__version__)),
    experiment.args_description,
    checkpoint.args_description,
    Cmd(
        "task", None,
        "manage tasks (commands, experiments, notebooks, shells, tensorboards)",
        [
            Cmd("list",
                list_tasks,
                "list tasks in cluster", [
                    Arg("--csv", action="store_true", help="print as CSV"),
                ],
                is_default=True),
        ]),
    Cmd("preview-search", preview_search, "preview search", [
        Arg("config_file",
            type=FileType("r"),
            help="experiment config file (.yaml)")
    ]),
    deploy_args_description,
]  # type: List[object]

# fmt: on
コード例 #20
0
 # "resources" command group: queries over historical resource allocation.
 # NOTE(review): the "|" inside a command name presumably marks where the name
 # may be abbreviated -- confirm against the Cmd implementation.
 Cmd(
     "res|ources",
     None,  # no handler for the group itself; the subcommands carry handlers
     "query historical resource allocation",
     [
         # "raw" subcommand, handled by `raw`.
         Cmd(
             "raw",
             raw,
             "get raw allocation information",
             [
                 # Two positionals, declared without a type or help text.
                 Arg("timestamp_after"),
                 Arg("timestamp_before"),
                 Arg("--json",
                     action="store_true",
                     help="output JSON rather than CSV"),
             ],
         ),
         # "aggregated" subcommand, handled by `aggregated`.
         Cmd(
             "agg|regated",
             aggregated,
             "get aggregated allocation information",
             [
                 Arg("start_date", help="first date to include"),
                 Arg("end_date", help="last date to include"),
                 Arg("--json",
                     action="store_true",
                     help="output JSON rather than CSV"),
                 # Switches the aggregation granularity from days to months.
                 Arg(
                     "--monthly",
                     action="store_true",
                     help="aggregate by month rather than by day",
                 ),
             ],
         ),
     ],
 )
コード例 #21
0
 # "model" command group: list, create, describe and version registry models.
 # NOTE(review): the "|" inside the command name presumably marks where the
 # name may be abbreviated -- confirm against the Cmd implementation.
 Cmd(
     "m|odel",
     None,  # no handler for the group itself; the subcommands carry handlers
     "manage models",
     [
         # "list" is flagged is_default=True, i.e. it is the group's default
         # subcommand.
         Cmd(
             "list",
             list_models,
             "list all models in the registry",
             [
                 # Sort key is restricted to the listed choices.
                 Arg(
                     "--sort-by",
                     type=str,
                     choices=["name", "description", "creation_time", "last_updated_time"],
                     default="last_updated_time",
                     help="sort models by the given field",
                 ),
                 # Sort direction; ascending unless "desc" is requested.
                 Arg(
                     "--order-by",
                     type=str,
                     choices=["asc", "desc"],
                     default="asc",
                     help="order models in either ascending or descending order",
                 ),
                 Arg("--json", action="store_true", help="print as JSON"),
             ],
             is_default=True,
         ),
         # "register-version": takes a model name and a uuid as positionals.
         Cmd(
             "register-version",
             register_version,
             "register a new version of a model",
             [
                 Arg("name", type=str, help="name of the model"),
                 Arg("uuid", type=str, help="uuid to register as the next version of the model"),
                 Arg("--json", action="store_true", help="print as JSON"),
             ],
         ),
         # "describe": shows one model, optionally for a specific version.
         Cmd(
             "describe",
             describe,
             "describe model",
             [
                 Arg("name", type=str, help="model to describe"),
                 Arg("--json", action="store_true", help="print as JSON"),
                 # Defaults to 0; what 0 selects is decided by the handler.
                 Arg(
                     "--version",
                     type=int,
                     default=0,
                     help="model version information to include in output",
                 ),
             ],
         ),
         # "list-versions": lists the versions of the named model.
         Cmd(
             "list-versions",
             list_versions,
             "list the versions of a model",
             [
                 Arg("name", type=str, help="unique name of the model"),
                 Arg("--json", action="store_true", help="print as JSON"),
             ],
         ),
         # "create": registers a new model under the given name.
         Cmd(
             "create",
             create,
             "create model",
             [
                 Arg("name", type=str, help="unique name of the model"),
                 Arg("--description", type=str, help="description of the model"),
                 Arg("--json", action="store_true", help="print as JSON"),
             ],
         ),
     ],
 )
コード例 #22
0
ファイル: remote.py プロジェクト: wbwatkinson/determined
 # "command" command group: list, inspect, run, tail and kill commands.
 # NOTE(review): space-separated names such as "command cmd" and "list ls"
 # presumably declare aliases -- confirm against the Cmd implementation.
 Cmd("command cmd", None, "manage commands", [
     # "list" is flagged is_default=True, i.e. it is the group's default
     # subcommand.
     Cmd("list ls",
         command.list,
         "list commands", [
             Arg("-q",
                 "--quiet",
                 action="store_true",
                 help="only display the IDs"),
             Arg("--all",
                 "-a",
                 action="store_true",
                 help="show all commands (including other users')"),
         ],
         is_default=True),
     Cmd("config", command.config, "display command config", [
         Arg("id", type=str, help="command ID"),
     ]),
     # "run": creates a command from an entrypoint plus optional configuration.
     Cmd("run", run_command, "create command", [
         # NOTE(review): REMAINDER is presumably argparse.REMAINDER, which
         # collects all remaining command-line words -- confirm the import.
         Arg("entrypoint",
             type=str,
             nargs=REMAINDER,
             help="entrypoint command and arguments to execute"),
         Arg("--config-file",
             default=None,
             type=FileType("r"),
             help="command config file (.yaml)"),
         # action="append": the option may be repeated; values accumulate in a
         # list.
         Arg("-v",
             "--volume",
             action="append",
             default=[],
             help=VOLUME_DESC),
         Arg("-c", "--context", default=None, type=Path, help=CONTEXT_DESC),
         # Repeatable, like --volume.
         Arg("--config", action="append", default=[], help=CONFIG_DESC),
         Arg("--template",
             type=str,
             help="name of template to apply to the command configuration"),
         Arg("-d",
             "--detach",
             action="store_true",
             help="run in the background and print the ID")
     ]),
     Cmd("logs", command.tail_logs, "fetch command logs", [
         Arg("command_id", help="command ID"),
         Arg("-f",
             "--follow",
             action="store_true",
             help="follow the logs of a command, similar to tail -f"),
         # Shows the last 200 lines unless another count is given.
         Arg("--tail",
             type=int,
             default=200,
             help="number of lines to show, counting from the end "
             "of the log")
     ]),
     Cmd("kill", command.kill, "forcibly terminate a command", [
         # NOTE(review): ONE_OR_MORE is presumably argparse.ONE_OR_MORE ("+"),
         # i.e. at least one ID is required -- confirm the import.
         Arg("command_id", help="command ID", nargs=ONE_OR_MORE),
         Arg("-f", "--force", action="store_true", help="ignore errors"),
     ]),
 ])
コード例 #23
0
def _parse_bool_flag(value: str) -> bool:
    """Convert a command-line string into a boolean.

    ``type=bool`` must not be used with argparse: ``bool("false")`` is ``True``
    because every non-empty string is truthy, so such an option could never be
    switched off explicitly.

    Args:
        value: raw option value as typed on the command line.

    Returns:
        ``True`` for true/t/yes/y/on/1 and ``False`` for false/f/no/n/off/0 or
        the empty string (case-insensitive, surrounding whitespace ignored).

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognized spelling.
    """
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "on", "1"):
        return True
    # The empty string stays falsy, exactly as it was under ``type=bool``.
    if normalized in ("false", "f", "no", "n", "off", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean (true/false), got {!r}".format(value)
    )


# Command tree for the "gcp" command: create a cluster ("up"), delete one
# ("down"), or dump the default master config template.
args_description = Cmd(
    "gcp",
    None,
    "GCP help",
    [
        Cmd(
            "down",
            handle_down,
            "delete gcp cluster",
            [
                ArgGroup(
                    "optional named arguments",
                    None,
                    [
                        # The default is computed once, when this module is imported.
                        Arg(
                            "--local-state-path",
                            type=str,
                            default=os.getcwd(),
                            help="local directory for storing cluster state",
                        ),
                        Arg(
                            "--yes",
                            action="store_true",
                            help="no prompt when deleting resources",
                        ),
                        # Hidden spelling that writes to the same destination as --yes.
                        Arg(
                            "--no-prompt",
                            dest="yes",
                            action="store_true",
                            help=argparse.SUPPRESS,
                        ),
                    ],
                ),
            ],
        ),
        Cmd(
            "up",
            handle_up,
            "create gcp cluster",
            [
                ArgGroup(
                    "required named arguments",
                    None,
                    [
                        Arg(
                            "--cluster-id",
                            type=validate_cluster_id(),
                            default=None,
                            required=True,
                            help="unique identifier to name and tag resources",
                        ),
                        Arg(
                            "--project-id",
                            type=str,
                            default=None,
                            required=True,
                            help="project ID to create the cluster in",
                        ),
                    ],
                ),
                ArgGroup(
                    "optional named arguments",
                    None,
                    [
                        Arg(
                            "--dry-run",
                            action="store_true",
                            help="return the infrastructure plan to be executed "
                            "based on your arguments",
                        ),
                        Arg(
                            "--keypath",
                            type=str,
                            default=None,
                            help="path to service account key if not using default credentials",
                        ),
                        Arg(
                            "--network",
                            type=str,
                            default="det-default",
                            help="network name to create "
                            "(the network should not already exist in the project)",
                        ),
                        Arg(
                            "--filestore-address",
                            type=str,
                            default="",
                            help="the address of an existing Filestore in the format of "
                            "'ip-address:/file-share'; if not provided and the no-filestore "
                            "flag is not set, a new Filestore instance will be created",
                        ),
                        Arg(
                            "--no-filestore",
                            action="store_true",
                            help="whether to create a new Filestore if filestore-address "
                            "parameter is not set",
                        ),
                        # Hidden from --help output.
                        Arg(
                            "--det-version",
                            type=str,
                            default=determined.__version__,
                            help=argparse.SUPPRESS,
                        ),
                        Arg(
                            "--region",
                            type=str,
                            default=constants.defaults.REGION,
                            help="region to create the cluster in (defaults to us-west1)",
                        ),
                        Arg(
                            "--zone",
                            type=str,
                            default=None,
                            help="zone to create the cluster in (defaults to `region`-b)",
                        ),
                        # Hidden from --help output.
                        Arg(
                            "--environment-image",
                            type=str,
                            default=constants.defaults.ENVIRONMENT_IMAGE,
                            help=argparse.SUPPRESS,
                        ),
                        # The default is computed once, when this module is imported.
                        Arg(
                            "--local-state-path",
                            type=str,
                            default=os.getcwd(),
                            help="local directory for storing cluster state",
                        ),
                        # Parsed explicitly: with type=bool, "--preemptible false"
                        # would have evaluated to True.
                        Arg(
                            "--preemptible",
                            type=_parse_bool_flag,
                            default=False,
                            help="whether to use preemptible instances for dynamic agents",
                        ),
                        Arg(
                            "--operation-timeout-period",
                            type=str,
                            default=constants.defaults.OPERATION_TIMEOUT_PERIOD,
                            help="operation timeout before retrying, e.g. 5m for 5 minutes",
                        ),
                        Arg(
                            "--master-instance-type",
                            type=str,
                            default=constants.defaults.MASTER_INSTANCE_TYPE,
                            help="instance type for master",
                        ),
                        # Two option strings for the same argument.
                        Arg(
                            "--compute-agent-instance-type",
                            "--gpu-agent-instance-type",
                            type=str,
                            default=constants.defaults.COMPUTE_AGENT_INSTANCE_TYPE,
                            help="instance type for agents in the compute resource pool",
                        ),
                        # Two option strings for the same argument.
                        Arg(
                            "--aux-agent-instance-type",
                            "--cpu-agent-instance-type",
                            type=str,
                            default=constants.defaults.AUX_AGENT_INSTANCE_TYPE,
                            help="instance type for agents in the auxiliary resource pool",
                        ),
                        Arg(
                            "--db-password",
                            type=str,
                            default=constants.defaults.DB_PASSWORD,
                            help="password for master database",
                        ),
                        # Two option strings for the same argument.
                        Arg(
                            "--max-aux-containers-per-agent",
                            "--max-cpu-containers-per-agent",
                            type=int,
                            default=constants.defaults.MAX_AUX_CONTAINERS_PER_AGENT,
                            help="maximum number of containers on agents in the "
                            "auxiliary resource pool",
                        ),
                        Arg(
                            "--max-idle-agent-period",
                            type=str,
                            default=constants.defaults.MAX_IDLE_AGENT_PERIOD,
                            help="max agent idle time before it is shut down, "
                            "e.g. 30m for 30 minutes",
                        ),
                        Arg(
                            "--max-agent-starting-period",
                            type=str,
                            default=constants.defaults.MAX_AGENT_STARTING_PERIOD,
                            help="max agent starting time before retrying, e.g. 30m for 30 minutes",
                        ),
                        Arg(
                            "--port",
                            type=int,
                            default=constants.defaults.PORT,
                            help="port to use for communication on master instance",
                        ),
                        Arg(
                            "--gpu-type",
                            type=str,
                            default=constants.defaults.GPU_TYPE,
                            help="type of GPU to use on agents",
                        ),
                        Arg(
                            "--gpu-num",
                            type=int,
                            default=constants.defaults.GPU_NUM,
                            help="number of GPUs per agent instance",
                        ),
                        Arg(
                            "--min-dynamic-agents",
                            type=int,
                            default=constants.defaults.MIN_DYNAMIC_AGENTS,
                            help="minimum number of dynamic agent instances at one time",
                        ),
                        Arg(
                            "--max-dynamic-agents",
                            type=int,
                            default=constants.defaults.MAX_DYNAMIC_AGENTS,
                            help="maximum number of dynamic agent instances at one time",
                        ),
                        # Hidden from --help output.
                        Arg(
                            "--static-agents",
                            type=int,
                            default=constants.defaults.STATIC_AGENTS,
                            help=argparse.SUPPRESS,
                        ),
                        Arg(
                            "--min-cpu-platform-master",
                            type=str,
                            default=constants.defaults.MIN_CPU_PLATFORM_MASTER,
                            help="minimum cpu platform for master instances",
                        ),
                        Arg(
                            "--min-cpu-platform-agent",
                            type=str,
                            default=constants.defaults.MIN_CPU_PLATFORM_AGENT,
                            help="minimum cpu platform for agent instances",
                        ),
                        Arg(
                            "--scheduler-type",
                            type=str,
                            choices=["fair_share", "priority", "round_robin"],
                            default=constants.defaults.SCHEDULER_TYPE,
                            help="scheduler to use",
                        ),
                        # Parsed explicitly: with type=bool, "--preemption-enabled
                        # false" would have evaluated to True.
                        Arg(
                            "--preemption-enabled",
                            type=_parse_bool_flag,
                            default=constants.defaults.PREEMPTION_ENABLED,
                            help="whether task preemption is supported in the scheduler "
                            "(only configurable for priority scheduler).",
                        ),
                        Arg(
                            "--cpu-env-image",
                            type=str,
                            default="",
                            help="Docker image for CPU tasks",
                        ),
                        Arg(
                            "--gpu-env-image",
                            type=str,
                            default="",
                            help="Docker image for GPU tasks",
                        ),
                        Arg(
                            "--master-config-template-path",
                            type=Path,
                            default=None,
                            help="path to master yaml template",
                        ),
                        Arg(
                            "--tf-state-gcs-bucket-name",
                            type=str,
                            default=None,
                            help="use the GCS bucket to store the terraform state "
                            "instead of a local directory",
                        ),
                    ],
                ),
            ],
        ),
        # Takes no arguments of its own.
        Cmd(
            "dump-master-config-template",
            handle_dump_master_config_template,
            "dump default master config template",
            [],
        ),
    ],
)
コード例 #24
0
        help="run as the given user",
        metavar="username",
        default=None),
    Arg("-m",
        "--master",
        help="master address",
        metavar="address",
        default=get_default_master_address()),
    Arg("-v",
        "--version",
        action="version",
        help="print CLI version and exit",
        version="%(prog)s {}".format(determined.__version__)),
    Cmd("preview-search", preview_search, "preview search", [
        Arg("config_file",
            type=FileType("r"),
            help="experiment config file (.yaml)")
    ]),
    deploy_cmd,
]  # type: List[object]

# fmt: on

all_args_description = (args_description + experiment_args_description +
                        checkpoint_args_description + master_args_description +
                        model_args_description + agent_args_description +
                        notebook_args_description + job_args_description +
                        resources_args_description + shell_args_description +
                        task_args_description + template_args_description +
                        tensorboard_args_description + trial_args_description +
                        remote_args_description + user_args_description +
コード例 #25
0
ファイル: shell.py プロジェクト: wbwatkinson/determined
 # "shell" command group: list, inspect, start, open, tail and kill shells.
 Cmd("shell", None, "manage shells", [
     # "list" is flagged is_default=True, i.e. it is the group's default
     # subcommand.
     Cmd("list", command.list, "list shells", [
         Arg("-q", "--quiet", action="store_true",
             help="only display the IDs"),
         Arg("--all", "-a", action="store_true",
             help="show all shells (including other users')")
     ], is_default=True),
     Cmd("config", command.config,
         "display shell config", [
             Arg("id", type=str, help="shell ID"),
         ]),
     # "start": creates a new shell from optional configuration.
     Cmd("start", start_shell, "start a new shell", [
         # nargs="*": zero or more extra words are collected as SSH options.
         Arg("ssh_opts", nargs="*", help="additional SSH options when connecting to the shell"),
         Arg("--config-file", default=None, type=FileType("r"),
             help="command config file (.yaml)"),
         # action="append": the option may be repeated; values accumulate in a
         # list.
         Arg("-v", "--volume", action="append", default=[],
             help=VOLUME_DESC),
         Arg("-c", "--context", default=None, type=Path, help=CONTEXT_DESC),
         # Repeatable, like --volume.
         Arg("--config", action="append", default=[], help=CONFIG_DESC),
         # A boolean switch (store_true); the passphrase is not passed as its
         # value.
         Arg("-p", "--passphrase", action="store_true",
             help="passphrase to encrypt the shell private key"),
         Arg("--template", type=str,
             help="name of template to apply to the shell configuration"),
         Arg("-d", "--detach", action="store_true",
             help="run in the background and print the ID"),
     ]),
     Cmd("open", open_shell, "open an existing shell", [
         Arg("shell_id", help="shell ID"),
         Arg("ssh_opts", nargs="*", help="additional SSH options when connecting to the shell"),
     ]),
     Cmd("logs", command.tail_logs, "fetch shell logs", [
         Arg("shell_id", help="shell ID"),
         Arg("-f", "--follow", action="store_true",
             help="follow the logs of a shell, similar to tail -f"),
         # Shows the last 200 lines unless another count is given.
         Arg("--tail", type=int, default=200,
             help="number of lines to show, counting from the end "
                  "of the log")
     ]),
     Cmd("kill", command.kill, "kill a shell", [
         # NOTE(review): ONE_OR_MORE is presumably argparse.ONE_OR_MORE ("+"),
         # i.e. at least one ID is required -- confirm the import.
         Arg("shell_id", help="shell ID", nargs=ONE_OR_MORE),
         Arg("-f", "--force", action="store_true", help="ignore errors"),
     ]),
 ])
コード例 #26
0
ファイル: version.py プロジェクト: shiyuann/determined
        print(
            termcolor.colored(
                "CLI version {} is less than master version {}. "
                "Consider upgrading the CLI.".format(client_version,
                                                     master_version),
                "yellow",
            ),
            file=sys.stderr,
        )
    elif version.Version(client_version) > version.Version(master_version):
        print(
            termcolor.colored(
                "Master version {} is less than CLI version {}. "
                "Consider upgrading the master.".format(
                    master_version, client_version),
                "yellow",
            ),
            file=sys.stderr,
        )


def describe_version(parsed_args: argparse.Namespace) -> None:
    """Print the version information for the master given on the command line.

    Args:
        parsed_args: parsed CLI arguments; only ``parsed_args.master`` is read.
    """
    # Look up the version info, then print it as rendered by the YAML formatter.
    print(render.format_object_as_yaml(get_version(parsed_args.master)))


# CLI declaration for this module: one "version" subcommand with no arguments.
args_description = [
    Cmd(
        "version",
        describe_version,
        "show version information",
        [],
    )
]  # type: List[Any]
コード例 #27
0
ファイル: experiment.py プロジェクト: eecsliu/determined
args_description = Cmd(
    "e|xperiment",
    None,
    "manage experiments",
    [
        # Inspection commands.
        Cmd(
            "list",
            list_experiments,
            "list experiments",
            [
                Arg(
                    "--all",
                    "-a",
                    action="store_true",
                    help="show all experiments (including archived and other users')",
                ),
                Arg("--csv", action="store_true", help="print as CSV"),
            ],
            is_default=True,
        ),
        Cmd("config", config, "display experiment config", [experiment_id_arg("experiment ID")]),
        Cmd(
            "describe",
            describe,
            "describe experiment",
            [
                Arg("experiment_ids", help="comma-separated list of experiment IDs to describe"),
                Arg("--metrics", action="store_true", help="display full metrics"),
                Group(
                    Arg("--csv", action="store_true", help="print as CSV"),
                    Arg("--json", action="store_true", help="print as JSON"),
                    Arg("--outdir", type=Path, help="directory to save output"),
                ),
            ],
        ),
        Cmd(
            "download-model-def",
            download_model_def,
            "download model definition",
            [
                experiment_id_arg("experiment ID"),
                Arg("--output-dir", type=Path, help="output directory", default="."),
            ],
        ),
        Cmd(
            "list-trials lt",
            list_trials,
            "list trials of experiment",
            [
                experiment_id_arg("experiment ID"),
                Arg("--csv", action="store_true", help="print as CSV"),
            ],
        ),
        Cmd(
            "list-checkpoints lc",
            checkpoint.list,
            "list checkpoints of experiment",
            [
                experiment_id_arg("experiment ID"),
                Arg(
                    "--best",
                    type=int,
                    help="Return the best N checkpoints for this experiment. "
                    "If this flag is used, only checkpoints with an associated "
                    "validation metric will be considered.",
                ),
                Arg("--csv", action="store_true", help="print as CSV"),
            ],
        ),
        # Create command.
        Cmd(
            "create",
            create,
            "create experiment",
            [
                Arg("config_file", type=FileType("r"), help="experiment config file (.yaml)"),
                Arg("model_def", type=Path, help="file or directory containing model definition"),
                Arg(
                    "-g",
                    "--git",
                    action="store_true",
                    help="Associate git metadata with this experiment. This "
                    "flag assumes that git is installed, a .git repository "
                    "exists in the model definition directory, and that the "
                    "git working tree of that repository is empty.",
                ),
                Arg(
                    "--local",
                    action="store_true",
                    help="Create the experiment in local mode instead of submitting it to the "
                    "cluster. For more information, see documentation on det.experimental.create()",
                ),
                Arg(
                    "--template",
                    type=str,
                    help="name of template to apply to the experiment configuration",
                ),
                Group(
                    Arg(
                        "-f",
                        "--follow-first-trial",
                        action="store_true",
                        help="follow the logs of the first trial that is created",
                    ),
                    Arg("--paused", action="store_true", help="do not activate the experiment"),
                    Arg(
                        "-t",
                        "--test-mode",
                        action="store_true",
                        help="Test the experiment configuration and model "
                        "definition by creating and scheduling a very small "
                        "experiment. This command will verify that a training "
                        "workload and validation workload run successfully and that "
                        "checkpoints can be saved. The test experiment will "
                        "be archived on creation.",
                    ),
                ),
            ],
        ),
        # Lifecycle management commands.
        Cmd(
            "activate",
            activate,
            "activate experiment",
            [experiment_id_arg("experiment ID to activate")],
        ),
        Cmd("cancel", cancel, "cancel experiment", [experiment_id_arg("experiment ID to cancel")]),
        Cmd("pause", pause, "pause experiment", [experiment_id_arg("experiment ID to pause")]),
        Cmd(
            "archive",
            archive,
            "archive experiment",
            [experiment_id_arg("experiment ID to archive")],
        ),
        Cmd(
            "unarchive",
            unarchive,
            "unarchive experiment",
            [experiment_id_arg("experiment ID to unarchive")],
        ),
        Cmd(
            "delete",
            delete_experiment,
            "delete experiment",
            [
                Arg("experiment_id", help="delete experiment"),
                Arg(
                    "--yes",
                    action="store_true",
                    default=False,
                    help="automatically answer yes to prompts",
                ),
            ],
        ),
        Cmd(
            "download",
            download,
            "download checkpoints for an experiment",
            [
                experiment_id_arg("experiment ID to download"),
                Arg(
                    "-o",
                    "--output-dir",
                    type=str,
                    default="checkpoints",
                    help="Desired top level directory for the checkpoints. "
                    "Checkpoints will be downloaded to "
                    "<output_dir>/<checkpoint_uuid>/<checkpoint_files>.",
                ),
                Arg(
                    "--top-n",
                    type=int,
                    default=1,
                    help="The number of checkpoints to download for the "
                    "experiment. The checkpoints are sorted by validation "
                    "metric as defined by --sort-by and --smaller-is-better."
                    "This command will select the best N checkpoints from the "
                    "top performing N trials of the experiment.",
                ),
                Arg(
                    "--sort-by",
                    type=str,
                    default=None,
                    help="The name of the validation metric to sort on. Without --sort-by, the "
                    "experiment's searcher metric is assumed. If this argument is specified, "
                    "--smaller-is-better must also be specified.",
                ),
                Arg(
                    "--smaller-is-better",
                    type=lambda s: bool(distutils.util.strtobool(s)),
                    default=None,
                    help="The sort order for metrics when using --sort-by. For "
                    "example, 'accuracy' would require passing '--smaller-is-better false'. If "
                    "--sort-by is specified, this argument must be specified.",
                ),
                Arg(
                    "-q",
                    "--quiet",
                    action="store_true",
                    help="Only print the paths to the checkpoints.",
                ),
            ],
        ),
        Cmd(
            "kill", kill_experiment, "kill experiment", [Arg("experiment_id", help="experiment ID")]
        ),
        Cmd(
            "wait",
            wait,
            "wait for experiment to reach terminal state",
            [
                experiment_id_arg("experiment ID"),
                Arg(
                    "--polling-interval",
                    type=int,
                    default=5,
                    help="the interval (in seconds) to poll for updated state",
                ),
            ],
        ),
        # Attribute setting commands.
        Cmd(
            "label",
            None,
            "manage experiment labels",
            [
                Cmd(
                    "add",
                    add_label,
                    "add label",
                    [experiment_id_arg("experiment ID"), Arg("label", help="label")],
                ),
                Cmd(
                    "remove",
                    remove_label,
                    "remove label",
                    [experiment_id_arg("experiment ID"), Arg("label", help="label")],
                ),
            ],
        ),
        Cmd(
            "set",
            None,
            "set experiment attributes",
            [
                Cmd(
                    "description",
                    set_description,
                    "set experiment description",
                    [
                        experiment_id_arg("experiment ID to modify"),
                        Arg("description", help="experiment description"),
                    ],
                ),
                Cmd(
                    "gc-policy",
                    set_gc_policy,
                    "set experiment GC policy and run GC",
                    [
                        experiment_id_arg("experiment ID to modify"),
                        Arg(
                            "--save-experiment-best",
                            type=int,
                            required=True,
                            help="number of best checkpoints per experiment " "to save",
                        ),
                        Arg(
                            "--save-trial-best",
                            type=int,
                            required=True,
                            help="number of best checkpoints per trial to save",
                        ),
                        Arg(
                            "--save-trial-latest",
                            type=int,
                            required=True,
                            help="number of latest checkpoints per trial to save",
                        ),
                        Arg(
                            "--yes",
                            action="store_true",
                            default=False,
                            help="automatically answer yes to prompts",
                        ),
                    ],
                ),
                Cmd(
                    "max-slots",
                    set_max_slots,
                    "set `max_slots` of experiment",
                    [
                        experiment_id_arg("experiment ID to modify"),
                        Arg("max_slots", type=none_or_int, help="max slots"),
                    ],
                ),
                Cmd(
                    "weight",
                    set_weight,
                    "set `weight` of experiment",
                    [
                        experiment_id_arg("experiment ID to modify"),
                        Arg("weight", type=float, help="weight"),
                    ],
                ),
            ],
        ),
    ],
)
コード例 #28
0
ファイル: master.py プロジェクト: shiyuann/determined
                # The `tail` parameter only makes sense the first time we
                # fetch logs.
                response = api.get(
                    args.master,
                    "logs",
                    params={"greater_than_id": str(latest_log_id)})
                latest_log_id = process_response(response, latest_log_id)
            except KeyboardInterrupt:
                break


# fmt: off

# Declarative CLI tree for the master command group: one top-level command
# carrying a `config` subcommand and a `logs` subcommand.
args_description = [
    Cmd(
        "m|aster",
        None,  # no handler on the group itself; the handlers sit on the subcommands
        "manage master",
        [
            # `config` is declared with an empty argument list.
            Cmd("config", config, "fetch master config as JSON", []),
            Cmd(
                "logs",
                logs,
                "fetch master logs",
                [
                    Arg(
                        "-f",
                        "--follow",
                        action="store_true",
                        help="follow the logs of master, similar to tail -f",
                    ),
                    Arg(
                        "--tail",
                        type=int,
                        help="number of lines to show, "
                        "counting from the end of the log (default is all)",
                    ),
                ],
            ),
        ],
    ),
]  # type: List[Any]

# fmt: on
コード例 #29
0
 # Declarative CLI tree for the `tensorboard` command group.
 Cmd(
     "tensorboard",
     None,
     "manage TensorBoard instances",
     [
         # Listing is flagged as the group's default subcommand.
         Cmd(
             "list ls",
             partial(command.list_tasks),
             "list TensorBoard instances",
             [
                 Arg(
                     "-q",
                     "--quiet",
                     action="store_true",
                     help="only display the IDs",
                 ),
                 Arg(
                     "--all",
                     "-a",
                     action="store_true",
                     help="show all TensorBoards (including other users')",
                 ),
                 Group(format_args["json"], format_args["csv"]),
             ],
             is_default=True,
         ),
         # Launching: experiments are positional, trials go through -t.
         Cmd(
             "start",
             start_tensorboard,
             "start new TensorBoard instance",
             [
                 Arg(
                     "experiment_ids",
                     type=int,
                     nargs="*",
                     help="experiment IDs to load into TensorBoard. "
                     "At most 100 trials from the specified experiment will be loaded "
                     "into TensorBoard. If the experiment has more trials, the 100 "
                     "best-performing trials will be used.",
                 ),
                 Arg(
                     "-t",
                     "--trial-ids",
                     nargs=ONE_OR_MORE,
                     type=int,
                     help="trial IDs to load into TensorBoard; "
                     "at most 100 trials are allowed per TensorBoard instance",
                 ),
                 Arg(
                     "--config-file",
                     default=None,
                     type=FileType("r"),
                     help="command config file (.yaml)",
                 ),
                 Arg(
                     "-c",
                     "--context",
                     default=None,
                     type=Path,
                     help=CONTEXT_DESC,
                 ),
                 Arg(
                     "--config",
                     action="append",
                     default=[],
                     help=CONFIG_DESC,
                 ),
                 Arg(
                     "--no-browser",
                     action="store_true",
                     help="don't open TensorBoard in a browser after startup",
                 ),
                 Arg(
                     "-d",
                     "--detach",
                     action="store_true",
                     help="run in the background and print the ID",
                 ),
             ],
         ),
         Cmd(
             "config",
             partial(command.config),
             "display TensorBoard config",
             [
                 Arg("tensorboard_id", type=str, help="TensorBoard ID"),
             ],
         ),
         Cmd(
             "open",
             open_tensorboard,
             "open existing TensorBoard instance",
             [
                 Arg("tensorboard_id", help="TensorBoard ID"),
             ],
         ),
         # The positional is stored as `task_id` but displayed as
         # `tensorboard_id` via metavar; the shared log options are appended.
         Cmd(
             "logs",
             partial(task.logs),
             "fetch TensorBoard instance logs",
             [
                 Arg("task_id", help="TensorBoard ID", metavar="tensorboard_id"),
                 *task.common_log_options,
             ],
         ),
         Cmd(
             "kill",
             partial(command.kill),
             "kill TensorBoard instance",
             [
                 Arg("tensorboard_id", help="TensorBoard ID", nargs=ONE_OR_MORE),
                 Arg("-f", "--force", action="store_true", help="ignore errors"),
             ],
         ),
         # Attribute setters live under a nested `set` group.
         Cmd(
             "set",
             None,
             "set TensorBoard attributes",
             [
                 Cmd(
                     "priority",
                     partial(command.set_priority),
                     "set TensorBoard priority",
                     [
                         Arg("tensorboard_id", help="TensorBoard ID"),
                         Arg("priority", type=int, help="priority"),
                     ],
                 ),
             ],
         ),
     ],
 )
コード例 #30
0
ファイル: cli.py プロジェクト: wbwatkinson/determined
def _parse_bool(value: str) -> bool:
    """Convert a command-line string into a real boolean.

    Using ``type=bool`` for an option is a trap: ``bool("false")`` is ``True``
    because every non-empty string is truthy, so the option could never be
    switched off by passing a value. This converter maps the usual spellings
    explicitly instead.

    Args:
        value: raw string taken from the command line.

    Returns:
        ``True`` for 1/true/t/yes/y/on and ``False`` for 0/false/f/no/n/off
        (case-insensitive, surrounding whitespace ignored).

    Raises:
        argparse.ArgumentTypeError: if ``value`` is none of the spellings above.
    """
    normalized = value.strip().lower()
    if normalized in ("1", "true", "t", "yes", "y", "on"):
        return True
    if normalized in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value such as 'true' or 'false', got {!r}".format(value)
    )


# Declarative CLI tree for the `gcp` command group: `down` deletes a cluster,
# `up` creates one. Options are bundled into titled ArgGroups.
args_description = Cmd(
    "gcp",
    None,
    "gcp_help",
    [
        Cmd(
            "down",
            handle_down,
            "delete gcp cluster",
            [
                ArgGroup(
                    "optional named arguments",
                    None,
                    [
                        Arg(
                            "--local-state-path",
                            type=str,
                            # NOTE: the working directory is captured once, when
                            # this module-level expression is evaluated.
                            default=os.getcwd(),
                            help="local directory for storing cluster state",
                        ),
                    ],
                ),
            ],
        ),
        Cmd(
            "up",
            handle_up,
            "create gcp cluster",
            [
                ArgGroup(
                    "required named arguments",
                    None,
                    [
                        Arg(
                            "--cluster-id",
                            # validate_cluster_id() is called here, so its return
                            # value (not the function itself) is the converter.
                            type=validate_cluster_id(),
                            default=None,
                            required=True,
                            help="unique identifier to name and tag resources",
                        ),
                        Arg(
                            "--project-id",
                            type=str,
                            default=None,
                            required=True,
                            help="project ID to create the cluster in",
                        ),
                    ],
                ),
                ArgGroup(
                    "optional named arguments",
                    None,
                    [
                        Arg(
                            "--dry-run",
                            action="store_true",
                            help="return the infrastructure plan to be executed "
                            "based on your arguments",
                        ),
                        Arg(
                            "--keypath",
                            type=str,
                            default=None,
                            help="path to service account key if not using default credentials",
                        ),
                        Arg(
                            "--network",
                            type=str,
                            default="det-default",
                            help="network name to create "
                            "(the network should not already exist in the project)",
                        ),
                        # help=argparse.SUPPRESS keeps an option out of --help output.
                        Arg(
                            "--det-version",
                            type=str,
                            default=determined.__version__,
                            help=argparse.SUPPRESS,
                        ),
                        Arg(
                            "--region",
                            type=str,
                            default=constants.defaults.REGION,
                            help="region to create the cluster in (defaults to us-west1)",
                        ),
                        Arg(
                            "--zone",
                            type=str,
                            default=None,
                            help="zone to create the cluster in (defaults to `region`-b)",
                        ),
                        Arg(
                            "--environment-image",
                            type=str,
                            default=constants.defaults.ENVIRONMENT_IMAGE,
                            help=argparse.SUPPRESS,
                        ),
                        Arg(
                            "--local-state-path",
                            type=str,
                            default=os.getcwd(),
                            help="local directory for storing cluster state",
                        ),
                        Arg(
                            "--preemptible",
                            # Not `bool`: bool("false") is True. See _parse_bool.
                            type=_parse_bool,
                            default=False,
                            help="whether to use preemptible instances for dynamic agents",
                        ),
                        Arg(
                            "--operation-timeout-period",
                            type=str,
                            default=constants.defaults.OPERATION_TIMEOUT_PERIOD,
                            help="operation timeout before retrying, e.g. 5m for 5 minutes",
                        ),
                        Arg(
                            "--master-instance-type",
                            type=str,
                            default=constants.defaults.MASTER_INSTANCE_TYPE,
                            help="instance type for master",
                        ),
                        Arg(
                            "--cpu-agent-instance-type",
                            type=str,
                            default=constants.defaults.CPU_AGENT_INSTANCE_TYPE,
                            help="instance type for agents in the CPU resource pool",
                        ),
                        Arg(
                            "--gpu-agent-instance-type",
                            type=str,
                            default=constants.defaults.GPU_AGENT_INSTANCE_TYPE,
                            help="instance type for agents in the GPU resource pool",
                        ),
                        Arg(
                            "--db-password",
                            type=str,
                            default=constants.defaults.DB_PASSWORD,
                            help="password for master database",
                        ),
                        Arg(
                            "--max-cpu-containers-per-agent",
                            type=str,
                            default=constants.defaults.MAX_CPU_CONTAINERS_PER_AGENT,
                            help="max CPU containers running for agents in the CPU resource pool",
                        ),
                        Arg(
                            "--max-idle-agent-period",
                            type=str,
                            default=constants.defaults.MAX_IDLE_AGENT_PERIOD,
                            help="max agent idle time before it is shut down, "
                            "e.g. 30m for 30 minutes",
                        ),
                        Arg(
                            "--max-agent-starting-period",
                            type=str,
                            default=constants.defaults.MAX_AGENT_STARTING_PERIOD,
                            help="max agent starting time before retrying, e.g. 30m for 30 minutes",
                        ),
                        Arg(
                            "--port",
                            type=int,
                            default=constants.defaults.PORT,
                            help="port to use for communication on master instance",
                        ),
                        Arg(
                            "--gpu-type",
                            type=str,
                            default=constants.defaults.GPU_TYPE,
                            help="type of GPU to use on agents",
                        ),
                        Arg(
                            "--gpu-num",
                            type=int,
                            default=constants.defaults.GPU_NUM,
                            help="number of GPUs per agent instance",
                        ),
                        Arg(
                            "--min-dynamic-agents",
                            type=int,
                            default=constants.defaults.MIN_DYNAMIC_AGENTS,
                            help="minimum number of dynamic agent instances at one time",
                        ),
                        Arg(
                            "--max-dynamic-agents",
                            type=int,
                            default=constants.defaults.MAX_DYNAMIC_AGENTS,
                            help="maximum number of dynamic agent instances at one time",
                        ),
                        Arg(
                            "--static-agents",
                            type=int,
                            default=constants.defaults.STATIC_AGENTS,
                            help=argparse.SUPPRESS,
                        ),
                        Arg(
                            "--min-cpu-platform-master",
                            type=str,
                            default=constants.defaults.MIN_CPU_PLATFORM_MASTER,
                            help="minimum cpu platform for master instances",
                        ),
                        Arg(
                            "--min-cpu-platform-agent",
                            type=str,
                            default=constants.defaults.MIN_CPU_PLATFORM_AGENT,
                            help="minimum cpu platform for agent instances",
                        ),
                        Arg(
                            "--scheduler-type",
                            type=validate_scheduler_type(),
                            default=constants.defaults.SCHEDULER_TYPE,
                            help="scheduler to use (defaults to fair_share).",
                        ),
                        Arg(
                            "--preemption-enabled",
                            # Not `bool`: bool("false") is True. See _parse_bool.
                            type=_parse_bool,
                            default=constants.defaults.PREEMPTION_ENABLED,
                            help="whether task preemption is supported in the scheduler "
                            "(only configurable for priority scheduler).",
                        ),
                        Arg(
                            "--cpu-env-image",
                            type=str,
                            default="",
                            help="Docker image for CPU tasks",
                        ),
                        Arg(
                            "--gpu-env-image",
                            type=str,
                            default="",
                            help="Docker image for GPU tasks",
                        ),
                    ],
                ),
            ],
        ),
    ],
)