Exemple #1
0
def deploy_gcp(command: str, args: argparse.Namespace) -> None:
    """Deploy (``up``) or tear down (``down``) a Determined cluster on GCP.

    The process working directory is switched to ``args.local_state_path``
    (created if missing) and ``TF_DATA_DIR`` is pointed at a ``terraform_data``
    directory inside it before any Terraform-driving helper is invoked.

    Args:
        command: Subcommand name. ``"down"`` deletes the deployment; any other
            value runs the deployment flow.
        args: Parsed CLI arguments. Every attribute whose value is not ``None``
            is forwarded to the ``gcp`` helpers as configuration.
            ``args.local_state_path`` is rewritten in place to an absolute path.

    Raises:
        ValueError: If ``args.master_config_template_path`` is set but does
            not exist.
        SystemExit: With status 1 when only one of the CPU/GPU environment
            images is given, or when the master health check times out.
    """
    # Set local state path as our current working directory. This is a no-op
    # when the --local-state-path arg isn't used. We do this because Terraform
    # module directories are populated with relative paths, and we want to
    # support users running gcp up and down commands from different directories.
    # Also, because we change the working directory, we ensure that
    # local_state_path is an absolute path.
    args.local_state_path = os.path.abspath(args.local_state_path)
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test when the directory appears concurrently.
    os.makedirs(args.local_state_path, exist_ok=True)
    os.chdir(args.local_state_path)

    # tf_state_gcs_bucket_name argument is not necessary for `down` operation, because TF reads it
    # from local tf files, so the attribute may be absent from the namespace.
    gcs_bucket = getattr(args, "tf_state_gcs_bucket_name", None)
    if gcs_bucket:
        print("Using GCS bucket for state:", gcs_bucket)
    else:
        print("Using local state path:", args.local_state_path)

    # Set the TF_DATA_DIR where Terraform will store its supporting files.
    env = os.environ.copy()
    env["TF_DATA_DIR"] = os.path.join(args.local_state_path, "terraform_data")

    # Collect every argument that was actually provided (i.e. is not None).
    det_configs = {name: value for name, value in vars(args).items() if value is not None}

    # Handle down subcommand.
    if command == "down":
        gcp.delete(det_configs, env, args.yes)
        print("Delete Successful")
        return

    # Handle up subcommand: custom environment images must be given as a pair.
    if bool(args.cpu_env_image) != bool(args.gpu_env_image):
        print("If a CPU or GPU image is specified, both should be.")
        sys.exit(1)

    template_path = args.master_config_template_path
    if template_path:
        if not template_path.exists():
            raise ValueError(
                f"Input master config template doesn't exist: {args.master_config_template_path}"
            )
        with template_path.open("r") as fin:
            det_configs["master_config_template"] = fin.read()

    # Not all args will be passed to Terraform, list the ones that won't be
    # TODO(ilia): Switch to filtering variables_to_include instead, i.e.
    #             only pass the ones recognized by terraform.
    variables_to_exclude = [
        "command",
        "dry_run",
        "environment",
        "local_state_path",
        "master",
        "user",
        "no_preflight_checks",
        "no_wait_for_master",
        "yes",
        "no_prompt",
        "master_config_template_path",
        "tf_state_gcs_bucket_name",
        "func",
        "_command",
        "_subcommand",
        "_subsubcommand",
    ]

    # Dry-run flag: only print the plan, do not apply it.
    if args.dry_run:
        gcp.dry_run(det_configs, env, variables_to_exclude)
        print("Printed plan. To execute, run `det deploy gcp`")
        return

    print("Starting Determined deployment on GCP...\n")
    gcp.deploy(det_configs, env, variables_to_exclude)

    if args.no_wait_for_master:
        print("Determined Deployment Successful")
        print(
            "Please allow 1-5 minutes for the master instance to be accessible via the web-ui\n"
        )
        return

    try:
        gcp.wait_for_master(det_configs, env, timeout=5 * 60)
    except MasterTimeoutExpired:
        print(
            colored(
                "Determined cluster has been deployed, but master health check has failed.",
                "red",
            ))
        print("For details, SSH to master instance and run "
              "`sudo journalctl -u google-startup-scripts.service`"
              " or check /var/log/cloud-init-output.log.")
        sys.exit(1)

    print("Determined Deployment Successful")
Exemple #2
0
def deploy_gcp(command: str, args: argparse.Namespace) -> None:
    """Deploy (``up``) or tear down (``down``) a Determined cluster on GCP.

    The process working directory is switched to ``args.local_state_path``
    (created if missing) and ``TF_DATA_DIR`` is pointed at a ``terraform_data``
    directory inside it before any Terraform-driving helper is invoked.

    Args:
        command: Subcommand name. ``"down"`` deletes the deployment; any other
            value runs the deployment flow.
        args: Parsed CLI arguments. Every attribute whose value is not ``None``
            is forwarded to the ``gcp`` helpers as configuration.
            ``args.local_state_path`` is rewritten in place to an absolute path.

    Raises:
        SystemExit: With status 1 when only one of the CPU/GPU environment
            images is given, or when the master health check times out.
    """
    # Set local state path as our current working directory. This is a no-op
    # when the --local-state-path arg isn't used. We do this because Terraform
    # module directories are populated with relative paths, and we want to
    # support users running gcp up and down commands from different directories.
    # Also, because we change the working directory, we ensure that
    # local_state_path is an absolute path.
    args.local_state_path = os.path.abspath(args.local_state_path)
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test when the directory appears concurrently.
    os.makedirs(args.local_state_path, exist_ok=True)
    os.chdir(args.local_state_path)

    # Set the TF_DATA_DIR where Terraform will store its supporting files.
    env = os.environ.copy()
    env["TF_DATA_DIR"] = os.path.join(args.local_state_path, "terraform_data")

    # Collect every argument that was actually provided (i.e. is not None).
    det_configs = {name: value for name, value in vars(args).items() if value is not None}

    # Delete
    if command == "down":
        gcp.delete(det_configs, env)
        print("Delete Successful")
        return

    # Custom environment images must be given as a pair.
    if bool(args.cpu_env_image) != bool(args.gpu_env_image):
        print("If a CPU or GPU image is specified, both should be.")
        sys.exit(1)

    # Not all args will be passed to Terraform, list the ones that won't be
    # TODO(ilia): Switch to filtering variables_to_include instead, i.e.
    # only pass the ones recognized by terraform.
    variables_to_exclude = [
        "command",
        "dry_run",
        "environment",
        "local_state_path",
        "master",
        "user",
        "no_preflight_checks",
        "no_wait_for_master",
        "func",
        "_command",
        "_subcommand",
        "_subsubcommand",
    ]

    # Dry-run flag: only print the plan, do not apply it.
    if args.dry_run:
        gcp.dry_run(det_configs, env, variables_to_exclude)
        print("Printed plan. To execute, run `det deploy gcp`")
        return

    print("Starting Determined Deployment")
    gcp.deploy(det_configs, env, variables_to_exclude)

    if args.no_wait_for_master:
        print("Determined Deployment Successful")
        print("Please allow 1-5 minutes for the master instance to be accessible via the web-ui\n")
        return

    try:
        gcp.wait_for_master(det_configs, env, timeout=5 * 60)
    except MasterTimeoutExpired:
        print(
            colored(
                "Determined cluster has been deployed, but master health check has failed.",
                "red",
            )
        )
        print("For details, SSH to master instance and check /var/log/cloud-init-output.log.")
        sys.exit(1)

    print("Determined Deployment Successful")