예제 #1
0
def test_multiple_param_changes(mocker, pcluster_config_reader, test_datadir, src_cluster_label, dst_cluster_label):
    """Patch two configs that differ in three vpc parameters and verify the reported changes."""
    _do_mocking_for_tests(mocker)
    dst_config_file = "pcluster.config.dst.ini"
    duplicate_config_file(dst_config_file, test_datadir)

    old_subnet, new_subnet = "subnet-12345678", "subnet-1234567a"
    old_sg, new_sg = "sg-12345678", "sg-1234567a"

    # Source configuration: defaults overridden with the "old" values
    src_dict = dict(default_cluster_params)
    src_dict.update(
        {
            "cluster_label": src_cluster_label,
            "master_subnet_id": old_subnet,
            "compute_subnet_id": old_subnet,
            "additional_sg": old_sg,
        }
    )
    src_conf = PclusterConfig(config_file=pcluster_config_reader(**src_dict), fail_on_file_absence=True)

    # Destination configuration: defaults overridden with the "new" values
    dst_dict = dict(default_cluster_params)
    dst_dict.update(
        {
            "cluster_label": dst_cluster_label,
            "master_subnet_id": new_subnet,
            "compute_subnet_id": new_subnet,
            "additional_sg": new_sg,
        }
    )
    dst_conf = PclusterConfig(config_file=pcluster_config_reader(dst_config_file, **dst_dict))

    # One expected Change per modified parameter, each with its own update policy
    changed_params = (
        ("master_subnet_id", old_subnet, new_subnet, UpdatePolicy.UNSUPPORTED),
        ("compute_subnet_id", old_subnet, new_subnet, UpdatePolicy.COMPUTE_FLEET_STOP),
        ("additional_sg", old_sg, new_sg, UpdatePolicy.SUPPORTED),
    )
    expected_changes = [
        Change("vpc", "default", key, old_value, new_value, policy)
        for key, old_value, new_value, policy in changed_params
    ]

    _check_patch(src_conf, dst_conf, expected_changes, UpdatePolicy.UNSUPPORTED)
예제 #2
0
def start(args):
    """Restore ASG limits or awsbatch CE to min/max/desired."""
    stack_name = utils.get_stack_name(args.cluster_name)
    pcluster_config = PclusterConfig(config_file=args.config_file, cluster_name=args.cluster_name)
    cluster_section = pcluster_config.get_section("cluster")
    read_param = cluster_section.get_param_value

    if read_param("scheduler") != "awsbatch":
        LOGGER.info("Starting compute fleet : %s", args.cluster_name)
        max_queue_size = read_param("max_queue_size")
        # The initial queue size is used as min and desired only when it has to be maintained
        if read_param("maintain_initial_size"):
            min_desired_size = read_param("initial_queue_size")
        else:
            min_desired_size = 0
        asg_name = _get_asg_name(stack_name)
        _set_asg_limits(asg_name=asg_name, min=min_desired_size, max=max_queue_size, desired=min_desired_size)
        return

    LOGGER.info("Enabling AWS Batch compute environment : %s", args.cluster_name)
    max_vcpus = read_param("max_vcpus")
    desired_vcpus = read_param("desired_vcpus")
    min_vcpus = read_param("min_vcpus")
    ce_name = _get_batch_ce(stack_name)
    _start_batch_ce(ce_name=ce_name, min_vcpus=min_vcpus, desired_vcpus=desired_vcpus, max_vcpus=max_vcpus)
예제 #3
0
def test_single_param_change(
    test_datadir,
    pcluster_config_reader,
    mocker,
    section_key,
    section_label,
    param_key,
    src_param_value,
    dst_param_value,
    change_update_policy,
):
    """Patch two configs that differ in a single parameter and verify the single reported change."""
    _do_mocking_for_tests(mocker)
    dst_config_file = "pcluster.config.dst.ini"
    duplicate_config_file(dst_config_file, test_datadir)

    # Source configuration: defaults with the parameter under test set to its source value
    src_dict = dict(default_cluster_params)
    src_dict[param_key] = src_param_value
    src_conf = PclusterConfig(config_file=pcluster_config_reader(**src_dict), fail_on_file_absence=True)

    # Destination configuration: same defaults with the destination value
    dst_dict = dict(default_cluster_params)
    dst_dict[param_key] = dst_param_value
    dst_conf = PclusterConfig(config_file=pcluster_config_reader(dst_config_file, **dst_dict))

    expected_changes = [
        Change(section_key, section_label, param_key, src_param_value, dst_param_value, change_update_policy)
    ]
    _check_patch(src_conf, dst_conf, expected_changes, change_update_policy)
예제 #4
0
def test_config_to_json(capsys, boto3_stubber, test_datadir, pcluster_config_reader, queues):
    """Convert a config with the given queues to storage data and compare its json params."""
    queue_settings = ",".join(queues)

    # Derive a dedicated config file name from the queue list and copy the initial config to it
    config_name = "pcluster.config.{0}.ini".format("_".join(queues))
    duplicate_config_file(config_name, test_datadir)

    # Expected json params for the given queues; they also drive the boto3 stubs
    expected_json_params = _prepare_json_config(queues, test_datadir)
    _mock_boto3(boto3_stubber, expected_json_params, head_node_instance_type="c4.xlarge")

    # Render the copied config file and load it
    rendered_config = pcluster_config_reader(config_name, queue_settings=queue_settings)
    pcluster_config = PclusterConfig(config_file=rendered_config, fail_on_file_absence=True)

    # Serialize both sides with sorted keys so the comparison does not depend on key order
    actual_json = json.dumps(pcluster_config.to_storage().json_params, indent=2, sort_keys=True)
    expected_json = json.dumps(expected_json_params, indent=2, sort_keys=True)
    assert_that(actual_json).is_equal_to(expected_json)

    # Nothing must have been written to stderr
    assert_that(capsys.readouterr().err).is_equal_to("")
예제 #5
0
def test_config_patch(mocker):
    """Verify that patching two freshly created configurations yields no changes."""
    _do_mocking_for_tests(mocker)
    # PclusterConfig needs a region to avoid the no region exception; the chosen one is arbitrary.
    os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
    base_conf = PclusterConfig()
    target_conf = PclusterConfig()
    # Two new configs must always be equal
    no_changes = []
    _check_patch(base_conf, target_conf, no_changes, UpdatePolicy.SUPPORTED)
예제 #6
0
def status(args):  # noqa: C901 FIXME!!!
    """
    Print the CloudFormation stack status of a cluster, optionally waiting for a final state.

    Unless args.nowait is set, the stack is polled every 5 seconds until its status is one of the
    listed final states; the latest stack event is shown on the same terminal line while waiting.
    A ClientError is logged and exits with status 1; a KeyboardInterrupt exits with status 0.

    :param args: pcluster CLI args; cluster_name, config_file and nowait are read here
    """
    stack_name = utils.get_stack_name(args.cluster_name)

    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    cfn = boto3.client("cloudformation")
    try:
        stack = utils.get_stack(stack_name, cfn)
        # "\r" rewrites the current terminal line, so later writes overwrite this status
        sys.stdout.write("\rStatus: %s" % stack.get("StackStatus"))
        sys.stdout.flush()
        if not args.nowait:
            # Poll until the stack reaches one of the statuses listed below
            while stack.get("StackStatus") not in [
                "CREATE_COMPLETE",
                "UPDATE_COMPLETE",
                "UPDATE_ROLLBACK_COMPLETE",
                "ROLLBACK_COMPLETE",
                "CREATE_FAILED",
                "DELETE_FAILED",
            ]:
                time.sleep(5)
                stack = utils.get_stack(stack_name, cfn)
                # Only the first returned event is displayed
                events = utils.get_stack_events(stack_name)[0]
                # Pad to 80 columns so a shorter message fully overwrites the previous one
                resource_status = (
                    "Status: %s - %s" % (events.get("LogicalResourceId"), events.get("ResourceStatus"))
                ).ljust(80)
                sys.stdout.write("\r%s" % resource_status)
                sys.stdout.flush()
            sys.stdout.write("\rStatus: %s\n" % stack.get("StackStatus"))
            sys.stdout.flush()
            if stack.get("StackStatus") in ["CREATE_COMPLETE", "UPDATE_COMPLETE", "UPDATE_ROLLBACK_COMPLETE"]:
                # Stack outputs are printed only when the head node is reported as running
                state = _poll_head_node_state(stack_name)
                if state == "running":
                    _print_stack_outputs(stack)
                _print_compute_fleet_status(args.cluster_name, stack)
            elif stack.get("StackStatus") in ["ROLLBACK_COMPLETE", "CREATE_FAILED", "DELETE_FAILED"]:
                # Log every stack event whose resource status is a failure
                events = utils.get_stack_events(stack_name)
                for event in events:
                    if event.get("ResourceStatus") in ["CREATE_FAILED", "DELETE_FAILED", "UPDATE_FAILED"]:
                        LOGGER.info(
                            "%s %s %s %s %s",
                            event.get("Timestamp"),
                            event.get("ResourceStatus"),
                            event.get("ResourceType"),
                            event.get("LogicalResourceId"),
                            event.get("ResourceStatusReason"),
                        )
        else:
            # Not waiting: just terminate the status line written above
            sys.stdout.write("\n")
            sys.stdout.flush()
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
예제 #7
0
def test_example_config_consistency(mocker):
    """Validate example file and try to convert to CFN."""
    mocker.patch("pcluster.config.param_types.get_avail_zone", return_value="mocked_avail_zone")

    example_config_path = utils.get_pcluster_config_example()
    pcluster_config = PclusterConfig(config_file=example_config_path, fail_on_file_absence=True)

    # The converted configuration must expose the expected number of CFN parameters
    param_count = len(pcluster_config.to_cfn())
    assert_that(param_count).is_equal_to(CFN_CONFIG_NUM_OF_PARAMS)
예제 #8
0
def init_pcluster_config_from_configparser(config_parser, validate=True):
    """
    Build a PclusterConfig from an in-memory config parser.

    The parser content is written to a named temporary file (created with delete=False, so it is
    not removed on close) and the "default" cluster section is loaded from it.

    :param config_parser: config parser whose content is dumped to the temporary file
    :param validate: when true, call validate() on the created configuration
    :return: the created PclusterConfig
    """
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_path = tmp_file.name

        # The file is reopened by name in text mode to let the parser write into it
        with open(tmp_path, "w") as writer:
            config_parser.write(writer)

        pcluster_config = PclusterConfig(config_file=tmp_path, cluster_label="default", fail_on_file_absence=True)
        if validate:
            pcluster_config.validate()
    return pcluster_config
예제 #9
0
def delete(args):
    """
    Delete the CloudFormation stack of a cluster, optionally waiting for the deletion to finish.

    Unless args.nowait is set, the stack status is polled every 5 seconds while it is
    DELETE_IN_PROGRESS, showing the latest stack event on the same terminal line.
    A KeyboardInterrupt exits with status 0; see the ClientError handler for the other exit paths.

    :param args: pcluster CLI args; cluster_name, config_file and nowait are read here
    """
    # Set once delete_stack has been issued; used below to interpret a "does not exist" error
    saw_update = False
    LOGGER.info("Deleting: %s", args.cluster_name)
    stack_name = utils.get_stack_name(args.cluster_name)

    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    cfn = boto3.client("cloudformation")
    try:
        # delete_stack does not raise an exception if stack does not exist
        # Use describe_stacks to explicitly check if the stack exists
        cfn.describe_stacks(StackName=stack_name)
        cfn.delete_stack(StackName=stack_name)
        saw_update = True
        stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
        # "\r" rewrites the current terminal line, so later writes overwrite this status
        sys.stdout.write("\rStatus: %s" % stack_status)
        sys.stdout.flush()
        LOGGER.debug("Status: %s", stack_status)
        if not args.nowait:
            while stack_status == "DELETE_IN_PROGRESS":
                time.sleep(5)
                stack_status = utils.get_stack(stack_name,
                                               cfn).get("StackStatus")
                # Only the first returned event is displayed
                events = cfn.describe_stack_events(
                    StackName=stack_name).get("StackEvents")[0]
                # Pad to 80 columns so a shorter message fully overwrites the previous one
                resource_status = ("Status: %s - %s" %
                                   (events.get("LogicalResourceId"),
                                    events.get("ResourceStatus"))).ljust(80)
                sys.stdout.write("\r%s" % resource_status)
                sys.stdout.flush()
            sys.stdout.write("\rStatus: %s\n" % stack_status)
            sys.stdout.flush()
            LOGGER.debug("Status: %s", stack_status)
        else:
            # Not waiting: just terminate the status line written above
            sys.stdout.write("\n")
            sys.stdout.flush()
        if stack_status == "DELETE_FAILED":
            LOGGER.info(
                "Cluster did not delete successfully. Run 'pcluster delete %s' again",
                args.cluster_name)
    except ClientError as e:
        # A "does not exist" error raised after delete_stack was issued is treated as a
        # successful deletion; any other ClientError is logged and exits with status 1
        if e.response.get("Error").get("Message").endswith("does not exist"):
            if saw_update:
                LOGGER.info("\nCluster deleted successfully.")
                sys.exit(0)
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
예제 #10
0
def test_patch_check_cluster_resource_bucket(
    old_bucket_name,
    new_bucket_name,
    is_generated_bucket,
    expected_error_row,
    mocker,
    test_datadir,
    pcluster_config_reader,
):
    """Check the rows and the outcome of ConfigPatch.check() when cluster_resource_bucket changes."""
    _do_mocking_for_tests(mocker)
    mocker.patch("pcluster.config.update_policy._is_bucket_pcluster_generated", return_value=is_generated_bucket)

    header_row = ["section", "parameter", "old value", "new value", "check", "reason", "action_needed"]
    # ec2_iam_role is to make sure other parameters are not affected by cluster_resource_bucket custom logic
    role_row = ["cluster some_cluster", "ec2_iam_role", "some_old_role", "some_new_role", "SUCCEEDED", "-", None]
    expected_message_rows = [header_row, role_row]

    if expected_error_row:
        reason = (
            "'cluster_resource_bucket' parameter is a read_only parameter that cannot be updated. "
            "New value '{0}' will be ignored and old value '{1}' will be used if you force the update.".format(
                new_bucket_name, old_bucket_name
            )
        )
        action_needed = "Restore the value of parameter 'cluster_resource_bucket' to '{0}'".format(old_bucket_name)
        expected_message_rows.append(
            [
                "cluster some_cluster",
                "cluster_resource_bucket",
                old_bucket_name,
                new_bucket_name,
                "ACTION NEEDED",
                reason,
                action_needed,
            ]
        )

    dst_config_file = "pcluster.config.dst.ini"
    duplicate_config_file(dst_config_file, test_datadir)

    src_config_file = pcluster_config_reader(cluster_resource_bucket=old_bucket_name, ec2_iam_role="some_old_role")
    src_conf = PclusterConfig(config_file=src_config_file, fail_on_file_absence=True)
    dst_config_file = pcluster_config_reader(
        dst_config_file, cluster_resource_bucket=new_bucket_name, ec2_iam_role="some_new_role"
    )
    dst_conf = PclusterConfig(config_file=dst_config_file, fail_on_file_absence=True)

    patch_allowed, rows = ConfigPatch(base_config=src_conf, target_config=dst_conf).check()

    assert_that(len(rows)).is_equal_to(len(expected_message_rows))
    for row in rows:
        # Handle unicode string
        normalized_row = ["{0}".format(cell) if isinstance(cell, str) else cell for cell in row]
        assert_that(expected_message_rows).contains(normalized_row)
    assert_that(patch_allowed).is_equal_to(not expected_error_row)
예제 #11
0
def test_adaptation(mocker, test_datadir, pcluster_config_reader, test):
    """Load the base and dst configurations and hand them to the parametrized test callable."""
    _do_mocking_for_tests(mocker)

    base_name = "pcluster.config.base.ini"
    duplicate_config_file(base_name, test_datadir)
    target_name = "pcluster.config.dst.ini"
    duplicate_config_file(target_name, test_datadir)

    # Both files are rendered with the same default cluster parameters
    base_path = pcluster_config_reader(base_name, **default_cluster_params)
    target_path = pcluster_config_reader(target_name, **default_cluster_params)

    base_conf = PclusterConfig(config_file=base_path, fail_on_file_absence=True)
    target_conf = PclusterConfig(config_file=target_path, fail_on_file_absence=True)

    test(base_conf, target_conf)
예제 #12
0
def delete(args):
    """
    Delete a cluster, optionally persisting its CloudWatch log groups first.

    When the stack does not exist a warning is emitted and the process exits with status 0.

    :param args: pcluster CLI args; config_file, cluster_name, keep_logs and nowait are read here
    """
    PclusterConfig.init_aws(config_file=args.config_file)
    cluster_name = args.cluster_name
    LOGGER.info("Deleting: %s", cluster_name)
    stack_name = utils.get_stack_name(cluster_name)

    if utils.stack_exists(stack_name):
        if args.keep_logs:
            _persist_cloudwatch_log_groups(cluster_name)
    else:
        if args.keep_logs:
            utils.warn(
                "Stack for {0} does not exist. Cannot prevent its log groups from being deleted.".format(cluster_name)
            )
        utils.warn("Cluster {0} has already been deleted.".format(cluster_name))
        sys.exit(0)

    _delete_cluster(cluster_name, args.nowait)
예제 #13
0
def stop(args):
    """Set ASG limits or awsbatch ce to min/max/desired = 0/0/0."""
    stack_name = utils.get_stack_name(args.cluster_name)
    pcluster_config = PclusterConfig(config_file=args.config_file, cluster_name=args.cluster_name)
    scheduler = pcluster_config.get_section("cluster").get_param_value("scheduler")

    if scheduler != "awsbatch":
        LOGGER.info("Stopping compute fleet : %s", args.cluster_name)
        asg_name = _get_asg_name(stack_name)
        _set_asg_limits(asg_name=asg_name, min=0, max=0, desired=0)
        return

    LOGGER.info("Disabling AWS Batch compute environment : %s", args.cluster_name)
    ce_name = _get_batch_ce(stack_name)
    _stop_batch_ce(ce_name=ce_name)
예제 #14
0
def assert_section_params(mocker, pcluster_config_reader, settings_label, expected_cfn_params):
    """
    Load the config for the given settings label and compare its CFN params with the expected ones.

    When expected_cfn_params is a SystemExit instance, creating the configuration is expected to
    raise SystemExit instead.
    """
    mocker.patch(
        "pcluster.config.cfn_param_types.get_supported_architectures_for_instance_type", return_value=["x86_64"]
    )
    mocked_instance_type_info = InstanceTypeInfo(
        {
            "InstanceType": "t2.micro",
            "VCpuInfo": {"DefaultVCpus": 1, "DefaultCores": 1, "DefaultThreadsPerCore": 1},
            "NetworkInfo": {"EfaSupported": False},
        }
    )
    mocker.patch("pcluster.utils.InstanceTypeInfo.init_from_instance_type", return_value=mocked_instance_type_info)

    if not isinstance(expected_cfn_params, SystemExit):
        pcluster_config = PclusterConfig(
            config_file=pcluster_config_reader(settings_label=settings_label), fail_on_file_absence=True
        )
        cfn_params = pcluster_config.to_cfn()

        # The size check runs on the full set, before the ignored params are removed
        assert_that(len(cfn_params)).is_equal_to(get_cfn_config_num_of_params(pcluster_config))
        remove_ignored_params(cfn_params)

        for param_key, actual_value in cfn_params.items():
            assert_that(actual_value, description=param_key).is_equal_to(expected_cfn_params.get(param_key))
        return

    with pytest.raises(SystemExit):
        PclusterConfig(
            cluster_label="default",
            config_file=pcluster_config_reader(settings_label=settings_label),
            fail_on_file_absence=True,
            fail_on_error=True,
        )
예제 #15
0
def stop(args):
    """Stop cluster compute fleet."""
    config_options = {
        "config_file": args.config_file,
        "cluster_name": args.cluster_name,
        "auto_refresh": False,
        "enforce_version": False,
        "skip_load_json_config": True,
    }
    pcluster_config = PclusterConfig(**config_options)
    # The cluster model provides the stop command to run for this configuration
    stop_command = pcluster_config.cluster_model.get_stop_command(pcluster_config)
    stop_command.stop(args, pcluster_config)
예제 #16
0
def dcv_connect(args):
    """
    Execute pcluster dcv connect command.

    :param args: pcluster cli arguments.
    """
    # Parse configuration file to read the AWS section
    # FIXME it always searches for the default configuration file
    PclusterConfig.init_aws()

    # Prepare ssh command to execute in the head node instance
    stack = get_stack(get_stack_name(args.cluster_name))
    shared_dir = get_cfn_param(stack.get("Parameters"), "SharedDir")
    head_node_ip, username = get_head_node_ip_and_username(args.cluster_name)
    # The identity file option is added only when a key path was provided
    key_option = "-i {0}".format(args.key_path) if args.key_path else ""
    cmd = 'ssh {CFN_USER}@{HEAD_NODE_IP} {KEY} "{REMOTE_COMMAND} {DCV_SHARED_DIR}"'.format(
        CFN_USER=username,
        HEAD_NODE_IP=head_node_ip,
        KEY=key_option,
        REMOTE_COMMAND=DCV_CONNECT_SCRIPT,
        DCV_SHARED_DIR=shared_dir,
    )

    try:
        url = retry(_retrieve_dcv_session_url, func_args=[cmd, args.cluster_name, head_node_ip], attempts=4)
        url_message = "Please use the following one-time URL in your browser within 30 seconds:\n{0}".format(url)
    except DCVConnectionError as e:
        error(
            "Something went wrong during DCV connection.\n{0}"
            "Please check the logs in the /var/log/parallelcluster/ folder "
            "of the head node and submit an issue {1}\n".format(e, PCLUSTER_ISSUES_LINK)
        )

    if args.show_url:
        # Only show the URL, without opening the browser
        LOGGER.info(url_message)
    else:
        try:
            browser_opened = webbrowser.open_new(url)
            if not browser_opened:
                raise webbrowser.Error("Unable to open the Web browser.")
        except webbrowser.Error as e:
            LOGGER.info("{0}\n{1}".format(e, url_message))
예제 #17
0
def instances(args):
    """
    Log the instances of a cluster, one per line.

    The scheduler is read from the "Scheduler" parameter of the CloudFormation stack. With awsbatch
    only the head node is listed, followed by a hint about the awsbhosts command.

    :param args: pcluster CLI args; cluster_name and config_file are read here
    """
    stack_name = utils.get_stack_name(args.cluster_name)
    PclusterConfig.init_aws(config_file=args.config_file)
    cfn_stack = utils.get_stack(stack_name)
    scheduler = utils.get_cfn_param(cfn_stack.get("Parameters"), "Scheduler")
    uses_awsbatch = scheduler == "awsbatch"

    cluster_instances = []
    head_node_server = utils.describe_cluster_instances(stack_name, node_type=utils.NodeType.head_node)
    if head_node_server:
        head_node_id = head_node_server[0].get("InstanceId")
        cluster_instances.append(("MasterServer", head_node_id))

    if not uses_awsbatch:
        cluster_instances.extend(_get_compute_instances(stack_name))

    for entry in cluster_instances:
        LOGGER.info("%s         %s", entry[0], entry[1])

    if uses_awsbatch:
        LOGGER.info("Run 'awsbhosts --cluster %s' to list the compute instances", args.cluster_name)
예제 #18
0
def instances(args):
    """
    Log the instances of a cluster, one per line.

    The scheduler is read from the cluster section of the configuration. With awsbatch the ASG
    instances are not listed and a hint about the awsbhosts command is logged instead.

    :param args: pcluster CLI args; cluster_name and config_file are read here
    """
    stack_name = utils.get_stack_name(args.cluster_name)
    pcluster_config = PclusterConfig(config_file=args.config_file, cluster_name=args.cluster_name)
    cluster_section = pcluster_config.get_section("cluster")

    cluster_instances = list(_get_ec2_instances(stack_name))

    uses_awsbatch = cluster_section.get_param_value("scheduler") == "awsbatch"
    if not uses_awsbatch:
        cluster_instances.extend(_get_asg_instances(stack_name))

    for entry in cluster_instances:
        LOGGER.info("%s         %s", entry[0], entry[1])

    if uses_awsbatch:
        LOGGER.info("Run 'awsbhosts --cluster %s' to list the compute instances", args.cluster_name)
예제 #19
0
def get_mocked_pcluster_config(mocker, auto_refresh=False):
    """
    Create a PclusterConfig pointing at the "wrong-file" path, with architecture lookup mocked.

    :param mocker: pytest-mock fixture used to patch the supported architectures lookup
    :param auto_refresh: forwarded to PclusterConfig
    :return: the created PclusterConfig
    """
    mocker.patch(
        "pcluster.config.cfn_param_types.get_supported_architectures_for_instance_type", return_value=["x86_64"]
    )
    # We need to provide a region to PclusterConfig to avoid no region exception.
    # Which region to provide is arbitrary; an already configured region is left untouched.
    os.environ.setdefault("AWS_DEFAULT_REGION", "us-east-1")
    return PclusterConfig(config_file="wrong-file", auto_refresh=auto_refresh)
예제 #20
0
def ssh(args, extra_args):  # noqa: C901 FIXME!!!
    """
    Run an SSH command against the master instance, using the [aliases] section template if one matches.

    :param args: pcluster CLI args
    :param extra_args: pcluster CLI extra_args
    """
    # FIXME it always search for the default config file
    pcluster_config = PclusterConfig(fail_on_error=False)
    aliases_section = pcluster_config.get_section("aliases")
    if args.command in aliases_section.params:
        command_template = aliases_section.get_param_value(args.command)
    else:
        command_template = "ssh {CFN_USER}@{MASTER_IP} {ARGS}"

    try:
        master_ip, username = utils.get_master_ip_and_username(args.cluster_name)

        # Fall back to pipes.quote when shlex does not provide quote
        try:
            from shlex import quote as cmd_quote
        except ImportError:
            from pipes import quote as cmd_quote

        # build command: every extra argument is shell-quoted before being joined
        quoted_args = [cmd_quote(str(arg)) for arg in extra_args]
        cmd = command_template.format(CFN_USER=username, MASTER_IP=master_ip, ARGS=" ".join(quoted_args))

        # run command, or only show it when a dry run was requested
        log_message = "SSH command: {0}".format(cmd)
        if args.dryrun:
            LOGGER.info(log_message)
        else:
            LOGGER.debug(log_message)
            os.system(cmd)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
예제 #21
0
def assert_section_params(mocker, pcluster_config_reader, settings_label, expected_cfn_params):
    """
    Load the config for the given settings label and compare its CFN params with the expected ones.

    When expected_cfn_params is a SystemExit instance, creating the configuration is expected to
    raise SystemExit instead.
    """
    if not isinstance(expected_cfn_params, SystemExit):
        config_path = pcluster_config_reader(settings_label=settings_label)
        pcluster_config = PclusterConfig(config_file=config_path, fail_on_file_absence=True)
        cfn_params = pcluster_config.to_cfn()

        assert_that(len(cfn_params)).is_equal_to(CFN_CONFIG_NUM_OF_PARAMS)

        for param_key, actual_value in cfn_params.items():
            assert_that(actual_value, description=param_key).is_equal_to(expected_cfn_params.get(param_key))
        return

    with pytest.raises(SystemExit):
        PclusterConfig(
            cluster_label="default",
            config_file=pcluster_config_reader(settings_label=settings_label),
            fail_on_file_absence=True,
            fail_on_error=True,
        )
예제 #22
0
def convert(args=None):
    """
    Command to convert SIT cluster section into HIT format.

    The configuration is loaded from args.config_file for the args.cluster_template section and
    passed to HitConverter. When a conversion is done, the result is either saved to
    args.output_file (which must not already exist) or printed to stdout; otherwise the reason
    returned by the converter is printed.

    The process exits with status 1 when the output file already exists, on KeyboardInterrupt and
    on any unexpected error.

    :param args: parsed CLI arguments; config_file, cluster_template and output_file are read here
    """
    try:
        # Build the config based on args
        pcluster_config = PclusterConfig(
            config_file=args.config_file, cluster_label=args.cluster_template, fail_on_file_absence=True
        )

        # Automatic SIT -> HIT conversion, if needed
        conversion_done, reason = HitConverter(pcluster_config).convert(prepare_to_file=True)
        if conversion_done:
            if args.output_file:
                # Never overwrite an existing file with the converted configuration
                if os.path.isfile(args.output_file):
                    print("ERROR: File {0} already exists, please select another output file.".format(args.output_file))
                    sys.exit(1)
                else:
                    pcluster_config.config_file = args.output_file
                    pcluster_config.to_file(exclude_unrelated_sections=True)
                    print(
                        "Section [cluster {label}] from file {input} has been converted and saved into {output}.\n"
                        "New [queue compute] and [compute_resource default] sections have been created.".format(
                            label=pcluster_config.get_section("cluster").label,
                            input=args.config_file,
                            output=args.output_file,
                        )
                    )
            else:
                print(
                    "Section [cluster {label}] from file {input} has been converted.\n"
                    "New [queue compute] and [compute_resource default] sections have been created.\n"
                    "Configuration file content:\n\n".format(
                        label=pcluster_config.get_section("cluster").label, input=args.config_file
                    )
                )
                pcluster_config.to_file(exclude_unrelated_sections=True, print_stdout=True)
        else:
            print(reason)
    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(1)
    except Exception as e:
        # print() does not interpolate logging-style arguments, so the message is formatted explicitly
        print("Unexpected error of type {0}: {1}".format(type(e).__name__, e))
        sys.exit(1)
예제 #23
0
def list_stacks(args):
    """
    Log a table with name, status and version of the top-level pcluster stacks.

    Stacks with a ParentId and stacks whose name does not start with PCLUSTER_STACK_PREFIX are
    skipped. A ClientError is logged and exits with status 1; a KeyboardInterrupt exits with status 0.

    :param args: pcluster CLI args; config_file is read here and args is forwarded to _colorize
    """
    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    try:
        rows = []
        for stack in utils.paginate_boto3(boto3.client("cloudformation").describe_stacks):
            if stack.get("ParentId") is not None:
                continue
            if not stack.get("StackName").startswith(PCLUSTER_STACK_PREFIX):
                continue

            pcluster_version = _get_pcluster_version_from_stack(stack)
            # The displayed name is the stack name without the pcluster prefix
            display_name = stack.get("StackName")[len(PCLUSTER_STACK_PREFIX) :]  # noqa: E203
            colored_status = _colorize(stack.get("StackStatus"), args)
            rows.append([display_name, colored_status, pcluster_version])
        LOGGER.info(tabulate(rows, tablefmt="plain"))
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("Exiting...")
        sys.exit(0)
예제 #24
0
def init_pcluster_config_from_configparser(config_parser, validate=True, auto_refresh=True):
    """
    Build a PclusterConfig from an in-memory config parser.

    The parser content is written to a named temporary file (created with delete=False, so it is
    not removed on close) and the "default" cluster section is loaded from it.

    :param config_parser: config parser whose content is dumped to the temporary file
    :param validate: when true, run _validate_config on the created configuration
    :param auto_refresh: forwarded to PclusterConfig
    :return: the created PclusterConfig
    """
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_path = tmp_file.name

        # The file is reopened by name in text mode to let the parser write into it
        with open(tmp_path, "w") as writer:
            config_parser.write(writer)

        # We need to provide a region to PclusterConfig to avoid no region exception.
        # Which region to provide is arbitrary; an already configured region is left untouched.
        os.environ.setdefault("AWS_DEFAULT_REGION", "us-east-1")

        pcluster_config = PclusterConfig(
            config_file=tmp_path, cluster_label="default", fail_on_file_absence=True, auto_refresh=auto_refresh
        )
        if validate:
            _validate_config(config_parser, pcluster_config)
    return pcluster_config
예제 #25
0
def dcv_connect(args):
    """
    Execute pcluster dcv connect command.

    Runs the DCV connect script on the master instance through ssh, extracts the server port,
    session id and session token from its output and opens the resulting URL in the web browser.

    :param args: pcluster cli arguments.
    """
    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(
    )  # FIXME it always searches for the default configuration file

    # Prepare ssh command to execute in the master instance
    stack = get_stack(get_stack_name(args.cluster_name))
    shared_dir = get_cfn_param(stack.get("Parameters"), "SharedDir")
    master_ip, username = get_master_ip_and_username(args.cluster_name)
    # The "-i" option is added only when a key path was provided
    cmd = 'ssh {CFN_USER}@{MASTER_IP} {KEY} "{REMOTE_COMMAND} {DCV_SHARED_DIR}"'.format(
        CFN_USER=username,
        MASTER_IP=master_ip,
        KEY="-i {0}".format(args.key_path) if args.key_path else "",
        REMOTE_COMMAND=DCV_CONNECT_SCRIPT,
        DCV_SHARED_DIR=shared_dir,
    )

    # Connect by ssh to the master instance and prepare DCV session
    try:
        LOGGER.debug("SSH command: {0}".format(cmd))
        output = _check_command_output(cmd)
        # At first ssh connection, the ssh command alerts it is adding the host to the known hosts list
        # In that case the command is executed a second time and that output is parsed instead
        if re.search("Permanently added .* to the list of known hosts.",
                     output):
            output = _check_command_output(cmd)

        # Extract the three session parameters from the command output
        dcv_parameters = re.search(
            r"PclusterDcvServerPort=([\d]+) PclusterDcvSessionId=([\w]+) PclusterDcvSessionToken=([\w-]+)",
            output)
        if dcv_parameters:
            dcv_server_port = dcv_parameters.group(1)
            dcv_session_id = dcv_parameters.group(2)
            dcv_session_token = dcv_parameters.group(3)
        else:
            # NOTE(review): the URL built below needs the dcv_* variables, so this relies on
            # error() not returning — confirm
            error(
                "Something went wrong during DCV connection. Please manually execute the command:\n{0}\n"
                "If the problem persists, please check the logs in the /var/log/parallelcluster/ folder "
                "of the master instance and submit an issue {1}.".format(
                    cmd, PCLUSTER_ISSUES_LINK))

    except sub.CalledProcessError as e:
        # A missing connect script in the command output is reported as a cluster without DCV support
        if "{0}: No such file or directory".format(
                DCV_CONNECT_SCRIPT) in e.output:
            error(
                "The cluster {0} has been created with an old version of ParallelCluster "
                "without the DCV support.".format(args.cluster_name))
        else:
            error("Something went wrong during DCV connection.\n{0}".format(
                e.output))

    # Open web browser
    url = "https://{IP}:{PORT}?authToken={TOKEN}#{SESSION_ID}".format(
        IP=master_ip,
        PORT=dcv_server_port,
        TOKEN=dcv_session_token,
        SESSION_ID=dcv_session_id)
    try:
        webbrowser.open_new(url)
    except webbrowser.Error:
        # Let the user open the URL manually when the browser cannot be started
        LOGGER.info(
            "Unable to open the Web browser. "
            "Please use the following URL in your browser within 30 seconds:\n{0}"
            .format(url))
예제 #26
0
def get_mocked_pcluster_config(mocker):
    """
    Create a PclusterConfig pointing at the "wrong-file" path.

    :param mocker: accepted but not used by this helper
    :return: the created PclusterConfig
    """
    pcluster_config = PclusterConfig(config_file="wrong-file")
    return pcluster_config
예제 #27
0
def configure(args):
    """
    Interactively gather the main cluster settings and save them to the configuration file.

    The region is prompted only when ``args.region`` is not set; the EC2 key pair and the scheduler are
    always prompted, and the remaining choices are delegated to ``ClusterConfigureHelper`` and
    ``_create_vpc_parameters``. The collected values are stored in the "cluster" and "vpc" sections,
    the configuration is refreshed, passed through ``HitConverter`` and finally written to file.

    :param args: parsed command line arguments; ``config_file`` and ``region`` are read
    """

    def _store_params(section, new_values):
        # Every value goes through the parameter's own get_value_from_string before being assigned
        for name, raw_value in new_values.items():
            target = section.get_param(name)
            target.value = target.get_value_from_string(raw_value)

    # Check for invalid path (eg. a directory)
    requested_path = args.config_file
    if requested_path and os.path.exists(requested_path) and not os.path.isfile(requested_path):
        error("Invalid configuration file path: {0}".format(requested_path))

    # auto_refresh stays off while the values are being collected; it is re-enabled before the conversion below
    pcluster_config = PclusterConfig(config_file=args.config_file, fail_on_error=False, auto_refresh=False)

    # FIXME: Overriding HIT config files is currently not supported.
    if pcluster_config.cluster_model == ClusterModel.HIT:
        error(
            "Configuration in file {0} cannot be overwritten. Please specify a different file path".format(
                pcluster_config.config_file
            )
        )

    notice = (
        "WARNING: Configuration file {0} will be overwritten."
        if os.path.exists(pcluster_config.config_file)
        else "INFO: Configuration file {0} will be written."
    )
    print(notice.format(pcluster_config.config_file))
    print("Press CTRL-C to interrupt the procedure.\n\n")

    if args.region:
        aws_region_name = args.region
    else:
        # Use built in boto regions as an available option
        region_choices = get_regions()
        region_from_config = pcluster_config.get_section("aws").get_param_value("aws_region_name")
        aws_region_name = prompt_iterable("AWS Region ID", region_choices, default_value=region_from_config)
        # Set provided region into os environment for suggestions and validations from here on
        os.environ["AWS_DEFAULT_REGION"] = aws_region_name

    cluster_section = pcluster_config.get_section("cluster")

    # Labels are captured now and re-applied to the sections right before the new values are stored
    cluster_label = pcluster_config.get_section("global").get_param_value("cluster_template")

    vpc_section = pcluster_config.get_section("vpc")
    vpc_label = vpc_section.label

    # Get the key name from the current region, if any
    key_choices = _get_keys()
    key_name = prompt_iterable(
        "EC2 Key Pair Name", key_choices, default_value=cluster_section.get_param_value("key_name")
    )

    scheduler_choices = get_supported_schedulers()
    scheduler_from_config = cluster_section.get_param_value("scheduler")
    scheduler = prompt_iterable("Scheduler", scheduler_choices, default_value=scheduler_from_config)

    cluster_config = ClusterConfigureHelper(cluster_section, scheduler)
    cluster_config.prompt_os()
    cluster_config.prompt_cluster_size()
    cluster_config.prompt_instance_types()

    vpc_parameters = _create_vpc_parameters(vpc_section, cluster_config)
    # Here is the end of prompt. Code below assembles config and write to file

    cluster_parameters = dict(key_name=key_name, scheduler=scheduler)
    cluster_parameters.update(cluster_config.get_scheduler_parameters())

    # Remove parameters from the past configuration that can conflict with the user's choices.
    _reset_config_params(cluster_section, cluster_config.get_parameters_to_reset())
    stale_vpc_params = ("compute_subnet_id", "use_public_ips", "compute_subnet_cidr")
    _reset_config_params(vpc_section, stale_vpc_params)

    # Update configuration values according to user's choices
    pcluster_config.region = aws_region_name

    cluster_section.label = cluster_label
    _store_params(cluster_section, cluster_parameters)

    vpc_section.label = vpc_label
    _store_params(vpc_section, vpc_parameters)

    # Update internal params according to provided parameters and enable auto-refresh before eventual hit conversion
    pcluster_config.refresh()
    pcluster_config.auto_refresh = True

    # Convert file if needed
    converter = HitConverter(pcluster_config)
    converter.convert(prepare_to_file=True)

    # Update config file by overriding changed settings
    pcluster_config.to_file()
    print("Configuration file written to {0}".format(pcluster_config.config_file))
    print(
        "You can edit your configuration file or simply run 'pcluster create -c {0} cluster-name' "
        "to create your cluster".format(pcluster_config.config_file)
    )
예제 #28
0
def configure(args):
    """
    Interactively gather the main cluster settings and save them to the configuration file.

    The user is prompted for the AWS region, the EC2 key pair, the scheduler, the master instance type and
    whether VPC creation should be automated; OS, cluster size and compute instance type prompts are
    delegated to ``SchedulerHandler``. The collected values are stored in the "cluster" and "vpc" sections
    and the configuration is written to file.

    :param args: parsed command line arguments; only ``config_file`` is read
    """

    def _is_supported_instance_type(candidate):
        # The supported list is fetched at every validation attempt
        return candidate in get_supported_instance_types()

    def _store_params(section, new_values):
        # Every value goes through the parameter's own get_value_from_string before being assigned
        for name, raw_value in new_values.items():
            target = section.get_param(name)
            target.value = target.get_value_from_string(raw_value)

    # Check for invalid path (eg. a directory)
    requested_path = args.config_file
    if requested_path and os.path.exists(requested_path) and not os.path.isfile(requested_path):
        error("Invalid configuration file path: {0}".format(requested_path))

    pcluster_config = PclusterConfig(config_file=args.config_file, fail_on_error=False)

    notice = (
        "WARNING: Configuration file {0} will be overwritten."
        if os.path.exists(pcluster_config.config_file)
        else "INFO: Configuration file {0} will be written."
    )
    print(notice.format(pcluster_config.config_file))
    print("Press CTRL-C to interrupt the procedure.\n\n")

    cluster_section = pcluster_config.get_section("cluster")

    # Labels are captured now and re-applied to the sections right before the new values are stored
    cluster_label = pcluster_config.get_section("global").get_param_value("cluster_template")

    vpc_section = pcluster_config.get_section("vpc")
    vpc_label = vpc_section.label

    # Use built in boto regions as an available option
    region_choices = get_regions()
    region_from_config = pcluster_config.get_section("aws").get_param_value("aws_region_name")
    # The configured region is offered as default only when it is one of the available choices
    if region_from_config in region_choices:
        region_default = region_from_config
    else:
        region_default = None
    aws_region_name = prompt_iterable("AWS Region ID", region_choices, default_value=region_default)
    # Set provided region into os environment for suggestions and validations from here on
    os.environ["AWS_DEFAULT_REGION"] = aws_region_name

    # Get the key name from the current region, if any
    key_choices = _get_keys()
    key_from_config = cluster_section.get_param_value("key_name")
    if key_from_config in key_choices:
        key_default = key_from_config
    else:
        key_default = None
    key_name = prompt_iterable("EC2 Key Pair Name", key_choices, default_value=key_default)

    scheduler_choices = get_supported_schedulers()
    scheduler_from_config = cluster_section.get_param_value("scheduler")
    scheduler = prompt_iterable("Scheduler", scheduler_choices, default_value=scheduler_from_config)
    scheduler_handler = SchedulerHandler(cluster_section, scheduler)

    scheduler_handler.prompt_os()
    scheduler_handler.prompt_cluster_size()

    master_instance_type = prompt(
        "Master instance type",
        _is_supported_instance_type,
        default_value=cluster_section.get_param_value("master_instance_type"),
    )

    scheduler_handler.prompt_compute_instance_type()

    vpc_answer = prompt("Automate VPC creation? (y/n)", lambda x: x in ("y", "n"), default_value="n")
    automate_vpc = vpc_answer == "y"

    vpc_parameters = _create_vpc_parameters(
        vpc_section, scheduler, scheduler_handler.max_cluster_size, automate_vpc_creation=automate_vpc
    )
    cluster_parameters = dict(key_name=key_name, scheduler=scheduler, master_instance_type=master_instance_type)
    cluster_parameters.update(scheduler_handler.get_scheduler_parameters())

    # Remove parameters from the past configuration that can conflict with the user's choices.
    _reset_config_params(cluster_section, scheduler_handler.get_parameters_to_reset())
    stale_vpc_params = ("compute_subnet_id", "use_public_ips", "compute_subnet_cidr")
    _reset_config_params(vpc_section, stale_vpc_params)

    # Update configuration values according to user's choices
    pcluster_config.region = aws_region_name

    cluster_section.label = cluster_label
    _store_params(cluster_section, cluster_parameters)

    vpc_section.label = vpc_label
    _store_params(vpc_section, vpc_parameters)

    # Update config file by overriding changed settings
    pcluster_config.to_file()
    print("Configuration file written to {0}".format(pcluster_config.config_file))
    print(
        "You can edit your configuration file or simply run 'pcluster create -c {0} cluster-name' "
        "to create your cluster".format(pcluster_config.config_file)
    )
예제 #29
0
def create_ami(args):
    """
    Build a custom AWS ParallelCluster AMI by running the cookbook's ``build_ami.sh`` script.

    VPC and subnet ids are taken from the command line when provided, otherwise from the "vpc" section of
    the configuration file. AWS credentials found in the "aws" section are exported to the build
    environment. The build results are always printed and the temporary working directory, if one was
    created, is always removed, even when the build fails or is interrupted.

    :param args: parsed command line arguments (config_file, base_ami_id, base_ami_os, instance_type,
        custom_ami_name_prefix, vpc_id, subnet_id, associate_public_ip, post_install_script, ...)
    """
    LOGGER.info("Building AWS ParallelCluster AMI. This could take a while...")

    # Do not autofresh; pcluster_config is only used to get info on vpc section, aws section, and template url
    # Logic in autofresh could make unexpected validations not needed in createami
    pcluster_config = PclusterConfig(config_file=args.config_file, fail_on_file_absence=True, auto_refresh=False)

    ami_info = _validate_createami_args_ami_compatibility(args)
    ami_architecture = ami_info.get("Architecture")

    LOGGER.debug("Building AMI based on args %s", str(args))
    results = {}
    # Bound before the try block so the cleanup in `finally` can test it directly instead of probing locals()
    tmp_dir = None

    instance_type = args.instance_type
    try:
        vpc_section = pcluster_config.get_section("vpc")
        vpc_id = args.vpc_id if args.vpc_id else vpc_section.get_param_value("vpc_id")
        subnet_id = args.subnet_id if args.subnet_id else vpc_section.get_param_value("master_subnet_id")

        packer_env = {
            "CUSTOM_AMI_ID": args.base_ami_id,
            "AWS_FLAVOR_ID": instance_type,
            "AMI_NAME_PREFIX": args.custom_ami_name_prefix,
            "AWS_VPC_ID": vpc_id,
            "AWS_SUBNET_ID": subnet_id,
            "ASSOCIATE_PUBLIC_IP": "true" if args.associate_public_ip else "false",
        }

        aws_section = pcluster_config.get_section("aws")
        aws_region = aws_section.get_param_value("aws_region_name")
        # Export the credentials only when they are set in the configuration; each value is read once
        credential_params = (
            ("aws_access_key_id", "AWS_ACCESS_KEY_ID"),
            ("aws_secret_access_key", "AWS_SECRET_ACCESS_KEY"),
        )
        for param_name, env_name in credential_params:
            credential = aws_section.get_param_value(param_name) if aws_section else None
            if credential:
                packer_env[env_name] = credential

        LOGGER.info("Base AMI ID: %s", args.base_ami_id)
        LOGGER.info("Base AMI OS: %s", args.base_ami_os)
        LOGGER.info("Instance Type: %s", instance_type)
        LOGGER.info("Region: %s", aws_region)
        LOGGER.info("VPC ID: %s", vpc_id)
        LOGGER.info("Subnet ID: %s", subnet_id)

        template_url = evaluate_pcluster_template_url(pcluster_config)

        tmp_dir = mkdtemp()
        cookbook_dir = _get_cookbook_dir(aws_region, template_url, args, tmp_dir)

        _get_post_install_script_dir(args.post_install_script, tmp_dir)

        packer_command = (
            cookbook_dir
            + "/amis/build_ami.sh --os "
            + args.base_ami_os
            + " --partition region"
            + " --region "
            + aws_region
            + " --custom"
            + " --arch "
            + ami_architecture
        )

        results = _run_packer(packer_command, packer_env)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
    finally:
        # Runs on success, failure and interruption alike; results is still {} if the build never completed
        _print_create_ami_results(results)
        if tmp_dir:
            rmtree(tmp_dir)
예제 #30
0
def create(args):  # noqa: C901 FIXME!!!
    """
    Create the CloudFormation stack for a new cluster.

    The configuration is loaded and validated, converted through ``HitConverter`` and turned into
    CloudFormation parameters; the S3 bucket with the cluster resources is set up and the stack is created.
    Unless ``args.nowait`` is set, the function waits for the creation to be verified, prints the stack
    outputs and exits with status 1 when the verification fails.

    On any failure the S3 resources prepared so far are cleaned up and the process exits with status 1.
    On CTRL-C the S3 resources are cleaned up only if the stack does not exist yet, and the process exits
    with status 0.

    :param args: parsed command line arguments (cluster_name, config_file, cluster_template, tags,
        template_url, extra_parameters, norollback, nowait)
    """
    LOGGER.info("Beginning cluster creation for cluster: %s", args.cluster_name)
    LOGGER.debug("Building cluster config based on args %s", str(args))

    _validate_cluster_name(args.cluster_name)

    # Build the config based on args
    pcluster_config = PclusterConfig(
        config_file=args.config_file, cluster_label=args.cluster_template, fail_on_file_absence=True
    )
    pcluster_config.validate()

    # Automatic SIT -> HIT conversion, if needed
    HitConverter(pcluster_config).convert()

    # get CFN parameters, template url and tags from config
    storage_data = pcluster_config.to_storage()
    cfn_params = storage_data.cfn_params

    _check_for_updates(pcluster_config)

    # Pre-bound so that every exception handler below can reference them, whatever point the try block reached
    stack_name = None
    bucket_name = None
    artifact_directory = None
    cleanup_bucket = False
    try:
        cfn_client = boto3.client("cloudformation")
        stack_name = utils.get_stack_name(args.cluster_name)

        # merge tags from configuration, command-line and internal ones
        tags = _evaluate_tags(pcluster_config, preferred_tags=args.tags)

        bucket_name, artifact_directory, cleanup_bucket = _setup_bucket_with_resources(
            pcluster_config, storage_data, stack_name, tags
        )
        cfn_params["ResourcesS3Bucket"] = bucket_name
        cfn_params["ArtifactS3RootDirectory"] = artifact_directory
        cfn_params["RemoveBucketOnDeletion"] = str(cleanup_bucket)

        LOGGER.info("Creating stack named: %s", stack_name)

        # determine the CloudFormation Template URL to use
        template_url = evaluate_pcluster_template_url(pcluster_config, preferred_template_url=args.template_url)

        # append extra parameters from command-line
        if args.extra_parameters:
            LOGGER.debug("Adding extra parameters to the CFN parameters")
            cfn_params.update(dict(args.extra_parameters))

        # prepare input parameters for stack creation and create the stack
        LOGGER.debug(cfn_params)
        params = [{"ParameterKey": key, "ParameterValue": value} for key, value in cfn_params.items()]
        stack = cfn_client.create_stack(
            StackName=stack_name,
            TemplateURL=template_url,
            Parameters=params,
            Capabilities=["CAPABILITY_IAM"],
            DisableRollback=args.norollback,
            Tags=tags,
        )
        LOGGER.debug("StackId: %s", stack.get("StackId"))

        if not args.nowait:
            verified = utils.verify_stack_creation(stack_name, cfn_client)
            LOGGER.info("")
            result_stack = utils.get_stack(stack_name, cfn_client)
            _print_stack_outputs(result_stack)
            if not verified:
                sys.exit(1)
        else:
            stack_status = utils.get_stack(stack_name, cfn_client).get("StackStatus")
            LOGGER.info("Status: %s", stack_status)
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        # stack_name is still None when the interrupt arrives before the name is computed; in that case
        # the stack cannot have been created, so skip the existence lookup and go straight to cleanup
        if stack_name is None or not utils.stack_exists(stack_name):
            # Cleanup S3 artifacts if stack is not created yet
            utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(0)
    except KeyError as e:
        LOGGER.critical("ERROR: KeyError - reason:\n%s", e)
        utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(1)
    except Exception as e:
        LOGGER.critical(e)
        utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(1)