コード例 #1
0
def efa_validator(param_key, param_value, pcluster_config):
    """
    Check that the "cluster" section is compatible with the requested 'enable_efa' value.

    The configured compute instance type, base OS and scheduler must each belong to
    the corresponding allowed set, otherwise an error is recorded. A missing
    placement group only produces a warning. Finally the errors list is handed to
    _validate_efa_sg together with the configuration (presumably so it can append
    security group errors -- that helper is not visible here).

    :param param_key: name of the parameter being validated (not used by the checks)
    :param param_value: value of 'enable_efa', echoed in the error messages
    :param pcluster_config: configuration object providing get_section() and region
    :return: a tuple (errors, warnings), each a list of messages
    """
    cluster = pcluster_config.get_section("cluster")
    efa_features = get_supported_features(pcluster_config.region, "efa")

    # (parameter to read, allowed values, message template), in the order they are checked
    constraints = (
        (
            "compute_instance_type",
            efa_features.get("instances"),
            "When using 'enable_efa = {0}' it is required to set the 'compute_instance_type' parameter "
            "to one of the following values : {1}",
        ),
        (
            "base_os",
            ["alinux", "alinux2", "centos7", "ubuntu1604", "ubuntu1804"],
            "When using 'enable_efa = {0}' it is required to set the 'base_os' parameter "
            "to one of the following values : {1}",
        ),
        (
            "scheduler",
            ["sge", "slurm", "torque"],
            "When using 'enable_efa = {0}' it is required to set the 'scheduler' parameter "
            "to one of the following values : {1}",
        ),
    )

    errors = [
        template.format(param_value, allowed_values)
        for name, allowed_values, template in constraints
        if cluster.get_param_value(name) not in allowed_values
    ]

    warnings = []
    if cluster.get_param_value("placement_group") is None:
        warnings.append("You may see better performance using a cluster placement group.")

    _validate_efa_sg(pcluster_config, errors)

    return errors, warnings
コード例 #2
0
    def __init_efa_parameters(self):
        """
        Read 'enable_efa' from the cluster section and, if present, validate and store it.

        Any value other than "compute" is reported through __fail. The compute instance
        type is checked against the instances returned by get_supported_features for
        "efa"; the OS and the scheduler are checked against fixed lists, then
        __validate_resource is run on the parameters. On success the value is saved
        as the "EFA" parameter.

        A configparser.NoOptionError raised anywhere in this sequence (typically because
        'enable_efa' is not set) is swallowed and leaves the parameters untouched.
        """
        try:
            enable_efa = self.__config.get(self.__cluster_section, "enable_efa")
            if enable_efa != "compute":
                self.__fail("valid values for enable_efa = compute")

            efa_instances = get_supported_features(self.region, "efa").get("instances")
            compute_instance_type = self.parameters.get("ComputeInstanceType")

            self.__validate_instance("EFA", compute_instance_type, efa_instances)
            self.__validate_os("EFA", self.__get_os(), ["alinux", "centos7", "ubuntu1604"])
            self.__validate_scheduler("EFA", self.__get_scheduler(), ["sge", "slurm", "torque"])
            self.__validate_resource("EFA", self.parameters)

            self.parameters["EFA"] = enable_efa
        except configparser.NoOptionError:
            # 'enable_efa' is optional: nothing to validate or store
            pass
コード例 #3
0
def compute_instance_type_validator(param_key, param_value, pcluster_config):
    """
    Validate 'compute_instance_type' when the cluster scheduler is awsbatch.

    For any other scheduler no check is performed. With awsbatch:
    - every comma separated entry of param_value must be among the instances returned
      by get_supported_features for "batch"; if that list is empty or missing, a
      warning is emitted and the check is skipped;
    - when param_value is a single instance type (no comma, contains a dot), the
      'max_vcpus' parameter must be at least the number of vcpus of that type; if the
      vcpus count is not positive, a warning is emitted instead.
    A ClientError raised along the way is converted into an error message.

    :param param_key: name of the parameter being validated (not used by the checks)
    :param param_value: the configured compute instance type(s)
    :param pcluster_config: configuration object providing get_section() and region
    :return: a tuple (errors, warnings), each a list of messages
    """
    errors = []
    warnings = []

    cluster_section = pcluster_config.get_section("cluster")
    if cluster_section.get_param_value("scheduler") != "awsbatch":
        return errors, warnings

    try:
        batch_instances = get_supported_features(pcluster_config.region, "batch").get("instances")
        if not batch_instances:
            warnings.append(
                "Unable to get instance types supported by awsbatch. Skipping compute_instance_type validation"
            )
        else:
            errors.extend(
                "compute_instance_type '{0}' is not supported by awsbatch in region '{1}'".format(
                    requested, pcluster_config.region
                )
                for requested in param_value.split(",")
                if requested.strip() not in batch_instances
            )

        # Neither a list, nor "optimal", nor an instance family: a single concrete type
        is_single_type = "," not in param_value and "." in param_value
        if is_single_type:
            # validate instance type against max_vcpus limit
            vcpus = get_instance_vcpus(pcluster_config.region, param_value)
            if vcpus <= 0:
                warnings.append(
                    "Unable to get the number of vcpus for the compute_instance_type '{0}'. "
                    "Skipping instance type against max_vcpus validation".format(param_value)
                )
            elif cluster_section.get_param_value("max_vcpus") < vcpus:
                errors.append(
                    "max_vcpus must be greater than or equal to {0}, that is the number of vcpus "
                    "available for the {1} that you selected as compute_instance_type".format(vcpus, param_value)
                )
    except ClientError as e:
        errors.append(e.response.get("Error").get("Message"))

    return errors, warnings
コード例 #4
0
    def validate(self, resource_type, resource_value):  # noqa: C901 FIXME
        """
        Validate the given resource. Print an error and exit in case of error.

        The check depends on resource_type: AWS resources are looked up through boto3,
        URLs are opened (s3:// URLs are skipped) and plain configuration values are
        checked locally. Problems are reported through self.__fail, except for the IAM
        role check, which prints its own message and calls sys.exit(1). Resource types
        that match none of the branches are silently accepted.

        :param resource_type: Resource type
        :param resource_value: Resource value. Indexed as a sequence for "EFSThroughput"
            (throughput_mode, provisioned_throughput) and "RAIDIOPS" (iops, volume size),
            used as a mapping for "AWSBatch_Parameters", a single value otherwise.
        """
        # Loop over all supported resource checks
        if resource_type == "EC2KeyPair":
            try:
                self.__boto3_client("ec2").describe_key_pairs(KeyNames=[resource_value])
            except ClientError as e:
                self.__fail(resource_type, e.response.get("Error").get("Message"))
        # IAM role: the role must be allowed to perform all the actions listed below
        elif resource_type == "EC2IAMRoleName":
            try:
                iam = self.__boto3_client("iam")

                arn = iam.get_role(RoleName=resource_value).get("Role").get("Arn")
                account_id = self.__boto3_client("sts").get_caller_identity().get("Account")

                partition = self.__get_partition()

                # Pairs of (actions, resource ARN) to simulate against the role
                iam_policy = [
                    (
                        [
                            "ec2:DescribeVolumes",
                            "ec2:AttachVolume",
                            "ec2:DescribeInstanceAttribute",
                            "ec2:DescribeInstanceStatus",
                            "ec2:DescribeInstances",
                        ],
                        "*",
                    ),
                    (["dynamodb:ListTables"], "*"),
                    (
                        [
                            "sqs:SendMessage",
                            "sqs:ReceiveMessage",
                            "sqs:ChangeMessageVisibility",
                            "sqs:DeleteMessage",
                            "sqs:GetQueueUrl",
                        ],
                        "arn:%s:sqs:%s:%s:parallelcluster-*" % (partition, self.region, account_id),
                    ),
                    (
                        [
                            "autoscaling:DescribeAutoScalingGroups",
                            "autoscaling:TerminateInstanceInAutoScalingGroup",
                            "autoscaling:SetDesiredCapacity",
                            "autoscaling:DescribeTags",
                            "autoScaling:UpdateAutoScalingGroup",
                        ],
                        "*",
                    ),
                    (
                        [
                            "dynamodb:PutItem",
                            "dynamodb:Query",
                            "dynamodb:GetItem",
                            "dynamodb:DeleteItem",
                            "dynamodb:DescribeTable",
                        ],
                        "arn:%s:dynamodb:%s:%s:table/parallelcluster-*" % (partition, self.region, account_id),
                    ),
                    (
                        ["cloudformation:DescribeStacks"],
                        "arn:%s:cloudformation:%s:%s:stack/parallelcluster-*" % (partition, self.region, account_id),
                    ),
                    (["s3:GetObject"], "arn:%s:s3:::%s-aws-parallelcluster/*" % (partition, self.region)),
                    (["sqs:ListQueues"], "*"),
                ]

                for actions, resource_arn in iam_policy:
                    response = iam.simulate_principal_policy(
                        PolicySourceArn=arn, ActionNames=actions, ResourceArns=[resource_arn]
                    )
                    for decision in response.get("EvaluationResults"):
                        if decision.get("EvalDecision") != "allowed":
                            print(
                                "IAM role error on user provided role %s: action %s is %s"
                                % (resource_value, decision.get("EvalActionName"), decision.get("EvalDecision"))
                            )
                            print("See https://aws-parallelcluster.readthedocs.io/en/latest/iam.html")
                            sys.exit(1)
            except ClientError as e:
                self.__fail(resource_type, e.response.get("Error").get("Message"))
        # VPC Id
        elif resource_type == "VPC":
            try:
                ec2 = self.__boto3_client("ec2")
                ec2.describe_vpcs(VpcIds=[resource_value])
                # The attribute lookups are kept inside the try block so that an API
                # error is reported through __fail instead of escaping as a traceback.
                # Check for DNS support in the VPC
                dns_support = ec2.describe_vpc_attribute(VpcId=resource_value, Attribute="enableDnsSupport")
                if not dns_support.get("EnableDnsSupport").get("Value"):
                    self.__fail(resource_type, "DNS Support is not enabled in %s" % resource_value)
                dns_hostnames = ec2.describe_vpc_attribute(VpcId=resource_value, Attribute="enableDnsHostnames")
                if not dns_hostnames.get("EnableDnsHostnames").get("Value"):
                    self.__fail(resource_type, "DNS Hostnames not enabled in %s" % resource_value)
            except ClientError as e:
                self.__fail(resource_type, e.response.get("Error").get("Message"))
        # VPC Subnet Id
        elif resource_type == "VPCSubnet":
            try:
                self.__boto3_client("ec2").describe_subnets(SubnetIds=[resource_value])
            except ClientError as e:
                self.__fail(resource_type, e.response.get("Error").get("Message"))
        # VPC Security Group
        elif resource_type == "VPCSecurityGroup":
            try:
                self.__boto3_client("ec2").describe_security_groups(GroupIds=[resource_value])
            except ClientError as e:
                self.__fail(resource_type, e.response.get("Error").get("Message"))
        # EC2 AMI Id
        elif resource_type == "EC2Ami":
            try:
                self.__boto3_client("ec2").describe_images(ImageIds=[resource_value])
            except ClientError as e:
                self.__fail(resource_type, e.response.get("Error").get("Message"))
        # EC2 Placement Group
        elif resource_type == "EC2PlacementGroup":
            # "DYNAMIC" is not looked up on EC2
            if resource_value != "DYNAMIC":
                try:
                    self.__boto3_client("ec2").describe_placement_groups(GroupNames=[resource_value])
                except ClientError as e:
                    self.__fail(resource_type, e.response.get("Error").get("Message"))
        # URL
        elif resource_type == "URL":
            # s3:// URLs are accepted without being opened
            if urlparse(resource_value).scheme != "s3":
                try:
                    # Only reachability matters: close the response right away so that
                    # the underlying connection is not leaked.
                    urllib.request.urlopen(resource_value).close()
                except urllib.error.HTTPError as e:
                    self.__fail(resource_type, "%s %s %s" % (resource_value, e.code, e.reason))
                except urllib.error.URLError as e:
                    self.__fail(resource_type, "%s %s" % (resource_value, e.reason))
                except ValueError as e:
                    # urlopen raises ValueError for malformed URLs, e.g. a missing scheme
                    self.__fail(resource_type, "%s %s" % (resource_value, e))
        # EC2 EBS Snapshot Id
        elif resource_type == "EC2Snapshot":
            try:
                snapshot = (
                    self.__boto3_client("ec2").describe_snapshots(SnapshotIds=[resource_value]).get("Snapshots")[0]
                )
                if snapshot.get("State") != "completed":
                    self.__fail(
                        resource_type,
                        "Snapshot %s is in state '%s' not 'completed'" % (resource_value, snapshot.get("State")),
                    )
            except ClientError as e:
                self.__fail(resource_type, e.response.get("Error").get("Message"))
        # EC2 EBS Volume Id
        elif resource_type == "EC2Volume":
            try:
                volume = self.__boto3_client("ec2").describe_volumes(VolumeIds=[resource_value]).get("Volumes")[0]
                if volume.get("State") != "available":
                    self.__fail(
                        resource_type,
                        "Volume %s is in state '%s' not 'available'" % (resource_value, volume.get("State")),
                    )
            except ClientError as e:
                message = e.response.get("Error").get("Message")
                # Replace the API wording for a malformed/unknown volume id with a clearer
                # message, and report the failure exactly once.
                if message.endswith("parameter volumes is invalid. Expected: 'vol-...'."):
                    message = "Volume %s does not exist." % resource_value
                self.__fail(resource_type, message)
        # EFS file system Id
        elif resource_type == "EFSFSId":
            try:
                ec2 = self.__boto3_client("ec2")
                efs = self.__boto3_client("efs")
                self.__check_efs_fs_id(ec2, efs, resource_value)
            except ClientError as e:
                self.__fail(resource_type, e.response.get("Error").get("Message"))
        # EFS Performance Mode check
        elif resource_type == "EFSPerfMode":
            if resource_value not in ("generalPurpose", "maxIO"):
                self.__fail(
                    resource_type,
                    "Invalid value for 'performance_mode'! "
                    "Acceptable values for 'performance_mode' are generalPurpose and maxIO",
                )
        # EFS Throughput check
        elif resource_type == "EFSThroughput":
            throughput_mode = resource_value[0]
            provisioned_throughput = resource_value[1]
            if throughput_mode and throughput_mode not in ("provisioned", "bursting"):
                self.__fail(
                    resource_type,
                    "Invalid value for 'throughput_mode'! "
                    "Acceptable values for 'throughput_mode' are bursting and provisioned",
                )
            # 'provisioned_throughput' and throughput_mode == "provisioned" must go together
            if provisioned_throughput is not None:
                if throughput_mode != "provisioned":
                    self.__fail(
                        resource_type,
                        "When specifying 'provisioned_throughput', the 'throughput_mode' must be set to provisioned",
                    )
            elif throughput_mode == "provisioned":
                self.__fail(
                    resource_type,
                    "When specifying 'throughput_mode' to provisioned, "
                    "the 'provisioned_throughput' option must be specified",
                )
        # RAID EBS IOPS
        elif resource_type == "RAIDIOPS":
            raid_iops = float(resource_value[0])
            raid_vol_size = float(resource_value[1])
            if raid_iops > raid_vol_size * 50:
                # A zero volume size would make the division below raise ZeroDivisionError;
                # report an infinite ratio instead of crashing.
                ratio = raid_iops / raid_vol_size if raid_vol_size else float("inf")
                self.__fail(resource_type, "IOPS to volume size ratio of %s is too high; maximum is 50." % ratio)
        # RAID Array Type
        elif resource_type == "RAIDType":
            if resource_value not in ("0", "1"):
                self.__fail(resource_type, "Invalid raid_type, only RAID 0 and RAID 1 are currently supported.")
        # Number of RAID Volumes Requested
        elif resource_type == "RAIDNumVol":
            num_volumes = int(resource_value)
            if num_volumes > 5 or num_volumes < 2:
                self.__fail(
                    resource_type,
                    "Invalid num_of_raid_volumes. "
                    "Needs min of 2 volumes for RAID and max of 5 EBS volumes are currently supported.",
                )
        # FSX FS Id check
        elif resource_type in ["fsx_fs_id", "FSx_storage_capacity", "FSx_imported_file_chunk_size", "FSx_export_path"]:
            self.__validate_fsx_parameters(resource_type, resource_value)
        elif resource_type == "EFA":
            self.__validate_efa_parameters(resource_type, resource_value)

        # Batch Parameters
        elif resource_type == "AWSBatch_Parameters":
            # Check region
            if self.region in [
                "ap-northeast-3",
                "eu-north-1",
                "cn-north-1",
                "cn-northwest-1",
                "us-gov-east-1",
                "us-gov-west-1",
            ]:
                self.__fail(resource_type, "Region %s is not supported with batch scheduler" % self.region)

            # Check spot bid percentage
            if "SpotPrice" in resource_value:
                spot_price = int(resource_value["SpotPrice"])
                if spot_price > 100 or spot_price < 0:
                    self.__fail(resource_type, "Spot bid percentage needs to be between 0 and 100")

            min_size = int(resource_value["MinSize"])
            desired_size = int(resource_value["DesiredSize"])
            max_size = int(resource_value["MaxSize"])

            if desired_size < min_size:
                self.__fail(resource_type, "Desired vcpus must be greater than or equal to min vcpus")

            if desired_size > max_size:
                self.__fail(resource_type, "Desired vcpus must be fewer than or equal to max vcpus")

            if max_size < min_size:
                self.__fail(resource_type, "Max vcpus must be greater than or equal to min vcpus")

            # Check compute instance types
            if "ComputeInstanceType" in resource_value:
                compute_instance_type = resource_value["ComputeInstanceType"]
                try:
                    supported_instances = get_supported_features(self.region, "batch").get("instances")
                    if supported_instances:
                        for instance in compute_instance_type.split(","):
                            if instance.strip() not in supported_instances:
                                self.__fail(
                                    resource_type, "Instance type %s not supported by batch in this region" % instance
                                )
                    else:
                        self.__warn(
                            "Unable to get instance types supported by Batch. Skipping instance type validation"
                        )

                    if "," not in compute_instance_type and "." in compute_instance_type:
                        # if the type is not a list, and contains dot (nor optimal, nor a family)
                        # validate instance type against max_vcpus limit
                        vcpus = get_instance_vcpus(self.region, compute_instance_type)
                        if vcpus <= 0:
                            self.__warn(
                                "Unable to get the number of vcpus for the {0} instance type. "
                                "Skipping instance type against max_vcpus validation".format(compute_instance_type)
                            )
                        elif max_size < vcpus:
                            self.__fail(
                                resource_type,
                                "Max vcpus must be greater than or equal to {0}, that is the number of vcpus "
                                "available for the {1} that you selected as compute instance type".format(
                                    vcpus, compute_instance_type
                                ),
                            )
                except ClientError as e:
                    self.__fail(resource_type, e.response.get("Error").get("Message"))

            # Check custom batch url
            if "CustomAWSBatchTemplateURL" in resource_value:
                self.validate("URL", resource_value["CustomAWSBatchTemplateURL"])

    def __boto3_client(self, service_name):
        """
        Create a boto3 client for the given service using this object's region and credentials.

        :param service_name: AWS service name passed to boto3.client, e.g. "ec2"
        :return: the boto3 client
        """
        return boto3.client(
            service_name,
            region_name=self.region,
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
        )