コード例 #1
0
    def _upload_config(self):
        """
        Upload the cluster configuration to the S3 bucket and record the object versions.

        When a config is set, two objects are uploaded: the ClusterSchema dump of the config and the
        original source config text. The "VersionId" returned by each upload is stored on the config.
        Any exception is converted through _cluster_error_mapper before being raised.
        """
        self._check_bucket_existence()
        try:
            if not self.config:
                # No config available, nothing to upload
                return

            # Config serialized through the schema (default values and sections included)
            dumped_upload = self.bucket.upload_config(
                config=ClusterSchema(cluster_name=self.name).dump(deepcopy(self.config)),
                config_name=PCLUSTER_S3_ARTIFACTS_DICT.get("config_name"),
            )
            self.config.config_version = dumped_upload.get("VersionId")

            # Config exactly as found in the source text
            source_upload = self.bucket.upload_config(
                config=self.source_config_text,
                config_name=PCLUSTER_S3_ARTIFACTS_DICT.get("source_config_name"),
                format=S3FileFormat.TEXT,
            )
            # The original config version will be stored in CloudFormation Parameters
            self.config.original_config_version = source_upload.get("VersionId")
        except Exception as e:
            message = f"Unable to upload cluster config to the S3 bucket {self.bucket.name} due to exception: {e}"
            raise _cluster_error_mapper(e, message)
コード例 #2
0
    def _generate_artifact_dir(self):
        """
        Build the S3 artifact directory path of the cluster and store it on the instance.

        The path joins, with "/", the artifacts root directory, the installed version, the root
        cluster directory and the name returned by generate_random_name_with_prefix for the cluster name.
        The cluster artifact dir is generated before cfn stack creation and only once.
        e.g. parallelcluster/{version}/clusters/{cluster_name}-jfr4odbeonwb1w5k
        """
        cluster_directory = generate_random_name_with_prefix(self.name)
        path_parts = (
            PCLUSTER_S3_ARTIFACTS_DICT.get("root_directory"),
            get_installed_version(),
            PCLUSTER_S3_ARTIFACTS_DICT.get("root_cluster_directory"),
            cluster_directory,
        )
        self.__s3_artifact_dir = "/".join(path_parts)
コード例 #3
0
 def _persist_stack_resources(self, keys):
     """
     Mark the given template resources as retained and apply the modified template to the stack.

     Every resource identified by keys gets a DeletionPolicy of 'Retain'. The template is then
     uploaded to the bucket and the stack template is updated from the uploaded template URL.
     An AWSClientError raised by the upload or the update is converted through _cluster_error_mapper.
     """
     stack_template = self._get_stack_template()
     for resource_key in keys:
         resource = stack_template["Resources"][resource_key]
         resource["DeletionPolicy"] = "Retain"

     try:
         self.bucket.upload_cfn_template(stack_template, PCLUSTER_S3_ARTIFACTS_DICT.get("template_name"))
         template_url = self.bucket.get_cfn_template_url(PCLUSTER_S3_ARTIFACTS_DICT.get("template_name"))
         self._update_stack_template(template_url)
     except AWSClientError as e:
         message = f"Unable to persist logs on cluster deletion, failed with error: {e}."
         raise _cluster_error_mapper(e, message)
コード例 #4
0
    def _upload_artifacts(self):
        """
        Upload cluster specific resources and the cluster template to the S3 bucket.

        All dirs contained in resource dir will be uploaded as zip files to
        {bucket_name}/parallelcluster/{version}/clusters/{cluster_name}/{resource_dir}/artifacts.zip.
        All files contained in root dir will be uploaded to
        {bucket_name}/parallelcluster/{version}/clusters/{cluster_name}/{resource_dir}/artifact.

        A BadRequestClusterActionError is propagated untouched; any other exception is logged and
        converted through _cluster_error_mapper.
        """
        self._check_bucket_existence()
        try:
            # Custom resources, located relative to this package
            custom_resources_dir = pkg_resources.resource_filename(__name__, "../resources/custom_resources")
            self.bucket.upload_resources(
                resource_dir=custom_resources_dir,
                custom_artifacts_name=PCLUSTER_S3_ARTIFACTS_DICT.get("custom_artifacts_name"),
            )

            # Scheduler resources, only when the config provides them
            if self.config.scheduler_resources:
                self.bucket.upload_resources(
                    resource_dir=self.config.scheduler_resources,
                    custom_artifacts_name=PCLUSTER_S3_ARTIFACTS_DICT.get("scheduler_resources_name"),
                )

            # Cluster template, when one is available
            if self.template_body:
                self.bucket.upload_cfn_template(
                    self.template_body, PCLUSTER_S3_ARTIFACTS_DICT.get("template_name")
                )

            # Instance types data, for Slurm and scheduler plugin scheduling
            if isinstance(self.config.scheduling, (SlurmScheduling, SchedulerPluginScheduling)):
                self.bucket.upload_config(
                    self.config.get_instance_types_data(),
                    PCLUSTER_S3_ARTIFACTS_DICT.get("instance_types_data_name"),
                    format=S3FileFormat.JSON,
                )

            # Scheduler plugin template, for scheduler plugin scheduling only
            if isinstance(self.config.scheduling, SchedulerPluginScheduling):
                self._render_and_upload_scheduler_plugin_template()
        except BadRequestClusterActionError:
            raise
        except Exception as e:
            error_message = (
                f"Unable to upload cluster resources to the S3 bucket {self.bucket.name} due to exception: {e}"
            )
            LOGGER.error(error_message)
            raise _cluster_error_mapper(e, error_message)
コード例 #5
0
    def create(
        self,
        disable_rollback: bool = False,
        validator_suppressors: Optional[Set[ValidatorSuppressor]] = None,
        validation_failure_level: FailureLevel = FailureLevel.ERROR,
    ) -> Tuple[Optional[str], List]:
        """
        Create cluster.

        Validates the request, generates the artifact directory, uploads the config and the
        artifacts to the S3 bucket and creates the stack from the uploaded template.

        :param disable_rollback: forwarded to the stack creation call
        :param validator_suppressors: forwarded to validate_create_request
        :param validation_failure_level: forwarded to validate_create_request
        :return: the "StackId" of the creation result and the suppressed validation failures

        raises ClusterActionError: in case of generic error
        raises ConfigValidationError: if configuration is invalid
        """
        creation_result = None
        artifact_dir_generated = False
        try:
            suppressed_validation_failures = self.validate_create_request(
                validator_suppressors, validation_failure_level)

            self._add_version_tag()
            self._generate_artifact_dir()
            artifact_dir_generated = True
            self._upload_config()

            # Create template if not provided by the user
            if not (self.config.dev_settings
                    and self.config.dev_settings.cluster_template):
                self.template_body = CDKTemplateBuilder(
                ).build_cluster_template(cluster_config=self.config,
                                         bucket=self.bucket,
                                         stack_name=self.stack_name)

            # upload cluster artifacts and generated template
            self._upload_artifacts()

            LOGGER.info("Creating stack named: %s", self.stack_name)
            creation_result = AWSApi.instance().cfn.create_stack_from_url(
                stack_name=self.stack_name,
                template_url=self.bucket.get_cfn_template_url(
                    template_name=PCLUSTER_S3_ARTIFACTS_DICT.get(
                        "template_name")),
                disable_rollback=disable_rollback,
                tags=self._get_cfn_tags(),
            )

            return creation_result.get(
                "StackId"), suppressed_validation_failures

        except ConfigValidationError:
            # Validation errors reach the caller untouched, with their original traceback
            raise
        except Exception as e:
            if not creation_result and artifact_dir_generated:
                # Cleanup S3 artifacts if stack is not created yet.
                # A failing cleanup must not replace the error that caused the creation to fail,
                # so it is only logged and the original error is still mapped and raised below.
                try:
                    self.bucket.delete_s3_artifacts()
                except Exception as cleanup_error:
                    LOGGER.warning(
                        "Unable to delete S3 artifacts after failed cluster creation: %s",
                        cleanup_error)
            raise _cluster_error_mapper(e, str(e))
コード例 #6
0
 def _get_cluster_config(self):
     """
     Fetch the cluster config content from the S3 bucket.

     The source config object is read at the version recorded on the stack as original_config_version.
     Any exception is converted through _cluster_error_mapper before being raised.
     """
     original_version = self.stack.original_config_version
     try:
         bucket = self.bucket
         source_config_name = PCLUSTER_S3_ARTIFACTS_DICT.get("source_config_name")
         return bucket.get_config(version_id=original_version, config_name=source_config_name)
     except Exception as e:
         message = f"Unable to load configuration from bucket '{self.bucket.name}/{self.s3_artifacts_dir}'.\n{e}"
         raise _cluster_error_mapper(e, message)
コード例 #7
0
 def presigned_config_url(self) -> str:
     """Build a pre-signed Url for downloading the image config object from the S3 bucket."""
     bucket = self.bucket
     image_config_name = PCLUSTER_S3_ARTIFACTS_DICT.get("image_config_name")
     return bucket.get_config_presigned_url(config_name=image_config_name)
コード例 #8
0
    def update(
        self,
        target_source_config: str,
        validator_suppressors: Optional[Set[ValidatorSuppressor]] = None,
        validation_failure_level: FailureLevel = FailureLevel.ERROR,
        force: bool = False,
    ):
        """
        Update cluster.

        Validates the update request, replaces the current config and source config text with the
        target ones, uploads config and artifacts to the S3 bucket, updates the stack from the
        uploaded template and refreshes the stack information.

        :param target_source_config: source text of the target config; stored as the new source config text
        :param validator_suppressors: forwarded to validate_update_request
        :param validation_failure_level: forwarded to validate_update_request
        :param force: forwarded to validate_update_request
        :return: the changes and the ignored validation failures returned by validate_update_request

        raises ClusterActionError: in case of generic error
        raises ConfigValidationError: if configuration is invalid
        raises ClusterUpdateError: if update is not allowed
        """
        try:
            target_config, changes, ignored_validation_failures = self.validate_update_request(
                target_source_config, validator_suppressors,
                validation_failure_level, force)

            self.config = target_config
            self.__source_config_text = target_source_config

            self._add_version_tag()
            self._upload_config()

            # Create template if not provided by the user
            if not (self.config.dev_settings
                    and self.config.dev_settings.cluster_template):
                self.template_body = CDKTemplateBuilder(
                ).build_cluster_template(
                    cluster_config=self.config,
                    bucket=self.bucket,
                    stack_name=self.stack_name,
                    log_group_name=self.stack.log_group_name,
                )

            # upload cluster artifacts and generated template
            self._upload_artifacts()

            LOGGER.info("Updating stack named: %s", self.stack_name)
            AWSApi.instance().cfn.update_stack_from_url(
                stack_name=self.stack_name,
                template_url=self.bucket.get_cfn_template_url(
                    template_name=PCLUSTER_S3_ARTIFACTS_DICT.get(
                        "template_name")),
                tags=self._get_cfn_tags(),
            )

            # Reload the stack so that id and status reflect the update just requested
            self.__stack = ClusterStack(AWSApi.instance().cfn.describe_stack(
                self.stack_name))
            LOGGER.debug("StackId: %s", self.stack.id)
            LOGGER.info("Status: %s", self.stack.status)

            return changes, ignored_validation_failures

        except ClusterActionError:
            # It can be a ConfigValidationError or ClusterUpdateError:
            # let it reach the caller untouched, with its original traceback
            raise
        except Exception as e:
            LOGGER.critical(e)
            raise _cluster_error_mapper(e, f"Cluster update failed.\n{e}")
コード例 #9
0
 def config_presigned_url(self) -> str:
     """
     Build a pre-signed Url for downloading the source config object from the S3 bucket.

     The Url targets the object version recorded on the stack as original_config_version.
     """
     bucket = self.bucket
     source_config_name = PCLUSTER_S3_ARTIFACTS_DICT.get("source_config_name")
     original_version = self.stack.original_config_version
     return bucket.get_config_presigned_url(config_name=source_config_name, version_id=original_version)