def delete_cluster_instances(cluster_name, region=None, force=None):
    """
    Initiate the forced termination of all cluster compute nodes. Does not work with AWS Batch clusters.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :param force: Force the deletion also when the cluster with the given name is not found. (Defaults to 'false'.)
    :type force: bool

    :rtype: None
    """
    cluster = Cluster(cluster_name)
    try:
        if not check_cluster_version(cluster):
            raise BadRequestException(
                f"Cluster '{cluster_name}' belongs to an incompatible ParallelCluster major version."
            )
        if cluster.stack.scheduler == "awsbatch":
            raise BadRequestException(
                "the delete cluster instances operation does not support AWS Batch clusters."
            )
    except StackNotFoundError:
        if not force:
            raise NotFoundException(
                f"Cluster '{cluster_name}' does not exist or belongs to an incompatible ParallelCluster major version. "
                "To force the deletion of all compute nodes, please use the `force` param."
            )
    cluster.terminate_nodes()

def _set_region(region):
    if not region:
        raise BadRequestException("region needs to be set")
    if region not in SUPPORTED_REGIONS:
        raise BadRequestException(f"invalid or unsupported region '{region}'")
    LOGGER.info("Setting AWS Region to %s", region)
    os.environ["AWS_DEFAULT_REGION"] = region

def _wrapper_validate_region(*args, **kwargs):
    # Inner wrapper of a region-validating decorator: `func` is the decorated
    # handler captured by the enclosing closure.
    region = kwargs.get("region")
    if not region:
        region = os.environ.get("AWS_DEFAULT_REGION")
    if not region:
        raise BadRequestException("region needs to be set")
    if region not in SUPPORTED_REGIONS:
        raise BadRequestException(f"invalid or unsupported region '{region}'")
    LOGGER.info("Setting AWS Region to %s", region)
    os.environ["AWS_DEFAULT_REGION"] = region
    return func(*args, **kwargs)

def validate_timestamp(date_str: str, ts_name: str = "Time"):
    try:
        return to_utc_datetime(date_str)
    except Exception:
        raise BadRequestException(
            f"{ts_name} filter must be in the ISO 8601 format: YYYY-MM-DDThh:mm:ssZ. "
            "(e.g. 1984-09-15T19:20:30Z or 1984-09-15)."
        )

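# Illustrative usage sketch only (assumes to_utc_datetime accepts both date-only
# and full ISO 8601 timestamps; the example values are hypothetical):
#
#   validate_timestamp("2021-01-01T20:00:00Z", "start_time")  # -> timezone-aware datetime
#   validate_timestamp("01/01/2021", "start_time")            # -> raises BadRequestException
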
def update_compute_fleet(update_compute_fleet_request_content, cluster_name, region=None):
    """
    Update the status of the cluster compute fleet.

    :param update_compute_fleet_request_content:
    :type update_compute_fleet_request_content: dict | bytes
    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str

    :rtype: UpdateComputeFleetResponseContent
    """
    update_compute_fleet_request_content = UpdateComputeFleetRequestContent.from_dict(
        update_compute_fleet_request_content
    )
    cluster = Cluster(cluster_name)
    validate_cluster(cluster)

    status = update_compute_fleet_request_content.status
    if cluster.stack.scheduler == "slurm":
        if status == RequestedComputeFleetStatus.START_REQUESTED:
            cluster.start()
        elif status == RequestedComputeFleetStatus.STOP_REQUESTED:
            cluster.stop()
        else:
            raise BadRequestException(
                "the update compute fleet status can only be set to"
                " `START_REQUESTED` or `STOP_REQUESTED` for Slurm clusters."
            )
    elif cluster.stack.scheduler == "awsbatch":
        if status == RequestedComputeFleetStatus.ENABLED:
            cluster.start()
        elif status == RequestedComputeFleetStatus.DISABLED:
            cluster.stop()
        else:
            raise BadRequestException(
                "the update compute fleet status can only be set to"
                " `ENABLED` or `DISABLED` for AWS Batch clusters."
            )

    status, last_status_updated_time = cluster.compute_fleet_status_with_last_updated_time
    last_status_updated_time = last_status_updated_time and to_utc_datetime(last_status_updated_time)
    return UpdateComputeFleetResponseContent(last_status_updated_time=last_status_updated_time, status=status.value)

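# Illustrative call sketch (the request dict shape mirrors UpdateComputeFleetRequestContent;
# the exact wire-level field casing and cluster names are assumptions):
#
#   update_compute_fleet({"status": "STOP_REQUESTED"}, "my-slurm-cluster")  # Slurm scheduler
#   update_compute_fleet({"status": "DISABLED"}, "my-batch-cluster")        # AWS Batch scheduler
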
def validate_cluster(cluster: Cluster):
    try:
        if not check_cluster_version(cluster):
            raise BadRequestException(
                f"Cluster '{cluster.name}' belongs to an incompatible ParallelCluster major version."
            )
    except StackNotFoundError:
        raise NotFoundException(
            f"Cluster '{cluster.name}' does not exist or belongs to an incompatible ParallelCluster major version."
        )

def _validate_optional_filters(os, architecture):
    # Note: the `os` parameter mirrors the API query parameter name and shadows
    # the `os` module within this function.
    error = ""
    if os is not None and os not in SUPPORTED_OSES:
        error = f"{os} is not one of {SUPPORTED_OSES}"
    if architecture is not None and architecture not in SUPPORTED_ARCHITECTURES:
        if error:
            error += "; "
        error += f"{architecture} is not one of {SUPPORTED_ARCHITECTURES}"
    if error:
        raise BadRequestException(error)

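# Sketch of the combined error message when both filters are invalid (the filter
# values and the contents of the SUPPORTED_* lists are hypothetical):
#
#   _validate_optional_filters("windows", "sparc")
#   # -> BadRequestException("windows is not one of [...]; sparc is not one of [...]")
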
def _handle_aws_client_error(exception: AWSClientError):
    """Transform an AWSClientError into a valid API error."""
    if exception.error_code == AWSClientError.ErrorCode.VALIDATION_ERROR.value:
        return ParallelClusterFlaskApp._handle_parallel_cluster_api_exception(BadRequestException(str(exception)))
    if exception.error_code in AWSClientError.ErrorCode.throttling_error_codes():
        return ParallelClusterFlaskApp._handle_parallel_cluster_api_exception(LimitExceededException(str(exception)))
    return ParallelClusterFlaskApp._handle_parallel_cluster_api_exception(
        InternalServiceException(f"Failed when calling AWS service in {exception.function_name}: {exception}")
    )

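# Mapping sketch of the translation above. The 400 and 500 codes match the test
# expectations further down; 429 for LimitExceededException is an assumption
# based on the exception name.
#
#   validation error codes    -> BadRequestException      (400)
#   throttling error codes    -> LimitExceededException   (429, assumed)
#   anything else             -> InternalServiceException (500)
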
def configure_aws_region_from_config(region: Union[None, str], config_str: str):
    """Set the region based on either the configuration or the region parameter."""
    # Allow parsing errors to pass through as they will be caught by later functions
    # which can provide more specific error text based on the operation.
    try:
        config_region = parse_config(config_str).get("Region")
    except Exception:
        config_region = None

    if region and config_region and region != config_region:
        raise BadRequestException("region is set in both parameter and configuration and conflicts.")

    _set_region(region or config_region or boto3.Session().region_name)

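# Behaviour sketch: a parameter that conflicts with the config's Region key raises;
# otherwise the first non-empty of parameter, config Region, and the boto3 session
# default is applied (the YAML snippet below is a hypothetical example):
#
#   configure_aws_region_from_config(None, "Region: eu-west-1")
#   # -> os.environ["AWS_DEFAULT_REGION"] == "eu-west-1"
#
#   configure_aws_region_from_config("us-east-1", "Region: eu-west-1")
#   # -> raises BadRequestException (conflicting values)
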
def wrapper(*args, **kwargs):
    # Inner wrapper of the exception-conversion decorator: translates pcluster
    # library errors raised by the wrapped handler into API exceptions.
    try:
        return func(*args, **kwargs)
    except ParallelClusterApiException as e:
        raise e
    except (LimitExceeded, LimitExceededError) as e:
        raise LimitExceededException(str(e)) from e
    except (BadRequest, BadRequestError) as e:
        raise BadRequestException(str(e)) from e
    except Conflict as e:
        raise ConflictException(str(e)) from e
    except NotFound as e:
        raise NotFoundException(str(e)) from e
    except Exception as e:
        raise InternalServiceException(str(e)) from e

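# Minimal sketch of how such a wrapper is typically produced by a decorator
# factory (the name `convert_errors` is hypothetical, not part of this module):
#
#   def convert_errors():
#       def _decorator(func):
#           @functools.wraps(func)
#           def wrapper(*args, **kwargs):
#               ...  # body as above, closing over `func`
#           return wrapper
#       return _decorator
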
def list_cluster_log_streams(cluster_name, region=None, filters=None, next_token=None):
    """
    Retrieve the list of log streams associated with a cluster.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: Region that the given cluster belongs to.
    :type region: str
    :param filters: Filter the log streams. Format: (Name=a,Values=1 Name=b,Values=2,3).
    :type filters: List[str]
    :param next_token: Token to use for paginated requests.
    :type next_token: str

    :rtype: ListClusterLogStreamsResponseContent
    """
    accepted_filters = ["private-dns-name", "node-type"]
    filters = join_filters(accepted_filters, filters) if filters else None
    cluster = Cluster(cluster_name)
    validate_cluster(cluster)

    def convert_log(log):
        log["logStreamArn"] = log.pop("arn")
        if "storedBytes" in log:
            del log["storedBytes"]
        for ts_name in ["creationTime", "firstEventTimestamp", "lastEventTimestamp", "lastIngestionTime"]:
            log[ts_name] = to_iso_timestr(to_utc_datetime(log[ts_name]))
        return LogStream.from_dict(log)

    try:
        cluster_logs = cluster.list_log_streams(filters=filters, next_token=next_token)
    except FiltersParserError as e:
        raise BadRequestException(str(e))

    log_streams = [convert_log(log) for log in cluster_logs.log_streams]
    next_token = cluster_logs.next_token

    return ListClusterLogStreamsResponseContent(log_streams=log_streams, next_token=next_token)

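# Filter format sketch, following the docstring's (Name=a,Values=1) convention.
# Only the names in accepted_filters above are honoured; the cluster name and
# filter values here are hypothetical:
#
#   list_cluster_log_streams(
#       "my-cluster",
#       filters=["Name=node-type,Values=HeadNode"],
#   )
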
def test_handle_parallel_cluster_api_exception(self, caplog, flask_app_with_error_route):
    with flask_app_with_error_route(BadRequestException("invalid request")).test_client() as client:
        response = client.get("/error")

    self._assert_response(response, body={"message": "Bad Request: invalid request"}, code=400)
    self._assert_log_message(
        caplog,
        logging.INFO,
        "Handling exception (status code 400): {'message': 'Bad Request: invalid request'}",
    )

    caplog.clear()

    with flask_app_with_error_route(InternalServiceException("failure")).test_client() as client:
        response = client.get("/error")

    self._assert_response(
        response,
        body={"message": "failure"},
        code=500,
    )
    self._assert_log_message(
        caplog, logging.ERROR, "Handling exception (status code 500): {'message': 'failure'}"
    )

def delete_cluster(cluster_name, region=None):
    """
    Initiate the deletion of a cluster.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str

    :rtype: DeleteClusterResponseContent
    """
    try:
        cluster = Cluster(cluster_name)
        if not check_cluster_version(cluster):
            raise BadRequestException(
                f"Cluster '{cluster_name}' belongs to an incompatible ParallelCluster major version."
            )

        if cluster.status != CloudFormationStackStatus.DELETE_IN_PROGRESS:
            # TODO: remove keep_logs logic from delete
            cluster.delete(keep_logs=False)

        return DeleteClusterResponseContent(
            cluster=ClusterInfoSummary(
                cluster_name=cluster_name,
                cloudformation_stack_status=CloudFormationStackStatus.DELETE_IN_PROGRESS,
                cloudformation_stack_arn=cluster.stack.id,
                region=os.environ.get("AWS_DEFAULT_REGION"),
                version=cluster.stack.version,
                cluster_status=cloud_formation_status_to_cluster_status(CloudFormationStackStatus.DELETE_IN_PROGRESS),
            )
        )
    except StackNotFoundError:
        raise NotFoundException(
            f"Cluster '{cluster_name}' does not exist or belongs to an incompatible ParallelCluster major version. "
            "In case you have running instances belonging to a deleted cluster please use the DeleteClusterInstances "
            "API."
        )

def get_cluster_log_events(
    cluster_name,
    log_stream_name,
    region: str = None,
    next_token: str = None,
    start_from_head: bool = None,
    limit: int = None,
    start_time: str = None,
    end_time: str = None,
):
    """
    Retrieve the events associated with a log stream.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param log_stream_name: Name of the log stream.
    :type log_stream_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :param next_token: Token to use for paginated requests.
    :type next_token: str
    :param start_from_head: If the value is true, the earliest log events are returned first. If the value is false,
        the latest log events are returned first. (Defaults to 'false'.)
    :type start_from_head: bool
    :param limit: The maximum number of log events returned. If you don't specify a value, the maximum is as many
        log events as can fit in a response size of 1 MB, up to 10,000 log events.
    :type limit: int
    :param start_time: The start of the time range, expressed in ISO 8601 format (e.g. '2021-01-01T20:00:00Z').
        Events with a timestamp equal to this time or later than this time are included.
    :type start_time: str
    :param end_time: The end of the time range, expressed in ISO 8601 format (e.g. '2021-01-01T20:00:00Z').
        Events with a timestamp equal to or later than this time are not included.
    :type end_time: str

    :rtype: GetClusterLogEventsResponseContent
    """
    start_dt = start_time and validate_timestamp(start_time, "start_time")
    end_dt = end_time and validate_timestamp(end_time, "end_time")
    if start_time and end_time and start_dt >= end_dt:
        raise BadRequestException("start_time filter must be earlier than end_time filter.")
    if limit and limit <= 0:
        raise BadRequestException("'limit' must be a positive integer.")

    cluster = Cluster(cluster_name)
    validate_cluster(cluster)

    if not cluster.stack.log_group_name:
        raise BadRequestException(f"CloudWatch logging is not enabled for cluster {cluster.name}.")

    log_events = cluster.get_log_events(
        log_stream_name,
        start_time=start_dt,
        end_time=end_dt,
        start_from_head=start_from_head,
        limit=limit,
        next_token=next_token,
    )

    def convert_log_event(event):
        del event["ingestionTime"]
        event["timestamp"] = to_iso_timestr(to_utc_datetime(event["timestamp"]))
        return LogEvent.from_dict(event)

    events = [convert_log_event(e) for e in log_events.events]
    return GetClusterLogEventsResponseContent(
        events=events, next_token=log_events.next_ftoken, prev_token=log_events.next_btoken
    )

def build_image(
    build_image_request_content,
    suppress_validators=None,
    validation_failure_level=None,
    dryrun=None,
    rollback_on_failure=None,
    region=None,
):
    """
    Create a custom ParallelCluster image in a given region.

    :param build_image_request_content:
    :param suppress_validators: Identifies one or more config validators to suppress.
        Format: (ALL|type:[A-Za-z0-9]+)
    :type suppress_validators: List[str]
    :param validation_failure_level: Min validation level that will cause the image creation to fail.
        Defaults to 'error'.
    :type validation_failure_level: dict | bytes
    :param dryrun: Only perform request validation without creating any resource. It can be used to validate the
        image configuration. Response code: 200 (Defaults to 'false'.)
    :type dryrun: bool
    :param rollback_on_failure: When set, will automatically initiate an image stack rollback on failure.
        (Defaults to 'false'.)
    :type rollback_on_failure: bool
    :param region: AWS Region that the operation corresponds to.
    :type region: str

    :rtype: BuildImageResponseContent
    """
    assert_node_executable()
    configure_aws_region_from_config(region, build_image_request_content["imageConfiguration"])

    rollback_on_failure = rollback_on_failure if rollback_on_failure is not None else False
    disable_rollback = not rollback_on_failure
    validation_failure_level = validation_failure_level or ValidationLevel.ERROR
    dryrun = dryrun or False

    build_image_request_content = BuildImageRequestContent.from_dict(build_image_request_content)

    try:
        image_id = build_image_request_content.image_id
        config = build_image_request_content.image_configuration

        if not config:
            LOGGER.error("Failed: configuration is required and cannot be empty")
            raise BadRequestException("configuration is required and cannot be empty")

        imagebuilder = ImageBuilder(image_id=image_id, config=config)
        if dryrun:
            imagebuilder.validate_create_request(
                validator_suppressors=get_validator_suppressors(suppress_validators),
                validation_failure_level=FailureLevel[validation_failure_level],
            )
            raise DryrunOperationException()

        suppressed_validation_failures = imagebuilder.create(
            disable_rollback=disable_rollback,
            validator_suppressors=get_validator_suppressors(suppress_validators),
            validation_failure_level=FailureLevel[validation_failure_level],
        )

        return BuildImageResponseContent(
            image=_imagebuilder_stack_to_image_info_summary(imagebuilder.stack),
            validation_messages=validation_results_to_config_validation_errors(suppressed_validation_failures) or None,
        )
    except ConfigValidationError as e:
        raise _handle_config_validation_error(e)
    except BadRequestImageBuilderActionError as e:
        errors = validation_results_to_config_validation_errors(e.validation_failures)
        raise BuildImageBadRequestException(
            BuildImageBadRequestExceptionResponseContent(message=str(e), configuration_validation_errors=errors or None)
        )

def update_cluster(
    update_cluster_request_content: Dict,
    cluster_name,
    suppress_validators=None,
    validation_failure_level=None,
    region=None,
    dryrun=None,
    force_update=None,
):
    """
    Update a cluster managed in a given region.

    :param update_cluster_request_content:
    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param suppress_validators: Identifies one or more config validators to suppress.
        Format: (ALL|type:[A-Za-z0-9]+)
    :type suppress_validators: List[str]
    :param validation_failure_level: Min validation level that will cause the update to fail. (Defaults to 'error'.)
    :type validation_failure_level: dict | bytes
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :param dryrun: Only perform request validation without creating any resource. May be used to validate the cluster
        configuration and update requirements. Response code: 200
    :type dryrun: bool
    :param force_update: Force update by ignoring the update validation errors. (Defaults to 'false'.)
    :type force_update: bool

    :rtype: UpdateClusterResponseContent
    """
    # Set defaults
    validation_failure_level = validation_failure_level or ValidationLevel.ERROR
    dryrun = dryrun is True
    force_update = force_update is True

    update_cluster_request_content = UpdateClusterRequestContent.from_dict(update_cluster_request_content)
    cluster_config = update_cluster_request_content.cluster_configuration

    if not cluster_config:
        LOGGER.error("Failed: configuration is required and cannot be empty")
        raise BadRequestException("configuration is required and cannot be empty")

    try:
        cluster = Cluster(cluster_name)
        if not check_cluster_version(cluster, exact_match=True):
            raise BadRequestException(
                f"the update can be performed only with the same ParallelCluster version ({cluster.stack.version}) "
                "used to create the cluster."
            )

        if dryrun:
            _, changes, ignored_validation_failures = cluster.validate_update_request(
                target_source_config=cluster_config,
                force=force_update,
                validator_suppressors=get_validator_suppressors(suppress_validators),
                validation_failure_level=FailureLevel[validation_failure_level],
            )
            change_set, _ = _analyze_changes(changes)
            validation_messages = validation_results_to_config_validation_errors(ignored_validation_failures)
            raise DryrunOperationException(change_set=change_set, validation_messages=validation_messages or None)

        changes, ignored_validation_failures = cluster.update(
            target_source_config=cluster_config,
            validator_suppressors=get_validator_suppressors(suppress_validators),
            validation_failure_level=FailureLevel[validation_failure_level],
            force=force_update,
        )

        change_set, _ = _analyze_changes(changes)
        return UpdateClusterResponseContent(
            cluster=ClusterInfoSummary(
                cluster_name=cluster_name,
                cloudformation_stack_status=CloudFormationStackStatus.UPDATE_IN_PROGRESS,
                cloudformation_stack_arn=cluster.stack.id,
                region=os.environ.get("AWS_DEFAULT_REGION"),
                version=cluster.stack.version,
                cluster_status=cloud_formation_status_to_cluster_status(CloudFormationStackStatus.UPDATE_IN_PROGRESS),
            ),
            validation_messages=validation_results_to_config_validation_errors(ignored_validation_failures) or None,
            change_set=change_set,
        )
    except ConfigValidationError as e:
        config_validation_messages = validation_results_to_config_validation_errors(e.validation_failures) or None
        raise UpdateClusterBadRequestException(
            UpdateClusterBadRequestExceptionResponseContent(
                configuration_validation_errors=config_validation_messages, message=str(e)
            )
        )
    except ClusterUpdateError as e:
        raise _handle_cluster_update_error(e)
    except (NotFoundClusterActionError, StackNotFoundError):
        raise NotFoundException(
            f"Cluster '{cluster_name}' does not exist or belongs to an incompatible ParallelCluster major version."
        )

def create_cluster(
    create_cluster_request_content: Dict,
    region: str = None,
    suppress_validators: List[str] = None,
    validation_failure_level: str = None,
    dryrun: bool = None,
    rollback_on_failure: bool = None,
) -> CreateClusterResponseContent:
    """
    Create a managed cluster in a given region.

    :param create_cluster_request_content:
    :type create_cluster_request_content: dict | bytes
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :param suppress_validators: Identifies one or more config validators to suppress.
        Format: (ALL|type:[A-Za-z0-9]+)
    :param validation_failure_level: Min validation level that will cause the cluster creation to fail.
        (Defaults to 'ERROR'.)
    :param dryrun: Only perform request validation without creating any resource. May be used to validate the cluster
        configuration. (Defaults to 'false'.)
    :type dryrun: bool
    :param rollback_on_failure: When set it automatically initiates a cluster stack rollback on failures.
        (Defaults to 'true'.)
    :type rollback_on_failure: bool
    """
    # Set defaults
    rollback_on_failure = rollback_on_failure in {True, None}
    validation_failure_level = validation_failure_level or ValidationLevel.ERROR
    dryrun = dryrun is True

    create_cluster_request_content = CreateClusterRequestContent.from_dict(create_cluster_request_content)
    cluster_config = create_cluster_request_content.cluster_configuration

    if not cluster_config:
        LOGGER.error("Failed: configuration is required and cannot be empty")
        raise BadRequestException("configuration is required and cannot be empty")

    try:
        cluster = Cluster(create_cluster_request_content.cluster_name, cluster_config)
        if dryrun:
            ignored_validation_failures = cluster.validate_create_request(
                get_validator_suppressors(suppress_validators), FailureLevel[validation_failure_level]
            )
            validation_messages = validation_results_to_config_validation_errors(ignored_validation_failures)
            raise DryrunOperationException(validation_messages=validation_messages or None)

        stack_id, ignored_validation_failures = cluster.create(
            disable_rollback=not rollback_on_failure,
            validator_suppressors=get_validator_suppressors(suppress_validators),
            validation_failure_level=FailureLevel[validation_failure_level],
        )
        return CreateClusterResponseContent(
            ClusterInfoSummary(
                cluster_name=create_cluster_request_content.cluster_name,
                cloudformation_stack_status=CloudFormationStackStatus.CREATE_IN_PROGRESS,
                cloudformation_stack_arn=stack_id,
                region=os.environ.get("AWS_DEFAULT_REGION"),
                version=get_installed_version(),
                cluster_status=cloud_formation_status_to_cluster_status(CloudFormationStackStatus.CREATE_IN_PROGRESS),
            ),
            validation_messages=validation_results_to_config_validation_errors(ignored_validation_failures) or None,
        )
    except ConfigValidationError as e:
        config_validation_messages = validation_results_to_config_validation_errors(e.validation_failures) or None
        raise CreateClusterBadRequestException(
            CreateClusterBadRequestExceptionResponseContent(
                configuration_validation_errors=config_validation_messages, message=str(e)
            )
        )

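# Hypothetical request sketch: the exact field casing accepted by
# CreateClusterRequestContent.from_dict and the values below are assumptions.
#
#   create_cluster(
#       {
#           "clusterName": "my-cluster",
#           "clusterConfiguration": "<YAML cluster configuration as a string>",
#       },
#       region="us-east-1",
#       dryrun=True,  # validate only; raises DryrunOperationException on success
#   )
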
def fail():
    # Local helper closing over a compiled `pattern` regex in the enclosing scope.
    raise BadRequestException(f"filters parameter must be in the form {pattern.pattern}.")