Example #1
0
def delete_cluster_instances(cluster_name, region=None, force=None):
    """
    Request the forced termination of every compute node of a cluster.

    AWS Batch clusters are rejected. When the cluster stack cannot be found the request
    fails unless ``force`` is truthy, in which case node termination is still attempted.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :param force: Force the deletion also when the cluster with the given name is not found. (Defaults to 'false'.)
    :type force: bool
    :raises BadRequestException: if the cluster version is incompatible or the scheduler is AWS Batch.
    :raises NotFoundException: if the cluster stack is not found and ``force`` is not set.

    :rtype: None
    """
    target = Cluster(cluster_name)
    try:
        version_is_compatible = check_cluster_version(target)
        if not version_is_compatible:
            raise BadRequestException(
                f"Cluster '{cluster_name}' belongs to an incompatible ParallelCluster major version."
            )
        runs_on_awsbatch = target.stack.scheduler == "awsbatch"
        if runs_on_awsbatch:
            raise BadRequestException(
                "the delete cluster instances operation does not support AWS Batch clusters."
            )
    except StackNotFoundError:
        # A missing stack is tolerated only when the caller explicitly forces the operation.
        missing_stack_is_fatal = not force
        if missing_stack_is_fatal:
            raise NotFoundException(
                f"Cluster '{cluster_name}' does not exist or belongs to an incompatible ParallelCluster major version. "
                "To force the deletion of all compute nodes, please use the `force` param."
            )
    target.terminate_nodes()
Example #2
0
def _set_region(region):
    """
    Validate ``region`` and export it through the ``AWS_DEFAULT_REGION`` environment variable.

    :param region: AWS Region name to activate.
    :raises BadRequestException: if ``region`` is empty or is not listed in ``SUPPORTED_REGIONS``.
    """
    problem = None
    if not region:
        problem = "region needs to be set"
    elif region not in SUPPORTED_REGIONS:
        problem = f"invalid or unsupported region '{region}'"

    if problem is not None:
        raise BadRequestException(problem)

    LOGGER.info("Setting AWS Region to %s", region)
    os.environ["AWS_DEFAULT_REGION"] = region
Example #3
0
        def _wrapper_validate_region(*args, **kwargs):
            # Prefer the explicit ``region`` keyword argument; when it is missing or empty,
            # fall back to whatever AWS_DEFAULT_REGION currently holds.
            region = kwargs.get("region") or os.environ.get("AWS_DEFAULT_REGION")

            problem = None
            if not region:
                problem = "region needs to be set"
            elif region not in SUPPORTED_REGIONS:
                problem = f"invalid or unsupported region '{region}'"
            if problem is not None:
                raise BadRequestException(problem)

            # Export the validated region before delegating to the wrapped callable.
            LOGGER.info("Setting AWS Region to %s", region)
            os.environ["AWS_DEFAULT_REGION"] = region

            return func(*args, **kwargs)
Example #4
0
def validate_timestamp(date_str: str, ts_name: str = "Time"):
    """
    Convert a timestamp filter through ``to_utc_datetime``, reporting failures as a bad request.

    :param date_str: timestamp text supplied by the caller.
    :param ts_name: name of the filter, used as the prefix of the error message.
    :return: whatever ``to_utc_datetime`` returns for ``date_str``.
    :raises BadRequestException: if ``to_utc_datetime`` raises any ``Exception``.
    """
    try:
        parsed = to_utc_datetime(date_str)
    except Exception:
        message = (
            f"{ts_name} filter must be in the ISO 8601 format: YYYY-MM-DDThh:mm:ssZ. "
            "(e.g. 1984-09-15T19:20:30Z or 1984-09-15)."
        )
        raise BadRequestException(message)
    else:
        return parsed
def update_compute_fleet(update_compute_fleet_request_content, cluster_name, region=None):
    """
    Update the status of the cluster compute fleet.

    Slurm clusters accept ``START_REQUESTED``/``STOP_REQUESTED``; AWS Batch clusters accept
    ``ENABLED``/``DISABLED``. For any other scheduler no start/stop action is taken and the
    current fleet status is simply reported.

    :param update_compute_fleet_request_content: request body carrying the requested status
    :type update_compute_fleet_request_content: dict | bytes
    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :raises BadRequestException: if the requested status is not accepted for the cluster scheduler.

    :rtype: UpdateComputeFleetResponseContent
    """
    request = UpdateComputeFleetRequestContent.from_dict(update_compute_fleet_request_content)
    cluster = Cluster(cluster_name)
    validate_cluster(cluster)

    requested = request.status
    if cluster.stack.scheduler == "slurm":
        if requested == RequestedComputeFleetStatus.START_REQUESTED:
            cluster.start()
        elif requested == RequestedComputeFleetStatus.STOP_REQUESTED:
            cluster.stop()
        else:
            raise BadRequestException(
                "the update compute fleet status can only be set to"
                " `START_REQUESTED` or `STOP_REQUESTED` for Slurm clusters."
            )
    elif cluster.stack.scheduler == "awsbatch":
        if requested == RequestedComputeFleetStatus.ENABLED:
            cluster.start()
        elif requested == RequestedComputeFleetStatus.DISABLED:
            cluster.stop()
        else:
            raise BadRequestException(
                "the update compute fleet status can only be set to"
                " `ENABLED` or `DISABLED` for AWS Batch clusters."
            )

    # Report the fleet status as observed after the (optional) start/stop request.
    current_status, updated_at = cluster.compute_fleet_status_with_last_updated_time
    if updated_at:
        updated_at = to_utc_datetime(updated_at)
    return UpdateComputeFleetResponseContent(last_status_updated_time=updated_at, status=current_status.value)
Example #6
0
def validate_cluster(cluster: Cluster):
    """
    Ensure the cluster stack exists and passes ``check_cluster_version``.

    :param cluster: cluster object to verify.
    :raises NotFoundException: if the version check raises ``StackNotFoundError``.
    :raises BadRequestException: if the version check reports an incompatible version.
    """
    try:
        version_is_compatible = check_cluster_version(cluster)
    except StackNotFoundError:
        raise NotFoundException(
            f"Cluster '{cluster.name}' does not exist or belongs to an incompatible ParallelCluster major version."
        )
    else:
        if not version_is_compatible:
            raise BadRequestException(
                f"Cluster '{cluster.name}' belongs to an incompatible ParallelCluster major version."
            )
def _validate_optional_filters(os, architecture):
    error = ""
    if os is not None and os not in SUPPORTED_OSES:
        error = f"{os} is not one of {SUPPORTED_OSES}"
    if architecture is not None and architecture not in SUPPORTED_ARCHITECTURES:
        if error:
            error += "; "
        error += f"{architecture} is not one of {SUPPORTED_ARCHITECTURES}"
    if error:
        raise BadRequestException(error)
Example #8
0
 def _handle_aws_client_error(exception: AWSClientError):
     """
     Transform a AWSClientError into a valid API error.

     Validation errors become a bad request, throttling errors become a limit-exceeded
     error, and anything else is reported as an internal service error.
     """
     error_code = exception.error_code
     if error_code == AWSClientError.ErrorCode.VALIDATION_ERROR.value:
         api_error = BadRequestException(str(exception))
     elif error_code in AWSClientError.ErrorCode.throttling_error_codes():
         api_error = LimitExceededException(str(exception))
     else:
         api_error = InternalServiceException(
             f"Failed when calling AWS service in {exception.function_name}: {exception}"
         )
     return ParallelClusterFlaskApp._handle_parallel_cluster_api_exception(api_error)
Example #9
0
def configure_aws_region_from_config(region: Union[None, str], config_str: str):
    """
    Set the region based on either the configuration or the region parameter.

    The first available value among the ``region`` parameter, the ``Region`` entry of the
    parsed configuration and the boto3 session region is handed to ``_set_region``.

    :param region: region passed as a request parameter, if any.
    :param config_str: configuration text from which the ``Region`` entry is read.
    :raises BadRequestException: if both the parameter and the configuration set a region and they differ.
    """
    # Parsing errors are deliberately ignored here: they will be caught by later functions
    # which can provide more specific error text based on the operation.
    try:
        region_from_config = parse_config(config_str).get("Region")
    except Exception:
        region_from_config = None

    both_provided = region and region_from_config
    if both_provided and region != region_from_config:
        raise BadRequestException(
            "region is set in both parameter and configuration and conflicts."
        )

    effective_region = region or region_from_config or boto3.Session().region_name
    _set_region(effective_region)
Example #10
0
 def wrapper(*args, **kwargs):
     # Run the wrapped callable and translate every failure into an API exception.
     # Handler order matters: the first matching clause wins.
     try:
         result = func(*args, **kwargs)
     except ParallelClusterApiException:
         # Already an API exception: let it through untouched.
         raise
     except (LimitExceeded, LimitExceededError) as err:
         raise LimitExceededException(str(err)) from err
     except (BadRequest, BadRequestError) as err:
         raise BadRequestException(str(err)) from err
     except Conflict as err:
         raise ConflictException(str(err)) from err
     except NotFound as err:
         raise NotFoundException(str(err)) from err
     except Exception as err:
         # Anything unexpected is reported as an internal service error.
         raise InternalServiceException(str(err)) from err
     else:
         return result
def list_cluster_log_streams(cluster_name, region=None, filters=None, next_token=None):
    """
    Retrieve the list of log streams associated with a cluster.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: Region that the given cluster belongs to.
    :type region: str
    :param filters: Filter the log streams. Format: (Name=a,Values=1 Name=b,Values=2,3).
    :type filters: List[str]
    :param next_token: Token to use for paginated requests.
    :type next_token: str
    :raises BadRequestException: if listing the log streams raises ``FiltersParserError``.

    :rtype: ListClusterLogStreamsResponseContent
    """
    timestamp_fields = ("creationTime", "firstEventTimestamp", "lastEventTimestamp", "lastIngestionTime")

    def to_log_stream(raw):
        # Reshape one raw log stream entry in place: rename the ARN key, drop the
        # stored-bytes counter and rewrite every timestamp as an ISO time string.
        raw["logStreamArn"] = raw.pop("arn")
        raw.pop("storedBytes", None)
        for field in timestamp_fields:
            raw[field] = to_iso_timestr(to_utc_datetime(raw[field]))
        return LogStream.from_dict(raw)

    accepted_filters = ["private-dns-name", "node-type"]
    joined_filters = join_filters(accepted_filters, filters) if filters else None
    cluster = Cluster(cluster_name)
    validate_cluster(cluster)

    try:
        listing = cluster.list_log_streams(filters=joined_filters, next_token=next_token)
    except FiltersParserError as err:
        raise BadRequestException(str(err))

    streams = [to_log_stream(entry) for entry in listing.log_streams]
    return ListClusterLogStreamsResponseContent(log_streams=streams, next_token=listing.next_token)
Example #12
0
    def test_handle_parallel_cluster_api_exception(self, caplog, flask_app_with_error_route):
        """Check response body, status code and log record for a 400 and a 500 API exception."""
        # A bad request is answered with 400 and logged at INFO level.
        bad_request_app = flask_app_with_error_route(BadRequestException("invalid request"))
        with bad_request_app.test_client() as client:
            response = client.get("/error")

        expected_body = {"message": "Bad Request: invalid request"}
        self._assert_response(response, body=expected_body, code=400)
        self._assert_log_message(
            caplog,
            logging.INFO,
            "Handling exception (status code 400): {'message': 'Bad Request: invalid request'}",
        )

        # Start from a clean log capture before exercising the second scenario.
        caplog.clear()

        # An internal service error is answered with 500 and logged at ERROR level.
        internal_error_app = flask_app_with_error_route(InternalServiceException("failure"))
        with internal_error_app.test_client() as client:
            response = client.get("/error")

        expected_body = {"message": "failure"}
        self._assert_response(response, body=expected_body, code=500)
        self._assert_log_message(
            caplog,
            logging.ERROR,
            "Handling exception (status code 500): {'message': 'failure'}",
        )
Example #13
0
def delete_cluster(cluster_name, region=None):
    """
    Initiate the deletion of a cluster.

    The delete call is skipped when the cluster status is already DELETE_IN_PROGRESS; the
    response reports DELETE_IN_PROGRESS in both cases.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :raises BadRequestException: if the cluster belongs to an incompatible major version.
    :raises NotFoundException: if ``StackNotFoundError`` is raised while handling the request.

    :rtype: DeleteClusterResponseContent
    """
    try:
        cluster = Cluster(cluster_name)
        version_is_compatible = check_cluster_version(cluster)
        if not version_is_compatible:
            raise BadRequestException(
                f"Cluster '{cluster_name}' belongs to an incompatible ParallelCluster major version."
            )

        deleting = CloudFormationStackStatus.DELETE_IN_PROGRESS
        already_deleting = cluster.status == deleting
        if not already_deleting:
            # TODO: remove keep_logs logic from delete
            cluster.delete(keep_logs=False)

        summary = ClusterInfoSummary(
            cluster_name=cluster_name,
            cloudformation_stack_status=deleting,
            cloudformation_stack_arn=cluster.stack.id,
            region=os.environ.get("AWS_DEFAULT_REGION"),
            version=cluster.stack.version,
            cluster_status=cloud_formation_status_to_cluster_status(deleting),
        )
        return DeleteClusterResponseContent(cluster=summary)
    except StackNotFoundError:
        raise NotFoundException(
            f"Cluster '{cluster_name}' does not exist or belongs to an incompatible ParallelCluster major version. "
            "In case you have running instances belonging to a deleted cluster please use the DeleteClusterInstances "
            "API."
        )
def get_cluster_log_events(
    cluster_name,
    log_stream_name,
    region: str = None,
    next_token: str = None,
    start_from_head: bool = None,
    limit: int = None,
    start_time: str = None,
    end_time: str = None,
):
    """
    Retrieve the events associated with a log stream.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param log_stream_name: Name of the log stream.
    :type log_stream_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :param next_token: Token to use for paginated requests.
    :type next_token: str
    :param start_from_head: If the value is true, the earliest log events are returned first. If the value is false, the
                            latest log events are returned first. (Defaults to 'false'.)
    :type start_from_head: bool
    :param limit: The maximum number of log events returned. If you don't specify a value, the maximum is as many
                  log events as can fit in a response size of 1 MB, up to 10,000 log events.
                  When given it must be strictly positive.
    :type limit: int
    :param start_time: The start of the time range, expressed in ISO 8601 format
                       (e.g. '2021-01-01T20:00:00Z'). Events with a timestamp equal to this time or later
                       than this time are included.
    :type start_time: str
    :param end_time: The end of the time range, expressed in ISO 8601 format (e.g. '2021-01-01T20:00:00Z').
                     Events with a timestamp equal to or later than this time are not included.
    :type end_time: str
    :raises BadRequestException: if a timestamp is invalid, ``start_time`` is not earlier than ``end_time``,
                                 ``limit`` is zero or negative, or the cluster stack has no log group name.

    :rtype: GetClusterLogEventsResponseContent
    """
    # Request parameters are validated before any cluster lookup is attempted.
    start_dt = start_time and validate_timestamp(start_time, "start_time")
    end_dt = end_time and validate_timestamp(end_time, "end_time")

    if start_time and end_time and start_dt >= end_dt:
        raise BadRequestException(
            "start_time filter must be earlier than end_time filter.")

    # Compare against None explicitly: a truthiness test would let limit=0 slip through,
    # because 0 is falsy, even though it is not a positive integer.
    if limit is not None and limit <= 0:
        raise BadRequestException("'limit' must be a positive integer.")

    cluster = Cluster(cluster_name)
    validate_cluster(cluster)

    if not cluster.stack.log_group_name:
        raise BadRequestException(
            f"CloudWatch logging is not enabled for cluster {cluster.name}.")

    log_events = cluster.get_log_events(
        log_stream_name,
        start_time=start_dt,
        end_time=end_dt,
        start_from_head=start_from_head,
        limit=limit,
        next_token=next_token,
    )

    def convert_log_event(event):
        # Drop the ingestion time and rewrite the event timestamp as an ISO time string.
        del event["ingestionTime"]
        event["timestamp"] = to_iso_timestr(to_utc_datetime(
            event["timestamp"]))
        return LogEvent.from_dict(event)

    events = [convert_log_event(e) for e in log_events.events]
    # The forward token is exposed as next_token and the backward token as prev_token.
    return GetClusterLogEventsResponseContent(
        events=events,
        next_token=log_events.next_ftoken,
        prev_token=log_events.next_btoken)
def build_image(
    build_image_request_content,
    suppress_validators=None,
    validation_failure_level=None,
    dryrun=None,
    rollback_on_failure=None,
    region=None,
):
    """
    Create a custom ParallelCluster image in a given region.

    :param build_image_request_content: request body; its "imageConfiguration" entry is also used
    to configure the AWS region.
    :param suppress_validators: Identifies one or more config validators to suppress.
    Format: (ALL|type:[A-Za-z0-9]+)
    :type suppress_validators: List[str]
    :param validation_failure_level: Min validation level that will cause the image creation to fail.
    Defaults to 'error'.
    :param dryrun: Only perform request validation without creating any resource.
    It can be used to validate the image configuration. Response code: 200
    (Defaults to 'false'.)
    :type dryrun: bool
    :param rollback_on_failure: When set, will automatically initiate an image stack rollback on failure.
    (Defaults to 'false'.)
    :type rollback_on_failure: bool
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :raises BadRequestException: if the image configuration is empty.
    :raises DryrunOperationException: when ``dryrun`` is set and request validation succeeds.
    :raises BuildImageBadRequestException: if the image builder rejects the request.

    :rtype: BuildImageResponseContent
    """
    assert_node_executable()
    configure_aws_region_from_config(region, build_image_request_content["imageConfiguration"])
    # Rollback stays disabled unless explicitly requested: None and False both disable it.
    disable_rollback = not rollback_on_failure
    failure_level_name = validation_failure_level or ValidationLevel.ERROR

    request = BuildImageRequestContent.from_dict(build_image_request_content)

    try:
        image_id = request.image_id
        image_config = request.image_configuration

        if not image_config:
            LOGGER.error("Failed: configuration is required and cannot be empty")
            raise BadRequestException("configuration is required and cannot be empty")

        imagebuilder = ImageBuilder(image_id=image_id, config=image_config)

        if dryrun:
            # Validation only: success is signalled to the caller through an exception.
            imagebuilder.validate_create_request(
                validator_suppressors=get_validator_suppressors(suppress_validators),
                validation_failure_level=FailureLevel[failure_level_name],
            )
            raise DryrunOperationException()

        suppressed_failures = imagebuilder.create(
            disable_rollback=disable_rollback,
            validator_suppressors=get_validator_suppressors(suppress_validators),
            validation_failure_level=FailureLevel[failure_level_name],
        )

        image_summary = _imagebuilder_stack_to_image_info_summary(imagebuilder.stack)
        messages = validation_results_to_config_validation_errors(suppressed_failures) or None
        return BuildImageResponseContent(image=image_summary, validation_messages=messages)
    except ConfigValidationError as err:
        raise _handle_config_validation_error(err)
    except BadRequestImageBuilderActionError as err:
        validation_errors = validation_results_to_config_validation_errors(err.validation_failures)
        raise BuildImageBadRequestException(
            BuildImageBadRequestExceptionResponseContent(
                message=str(err), configuration_validation_errors=validation_errors or None
            )
        )
Example #16
0
def update_cluster(
    update_cluster_request_content: Dict,
    cluster_name,
    suppress_validators=None,
    validation_failure_level=None,
    region=None,
    dryrun=None,
    force_update=None,
):
    """
    Update a cluster managed in a given region.

    :param update_cluster_request_content: request body carrying the target cluster configuration
    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param suppress_validators: Identifies one or more config validators to suppress.
    Format: (ALL|type:[A-Za-z0-9]+)
    :type suppress_validators: List[str]
    :param validation_failure_level: Min validation level that will cause the update to fail.
    (Defaults to 'error'.)
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :param dryrun: Only perform request validation without creating any resource.
    May be used to validate the cluster configuration and update requirements. Response code: 200
    :type dryrun: bool
    :param force_update: Force update by ignoring the update validation errors.
    (Defaults to 'false'.)
    :type force_update: bool
    :raises BadRequestException: if the configuration is empty or the cluster version does not match exactly.
    :raises DryrunOperationException: when ``dryrun`` is set and request validation succeeds.
    :raises UpdateClusterBadRequestException: if configuration validation fails.
    :raises NotFoundException: if the cluster cannot be found.

    :rtype: UpdateClusterResponseContent
    """
    # Set defaults: only a literal True enables dryrun / force_update.
    failure_level_name = validation_failure_level or ValidationLevel.ERROR
    is_dryrun = dryrun is True
    is_forced = force_update is True
    request = UpdateClusterRequestContent.from_dict(update_cluster_request_content)
    target_config = request.cluster_configuration

    if not target_config:
        LOGGER.error("Failed: configuration is required and cannot be empty")
        raise BadRequestException("configuration is required and cannot be empty")

    try:
        cluster = Cluster(cluster_name)
        same_version = check_cluster_version(cluster, exact_match=True)
        if not same_version:
            raise BadRequestException(
                f"the update can be performed only with the same ParallelCluster version ({cluster.stack.version}) "
                "used to create the cluster."
            )

        if is_dryrun:
            # Validation only: the outcome is delivered to the caller through an exception.
            _, dry_changes, dry_ignored_failures = cluster.validate_update_request(
                target_source_config=target_config,
                force=is_forced,
                validator_suppressors=get_validator_suppressors(suppress_validators),
                validation_failure_level=FailureLevel[failure_level_name],
            )
            dry_change_set, _ = _analyze_changes(dry_changes)
            dry_messages = validation_results_to_config_validation_errors(dry_ignored_failures)
            raise DryrunOperationException(change_set=dry_change_set, validation_messages=dry_messages or None)

        applied_changes, ignored_failures = cluster.update(
            target_source_config=target_config,
            validator_suppressors=get_validator_suppressors(suppress_validators),
            validation_failure_level=FailureLevel[failure_level_name],
            force=is_forced,
        )

        change_set, _ = _analyze_changes(applied_changes)
        updating = CloudFormationStackStatus.UPDATE_IN_PROGRESS
        summary = ClusterInfoSummary(
            cluster_name=cluster_name,
            cloudformation_stack_status=updating,
            cloudformation_stack_arn=cluster.stack.id,
            region=os.environ.get("AWS_DEFAULT_REGION"),
            version=cluster.stack.version,
            cluster_status=cloud_formation_status_to_cluster_status(updating),
        )
        return UpdateClusterResponseContent(
            cluster=summary,
            validation_messages=validation_results_to_config_validation_errors(ignored_failures) or None,
            change_set=change_set,
        )
    except ConfigValidationError as err:
        config_messages = validation_results_to_config_validation_errors(err.validation_failures) or None
        raise UpdateClusterBadRequestException(
            UpdateClusterBadRequestExceptionResponseContent(
                configuration_validation_errors=config_messages, message=str(err)
            )
        )
    except ClusterUpdateError as err:
        raise _handle_cluster_update_error(err)
    except (NotFoundClusterActionError, StackNotFoundError):
        raise NotFoundException(
            f"Cluster '{cluster_name}' does not exist or belongs to an incompatible ParallelCluster major version."
        )
Example #17
0
def create_cluster(
    create_cluster_request_content: Dict,
    region: str = None,
    suppress_validators: List[str] = None,
    validation_failure_level: str = None,
    dryrun: bool = None,
    rollback_on_failure: bool = None,
) -> CreateClusterResponseContent:
    """
    Create a managed cluster in a given region.

    :param create_cluster_request_content: request body carrying the cluster name and configuration
    :type create_cluster_request_content: dict | bytes
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :param suppress_validators: Identifies one or more config validators to suppress.
    Format: (ALL|type:[A-Za-z0-9]+)
    :param validation_failure_level: Min validation level that will cause the cluster creation to fail.
    (Defaults to 'ERROR'.)
    :param dryrun: Only perform request validation without creating any resource. May be used to validate the cluster
    configuration. (Defaults to 'false'.)
    :type dryrun: bool
    :param rollback_on_failure: When set it automatically initiates a cluster stack rollback on failures.
    (Defaults to 'true'.)
    :type rollback_on_failure: bool
    :raises BadRequestException: if the cluster configuration is empty.
    :raises DryrunOperationException: when ``dryrun`` is set and request validation succeeds.
    :raises CreateClusterBadRequestException: if configuration validation fails.
    """
    # Set defaults: rollback is enabled for True and for an unspecified (None) value.
    disable_rollback = rollback_on_failure not in {True, None}
    failure_level_name = validation_failure_level or ValidationLevel.ERROR
    is_dryrun = dryrun is True
    request = CreateClusterRequestContent.from_dict(create_cluster_request_content)
    cluster_config = request.cluster_configuration

    if not cluster_config:
        LOGGER.error("Failed: configuration is required and cannot be empty")
        raise BadRequestException("configuration is required and cannot be empty")

    try:
        cluster = Cluster(request.cluster_name, cluster_config)

        if is_dryrun:
            # Validation only: the outcome is delivered to the caller through an exception.
            dry_ignored_failures = cluster.validate_create_request(
                get_validator_suppressors(suppress_validators), FailureLevel[failure_level_name]
            )
            dry_messages = validation_results_to_config_validation_errors(dry_ignored_failures)
            raise DryrunOperationException(validation_messages=dry_messages or None)

        stack_id, ignored_failures = cluster.create(
            disable_rollback=disable_rollback,
            validator_suppressors=get_validator_suppressors(suppress_validators),
            validation_failure_level=FailureLevel[failure_level_name],
        )

        creating = CloudFormationStackStatus.CREATE_IN_PROGRESS
        summary = ClusterInfoSummary(
            cluster_name=request.cluster_name,
            cloudformation_stack_status=creating,
            cloudformation_stack_arn=stack_id,
            region=os.environ.get("AWS_DEFAULT_REGION"),
            version=get_installed_version(),
            cluster_status=cloud_formation_status_to_cluster_status(creating),
        )
        return CreateClusterResponseContent(
            summary,
            validation_messages=validation_results_to_config_validation_errors(ignored_failures) or None,
        )
    except ConfigValidationError as err:
        config_messages = validation_results_to_config_validation_errors(err.validation_failures) or None
        raise CreateClusterBadRequestException(
            CreateClusterBadRequestExceptionResponseContent(
                configuration_validation_errors=config_messages, message=str(err)
            )
        )
 def fail():
     # Report a malformed filters parameter, quoting the expected pattern.
     message = f"filters parameter must be in the form {pattern.pattern}."
     raise BadRequestException(message)