def test_validate_empty_change_set(self, mocker, force):
    """Validate an update with an unchanged config: force yields a single change row, otherwise it raises."""
    mock_aws_api(mocker)
    stack_data = {
        "StackName": FAKE_NAME,
        "CreationTime": "2021-06-04 10:23:20.199000+00:00",
        "StackStatus": ClusterStatus.CREATE_COMPLETE,
    }
    cluster = Cluster(FAKE_NAME, stack=ClusterStack(stack_data), config=OLD_CONFIGURATION)
    mocker.patch("pcluster.aws.cfn.CfnClient.stack_exists", return_value=True)

    if not force:
        # Without force, an identical configuration must be rejected.
        with pytest.raises(
            BadRequestClusterActionError, match="No changes found in your cluster configuration."
        ):
            cluster.validate_update_request(
                target_source_config=OLD_CONFIGURATION,
                validator_suppressors={AllValidatorsSuppressor()},
                force=force,
            )
    else:
        _, detected_changes, _ = cluster.validate_update_request(
            target_source_config=OLD_CONFIGURATION,
            validator_suppressors={AllValidatorsSuppressor()},
            force=force,
        )
        # Only the change-set header row is present when nothing actually changed.
        assert_that(detected_changes).is_length(1)
def delete_cluster_instances(cluster_name, region=None, force=None):
    """
    Initiate the forced termination of all cluster compute nodes. Does not work with AWS Batch clusters.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :param force: Force the deletion also when the cluster with the given name is not found.
    (Defaults to 'false'.)
    :type force: bool
    :rtype: None
    """
    cluster = Cluster(cluster_name)
    try:
        version_compatible = check_cluster_version(cluster)
        if not version_compatible:
            raise BadRequestException(
                f"Cluster '{cluster_name}' belongs to an incompatible ParallelCluster major version."
            )
        if cluster.stack.scheduler == "awsbatch":
            raise BadRequestException(
                "the delete cluster instances operation does not support AWS Batch clusters."
            )
    except StackNotFoundError:
        # The stack is gone: only proceed when the caller explicitly asked to force termination.
        if not force:
            raise NotFoundException(
                f"Cluster '{cluster_name}' does not exist or belongs to an incompatible ParallelCluster major version. "
                "To force the deletion of all compute nodes, please use the `force` param."
            )
    cluster.terminate_nodes()
def get_cluster_stack_events(cluster_name, region=None, next_token=None):
    """
    Retrieve the events associated with the stack for a given cluster.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :param next_token: Token to use for paginated requests.
    :type next_token: str
    :rtype: GetClusterStackEventsResponseContent
    """
    cluster = Cluster(cluster_name)
    validate_cluster(cluster)
    raw_result = cluster.get_stack_events(next_token=next_token)

    converted_events = []
    for raw_event in raw_result["StackEvents"]:
        # Lower-case the first character of each CloudFormation key to match the API model field names.
        model_event = {key[0].lower() + key[1:]: value for key, value in raw_event.items()}
        model_event["timestamp"] = to_iso_timestr(to_utc_datetime(model_event["timestamp"]))
        converted_events.append(StackEvent.from_dict(model_event))

    return GetClusterStackEventsResponseContent(
        next_token=raw_result.get("NextToken"), events=converted_events
    )
def list_cluster_log_streams(cluster_name, region=None, filters=None, next_token=None):
    """
    Retrieve the list of log streams associated with a cluster.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: Region that the given cluster belongs to.
    :type region: str
    :param filters: Filter the log streams. Format: (Name=a,Values=1 Name=b,Values=2,3).
    :type filters: List[str]
    :param next_token: Token to use for paginated requests.
    :type next_token: str
    :rtype: ListClusterLogStreamsResponseContent
    """
    accepted_filters = ["private-dns-name", "node-type"]
    joined_filters = join_filters(accepted_filters, filters) if filters else None
    cluster = Cluster(cluster_name)
    validate_cluster(cluster)

    def to_model(stream):
        # Rename the ARN field to the API model name and drop the unused size field.
        stream["logStreamArn"] = stream.pop("arn")
        stream.pop("storedBytes", None)
        for field in ("creationTime", "firstEventTimestamp", "lastEventTimestamp", "lastIngestionTime"):
            stream[field] = to_iso_timestr(to_utc_datetime(stream[field]))
        return LogStream.from_dict(stream)

    try:
        listing = cluster.list_log_streams(filters=joined_filters, next_token=next_token)
    except FiltersParserError as e:
        raise BadRequestException(str(e))

    return ListClusterLogStreamsResponseContent(
        log_streams=[to_model(stream) for stream in listing.log_streams],
        next_token=listing.next_token,
    )
def update_compute_fleet(update_compute_fleet_request_content, cluster_name, region=None):
    """
    Update the status of the cluster compute fleet.

    :param update_compute_fleet_request_content: requested fleet status payload
    :type update_compute_fleet_request_content: dict | bytes
    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :rtype: UpdateComputeFleetResponseContent
    """
    request = UpdateComputeFleetRequestContent.from_dict(update_compute_fleet_request_content)
    cluster = Cluster(cluster_name)
    validate_cluster(cluster)

    requested_status = request.status
    scheduler = cluster.stack.scheduler
    # Each scheduler accepts its own pair of fleet statuses; anything else is a bad request.
    if scheduler == "slurm":
        if requested_status == RequestedComputeFleetStatus.START_REQUESTED:
            cluster.start()
        elif requested_status == RequestedComputeFleetStatus.STOP_REQUESTED:
            cluster.stop()
        else:
            raise BadRequestException(
                "the update compute fleet status can only be set to"
                " `START_REQUESTED` or `STOP_REQUESTED` for Slurm clusters."
            )
    elif scheduler == "awsbatch":
        if requested_status == RequestedComputeFleetStatus.ENABLED:
            cluster.start()
        elif requested_status == RequestedComputeFleetStatus.DISABLED:
            cluster.stop()
        else:
            raise BadRequestException(
                "the update compute fleet status can only be set to"
                " `ENABLED` or `DISABLED` for AWS Batch clusters."
            )

    new_status, last_updated = cluster.compute_fleet_status_with_last_updated_time
    last_updated = last_updated and to_utc_datetime(last_updated)
    return UpdateComputeFleetResponseContent(
        last_status_updated_time=last_updated, status=new_status.value
    )
def update_cluster(
    cluster_config: str,
    cluster_name: str,
    region: str,
    suppress_validators: bool = False,
    validation_failure_level: FailureLevel = FailureLevel.ERROR,
    force: bool = False,
):
    """
    Update existing cluster.

    :param cluster_config: cluster configuration (str)
    :param cluster_name: the name to assign to the cluster
    :param region: AWS region
    :param suppress_validators: set to True to skip all config validators
    :param validation_failure_level: minimum validation level that fails the operation
    :param force: set to True to force stack update
    """
    try:
        if region:
            os.environ["AWS_DEFAULT_REGION"] = region

        cluster = Cluster(cluster_name)

        # Refuse cross-version updates: the stack must match the installed version.
        installed_version = get_installed_version()
        stack_version = cluster.stack.version
        if stack_version != installed_version:
            raise ClusterActionError(
                "The cluster was created with a different version of "
                f"ParallelCluster: {stack_version}. Installed version is {installed_version}. "
                "This operation may only be performed using the same ParallelCluster "
                "version used to create the cluster."
            )

        suppressors = {AllValidatorsSuppressor()} if suppress_validators else set()
        cluster.update(cluster_config, suppressors, validation_failure_level, force)  # TODO add dryrun
        return ClusterInfo(cluster.stack)
    except ConfigValidationError as e:
        return ApiFailure(str(e), validation_failures=e.validation_failures)
    except ClusterUpdateError as e:
        return ApiFailure(str(e), update_changes=e.update_changes)
    except Exception as e:
        return ApiFailure(str(e))
def describe_cluster(cluster_name: str, region: str):
    """Get cluster information."""
    try:
        if region:
            os.environ["AWS_DEFAULT_REGION"] = region
        return ClusterInfo(Cluster(cluster_name).stack)
    except Exception as e:
        # Surface any failure as an API-level error instead of raising.
        return ApiFailure(str(e))
def describe_cluster_instances(cluster_name, region=None, next_token=None, node_type=None, queue_name=None):
    """
    Describe the instances belonging to a given cluster.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :param next_token: Token to use for paginated requests.
    :type next_token: str
    :param node_type: node type filter
    :type node_type: dict | bytes
    :param queue_name: queue name filter
    :type queue_name: str
    :rtype: DescribeClusterInstancesResponseContent
    """
    cluster = Cluster(cluster_name)
    internal_node_type = api_node_type_to_cluster_node_type(node_type)
    instances, next_token = cluster.describe_instances(
        next_token=next_token, node_type=internal_node_type, queue_name=queue_name
    )

    def to_api_instance(instance):
        # Map internal node types onto the two API-visible kinds.
        api_node_type = (
            ApiNodeType.HEAD if instance.node_type == NodeType.HEAD_NODE.value else ApiNodeType.COMPUTE
        )
        return ClusterInstance(
            instance_id=instance.id,
            launch_time=to_utc_datetime(instance.launch_time),
            public_ip_address=instance.public_ip,
            instance_type=instance.instance_type,
            state=instance.state,
            private_ip_address=instance.private_ip,
            node_type=api_node_type,
            queue_name=instance.queue_name,
        )

    return DescribeClusterInstancesResponseContent(
        instances=[to_api_instance(instance) for instance in instances], next_token=next_token
    )
def _export_cluster_logs(args: Namespace, output_file: str = None):
    """Export the logs associated to the cluster."""
    LOGGER.debug("Beginning export of logs for the cluster: %s", args.cluster_name)
    joined_filters = " ".join(args.filters) if args.filters else None
    cluster = Cluster(args.cluster_name)
    url = cluster.export_logs(
        bucket=args.bucket,
        bucket_prefix=args.bucket_prefix,
        keep_s3_objects=args.keep_s3_objects,
        start_time=args.start_time,
        end_time=args.end_time,
        filters=joined_filters,
        output_file=output_file,
    )
    LOGGER.debug("Cluster's logs exported correctly to %s", url)
    # Report a local path when writing to a file, otherwise the remote URL.
    if output_file is not None:
        return {"path": output_file}
    return {"url": url}
def cluster(self, mocker):
    """Return a Cluster whose `bucket` property is patched to a fake S3Bucket."""
    fake_bucket = S3Bucket(
        service_name=FAKE_NAME, stack_name=FAKE_NAME, artifact_directory=ARTIFACT_DIRECTORY
    )
    mocker.patch(
        "pcluster.models.cluster.Cluster.bucket",
        new_callable=PropertyMock(return_value=fake_bucket),
    )
    stack = ClusterStack({"StackName": FAKE_NAME, "CreationTime": "2021-06-04 10:23:20.199000+00:00"})
    return Cluster(FAKE_NAME, stack=stack)
def describe_cluster(cluster_name, region=None):
    """
    Get detailed information about an existing cluster.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :rtype: DescribeClusterResponseContent
    """
    cluster = Cluster(cluster_name)
    validate_cluster(cluster)
    stack = cluster.stack
    fleet_status = cluster.compute_fleet_status

    try:
        config_url = cluster.config_presigned_url
    except ClusterActionError as e:
        # Do not fail request when S3 bucket is not available
        LOGGER.error(e)
        config_url = "NOT_AVAILABLE"

    stack_tags = [Tag(value=tag.get("Value"), key=tag.get("Key")) for tag in stack.tags]
    response = DescribeClusterResponseContent(
        creation_time=to_utc_datetime(stack.creation_time),
        version=stack.version,
        cluster_configuration=ClusterConfigurationStructure(url=config_url),
        tags=stack_tags,
        cloud_formation_stack_status=stack.status,
        cluster_name=cluster_name,
        compute_fleet_status=fleet_status.value,
        cloudformation_stack_arn=stack.id,
        last_updated_time=to_utc_datetime(stack.last_updated_time),
        region=os.environ.get("AWS_DEFAULT_REGION"),
        cluster_status=cloud_formation_status_to_cluster_status(stack.status),
    )

    try:
        head_node = cluster.head_node_instance
        response.headnode = EC2Instance(
            instance_id=head_node.id,
            launch_time=to_utc_datetime(head_node.launch_time),
            public_ip_address=head_node.public_ip,
            instance_type=head_node.instance_type,
            state=InstanceState.from_dict(head_node.state),
            private_ip_address=head_node.private_ip,
        )
    except ClusterActionError as e:
        # This should not be treated as a failure cause head node might not be running in some cases
        LOGGER.info(e)

    return response
def update_compute_fleet_status(cluster_name: str, region: str, status: ComputeFleetStatus):
    """Update existing compute fleet status."""
    try:
        if region:
            os.environ["AWS_DEFAULT_REGION"] = region
        cluster = Cluster(cluster_name)
        if PclusterApi._is_version_2(cluster):
            raise ClusterActionError(
                f"The cluster {cluster.name} was created with ParallelCluster {cluster.stack.version}. "
                "This operation may only be performed using the same version used to create the cluster."
            )
        # Dispatch the requested status to the matching fleet operation.
        fleet_actions = {
            ComputeFleetStatus.START_REQUESTED: cluster.start,
            ComputeFleetStatus.STOP_REQUESTED: cluster.stop,
        }
        action = fleet_actions.get(status)
        if action is None:
            return ApiFailure(f"Unable to update the compute fleet to status {status}. Not supported.")
        action()
    except Exception as e:
        return ApiFailure(str(e))
    return None
def delete_cluster(cluster_name, region=None):
    """
    Initiate the deletion of a cluster.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :rtype: DeleteClusterResponseContent
    """
    try:
        cluster = Cluster(cluster_name)
        if not check_cluster_version(cluster):
            raise BadRequestException(
                f"Cluster '{cluster_name}' belongs to an incompatible ParallelCluster major version."
            )

        # Deletion is idempotent: skip the call when a delete is already running.
        if cluster.status != CloudFormationStackStatus.DELETE_IN_PROGRESS:
            # TODO: remove keep_logs logic from delete
            cluster.delete(keep_logs=False)

        summary = ClusterInfoSummary(
            cluster_name=cluster_name,
            cloudformation_stack_status=CloudFormationStackStatus.DELETE_IN_PROGRESS,
            cloudformation_stack_arn=cluster.stack.id,
            region=os.environ.get("AWS_DEFAULT_REGION"),
            version=cluster.stack.version,
            cluster_status=cloud_formation_status_to_cluster_status(CloudFormationStackStatus.DELETE_IN_PROGRESS),
        )
        return DeleteClusterResponseContent(cluster=summary)
    except StackNotFoundError:
        raise NotFoundException(
            f"Cluster '{cluster_name}' does not exist or belongs to an incompatible ParallelCluster major version. "
            "In case you have running instances belonging to a deleted cluster please use the DeleteClusterInstances "
            "API."
        )
def create_cluster(
    cluster_config: str,
    cluster_name: str,
    region: str,
    disable_rollback: bool = False,
    suppress_validators: bool = False,
    validation_failure_level: FailureLevel = FailureLevel.ERROR,
):
    """
    Load cluster model from cluster_config and create stack.

    :param cluster_config: cluster configuration (str)
    :param cluster_name: the name to assign to the cluster
    :param region: AWS region
    :param disable_rollback: Disable rollback in case of failures
    :param suppress_validators: Disable validator execution
    :param validation_failure_level: Min validation level that will cause the creation to fail
    """
    try:
        if region:
            os.environ["AWS_DEFAULT_REGION"] = region

        # Generate model from config dict and validate
        cluster = Cluster(cluster_name, cluster_config)
        suppressors = {AllValidatorsSuppressor()} if suppress_validators else set()

        # check cluster existence
        if AWSApi.instance().cfn.stack_exists(cluster.stack_name):
            raise Exception(f"Cluster {cluster.name} already exists")

        cluster.create(disable_rollback, suppressors, validation_failure_level)
        return ClusterInfo(cluster.stack)
    except ConfigValidationError as e:
        return ApiFailure(str(e), validation_failures=e.validation_failures)
    except Exception as e:
        return ApiFailure(str(e))
def describe_compute_fleet(cluster_name, region=None):
    """
    Describe the status of the compute fleet.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :rtype: DescribeComputeFleetResponseContent
    """
    cluster = Cluster(cluster_name)
    validate_cluster(cluster)
    fleet_status, updated_at = cluster.compute_fleet_status_with_last_updated_time
    # The update timestamp may be absent; only convert it when present.
    if updated_at:
        updated_at = to_utc_datetime(updated_at)
    return DescribeComputeFleetResponseContent(
        last_status_updated_time=updated_at, status=fleet_status.value
    )
def delete_cluster(cluster_name: str, region: str, keep_logs: bool = True):
    """Delete cluster."""
    cluster = None
    try:
        if region:
            os.environ["AWS_DEFAULT_REGION"] = region
        # retrieve cluster config and generate model
        cluster = Cluster(cluster_name)
        cluster.delete(keep_logs)
        return ClusterInfo(cluster.stack)
    except Exception as e:
        # Best effort: make sure compute nodes are terminated even if the delete failed mid-way.
        if cluster is not None:
            cluster.terminate_nodes()
        return ApiFailure(str(e))
def describe_cluster_instances(
    cluster_name: str, region: str, node_type: NodeType = None
) -> Union[List[ClusterInstanceInfo], ApiFailure]:
    """List instances for a cluster."""
    try:
        if region:
            os.environ["AWS_DEFAULT_REGION"] = region
        cluster = Cluster(cluster_name)
        # No node_type filter means both head node and compute nodes are returned.
        selected = []
        if node_type in (NodeType.HEAD_NODE, None):
            selected.append(cluster.head_node_instance)
        if node_type in (NodeType.COMPUTE, None):
            selected.extend(cluster.compute_instances)
        return [ClusterInstanceInfo(instance) for instance in selected]
    except Exception as e:
        return ApiFailure(str(e))
def create_cluster(
    create_cluster_request_content: Dict,
    region: str = None,
    suppress_validators: List[str] = None,
    validation_failure_level: str = None,
    dryrun: bool = None,
    rollback_on_failure: bool = None,
) -> CreateClusterResponseContent:
    """
    Create a managed cluster in a given region.

    :param create_cluster_request_content:
    :type create_cluster_request_content: dict | bytes
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :param suppress_validators: Identifies one or more config validators to suppress.
    Format: (ALL|type:[A-Za-z0-9]+)
    :param validation_failure_level: Min validation level that will cause the cluster creation to fail.
    (Defaults to 'ERROR'.)
    :param dryrun: Only perform request validation without creating any resource. May be used to validate the cluster
    configuration. (Defaults to 'false'.)
    :type dryrun: bool
    :param rollback_on_failure: When set it automatically initiates a cluster stack rollback on failures.
    (Defaults to 'true'.)
    :type rollback_on_failure: bool
    """
    # Resolve optional parameters to their documented defaults.
    rollback_on_failure = rollback_on_failure is None or rollback_on_failure is True
    validation_failure_level = validation_failure_level or ValidationLevel.ERROR
    dryrun = dryrun is True

    request = CreateClusterRequestContent.from_dict(create_cluster_request_content)
    config_text = request.cluster_configuration
    if not config_text:
        LOGGER.error("Failed: configuration is required and cannot be empty")
        raise BadRequestException("configuration is required and cannot be empty")

    try:
        cluster = Cluster(request.cluster_name, config_text)
        suppressors = get_validator_suppressors(suppress_validators)
        failure_level = FailureLevel[validation_failure_level]

        if dryrun:
            # Dry runs only validate the request and report back via the dryrun exception.
            ignored_failures = cluster.validate_create_request(suppressors, failure_level)
            messages = validation_results_to_config_validation_errors(ignored_failures)
            raise DryrunOperationException(validation_messages=messages or None)

        stack_id, ignored_failures = cluster.create(
            disable_rollback=not rollback_on_failure,
            validator_suppressors=suppressors,
            validation_failure_level=failure_level,
        )
        return CreateClusterResponseContent(
            ClusterInfoSummary(
                cluster_name=request.cluster_name,
                cloudformation_stack_status=CloudFormationStackStatus.CREATE_IN_PROGRESS,
                cloudformation_stack_arn=stack_id,
                region=os.environ.get("AWS_DEFAULT_REGION"),
                version=get_installed_version(),
                cluster_status=cloud_formation_status_to_cluster_status(CloudFormationStackStatus.CREATE_IN_PROGRESS),
            ),
            validation_messages=validation_results_to_config_validation_errors(ignored_failures) or None,
        )
    except ConfigValidationError as e:
        config_validation_messages = validation_results_to_config_validation_errors(e.validation_failures) or None
        raise CreateClusterBadRequestException(
            CreateClusterBadRequestExceptionResponseContent(
                configuration_validation_errors=config_validation_messages, message=str(e)
            )
        )
def update_cluster(
    update_cluster_request_content: Dict,
    cluster_name,
    suppress_validators=None,
    validation_failure_level=None,
    region=None,
    dryrun=None,
    force_update=None,
):
    """
    Update a cluster managed in a given region.

    :param update_cluster_request_content:
    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param suppress_validators: Identifies one or more config validators to suppress.
    Format: (ALL|type:[A-Za-z0-9]+)
    :type suppress_validators: List[str]
    :param validation_failure_level: Min validation level that will cause the update to fail. (Defaults to 'error'.)
    :type validation_failure_level: dict | bytes
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :param dryrun: Only perform request validation without creating any resource. May be used to validate the cluster
    configuration and update requirements. Response code: 200
    :type dryrun: bool
    :param force_update: Force update by ignoring the update validation errors. (Defaults to 'false'.)
    :type force_update: bool
    :rtype: UpdateClusterResponseContent
    """
    # Resolve optional parameters to their documented defaults.
    validation_failure_level = validation_failure_level or ValidationLevel.ERROR
    dryrun = dryrun is True
    force_update = force_update is True

    request = UpdateClusterRequestContent.from_dict(update_cluster_request_content)
    config_text = request.cluster_configuration
    if not config_text:
        LOGGER.error("Failed: configuration is required and cannot be empty")
        raise BadRequestException("configuration is required and cannot be empty")

    try:
        cluster = Cluster(cluster_name)
        if not check_cluster_version(cluster, exact_match=True):
            raise BadRequestException(
                f"the update can be performed only with the same ParallelCluster version ({cluster.stack.version}) "
                "used to create the cluster."
            )

        suppressors = get_validator_suppressors(suppress_validators)
        failure_level = FailureLevel[validation_failure_level]

        if dryrun:
            # Dry runs only validate the request and report back via the dryrun exception.
            _, changes, ignored_failures = cluster.validate_update_request(
                target_source_config=config_text,
                force=force_update,
                validator_suppressors=suppressors,
                validation_failure_level=failure_level,
            )
            change_set, _ = _analyze_changes(changes)
            messages = validation_results_to_config_validation_errors(ignored_failures)
            raise DryrunOperationException(change_set=change_set, validation_messages=messages or None)

        changes, ignored_failures = cluster.update(
            target_source_config=config_text,
            validator_suppressors=suppressors,
            validation_failure_level=failure_level,
            force=force_update,
        )
        change_set, _ = _analyze_changes(changes)
        return UpdateClusterResponseContent(
            cluster=ClusterInfoSummary(
                cluster_name=cluster_name,
                cloudformation_stack_status=CloudFormationStackStatus.UPDATE_IN_PROGRESS,
                cloudformation_stack_arn=cluster.stack.id,
                region=os.environ.get("AWS_DEFAULT_REGION"),
                version=cluster.stack.version,
                cluster_status=cloud_formation_status_to_cluster_status(CloudFormationStackStatus.UPDATE_IN_PROGRESS),
            ),
            validation_messages=validation_results_to_config_validation_errors(ignored_failures) or None,
            change_set=change_set,
        )
    except ConfigValidationError as e:
        config_validation_messages = validation_results_to_config_validation_errors(e.validation_failures) or None
        raise UpdateClusterBadRequestException(
            UpdateClusterBadRequestExceptionResponseContent(
                configuration_validation_errors=config_validation_messages, message=str(e)
            )
        )
    except ClusterUpdateError as e:
        raise _handle_cluster_update_error(e)
    except (NotFoundClusterActionError, StackNotFoundError):
        raise NotFoundException(
            f"Cluster '{cluster_name}' does not exist or belongs to an incompatible ParallelCluster major version."
        )
def get_cluster_log_events(
    cluster_name,
    log_stream_name,
    region: str = None,
    next_token: str = None,
    start_from_head: bool = None,
    limit: int = None,
    start_time: str = None,
    end_time: str = None,
):
    """
    Retrieve the events associated with a log stream.

    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param log_stream_name: Name of the log stream.
    :type log_stream_name: str
    :param region: AWS Region that the operation corresponds to.
    :type region: str
    :param next_token: Token to use for paginated requests.
    :type next_token: str
    :param start_from_head: If the value is true, the earliest log events are returned first. If the value is false,
    the latest log events are returned first. (Defaults to 'false'.)
    :type start_from_head: bool
    :param limit: The maximum number of log events returned. If you don't specify a value, the maximum is as many log
    events as can fit in a response size of 1 MB, up to 10,000 log events.
    :type limit:
    :param start_time: The start of the time range, expressed in ISO 8601 format (e.g. '2021-01-01T20:00:00Z').
    Events with a timestamp equal to this time or later than this time are included.
    :type start_time: str
    :param end_time: The end of the time range, expressed in ISO 8601 format (e.g. '2021-01-01T20:00:00Z'). Events
    with a timestamp equal to or later than this time are not included.
    :type end_time: str
    :rtype: GetClusterLogEventsResponseContent
    """
    start_dt = validate_timestamp(start_time, "start_time") if start_time else start_time
    end_dt = validate_timestamp(end_time, "end_time") if end_time else end_time
    if start_time and end_time and start_dt >= end_dt:
        raise BadRequestException("start_time filter must be earlier than end_time filter.")
    if limit and limit <= 0:
        raise BadRequestException("'limit' must be a positive integer.")

    cluster = Cluster(cluster_name)
    validate_cluster(cluster)
    if not cluster.stack.log_group_name:
        raise BadRequestException(f"CloudWatch logging is not enabled for cluster {cluster.name}.")

    log_events = cluster.get_log_events(
        log_stream_name,
        start_time=start_dt,
        end_time=end_dt,
        start_from_head=start_from_head,
        limit=limit,
        next_token=next_token,
    )

    converted_events = []
    for raw_event in log_events.events:
        # Drop the CloudWatch-internal field and normalize the timestamp to ISO 8601.
        del raw_event["ingestionTime"]
        raw_event["timestamp"] = to_iso_timestr(to_utc_datetime(raw_event["timestamp"]))
        converted_events.append(LogEvent.from_dict(raw_event))

    return GetClusterLogEventsResponseContent(
        events=converted_events,
        next_token=log_events.next_ftoken,
        prev_token=log_events.next_btoken,
    )
def dummy_cluster(name=FAKE_NAME, stack=None):
    """Return dummy cluster object."""
    # Default to a dummy stack when the caller does not provide one.
    cluster_stack = dummy_cluster_stack() if stack is None else stack
    return Cluster(name, stack=cluster_stack)