def init_pcluster_config_from_configparser(config_parser, validate=True):
    """
    Build a PclusterConfig from an in-memory config parser.

    The parser content is written to a named temporary file, which is then loaded
    as a PclusterConfig using the "default" cluster label.

    NOTE: the temporary file is created with ``delete=False``, so it is left on disk
    after this function returns.

    :param config_parser: object exposing ``write(file_object)`` (e.g. a ConfigParser)
    :param validate: when truthy, ``validate()`` is invoked on the config before returning
    :return: the PclusterConfig built from the temporary file
    """
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_path = tmp_file.name

        # Re-open the temporary file by name in text mode to dump the parser content.
        with open(tmp_path, "w") as writer:
            config_parser.write(writer)

        loaded_config = PclusterConfig(config_file=tmp_path, cluster_label="default", fail_on_file_absence=True)
        if validate:
            loaded_config.validate()
    return loaded_config
def create(args):  # noqa: C901 FIXME!!!
    """
    Create the CloudFormation stack for the cluster described by ``args``.

    Steps, in order: validate the cluster name, load and validate the configuration,
    run the SIT -> HIT conversion, set up the S3 bucket with the cluster resources,
    merge the CFN parameters and finally call ``create_stack``.

    Unless ``args.nowait`` is set, the function waits for stack creation to be verified
    and prints the stack outputs; the process exits with status 1 if verification fails.

    Error handling:
    - ClientError, KeyError and any other Exception: the error is logged, S3 resources
      are cleaned up and the process exits with status 1.
    - KeyboardInterrupt: S3 resources are cleaned up only if the stack does not exist
      (or its name was not computed yet); the process exits with status 0.

    :param args: parsed command line arguments; the attributes read here are
        cluster_name, config_file, cluster_template, tags, template_url,
        extra_parameters, norollback and nowait
    """
    LOGGER.info("Beginning cluster creation for cluster: %s", args.cluster_name)
    LOGGER.debug("Building cluster config based on args %s", str(args))

    _validate_cluster_name(args.cluster_name)

    # Build the config based on args
    pcluster_config = PclusterConfig(
        config_file=args.config_file, cluster_label=args.cluster_template, fail_on_file_absence=True
    )
    pcluster_config.validate()

    # Automatic SIT -> HIT conversion, if needed
    HitConverter(pcluster_config).convert()

    # get CFN parameters, template url and tags from config
    storage_data = pcluster_config.to_storage()
    cfn_params = storage_data.cfn_params

    _check_for_updates(pcluster_config)

    bucket_name = None
    artifact_directory = None
    cleanup_bucket = False
    # Pre-initialized so that the KeyboardInterrupt handler can be reached safely even when
    # the interruption happens before the stack name is computed inside the try block.
    stack_name = None
    try:
        cfn_client = boto3.client("cloudformation")
        stack_name = utils.get_stack_name(args.cluster_name)

        # merge tags from configuration, command-line and internal ones
        tags = _evaluate_tags(pcluster_config, preferred_tags=args.tags)

        bucket_name, artifact_directory, cleanup_bucket = _setup_bucket_with_resources(
            pcluster_config, storage_data, stack_name, tags
        )
        cfn_params["ResourcesS3Bucket"] = bucket_name
        cfn_params["ArtifactS3RootDirectory"] = artifact_directory
        cfn_params["RemoveBucketOnDeletion"] = str(cleanup_bucket)

        LOGGER.info("Creating stack named: %s", stack_name)

        # determine the CloudFormation Template URL to use
        template_url = evaluate_pcluster_template_url(pcluster_config, preferred_template_url=args.template_url)

        # append extra parameters from command-line
        if args.extra_parameters:
            LOGGER.debug("Adding extra parameters to the CFN parameters")
            cfn_params.update(dict(args.extra_parameters))

        # prepare input parameters for stack creation and create the stack
        LOGGER.debug(cfn_params)
        params = [{"ParameterKey": key, "ParameterValue": value} for key, value in cfn_params.items()]
        stack = cfn_client.create_stack(
            StackName=stack_name,
            TemplateURL=template_url,
            Parameters=params,
            Capabilities=["CAPABILITY_IAM"],
            DisableRollback=args.norollback,
            Tags=tags,
        )
        LOGGER.debug("StackId: %s", stack.get("StackId"))

        if not args.nowait:
            verified = utils.verify_stack_creation(stack_name, cfn_client)
            LOGGER.info("")
            result_stack = utils.get_stack(stack_name, cfn_client)
            _print_stack_outputs(result_stack)
            if not verified:
                # SystemExit is not an Exception subclass, so it is not caught by the handlers below
                sys.exit(1)
        else:
            stack_status = utils.get_stack(stack_name, cfn_client).get("StackStatus")
            LOGGER.info("Status: %s", stack_status)
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        # No stack name means create_stack was never reached, so there is nothing to probe
        if stack_name is None or not utils.stack_exists(stack_name):
            # Cleanup S3 artifacts if stack is not created yet
            utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(0)
    except KeyError as e:
        LOGGER.critical("ERROR: KeyError - reason:\n%s", e)
        utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(1)
    except Exception as e:
        LOGGER.critical(e)
        utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(1)
def create(args):  # noqa: C901 FIXME!!!
    """
    Create the CloudFormation stack for the cluster described by ``args``.

    The configuration is loaded and validated, then converted to CFN parameters.
    When the configured scheduler is "awsbatch", a bucket holding the batch resources
    is created first and passed to the stack as ``ResourcesS3Bucket``.

    On ClientError, KeyError or any other Exception the error is logged, the batch
    bucket (if one was created) is deleted and the process exits with status 1.
    On KeyboardInterrupt the process exits with status 0 without deleting the bucket.

    :param args: parsed command line arguments; the attributes read here are
        cluster_name, config_file, cluster_template, template_url, tags,
        extra_parameters, norollback and nowait
    """
    LOGGER.info("Beginning cluster creation for cluster: %s", args.cluster_name)
    LOGGER.debug("Building cluster config based on args %s", str(args))

    # Load and validate the configuration selected on the command line
    pcluster_config = PclusterConfig(
        config_file=args.config_file,
        cluster_label=args.cluster_template,
        fail_on_file_absence=True,
    )
    pcluster_config.validate()

    # Extract the cluster section and the CFN parameters from the configuration
    cluster_section = pcluster_config.get_section("cluster")
    cfn_params = pcluster_config.to_cfn()

    _check_for_updates(pcluster_config)

    batch_temporary_bucket = None

    def _discard_batch_bucket():
        # Delete the bucket created for awsbatch resources, if any was created.
        if batch_temporary_bucket:
            utils.delete_s3_bucket(bucket_name=batch_temporary_bucket)

    try:
        cfn_client = boto3.client("cloudformation")
        stack_name = utils.get_stack_name(args.cluster_name)

        # The awsbatch scheduler needs a bucket populated with the packaged batch resources
        scheduler = cluster_section.get_param_value("scheduler")
        if scheduler == "awsbatch":
            batch_resources = pkg_resources.resource_filename(__name__, "resources/batch")
            batch_temporary_bucket = _create_bucket_with_batch_resources(
                stack_name=stack_name,
                resources_dir=batch_resources,
                region=pcluster_config.region,
            )
            cfn_params["ResourcesS3Bucket"] = batch_temporary_bucket

        LOGGER.info("Creating stack named: %s", stack_name)
        LOGGER.debug(cfn_params)

        # Template URL: the command line value is passed as the preferred one
        template_url = _evaluate_pcluster_template_url(pcluster_config, preferred_template_url=args.template_url)

        # Tags: the command line values are passed as the preferred ones
        tags = _evaluate_tags(pcluster_config, preferred_tags=args.tags)

        # Extra parameters from the command line are merged on top of the CFN parameters
        if args.extra_parameters:
            LOGGER.debug("Adding extra parameters to the CFN parameters")
            cfn_params.update(dict(args.extra_parameters))

        # Convert the parameters dict to the list format expected by create_stack
        LOGGER.debug(cfn_params)
        stack_parameters = []
        for param_key, param_value in cfn_params.items():
            stack_parameters.append({"ParameterKey": param_key, "ParameterValue": param_value})

        stack = cfn_client.create_stack(
            StackName=stack_name,
            TemplateURL=template_url,
            Parameters=stack_parameters,
            Capabilities=["CAPABILITY_IAM"],
            DisableRollback=args.norollback,
            Tags=tags,
        )
        LOGGER.debug("StackId: %s", stack.get("StackId"))

        if args.nowait:
            stack_status = utils.get_stack(stack_name, cfn_client).get("StackStatus")
            LOGGER.info("Status: %s", stack_status)
        else:
            utils.verify_stack_creation(stack_name, cfn_client)
            LOGGER.info("")
            _print_stack_outputs(utils.get_stack(stack_name, cfn_client))
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        _discard_batch_bucket()
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
    except KeyError as e:
        LOGGER.critical("ERROR: KeyError - reason:")
        LOGGER.critical(e)
        _discard_batch_bucket()
        sys.exit(1)
    except Exception as e:
        LOGGER.critical(e)
        _discard_batch_bucket()
        sys.exit(1)
def execute(args):
    """
    Update an existing cluster with the settings found in the configuration file.

    The current configuration is rebuilt from the CloudFormation stack, the target one
    is loaded from file and validated. If the model check or the changes check does not
    pass, "Update aborted." is logged and the process exits with status 1. Otherwise the
    base configuration is updated with the target one, resources are uploaded to the
    cluster S3 bucket and the stack update is triggered.

    :param args: parsed command line arguments; the attributes read here are
        cluster_name, config_file and cluster_template (``args`` is also forwarded
        to the helper functions)
    """
    LOGGER.info("Retrieving configuration from CloudFormation for cluster {0}...".format(args.cluster_name))
    base_config = PclusterConfig(config_file=args.config_file, cluster_name=args.cluster_name)

    # An update cannot be started while another stack operation is in progress
    current_status = base_config.cfn_stack.get("StackStatus")
    if "IN_PROGRESS" in current_status:
        utils.error("Cannot execute update while stack is in {} status.".format(current_status))

    LOGGER.info("Validating configuration file {0}...".format(args.config_file or ""))
    stack_name = utils.get_stack_name(args.cluster_name)
    target_config = PclusterConfig(
        config_file=args.config_file,
        cluster_label=args.cluster_template,
        fail_on_file_absence=True,
    )
    target_config.validate()

    # The changes check is evaluated only when the models check passes (short-circuit)
    can_update = _check_cluster_models(base_config, target_config, args.cluster_template) and _check_changes(
        args, base_config, target_config
    )
    if not can_update:
        LOGGER.info("Update aborted.")
        sys.exit(1)
    else:
        # Update base config settings
        base_config.update(target_config)

        cfn_params = base_config.to_cfn()
        cfn_client = boto3.client("cloudformation")
        _restore_cfn_only_params(cfn_client, args, cfn_params, stack_name, target_config)

        bucket = cfn_params["ResourcesS3Bucket"]
        tags = _get_target_config_tags_list(target_config)
        artifact_directory = cfn_params["ArtifactS3RootDirectory"]

        hit_cluster = utils.is_hit_enabled_cluster(base_config.cfn_stack)
        template_url = None
        if hit_cluster:
            try:
                upload_hit_resources(
                    bucket,
                    artifact_directory,
                    target_config,
                    target_config.to_storage().json_params,
                    tags,
                )
            except Exception:
                utils.error("Failed when uploading resources to cluster S3 bucket {0}".format(bucket))
            # A template URL is evaluated only for HIT clusters; otherwise the previous template is reused
            template_url = evaluate_pcluster_template_url(target_config)

        try:
            upload_dashboard_resource(
                bucket,
                artifact_directory,
                target_config,
                target_config.to_storage().json_params,
                target_config.to_storage().cfn_params,
            )
        except Exception:
            utils.error("Failed when uploading the dashboard resource to cluster S3 bucket {0}".format(bucket))

        _update_cluster(
            args,
            cfn_client,
            cfn_params,
            stack_name,
            use_previous_template=not hit_cluster,
            template_url=template_url,
            tags=tags,
        )
def update(args):  # noqa: C901 FIXME!!!
    """
    Update the CloudFormation stack of an existing cluster, reusing the previous template.

    For schedulers other than "awsbatch", unless ``args.reset_desired`` is set, the
    current desired capacity of the Auto Scaling group is read and passed as ``DesiredSize``.
    For "awsbatch", the ``ResourcesS3Bucket`` parameter is copied from the existing stack.

    Unless ``args.nowait`` is set, the stack is polled every 5 seconds while its status is
    UPDATE_IN_PROGRESS and the most recent stack event is written to stdout.

    On ClientError the error message is logged and the process exits with status 1.
    On KeyboardInterrupt the process exits with status 0.

    :param args: parsed command line arguments; the attributes read here are
        cluster_name, config_file, cluster_template, reset_desired,
        extra_parameters and nowait
    """
    LOGGER.info("Updating: %s", args.cluster_name)
    stack_name = utils.get_stack_name(args.cluster_name)

    pcluster_config = PclusterConfig(
        config_file=args.config_file,
        cluster_label=args.cluster_template,
        fail_on_file_absence=True,
    )
    pcluster_config.validate()
    cfn_params = pcluster_config.to_cfn()

    cluster_section = pcluster_config.get_section("cluster")
    cfn = boto3.client("cloudformation")

    if cluster_section.get_param_value("scheduler") == "awsbatch":
        if args.reset_desired:
            LOGGER.info("reset_desired flag does not work with awsbatch scheduler")

        # Carry over the resources bucket from the parameters of the existing stack
        for stack_param in utils.get_stack(stack_name, cfn).get("Parameters"):
            if stack_param.get("ParameterKey") == "ResourcesS3Bucket":
                cfn_params["ResourcesS3Bucket"] = stack_param.get("ParameterValue")
    elif not args.reset_desired:
        # Pass the current desired capacity of the Auto Scaling group as DesiredSize
        asg_name = _get_asg_name(stack_name)
        autoscaling = boto3.client("autoscaling")
        asg_description = autoscaling.describe_auto_scaling_groups(AutoScalingGroupNames=[asg_name])
        first_group = asg_description.get("AutoScalingGroups")[0]
        cfn_params["DesiredSize"] = str(first_group.get("DesiredCapacity"))

    try:
        LOGGER.debug(cfn_params)
        if args.extra_parameters:
            LOGGER.debug("Adding extra parameters to the CFN parameters")
            cfn_params.update(dict(args.extra_parameters))

        # Convert the parameters dict to the list format expected by update_stack
        stack_parameters = []
        for param_key, param_value in cfn_params.items():
            stack_parameters.append({"ParameterKey": param_key, "ParameterValue": param_value})

        LOGGER.info("Calling update_stack")
        cfn.update_stack(
            StackName=stack_name,
            UsePreviousTemplate=True,
            Parameters=stack_parameters,
            Capabilities=["CAPABILITY_IAM"],
        )
        stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")

        if args.nowait:
            stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
            LOGGER.info("Status: %s", stack_status)
        else:
            while stack_status == "UPDATE_IN_PROGRESS":
                stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
                latest_event = cfn.describe_stack_events(StackName=stack_name).get("StackEvents")[0]
                progress = "Status: %s - %s" % (
                    latest_event.get("LogicalResourceId"),
                    latest_event.get("ResourceStatus"),
                )
                # Pad to a fixed width and rewrite the same console line via carriage return
                resource_status = progress.ljust(80)
                sys.stdout.write("\r%s" % resource_status)
                sys.stdout.flush()
                time.sleep(5)
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)