def start(args):
    """Restore ASG limits or awsbatch CE to min/max/desired."""
    stack_name = utils.get_stack_name(args.cluster_name)
    pcluster_config = PclusterConfig(config_file=args.config_file, cluster_name=args.cluster_name)
    cluster_section = pcluster_config.get_section("cluster")

    if cluster_section.get_param_value("scheduler") == "awsbatch":
        LOGGER.info("Enabling AWS Batch compute environment : %s", args.cluster_name)
        # Gather the vCPU limits configured for the Batch compute environment.
        batch_limits = {
            "min_vcpus": cluster_section.get_param_value("min_vcpus"),
            "desired_vcpus": cluster_section.get_param_value("desired_vcpus"),
            "max_vcpus": cluster_section.get_param_value("max_vcpus"),
        }
        compute_env_name = _get_batch_ce(stack_name)
        _start_batch_ce(ce_name=compute_env_name, **batch_limits)
    else:
        LOGGER.info("Starting compute fleet : %s", args.cluster_name)
        queue_capacity = cluster_section.get_param_value("max_queue_size")
        # Keep the initial size as the floor only when the cluster is configured to maintain it.
        keep_initial = cluster_section.get_param_value("maintain_initial_size")
        floor_size = cluster_section.get_param_value("initial_queue_size") if keep_initial else 0
        asg_name = _get_asg_name(stack_name)
        _set_asg_limits(asg_name=asg_name, min=floor_size, max=queue_capacity, desired=floor_size)
def convert(args=None):
    """
    Command to convert SIT cluster section into HIT format.

    Reads the configuration named by ``args.config_file``/``args.cluster_template``,
    runs the SIT -> HIT conversion, and writes the result either to
    ``args.output_file`` (refusing to overwrite an existing file) or to stdout.
    Exits with status 1 on any error.
    """
    try:
        # Build the config based on args
        pcluster_config = PclusterConfig(
            config_file=args.config_file, cluster_label=args.cluster_template, fail_on_file_absence=True
        )

        # Automatic SIT -> HIT conversion, if needed
        conversion_done, reason = HitConverter(pcluster_config).convert(prepare_to_file=True)
        if conversion_done:
            if args.output_file:
                if os.path.isfile(args.output_file):
                    # Never clobber an existing file chosen as the conversion target.
                    print("ERROR: File {0} already exists, please select another output file.".format(args.output_file))
                    sys.exit(1)
                else:
                    pcluster_config.config_file = args.output_file
                    pcluster_config.to_file(exclude_unrelated_sections=True)
                    print(
                        "Section [cluster {label}] from file {input} has been converted and saved into {output}.\n"
                        "New [queue compute] and [compute_resource default] sections have been created.".format(
                            label=pcluster_config.get_section("cluster").label,
                            input=args.config_file,
                            output=args.output_file,
                        )
                    )
            else:
                # No output file: announce the conversion, then dump the config to stdout.
                print(
                    "Section [cluster {label}] from file {input} has been converted.\n"
                    "New [queue compute] and [compute_resource default] sections have been created.\n"
                    "Configuration file content:\n\n".format(
                        label=pcluster_config.get_section("cluster").label, input=args.config_file
                    )
                )
                pcluster_config.to_file(exclude_unrelated_sections=True, print_stdout=True)
        else:
            print(reason)
    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(1)
    except Exception as e:
        # BUG FIX: the original used print("... %s: %s", name, e) — print() does no
        # %-interpolation, so the literal placeholders were printed followed by the
        # arguments. Use str.format to produce the intended message.
        print("Unexpected error of type {0}: {1}".format(type(e).__name__, e))
        sys.exit(1)
def ssh(args, extra_args):  # noqa: C901 FIXME!!!
    """
    Execute an SSH command to the master instance, according to the [aliases] section if there.

    :param args: pcluster CLI args
    :param extra_args: pcluster CLI extra_args
    """
    pcluster_config = PclusterConfig(
        fail_on_error=False
    )  # FIXME it always search for the default config file
    aliases_section = pcluster_config.get_section("aliases")
    if args.command in aliases_section.params:
        template = aliases_section.get_param_value(args.command)
    else:
        template = "ssh {CFN_USER}@{MASTER_IP} {ARGS}"

    try:
        master_ip, username = utils.get_master_ip_and_username(args.cluster_name)

        # shlex.quote is Python 3; fall back to pipes.quote on Python 2.
        try:
            from shlex import quote as cmd_quote
        except ImportError:
            from pipes import quote as cmd_quote

        # Assemble the final command, shell-quoting every extra argument.
        quoted_args = " ".join(cmd_quote(str(arg)) for arg in extra_args)
        command = template.format(CFN_USER=username, MASTER_IP=master_ip, ARGS=quoted_args)

        log_message = "SSH command: {0}".format(command)
        if args.dryrun:
            # Dry run: only show what would be executed.
            LOGGER.info(log_message)
        else:
            LOGGER.debug(log_message)
            os.system(command)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
def stop(args):
    """Set ASG limits or awsbatch ce to min/max/desired = 0/0/0."""
    stack_name = utils.get_stack_name(args.cluster_name)
    pcluster_config = PclusterConfig(config_file=args.config_file, cluster_name=args.cluster_name)
    cluster_section = pcluster_config.get_section("cluster")

    scheduler = cluster_section.get_param_value("scheduler")
    if scheduler == "awsbatch":
        # Batch clusters are paused by disabling the compute environment.
        LOGGER.info("Disabling AWS Batch compute environment : %s", args.cluster_name)
        _stop_batch_ce(ce_name=_get_batch_ce(stack_name))
    else:
        # Other schedulers are paused by zeroing out the Auto Scaling group.
        LOGGER.info("Stopping compute fleet : %s", args.cluster_name)
        _set_asg_limits(asg_name=_get_asg_name(stack_name), min=0, max=0, desired=0)
def test_update_sections(mocker, pcluster_config_reader):
    """Verify that re-labelling, removing and adding EBS sections keeps the cluster's ebs_settings param in sync."""
    # Stub out the AWS lookups performed during config parsing.
    mocker.patch(
        "pcluster.config.cfn_param_types.get_supported_architectures_for_instance_type", return_value=["x86_64"]
    )
    instance_type_info_mock = mocker.MagicMock()
    mocker.patch(
        "pcluster.config.cfn_param_types.InstanceTypeInfo.init_from_instance_type", return_value=instance_type_info_mock
    )
    instance_type_info_mock.max_network_interface_count.return_value = 1
    pcluster_config = PclusterConfig(
        cluster_label="default", config_file=pcluster_config_reader(), fail_on_file_absence=True, fail_on_error=True
    )

    # Initial state: two EBS sections referenced by ebs_settings.
    ebs1 = pcluster_config.get_section("ebs", "ebs1")
    assert_that(ebs1).is_not_none()
    assert_that(ebs1.get_param_value("shared_dir")).is_equal_to("ebs1")
    assert_that(pcluster_config.get_section("cluster").get_param_value("ebs_settings")).is_equal_to("ebs1,ebs2")

    # Test section re-labelling:
    # Update a section label and verify that pcluster_config_get_section() works correctly
    ebs1.label = "ebs1_updated"
    assert_that(pcluster_config.get_section("ebs", "ebs1")).is_none()
    ebs1_updated = pcluster_config.get_section("ebs", "ebs1_updated")
    assert_that(ebs1_updated).is_not_none()
    assert_that(ebs1_updated.get_param_value("shared_dir")).is_equal_to("ebs1")
    assert_that(pcluster_config.get_section("cluster").get_param_value("ebs_settings")).is_equal_to("ebs1_updated,ebs2")

    # Test removing section
    # Remove a section and verify that ebs_settings param is updated accordingly
    ebs2 = pcluster_config.get_section("ebs", "ebs2")
    pcluster_config.remove_section(ebs2.key, ebs2.label)
    assert_that(pcluster_config.get_section("cluster").get_param_value("ebs_settings")).is_equal_to("ebs1_updated")

    # Test adding section
    # Add a section and verify that ebs_settings param is updated accordingly
    pcluster_config.add_section(ebs2)
    assert_that(pcluster_config.get_section("cluster").get_param_value("ebs_settings")).is_equal_to("ebs1_updated,ebs2")

    # Test removing multiple sections by key
    # Removing sections by key should be prevented if there are multiple sections with the same key
    with pytest.raises(Exception):
        pcluster_config.remove_section("ebs")
def instances(args):
    """Print the id of every instance belonging to the cluster's stack."""
    stack_name = utils.get_stack_name(args.cluster_name)
    pcluster_config = PclusterConfig(config_file=args.config_file, cluster_name=args.cluster_name)
    cluster_section = pcluster_config.get_section("cluster")
    is_batch = cluster_section.get_param_value("scheduler") == "awsbatch"

    cluster_instances = list(_get_ec2_instances(stack_name))
    if not is_batch:
        # Non-Batch schedulers keep their compute nodes in an Auto Scaling group.
        cluster_instances.extend(_get_asg_instances(stack_name))

    for entry in cluster_instances:
        LOGGER.info("%s %s", entry[0], entry[1])

    if is_batch:
        LOGGER.info("Run 'awsbhosts --cluster %s' to list the compute instances", args.cluster_name)
def configure(args):
    """Interactively prompt for cluster settings and write them to the configuration file."""
    # Check for invalid path (eg. a directory)
    if args.config_file and os.path.exists(args.config_file) and not os.path.isfile(args.config_file):
        error("Invalid configuration file path: {0}".format(args.config_file))

    pcluster_config = PclusterConfig(config_file=args.config_file, fail_on_error=False, auto_refresh=False)

    # FIXME: Overriding HIT config files is currently not supported.
    if pcluster_config.cluster_model == ClusterModel.HIT:
        error(
            "Configuration in file {0} cannot be overwritten. Please specify a different file path".format(
                pcluster_config.config_file
            )
        )

    # Warn the user before an existing file is replaced.
    if os.path.exists(pcluster_config.config_file):
        msg = "WARNING: Configuration file {0} will be overwritten."
    else:
        msg = "INFO: Configuration file {0} will be written."
    print(msg.format(pcluster_config.config_file))
    print("Press CTRL-C to interrupt the procedure.\n\n")

    if not args.region:
        # Use built in boto regions as an available option
        available_regions = get_regions()
        default_region = pcluster_config.get_section("aws").get_param_value("aws_region_name")
        aws_region_name = prompt_iterable("AWS Region ID", available_regions, default_value=default_region)
        # Set provided region into os environment for suggestions and validations from here on
        os.environ["AWS_DEFAULT_REGION"] = aws_region_name
    else:
        aws_region_name = args.region

    cluster_section = pcluster_config.get_section("cluster")
    global_config = pcluster_config.get_section("global")
    cluster_label = global_config.get_param_value("cluster_template")
    vpc_section = pcluster_config.get_section("vpc")
    vpc_label = vpc_section.label

    # Get the key name from the current region, if any
    available_keys = _get_keys()
    default_key = cluster_section.get_param_value("key_name")
    key_name = prompt_iterable("EC2 Key Pair Name", available_keys, default_value=default_key)
    scheduler = prompt_iterable(
        "Scheduler", get_supported_schedulers(), default_value=cluster_section.get_param_value("scheduler")
    )
    cluster_config = ClusterConfigureHelper(cluster_section, scheduler)
    cluster_config.prompt_os()
    cluster_config.prompt_cluster_size()
    cluster_config.prompt_instance_types()

    vpc_parameters = _create_vpc_parameters(vpc_section, cluster_config)

    # Here is the end of prompt. Code below assembles config and write to file
    cluster_parameters = {"key_name": key_name, "scheduler": scheduler}
    cluster_parameters.update(cluster_config.get_scheduler_parameters())

    # Remove parameters from the past configuration that can conflict with the user's choices.
    _reset_config_params(cluster_section, cluster_config.get_parameters_to_reset())
    _reset_config_params(vpc_section, ("compute_subnet_id", "use_public_ips", "compute_subnet_cidr"))

    # Update configuration values according to user's choices
    pcluster_config.region = aws_region_name

    cluster_section.label = cluster_label
    for param_key, param_value in cluster_parameters.items():
        param = cluster_section.get_param(param_key)
        param.value = param.get_value_from_string(param_value)

    vpc_section.label = vpc_label
    for param_key, param_value in vpc_parameters.items():
        param = vpc_section.get_param(param_key)
        param.value = param.get_value_from_string(param_value)

    # Update internal params according to provided parameters and enable auto-refresh before eventual hit conversion
    pcluster_config.refresh()
    pcluster_config.auto_refresh = True

    # Convert file if needed
    HitConverter(pcluster_config).convert(prepare_to_file=True)

    # Update config file by overriding changed settings
    pcluster_config.to_file()
    print("Configuration file written to {0}".format(pcluster_config.config_file))
    print(
        "You can edit your configuration file or simply run 'pcluster create -c {0} cluster-name' "
        "to create your cluster".format(pcluster_config.config_file)
    )
def create_ami(args):
    """Build a custom AWS ParallelCluster AMI by driving packer with settings from the cluster config."""
    LOGGER.info("Building AWS ParallelCluster AMI. This could take a while...")
    # Do not auto-refresh; pcluster_config is only used to get info on vpc section, aws section, and template url.
    # Logic in auto-refresh could make unexpected validations not needed in createami.
    pcluster_config = PclusterConfig(config_file=args.config_file, fail_on_file_absence=True, auto_refresh=False)

    ami_info = _validate_createami_args_ami_compatibility(args)
    ami_architecture = ami_info.get("Architecture")

    LOGGER.debug("Building AMI based on args %s", str(args))
    results = {}

    instance_type = args.instance_type
    # FIX: initialize upfront so the `finally` block can test it safely even when
    # an exception fires before mkdtemp() runs; the original relied on a fragile
    # `"tmp_dir" in locals()` check.
    tmp_dir = None
    try:
        # Fall back to the config's VPC/subnet when not given on the command line.
        vpc_section = pcluster_config.get_section("vpc")
        vpc_id = args.vpc_id if args.vpc_id else vpc_section.get_param_value("vpc_id")
        subnet_id = args.subnet_id if args.subnet_id else vpc_section.get_param_value("master_subnet_id")

        packer_env = {
            "CUSTOM_AMI_ID": args.base_ami_id,
            "AWS_FLAVOR_ID": instance_type,
            "AMI_NAME_PREFIX": args.custom_ami_name_prefix,
            "AWS_VPC_ID": vpc_id,
            "AWS_SUBNET_ID": subnet_id,
            "ASSOCIATE_PUBLIC_IP": "true" if args.associate_public_ip else "false",
        }

        aws_section = pcluster_config.get_section("aws")
        aws_region = aws_section.get_param_value("aws_region_name")
        # Forward explicit credentials to packer only when they are configured.
        if aws_section and aws_section.get_param_value("aws_access_key_id"):
            packer_env["AWS_ACCESS_KEY_ID"] = aws_section.get_param_value("aws_access_key_id")
        if aws_section and aws_section.get_param_value("aws_secret_access_key"):
            packer_env["AWS_SECRET_ACCESS_KEY"] = aws_section.get_param_value("aws_secret_access_key")

        LOGGER.info("Base AMI ID: %s", args.base_ami_id)
        LOGGER.info("Base AMI OS: %s", args.base_ami_os)
        LOGGER.info("Instance Type: %s", instance_type)
        LOGGER.info("Region: %s", aws_region)
        LOGGER.info("VPC ID: %s", vpc_id)
        LOGGER.info("Subnet ID: %s", subnet_id)

        template_url = evaluate_pcluster_template_url(pcluster_config)

        tmp_dir = mkdtemp()
        cookbook_dir = _get_cookbook_dir(aws_region, template_url, args, tmp_dir)

        _get_post_install_script_dir(args.post_install_script, tmp_dir)

        packer_command = (
            cookbook_dir
            + "/amis/build_ami.sh --os "
            + args.base_ami_os
            + " --partition region"
            + " --region "
            + aws_region
            + " --custom"
            + " --arch "
            + ami_architecture
        )

        results = _run_packer(packer_command, packer_env)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
    finally:
        _print_create_ami_results(results)
        if tmp_dir:
            rmtree(tmp_dir)
def configure(args):
    """Interactively prompt for cluster settings and write them to the configuration file."""
    # Check for invalid path (eg. a directory)
    if args.config_file and os.path.exists(args.config_file) and not os.path.isfile(args.config_file):
        error("Invalid configuration file path: {0}".format(args.config_file))

    pcluster_config = PclusterConfig(config_file=args.config_file, fail_on_error=False)

    # Warn the user before an existing file is replaced.
    if os.path.exists(pcluster_config.config_file):
        msg = "WARNING: Configuration file {0} will be overwritten."
    else:
        msg = "INFO: Configuration file {0} will be written."
    print(msg.format(pcluster_config.config_file))
    print("Press CTRL-C to interrupt the procedure.\n\n")

    cluster_section = pcluster_config.get_section("cluster")
    global_config = pcluster_config.get_section("global")
    cluster_label = global_config.get_param_value("cluster_template")
    vpc_section = pcluster_config.get_section("vpc")
    vpc_label = vpc_section.label

    # Use built in boto regions as an available option
    available_regions = get_regions()
    default_region = pcluster_config.get_section("aws").get_param_value("aws_region_name")
    aws_region_name = prompt_iterable(
        "AWS Region ID",
        available_regions,
        default_value=default_region if default_region in available_regions else None,
    )
    # Set provided region into os environment for suggestions and validations from here on
    os.environ["AWS_DEFAULT_REGION"] = aws_region_name

    # Get the key name from the current region, if any
    available_keys = _get_keys()
    default_key = cluster_section.get_param_value("key_name")
    key_name = prompt_iterable(
        "EC2 Key Pair Name", available_keys, default_value=default_key if default_key in available_keys else None
    )
    scheduler = prompt_iterable(
        "Scheduler", get_supported_schedulers(), default_value=cluster_section.get_param_value("scheduler")
    )
    scheduler_handler = SchedulerHandler(cluster_section, scheduler)

    scheduler_handler.prompt_os()
    scheduler_handler.prompt_cluster_size()

    master_instance_type = prompt(
        "Master instance type",
        lambda x: x in get_supported_instance_types(),
        default_value=cluster_section.get_param_value("master_instance_type"),
    )

    scheduler_handler.prompt_compute_instance_type()

    automate_vpc = prompt("Automate VPC creation? (y/n)", lambda x: x in ("y", "n"), default_value="n") == "y"

    vpc_parameters = _create_vpc_parameters(
        vpc_section, scheduler, scheduler_handler.max_cluster_size, automate_vpc_creation=automate_vpc
    )
    cluster_parameters = {"key_name": key_name, "scheduler": scheduler, "master_instance_type": master_instance_type}
    cluster_parameters.update(scheduler_handler.get_scheduler_parameters())

    # Remove parameters from the past configuration that can conflict with the user's choices.
    _reset_config_params(cluster_section, scheduler_handler.get_parameters_to_reset())
    _reset_config_params(vpc_section, ("compute_subnet_id", "use_public_ips", "compute_subnet_cidr"))

    # Update configuration values according to user's choices
    pcluster_config.region = aws_region_name

    cluster_section.label = cluster_label
    for param_key, param_value in cluster_parameters.items():
        param = cluster_section.get_param(param_key)
        param.value = param.get_value_from_string(param_value)

    vpc_section.label = vpc_label
    for param_key, param_value in vpc_parameters.items():
        param = vpc_section.get_param(param_key)
        param.value = param.get_value_from_string(param_value)

    # Update config file by overriding changed settings
    pcluster_config.to_file()
    print("Configuration file written to {0}".format(pcluster_config.config_file))
    print(
        "You can edit your configuration file or simply run 'pcluster create -c {0} cluster-name' "
        "to create your cluster".format(pcluster_config.config_file)
    )
def create_ami(args):
    """Build a custom AWS ParallelCluster AMI by driving packer with settings from the cluster config."""
    LOGGER.info("Building AWS ParallelCluster AMI. This could take a while...")
    LOGGER.debug("Building AMI based on args %s", str(args))
    results = {}

    instance_type = args.instance_type
    # FIX: initialize upfront so the `finally` block can test it safely even when
    # an exception fires before mkdtemp() runs; the original relied on a fragile
    # `"tmp_dir" in locals()` check.
    tmp_dir = None
    try:
        # FIXME it doesn't work if there is no default section
        pcluster_config = PclusterConfig(config_file=args.config_file, fail_on_file_absence=True)

        # Fall back to the config's VPC/subnet when not given on the command line.
        vpc_section = pcluster_config.get_section("vpc")
        vpc_id = args.vpc_id if args.vpc_id else vpc_section.get_param_value("vpc_id")
        subnet_id = args.subnet_id if args.subnet_id else vpc_section.get_param_value("master_subnet_id")

        packer_env = {
            "CUSTOM_AMI_ID": args.base_ami_id,
            "AWS_FLAVOR_ID": instance_type,
            "AMI_NAME_PREFIX": args.custom_ami_name_prefix,
            "AWS_VPC_ID": vpc_id,
            "AWS_SUBNET_ID": subnet_id,
            "ASSOCIATE_PUBLIC_IP": "true" if args.associate_public_ip else "false",
        }

        aws_section = pcluster_config.get_section("aws")
        aws_region = aws_section.get_param_value("aws_region_name")
        # Forward explicit credentials to packer only when they are configured.
        if aws_section and aws_section.get_param_value("aws_access_key_id"):
            packer_env["AWS_ACCESS_KEY_ID"] = aws_section.get_param_value("aws_access_key_id")
        if aws_section and aws_section.get_param_value("aws_secret_access_key"):
            packer_env["AWS_SECRET_ACCESS_KEY"] = aws_section.get_param_value("aws_secret_access_key")

        LOGGER.info("Base AMI ID: %s", args.base_ami_id)
        LOGGER.info("Base AMI OS: %s", args.base_ami_os)
        LOGGER.info("Instance Type: %s", instance_type)
        LOGGER.info("Region: %s", aws_region)
        LOGGER.info("VPC ID: %s", vpc_id)
        LOGGER.info("Subnet ID: %s", subnet_id)

        template_url = _evaluate_pcluster_template_url(pcluster_config)

        tmp_dir = mkdtemp()
        cookbook_dir = _get_cookbook_dir(aws_region, template_url, args, tmp_dir)

        packer_command = (
            cookbook_dir
            + "/amis/build_ami.sh --os "
            + args.base_ami_os
            + " --partition region"
            + " --region "
            + aws_region
            + " --custom"
        )

        results = _run_packer(packer_command, packer_env)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
    finally:
        _print_create_ami_results(results)
        if tmp_dir:
            rmtree(tmp_dir)
def create(args):  # noqa: C901 FIXME!!!
    """Create a new cluster by deploying the CloudFormation stack built from the configuration file."""
    LOGGER.info("Beginning cluster creation for cluster: %s", args.cluster_name)
    LOGGER.debug("Building cluster config based on args %s", str(args))

    # Build the config based on args
    pcluster_config = PclusterConfig(
        config_file=args.config_file, cluster_label=args.cluster_template, fail_on_file_absence=True
    )
    pcluster_config.validate()
    # get CFN parameters, template url and tags from config
    cluster_section = pcluster_config.get_section("cluster")
    cfn_params = pcluster_config.to_cfn()

    _check_for_updates(pcluster_config)

    # Tracked so the bucket can be cleaned up if stack creation fails.
    batch_temporary_bucket = None
    try:
        cfn_client = boto3.client("cloudformation")
        stack_name = utils.get_stack_name(args.cluster_name)

        # If scheduler is awsbatch create bucket with resources
        if cluster_section.get_param_value("scheduler") == "awsbatch":
            batch_resources = pkg_resources.resource_filename(__name__, "resources/batch")
            batch_temporary_bucket = _create_bucket_with_batch_resources(
                stack_name=stack_name, resources_dir=batch_resources, region=pcluster_config.region
            )
            cfn_params["ResourcesS3Bucket"] = batch_temporary_bucket

        LOGGER.info("Creating stack named: %s", stack_name)
        LOGGER.debug(cfn_params)

        # determine the CloudFormation Template URL to use
        template_url = _evaluate_pcluster_template_url(pcluster_config, preferred_template_url=args.template_url)

        # merge tags from configuration, command-line and internal ones
        tags = _evaluate_tags(pcluster_config, preferred_tags=args.tags)

        # append extra parameters from command-line
        if args.extra_parameters:
            LOGGER.debug("Adding extra parameters to the CFN parameters")
            cfn_params.update(dict(args.extra_parameters))

        # prepare input parameters for stack creation and create the stack
        LOGGER.debug(cfn_params)
        params = [{"ParameterKey": key, "ParameterValue": value} for key, value in cfn_params.items()]
        stack = cfn_client.create_stack(
            StackName=stack_name,
            TemplateURL=template_url,
            Parameters=params,
            Capabilities=["CAPABILITY_IAM"],
            DisableRollback=args.norollback,
            Tags=tags,
        )
        LOGGER.debug("StackId: %s", stack.get("StackId"))

        # Either wait for completion and print the outputs, or just report the current status.
        if not args.nowait:
            utils.verify_stack_creation(stack_name, cfn_client)
            LOGGER.info("")
            result_stack = utils.get_stack(stack_name, cfn_client)
            _print_stack_outputs(result_stack)
        else:
            stack_status = utils.get_stack(stack_name, cfn_client).get("StackStatus")
            LOGGER.info("Status: %s", stack_status)
    except ClientError as e:
        # AWS API error: report it and remove the temporary Batch bucket, if any.
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        if batch_temporary_bucket:
            utils.delete_s3_bucket(bucket_name=batch_temporary_bucket)
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
    except KeyError as e:
        LOGGER.critical("ERROR: KeyError - reason:")
        LOGGER.critical(e)
        if batch_temporary_bucket:
            utils.delete_s3_bucket(bucket_name=batch_temporary_bucket)
        sys.exit(1)
    except Exception as e:
        LOGGER.critical(e)
        if batch_temporary_bucket:
            utils.delete_s3_bucket(bucket_name=batch_temporary_bucket)
        sys.exit(1)
def update(args):  # noqa: C901 FIXME!!!
    """Update the cluster's CloudFormation stack with parameters rebuilt from the configuration file."""
    LOGGER.info("Updating: %s", args.cluster_name)
    stack_name = utils.get_stack_name(args.cluster_name)
    pcluster_config = PclusterConfig(
        config_file=args.config_file, cluster_label=args.cluster_template, fail_on_file_absence=True
    )
    pcluster_config.validate()
    cfn_params = pcluster_config.to_cfn()

    cluster_section = pcluster_config.get_section("cluster")
    cfn = boto3.client("cloudformation")
    if cluster_section.get_param_value("scheduler") != "awsbatch":
        if not args.reset_desired:
            # Preserve the ASG's current desired capacity unless the user asked to reset it.
            asg_name = _get_asg_name(stack_name)
            desired_capacity = (
                boto3.client("autoscaling")
                .describe_auto_scaling_groups(AutoScalingGroupNames=[asg_name])
                .get("AutoScalingGroups")[0]
                .get("DesiredCapacity")
            )
            cfn_params["DesiredSize"] = str(desired_capacity)
    else:
        if args.reset_desired:
            LOGGER.info("reset_desired flag does not work with awsbatch scheduler")
        # Carry over the existing ResourcesS3Bucket parameter from the deployed stack.
        params = utils.get_stack(stack_name, cfn).get("Parameters")

        for parameter in params:
            if parameter.get("ParameterKey") == "ResourcesS3Bucket":
                cfn_params["ResourcesS3Bucket"] = parameter.get("ParameterValue")

    try:
        LOGGER.debug(cfn_params)
        if args.extra_parameters:
            LOGGER.debug("Adding extra parameters to the CFN parameters")
            cfn_params.update(dict(args.extra_parameters))
        cfn_params = [{"ParameterKey": key, "ParameterValue": value} for key, value in cfn_params.items()]
        LOGGER.info("Calling update_stack")
        cfn.update_stack(
            StackName=stack_name, UsePreviousTemplate=True, Parameters=cfn_params, Capabilities=["CAPABILITY_IAM"]
        )
        stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
        if not args.nowait:
            # Poll every 5s, echoing the latest stack event on a single console line.
            while stack_status == "UPDATE_IN_PROGRESS":
                stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
                events = cfn.describe_stack_events(StackName=stack_name).get("StackEvents")[0]
                resource_status = (
                    "Status: %s - %s" % (events.get("LogicalResourceId"), events.get("ResourceStatus"))
                ).ljust(80)
                sys.stdout.write("\r%s" % resource_status)
                sys.stdout.flush()
                time.sleep(5)
        else:
            stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
            LOGGER.info("Status: %s", stack_status)
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)