def test_multiple_param_changes(mocker, pcluster_config_reader, test_datadir, src_cluster_label, dst_cluster_label):
    _do_mocking_for_tests(mocker)
    dst_config_file = "pcluster.config.dst.ini"
    duplicate_config_file(dst_config_file, test_datadir)

    src_dict = {}
    src_dict.update(default_cluster_params)
    src_dict["cluster_label"] = src_cluster_label
    src_dict["master_subnet_id"] = "subnet-12345678"
    src_dict["compute_subnet_id"] = "subnet-12345678"
    src_dict["additional_sg"] = "sg-12345678"
    src_config_file = pcluster_config_reader(**src_dict)
    src_conf = PclusterConfig(config_file=src_config_file, fail_on_file_absence=True)

    dst_dict = {}
    dst_dict.update(default_cluster_params)
    dst_dict["cluster_label"] = dst_cluster_label
    dst_dict["master_subnet_id"] = "subnet-1234567a"
    dst_dict["compute_subnet_id"] = "subnet-1234567a"
    dst_dict["additional_sg"] = "sg-1234567a"
    dst_config_file = pcluster_config_reader(dst_config_file, **dst_dict)
    dst_conf = PclusterConfig(config_file=dst_config_file)

    expected_changes = [
        Change("vpc", "default", "master_subnet_id", "subnet-12345678", "subnet-1234567a", UpdatePolicy.UNSUPPORTED),
        Change(
            "vpc", "default", "compute_subnet_id", "subnet-12345678", "subnet-1234567a", UpdatePolicy.COMPUTE_FLEET_STOP
        ),
        Change("vpc", "default", "additional_sg", "sg-12345678", "sg-1234567a", UpdatePolicy.SUPPORTED),
    ]

    _check_patch(src_conf, dst_conf, expected_changes, UpdatePolicy.UNSUPPORTED)

def start(args):
    """Restore ASG limits or awsbatch CE to min/max/desired."""
    stack_name = utils.get_stack_name(args.cluster_name)
    pcluster_config = PclusterConfig(config_file=args.config_file, cluster_name=args.cluster_name)
    cluster_section = pcluster_config.get_section("cluster")

    if cluster_section.get_param_value("scheduler") == "awsbatch":
        LOGGER.info("Enabling AWS Batch compute environment : %s", args.cluster_name)
        max_vcpus = cluster_section.get_param_value("max_vcpus")
        desired_vcpus = cluster_section.get_param_value("desired_vcpus")
        min_vcpus = cluster_section.get_param_value("min_vcpus")
        ce_name = _get_batch_ce(stack_name)
        _start_batch_ce(ce_name=ce_name, min_vcpus=min_vcpus, desired_vcpus=desired_vcpus, max_vcpus=max_vcpus)
    else:
        LOGGER.info("Starting compute fleet : %s", args.cluster_name)
        max_queue_size = cluster_section.get_param_value("max_queue_size")
        min_desired_size = (
            cluster_section.get_param_value("initial_queue_size")
            if cluster_section.get_param_value("maintain_initial_size")
            else 0
        )
        asg_name = _get_asg_name(stack_name)
        _set_asg_limits(asg_name=asg_name, min=min_desired_size, max=max_queue_size, desired=min_desired_size)

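# A minimal sketch of what a helper like _set_asg_limits could look like; the real helper is
# defined elsewhere in this module, so treat this as an illustrative assumption rather than the
# actual implementation. It relies on the standard boto3 Auto Scaling API.
import boto3


def _set_asg_limits_example(asg_name, min, max, desired):
    """Illustrative only: apply min/max/desired capacity to an Auto Scaling group."""
    asg_client = boto3.client("autoscaling")
    asg_client.update_auto_scaling_group(
        AutoScalingGroupName=asg_name, MinSize=min, MaxSize=max, DesiredCapacity=desired
    )
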
def test_single_param_change(
    test_datadir,
    pcluster_config_reader,
    mocker,
    section_key,
    section_label,
    param_key,
    src_param_value,
    dst_param_value,
    change_update_policy,
):
    _do_mocking_for_tests(mocker)
    dst_config_file = "pcluster.config.dst.ini"
    duplicate_config_file(dst_config_file, test_datadir)

    src_dict = {}
    src_dict.update(default_cluster_params)
    src_dict[param_key] = src_param_value
    src_config_file = pcluster_config_reader(**src_dict)
    src_conf = PclusterConfig(config_file=src_config_file, fail_on_file_absence=True)

    dst_dict = {}
    dst_dict.update(default_cluster_params)
    dst_dict[param_key] = dst_param_value
    dst_config_file = pcluster_config_reader(dst_config_file, **dst_dict)
    dst_conf = PclusterConfig(config_file=dst_config_file)

    expected_change = Change(
        section_key, section_label, param_key, src_param_value, dst_param_value, change_update_policy
    )
    _check_patch(src_conf, dst_conf, [expected_change], change_update_policy)

def test_config_to_json(capsys, boto3_stubber, test_datadir, pcluster_config_reader, queues):
    queue_settings = ",".join(queues)

    # Create a new configuration file from the initial one
    dst_config_file = "pcluster.config.{0}.ini".format("_".join(queues))
    duplicate_config_file(dst_config_file, test_datadir)

    # Create the expected json params based on the active queues
    expected_json_params = _prepare_json_config(queues, test_datadir)

    # Mock expected boto3 calls
    _mock_boto3(boto3_stubber, expected_json_params, head_node_instance_type="c4.xlarge")

    # Load config from the created config file
    dst_config_file = pcluster_config_reader(dst_config_file, queue_settings=queue_settings)
    pcluster_config = PclusterConfig(config_file=dst_config_file, fail_on_file_absence=True)

    # Create json storage data from config
    storage_data = pcluster_config.to_storage()

    # Check that the created json params match the expected ones
    assert_that(json.dumps(storage_data.json_params, indent=2, sort_keys=True)).is_equal_to(
        json.dumps(expected_json_params, indent=2, sort_keys=True)
    )

    readouterr = capsys.readouterr()
    assert_that(readouterr.err).is_equal_to("")

def test_config_patch(mocker):
    _do_mocking_for_tests(mocker)
    # We need to provide a region to PclusterConfig to avoid a no-region exception.
    # Which region to provide is arbitrary.
    os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
    src_conf = PclusterConfig()
    dst_conf = PclusterConfig()
    # Two new configs must always be equal
    _check_patch(src_conf, dst_conf, [], UpdatePolicy.SUPPORTED)

def status(args):  # noqa: C901 FIXME!!!
    stack_name = utils.get_stack_name(args.cluster_name)

    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    cfn = boto3.client("cloudformation")
    try:
        stack = utils.get_stack(stack_name, cfn)
        sys.stdout.write("\rStatus: %s" % stack.get("StackStatus"))
        sys.stdout.flush()
        if not args.nowait:
            while stack.get("StackStatus") not in [
                "CREATE_COMPLETE",
                "UPDATE_COMPLETE",
                "UPDATE_ROLLBACK_COMPLETE",
                "ROLLBACK_COMPLETE",
                "CREATE_FAILED",
                "DELETE_FAILED",
            ]:
                time.sleep(5)
                stack = utils.get_stack(stack_name, cfn)
                events = utils.get_stack_events(stack_name)[0]
                resource_status = (
                    "Status: %s - %s" % (events.get("LogicalResourceId"), events.get("ResourceStatus"))
                ).ljust(80)
                sys.stdout.write("\r%s" % resource_status)
                sys.stdout.flush()
            sys.stdout.write("\rStatus: %s\n" % stack.get("StackStatus"))
            sys.stdout.flush()
            if stack.get("StackStatus") in ["CREATE_COMPLETE", "UPDATE_COMPLETE", "UPDATE_ROLLBACK_COMPLETE"]:
                state = _poll_head_node_state(stack_name)
                if state == "running":
                    _print_stack_outputs(stack)
                _print_compute_fleet_status(args.cluster_name, stack)
            elif stack.get("StackStatus") in ["ROLLBACK_COMPLETE", "CREATE_FAILED", "DELETE_FAILED"]:
                events = utils.get_stack_events(stack_name)
                for event in events:
                    if event.get("ResourceStatus") in ["CREATE_FAILED", "DELETE_FAILED", "UPDATE_FAILED"]:
                        LOGGER.info(
                            "%s %s %s %s %s",
                            event.get("Timestamp"),
                            event.get("ResourceStatus"),
                            event.get("ResourceType"),
                            event.get("LogicalResourceId"),
                            event.get("ResourceStatusReason"),
                        )
        else:
            sys.stdout.write("\n")
            sys.stdout.flush()
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)

def test_example_config_consistency(mocker):
    """Validate example file and try to convert to CFN."""
    mocker.patch("pcluster.config.param_types.get_avail_zone", return_value="mocked_avail_zone")
    pcluster_config = PclusterConfig(config_file=utils.get_pcluster_config_example(), fail_on_file_absence=True)
    cfn_params = pcluster_config.to_cfn()

    assert_that(len(cfn_params)).is_equal_to(CFN_CONFIG_NUM_OF_PARAMS)

def init_pcluster_config_from_configparser(config_parser, validate=True):
    with tempfile.NamedTemporaryFile(delete=False) as config_file:
        with open(config_file.name, "w") as cf:
            config_parser.write(cf)

        pcluster_config = PclusterConfig(
            config_file=config_file.name, cluster_label="default", fail_on_file_absence=True
        )
        if validate:
            pcluster_config.validate()
    return pcluster_config

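# Illustrative usage of the helper above. The section names follow the pcluster INI format,
# but the specific values here are hypothetical and for demonstration only.
import configparser
import os

os.environ["AWS_DEFAULT_REGION"] = "us-east-1"  # PclusterConfig needs a region to be set

config_parser = configparser.ConfigParser()
config_parser["global"] = {"cluster_template": "default"}
config_parser["cluster default"] = {"key_name": "my-key", "scheduler": "slurm"}
pcluster_config = init_pcluster_config_from_configparser(config_parser, validate=False)
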
def delete(args):
    saw_update = False
    LOGGER.info("Deleting: %s", args.cluster_name)
    stack_name = utils.get_stack_name(args.cluster_name)

    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    cfn = boto3.client("cloudformation")
    try:
        # delete_stack does not raise an exception if stack does not exist
        # Use describe_stacks to explicitly check if the stack exists
        cfn.describe_stacks(StackName=stack_name)
        cfn.delete_stack(StackName=stack_name)
        saw_update = True
        stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
        sys.stdout.write("\rStatus: %s" % stack_status)
        sys.stdout.flush()
        LOGGER.debug("Status: %s", stack_status)
        if not args.nowait:
            while stack_status == "DELETE_IN_PROGRESS":
                time.sleep(5)
                stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
                events = cfn.describe_stack_events(StackName=stack_name).get("StackEvents")[0]
                resource_status = (
                    "Status: %s - %s" % (events.get("LogicalResourceId"), events.get("ResourceStatus"))
                ).ljust(80)
                sys.stdout.write("\r%s" % resource_status)
                sys.stdout.flush()
            sys.stdout.write("\rStatus: %s\n" % stack_status)
            sys.stdout.flush()
            LOGGER.debug("Status: %s", stack_status)
        else:
            sys.stdout.write("\n")
            sys.stdout.flush()
        if stack_status == "DELETE_FAILED":
            LOGGER.info("Cluster did not delete successfully. Run 'pcluster delete %s' again", args.cluster_name)
    except ClientError as e:
        if e.response.get("Error").get("Message").endswith("does not exist"):
            if saw_update:
                LOGGER.info("\nCluster deleted successfully.")
                sys.exit(0)
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)

def test_patch_check_cluster_resource_bucket(
    old_bucket_name,
    new_bucket_name,
    is_generated_bucket,
    expected_error_row,
    mocker,
    test_datadir,
    pcluster_config_reader,
):
    _do_mocking_for_tests(mocker)
    mocker.patch("pcluster.config.update_policy._is_bucket_pcluster_generated", return_value=is_generated_bucket)
    expected_message_rows = [
        ["section", "parameter", "old value", "new value", "check", "reason", "action_needed"],
        # ec2_iam_role is to make sure other parameters are not affected by cluster_resource_bucket custom logic
        ["cluster some_cluster", "ec2_iam_role", "some_old_role", "some_new_role", "SUCCEEDED", "-", None],
    ]
    if expected_error_row:
        error_message_row = [
            "cluster some_cluster",
            "cluster_resource_bucket",
            old_bucket_name,
            new_bucket_name,
            "ACTION NEEDED",
            (
                "'cluster_resource_bucket' parameter is a read_only parameter that cannot be updated. "
                "New value '{0}' will be ignored and old value '{1}' will be used if you force the update.".format(
                    new_bucket_name, old_bucket_name
                )
            ),
            "Restore the value of parameter 'cluster_resource_bucket' to '{0}'".format(old_bucket_name),
        ]
        expected_message_rows.append(error_message_row)
    src_dict = {"cluster_resource_bucket": old_bucket_name, "ec2_iam_role": "some_old_role"}
    dst_dict = {"cluster_resource_bucket": new_bucket_name, "ec2_iam_role": "some_new_role"}
    dst_config_file = "pcluster.config.dst.ini"
    duplicate_config_file(dst_config_file, test_datadir)

    src_config_file = pcluster_config_reader(**src_dict)
    src_conf = PclusterConfig(config_file=src_config_file, fail_on_file_absence=True)
    dst_config_file = pcluster_config_reader(dst_config_file, **dst_dict)
    dst_conf = PclusterConfig(config_file=dst_config_file, fail_on_file_absence=True)

    patch = ConfigPatch(base_config=src_conf, target_config=dst_conf)
    patch_allowed, rows = patch.check()
    assert_that(len(rows)).is_equal_to(len(expected_message_rows))
    for line in rows:
        # Handle unicode string
        line = ["{0}".format(element) if isinstance(element, str) else element for element in line]
        assert_that(expected_message_rows).contains(line)
    assert_that(patch_allowed).is_equal_to(not expected_error_row)

def test_adaptation(mocker, test_datadir, pcluster_config_reader, test):
    _do_mocking_for_tests(mocker)
    base_config_file_name = "pcluster.config.base.ini"
    duplicate_config_file(base_config_file_name, test_datadir)
    target_config_file_name = "pcluster.config.dst.ini"
    duplicate_config_file(target_config_file_name, test_datadir)

    base_config_file = pcluster_config_reader(base_config_file_name, **default_cluster_params)
    target_config_file = pcluster_config_reader(target_config_file_name, **default_cluster_params)

    base_conf = PclusterConfig(config_file=base_config_file, fail_on_file_absence=True)
    target_conf = PclusterConfig(config_file=target_config_file, fail_on_file_absence=True)

    test(base_conf, target_conf)

def delete(args):
    PclusterConfig.init_aws(config_file=args.config_file)
    LOGGER.info("Deleting: %s", args.cluster_name)
    stack_name = utils.get_stack_name(args.cluster_name)
    if not utils.stack_exists(stack_name):
        if args.keep_logs:
            utils.warn(
                "Stack for {0} does not exist. Cannot prevent its log groups from being deleted.".format(
                    args.cluster_name
                )
            )
        utils.warn("Cluster {0} has already been deleted.".format(args.cluster_name))
        sys.exit(0)
    elif args.keep_logs:
        _persist_cloudwatch_log_groups(args.cluster_name)
    _delete_cluster(args.cluster_name, args.nowait)

def stop(args):
    """Set ASG limits or awsbatch CE to min/max/desired = 0/0/0."""
    stack_name = utils.get_stack_name(args.cluster_name)
    pcluster_config = PclusterConfig(config_file=args.config_file, cluster_name=args.cluster_name)
    cluster_section = pcluster_config.get_section("cluster")

    if cluster_section.get_param_value("scheduler") == "awsbatch":
        LOGGER.info("Disabling AWS Batch compute environment : %s", args.cluster_name)
        ce_name = _get_batch_ce(stack_name)
        _stop_batch_ce(ce_name=ce_name)
    else:
        LOGGER.info("Stopping compute fleet : %s", args.cluster_name)
        asg_name = _get_asg_name(stack_name)
        _set_asg_limits(asg_name=asg_name, min=0, max=0, desired=0)

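# A minimal sketch of what a helper like _stop_batch_ce could look like; the real helper lives
# elsewhere in this module, so this is an illustrative assumption. Disabling the compute
# environment stops AWS Batch from scaling up new instances.
import boto3


def _stop_batch_ce_example(ce_name):
    """Illustrative only: disable an AWS Batch compute environment."""
    batch_client = boto3.client("batch")
    batch_client.update_compute_environment(computeEnvironment=ce_name, state="DISABLED")
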
def assert_section_params(mocker, pcluster_config_reader, settings_label, expected_cfn_params):
    mocker.patch(
        "pcluster.config.cfn_param_types.get_supported_architectures_for_instance_type", return_value=["x86_64"]
    )
    mocker.patch(
        "pcluster.utils.InstanceTypeInfo.init_from_instance_type",
        return_value=InstanceTypeInfo(
            {
                "InstanceType": "t2.micro",
                "VCpuInfo": {"DefaultVCpus": 1, "DefaultCores": 1, "DefaultThreadsPerCore": 1},
                "NetworkInfo": {"EfaSupported": False},
            }
        ),
    )
    if isinstance(expected_cfn_params, SystemExit):
        with pytest.raises(SystemExit):
            PclusterConfig(
                cluster_label="default",
                config_file=pcluster_config_reader(settings_label=settings_label),
                fail_on_file_absence=True,
                fail_on_error=True,
            )
    else:
        pcluster_config = PclusterConfig(
            config_file=pcluster_config_reader(settings_label=settings_label), fail_on_file_absence=True
        )
        cfn_params = pcluster_config.to_cfn()

        assert_that(len(cfn_params)).is_equal_to(get_cfn_config_num_of_params(pcluster_config))
        remove_ignored_params(cfn_params)

        for param_key, _ in cfn_params.items():
            assert_that(cfn_params.get(param_key), description=param_key).is_equal_to(
                expected_cfn_params.get(param_key)
            )

def stop(args):
    """Stop cluster compute fleet."""
    pcluster_config = PclusterConfig(
        config_file=args.config_file,
        cluster_name=args.cluster_name,
        auto_refresh=False,
        enforce_version=False,
        skip_load_json_config=True,
    )
    pcluster_config.cluster_model.get_stop_command(pcluster_config).stop(args, pcluster_config)

def dcv_connect(args):
    """
    Execute pcluster dcv connect command.

    :param args: pcluster cli arguments.
    """
    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws()  # FIXME it always searches for the default configuration file

    # Prepare ssh command to execute in the head node instance
    stack = get_stack(get_stack_name(args.cluster_name))
    shared_dir = get_cfn_param(stack.get("Parameters"), "SharedDir")
    head_node_ip, username = get_head_node_ip_and_username(args.cluster_name)
    cmd = 'ssh {CFN_USER}@{HEAD_NODE_IP} {KEY} "{REMOTE_COMMAND} {DCV_SHARED_DIR}"'.format(
        CFN_USER=username,
        HEAD_NODE_IP=head_node_ip,
        KEY="-i {0}".format(args.key_path) if args.key_path else "",
        REMOTE_COMMAND=DCV_CONNECT_SCRIPT,
        DCV_SHARED_DIR=shared_dir,
    )

    try:
        url = retry(_retrieve_dcv_session_url, func_args=[cmd, args.cluster_name, head_node_ip], attempts=4)
        url_message = "Please use the following one-time URL in your browser within 30 seconds:\n{0}".format(url)
    except DCVConnectionError as e:
        error(
            "Something went wrong during DCV connection.\n{0}"
            "Please check the logs in the /var/log/parallelcluster/ folder "
            "of the head node and submit an issue {1}\n".format(e, PCLUSTER_ISSUES_LINK)
        )

    if args.show_url:
        LOGGER.info(url_message)
        return

    try:
        if not webbrowser.open_new(url):
            raise webbrowser.Error("Unable to open the Web browser.")
    except webbrowser.Error as e:
        LOGGER.info("{0}\n{1}".format(e, url_message))

def instances(args):
    stack_name = utils.get_stack_name(args.cluster_name)
    PclusterConfig.init_aws(config_file=args.config_file)
    cfn_stack = utils.get_stack(stack_name)
    scheduler = utils.get_cfn_param(cfn_stack.get("Parameters"), "Scheduler")

    instances = []
    head_node_server = utils.describe_cluster_instances(stack_name, node_type=utils.NodeType.head_node)
    if head_node_server:
        instances.append(("MasterServer", head_node_server[0].get("InstanceId")))

    if scheduler != "awsbatch":
        instances.extend(_get_compute_instances(stack_name))

    for instance in instances:
        LOGGER.info("%s %s", instance[0], instance[1])

    if scheduler == "awsbatch":
        LOGGER.info("Run 'awsbhosts --cluster %s' to list the compute instances", args.cluster_name)

def instances(args):
    stack_name = utils.get_stack_name(args.cluster_name)
    pcluster_config = PclusterConfig(config_file=args.config_file, cluster_name=args.cluster_name)
    cluster_section = pcluster_config.get_section("cluster")

    instances = []
    instances.extend(_get_ec2_instances(stack_name))

    if cluster_section.get_param_value("scheduler") != "awsbatch":
        instances.extend(_get_asg_instances(stack_name))

    for instance in instances:
        LOGGER.info("%s %s", instance[0], instance[1])

    if cluster_section.get_param_value("scheduler") == "awsbatch":
        LOGGER.info("Run 'awsbhosts --cluster %s' to list the compute instances", args.cluster_name)

def get_mocked_pcluster_config(mocker, auto_refresh=False):
    mocker.patch(
        "pcluster.config.cfn_param_types.get_supported_architectures_for_instance_type", return_value=["x86_64"]
    )
    if "AWS_DEFAULT_REGION" not in os.environ:
        # We need to provide a region to PclusterConfig to avoid a no-region exception.
        # Which region to provide is arbitrary.
        os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
    pcluster_config = PclusterConfig(config_file="wrong-file", auto_refresh=auto_refresh)
    return pcluster_config

def ssh(args, extra_args):  # noqa: C901 FIXME!!!
    """
    Execute an SSH command to the master instance, according to the [aliases] section, if present.

    :param args: pcluster CLI args
    :param extra_args: pcluster CLI extra_args
    """
    pcluster_config = PclusterConfig(fail_on_error=False)  # FIXME it always searches for the default config file
    if args.command in pcluster_config.get_section("aliases").params:
        ssh_command = pcluster_config.get_section("aliases").get_param_value(args.command)
    else:
        ssh_command = "ssh {CFN_USER}@{MASTER_IP} {ARGS}"

    try:
        master_ip, username = utils.get_master_ip_and_username(args.cluster_name)

        try:
            from shlex import quote as cmd_quote
        except ImportError:
            from pipes import quote as cmd_quote

        # build command
        cmd = ssh_command.format(
            CFN_USER=username, MASTER_IP=master_ip, ARGS=" ".join(cmd_quote(str(arg)) for arg in extra_args)
        )

        # run command
        log_message = "SSH command: {0}".format(cmd)
        if not args.dryrun:
            LOGGER.debug(log_message)
            os.system(cmd)
        else:
            LOGGER.info(log_message)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)

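# For reference, a hypothetical [aliases] section in the pcluster configuration file that the
# function above would pick up; the placeholders match the default command it falls back to,
# and the key path shown here is an assumption for illustration:
#
#   [aliases]
#   ssh = ssh {CFN_USER}@{MASTER_IP} {ARGS} -i ~/.ssh/my-key.pem
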
def assert_section_params(mocker, pcluster_config_reader, settings_label, expected_cfn_params):
    if isinstance(expected_cfn_params, SystemExit):
        with pytest.raises(SystemExit):
            PclusterConfig(
                cluster_label="default",
                config_file=pcluster_config_reader(settings_label=settings_label),
                fail_on_file_absence=True,
                fail_on_error=True,
            )
    else:
        pcluster_config = PclusterConfig(
            config_file=pcluster_config_reader(settings_label=settings_label), fail_on_file_absence=True
        )
        cfn_params = pcluster_config.to_cfn()

        assert_that(len(cfn_params)).is_equal_to(CFN_CONFIG_NUM_OF_PARAMS)

        for param_key, _ in cfn_params.items():
            assert_that(cfn_params.get(param_key), description=param_key).is_equal_to(
                expected_cfn_params.get(param_key)
            )

def convert(args=None):
    """Command to convert SIT cluster section into HIT format."""
    try:
        # Build the config based on args
        pcluster_config = PclusterConfig(
            config_file=args.config_file, cluster_label=args.cluster_template, fail_on_file_absence=True
        )

        # Automatic SIT -> HIT conversion, if needed
        conversion_done, reason = HitConverter(pcluster_config).convert(prepare_to_file=True)
        if conversion_done:
            if args.output_file:
                if os.path.isfile(args.output_file):
                    print("ERROR: File {0} already exists, please select another output file.".format(args.output_file))
                    sys.exit(1)
                else:
                    pcluster_config.config_file = args.output_file
                    pcluster_config.to_file(exclude_unrelated_sections=True)
                    print(
                        "Section [cluster {label}] from file {input} has been converted and saved into {output}.\n"
                        "New [queue compute] and [compute_resource default] sections have been created.".format(
                            label=pcluster_config.get_section("cluster").label,
                            input=args.config_file,
                            output=args.output_file,
                        )
                    )
            else:
                print(
                    "Section [cluster {label}] from file {input} has been converted.\n"
                    "New [queue compute] and [compute_resource default] sections have been created.\n"
                    "Configuration file content:\n\n".format(
                        label=pcluster_config.get_section("cluster").label, input=args.config_file
                    )
                )
                pcluster_config.to_file(exclude_unrelated_sections=True, print_stdout=True)
        else:
            print(reason)
    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(1)
    except Exception as e:
        # print does not support %-style formatting, so build the message explicitly
        print("Unexpected error of type {0}: {1}".format(type(e).__name__, e))
        sys.exit(1)

def list_stacks(args):
    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    try:
        result = []
        for stack in utils.paginate_boto3(boto3.client("cloudformation").describe_stacks):
            if stack.get("ParentId") is None and stack.get("StackName").startswith(PCLUSTER_STACK_PREFIX):
                pcluster_version = _get_pcluster_version_from_stack(stack)
                result.append(
                    [
                        stack.get("StackName")[len(PCLUSTER_STACK_PREFIX) :],  # noqa: E203
                        _colorize(stack.get("StackStatus"), args),
                        pcluster_version,
                    ]
                )
        LOGGER.info(tabulate(result, tablefmt="plain"))
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("Exiting...")
        sys.exit(0)

def init_pcluster_config_from_configparser(config_parser, validate=True, auto_refresh=True):
    with tempfile.NamedTemporaryFile(delete=False) as config_file:
        with open(config_file.name, "w") as cf:
            config_parser.write(cf)

        if "AWS_DEFAULT_REGION" not in os.environ:
            # We need to provide a region to PclusterConfig to avoid a no-region exception.
            # Which region to provide is arbitrary.
            os.environ["AWS_DEFAULT_REGION"] = "us-east-1"

        pcluster_config = PclusterConfig(
            config_file=config_file.name, cluster_label="default", fail_on_file_absence=True, auto_refresh=auto_refresh
        )
        if validate:
            _validate_config(config_parser, pcluster_config)
    return pcluster_config

def dcv_connect(args):
    """
    Execute pcluster dcv connect command.

    :param args: pcluster cli arguments.
    """
    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws()  # FIXME it always searches for the default configuration file

    # Prepare ssh command to execute in the master instance
    stack = get_stack(get_stack_name(args.cluster_name))
    shared_dir = get_cfn_param(stack.get("Parameters"), "SharedDir")
    master_ip, username = get_master_ip_and_username(args.cluster_name)
    cmd = 'ssh {CFN_USER}@{MASTER_IP} {KEY} "{REMOTE_COMMAND} {DCV_SHARED_DIR}"'.format(
        CFN_USER=username,
        MASTER_IP=master_ip,
        KEY="-i {0}".format(args.key_path) if args.key_path else "",
        REMOTE_COMMAND=DCV_CONNECT_SCRIPT,
        DCV_SHARED_DIR=shared_dir,
    )

    # Connect by ssh to the master instance and prepare the DCV session
    try:
        LOGGER.debug("SSH command: {0}".format(cmd))
        output = _check_command_output(cmd)
        # On the first ssh connection, the ssh command reports that it is adding the host to the known hosts list
        if re.search("Permanently added .* to the list of known hosts.", output):
            output = _check_command_output(cmd)

        dcv_parameters = re.search(
            r"PclusterDcvServerPort=([\d]+) PclusterDcvSessionId=([\w]+) PclusterDcvSessionToken=([\w-]+)", output
        )
        if dcv_parameters:
            dcv_server_port = dcv_parameters.group(1)
            dcv_session_id = dcv_parameters.group(2)
            dcv_session_token = dcv_parameters.group(3)
        else:
            error(
                "Something went wrong during DCV connection. Please manually execute the command:\n{0}\n"
                "If the problem persists, please check the logs in the /var/log/parallelcluster/ folder "
                "of the master instance and submit an issue {1}.".format(cmd, PCLUSTER_ISSUES_LINK)
            )
    except sub.CalledProcessError as e:
        if "{0}: No such file or directory".format(DCV_CONNECT_SCRIPT) in e.output:
            error(
                "The cluster {0} has been created with an old version of ParallelCluster "
                "without DCV support.".format(args.cluster_name)
            )
        else:
            error("Something went wrong during DCV connection.\n{0}".format(e.output))

    # Open web browser
    url = "https://{IP}:{PORT}?authToken={TOKEN}#{SESSION_ID}".format(
        IP=master_ip, PORT=dcv_server_port, TOKEN=dcv_session_token, SESSION_ID=dcv_session_id
    )
    try:
        webbrowser.open_new(url)
    except webbrowser.Error:
        LOGGER.info(
            "Unable to open the Web browser. "
            "Please use the following URL in your browser within 30 seconds:\n{0}".format(url)
        )

def get_mocked_pcluster_config(mocker):
    return PclusterConfig(config_file="wrong-file")

def configure(args):
    # Check for invalid path (e.g. a directory)
    if args.config_file and os.path.exists(args.config_file) and not os.path.isfile(args.config_file):
        error("Invalid configuration file path: {0}".format(args.config_file))

    pcluster_config = PclusterConfig(config_file=args.config_file, fail_on_error=False, auto_refresh=False)

    # FIXME: Overriding HIT config files is currently not supported.
    if pcluster_config.cluster_model == ClusterModel.HIT:
        error(
            "Configuration in file {0} cannot be overwritten. Please specify a different file path".format(
                pcluster_config.config_file
            )
        )

    if os.path.exists(pcluster_config.config_file):
        msg = "WARNING: Configuration file {0} will be overwritten."
    else:
        msg = "INFO: Configuration file {0} will be written."
    print(msg.format(pcluster_config.config_file))
    print("Press CTRL-C to interrupt the procedure.\n\n")

    if not args.region:
        # Use built-in boto regions as an available option
        available_regions = get_regions()
        default_region = pcluster_config.get_section("aws").get_param_value("aws_region_name")
        aws_region_name = prompt_iterable("AWS Region ID", available_regions, default_value=default_region)
        # Set provided region into os environment for suggestions and validations from here on
        os.environ["AWS_DEFAULT_REGION"] = aws_region_name
    else:
        aws_region_name = args.region

    cluster_section = pcluster_config.get_section("cluster")
    global_config = pcluster_config.get_section("global")
    cluster_label = global_config.get_param_value("cluster_template")

    vpc_section = pcluster_config.get_section("vpc")
    vpc_label = vpc_section.label

    # Get the key name from the current region, if any
    available_keys = _get_keys()
    default_key = cluster_section.get_param_value("key_name")
    key_name = prompt_iterable("EC2 Key Pair Name", available_keys, default_value=default_key)

    scheduler = prompt_iterable(
        "Scheduler", get_supported_schedulers(), default_value=cluster_section.get_param_value("scheduler")
    )
    cluster_config = ClusterConfigureHelper(cluster_section, scheduler)
    cluster_config.prompt_os()
    cluster_config.prompt_cluster_size()
    cluster_config.prompt_instance_types()

    vpc_parameters = _create_vpc_parameters(vpc_section, cluster_config)

    # This is the end of the prompts. The code below assembles the config and writes it to file.
    cluster_parameters = {"key_name": key_name, "scheduler": scheduler}
    cluster_parameters.update(cluster_config.get_scheduler_parameters())

    # Remove parameters from the past configuration that can conflict with the user's choices.
    _reset_config_params(cluster_section, cluster_config.get_parameters_to_reset())
    _reset_config_params(vpc_section, ("compute_subnet_id", "use_public_ips", "compute_subnet_cidr"))

    # Update configuration values according to user's choices
    pcluster_config.region = aws_region_name

    cluster_section.label = cluster_label
    for param_key, param_value in cluster_parameters.items():
        param = cluster_section.get_param(param_key)
        param.value = param.get_value_from_string(param_value)

    vpc_section.label = vpc_label
    for param_key, param_value in vpc_parameters.items():
        param = vpc_section.get_param(param_key)
        param.value = param.get_value_from_string(param_value)

    # Update internal params according to provided parameters and enable auto-refresh before eventual HIT conversion
    pcluster_config.refresh()
    pcluster_config.auto_refresh = True

    # Convert the file if needed
    HitConverter(pcluster_config).convert(prepare_to_file=True)

    # Update config file by overriding changed settings
    pcluster_config.to_file()
    print("Configuration file written to {0}".format(pcluster_config.config_file))
    print(
        "You can edit your configuration file or simply run 'pcluster create -c {0} cluster-name' "
        "to create your cluster".format(pcluster_config.config_file)
    )

def configure(args):
    # Check for invalid path (e.g. a directory)
    if args.config_file and os.path.exists(args.config_file) and not os.path.isfile(args.config_file):
        error("Invalid configuration file path: {0}".format(args.config_file))

    pcluster_config = PclusterConfig(config_file=args.config_file, fail_on_error=False)

    if os.path.exists(pcluster_config.config_file):
        msg = "WARNING: Configuration file {0} will be overwritten."
    else:
        msg = "INFO: Configuration file {0} will be written."
    print(msg.format(pcluster_config.config_file))
    print("Press CTRL-C to interrupt the procedure.\n\n")

    cluster_section = pcluster_config.get_section("cluster")
    global_config = pcluster_config.get_section("global")
    cluster_label = global_config.get_param_value("cluster_template")

    vpc_section = pcluster_config.get_section("vpc")
    vpc_label = vpc_section.label

    # Use built-in boto regions as an available option
    available_regions = get_regions()
    default_region = pcluster_config.get_section("aws").get_param_value("aws_region_name")
    aws_region_name = prompt_iterable(
        "AWS Region ID",
        available_regions,
        default_value=default_region if default_region in available_regions else None,
    )
    # Set provided region into os environment for suggestions and validations from here on
    os.environ["AWS_DEFAULT_REGION"] = aws_region_name

    # Get the key name from the current region, if any
    available_keys = _get_keys()
    default_key = cluster_section.get_param_value("key_name")
    key_name = prompt_iterable(
        "EC2 Key Pair Name", available_keys, default_value=default_key if default_key in available_keys else None
    )

    scheduler = prompt_iterable(
        "Scheduler", get_supported_schedulers(), default_value=cluster_section.get_param_value("scheduler")
    )
    scheduler_handler = SchedulerHandler(cluster_section, scheduler)

    scheduler_handler.prompt_os()
    scheduler_handler.prompt_cluster_size()

    master_instance_type = prompt(
        "Master instance type",
        lambda x: x in get_supported_instance_types(),
        default_value=cluster_section.get_param_value("master_instance_type"),
    )

    scheduler_handler.prompt_compute_instance_type()

    automate_vpc = prompt("Automate VPC creation? (y/n)", lambda x: x in ("y", "n"), default_value="n") == "y"

    vpc_parameters = _create_vpc_parameters(
        vpc_section, scheduler, scheduler_handler.max_cluster_size, automate_vpc_creation=automate_vpc
    )
    cluster_parameters = {"key_name": key_name, "scheduler": scheduler, "master_instance_type": master_instance_type}
    cluster_parameters.update(scheduler_handler.get_scheduler_parameters())

    # Remove parameters from the past configuration that can conflict with the user's choices.
    _reset_config_params(cluster_section, scheduler_handler.get_parameters_to_reset())
    _reset_config_params(vpc_section, ("compute_subnet_id", "use_public_ips", "compute_subnet_cidr"))

    # Update configuration values according to user's choices
    pcluster_config.region = aws_region_name

    cluster_section.label = cluster_label
    for param_key, param_value in cluster_parameters.items():
        param = cluster_section.get_param(param_key)
        param.value = param.get_value_from_string(param_value)

    vpc_section.label = vpc_label
    for param_key, param_value in vpc_parameters.items():
        param = vpc_section.get_param(param_key)
        param.value = param.get_value_from_string(param_value)

    # Update config file by overriding changed settings
    pcluster_config.to_file()
    print("Configuration file written to {0}".format(pcluster_config.config_file))
    print(
        "You can edit your configuration file or simply run 'pcluster create -c {0} cluster-name' "
        "to create your cluster".format(pcluster_config.config_file)
    )

def create_ami(args):
    LOGGER.info("Building AWS ParallelCluster AMI. This could take a while...")

    # Do not auto-refresh; pcluster_config is only used to get info on the vpc section, aws section, and template url.
    # Logic in the auto-refresh could trigger unexpected validations not needed in createami.
    pcluster_config = PclusterConfig(config_file=args.config_file, fail_on_file_absence=True, auto_refresh=False)

    ami_info = _validate_createami_args_ami_compatibility(args)
    ami_architecture = ami_info.get("Architecture")

    LOGGER.debug("Building AMI based on args %s", str(args))
    results = {}

    instance_type = args.instance_type
    try:
        vpc_section = pcluster_config.get_section("vpc")
        vpc_id = args.vpc_id if args.vpc_id else vpc_section.get_param_value("vpc_id")
        subnet_id = args.subnet_id if args.subnet_id else vpc_section.get_param_value("master_subnet_id")

        packer_env = {
            "CUSTOM_AMI_ID": args.base_ami_id,
            "AWS_FLAVOR_ID": instance_type,
            "AMI_NAME_PREFIX": args.custom_ami_name_prefix,
            "AWS_VPC_ID": vpc_id,
            "AWS_SUBNET_ID": subnet_id,
            "ASSOCIATE_PUBLIC_IP": "true" if args.associate_public_ip else "false",
        }

        aws_section = pcluster_config.get_section("aws")
        aws_region = aws_section.get_param_value("aws_region_name")
        if aws_section and aws_section.get_param_value("aws_access_key_id"):
            packer_env["AWS_ACCESS_KEY_ID"] = aws_section.get_param_value("aws_access_key_id")
        if aws_section and aws_section.get_param_value("aws_secret_access_key"):
            packer_env["AWS_SECRET_ACCESS_KEY"] = aws_section.get_param_value("aws_secret_access_key")

        LOGGER.info("Base AMI ID: %s", args.base_ami_id)
        LOGGER.info("Base AMI OS: %s", args.base_ami_os)
        LOGGER.info("Instance Type: %s", instance_type)
        LOGGER.info("Region: %s", aws_region)
        LOGGER.info("VPC ID: %s", vpc_id)
        LOGGER.info("Subnet ID: %s", subnet_id)

        template_url = evaluate_pcluster_template_url(pcluster_config)

        tmp_dir = mkdtemp()
        cookbook_dir = _get_cookbook_dir(aws_region, template_url, args, tmp_dir)

        _get_post_install_script_dir(args.post_install_script, tmp_dir)

        packer_command = (
            cookbook_dir
            + "/amis/build_ami.sh --os "
            + args.base_ami_os
            + " --partition region"
            + " --region "
            + aws_region
            + " --custom"
            + " --arch "
            + ami_architecture
        )

        results = _run_packer(packer_command, packer_env)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
    finally:
        _print_create_ami_results(results)
        if "tmp_dir" in locals() and tmp_dir:
            rmtree(tmp_dir)

def create(args):  # noqa: C901 FIXME!!!
    LOGGER.info("Beginning cluster creation for cluster: %s", args.cluster_name)
    LOGGER.debug("Building cluster config based on args %s", str(args))

    _validate_cluster_name(args.cluster_name)

    # Build the config based on args
    pcluster_config = PclusterConfig(
        config_file=args.config_file, cluster_label=args.cluster_template, fail_on_file_absence=True
    )
    pcluster_config.validate()

    # Automatic SIT -> HIT conversion, if needed
    HitConverter(pcluster_config).convert()

    # get CFN parameters, template url and tags from config
    storage_data = pcluster_config.to_storage()
    cfn_params = storage_data.cfn_params

    _check_for_updates(pcluster_config)

    bucket_name = None
    artifact_directory = None
    cleanup_bucket = False
    try:
        cfn_client = boto3.client("cloudformation")
        stack_name = utils.get_stack_name(args.cluster_name)

        # merge tags from configuration, command-line and internal ones
        tags = _evaluate_tags(pcluster_config, preferred_tags=args.tags)

        bucket_name, artifact_directory, cleanup_bucket = _setup_bucket_with_resources(
            pcluster_config, storage_data, stack_name, tags
        )
        cfn_params["ResourcesS3Bucket"] = bucket_name
        cfn_params["ArtifactS3RootDirectory"] = artifact_directory
        cfn_params["RemoveBucketOnDeletion"] = str(cleanup_bucket)

        LOGGER.info("Creating stack named: %s", stack_name)

        # determine the CloudFormation Template URL to use
        template_url = evaluate_pcluster_template_url(pcluster_config, preferred_template_url=args.template_url)

        # append extra parameters from command-line
        if args.extra_parameters:
            LOGGER.debug("Adding extra parameters to the CFN parameters")
            cfn_params.update(dict(args.extra_parameters))

        # prepare input parameters for stack creation and create the stack
        LOGGER.debug(cfn_params)
        params = [{"ParameterKey": key, "ParameterValue": value} for key, value in cfn_params.items()]
        stack = cfn_client.create_stack(
            StackName=stack_name,
            TemplateURL=template_url,
            Parameters=params,
            Capabilities=["CAPABILITY_IAM"],
            DisableRollback=args.norollback,
            Tags=tags,
        )
        LOGGER.debug("StackId: %s", stack.get("StackId"))

        if not args.nowait:
            verified = utils.verify_stack_creation(stack_name, cfn_client)
            LOGGER.info("")
            result_stack = utils.get_stack(stack_name, cfn_client)
            _print_stack_outputs(result_stack)
            if not verified:
                sys.exit(1)
        else:
            stack_status = utils.get_stack(stack_name, cfn_client).get("StackStatus")
            LOGGER.info("Status: %s", stack_status)
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        if not utils.stack_exists(stack_name):
            # Cleanup S3 artifacts if the stack is not created yet
            utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(0)
    except KeyError as e:
        LOGGER.critical("ERROR: KeyError - reason:\n%s", e)
        utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(1)
    except Exception as e:
        LOGGER.critical(e)
        utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(1)