def status(args):  # noqa: C901 FIXME!!!
    """
    Print the CloudFormation status of the cluster's stack.

    The current stack status is always written to stdout. Unless ``args.nowait`` is set, the stack is
    polled every 5 seconds, echoing the most recent stack event, until it reaches a terminal state:

    - on a successful terminal state the stack outputs are printed (only when the head node state
      reported by ``_poll_head_node_state`` is "running"), followed by the compute fleet status;
    - on a failed terminal state every failed stack event is logged.

    Exits with code 1 when a ClientError is raised and with code 0 on KeyboardInterrupt.

    :param args: parsed command line arguments; reads ``cluster_name``, ``config_file`` and ``nowait``
    """
    stack_name = utils.get_stack_name(args.cluster_name)

    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    cfn = boto3.client("cloudformation")

    # Stack states that stop the polling loop, split by outcome.
    successful_states = ("CREATE_COMPLETE", "UPDATE_COMPLETE", "UPDATE_ROLLBACK_COMPLETE")
    failed_states = ("ROLLBACK_COMPLETE", "CREATE_FAILED", "DELETE_FAILED")
    failed_resource_states = ("CREATE_FAILED", "DELETE_FAILED", "UPDATE_FAILED")

    try:
        stack = utils.get_stack(stack_name, cfn)
        sys.stdout.write("\rStatus: %s" % stack.get("StackStatus"))
        sys.stdout.flush()

        if args.nowait:
            # Nothing else to report: just terminate the status line.
            sys.stdout.write("\n")
            sys.stdout.flush()
        else:
            while stack.get("StackStatus") not in successful_states + failed_states:
                time.sleep(5)
                stack = utils.get_stack(stack_name, cfn)
                latest_event = utils.get_stack_events(stack_name)[0]
                progress_line = "Status: %s - %s" % (
                    latest_event.get("LogicalResourceId"),
                    latest_event.get("ResourceStatus"),
                )
                # Pad to 80 columns so a shorter line fully overwrites the previous one.
                sys.stdout.write("\r%s" % progress_line.ljust(80))
                sys.stdout.flush()

            final_status = stack.get("StackStatus")
            sys.stdout.write("\rStatus: %s\n" % final_status)
            sys.stdout.flush()

            if final_status in successful_states:
                if _poll_head_node_state(stack_name) == "running":
                    _print_stack_outputs(stack)
                _print_compute_fleet_status(args.cluster_name, stack)
            elif final_status in failed_states:
                for event in utils.get_stack_events(stack_name):
                    if event.get("ResourceStatus") not in failed_resource_states:
                        continue
                    LOGGER.info(
                        "%s %s %s %s %s",
                        event.get("Timestamp"),
                        event.get("ResourceStatus"),
                        event.get("ResourceType"),
                        event.get("LogicalResourceId"),
                        event.get("ResourceStatusReason"),
                    )
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
def _delete_cluster(cluster_name, nowait):
    """
    Delete the CloudFormation stack of the cluster described by cluster_name.

    After requesting the deletion the stack status is written to stdout. Unless ``nowait`` is set,
    the stack is polled every 5 seconds, echoing the most recent stack event, for as long as it is
    in DELETE_IN_PROGRESS. A ClientError whose message ends with "does not exist", raised after the
    deletion was requested, is treated as a successful deletion (exit code 0); any other ClientError
    exits with code 1. KeyboardInterrupt exits with code 0.

    When ``nowait`` is not set and the user did not interrupt, ``_terminate_cluster_nodes`` is called
    on the way out, whatever the outcome.

    :param cluster_name: name of the cluster to delete
    :param nowait: if True, return right after requesting the deletion instead of polling
    """
    cfn = boto3.client("cloudformation")
    stack_name = utils.get_stack_name(cluster_name)
    deletion_requested = False
    should_terminate_nodes = not nowait
    try:
        # delete_stack does not raise an exception if stack does not exist
        # Use describe_stacks to explicitly check if the stack exists
        cfn.delete_stack(StackName=stack_name)
        deletion_requested = True
        stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
        sys.stdout.write("\rStatus: %s" % stack_status)
        sys.stdout.flush()
        LOGGER.debug("Status: %s", stack_status)

        if nowait:
            sys.stdout.write("\n")
            sys.stdout.flush()
        else:
            while stack_status == "DELETE_IN_PROGRESS":
                time.sleep(5)
                current_stack = utils.get_stack(stack_name, cfn, raise_on_error=True)
                stack_status = current_stack.get("StackStatus")
                latest_event = utils.get_stack_events(stack_name, raise_on_error=True)[0]
                progress_line = "Status: %s - %s" % (
                    latest_event.get("LogicalResourceId"),
                    latest_event.get("ResourceStatus"),
                )
                # Pad to 80 columns so a shorter line fully overwrites the previous one.
                sys.stdout.write("\r%s" % progress_line.ljust(80))
                sys.stdout.flush()
            sys.stdout.write("\rStatus: %s\n" % stack_status)
            sys.stdout.flush()
            LOGGER.debug("Status: %s", stack_status)

        if stack_status == "DELETE_FAILED":
            LOGGER.info("Cluster did not delete successfully. Run 'pcluster delete %s' again", cluster_name)
    except ClientError as e:
        error_message = e.response.get("Error").get("Message")
        # Once the deletion has been requested, a vanished stack means the deletion completed.
        if error_message.endswith("does not exist") and deletion_requested:
            LOGGER.info("\nCluster deleted successfully.")
            sys.exit(0)
        LOGGER.critical(error_message)
        sys.stdout.flush()
        sys.exit(1)
    except KeyboardInterrupt:
        # The user gave up waiting: skip the node termination performed in the finally clause.
        should_terminate_nodes = False
        LOGGER.info("\nExiting...")
        sys.exit(0)
    finally:
        if should_terminate_nodes:
            _terminate_cluster_nodes(stack_name)
def test_get_stack_events_retry(boto3_stubber, mocker):
    """Verify that get_stack_events sleeps 5 seconds and retries after a Throttling error."""
    sleep_mock = mocker.patch("pcluster.utils.time.sleep")
    expected_events = [_generate_stack_event()]

    # First call is throttled, second call succeeds with the expected events.
    throttled_request = MockedBoto3Request(
        method="describe_stack_events",
        response="Error",
        expected_params={"StackName": FAKE_STACK_NAME},
        generate_error=True,
        error_code="Throttling",
    )
    successful_request = MockedBoto3Request(
        method="describe_stack_events",
        response={"StackEvents": expected_events},
        expected_params={"StackName": FAKE_STACK_NAME},
    )
    boto3_stubber("cloudformation", [throttled_request, successful_request])

    retrieved_events = utils.get_stack_events(FAKE_STACK_NAME)

    assert_that(retrieved_events).is_equal_to(expected_events)
    sleep_mock.assert_called_with(5)
def update(args):  # noqa: C901 FIXME!!!
    """
    Update the CloudFormation stack of a cluster with the parameters derived from its configuration.

    The configuration is loaded and validated, then converted to CloudFormation parameters:

    - for schedulers other than awsbatch, unless ``args.reset_desired`` is set, ``DesiredSize`` is
      overridden with the current desired capacity of the cluster's Auto Scaling group;
    - for awsbatch, ``ResourcesS3Bucket`` is carried over from the existing stack parameters.

    ``args.extra_parameters``, when provided, are merged on top. After calling ``update_stack`` the
    stack is polled every 5 seconds while it is in UPDATE_IN_PROGRESS, unless ``args.nowait`` is set,
    in which case the stack status is logged once.

    Exits with code 1 when a ClientError is raised by the update and with code 0 on KeyboardInterrupt.

    :param args: parsed command line arguments; reads ``cluster_name``, ``config_file``,
        ``cluster_template``, ``reset_desired``, ``extra_parameters`` and ``nowait``
    """
    LOGGER.info("Updating: %s", args.cluster_name)
    stack_name = utils.get_stack_name(args.cluster_name)
    pcluster_config = PclusterConfig(
        config_file=args.config_file,
        cluster_label=args.cluster_template,
        fail_on_file_absence=True,
    )
    pcluster_config.validate()
    cfn_params = pcluster_config.to_cfn()

    cluster_section = pcluster_config.get_section("cluster")
    cfn = boto3.client("cloudformation")

    if cluster_section.get_param_value("scheduler") == "awsbatch":
        if args.reset_desired:
            LOGGER.info("reset_desired flag does not work with awsbatch scheduler")
        existing_parameters = utils.get_stack(stack_name, cfn).get("Parameters")

        for existing_parameter in existing_parameters:
            if existing_parameter.get("ParameterKey") == "ResourcesS3Bucket":
                cfn_params["ResourcesS3Bucket"] = existing_parameter.get("ParameterValue")
    elif not args.reset_desired:
        # Keep the fleet at its current size instead of resetting it to the configured value.
        asg_name = _get_asg_name(stack_name)
        autoscaling = boto3.client("autoscaling")
        asg_description = autoscaling.describe_auto_scaling_groups(AutoScalingGroupNames=[asg_name])
        desired_capacity = asg_description.get("AutoScalingGroups")[0].get("DesiredCapacity")
        cfn_params["DesiredSize"] = str(desired_capacity)

    try:
        LOGGER.debug(cfn_params)
        if args.extra_parameters:
            LOGGER.debug("Adding extra parameters to the CFN parameters")
            cfn_params.update(dict(args.extra_parameters))

        # Convert the key/value mapping into the list of dicts passed to update_stack.
        stack_parameters = [
            {"ParameterKey": name, "ParameterValue": value} for name, value in cfn_params.items()
        ]
        LOGGER.info("Calling update_stack")
        cfn.update_stack(
            StackName=stack_name,
            UsePreviousTemplate=True,
            Parameters=stack_parameters,
            Capabilities=["CAPABILITY_IAM"],
        )
        stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")

        if args.nowait:
            stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
            LOGGER.info("Status: %s", stack_status)
        else:
            while stack_status == "UPDATE_IN_PROGRESS":
                stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
                latest_event = utils.get_stack_events(stack_name)[0]
                progress_line = "Status: %s - %s" % (
                    latest_event.get("LogicalResourceId"),
                    latest_event.get("ResourceStatus"),
                )
                # Pad to 80 columns so a shorter line fully overwrites the previous one.
                sys.stdout.write("\r%s" % progress_line.ljust(80))
                sys.stdout.flush()
                time.sleep(5)
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)