def create_state_machines(client, role_arn, names):
    """Provision the worker activity and the two cbm3_aws state machines.

    The worker activity is created first, its ARN is handed to the task
    state machine, and the ARN of the task state machine is in turn handed
    to the application state machine.

    Args:
        client (SFN.client): boto3 step functions client
        role_arn (str): The Amazon Resource Name (ARN) of the IAM role
            passed to both state machines.
        names (namespace): the names used to label provisioned aws
            resources

    Returns:
        namespace: context object with the attributes ``activity_arn``,
            ``task_state_machine_arn`` and ``app_state_machine_arn``
    """
    ctx = Namespace()
    ctx.activity_arn = _create_worker_activity(client=client, names=names)

    # both state machines are created with the same client, role and names
    shared_kwargs = {"client": client, "role_arn": role_arn, "names": names}

    ctx.task_state_machine_arn = _create_task_state_machine(
        worker_activity_resource_arn=ctx.activity_arn, **shared_kwargs)
    ctx.app_state_machine_arn = _create_application_state_machine(
        task_state_machine_arn=ctx.task_state_machine_arn, **shared_kwargs)
    return ctx
def main():
    """Command line entry point for starting a cbm3_aws execution.

    Loads the resource description and the tasks json files named on the
    command line, starts an execution on the application state machine
    recorded in the resource description, and dumps the start_execution
    response as json to ``--response_output_path``.

    An already existing response file is never overwritten: a ValueError is
    raised instead. Every exception raised after logging has started is
    logged with its traceback and not propagated.
    """
    # options shared by every argument that names a file
    path_option = dict(required=True, type=os.path.abspath)

    parser = ArgumentParser(
        description="Starts an execution on a cbm3_aws cluster")
    parser.add_argument(
        "--resource_description_path",
        help="Path to a json formatted file containing the allocated AWS "
             "cbm3_aws cluster",
        **path_option)
    parser.add_argument(
        "--execution_name",
        required=True,
        help="The name of the execution. This name must be unique for your "
             "AWS account, region, and state machine for 90 days. For more "
             "information, see Limits Related to State Machine Executions in "
             "the AWS Step Functions Developer Guide.")
    parser.add_argument(
        "--tasks_file_path",
        help="Path to json formatted tasks who has at a minimum a "
             "'task_list' key whose value is the list of tasks to "
             "pass to each instance as they call get_activity_task",
        **path_option)
    parser.add_argument(
        "--response_output_path", help="Path", **path_option)

    log_helper.start_logging("start_execution", level="INFO")
    logger = log_helper.get_logger("start_execution")
    try:
        args = parser.parse_args()
        logger.info("start_execution")
        logger.info(vars(args))

        output_path = args.response_output_path
        if os.path.exists(output_path):
            # an existing file may hold the response of an earlier
            # execution, so it is left untouched
            raise ValueError(
                "specified response_output_path already exists: "
                f"'{output_path}'")

        # the output file is opened before anything else happens and stays
        # open until the response has been written
        with open(output_path, 'w') as out_file:
            with open(args.resource_description_path, 'r') as resources_fp:
                rd = Namespace(**json.load(resources_fp))
            with open(args.tasks_file_path, 'r') as tasks_fp:
                tasks = json.load(tasks_fp)

            state_machine_arn = \
                rd.state_machine_context.app_state_machine_arn
            response = execution.start_execution(
                execution_name=args.execution_name,
                state_machine_arn=state_machine_arn,
                region_name=rd.region_name,
                tasks=tasks)

            logger.info(json.dumps(response, indent=4))
            json.dump(response, out_file)
    except Exception:
        logger.exception("")
def test_namespace_dictionary_round_trip(self):
    """Check attribute access on a Namespace and the to_dict round trip.

    A Namespace built from a nested dictionary must expose nested
    dictionaries through chained attribute access, leave lists as they
    are, and return a dictionary equal to the input from ``to_dict``.
    """
    data = {
        "a": 1,
        "b": [1, 2],
        "c": {"d": 1, "e": 2},
        "f": {"g": [1, 2], "h": {"i": 1, "j": [2]}}
    }
    ns = Namespace(**data)

    # assertEqual (rather than assertTrue on an ``==`` expression) reports
    # both operands when a comparison fails
    self.assertEqual(ns.a, 1)
    self.assertEqual(ns.b, [1, 2])
    self.assertEqual(ns.c.d, 1)
    self.assertEqual(ns.c.e, 2)
    self.assertEqual(ns.f.g, [1, 2])
    self.assertEqual(ns.f.h.i, 1)
    self.assertEqual(ns.f.h.j, [2])

    data_result = ns.to_dict()
    self.assertEqual(data_result, data)
def create_instance_iam_role(client, policy_context_list, names):
    """Create the IAM role and the instance profile used by instances.

    The role is created with a trust policy for the EC2 service, every
    policy in ``policy_context_list`` is attached to it, and an instance
    profile carrying the same name as the role is created and linked to
    the role.

    Args:
        client (IAM.client): boto3 IAM client
        policy_context_list (list): list of objects whose ``policy_arn``
            attribute is the ARN of a policy to attach to the role.
        names (namespace): the names used to label provisioned aws
            resources; ``names.instance_iam_role`` names both the role and
            the instance profile

    Returns:
        namespace: context object with the attributes ``role_arn``,
            ``role_name``, ``instance_profile_name``,
            ``instance_profile_id`` and ``instance_profile_arn``
    """
    role_name = names.instance_iam_role

    # trust policy: allows the EC2 service to assume the role
    trust_policy = {
        "Version": "2012-10-17",
        "Statement": [{
            "Effect": "Allow",
            "Principal": {"Service": "ec2.amazonaws.com"},
            "Action": "sts:AssumeRole"
        }]
    }

    role_response = client.create_role(
        Path='/',
        RoleName=role_name,
        AssumeRolePolicyDocument=json.dumps(trust_policy),
        Description='grants ec2 instances read and write permission to '
                    'specific bucket, state machine access, and autoscale '
                    'group access')

    for policy_context in policy_context_list:
        client.attach_role_policy(
            RoleName=role_name, PolicyArn=policy_context.policy_arn)

    profile_response = client.create_instance_profile(
        InstanceProfileName=role_name)
    client.add_role_to_instance_profile(
        InstanceProfileName=role_name, RoleName=role_name)

    role = role_response["Role"]
    profile = profile_response["InstanceProfile"]
    return Namespace(
        role_arn=role["Arn"],
        role_name=role["RoleName"],
        instance_profile_name=profile["InstanceProfileName"],
        instance_profile_id=profile["InstanceProfileId"],
        instance_profile_arn=profile["Arn"])
def get_names(uuid):
    """Build the resource names for one deployment.

    Args:
        uuid: identifier appended (after an underscore) to every name

    Returns:
        namespace: one attribute per provisioned resource kind, each
            holding ``"<prefix>_<uuid>"``
    """
    # attribute name -> name prefix, in the order the attributes are set
    prefixes = {
        "run_activity": "cbm3_run_activity",
        "autoscale_launch_template": "cbm3_run_launch_template",
        "autoscale_group": "cbm3_autoscale_group",
        "run_task_state_machine": "cbm3_run_task_state_machine",
        "run_state_machine": "cbm3_run_state_machine",
        "state_machine_policy": "cbm3_state_machine_policy",
        "state_machine_role": "cbm3_state_machine_role",
        "instance_s3_policy": "cbm3_s3_instance_policy",
        "instance_iam_role": "cbm3_iam_instance_role",
    }
    labels = {
        attribute: f"{prefix}_{uuid}"
        for attribute, prefix in prefixes.items()
    }
    return Namespace(**labels)
def create_state_machine_policy(client, account_number, names):
    """Create the IAM policy attached to the state machine role.

    The policy allows starting executions of the account's state machines,
    describing and stopping executions, and managing the events rule
    ``StepFunctionsGetEventsForStepFunctionsExecutionRule``.

    Args:
        client (IAM.client): boto3 IAM client
        account_number (str): the AWS account number used to restrict the
            resource ARNs of the policy statements
        names (namespace): the names used to label provisioned aws
            resources; ``names.state_machine_policy`` names the policy

    Returns:
        namespace: object whose ``policy_arn`` attribute is the ARN of the
            created policy
    """
    # see:
    # https://docs.aws.amazon.com/step-functions/latest/dg/stepfunctions-iam.html
    start_execution_statement = {
        "Effect": "Allow",
        "Action": ["states:StartExecution"],
        "Resource": [f"arn:aws:states:*:{account_number}:stateMachine:*"]
    }
    manage_execution_statement = {
        "Effect": "Allow",
        "Action": ["states:DescribeExecution", "states:StopExecution"],
        "Resource": "*"
    }
    events_rule_statement = {
        "Effect": "Allow",
        "Action": [
            "events:PutTargets", "events:PutRule", "events:DescribeRule"],
        "Resource": [
            f"arn:aws:events:*:{account_number}:"
            "rule/StepFunctionsGetEventsForStepFunctionsExecutionRule"
        ]
    }
    policy_document = {
        "Version": "2012-10-17",
        "Statement": [
            start_execution_statement,
            manage_execution_statement,
            events_rule_statement,
        ]
    }

    response = client.create_policy(
        PolicyName=names.state_machine_policy,
        Path='/',
        PolicyDocument=json.dumps(policy_document),
        Description='grants access for state machine execution')
    return Namespace(policy_arn=response["Policy"]["Arn"])
def main():
    """Command line entry point for de-allocating cbm3_aws resources.

    Loads the json resource description named by
    ``--resource_description_path`` and passes it, wrapped in a Namespace,
    to ``resources.cleanup``. Every exception raised after logging has
    started is logged with its traceback and not propagated.
    """
    parser = ArgumentParser(
        description="Deallocates AWS resources that were allocated for "
                    "cbm3_aws runs")
    parser.add_argument(
        "--resource_description_path",
        required=True,
        help="Path to a json formatted file containing the allocated AWS "
             "resources to de-allocate with this script.")

    log_helper.start_logging("aws_cleanup", level="INFO")
    logger = log_helper.get_logger("aws_cleanup")
    try:
        args = parser.parse_args()
        logger.info("aws_cleanup start up")
        logger.info(vars(args))

        description_path = os.path.abspath(args.resource_description_path)
        with open(description_path, 'r') as description_file:
            resource_description = Namespace(**json.load(description_file))

        resources.cleanup(resource_description=resource_description)
    except Exception:
        logger.exception("")
def iterate_tasks(task_message, local_projects, local_results_dir):
    """Yield one work item per simulation id of every task.

    Args:
        task_message (iterable): tasks, each indexable by the keys
            ``"simulation_ids"`` (iterable of ids) and ``"project_code"``
        local_projects (mapping): maps a project code to the local path of
            that project
        local_results_dir (str): directory under which the per-simulation
            output paths are built

    Yields:
        namespace: object with the attributes ``project_code``,
            ``project_path``, ``simulation_id``, ``results_database_path``,
            ``tempfiles_output_dir`` and ``stdout_path``; the three output
            paths live in
            ``<local_results_dir>/<project_code>/<simulation_id>/``
    """
    for task in task_message:
        for simulation_id in task["simulation_ids"]:
            project_code = task["project_code"]
            project_path = local_projects[project_code]

            # all outputs of one simulation share this directory
            simulation_dir = os.path.join(
                local_results_dir, project_code, str(simulation_id))

            yield Namespace(
                project_code=project_code,
                project_path=project_path,
                simulation_id=simulation_id,
                results_database_path=os.path.join(
                    simulation_dir, f"{simulation_id}.mdb"),
                tempfiles_output_dir=os.path.join(
                    simulation_dir, f"temp_files_{simulation_id}"),
                stdout_path=os.path.join(
                    simulation_dir, f"stdout_{simulation_id}.txt"))
def deploy(region_name, s3_bucket_name, min_virtual_cpu, max_virtual_cpu,
           image_ami_id, resource_description_path, vpc_zone_identifier=None):
    """Provision the AWS resources of a cbm3_aws cluster.

    The steps run in this order: boto3 clients, the s3 bucket (only when
    it does not exist yet), IAM policies, IAM roles, a fixed wait, the
    state machines, the instance user data, the launch template and
    finally the autoscaling group. Each result is stored on the resource
    description object as soon as it is available.

    ``__write_resources_file`` is called with the resource description
    right after the uuid is assigned and again in the ``finally`` clause,
    i.e. on success as well as on failure (presumably so that partially
    created resources can still be cleaned up -- confirm against the
    cleanup script).

    Args:
        region_name (str): region name passed to every boto3 client and
            to the bucket creation
        s3_bucket_name (str): name of the s3 bucket to use; it is created
            if it does not already exist
        min_virtual_cpu (int): converted with ``int`` and passed as
            ``min_size`` of the autoscaling group
        max_virtual_cpu (int): converted with ``int`` and passed as
            ``max_size`` of the autoscaling group
        image_ami_id (str): ami id passed to the launch template
        resource_description_path (str): path passed to
            ``__write_resources_file``; must not exist yet
        vpc_zone_identifier (str, optional): passed through to the
            autoscaling group. When it is falsy the availability zones
            are looked up with ``autoscale_group.get_availability_zones``
            and passed instead. Defaults to None.

    Raises:
        ValueError: if ``resource_description_path`` already exists
        ClientError: any boto3 client error whose error code is not
            ``'InternalError'``

    Returns:
        namespace: the resource description, or None when a ClientError
            with code ``'InternalError'`` was caught and logged
    """
    if os.path.exists(resource_description_path):
        raise ValueError("specified resource_description_path already exists: "
                         f"'{resource_description_path}'")
    # resource description
    rd = Namespace()
    rd.uuid = get_uuid()
    # first write: at this point the description only holds the uuid
    __write_resources_file(rd, resource_description_path)

    rd.names = get_names(rd.uuid)
    rd.region_name = region_name
    rd.s3_bucket_name = s3_bucket_name
    rd.min_virtual_cpu = int(min_virtual_cpu)
    rd.max_virtual_cpu = int(max_virtual_cpu)
    rd.image_ami_id = image_ami_id

    try:
        logger.info("connecting")
        s3_client = boto3.client("s3", region_name=rd.region_name)
        ec2_client = boto3.client("ec2", region_name=rd.region_name)
        auto_scale_client = boto3.client('autoscaling',
                                         region_name=rd.region_name)
        iam_client = boto3.client("iam", region_name=rd.region_name)
        sts_client = boto3.client("sts", region_name=rd.region_name)
        sfn_client = boto3.client('stepfunctions', region_name=rd.region_name)

        logger.info("check if bucket exists")
        if not __s3_bucket_exists(s3_client, rd.s3_bucket_name):
            logger.info(f"creating s3 bucket {rd.s3_bucket_name}")
            s3_bucket.create_bucket(client=s3_client,
                                    bucket_name=rd.s3_bucket_name,
                                    region=rd.region_name)

        # the account number is needed by both policy documents
        account_number = __get_account_number(sts_client)
        logger.info("creating policies")
        rd.ec2_worker_policy = roles.create_ec2_worker_policy(
            client=iam_client, s3_bucket_name=rd.s3_bucket_name,
            account_number=account_number, names=rd.names)
        rd.state_machine_policy_context = roles.create_state_machine_policy(
            client=iam_client, account_number=account_number, names=rd.names)

        logger.info("creating iam roles")
        rd.instance_iam_role_context = roles.create_instance_iam_role(
            client=iam_client, policy_context_list=[rd.ec2_worker_policy],
            names=rd.names)

        logger.info("creating state machine role")
        rd.state_machine_role_context = roles.create_state_machine_role(
            client=iam_client,
            policy_context_list=[rd.state_machine_policy_context],
            names=rd.names)

        # https://github.com/hashicorp/terraform/issues/15341
        # need to add a delay for the iam changes above to be processed
        # internally by AWS
        wait_time = 20
        logger.info(
            f"waiting {wait_time} seconds for changes to take effect on AWS")
        time.sleep(wait_time)

        logger.info("creating state machine")
        rd.state_machine_context = step_functions.create_state_machines(
            client=sfn_client,
            role_arn=rd.state_machine_role_context.role_arn,
            names=rd.names)

        # the user data receives the activity ARN created just above
        logger.info("creating userdata")
        rd.user_data = create_userdata(
            s3_bucket_name=rd.s3_bucket_name,
            activity_arn=rd.state_machine_context.activity_arn,
            region_name=rd.region_name)

        iam_instance_profile_arn = \
            rd.instance_iam_role_context.instance_profile_arn
        logger.info("creating launch template")
        rd.launch_template_context = autoscale_group.create_launch_template(
            client=ec2_client, name=rd.names.autoscale_launch_template,
            image_ami_id=rd.image_ami_id,
            iam_instance_profile_arn=iam_instance_profile_arn,
            user_data=rd.user_data)

        # availability zones are only looked up when no vpc subnets were
        # given by the caller
        availability_zones = None
        if not vpc_zone_identifier:
            logger.info("getting availability zones")
            availability_zones = autoscale_group.get_availability_zones(
                client=ec2_client)
            logger.info(f"using zones: {availability_zones}")

        logger.info(f"create autoscaling group")
        rd.autoscale_group_context = autoscale_group.create_autoscaling_group(
            client=auto_scale_client, name=rd.names.autoscale_group,
            launch_template_context=rd.launch_template_context,
            min_size=rd.min_virtual_cpu, max_size=rd.max_virtual_cpu,
            availability_zones=availability_zones,
            vpc_zone_identifier=vpc_zone_identifier)

        return rd

    except ClientError as err:
        # from:
        # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html
        if err.response['Error']['Code'] == 'InternalError':  # Generic error
            # logged only: execution falls through to ``finally`` and the
            # function returns None
            logger.error('Error Message: {}'.format(
                err.response['Error']['Message']))
            logger.error('Request ID: {}'.format(
                err.response['ResponseMetadata']['RequestId']))
            logger.error('Http code: {}'.format(
                err.response['ResponseMetadata']['HTTPStatusCode']))
        else:
            raise err
    finally:
        # runs on success and on failure with whatever has been recorded
        # on the resource description so far
        __write_resources_file(rd, resource_description_path)
def create_launch_template(client, name, image_ami_id,
                           iam_instance_profile_arn, user_data):
    """Create the launch template used to provision worker instances.

    Args:
        client (EC2.Client): boto3 ec2 client
        name (str): the name of the launch template
        image_ami_id (str): the ami id for the launched instances
        iam_instance_profile_arn (str): ARN of the IAM instance profile to
            attach to launched instances
        user_data (str): value passed unchanged as the template's
            ``UserData``
            (line break separated commands to run on instance start)

    Returns:
        object: launch template context object with the attributes
            ``launch_template_name`` and ``launch_template_id``
    """
    def single_tag(resource_type, key, value):
        # one tag specification holding exactly one key/value tag
        return {
            'ResourceType': resource_type,
            'Tags': [{'Key': key, 'Value': value}]
        }

    template_data = {
        'EbsOptimized': False,
        'IamInstanceProfile': {'Arn': iam_instance_profile_arn},
        'ImageId': image_ami_id,
        'Monitoring': {'Enabled': True},
        'InstanceInitiatedShutdownBehavior': 'terminate',
        'UserData': user_data,
        # tags applied to the instances and volumes launched from the
        # template
        'TagSpecifications': [
            single_tag('instance', 'Name', 'CBM3 Worker Instance'),
            single_tag('volume', 'Name', 'CBM3 Worker volume'),
        ]
    }

    response = client.create_launch_template(
        DryRun=False,
        # a new random token is generated for every call
        ClientToken=str(uuid.uuid4()),
        LaunchTemplateName=name,
        LaunchTemplateData=template_data,
        # tag applied to the launch template itself
        TagSpecifications=[
            single_tag('launch-template', 'name', 'CBM3 launch template'),
        ])

    created = response["LaunchTemplate"]
    return Namespace(
        launch_template_name=created["LaunchTemplateName"],
        launch_template_id=created["LaunchTemplateId"])
def create_autoscaling_group(client, name, launch_template_context, min_size,
                             max_size, availability_zones=None,
                             vpc_zone_identifier=None):
    """Create the autoscaling group that manages the worker instances.

    The group uses a mixed instances policy over a fixed list of instance
    types, each with a weighted capacity, and requests no on-demand
    capacity (base capacity 0, 0 percent above base).

    Args:
        client (AutoScaling.Client): boto3 autoscaling client
        name (str): the name of the autoscaling group
        launch_template_context (object): Return value of:
            :py:func:`create_launch_template`; its ``launch_template_id``
            is used
        min_size (int): passed as the group's ``MinSize``
        max_size (int): passed as the group's ``MaxSize``
        availability_zones (list): the list of availability zones for the
            autoscaling group; only sent when truthy.
        vpc_zone_identifier (str): A comma-separated list of subnet IDs
            for your virtual private cloud (VPC); only sent when truthy.

    Returns:
        object: autoscaling group context with the attribute
            ``auto_scaling_group_name``
    """
    # (instance type, weighted capacity) in priority order
    weighted_instance_types = [
        ('m5.2xlarge', '8'),
        ('m5.4xlarge', '16'),
        ('m5.8xlarge', '32'),
        ('m5a.2xlarge', '8'),
        ('m5a.4xlarge', '16'),
        ('m5a.8xlarge', '32'),
        ('m4.2xlarge', '8'),
        ('m4.4xlarge', '16'),
        ('m4.10xlarge', '40'),
        ('c5.2xlarge', '8'),
        ('c5.4xlarge', '16'),
        ('c4.2xlarge', '8'),
        ('c4.4xlarge', '16'),
    ]
    overrides = [
        {'InstanceType': instance_type, 'WeightedCapacity': capacity}
        for instance_type, capacity in weighted_instance_types
    ]

    instances_distribution = {
        # prioritized by the order of the above overrides list
        # for on-demand only
        'OnDemandAllocationStrategy': 'prioritized',
        # minimum number of On demand instances
        'OnDemandBaseCapacity': 0,
        # percent of on demand versus spot instances
        'OnDemandPercentageAboveBaseCapacity': 0,
        'SpotAllocationStrategy': 'capacity-optimized',
    }

    request = {
        "AutoScalingGroupName": name,
        "MixedInstancesPolicy": {
            'LaunchTemplate': {
                'LaunchTemplateSpecification': {
                    'LaunchTemplateId':
                        launch_template_context.launch_template_id
                },
                'Overrides': overrides,
            },
            'InstancesDistribution': instances_distribution
        },
        "MinSize": min_size,
        "MaxSize": max_size,
        "TerminationPolicies": ["NewestInstance"],
        "NewInstancesProtectedFromScaleIn": False,
    }

    # the two placement options are optional and only sent when provided
    if availability_zones:
        request["AvailabilityZones"] = availability_zones
    if vpc_zone_identifier:
        request["VPCZoneIdentifier"] = vpc_zone_identifier

    client.create_auto_scaling_group(**request)
    return Namespace(auto_scaling_group_name=name)
def create_ec2_worker_policy(client, s3_bucket_name, account_number, names):
    """Create the IAM policy attached to the worker instance role.

    The policy document grants:

        1. put/get/delete of objects in the specified named bucket
        2. ``states:GetActivityTask`` on the cbm3_aws run activity
        3. sending task success/failure/heartbeat and listing activities
        4. creating the ``cbm3_aws`` log group and its streams, and
           putting log events to them

    Args:
        client (IAM.client): boto3 IAM client
        s3_bucket_name (str): the name of the bucket for which to assign
            the policy
        account_number (str): the AWS account number used in the activity
            and log group resource ARNs
        names (namespace): the names used to label provisioned aws
            resources; ``names.run_activity`` and
            ``names.instance_s3_policy`` are used

    Returns:
        namespace: a namespace whose ``policy_arn`` attribute is the ARN
            of the created policy
    """
    bucket_objects_statement = {
        "Sid": "0",
        "Effect": "Allow",
        "Action": ["s3:PutObject", "s3:GetObject", "s3:DeleteObject"],
        "Resource": f"arn:aws:s3:::{s3_bucket_name}/*"
    }
    get_activity_task_statement = {
        "Sid": "1",
        "Effect": "Allow",
        "Action": "states:GetActivityTask",
        "Resource": f"arn:aws:states:*:{account_number}:activity:"
                    f"{names.run_activity}"
    }
    task_reporting_statement = {
        "Sid": "2",
        "Effect": "Allow",
        "Action": [
            "states:SendTaskSuccess",
            "states:SendTaskFailure",
            "states:ListActivities",
            "states:SendTaskHeartbeat"
        ],
        "Resource": "*"
    }
    logging_statement = {
        "Sid": "3",
        "Effect": "Allow",
        "Action": [
            "logs:CreateLogStream",
            "logs:CreateLogGroup",
            "logs:PutLogEvents"
        ],
        "Resource": [
            f"arn:aws:logs:*:{account_number}:log-group:cbm3_aws",
            f"arn:aws:logs:*:{account_number}:log-group:cbm3_aws:log-stream:*"
        ]
    }
    policy_document = {
        "Version": "2012-10-17",
        "Statement": [
            bucket_objects_statement,
            get_activity_task_statement,
            task_reporting_statement,
            logging_statement,
        ]
    }

    response = client.create_policy(
        PolicyName=names.instance_s3_policy,
        Path='/',
        PolicyDocument=json.dumps(policy_document),
        Description='grants read/write/delete access to a particular s3 '
                    'bucket and step function activity tasks access for '
                    'IAM instance role')
    return Namespace(policy_arn=response["Policy"]["Arn"])
def get_step_functions_executions_name(uuid):
    """Build the name used for a step functions execution.

    Args:
        uuid: identifier appended (after an underscore) to the name

    Returns:
        namespace: object whose ``step_functions_execution`` attribute is
            ``"cbm3_aws_step_functions_execution_<uuid>"``
    """
    execution_name = f"cbm3_aws_step_functions_execution_{uuid}"
    return Namespace(step_functions_execution=execution_name)