def __init__(self, scope: core.Construct, id: str, config_dict, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    """ get Comp Reg ECR Image details """
    comp_reg_image_id = ecs.ContainerImage.from_ecr_repository(
        repository=ecr.Repository.from_repository_name(
            self, "GetCompRegRepoName",
            repository_name=config_dict['workflow_ecr_repo']),
        tag=config_dict['workflow_comp_reg_image_version'])

    """ Create Comp Reg Batch Job Definition """
    createCompRegJob = batch.JobDefinition(
        self, "createCompRegJob",
        job_definition_name="comp-reg-etl-job",
        retry_attempts=2,
        container=batch.JobDefinitionContainer(
            image=comp_reg_image_id,
            memory_limit_mib=4000,
            vcpus=1,
            environment=dict(
                COMPREG_ORACLE_SECRET_NAME=config_dict['comp_reg_secret_name'],
                COMPREG_BUCKET=config_dict['datalake_bucket_name'])))

    core.CfnOutput(self, "createCompRegJobName",
                   value=createCompRegJob.job_definition_name)
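# The stack above exports the job definition name via CfnOutput. Purely as an
# illustrative sketch (not part of the stack), a run could be submitted against it
# with boto3; the queue name "comp-reg-queue" below is an assumption, since no job
# queue is defined in this snippet.
import boto3

batch_client = boto3.client("batch")
batch_client.submit_job(
    jobName="comp-reg-etl-run",         # hypothetical job name
    jobQueue="comp-reg-queue",          # assumed: no queue is created above
    jobDefinition="comp-reg-etl-job",   # matches job_definition_name above
)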
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    job_name = 'npp-batch-job'

    # TODO pass this in somehow, SSM?
    job_role = iam.Role.from_role_arn(
        self, 'job-role',
        role_arn='arn:aws:iam::138863487738:role/covariate-ingest-batch-job'
    )

    # TODO pass this in somehow, SSM?
    job_queue = batch.JobQueue.from_job_queue_arn(
        self, 'batch-queue',
        job_queue_arn='arn:aws:batch:us-east-1:138863487738:job-queue/covariate-ingest-cpu-queue'
    )

    npp_job = batch.JobDefinition(
        self, job_name,
        # Use a readable name for executing.
        job_definition_name=job_name,
        container=batch.JobDefinitionContainer(
            # https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_batch/JobDefinitionContainer.html
            image=ecs.ContainerImage.from_asset("./jobs/npp/"),
            job_role=job_role,
            memory_limit_mib=4096,
            vcpus=1,
            environment={
                "ENV": "",
                "AWS_BUCKET": "covariate-ingest-data",
                "STAC_API": "https://discovery-cosmos.azurewebsites.net/stac/dev/addItem"
            },
            privileged=False,
        ),
    )

    events.Rule(
        self, 'npp-ingest-trigger',
        description='Trigger for sample ingest',
        schedule=events.Schedule.cron(minute="0", hour="4"),  # Every day at 4am
        targets=[
            targets.BatchJob(
                job_queue=job_queue,
                job_definition=npp_job,
            )
        ],
        enabled=False)
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # ========================
    # VPC
    # ========================
    vpc = ec2.Vpc(
        self, 'fetch-and-run-vpc',
        max_azs=2,
        subnet_configuration=[
            ec2.SubnetConfiguration(
                name='public-subnet',
                subnet_type=ec2.SubnetType.PUBLIC
            )
        ],
        nat_gateways=0
    )

    # Security Group
    sg = ec2.SecurityGroup(
        self, 'fetch-and-run-sg',
        vpc=vpc,
        description='SG for fetch and run',
        security_group_name='fetch-and-run-sg'
    )

    # Ingress from IP address via HTTP, SSH
    for port in PORTS:
        sg.add_ingress_rule(
            peer=ec2.Peer.ipv4(IP_ADDRESS),
            connection=ec2.Port.tcp(port)
        )

    # ========================
    # IAM
    # ========================
    '''
    I. Batch Service Role
    - Makes calls to other AWS services on your behalf to manage the
      resources that you use with the service
    '''
    batch_service_role = iam.Role.from_role_arn(
        self, 'batch-service-role',
        role_arn=BATCH_SERVICE_ROLE_ARN
    )

    '''
    II. ECS Instance Role
    - Batch compute environments are populated with ECS container instances,
      which run the ECS container agent locally
    - The ECS container agent makes calls to AWS APIs on your behalf
    - Container instances that run the agent require a policy and role for
      these services to know that the agent belongs to you
    - The Instance Profile uses the batch instance role name
    - This is fed into the compute environment
    '''
    batch_instance_role = iam.Role.from_role_arn(
        self, 'batch-instance-role',
        role_arn=ECS_INSTANCE_ROLE_ARN
    )

    instance_profile = iam.CfnInstanceProfile(
        self, 'instance-profile',
        roles=[batch_instance_role.role_name]
    )

    '''
    III. Job Role
    - Used in the job definition
    - IAM role that the container can assume for AWS permissions

    When the fetch_and_run image runs as an AWS Batch job, it fetches the job
    script from Amazon S3. You need an IAM role that the AWS Batch job can use
    to access S3.

    Trusted Entity --> AWS service --> Elastic Container Service
    --> Elastic Container Service Task

    In the role's trust relationship, this is displayed as follows:
    {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Sid": "",
                "Effect": "Allow",
                "Principal": {
                    "Service": "ecs-tasks.amazonaws.com"
                },
                "Action": "sts:AssumeRole"
            }
        ]
    }

    Default is for a role to be created.
    '''
    batch_job_role = iam.Role.from_role_arn(
        self, 'batch-job-role',
        role_arn=BATCH_JOB_ROLE_ARN
    )

    # ========================
    # ECR
    # ========================
    '''
    Repository
    TODO: Evaluate integrating the repository into CDK (in this stack or another)
    '''
    ecr_repository = ecr.Repository.from_repository_name(
        self, 'ecr-repository',
        repository_name=ECR_REPOSITORY_NAME
    )

    '''
    Container Image
    NOTE: We pull the image directly from ECR; it is pushed before the stack is created.
    - Can alternatively build the image from files in the stack (commented out)
    TODO: Evaluate ability to programmatically update the tag.
    - Manually updating the tag follows the approach of pushing the image before
      stack creation/updates
    - Review adding an alphanumeric tag as opposed to simply 'latest' --> more
      detail for auditing
    '''
    # image_asset = ecr_assets.DockerImageAsset(
    #     self, 'docker-image',
    #     directory='./fetch-and-run',
    #     file='./Dockerfile'
    # )
    # image = ecs.ContainerImage.from_docker_image_asset(image_asset)
    image = ecs.ContainerImage.from_ecr_repository(
        repository=ecr_repository,
        tag='latest'
    )

    # ========================
    # BATCH
    # ========================
    '''
    I. Compute Environment
    - Execution runtime of submitted batch jobs
    '''
    compute_environment = batch.ComputeEnvironment(
        self, 'batch-compute-environment',
        compute_environment_name='batch-compute-environment',
        compute_resources=batch.ComputeResources(
            vpc=vpc,
            # BEST_FIT_PROGRESSIVE selects additional instance types that are
            # large enough to meet the requirements of the jobs in the queue,
            # with a preference for instance types with a lower cost.
            allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
            compute_resources_tags={"name": "fetch-and-run"},
            ec2_key_pair=KEY_PAIR,
            instance_role=instance_profile.attr_arn,
            security_groups=[sg],
            type=batch.ComputeResourceType.ON_DEMAND,
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC)
        ),
        service_role=batch_service_role,
    )

    '''
    II. Job Queue
    - Queue where batch jobs can be submitted
    '''
    job_queue = batch.JobQueue(
        self, 'fetch-and-run-queue',
        compute_environments=[
            batch.JobQueueComputeEnvironment(
                compute_environment=compute_environment,
                order=1
            )
        ],
        job_queue_name='fetch-and-run-queue'
    )

    '''
    III. Job Definition
    - Groups various job properties (image, resource requirements, env variables)
      into a single definition. Definitions are used at job submission time.
    TODO: Build out functionality for the following:
    - `command` => The command that is passed to the container. If you provide a
      shell command as a single string, you have to quote command-line arguments.
    - `environment` => The environment variables to pass to the container
    - `mount_points` => The mount points for data volumes in your container
    - `volumes` => A list of data volumes used in a job
    NOTE: Can optionally add command and environment variables directly in code
    - Alternatively can reference them in `fetch_and_run.sh`
    '''
    job_definition = batch.JobDefinition(
        self, 'fetch-and-run-job-definition',
        container=batch.JobDefinitionContainer(
            image=image,
            job_role=batch_job_role,
            # The hard limit (in MiB) of memory to present to the container
            memory_limit_mib=500,
            # The number of vCPUs reserved for the container.
            # Each vCPU is equivalent to 1,024 CPU shares.
            vcpus=1,
            user="******"
        )
    )
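# The fetch_and_run image (per the AWS "fetch & run" example this stack appears to
# follow) downloads its job script from S3 at runtime, conventionally via the
# BATCH_FILE_S3_URL and BATCH_FILE_TYPE environment variables. A minimal submission
# sketch under that assumption; the job definition name, script name, and bucket are
# placeholders, not values created above (CDK generates the actual definition name):
import boto3

boto3.client("batch").submit_job(
    jobName="fetch-and-run-example",
    jobQueue="fetch-and-run-queue",                # created above
    jobDefinition="fetch-and-run-job-definition",  # placeholder: CDK-generated name in practice
    containerOverrides={
        "command": ["myjob.sh"],                   # hypothetical script name
        "environment": [
            {"name": "BATCH_FILE_TYPE", "value": "script"},
            {"name": "BATCH_FILE_S3_URL", "value": "s3://my-job-bucket/myjob.sh"},  # placeholder bucket
        ],
    },
)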
def __init__(self, scope: core.Construct, id: str, props, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    ################################################################################
    # Set up permissions
    ro_buckets = set()
    for bucket in props['ro_buckets']:
        tmp_bucket = s3.Bucket.from_bucket_name(self, bucket, bucket_name=bucket)
        ro_buckets.add(tmp_bucket)

    rw_buckets = set()
    for bucket in props['rw_buckets']:
        tmp_bucket = s3.Bucket.from_bucket_name(self, bucket, bucket_name=bucket)
        rw_buckets.add(tmp_bucket)

    batch_service_role = iam.Role(
        self, 'BatchServiceRole',
        assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSBatchServiceRole')
        ])

    spotfleet_role = iam.Role(
        self, 'AmazonEC2SpotFleetRole',
        assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonEC2SpotFleetTaggingRole')
        ])

    # Create role for Batch instances
    batch_instance_role = iam.Role(
        self, 'BatchInstanceRole',
        role_name='UmccriseBatchInstanceRole',
        assumed_by=iam.CompositePrincipal(
            iam.ServicePrincipal('ec2.amazonaws.com'),
            iam.ServicePrincipal('ecs.amazonaws.com')),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonEC2RoleforSSM'),
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonEC2ContainerServiceforEC2Role')
        ])

    batch_instance_role.add_to_policy(
        iam.PolicyStatement(
            actions=[
                "ec2:Describe*",
                "ec2:AttachVolume",
                "ec2:CreateVolume",
                "ec2:CreateTags",
                "ec2:ModifyInstanceAttribute"
            ],
            resources=["*"]))
    batch_instance_role.add_to_policy(
        iam.PolicyStatement(actions=["ecs:ListClusters"], resources=["*"]))

    for bucket in ro_buckets:
        bucket.grant_read(batch_instance_role)
    for bucket in rw_buckets:
        # restrict write to paths with */umccrised/*
        bucket.grant_read_write(batch_instance_role, '*/umccrised/*')

    # Turn the instance role into an Instance Profile
    batch_instance_profile = iam.CfnInstanceProfile(
        self, 'BatchInstanceProfile',
        instance_profile_name='UmccriseBatchInstanceProfile',
        roles=[batch_instance_role.role_name])

    ################################################################################
    # Minimal networking
    # TODO: import resource created with TF
    vpc = props['vpc']

    ################################################################################
    # Setup Batch compute resources

    # Configure BlockDevice to expand instance disk space (if needed?)
    block_device_mappings = [{
        'deviceName': '/dev/xvdf',
        'ebs': {
            'deleteOnTermination': True,
            'volumeSize': 1024,
            'volumeType': 'gp2'
        }
    }]

    launch_template = ec2.CfnLaunchTemplate(
        self, 'UmccriseBatchComputeLaunchTemplate',
        launch_template_name='UmccriseBatchComputeLaunchTemplate',
        launch_template_data={
            'userData': core.Fn.base64(user_data_script),
            'blockDeviceMappings': block_device_mappings
        })

    launch_template_spec = batch.LaunchTemplateSpecification(
        launch_template_name=launch_template.launch_template_name,
        version='$Latest')

    my_compute_res = batch.ComputeResources(
        type=batch.ComputeResourceType.SPOT,
        allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
        desiredv_cpus=0,
        maxv_cpus=128,
        minv_cpus=0,
        image=ec2.MachineImage.generic_linux(
            ami_map={'ap-southeast-2': props['compute_env_ami']}),
        launch_template=launch_template_spec,
        spot_fleet_role=spotfleet_role,
        instance_role=batch_instance_profile.instance_profile_name,
        vpc=vpc,
        # compute_resources_tags=core.Tag('Creator', 'Batch')
    )
    # XXX: How to add more than one tag above??
    # core.Tag.add(my_compute_res, 'Foo', 'Bar')

    my_compute_env = batch.ComputeEnvironment(
        self, 'UmccriseBatchComputeEnv',
        compute_environment_name="cdk-umccrise-batch-compute-env",
        service_role=batch_service_role,
        compute_resources=my_compute_res)

    job_queue = batch.JobQueue(
        self, 'UmccriseJobQueue',
        job_queue_name='cdk-umccrise_job_queue',
        compute_environments=[
            batch.JobQueueComputeEnvironment(
                compute_environment=my_compute_env,
                order=1)
        ],
        priority=10)

    job_container = batch.JobDefinitionContainer(
        image=ecs.ContainerImage.from_registry(name=props['container_image']),
        vcpus=2,
        memory_limit_mib=2048,
        command=["/opt/container/umccrise-wrapper.sh", "Ref::vcpus"],
        mount_points=[
            ecs.MountPoint(
                container_path='/work',
                read_only=False,
                source_volume='work'),
            ecs.MountPoint(
                container_path='/opt/container',
                read_only=True,
                source_volume='container')
        ],
        volumes=[
            ecs.Volume(
                name='container',
                host=ecs.Host(source_path='/opt/container')),
            ecs.Volume(
                name='work',
                host=ecs.Host(source_path='/mnt'))
        ],
        privileged=True)

    job_definition = batch.JobDefinition(
        self, 'UmccriseJobDefinition',
        job_definition_name='cdk-umccrise-job-definition',
        parameters={'vcpus': '1'},
        container=job_container,
        timeout=core.Duration.hours(5))

    ################################################################################
    # Set up job submission Lambda

    lambda_role = iam.Role(
        self, 'UmccriseLambdaRole',
        assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSLambdaBasicExecutionRole'),
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AWSBatchFullAccess')  # TODO: restrict!
        ])

    for bucket in ro_buckets:
        bucket.grant_read(lambda_role)
    for bucket in rw_buckets:
        bucket.grant_read(lambda_role)

    # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
    #       may need a default JobDefinition to be set up
    lmbda.Function(
        self, 'UmccriseLambda',
        function_name='umccrise_batch_lambda',
        handler='umccrise.lambda_handler',
        runtime=lmbda.Runtime.PYTHON_3_7,
        code=lmbda.Code.from_asset('lambdas/umccrise'),
        environment={
            'JOBNAME_PREFIX': "UMCCRISE_",
            'JOBQUEUE': job_queue.job_queue_name,
            'REFDATA_BUCKET': props['refdata_bucket'],
            'DATA_BUCKET': props['data_bucket'],
            'UMCCRISE_MEM': '50000',
            'UMCCRISE_VCPUS': '16'
        },
        role=lambda_role)
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    stack_role = iam.Role(
        self, "SimulationServiceRole",
        assumed_by=iam.ServicePrincipal("batch.amazonaws.com"),
    )
    stack_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name("AdministratorAccess"))

    job_role = iam.Role(
        self, "SimulationJobServiceRole",
        assumed_by=iam.ServicePrincipal("ecs-tasks.amazonaws.com"),
    )
    job_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name("AdministratorAccess"))

    lambda_role = iam.Role(
        self, "SimulationLambdaServiceRole",
        assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
    )
    lambda_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name("AdministratorAccess"))

    # Create Input S3
    input_bucket = s3.Bucket(self, "InputS3Bucket")
    # Create Output S3
    output_bucket = s3.Bucket(self, "OutputS3Bucket")

    # admin_policy = iam.from_policy_name("AdministratorAccess", "AdministratorAccess")

    job_table = aws_dynamodb.Table(
        self,
        id="JobTable",
        partition_key=aws_dynamodb.Attribute(
            name="PK", type=aws_dynamodb.AttributeType.STRING),
        stream=aws_dynamodb.StreamViewType.NEW_AND_OLD_IMAGES,
        removal_policy=core.RemovalPolicy.DESTROY,
    )

    orchestration_handler_lambda = aws_lambda.Function(
        self,
        id="JobOrchestrationHandler",
        runtime=aws_lambda.Runtime.PYTHON_3_7,
        handler="orchestration_handler_lambda.handler",
        code=aws_lambda.Code.asset("./simulations_service/functions/"),
    )

    # Give only write access to the post handler
    job_table.grant_write_data(orchestration_handler_lambda)
    # Pass table_name as env variable
    orchestration_handler_lambda.add_environment("TABLE_NAME", job_table.table_name)

    # Create lambda function for processing dynamodb streams
    dynamodb_streams_processor_lambda = aws_lambda.Function(
        self,
        id="JobsDynamoDBStreamsProcessor",
        runtime=aws_lambda.Runtime.PYTHON_3_7,
        handler="dynamodb_streams_processor_lambda.handler",
        code=aws_lambda.Code.asset("./simulations_service/functions/"),
        role=lambda_role,
    )

    # Add dynamo db as lambda event source
    dynamodb_streams_processor_lambda.add_event_source(
        aws_lambda_event_sources.DynamoEventSource(
            job_table,
            starting_position=aws_lambda.StartingPosition.LATEST,
            batch_size=1,
        ))
    dynamodb_streams_processor_lambda.add_environment(
        "S3_OUTPUT_BUCKET", output_bucket.bucket_name)
    dynamodb_streams_processor_lambda.add_environment(
        "TABLE_NAME", job_table.table_name)

    vpc = ec2.Vpc(self, "VPC")

    spot_environment = batch.ComputeEnvironment(
        self,
        "MyComputedEnvironment",
        compute_resources={
            "vpc": vpc,
        },
        service_role=stack_role.without_policy_updates(),
    )

    job_queue = batch.JobQueue(
        self,
        "JobQueue",
        compute_environments=[
            batch.JobQueueComputeEnvironment(
                compute_environment=spot_environment, order=1)
        ],
    )
    dynamodb_streams_processor_lambda.add_environment(
        "JOB_QUEUE", job_queue.job_queue_name)

    job_definition = batch.JobDefinition(
        self,
        "batch-job-def-from-local",
        container={
            "image": ecs.ContainerImage.from_asset("./simulations_service/job/"),
            "memory_limit_mib": 500,
            "privileged": True,
            "job_role": job_role,
        },
    )
    dynamodb_streams_processor_lambda.add_environment(
        "JOB_DEFINITION", job_definition.job_definition_name)

    orchestration_handler_lambda.add_event_source(
        aws_lambda_event_sources.S3EventSource(
            bucket=input_bucket,
            events=[s3.EventType.OBJECT_CREATED],
        ))
def __init__(self, app: core.Construct, stack_name: str, vpc: aws_ec2.Vpc,
             security_group: aws_ec2.SecurityGroup):
    super().__init__(scope=app, id=f"{stack_name}-batch")

    batch_role = aws_iam.Role(
        scope=self,
        id=f"batch_role",
        role_name=f"batch_role",
        assumed_by=aws_iam.ServicePrincipal("batch.amazonaws.com"))
    batch_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_managed_policy_arn(
            scope=self,
            id=f"AWSBatchServiceRole",
            managed_policy_arn="arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole"))
    batch_role.add_to_policy(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=["arn:aws:logs:*:*:*"],
            actions=[
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents",
                "logs:DescribeLogStreams"
            ]))

    # Role to attach to EC2 instances
    instance_role = aws_iam.Role(
        scope=self,
        id=f"instance_role",
        role_name=f"instance_role_for",
        assumed_by=aws_iam.ServicePrincipal("ec2.amazonaws.com"))
    instance_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_managed_policy_arn(
            scope=self,
            id=f"AmazonEC2ContainerServiceforEC2Role",
            managed_policy_arn="arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"))
    # add policy to access S3
    instance_role.add_to_policy(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=["*"],
            actions=["s3:*"]))
    # add policy to access CloudWatch Logs
    instance_role.add_to_policy(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=["arn:aws:logs:*:*:*"],
            actions=[
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents",
                "logs:DescribeLogStreams"
            ]))

    # attach the role to EC2 via an instance profile
    instance_profile = aws_iam.CfnInstanceProfile(
        scope=self,
        id=f"instance_profile",
        instance_profile_name=f"instance_profile",
        roles=[instance_role.role_name])

    # ===== #
    # batch #
    # ===== #
    batch_compute_resources = aws_batch.ComputeResources(
        vpc=vpc,
        maxv_cpus=4,
        minv_cpus=0,
        security_groups=[security_group],
        instance_role=instance_profile.attr_arn,
        type=aws_batch.ComputeResourceType.SPOT)

    batch_compute_environment = aws_batch.ComputeEnvironment(
        scope=self,
        id="batch_compute_environment",
        compute_environment_name="batch_compute_environment",
        compute_resources=batch_compute_resources,
        service_role=batch_role)

    job_role = aws_iam.Role(
        scope=self,
        id=f"job_role",
        role_name=f"job_role",
        assumed_by=aws_iam.ServicePrincipal("ecs-tasks.amazonaws.com"))
    job_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_managed_policy_arn(
            scope=self,
            id=f"AmazonECSTaskExecutionRolePolicy",
            managed_policy_arn="arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"))
    job_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_managed_policy_arn(
            scope=self,
            id=f"AmazonS3FullAccess",
            managed_policy_arn="arn:aws:iam::aws:policy/AmazonS3FullAccess"))
    job_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_managed_policy_arn(
            scope=self,
            id=f"CloudWatchLogsFullAccess",
            managed_policy_arn="arn:aws:iam::aws:policy/CloudWatchLogsFullAccess"))

    self.batch_job_queue = aws_batch.JobQueue(
        scope=self,
        id=f"job_queue",
        job_queue_name=f"job_queue",
        compute_environments=[
            aws_batch.JobQueueComputeEnvironment(
                compute_environment=batch_compute_environment, order=1)
        ],
        priority=1)

    # ECR repository
    ecr_repository = aws_ecr_assets.DockerImageAsset(
        scope=self,
        id=f"ecr_image",
        directory="./docker",
        repository_name=f"repository")

    # get image from ECR
    container_image = aws_ecs.ContainerImage.from_ecr_repository(
        repository=ecr_repository.repository)

    # Job definition: pass `S3_BUCKET` as an environment variable.
    self.batch_job_definition = aws_batch.JobDefinition(
        scope=self,
        id=f"job_definition",
        job_definition_name=f"job_definition",
        container=aws_batch.JobDefinitionContainer(
            image=container_image,
            environment={"S3_BUCKET": f"{S3_BUCKET}"},
            job_role=job_role,
            vcpus=1,
            memory_limit_mib=1024))
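# The queue and job definition are exposed as stack attributes (self.batch_job_queue /
# self.batch_job_definition), so another construct can wire them up. A hypothetical
# consumer stack, mirroring the events/targets pattern used elsewhere in this section
# (class name and schedule below are assumptions, not part of the original stack):
from aws_cdk import core, aws_events, aws_events_targets

class NightlyBatchTrigger(core.Stack):
    def __init__(self, app: core.App, stack_name: str, batch_stack) -> None:
        super().__init__(scope=app, id=f"{stack_name}-trigger")
        aws_events.Rule(
            self, "nightly-batch-run",
            # run once per day at 01:00 UTC (illustrative schedule)
            schedule=aws_events.Schedule.cron(minute="0", hour="1"),
            targets=[
                aws_events_targets.BatchJob(
                    job_queue=batch_stack.batch_job_queue,
                    job_definition=batch_stack.batch_job_definition,
                )
            ])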
def __init__(self, scope: core.Construct, id: str, props, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    dirname = os.path.dirname(__file__)

    ecr_repo = ecr.Repository.from_repository_name(
        self, 'UmccriseEcrRepo',
        repository_name='umccrise'
    )

    ################################################################################
    # Set up permissions
    ro_buckets = set()
    for bucket in props['ro_buckets']:
        tmp_bucket = s3.Bucket.from_bucket_name(
            self, bucket,
            bucket_name=bucket
        )
        ro_buckets.add(tmp_bucket)

    rw_buckets = set()
    for bucket in props['rw_buckets']:
        tmp_bucket = s3.Bucket.from_bucket_name(
            self, bucket,
            bucket_name=bucket
        )
        rw_buckets.add(tmp_bucket)

    batch_service_role = iam.Role(
        self, 'BatchServiceRole',
        assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSBatchServiceRole')
        ]
    )

    spotfleet_role = iam.Role(
        self, 'AmazonEC2SpotFleetRole',
        assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2SpotFleetTaggingRole')
        ]
    )

    # Create role for Batch instances
    batch_instance_role = iam.Role(
        self, 'BatchInstanceRole',
        role_name='UmccriseBatchInstanceRole',
        assumed_by=iam.CompositePrincipal(
            iam.ServicePrincipal('ec2.amazonaws.com'),
            iam.ServicePrincipal('ecs.amazonaws.com')
        ),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2RoleforSSM'),
            iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2ContainerServiceforEC2Role')
        ]
    )

    batch_instance_role.add_to_policy(
        iam.PolicyStatement(
            actions=[
                "ec2:Describe*",
                "ec2:AttachVolume",
                "ec2:CreateVolume",
                "ec2:CreateTags",
                "ec2:ModifyInstanceAttribute"
            ],
            resources=["*"]
        )
    )
    batch_instance_role.add_to_policy(
        iam.PolicyStatement(
            actions=[
                "ecs:ListClusters"
            ],
            resources=["*"]
        )
    )

    for bucket in ro_buckets:
        bucket.grant_read(batch_instance_role)
    for bucket in rw_buckets:
        # restrict write to paths with */umccrised/*
        bucket.grant_read_write(batch_instance_role, '*/umccrised/*')

    # Turn the instance role into an Instance Profile
    batch_instance_profile = iam.CfnInstanceProfile(
        self, 'BatchInstanceProfile',
        instance_profile_name='UmccriseBatchInstanceProfile',
        roles=[batch_instance_role.role_name]
    )

    ################################################################################
    # Network
    # (Import common infrastructure, maintained via Terraform)

    # VPC
    vpc = ec2.Vpc.from_lookup(
        self, 'UmccrMainVpc',
        tags={'Name': 'main-vpc', 'Stack': 'networking'}
    )

    batch_security_group = ec2.SecurityGroup(
        self, "BatchSecurityGroup",
        vpc=vpc,
        description="Allow all outbound, no inbound traffic"
    )

    ################################################################################
    # Setup Batch compute resources

    # Configure BlockDevice to expand instance disk space (if needed?)
    block_device_mappings = [
        {
            'deviceName': '/dev/xvdf',
            'ebs': {
                'deleteOnTermination': True,
                'encrypted': True,
                'volumeSize': 2048,
                'volumeType': 'gp2'
            }
        }
    ]

    # Set up custom user data to configure the Batch instances
    umccrise_wrapper_asset = assets.Asset(
        self, 'UmccriseWrapperAsset',
        path=os.path.join(dirname, '..', 'assets', "umccrise-wrapper.sh")
    )
    umccrise_wrapper_asset.grant_read(batch_instance_role)

    user_data_asset = assets.Asset(
        self, 'UserDataAsset',
        path=os.path.join(dirname, '..', 'assets', "batch-user-data.sh")
    )
    user_data_asset.grant_read(batch_instance_role)

    user_data = ec2.UserData.for_linux()
    local_path = user_data.add_s3_download_command(
        bucket=user_data_asset.bucket,
        bucket_key=user_data_asset.s3_object_key
    )
    user_data.add_execute_file_command(
        file_path=local_path,
        arguments=f"s3://{umccrise_wrapper_asset.bucket.bucket_name}/{umccrise_wrapper_asset.s3_object_key}"
    )

    # Generate a user data wrapper to comply with the MIME multi-part archive
    # format that LaunchTemplate user data requires
    mime_wrapper = ec2.UserData.custom('MIME-Version: 1.0')
    mime_wrapper.add_commands('Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="')
    mime_wrapper.add_commands('')
    mime_wrapper.add_commands('--==MYBOUNDARY==')
    mime_wrapper.add_commands('Content-Type: text/x-shellscript; charset="us-ascii"')
    mime_wrapper.add_commands('')
    # install the AWS CLI, as it's unexpectedly missing from the Amazon Linux 2 AMI...
    mime_wrapper.add_commands('yum -y install unzip')
    mime_wrapper.add_commands('cd /opt')
    mime_wrapper.add_commands('curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"')
    mime_wrapper.add_commands('unzip awscliv2.zip')
    mime_wrapper.add_commands('sudo ./aws/install --bin-dir /usr/bin')
    # insert our actual user data payload
    mime_wrapper.add_commands(user_data.render())
    mime_wrapper.add_commands('--==MYBOUNDARY==--')

    launch_template = ec2.CfnLaunchTemplate(
        self, 'UmccriseBatchComputeLaunchTemplate',
        launch_template_name='UmccriseBatchComputeLaunchTemplate',
        launch_template_data={
            'userData': core.Fn.base64(mime_wrapper.render()),
            'blockDeviceMappings': block_device_mappings
        }
    )

    launch_template_spec = batch.LaunchTemplateSpecification(
        launch_template_name=launch_template.launch_template_name,
        version='$Latest'
    )

    my_compute_res = batch.ComputeResources(
        type=(batch.ComputeResourceType.SPOT
              if props['compute_env_type'].lower() == 'spot'
              else batch.ComputeResourceType.ON_DEMAND),
        allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
        desiredv_cpus=0,
        maxv_cpus=320,
        minv_cpus=0,
        image=ec2.MachineImage.generic_linux(ami_map={'ap-southeast-2': props['compute_env_ami']}),
        launch_template=launch_template_spec,
        spot_fleet_role=spotfleet_role,
        instance_role=batch_instance_profile.instance_profile_name,
        vpc=vpc,
        vpc_subnets=ec2.SubnetSelection(
            subnet_type=ec2.SubnetType.PRIVATE,
            # availability_zones=["ap-southeast-2a"]
        ),
        security_groups=[batch_security_group]
        # compute_resources_tags=core.Tag('Creator', 'Batch')
    )
    # XXX: How to add more than one tag above??
    # https://github.com/aws/aws-cdk/issues/7350
    # core.Tag.add(my_compute_res, 'Foo', 'Bar')

    my_compute_env = batch.ComputeEnvironment(
        self, 'UmccriseBatchComputeEnv',
        compute_environment_name="cdk-umccr_ise-batch-compute-env",
        service_role=batch_service_role,
        compute_resources=my_compute_res
    )
    # child = my_compute_env.node.default_child
    # child_comp_res = child.compute_resources
    # child_comp_res.tags = "{'Foo': 'Bar'}"

    job_queue = batch.JobQueue(
        self, 'UmccriseJobQueue',
        job_queue_name='cdk-umccrise_job_queue',
        compute_environments=[
            batch.JobQueueComputeEnvironment(
                compute_environment=my_compute_env,
                order=1
            )
        ],
        priority=10
    )

    job_container = batch.JobDefinitionContainer(
        image=ecs.ContainerImage.from_registry(name=props['container_image']),
        vcpus=32,
        memory_limit_mib=100000,
        command=[
            "/opt/container/umccrise-wrapper.sh", "Ref::vcpus"
        ],
        mount_points=[
            ecs.MountPoint(
                container_path='/work',
                read_only=False,
                source_volume='work'
            ),
            ecs.MountPoint(
                container_path='/opt/container',
                read_only=True,
                source_volume='container'
            )
        ],
        volumes=[
            ecs.Volume(
                name='container',
                host=ecs.Host(source_path='/opt/container')
            ),
            ecs.Volume(
                name='work',
                host=ecs.Host(source_path='/mnt')
            )
        ],
        privileged=True
    )

    job_definition = batch.JobDefinition(
        self, 'UmccriseJobDefinition',
        job_definition_name='cdk-umccrise-job-definition',
        parameters={'vcpus': '1'},
        container=job_container,
        timeout=core.Duration.hours(5)
    )

    ################################################################################
    # Set up job submission Lambda

    lambda_role = iam.Role(
        self, 'UmccriseLambdaRole',
        assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSLambdaBasicExecutionRole'),
            iam.ManagedPolicy.from_aws_managed_policy_name('AWSBatchFullAccess')  # TODO: restrict!
        ]
    )

    for bucket in ro_buckets:
        bucket.grant_read(lambda_role)
    for bucket in rw_buckets:
        bucket.grant_read(lambda_role)
    ecr_repo.grant(lambda_role, 'ecr:ListImages')

    # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
    #       may need a default JobDefinition to be set up
    lmbda.Function(
        self, 'UmccriseLambda',
        function_name='umccrise_batch_lambda',
        handler='umccrise.lambda_handler',
        runtime=lmbda.Runtime.PYTHON_3_7,
        code=lmbda.Code.from_asset('lambdas/umccrise'),
        environment={
            'JOBNAME_PREFIX': "UMCCRISE_",
            'JOBQUEUE': job_queue.job_queue_name,
            'UMCCRISE_MEM': '100000',
            'UMCCRISE_VCPUS': '32',
            'JOBDEF': job_definition.job_definition_name,
            'REFDATA_BUCKET': props['refdata_bucket'],
            'INPUT_BUCKET': props['input_bucket'],
            'RESULT_BUCKET': props['result_bucket'],
            'IMAGE_CONFIGURABLE': props['image_configurable']
        },
        role=lambda_role
    )
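# The handler code under lambdas/umccrise is not part of this snippet. Purely as a
# hypothetical sketch of how the environment variables wired up above (JOBNAME_PREFIX,
# JOBQUEUE, JOBDEF, UMCCRISE_MEM, UMCCRISE_VCPUS) could be consumed, a submission
# handler might look roughly like this; the event field is an assumption.
import os
import boto3

batch_client = boto3.client('batch')

def lambda_handler(event, context):
    vcpus = int(os.environ['UMCCRISE_VCPUS'])
    response = batch_client.submit_job(
        jobName=os.environ['JOBNAME_PREFIX'] + event['jobSuffix'],  # assumed event field
        jobQueue=os.environ['JOBQUEUE'],
        jobDefinition=os.environ['JOBDEF'],
        parameters={'vcpus': str(vcpus)},  # substituted into the Ref::vcpus command placeholder
        containerOverrides={
            'vcpus': vcpus,
            'memory': int(os.environ['UMCCRISE_MEM']),
        },
    )
    return response['jobId']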
def __init__(self, scope: cdk.Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    # The code that defines your stack goes here
    table = dynamodb.Table(
        self,
        "TheTable",
        table_name="cdk-table",
        partition_key=dynamodb.Attribute(
            name="id", type=dynamodb.AttributeType.STRING),
        removal_policy=cdk.RemovalPolicy.DESTROY,
    )

    # compute_environment = batch.ComputeEnvironment(
    #     self,
    #     "MyComputeEnvironment",
    #     compute_environment_name="cdk-env",
    #     compute_resources=batch.ComputeResources(
    #         vpc=Vpc.from_lookup(self, "VPC", is_default=True),
    #     ),
    #     enabled=True,
    #     managed=True,
    # )

    job_role = Role(
        self,
        "BatchJobRole",
        assumed_by=ServicePrincipal("ecs-tasks.amazonaws.com"),
        description="Role for a container in a Batch job",
        role_name="CDK-BatchJobRole",
        managed_policies=[
            ManagedPolicy.from_aws_managed_policy_name(
                managed_policy_name="AmazonDynamoDBFullAccess"),
        ],
    )

    repository = Repository(
        self,
        "MyRepository",
        removal_policy=cdk.RemovalPolicy.DESTROY,
        repository_name="cdk-my-repository",
        lifecycle_rules=[
            LifecycleRule(max_image_count=5, description="Max 5 images")
        ],
    )

    image: ContainerImage = ContainerImage.from_ecr_repository(
        repository=repository,
        tag="latest",
    )

    container = batch.JobDefinitionContainer(
        image=image,
        job_role=job_role,
        command=["python", "run.py", "--help"],
        environment={
            "READINGS_TABLE": table.table_name,
            "AWS_REGION": self.region,
        },
        vcpus=1,
        log_configuration=batch.LogConfiguration(
            log_driver=batch.LogDriver.AWSLOGS),
        memory_limit_mib=2048,
    )

    batch.JobDefinition(
        self,
        "JobDefinitionCreate",
        container=container,
        job_definition_name="create",
        retry_attempts=1,
    )
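# The "create" job definition above runs `python run.py --help` with READINGS_TABLE and
# AWS_REGION in its environment. The actual run.py is not shown here; as an illustrative
# sketch only (the item fields and the --value flag are assumptions), the container
# script could write to the DynamoDB table roughly like this:
import argparse
import os
import uuid

import boto3

def main() -> None:
    parser = argparse.ArgumentParser(description="Write a sample reading to DynamoDB")
    parser.add_argument("--value", default="hello", help="hypothetical payload value")
    args = parser.parse_args()

    table = boto3.resource("dynamodb").Table(os.environ["READINGS_TABLE"])
    # "id" matches the partition key defined on cdk-table above
    table.put_item(Item={"id": str(uuid.uuid4()), "value": args.value})

if __name__ == "__main__":
    main()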
def __init__(self, app: core.App, stack_name: str, stack_env: str):
    super().__init__(scope=app, id=f"{stack_name}-{stack_env}")

    # CIDR
    cidr = "192.168.0.0/24"

    # === #
    # vpc #
    # === #
    vpc = aws_ec2.Vpc(
        self,
        id=f"{stack_name}-{stack_env}-vpc",
        cidr=cidr,
        subnet_configuration=[
            # Public Subnet
            aws_ec2.SubnetConfiguration(
                cidr_mask=28,
                name=f"{stack_name}-{stack_env}-public",
                subnet_type=aws_ec2.SubnetType.PUBLIC,
            )
        ],
    )

    security_group = aws_ec2.SecurityGroup(
        self,
        id=f'security-group-for-{stack_name}-{stack_env}',
        vpc=vpc,
        security_group_name=f'security-group-for-{stack_name}-{stack_env}',
        allow_all_outbound=True)

    batch_role = aws_iam.Role(
        scope=self,
        id=f"batch_role_for_{stack_name}-{stack_env}",
        role_name=f"batch_role_for_{stack_name}-{stack_env}",
        assumed_by=aws_iam.ServicePrincipal("batch.amazonaws.com"))
    batch_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_managed_policy_arn(
            scope=self,
            id=f"AWSBatchServiceRole-{stack_env}",
            managed_policy_arn="arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole"))
    batch_role.add_to_policy(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=["arn:aws:logs:*:*:*"],
            actions=[
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents",
                "logs:DescribeLogStreams"
            ]))

    # Role to attach to EC2 instances
    instance_role = aws_iam.Role(
        scope=self,
        id=f"instance_role_for_{stack_name}-{stack_env}",
        role_name=f"instance_role_for_{stack_name}-{stack_env}",
        assumed_by=aws_iam.ServicePrincipal("ec2.amazonaws.com"))
    instance_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_managed_policy_arn(
            scope=self,
            id=f"AmazonEC2ContainerServiceforEC2Role-{stack_env}",
            managed_policy_arn="arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"))
    # add policy to access S3
    instance_role.add_to_policy(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=["*"],
            actions=["s3:*"]))
    # add policy to access CloudWatch Logs
    instance_role.add_to_policy(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=["arn:aws:logs:*:*:*"],
            actions=[
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents",
                "logs:DescribeLogStreams"
            ]))

    # attach the role to EC2 via an instance profile
    instance_profile = aws_iam.CfnInstanceProfile(
        scope=self,
        id=f"instance_profile_for_{stack_name}-{stack_env}",
        instance_profile_name=f"instance_profile_for_{stack_name}-{stack_env}",
        roles=[instance_role.role_name])

    # ===== #
    # batch #
    # ===== #
    batch_compute_resources = aws_batch.ComputeResources(
        vpc=vpc,
        maxv_cpus=4,
        minv_cpus=0,
        security_groups=[security_group],
        instance_role=instance_profile.attr_arn,
        type=aws_batch.ComputeResourceType.SPOT)

    batch_compute_environment = aws_batch.ComputeEnvironment(
        scope=self,
        id=f"ProjectEnvironment-{stack_env}",
        compute_environment_name=f"ProjectEnvironmentBatch-{stack_env}",
        compute_resources=batch_compute_resources,
        service_role=batch_role)

    job_role = aws_iam.Role(
        scope=self,
        id=f"job_role_{stack_name}-{stack_env}",
        role_name=f"job_role_{stack_name}-{stack_env}",
        assumed_by=aws_iam.ServicePrincipal("ecs-tasks.amazonaws.com"))
    job_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_managed_policy_arn(
            scope=self,
            id=f"AmazonECSTaskExecutionRolePolicy_{stack_name}-{stack_env}",
            managed_policy_arn="arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"))
    job_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_managed_policy_arn(
            scope=self,
            id=f"AmazonS3FullAccess_{stack_name}-{stack_env}",
            managed_policy_arn="arn:aws:iam::aws:policy/AmazonS3FullAccess"))
    job_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_managed_policy_arn(
            scope=self,
            id=f"CloudWatchLogsFullAccess_{stack_name}-{stack_env}",
            managed_policy_arn="arn:aws:iam::aws:policy/CloudWatchLogsFullAccess"))

    batch_job_queue = aws_batch.JobQueue(
        scope=self,
        id=f"job_queue_for_{stack_name}-{stack_env}",
        job_queue_name=f"job_queue_for_{stack_name}-{stack_env}",
        compute_environments=[
            aws_batch.JobQueueComputeEnvironment(
                compute_environment=batch_compute_environment, order=1)
        ],
        priority=1)

    # ECR repository
    ecr_repository = aws_ecr_assets.DockerImageAsset(
        scope=self,
        id=f"ecr_image_{stack_env}",
        directory="./docker",
        repository_name=f"repository_for_{stack_env}")

    # get image from ECR
    container_image = aws_ecs.ContainerImage.from_ecr_repository(
        repository=ecr_repository.repository)

    # Job definition: pass `S3_BUCKET` as an environment variable.
    batch_job_definition = aws_batch.JobDefinition(
        scope=self,
        id=f"job_definition_for_{stack_env}",
        job_definition_name=f"job_definition_for_{stack_env}",
        container=aws_batch.JobDefinitionContainer(
            image=container_image,
            environment={"S3_BUCKET": f"{S3_BUCKET}"},
            job_role=job_role,
            vcpus=1,
            memory_limit_mib=1024))

    # ============= #
    # StepFunctions #
    # ============= #
    # Ref::{keyword} placeholders in the command are replaced from the Step Functions input
    command_overrides = ["python", "__init__.py", "--time", "Ref::time"]
    batch_task = aws_sfn_tasks.BatchSubmitJob(
        scope=self,
        id=f"batch_job_{stack_env}",
        job_definition=batch_job_definition,
        job_name=f"batch_job_{stack_env}_today",
        job_queue=batch_job_queue,
        container_overrides=aws_sfn_tasks.BatchContainerOverrides(
            command=command_overrides),
        payload=aws_sfn.TaskInput.from_object({"time.$": "$.time"}))

    # `one step` for the StepFunctions definition
    definition = batch_task

    sfn_daily_process = aws_sfn.StateMachine(
        scope=self,
        id=f"YourProjectSFn-{stack_env}",
        definition=definition)

    # ================ #
    # CloudWatch Event #
    # ================ #
    # Run every day at 21:30 JST
    # See https://docs.aws.amazon.com/lambda/latest/dg/tutorial-scheduled-events-schedule-expressions.html
    events_daily_process = aws_events.Rule(
        scope=self,
        id=f"DailySFnProcess-{stack_env}",
        schedule=aws_events.Schedule.cron(
            minute="30", hour="12", month='*', day="*", year='*'),
    )
    events_daily_process.add_target(
        aws_events_targets.SfnStateMachine(sfn_daily_process))
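# Purely as an illustration of how the `Ref::time` parameter above is resolved: the
# EventBridge rule starts the state machine on a schedule, but an execution can also be
# started manually. The "$.time" path in the task payload picks up the "time" field of
# the execution input, which Batch then substitutes into the container command. The
# state machine ARN and timestamp below are placeholders.
import json
import boto3

boto3.client("stepfunctions").start_execution(
    stateMachineArn="arn:aws:states:us-east-1:123456789012:stateMachine:YourProjectSFn-dev",  # placeholder
    input=json.dumps({"time": "2021-01-01T12:30:00Z"}),
)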