def _add_job_definition_serial(self):
    """Create the single-container Batch job definition.

    The definition is attached to ``self.stack_scope`` under the construct
    id ``JobDefinitionSerial`` and uses the container properties produced by
    ``self._get_container_properties()``.

    Returns:
        The newly created ``batch.CfnJobDefinition``.
    """
    parent_scope = self.stack_scope
    container_props = self._get_container_properties()
    return batch.CfnJobDefinition(
        parent_scope,
        "JobDefinitionSerial",
        type="container",
        container_properties=container_props,
    )
def _add_job_definition_mnp(self):
    """Create the multi-node ("multinode" type) Batch job definition.

    The definition is attached to ``self.stack_scope`` under the construct
    id ``JobDefinitionMNP``. It declares one node, with node 0 as the main
    node, and a single node-range entry (``target_nodes="0:"``) that uses the
    container properties produced by ``self._get_container_properties()``.

    Returns:
        The newly created ``batch.CfnJobDefinition``.
    """
    parent_scope = self.stack_scope

    # One range entry whose container settings are shared with the serial job.
    node_range = batch.CfnJobDefinition.NodeRangePropertyProperty(
        target_nodes="0:",
        container=self._get_container_properties(),
    )
    node_props = batch.CfnJobDefinition.NodePropertiesProperty(
        main_node=0,
        num_nodes=1,
        node_range_properties=[node_range],
    )

    return batch.CfnJobDefinition(
        parent_scope,
        "JobDefinitionMNP",
        type="multinode",
        node_properties=node_props,
    )
def __init__(self, scope: core.Construct, id: str, props, **kwargs) -> None:
    """Provision the RNAsum AWS Batch resources and the job-submission Lambda.

    Creates, in order: imported S3 buckets and IAM roles, a launch template,
    a SPOT compute environment, a job queue, a container job definition and
    the Lambda function that is configured with the queue and job definition
    names.

    Args:
        scope: Parent construct.
        id: Construct id of this construct within ``scope``.
        props: Mapping read for the keys ``ro_buckets`` and ``rw_buckets``
            (iterables of bucket names), ``vpc``, ``compute_env_ami``,
            ``container_image``, ``refdata_bucket`` and ``data_bucket``.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(scope, id, **kwargs)

    ################################################################################
    # Set up permissions

    # Each bucket name is used as a construct id, and construct ids must be
    # unique within this scope. Import every name once and reuse the result
    # so a name that is repeated, or listed as both read-only and read-write,
    # does not trigger a second from_bucket_name call with the same id.
    buckets_by_name = {}

    def import_bucket(bucket_name):
        if bucket_name not in buckets_by_name:
            buckets_by_name[bucket_name] = s3.Bucket.from_bucket_name(
                self, bucket_name, bucket_name=bucket_name)
        return buckets_by_name[bucket_name]

    # Ordered lists (rather than sets of construct objects) keep the grants
    # below in the same order as the names in props on every synth run.
    ro_buckets = [
        import_bucket(name) for name in dict.fromkeys(props['ro_buckets'])
    ]
    rw_buckets = [
        import_bucket(name) for name in dict.fromkeys(props['rw_buckets'])
    ]

    batch_service_role = iam.Role(
        self,
        'BatchServiceRole',
        assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSBatchServiceRole')
        ])

    spotfleet_role = iam.Role(
        self,
        'AmazonEC2SpotFleetRole',
        assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonEC2SpotFleetTaggingRole')
        ])

    # Create role for Batch instances
    batch_instance_role = iam.Role(
        self,
        'BatchInstanceRole',
        role_name='RnasumBatchInstanceRole',
        assumed_by=iam.CompositePrincipal(
            iam.ServicePrincipal('ec2.amazonaws.com'),
            iam.ServicePrincipal('ecs.amazonaws.com')),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonEC2RoleforSSM'),
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonEC2ContainerServiceforEC2Role')
        ])
    batch_instance_role.add_to_policy(
        iam.PolicyStatement(
            actions=[
                "ec2:Describe*",
                "ec2:AttachVolume",
                "ec2:CreateVolume",
                "ec2:CreateTags",
                "ec2:ModifyInstanceAttribute"
            ],
            resources=["*"]))
    batch_instance_role.add_to_policy(
        iam.PolicyStatement(
            actions=["ecs:ListClusters"],
            resources=["*"]))
    for bucket in ro_buckets:
        bucket.grant_read(batch_instance_role)
    for bucket in rw_buckets:
        # TODO: restrict write to paths with */rnasum/*
        bucket.grant_read_write(batch_instance_role)

    # Turn the instance role into a Instance Profile
    batch_instance_profile = iam.CfnInstanceProfile(
        self,
        'BatchInstanceProfile',
        instance_profile_name='RnasumBatchInstanceProfile',
        roles=[batch_instance_role.role_name])

    ################################################################################
    # Minimal networking
    # TODO: import resource created with TF
    vpc = props['vpc']

    ################################################################################
    # Setup Batch compute resources

    # Configure BlockDevice to expand instance disk space (if needed?)
    block_device_mappings = [{
        'deviceName': '/dev/xvdf',
        'ebs': {
            'deleteOnTermination': True,
            'volumeSize': 1024,
            'volumeType': 'gp2'
        }
    }]

    launch_template = ec2.CfnLaunchTemplate(
        self,
        'RnasumBatchComputeLaunchTemplate',
        launch_template_name='RnasumBatchComputeLaunchTemplate',
        launch_template_data={
            # 'userData': core.Fn.base64(user_data_script), FIXME may not need this for RNAsum case? see job_definition below
            'blockDeviceMappings': block_device_mappings
        })

    launch_template_spec = batch.LaunchTemplateSpecification(
        launch_template_name=launch_template.launch_template_name,
        version='$Latest')

    my_compute_res = batch.ComputeResources(
        type=batch.ComputeResourceType.SPOT,
        allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
        desiredv_cpus=0,
        maxv_cpus=80,
        minv_cpus=0,
        image=ec2.MachineImage.generic_linux(
            ami_map={'ap-southeast-2': props['compute_env_ami']}),
        launch_template=launch_template_spec,
        spot_fleet_role=spotfleet_role,
        instance_role=batch_instance_profile.instance_profile_name,
        vpc=vpc,
        #compute_resources_tags=core.Tag('Creator', 'Batch')
    )
    # XXX: How to add more than one tag above??
    # core.Tag.add(my_compute_res, 'Foo', 'Bar')

    my_compute_env = batch.ComputeEnvironment(
        self,
        'RnasumBatchComputeEnv',
        compute_environment_name="RnasumBatchComputeEnv",
        service_role=batch_service_role,
        compute_resources=my_compute_res)

    # The launch template and the instance profile are handed to the compute
    # environment by name only, which does not by itself order their
    # creation. Declare the ordering explicitly so both exist first.
    my_compute_env.node.add_dependency(launch_template)
    my_compute_env.node.add_dependency(batch_instance_profile)

    job_queue = batch.JobQueue(
        self,
        'RnasumJobQueue',
        job_queue_name='rnasum_job_queue',
        compute_environments=[
            batch.JobQueueComputeEnvironment(
                compute_environment=my_compute_env,
                order=1)
        ],
        priority=10)

    # it is equivalent of
    # https://github.com/umccr/infrastructure/blob/master/terraform/stacks/wts_report/jobs/wts_report.json
    default_container_props = {
        'image': props['container_image'],
        'vcpus': 2,
        'memory': 2048,
        'command': ['/opt/container/WTS-report-wrapper.sh', 'Ref::vcpus'],
        'volumes': [{
            'host': {
                'sourcePath': '/mnt'
            },
            'name': 'work'
        }, {
            'host': {
                'sourcePath': '/opt/container'
            },
            'name': 'container'
        }],
        'mountPoints': [{
            'containerPath': '/work',
            'readOnly': False,
            'sourceVolume': 'work'
        }, {
            'containerPath': '/opt/container',
            'readOnly': True,
            'sourceVolume': 'container'
        }],
        'readonlyRootFilesystem': False,
        'privileged': True,
        'ulimits': []
    }

    # and CDK equivalent of
    # https://github.com/umccr/infrastructure/blob/master/terraform/stacks/wts_report/main.tf#L113
    job_definition = batch.CfnJobDefinition(
        self,
        'RnasumJobDefinition',
        job_definition_name='rnasum_job_dev',
        type='container',
        container_properties=default_container_props,
        parameters={
            'vcpus': 1,
        })

    ################################################################################
    # Set up job submission Lambda

    lambda_role = iam.Role(
        self,
        'RnasumLambdaRole',
        assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSLambdaBasicExecutionRole'),
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AWSBatchFullAccess')  # TODO: restrict!
        ])

    # The Lambda role only gets read access, including on the read-write buckets.
    for bucket in ro_buckets:
        bucket.grant_read(lambda_role)
    for bucket in rw_buckets:
        bucket.grant_read(lambda_role)

    # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
    #       may need a default JobDefinition to be set up
    # and CDK equivalent of
    # https://github.com/umccr/infrastructure/blob/master/terraform/stacks/wts_report/main.tf#L159
    lmbda.Function(
        self,
        'RnasumLambda',
        function_name='rnasum_batch_lambda',
        handler='trigger_wts_report.lambda_handler',
        runtime=lmbda.Runtime.PYTHON_3_7,
        code=lmbda.Code.from_asset('lambdas/'),
        environment={
            'JOBNAME_PREFIX': "rnasum_",
            'JOBQUEUE': job_queue.job_queue_name,
            'JOBDEF': job_definition.job_definition_name,
            'REFDATA_BUCKET': props['refdata_bucket'],
            'DATA_BUCKET': props['data_bucket'],
            'JOB_MEM': '32000',
            'JOB_VCPUS': '8',
            'REF_DATASET': 'PANCAN',
            'GENOME_BUILD': '38',
        },
        role=lambda_role)
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Provision a VPC, IAM roles and a managed AWS Batch environment.

    Creates a VPC with a security group, the Batch service / spot fleet /
    instance roles, an instance profile, a managed compute environment, a job
    queue and a container job definition. Resource ids, names, sizes and the
    container image are taken from module-level constants (``VPC_ID``,
    ``COMPUTE_TYPE``, ``CONTAINER_IMAGE``, ``SECRET_NAME``, ...) that are
    defined outside this block.

    Args:
        scope: Parent construct.
        id: Construct id of this construct within ``scope``.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(scope, id, **kwargs)

    # VPC & Security Group
    vpc = ec2.Vpc(scope=self, id=VPC_ID, max_azs=3)
    sg = ec2.SecurityGroup(
        self, SECURITY_GROUP_ID,
        vpc=vpc,
        security_group_name=SECURITY_GROUP_NAME,
    )

    # IAM Roles and Permissions
    batch_service_role = iam.Role(
        self, BATCH_SERVICE_ROLE_ID,
        role_name=BATCH_SERVICE_ROLE_NAME,
        assumed_by=iam.ServicePrincipal("batch.amazonaws.com"),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AWSBatchServiceRole")
        ],
    )

    spot_fleet_role = iam.Role(
        self, SPOT_FLEET_ROLE_ID,
        role_name=SPOT_FLEET_ROLE_NAME,
        assumed_by=iam.ServicePrincipal("spotfleet.amazonaws.com"),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AmazonEC2SpotFleetTaggingRole")
        ],
    )

    batch_instance_role = iam.Role(
        self, BATCH_INSTANCE_ROLE_ID,
        role_name=BATCH_INSTANCE_ROLE_NAME,
        assumed_by=iam.CompositePrincipal(
            iam.ServicePrincipal("ec2.amazonaws.com"),
            iam.ServicePrincipal("ecs.amazonaws.com"),
        ),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name("AmazonS3FullAccess"),
            iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AmazonEC2ContainerServiceforEC2Role"),
        ],
    )

    # The instance profile is deliberately given the same name as the role;
    # the compute environment below relies on that when it passes the role
    # name as its "instanceRole".
    instance_profile = iam.CfnInstanceProfile(
        self, INSTANCE_PROFILE_ID,
        instance_profile_name=batch_instance_role.role_name,
        roles=[batch_instance_role.role_name],
    )

    # Compute Environment
    compute_environment = batch.CfnComputeEnvironment(
        self, COMPUTE_ENVIRONMENT_ID,
        compute_environment_name=COMPUTE_ENVIRONMENT_NAME,
        type="MANAGED",
        service_role=batch_service_role.role_arn,
        compute_resources={
            "type": COMPUTE_TYPE,
            "maxvCpus": COMPUTE_MAX_VCPUS,
            "minvCpus": COMPUTE_MIN_VCPUS,
            "desiredvCpus": COMPUTE_DESIRED_VCPUS,
            "bidPercentage": BID_PERCENTAGE,
            "spotIamFleetRole": spot_fleet_role.role_arn,
            "instanceTypes": COMPUTE_INSTANCE_TYPES,
            "instanceRole": batch_instance_role.role_name,
            "subnets": [subnet.subnet_id for subnet in vpc.public_subnets],
            "securityGroupIds": [sg.security_group_id],
        },
    )
    # The profile is only referenced through the role name, so the ordering
    # has to be declared explicitly.
    compute_environment.add_depends_on(instance_profile)

    # Job Queue
    job_queue = batch.CfnJobQueue(
        self, JOB_QUEUE_ID,
        job_queue_name=JOB_QUEUE_NAME,
        priority=1,
        compute_environment_order=[
            {
                "order": 1,
                "computeEnvironment": compute_environment.compute_environment_name,
            }
        ],
    )
    # The queue refers to the compute environment by name only.
    job_queue.add_depends_on(compute_environment)

    # Job Definition
    # NOTE(review): the "{{resolve:secretsmanager:...}}" values are placed in
    # plain container environment variables; presumably the resolved secrets
    # then become readable on the job definition. Confirm this is acceptable.
    batch.CfnJobDefinition(
        self, JOB_DEFINITION_ID,
        job_definition_name=JOB_DEFINITION_NAME,
        type="container",
        # Keys use the same camelCase property names as every other property
        # dict in this method; the previous "Attemps" key was misspelled.
        retry_strategy={
            "attempts": 1
        },
        timeout={
            "attemptDurationSeconds": 60
        },
        container_properties={
            "image": CONTAINER_IMAGE,
            "vcpus": CONTAINER_VCPUS,
            "memory": CONTAINER_MEMORY,
            "environment": [
                {
                    "name": STACK_PREFIX + "_POSTGRES_DB",
                    "value": "{{resolve:secretsmanager:" + SECRET_NAME + ":SecretString:POSTGRES_DB}}",
                },
                {
                    "name": STACK_PREFIX + "_POSTGRES_USER",
                    "value": "{{resolve:secretsmanager:" + SECRET_NAME + ":SecretString:POSTGRES_USER}}",
                },
            ],
        },
    )