def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Create a public and a private launch template plus one instance each.

    Two ``CfnLaunchTemplate`` resources are defined that differ only in the
    imported security group and the template name.  One ``CfnInstance`` is
    launched from each template; the first one also gets an Elastic IP.

    Args:
        scope: Parent construct.
        id: Construct id of this stack.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(scope, id, **kwargs)

    def launch_template_data(security_group_export):
        # Both templates share everything except the imported security group.
        return {
            'imageId': 'ami-06a46da680048c8ae',
            'blockDeviceMappings': [{
                'deviceName': '/dev/sda1',
                'ebs': {
                    'deleteOnTermination': True,
                    'volumeSize': 20,
                    'volumeType': 'gp2',
                },
            }],
            'securityGroupIds': [core.Fn.import_value(security_group_export)],
            'instanceType': 't3.micro',
        }

    # launchtemplate
    # https://aws.amazon.com/marketplace/pp/B00O7WM7QW ami-06a46da680048c8ae
    template01 = aws_ec2.CfnLaunchTemplate(
        self,
        'template01',
        launch_template_data=launch_template_data('publicsecuritygroup01'),
        launch_template_name='public01',
    )
    template02 = aws_ec2.CfnLaunchTemplate(
        self,
        'template02',
        launch_template_data=launch_template_data('privatesecuritygroup01'),
        launch_template_name='private01',
    )

    # public instance
    instance01 = aws_ec2.CfnInstance(
        self,
        'instance01',
        launch_template={
            'launchTemplateId': template01.ref,
            'version': template01.attr_latest_version_number,
        },
        key_name='aws-example-key',
        subnet_id=core.Fn.import_value('publicsubnet01'),
    )
    aws_ec2.CfnEIP(
        self,
        'eip',
        domain='vpc',
        instance_id=instance01.ref,
        tags=[core.CfnTag(key='Name', value='eip01')],
    )

    # private instance
    # The version must come from the same template as the id; previously the
    # version number of template01 was paired with the id of template02.
    # NOTE(review): this "private" instance is placed in the subnet exported
    # as 'publicsubnet02' -- confirm that this is the intended subnet.
    aws_ec2.CfnInstance(
        self,
        'instance02',
        launch_template={
            'launchTemplateId': template02.ref,
            'version': template02.attr_latest_version_number,
        },
        key_name='aws-example-key',
        subnet_id=core.Fn.import_value('publicsubnet02'),
    )
def __init__(
    self,
    scope: Construct,
    construct_id: str,
    *,
    deploy_env: str,
    processing_assets_table: aws_dynamodb.Table,
):  # pylint: disable=too-many-locals
    """Create the AWS Batch compute environment and its job queue.

    Args:
        scope: Parent construct; the job queue is created directly in it.
        construct_id: Id of this construct, also used as the prefix of the
            job queue id.
        deploy_env: Deployment environment name.  ``"prod"`` selects the
            larger ``c5`` instance types, anything else the ``m5`` types.
            It is also used as the prefix of the launch template name.
        processing_assets_table: DynamoDB table the Batch instance role is
            granted read/write access to.

    The resulting queue is exposed as ``self.job_queue``.
    """
    super().__init__(scope, construct_id)

    if deploy_env == "prod":
        instance_types = [
            aws_ec2.InstanceType("c5.xlarge"),
            aws_ec2.InstanceType("c5.2xlarge"),
            aws_ec2.InstanceType("c5.4xlarge"),
            aws_ec2.InstanceType("c5.9xlarge"),
        ]
    else:
        instance_types = [
            aws_ec2.InstanceType("m5.large"),
            aws_ec2.InstanceType("m5.xlarge"),
        ]

    ec2_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
        "service-role/AmazonEC2ContainerServiceforEC2Role"
    )

    batch_instance_role = aws_iam.Role(
        self,
        "batch-instance-role",
        assumed_by=aws_iam.ServicePrincipal("ec2.amazonaws.com"),  # type: ignore[arg-type]
        managed_policies=[ec2_policy],
    )
    processing_assets_table.grant_read_write_data(batch_instance_role)  # type: ignore[arg-type]

    batch_instance_profile = aws_iam.CfnInstanceProfile(
        self,
        "batch-instance-profile",
        roles=[batch_instance_role.role_name],
    )

    # MIME multi-part user data, the format required for launch templates
    # used by AWS Batch.
    batch_launch_template_data = textwrap.dedent(
        """
        MIME-Version: 1.0
        Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="

        --==MYBOUNDARY==
        Content-Type: text/x-shellscript; charset="us-ascii"

        #!/bin/bash
        echo ECS_IMAGE_PULL_BEHAVIOR=prefer-cached >> /etc/ecs/ecs.config

        --==MYBOUNDARY==--
        """
    )
    launch_template_data = aws_ec2.CfnLaunchTemplate.LaunchTemplateDataProperty(
        user_data=Fn.base64(batch_launch_template_data.strip())
    )
    cloudformation_launch_template = aws_ec2.CfnLaunchTemplate(
        self,
        "batch-launch-template",
        launch_template_name=f"{deploy_env}-datalake-batch-launch-template",
        launch_template_data=launch_template_data,
    )
    # The name is set explicitly above; the assert narrows the Optional type.
    assert cloudformation_launch_template.launch_template_name is not None
    launch_template = aws_batch.LaunchTemplateSpecification(
        launch_template_name=cloudformation_launch_template.launch_template_name
    )

    # use existing VPC in LINZ AWS account.
    # VPC with these tags is required to exist in AWS account before being deployed.
    # A VPC will not be deployed by this project.
    vpc = aws_ec2.Vpc.from_lookup(
        self,
        "datalake-vpc",
        tags={
            APPLICATION_NAME_TAG_NAME: APPLICATION_NAME,
            "ApplicationLayer": "networking",
        },
    )

    compute_resources = aws_batch.ComputeResources(
        vpc=vpc,
        minv_cpus=0,
        desiredv_cpus=0,
        maxv_cpus=1000,
        instance_types=instance_types,
        # Reference the profile through its ARN attribute.  The profile is
        # created without an explicit name, so its `instance_profile_name`
        # property is unset; passing that would leave `instance_role` empty
        # and the role granted table access above would not be the one
        # attached to the Batch instances.
        instance_role=batch_instance_profile.attr_arn,
        allocation_strategy=aws_batch.AllocationStrategy("BEST_FIT_PROGRESSIVE"),
        launch_template=launch_template,
    )

    batch_service_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
        "service-role/AWSBatchServiceRole"
    )
    service_role = aws_iam.Role(
        self,
        "batch-service-role",
        assumed_by=aws_iam.ServicePrincipal("batch.amazonaws.com"),  # type: ignore[arg-type]
        managed_policies=[batch_service_policy],
    )

    compute_environment = aws_batch.ComputeEnvironment(
        self,
        "compute-environment",
        compute_resources=compute_resources,
        service_role=service_role,  # type: ignore[arg-type]
    )

    # The queue lives in the parent scope, hence the construct_id prefix.
    self.job_queue = aws_batch.JobQueue(
        scope,
        f"{construct_id}-job-queue",
        compute_environments=[
            aws_batch.JobQueueComputeEnvironment(
                compute_environment=compute_environment, order=10  # type: ignore[arg-type]
            ),
        ],
        priority=10,
    )
def __init__(self, scope: core.Construct, id: str, props, **kwargs) -> None:
    """Define the Umccrise Batch environment and its job submission Lambda.

    Args:
        scope: Parent construct.
        id: Construct id of this stack.
        props: Mapping read for the keys ``ro_buckets``, ``rw_buckets``,
            ``vpc``, ``compute_env_ami``, ``container_image``,
            ``refdata_bucket`` and ``data_bucket``.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(scope, id, **kwargs)

    ################################################################################
    # Set up permissions

    # Lists (not sets) keep the grants below in the order given in props, so
    # the generated IAM statements are ordered the same on every synth.
    ro_buckets = [
        s3.Bucket.from_bucket_name(self, bucket, bucket_name=bucket)
        for bucket in props['ro_buckets']
    ]
    rw_buckets = [
        s3.Bucket.from_bucket_name(self, bucket, bucket_name=bucket)
        for bucket in props['rw_buckets']
    ]

    batch_service_role = iam.Role(
        self,
        'BatchServiceRole',
        assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSBatchServiceRole')
        ])

    spotfleet_role = iam.Role(
        self,
        'AmazonEC2SpotFleetRole',
        assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonEC2SpotFleetTaggingRole')
        ])

    # Create role for Batch instances
    batch_instance_role = iam.Role(
        self,
        'BatchInstanceRole',
        role_name='UmccriseBatchInstanceRole',
        assumed_by=iam.CompositePrincipal(
            iam.ServicePrincipal('ec2.amazonaws.com'),
            iam.ServicePrincipal('ecs.amazonaws.com')),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonEC2RoleforSSM'),
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonEC2ContainerServiceforEC2Role')
        ])
    batch_instance_role.add_to_policy(
        iam.PolicyStatement(
            actions=[
                "ec2:Describe*",
                "ec2:AttachVolume",
                "ec2:CreateVolume",
                "ec2:CreateTags",
                "ec2:ModifyInstanceAttribute"
            ],
            resources=["*"]))
    batch_instance_role.add_to_policy(
        iam.PolicyStatement(actions=["ecs:ListClusters"], resources=["*"]))

    for bucket in ro_buckets:
        bucket.grant_read(batch_instance_role)
    for bucket in rw_buckets:
        # restrict access to object keys matching */umccrised/*
        bucket.grant_read_write(batch_instance_role, '*/umccrised/*')

    # Turn the instance role into an Instance Profile
    batch_instance_profile = iam.CfnInstanceProfile(
        self,
        'BatchInstanceProfile',
        instance_profile_name='UmccriseBatchInstanceProfile',
        roles=[batch_instance_role.role_name])

    ################################################################################
    # Minimal networking
    # TODO: import resource created with TF
    vpc = props['vpc']

    ################################################################################
    # Setup Batch compute resources

    # Configure BlockDevice to expand instance disk space (if needed?)
    block_device_mappings = [{
        'deviceName': '/dev/xvdf',
        'ebs': {
            'deleteOnTermination': True,
            'volumeSize': 1024,
            'volumeType': 'gp2'
        }
    }]

    # `user_data_script` is not defined in this method; it is expected to be
    # provided at module level.
    launch_template = ec2.CfnLaunchTemplate(
        self,
        'UmccriseBatchComputeLaunchTemplate',
        launch_template_name='UmccriseBatchComputeLaunchTemplate',
        launch_template_data={
            'userData': core.Fn.base64(user_data_script),
            'blockDeviceMappings': block_device_mappings
        })

    launch_template_spec = batch.LaunchTemplateSpecification(
        launch_template_name=launch_template.launch_template_name,
        version='$Latest')

    # TODO: compute resource tags are not set yet; it is unclear how to add
    # more than one tag here.
    my_compute_res = batch.ComputeResources(
        type=batch.ComputeResourceType.SPOT,
        allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
        desiredv_cpus=0,
        maxv_cpus=128,
        minv_cpus=0,
        image=ec2.MachineImage.generic_linux(
            ami_map={'ap-southeast-2': props['compute_env_ami']}),
        launch_template=launch_template_spec,
        spot_fleet_role=spotfleet_role,
        instance_role=batch_instance_profile.instance_profile_name,
        vpc=vpc,
    )

    my_compute_env = batch.ComputeEnvironment(
        self,
        'UmccriseBatchComputeEnv',
        compute_environment_name="cdk-umccrise-batch-compute-env",
        service_role=batch_service_role,
        compute_resources=my_compute_res)

    job_queue = batch.JobQueue(
        self,
        'UmccriseJobQueue',
        job_queue_name='cdk-umccrise_job_queue',
        compute_environments=[
            batch.JobQueueComputeEnvironment(
                compute_environment=my_compute_env, order=1)
        ],
        priority=10)

    job_container = batch.JobDefinitionContainer(
        image=ecs.ContainerImage.from_registry(
            name=props['container_image']),
        vcpus=2,
        memory_limit_mib=2048,
        command=["/opt/container/umccrise-wrapper.sh", "Ref::vcpus"],
        mount_points=[
            ecs.MountPoint(container_path='/work',
                           read_only=False,
                           source_volume='work'),
            ecs.MountPoint(container_path='/opt/container',
                           read_only=True,
                           source_volume='container')
        ],
        volumes=[
            ecs.Volume(name='container',
                       host=ecs.Host(source_path='/opt/container')),
            ecs.Volume(name='work', host=ecs.Host(source_path='/mnt'))
        ],
        privileged=True)

    # The job definition is only registered here; nothing below refers to it.
    batch.JobDefinition(
        self,
        'UmccriseJobDefinition',
        job_definition_name='cdk-umccrise-job-definition',
        parameters={'vcpus': '1'},
        container=job_container,
        timeout=core.Duration.hours(5))

    ################################################################################
    # Set up job submission Lambda

    lambda_role = iam.Role(
        self,
        'UmccriseLambdaRole',
        assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSLambdaBasicExecutionRole'),
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AWSBatchFullAccess')  # TODO: restrict!
        ])

    # The Lambda only ever needs read access, also on the read/write buckets.
    for bucket in ro_buckets:
        bucket.grant_read(lambda_role)
    for bucket in rw_buckets:
        bucket.grant_read(lambda_role)

    # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
    #       may need a default JobDefinition to be set up
    lmbda.Function(
        self,
        'UmccriseLambda',
        function_name='umccrise_batch_lambda',
        handler='umccrise.lambda_handler',
        runtime=lmbda.Runtime.PYTHON_3_7,
        code=lmbda.Code.from_asset('lambdas/umccrise'),
        environment={
            'JOBNAME_PREFIX': "UMCCRISE_",
            'JOBQUEUE': job_queue.job_queue_name,
            'REFDATA_BUCKET': props['refdata_bucket'],
            'DATA_BUCKET': props['data_bucket'],
            'UMCCRISE_MEM': '50000',
            'UMCCRISE_VCPUS': '16'
        },
        role=lambda_role)
def _add_compute_resource_launch_template(
    self,
    queue,
    compute_resource,
    instance_type,
    queue_pre_install_action,
    queue_post_install_action,
    queue_lt_security_groups,
    queue_placement_group,
):
    """Define the EC2 launch template for one compute resource of a queue.

    The template is named ``<stack name>-<queue name>-<instance type>`` and
    carries the network interfaces, optional spot market options, the
    rendered compute node user data and the instance/volume tags.

    Args:
        queue: Queue configuration (name, networking, capacity type,
            compute settings are read from it).
        compute_resource: Compute resource configuration (EFA settings,
            network interface count, vcpus, spot price, ... are read).
        instance_type: Instance type string for the template.
        queue_pre_install_action: Optional action providing ``script`` and
            ``args`` for the pre-install hook.
        queue_post_install_action: Optional action providing ``script`` and
            ``args`` for the post-install hook.
        queue_lt_security_groups: Security groups set on every interface.
        queue_placement_group: Placement group name for the template.
    """
    efa_enabled = compute_resource.efa and compute_resource.efa.enabled
    efa_interface_type = "efa" if efa_enabled else None
    interface_count = compute_resource.max_network_interface_count
    first_subnet_id = queue.networking.subnet_ids[0]

    # LT network interfaces
    # The public IP parameter is not supported for instance types with
    # multiple network interfaces, so it is only set for single-NIC types.
    if interface_count == 1:
        primary_public_ip = queue.networking.assign_public_ip
    else:
        primary_public_ip = None

    network_interfaces = [
        ec2.CfnLaunchTemplate.NetworkInterfaceProperty(
            device_index=0,
            associate_public_ip_address=primary_public_ip,
            interface_type=efa_interface_type,
            groups=queue_lt_security_groups,
            subnet_id=first_subnet_id,
        )
    ]
    network_interfaces.extend(
        ec2.CfnLaunchTemplate.NetworkInterfaceProperty(
            device_index=index,
            network_card_index=index,
            interface_type=efa_interface_type,
            groups=queue_lt_security_groups,
            subnet_id=first_subnet_id,
        )
        for index in range(1, interface_count)
    )

    # Spot queues request one-time spot instances that terminate on
    # interruption; on-demand queues carry no market options.
    if queue.capacity_type == CapacityType.SPOT:
        if compute_resource.spot_price is None:
            max_spot_price = None
        else:
            max_spot_price = str(compute_resource.spot_price)
        market_options = ec2.CfnLaunchTemplate.InstanceMarketOptionsProperty(
            market_type="spot",
            spot_options=ec2.CfnLaunchTemplate.SpotOptionsProperty(
                spot_instance_type="one-time",
                instance_interruption_behavior="terminate",
                max_price=max_spot_price,
            ),
        )
    else:
        market_options = None

    if compute_resource.pass_cpu_options_in_launch_template:
        cpu_options = ec2.CfnLaunchTemplate.CpuOptionsProperty(
            core_count=compute_resource.vcpus, threads_per_core=1)
    else:
        cpu_options = None

    # Ephemeral mount dir falls back to /scratch when no ephemeral volume is
    # configured for the queue.
    compute_settings = queue.compute_settings
    if (compute_settings and compute_settings.local_storage
            and compute_settings.local_storage.ephemeral_volume):
        ephemeral_dir = compute_settings.local_storage.ephemeral_volume.mount_dir
    else:
        ephemeral_dir = "/scratch"

    fsx_attributes = self.shared_storage_attributes[SharedStorageType.FSX]

    # Values substituted into the user data script by Fn.sub.
    substitutions = {
        "EnableEfa": "efa" if efa_enabled else "NONE",
        "RAIDOptions": get_shared_storage_options_by_type(
            self.shared_storage_options, SharedStorageType.RAID),
        "DisableHyperThreadingManually": "true"
        if compute_resource.disable_simultaneous_multithreading_manually
        else "false",
        "BaseOS": self.config.image.os,
        "PreInstallScript": queue_pre_install_action.script
        if queue_pre_install_action else "NONE",
        "PreInstallArgs": join_shell_args(queue_pre_install_action.args)
        if queue_pre_install_action and queue_pre_install_action.args
        else "NONE",
        "PostInstallScript": queue_post_install_action.script
        if queue_post_install_action else "NONE",
        "PostInstallArgs": join_shell_args(queue_post_install_action.args)
        if queue_post_install_action and queue_post_install_action.args
        else "NONE",
        "EFSId": get_shared_storage_ids_by_type(
            self.shared_storage_mappings, SharedStorageType.EFS),
        "EFSOptions": get_shared_storage_options_by_type(
            self.shared_storage_options, SharedStorageType.EFS),  # FIXME
        "FSXId": get_shared_storage_ids_by_type(
            self.shared_storage_mappings, SharedStorageType.FSX),
        "FSXMountName": fsx_attributes.get("MountName", ""),
        "FSXDNSName": fsx_attributes.get("DNSName", ""),
        "FSXOptions": get_shared_storage_options_by_type(
            self.shared_storage_options, SharedStorageType.FSX),
        "Scheduler": self.config.scheduling.scheduler,
        "EphemeralDir": ephemeral_dir,
        "EbsSharedDirs": get_shared_storage_options_by_type(
            self.shared_storage_options, SharedStorageType.EBS),
        "ClusterDNSDomain": str(self.cluster_hosted_zone.name)
        if self.cluster_hosted_zone else "",
        "ClusterHostedZone": str(self.cluster_hosted_zone.ref)
        if self.cluster_hosted_zone else "",
        "OSUser": OS_MAPPING[self.config.image.os]["user"],
        "DynamoDBTable": self.dynamodb_table.ref,
        "LogGroupName": self.log_group.log_group_name
        if self.config.monitoring.logs.cloud_watch.enabled else "NONE",
        "IntelHPCPlatform": "true"
        if self.config.is_intel_hpc_platform_enabled else "false",
        "CWLoggingEnabled": "true"
        if self.config.is_cw_logging_enabled else "false",
        "QueueName": queue.name,
        "EnableEfaGdr": "compute"
        if compute_resource.efa and compute_resource.efa.gdr_support
        else "NONE",
        "CustomNodePackage": self.config.custom_node_package or "",
        "CustomAwsBatchCliPackage": self.config.custom_aws_batch_cli_package or "",
        "ExtraJson": self.config.extra_chef_attributes,
    }
    # Common entries are applied last, so they win on key collisions.
    substitutions.update(get_common_user_data_env(queue, self.config))

    user_data = Fn.base64(
        Fn.sub(
            get_user_data_content("../resources/compute_node/user_data.sh"),
            substitutions,
        ))

    # Tags appended after the defaults for both instances and volumes.
    queue_and_custom_tags = [
        CfnTag(key=PCLUSTER_QUEUE_NAME_TAG, value=queue.name)
    ] + get_custom_tags(self.config)
    instance_tags = get_default_instance_tags(
        self.stack_name,
        self.config,
        compute_resource,
        "Compute",
        self.shared_storage_mappings,
    ) + queue_and_custom_tags
    volume_tags = get_default_volume_tags(
        self.stack_name, "Compute") + queue_and_custom_tags

    template_data = ec2.CfnLaunchTemplate.LaunchTemplateDataProperty(
        instance_type=instance_type,
        cpu_options=cpu_options,
        block_device_mappings=get_block_device_mappings(
            queue.compute_settings.local_storage, self.config.image.os),
        # key_name is intentionally not set
        network_interfaces=network_interfaces,
        placement=ec2.CfnLaunchTemplate.PlacementProperty(
            group_name=queue_placement_group),
        image_id=self.config.image_dict[queue.name],
        ebs_optimized=compute_resource.is_ebs_optimized,
        iam_instance_profile=ec2.CfnLaunchTemplate.IamInstanceProfileProperty(
            name=self.instance_profiles[queue.name]),
        instance_market_options=market_options,
        user_data=user_data,
        monitoring=ec2.CfnLaunchTemplate.MonitoringProperty(enabled=False),
        tag_specifications=[
            ec2.CfnLaunchTemplate.TagSpecificationProperty(
                resource_type="instance", tags=instance_tags),
            ec2.CfnLaunchTemplate.TagSpecificationProperty(
                resource_type="volume", tags=volume_tags),
        ],
    )

    ec2.CfnLaunchTemplate(
        self.stack_scope,
        f"ComputeServerLaunchTemplate{create_hash_suffix(queue.name + instance_type)}",
        launch_template_name=f"{self.stack_name}-{queue.name}-{instance_type}",
        launch_template_data=template_data,
    )
def __init__(self, scope: core.Construct, id: str, props, **kwargs) -> None:
    """Define the RNAsum Batch environment and its job submission Lambda.

    Args:
        scope: Parent construct.
        id: Construct id of this stack.
        props: Mapping read for the keys ``ro_buckets``, ``rw_buckets``,
            ``vpc``, ``compute_env_ami``, ``container_image``,
            ``refdata_bucket`` and ``data_bucket``.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(scope, id, **kwargs)

    ################################################################################
    # Set up permissions

    # Lists (not sets) keep the grants below in the order given in props, so
    # the generated IAM statements are ordered the same on every synth.
    ro_buckets = [
        s3.Bucket.from_bucket_name(self, bucket, bucket_name=bucket)
        for bucket in props['ro_buckets']
    ]
    rw_buckets = [
        s3.Bucket.from_bucket_name(self, bucket, bucket_name=bucket)
        for bucket in props['rw_buckets']
    ]

    batch_service_role = iam.Role(
        self,
        'BatchServiceRole',
        assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSBatchServiceRole')
        ])

    spotfleet_role = iam.Role(
        self,
        'AmazonEC2SpotFleetRole',
        assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonEC2SpotFleetTaggingRole')
        ])

    # Create role for Batch instances
    batch_instance_role = iam.Role(
        self,
        'BatchInstanceRole',
        role_name='RnasumBatchInstanceRole',
        assumed_by=iam.CompositePrincipal(
            iam.ServicePrincipal('ec2.amazonaws.com'),
            iam.ServicePrincipal('ecs.amazonaws.com')),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonEC2RoleforSSM'),
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonEC2ContainerServiceforEC2Role')
        ])
    batch_instance_role.add_to_policy(
        iam.PolicyStatement(
            actions=[
                "ec2:Describe*",
                "ec2:AttachVolume",
                "ec2:CreateVolume",
                "ec2:CreateTags",
                "ec2:ModifyInstanceAttribute"
            ],
            resources=["*"]))
    batch_instance_role.add_to_policy(
        iam.PolicyStatement(actions=["ecs:ListClusters"], resources=["*"]))

    for bucket in ro_buckets:
        bucket.grant_read(batch_instance_role)
    for bucket in rw_buckets:
        # TODO: restrict write to paths with */rnasum/*
        bucket.grant_read_write(batch_instance_role)

    # Turn the instance role into an Instance Profile
    batch_instance_profile = iam.CfnInstanceProfile(
        self,
        'BatchInstanceProfile',
        instance_profile_name='RnasumBatchInstanceProfile',
        roles=[batch_instance_role.role_name])

    ################################################################################
    # Minimal networking
    # TODO: import resource created with TF
    vpc = props['vpc']

    ################################################################################
    # Setup Batch compute resources

    # Configure BlockDevice to expand instance disk space (if needed?)
    block_device_mappings = [{
        'deviceName': '/dev/xvdf',
        'ebs': {
            'deleteOnTermination': True,
            'volumeSize': 1024,
            'volumeType': 'gp2'
        }
    }]

    launch_template = ec2.CfnLaunchTemplate(
        self,
        'RnasumBatchComputeLaunchTemplate',
        launch_template_name='RnasumBatchComputeLaunchTemplate',
        launch_template_data={
            # FIXME: no 'userData' is set here; it may not be needed for the
            # RNAsum case, see the job definition below.
            'blockDeviceMappings': block_device_mappings
        })

    launch_template_spec = batch.LaunchTemplateSpecification(
        launch_template_name=launch_template.launch_template_name,
        version='$Latest')

    # TODO: compute resource tags are not set yet; it is unclear how to add
    # more than one tag here.
    my_compute_res = batch.ComputeResources(
        type=batch.ComputeResourceType.SPOT,
        allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
        desiredv_cpus=0,
        maxv_cpus=80,
        minv_cpus=0,
        image=ec2.MachineImage.generic_linux(
            ami_map={'ap-southeast-2': props['compute_env_ami']}),
        launch_template=launch_template_spec,
        spot_fleet_role=spotfleet_role,
        instance_role=batch_instance_profile.instance_profile_name,
        vpc=vpc,
    )

    my_compute_env = batch.ComputeEnvironment(
        self,
        'RnasumBatchComputeEnv',
        compute_environment_name="RnasumBatchComputeEnv",
        service_role=batch_service_role,
        compute_resources=my_compute_res)

    job_queue = batch.JobQueue(
        self,
        'RnasumJobQueue',
        job_queue_name='rnasum_job_queue',
        compute_environments=[
            batch.JobQueueComputeEnvironment(
                compute_environment=my_compute_env, order=1)
        ],
        priority=10)

    # it is equivalent of
    # https://github.com/umccr/infrastructure/blob/master/terraform/stacks/wts_report/jobs/wts_report.json
    default_container_props = {
        'image': props['container_image'],
        'vcpus': 2,
        'memory': 2048,
        'command': ['/opt/container/WTS-report-wrapper.sh', 'Ref::vcpus'],
        'volumes': [{
            'host': {
                'sourcePath': '/mnt'
            },
            'name': 'work'
        }, {
            'host': {
                'sourcePath': '/opt/container'
            },
            'name': 'container'
        }],
        'mountPoints': [{
            'containerPath': '/work',
            'readOnly': False,
            'sourceVolume': 'work'
        }, {
            'containerPath': '/opt/container',
            'readOnly': True,
            'sourceVolume': 'container'
        }],
        'readonlyRootFilesystem': False,
        'privileged': True,
        'ulimits': []
    }

    # and CDK equivalent of
    # https://github.com/umccr/infrastructure/blob/master/terraform/stacks/wts_report/main.tf#L113
    job_definition = batch.CfnJobDefinition(
        self,
        'RnasumJobDefinition',
        job_definition_name='rnasum_job_dev',
        type='container',
        container_properties=default_container_props,
        parameters={
            'vcpus': 1,
        })

    ################################################################################
    # Set up job submission Lambda

    lambda_role = iam.Role(
        self,
        'RnasumLambdaRole',
        assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSLambdaBasicExecutionRole'),
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AWSBatchFullAccess')  # TODO: restrict!
        ])

    # The Lambda only ever needs read access, also on the read/write buckets.
    for bucket in ro_buckets:
        bucket.grant_read(lambda_role)
    for bucket in rw_buckets:
        bucket.grant_read(lambda_role)

    # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
    #       may need a default JobDefinition to be set up
    # and CDK equivalent of
    # https://github.com/umccr/infrastructure/blob/master/terraform/stacks/wts_report/main.tf#L159
    lmbda.Function(
        self,
        'RnasumLambda',
        function_name='rnasum_batch_lambda',
        handler='trigger_wts_report.lambda_handler',
        runtime=lmbda.Runtime.PYTHON_3_7,
        code=lmbda.Code.from_asset('lambdas/'),
        environment={
            'JOBNAME_PREFIX': "rnasum_",
            'JOBQUEUE': job_queue.job_queue_name,
            'JOBDEF': job_definition.job_definition_name,
            'REFDATA_BUCKET': props['refdata_bucket'],
            'DATA_BUCKET': props['data_bucket'],
            'JOB_MEM': '32000',
            'JOB_VCPUS': '8',
            'REF_DATASET': 'PANCAN',
            'GENOME_BUILD': '38',
        },
        role=lambda_role)
def create_eks(self, vpc, k8sVersionTxt):
    """Create an EKS cluster with one arm64 and one x86 Bottlerocket nodegroup.

    Args:
        vpc: VPC the cluster is created in.
        k8sVersionTxt: Kubernetes version string; mapped to a version object
            via ``self.getK8sVersion`` and also used in the SSM parameter
            paths of the Bottlerocket AMIs.

    Returns:
        The created ``eks.Cluster``.
    """
    # map input k8s version string to eks.KubernetesVersion object
    kubernetes_version = self.getK8sVersion(k8sVersionTxt)

    # look up the Bottlerocket AMI ids (arm64 first, then x86) in the SSM
    # parameter store
    ami_parameter_prefix = "/aws/service/bottlerocket/aws-k8s-" + k8sVersionTxt
    arm64_ami = ssm.StringParameter.value_for_string_parameter(
        self, ami_parameter_prefix + "/arm64/latest/image_id")
    x86_ami = ssm.StringParameter.value_for_string_parameter(
        self, ami_parameter_prefix + "/x86_64/latest/image_id")

    # create eks cluster
    cluster = eks.Cluster(self,
                          "EKS",
                          vpc=vpc,
                          version=kubernetes_version,
                          default_capacity=0)

    # prepare userdata in TOML format
    certificate_data = cluster.cluster_certificate_authority_data
    api_endpoint = cluster.cluster_endpoint
    cluster_name = cluster.cluster_name
    userdata = "\n".join([
        f'settings.kubernetes.api-server = "{api_endpoint}"',
        f'settings.kubernetes.cluster-certificate = "{certificate_data}"',
        f'settings.kubernetes.cluster-name = "{cluster_name}"',
    ])
    core.CfnOutput(self, "EC2-Instance-UserData", value=userdata)

    # One launch template + nodegroup per architecture, arm/graviton first.
    node_flavours = (
        ("arm64", arm64_ami, "c6g.medium"),
        ("x86", x86_ami, "c5.large"),
    )
    # ssm access and secret access for the eks node role
    node_policy_names = ("AmazonSSMManagedInstanceCore",
                         "SecretsManagerReadWrite")

    for arch, ami_id, node_instance_type in node_flavours:
        template_data = ec2.CfnLaunchTemplate.LaunchTemplateDataProperty(
            image_id=ami_id,
            instance_type=node_instance_type,
            user_data=core.Fn.base64(userdata))
        template = ec2.CfnLaunchTemplate(
            self,
            id=f"bottle_{arch}_lt",
            launch_template_data=template_data,
            launch_template_name=f"bottlerocket-{arch}-launchTempl")
        template_spec = eks.LaunchTemplateSpec(
            id=template.ref,
            version=template.attr_default_version_number)

        nodegroup = cluster.add_nodegroup_capacity(
            f"bottle_{arch}_ng",
            desired_size=1,
            nodegroup_name=f"bottlerocket_{arch}_ng",
            launch_template_spec=template_spec)

        for policy_name in node_policy_names:
            nodegroup.role.add_managed_policy(
                iam.ManagedPolicy.from_aws_managed_policy_name(policy_name))

    return cluster
def __init__(self, scope: core.Stack, id: str, **kwargs):
    """Build the base platform: VPC, ECS cluster, Spot capacity and stress host.

    Creates the VPC and ECS cluster (with a Cloud Map namespace), a launch
    template plus auto scaling group for EC2 Spot capacity, a security group
    for frontend-to-backend traffic on port 3000, an EC2 instance used for
    load testing, and the CloudFormation outputs other stacks import.

    Args:
        scope: Parent construct.
        id: Construct id of this stack.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(scope, id, **kwargs)

    # This resource alone will create a private/public subnet in each AZ as
    # well as nat/internet gateway(s)
    self.vpc = aws_ec2.Vpc(
        self,
        "BaseVPC",
        cidr='10.0.0.0/24',
    )

    # Creating ECS Cluster in the VPC created above
    self.ecs_cluster = aws_ecs.Cluster(self,
                                       "ECSCluster",
                                       vpc=self.vpc,
                                       cluster_name="container-demo")

    # Adding service discovery namespace to cluster
    self.ecs_cluster.add_default_cloud_map_namespace(name="service", )

    ###### EC2 SPOT CAPACITY PROVIDER SECTION ######

    ## As of today, AWS CDK doesn't support Launch Templates on the AutoScaling construct, hence it
    ## doesn't support Mixed Instances Policy to combine instance types on Auto Scaling and adhere to Spot best practices
    ## In the meantime, CfnLaunchTemplate and CfnAutoScalingGroup resources are used to configure Spot capacity
    ## https://github.com/aws/aws-cdk/issues/6734

    spot_role_policies = [
        aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            "service-role/AmazonEC2ContainerServiceforEC2Role"),
        aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            "service-role/AmazonEC2RoleforSSM"),
    ]
    self.ecs_spot_instance_role = aws_iam.Role(
        self,
        "ECSSpotECSInstanceRole",
        assumed_by=aws_iam.ServicePrincipal("ec2.amazonaws.com"),
        managed_policies=spot_role_policies)

    self.ecs_spot_instance_profile = aws_iam.CfnInstanceProfile(
        self,
        "ECSSpotInstanceProfile",
        roles=[self.ecs_spot_instance_role.role_name])

    ## This creates a Launch Template for the Auto Scaling group
    ecs_ami_id = aws_ssm.StringParameter.value_for_string_parameter(
        self,
        "/aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id"
    )
    cluster_security_group_ids = [
        group.security_group_id
        for group in self.ecs_cluster.connections.security_groups
    ]
    ## Here we configure the ECS agent to drain Spot Instances upon catching
    ## a Spot Interruption notice from instance metadata
    spot_user_data_script = "\n".join([
        "#!/usr/bin/bash",
        "echo ECS_CLUSTER=${cluster_name} >> /etc/ecs/ecs.config",
        "sudo iptables --insert FORWARD 1 --in-interface docker+ --destination 169.254.169.254/32 --jump DROP",
        "sudo service iptables save",
        "echo ECS_ENABLE_SPOT_INSTANCE_DRAINING=true >> /etc/ecs/ecs.config",
        "echo ECS_AWSVPC_BLOCK_IMDS=true >> /etc/ecs/ecs.config",
        "cat /etc/ecs/ecs.config",
    ])
    spot_user_data = core.Fn.base64(
        core.Fn.sub(
            spot_user_data_script,
            variables={"cluster_name": self.ecs_cluster.cluster_name}))

    self.lt = aws_ec2.CfnLaunchTemplate(
        self,
        "ECSEC2SpotCapacityLaunchTemplate",
        launch_template_data={
            "instanceType": "m5.large",
            "imageId": ecs_ami_id,
            "securityGroupIds": cluster_security_group_ids,
            "iamInstanceProfile": {
                "arn": self.ecs_spot_instance_profile.attr_arn
            },
            "userData": spot_user_data,
        },
        launch_template_name="ECSEC2SpotCapacityLaunchTemplate")

    # Instance types the mixed instances policy may pick from.
    spot_instance_types = (
        "m5.large",
        "m5d.large",
        "m5a.large",
        "m5ad.large",
        "m5n.large",
        "m5dn.large",
        "m3.large",
        "m4.large",
        "t3.large",
        "t2.large",
    )
    self.ecs_ec2_spot_mig_asg = aws_autoscaling.CfnAutoScalingGroup(
        self,
        "ECSEC2SpotCapacity",
        min_size="0",
        max_size="10",
        vpc_zone_identifier=[
            subnet.subnet_id for subnet in self.vpc.private_subnets
        ],
        mixed_instances_policy={
            "instancesDistribution": {
                "onDemandAllocationStrategy": "prioritized",
                "onDemandBaseCapacity": 0,
                "onDemandPercentageAboveBaseCapacity": 0,
                "spotAllocationStrategy": "capacity-optimized"
            },
            "launchTemplate": {
                "launchTemplateSpecification": {
                    "launchTemplateId": self.lt.ref,
                    "version": self.lt.attr_default_version_number
                },
                "overrides": [{
                    "instanceType": type_name
                } for type_name in spot_instance_types]
            }
        })
    # NOTE: the auto scaling group is currently left without a Name tag.

    core.CfnOutput(self,
                   "EC2SpotAutoScalingGroupName",
                   value=self.ecs_ec2_spot_mig_asg.ref,
                   export_name="EC2SpotASGName")

    ##### END EC2 SPOT CAPACITY PROVIDER SECTION #####

    # Namespace details as CFN output
    cloud_map_namespace = self.ecs_cluster.default_cloud_map_namespace
    self.namespace_outputs = {
        'ARN': cloud_map_namespace.private_dns_namespace_arn,
        'NAME': cloud_map_namespace.private_dns_namespace_name,
        'ID': cloud_map_namespace.private_dns_namespace_id,
    }

    # Cluster Attributes
    # When enabling EC2, we need the security groups "registered" to the
    # cluster for imports in other service stacks: export the id of the
    # first group if there is one, otherwise the string form of the list.
    cluster_security_groups = self.ecs_cluster.connections.security_groups
    if cluster_security_groups:
        sec_grps_value = str(cluster_security_groups[0].security_group_id)
    else:
        sec_grps_value = str(cluster_security_groups)
    self.cluster_outputs = {
        'NAME': self.ecs_cluster.cluster_name,
        'SECGRPS': sec_grps_value,
    }

    # Frontend service to backend services on 3000
    self.services_3000_sec_group = aws_ec2.SecurityGroup(
        self,
        "FrontendToBackendSecurityGroup",
        allow_all_outbound=True,
        description="Security group for frontend service to talk to backend services",
        vpc=self.vpc)

    # Allow inbound 3000 from ALB to Frontend Service
    services_group_id = self.services_3000_sec_group.security_group_id
    self.sec_grp_ingress_self_3000 = aws_ec2.CfnSecurityGroupIngress(
        self,
        "InboundSecGrp3000",
        ip_protocol='TCP',
        source_security_group_id=services_group_id,
        from_port=3000,
        to_port=3000,
        group_id=services_group_id)

    # Creating an EC2 bastion host to perform load test on private backend services
    amzn_linux = aws_ec2.MachineImage.latest_amazon_linux(
        generation=aws_ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
        edition=aws_ec2.AmazonLinuxEdition.STANDARD,
        virtualization=aws_ec2.AmazonLinuxVirt.HVM,
        storage=aws_ec2.AmazonLinuxStorage.GENERAL_PURPOSE)

    # Instance Role/profile that will be attached to the ec2 instance
    # Enabling service role so the EC2 service can use ssm
    stress_role = aws_iam.Role(
        self,
        "InstanceSSM",
        assumed_by=aws_iam.ServicePrincipal("ec2.amazonaws.com"))

    # Attaching the SSM policy to the role so we can use SSM to ssh into the ec2 instance
    stress_role.add_managed_policy(
        aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            "service-role/AmazonEC2RoleforSSM"))

    # Reading user data, to install siege into the ec2 instance.
    with open("stresstool_user_data.sh") as script_file:
        stress_user_data = script_file.read()

    # Instance creation
    # `stack_name` is not defined in this method; it is expected to be a
    # module-level name.
    self.instance = aws_ec2.Instance(
        self,
        "Instance",
        instance_name="{}-stresstool".format(stack_name),
        instance_type=aws_ec2.InstanceType("t3.medium"),
        machine_image=amzn_linux,
        vpc=self.vpc,
        role=stress_role,
        user_data=aws_ec2.UserData.custom(stress_user_data),
        security_group=self.services_3000_sec_group)

    # All Outputs required for other stacks to build
    exported_outputs = (
        ("NSArn", self.namespace_outputs['ARN'], "NSARN"),
        ("NSName", self.namespace_outputs['NAME'], "NSNAME"),
        ("NSId", self.namespace_outputs['ID'], "NSID"),
        ("FE2BESecGrp", self.services_3000_sec_group.security_group_id,
         "SecGrpId"),
        ("ECSClusterName", self.cluster_outputs['NAME'], "ECSClusterName"),
        ("ECSClusterSecGrp", self.cluster_outputs['SECGRPS'],
         "ECSSecGrpList"),
        ("ServicesSecGrp", self.services_3000_sec_group.security_group_id,
         "ServicesSecGrp"),
    )
    for output_id, output_value, export_name in exported_outputs:
        core.CfnOutput(self,
                       output_id,
                       value=output_value,
                       export_name=export_name)

    # Stress tool details are plain outputs without an export name.
    core.CfnOutput(self, "StressToolEc2Id", value=self.instance.instance_id)
    core.CfnOutput(self,
                   "StressToolEc2Ip",
                   value=self.instance.instance_private_ip)
def __init__(self, scope: core.Construct, id: str, props, **kwargs) -> None:
    """Build the umccrise AWS Batch environment and its job-submission Lambda.

    Creates the IAM roles, a security group in the looked-up VPC, a launch
    template carrying MIME-wrapped user data, a Batch compute environment,
    job queue and job definition, and finally the Lambda function that is
    handed the queue/definition names through its environment.

    Args:
        scope: Parent construct.
        id: Construct id, forwarded to the parent initializer.
        props: Mapping read for the keys ``ro_buckets``, ``rw_buckets``,
            ``compute_env_type``, ``compute_env_ami``, ``container_image``,
            ``refdata_bucket``, ``input_bucket``, ``result_bucket`` and
            ``image_configurable``.
        **kwargs: Forwarded unchanged to the parent initializer.
    """
    super().__init__(scope, id, **kwargs)

    assets_dir = os.path.join(os.path.dirname(__file__), '..', 'assets')
    aws_managed = iam.ManagedPolicy.from_aws_managed_policy_name

    umccrise_repo = ecr.Repository.from_repository_name(
        self,
        'UmccriseEcrRepo',
        repository_name='umccrise',
    )

    ############################################################################
    # Permissions

    # Import the existing buckets by name; the bucket name doubles as the
    # construct id.
    ro_buckets = {
        s3.Bucket.from_bucket_name(self, name, bucket_name=name)
        for name in props['ro_buckets']
    }
    rw_buckets = {
        s3.Bucket.from_bucket_name(self, name, bucket_name=name)
        for name in props['rw_buckets']
    }

    batch_service_role = iam.Role(
        self,
        'BatchServiceRole',
        assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
        managed_policies=[aws_managed('service-role/AWSBatchServiceRole')],
    )

    spotfleet_role = iam.Role(
        self,
        'AmazonEC2SpotFleetRole',
        assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
        managed_policies=[
            aws_managed('service-role/AmazonEC2SpotFleetTaggingRole'),
        ],
    )

    # Role assumed by the Batch worker instances (EC2 and ECS principals).
    instance_principals = iam.CompositePrincipal(
        iam.ServicePrincipal('ec2.amazonaws.com'),
        iam.ServicePrincipal('ecs.amazonaws.com'),
    )
    batch_instance_role = iam.Role(
        self,
        'BatchInstanceRole',
        role_name='UmccriseBatchInstanceRole',
        assumed_by=instance_principals,
        managed_policies=[
            aws_managed('service-role/AmazonEC2RoleforSSM'),
            aws_managed('service-role/AmazonEC2ContainerServiceforEC2Role'),
        ],
    )
    ec2_statement = iam.PolicyStatement(
        actions=[
            "ec2:Describe*",
            "ec2:AttachVolume",
            "ec2:CreateVolume",
            "ec2:CreateTags",
            "ec2:ModifyInstanceAttribute",
        ],
        resources=["*"],
    )
    batch_instance_role.add_to_policy(ec2_statement)
    ecs_statement = iam.PolicyStatement(
        actions=["ecs:ListClusters"],
        resources=["*"],
    )
    batch_instance_role.add_to_policy(ecs_statement)

    for ro_bucket in ro_buckets:
        ro_bucket.grant_read(batch_instance_role)
    for rw_bucket in rw_buckets:
        # Read/write is limited to object keys matching */umccrised/*.
        rw_bucket.grant_read_write(batch_instance_role, '*/umccrised/*')

    # Wrap the instance role in an instance profile.
    batch_instance_profile = iam.CfnInstanceProfile(
        self,
        'BatchInstanceProfile',
        instance_profile_name='UmccriseBatchInstanceProfile',
        roles=[batch_instance_role.role_name],
    )

    ############################################################################
    # Network
    # (common infrastructure is imported; the original note says it is
    # maintained via Terraform)

    vpc = ec2.Vpc.from_lookup(
        self,
        'UmccrMainVpc',
        tags={'Name': 'main-vpc', 'Stack': 'networking'},
    )

    batch_security_group = ec2.SecurityGroup(
        self,
        "BatchSecurityGroup",
        vpc=vpc,
        description="Allow all outbound, no inbound traffic",
    )

    ############################################################################
    # Batch compute resources

    # Additional encrypted 2048 GiB gp2 volume attached as /dev/xvdf.
    block_device_mappings = [{
        'deviceName': '/dev/xvdf',
        'ebs': {
            'deleteOnTermination': True,
            'encrypted': True,
            'volumeSize': 2048,
            'volumeType': 'gp2',
        },
    }]

    # Upload both scripts as assets and let the instance role read them.
    umccrise_wrapper_asset = assets.Asset(
        self,
        'UmccriseWrapperAsset',
        path=os.path.join(assets_dir, "umccrise-wrapper.sh"),
    )
    umccrise_wrapper_asset.grant_read(batch_instance_role)

    user_data_asset = assets.Asset(
        self,
        'UserDataAsset',
        path=os.path.join(assets_dir, "batch-user-data.sh"),
    )
    user_data_asset.grant_read(batch_instance_role)

    # The instance downloads the user-data script and executes it with the
    # S3 URL of the wrapper script as its argument.
    user_data = ec2.UserData.for_linux()
    downloaded_script = user_data.add_s3_download_command(
        bucket=user_data_asset.bucket,
        bucket_key=user_data_asset.s3_object_key,
    )
    wrapper_bucket_name = umccrise_wrapper_asset.bucket.bucket_name
    wrapper_object_key = umccrise_wrapper_asset.s3_object_key
    user_data.add_execute_file_command(
        file_path=downloaded_script,
        arguments=f"s3://{wrapper_bucket_name}/{wrapper_object_key}",
    )

    # Launch templates need user data in MIME multi-part archive format, so
    # the rendered payload is embedded in a hand-assembled wrapper.
    mime_lines = (
        'Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="',
        '',
        '--==MYBOUNDARY==',
        'Content-Type: text/x-shellscript; charset="us-ascii"',
        '',
        # Install the AWS CLI (original note: it is missing from the AMI).
        'yum -y install unzip',
        'cd /opt',
        'curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"',
        'unzip awscliv2.zip',
        'sudo ./aws/install --bin-dir /usr/bin',
        # The actual user data payload.
        user_data.render(),
        '--==MYBOUNDARY==--',
    )
    mime_wrapper = ec2.UserData.custom('MIME-Version: 1.0')
    for mime_line in mime_lines:
        mime_wrapper.add_commands(mime_line)

    launch_template = ec2.CfnLaunchTemplate(
        self,
        'UmccriseBatchComputeLaunchTemplate',
        launch_template_name='UmccriseBatchComputeLaunchTemplate',
        launch_template_data={
            'userData': core.Fn.base64(mime_wrapper.render()),
            'blockDeviceMappings': block_device_mappings,
        },
    )

    launch_template_spec = batch.LaunchTemplateSpecification(
        launch_template_name=launch_template.launch_template_name,
        version='$Latest',
    )

    # Anything other than "spot" (case-insensitive) selects on-demand.
    if props['compute_env_type'].lower() == 'spot':
        resource_type = batch.ComputeResourceType.SPOT
    else:
        resource_type = batch.ComputeResourceType.ON_DEMAND

    my_compute_res = batch.ComputeResources(
        type=resource_type,
        allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
        desiredv_cpus=0,
        maxv_cpus=320,
        minv_cpus=0,
        image=ec2.MachineImage.generic_linux(
            ami_map={'ap-southeast-2': props['compute_env_ami']}),
        launch_template=launch_template_spec,
        spot_fleet_role=spotfleet_role,
        instance_role=batch_instance_profile.instance_profile_name,
        vpc=vpc,
        vpc_subnets=ec2.SubnetSelection(
            subnet_type=ec2.SubnetType.PRIVATE,
        ),
        security_groups=[batch_security_group],
    )
    # XXX: tagging the compute resources is still open, see
    # https://github.com/aws/aws-cdk/issues/7350

    my_compute_env = batch.ComputeEnvironment(
        self,
        'UmccriseBatchComputeEnv',
        compute_environment_name="cdk-umccr_ise-batch-compute-env",
        service_role=batch_service_role,
        compute_resources=my_compute_res,
    )

    queue_member = batch.JobQueueComputeEnvironment(
        compute_environment=my_compute_env,
        order=1,
    )
    job_queue = batch.JobQueue(
        self,
        'UmccriseJobQueue',
        job_queue_name='cdk-umccrise_job_queue',
        compute_environments=[queue_member],
        priority=10,
    )

    # Host directories exposed to the container: /mnt as the writable work
    # area and /opt/container read-only for the wrapper script.
    work_mount = ecs.MountPoint(
        container_path='/work',
        read_only=False,
        source_volume='work',
    )
    container_mount = ecs.MountPoint(
        container_path='/opt/container',
        read_only=True,
        source_volume='container',
    )
    container_volume = ecs.Volume(
        name='container',
        host=ecs.Host(source_path='/opt/container'),
    )
    work_volume = ecs.Volume(
        name='work',
        host=ecs.Host(source_path='/mnt'),
    )
    job_container = batch.JobDefinitionContainer(
        image=ecs.ContainerImage.from_registry(name=props['container_image']),
        vcpus=32,
        memory_limit_mib=100000,
        command=["/opt/container/umccrise-wrapper.sh", "Ref::vcpus"],
        mount_points=[work_mount, container_mount],
        volumes=[container_volume, work_volume],
        privileged=True,
    )

    job_definition = batch.JobDefinition(
        self,
        'UmccriseJobDefinition',
        job_definition_name='cdk-umccrise-job-definition',
        parameters={'vcpus': '1'},
        container=job_container,
        timeout=core.Duration.hours(5),
    )

    ############################################################################
    # Job submission Lambda

    lambda_role = iam.Role(
        self,
        'UmccriseLambdaRole',
        assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
        managed_policies=[
            aws_managed('service-role/AWSLambdaBasicExecutionRole'),
            aws_managed('AWSBatchFullAccess'),  # TODO: restrict!
        ],
    )

    # The Lambda only ever reads, also from the read/write buckets.
    for ro_bucket in ro_buckets:
        ro_bucket.grant_read(lambda_role)
    for rw_bucket in rw_buckets:
        rw_bucket.grant_read(lambda_role)
    umccrise_repo.grant(lambda_role, 'ecr:ListImages')

    # TODO: support dev/prod split, i.e. image being configurable on dev,
    #       but fixed on prod; may need a default JobDefinition to be set up
    lambda_environment = {
        'JOBNAME_PREFIX': "UMCCRISE_",
        'JOBQUEUE': job_queue.job_queue_name,
        'UMCCRISE_MEM': '100000',
        'UMCCRISE_VCPUS': '32',
        'JOBDEF': job_definition.job_definition_name,
        'REFDATA_BUCKET': props['refdata_bucket'],
        'INPUT_BUCKET': props['input_bucket'],
        'RESULT_BUCKET': props['result_bucket'],
        'IMAGE_CONFIGURABLE': props['image_configurable'],
    }
    lmbda.Function(
        self,
        'UmccriseLambda',
        function_name='umccrise_batch_lambda',
        handler='umccrise.lambda_handler',
        runtime=lmbda.Runtime.PYTHON_3_7,
        code=lmbda.Code.from_asset('lambdas/umccrise'),
        environment=lambda_environment,
        role=lambda_role,
    )
def __init__(self, scope: core.Construct, id: str, props, **kwargs) -> None:
    """Build a self-contained umccrise Batch setup using low-level Cfn constructs.

    Creates the IAM roles, a dedicated single-AZ VPC, a launch template, a
    managed Batch compute environment with a job queue, and the Lambda
    function used to submit jobs.

    Args:
        scope: Parent construct.
        id: Construct id, forwarded to the parent initializer.
        props: Mapping read for the keys ``ro_buckets``, ``rw_buckets``,
            ``compute_env_type``, ``compute_env_ami``, ``refdata_bucket``
            and ``data_bucket``.
        **kwargs: Forwarded unchanged to the parent initializer.
    """
    super().__init__(scope, id, **kwargs)

    aws_managed = iam.ManagedPolicy.from_aws_managed_policy_name

    ############################################################################
    # Permissions

    # Import the existing buckets by name; the bucket name doubles as the
    # construct id.
    ro_buckets = {
        s3.Bucket.from_bucket_name(self, name, bucket_name=name)
        for name in props['ro_buckets']
    }
    rw_buckets = {
        s3.Bucket.from_bucket_name(self, name, bucket_name=name)
        for name in props['rw_buckets']
    }

    batch_service_role = iam.Role(
        self,
        'BatchServiceRole',
        assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
        managed_policies=[aws_managed('service-role/AWSBatchServiceRole')],
    )

    spotfleet_role = iam.Role(
        self,
        'AmazonEC2SpotFleetRole',
        assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
        managed_policies=[
            aws_managed('service-role/AmazonEC2SpotFleetTaggingRole'),
        ],
    )

    # Role assumed by the Batch worker instances (EC2 and ECS principals).
    instance_principals = iam.CompositePrincipal(
        iam.ServicePrincipal('ec2.amazonaws.com'),
        iam.ServicePrincipal('ecs.amazonaws.com'),
    )
    batch_instance_role = iam.Role(
        self,
        'BatchInstanceRole',
        role_name='UmccriseBatchInstanceRole',
        assumed_by=instance_principals,
        managed_policies=[
            aws_managed('service-role/AmazonEC2RoleforSSM'),
            aws_managed('service-role/AmazonEC2ContainerServiceforEC2Role'),
        ],
    )
    ec2_statement = iam.PolicyStatement(
        actions=[
            "ec2:Describe*",
            "ec2:AttachVolume",
            "ec2:CreateVolume",
            "ec2:CreateTags",
            "ec2:ModifyInstanceAttribute",
        ],
        resources=["*"],
    )
    batch_instance_role.add_to_policy(ec2_statement)
    ecs_statement = iam.PolicyStatement(
        actions=["ecs:ListClusters"],
        resources=["*"],
    )
    batch_instance_role.add_to_policy(ecs_statement)

    for ro_bucket in ro_buckets:
        ro_bucket.grant_read(batch_instance_role)
    for rw_bucket in rw_buckets:
        rw_bucket.grant_read_write(batch_instance_role)

    # Wrap the instance role in an instance profile.
    batch_instance_profile = iam.CfnInstanceProfile(
        self,
        'BatchInstanceProfile',
        instance_profile_name='UmccriseBatchInstanceProfile',
        roles=[batch_instance_role.role_name],
    )

    ############################################################################
    # Minimal networking
    # TODO: use existing common setup
    # TODO: roll out across all AZs? (Will require more subnets, NATs, ENIs, etc...)
    vpc = ec2.Vpc(self, 'UmccrVpc', cidr="10.2.0.0/16", max_azs=1)

    ############################################################################
    # Batch compute resources

    # Additional 1024 GiB gp2 volume attached as /dev/xvdf.
    block_device_mappings = [{
        'deviceName': '/dev/xvdf',
        'ebs': {
            'deleteOnTermination': True,
            'volumeSize': 1024,
            'volumeType': 'gp2',
        },
    }]

    # user_data_script is not defined in this method; it has to be provided
    # by the enclosing module.
    launch_template = ec2.CfnLaunchTemplate(
        self,
        'UmccriseBatchComputeLaunchTemplate',
        launch_template_name='UmccriseBatchComputeLaunchTemplate',
        launch_template_data={
            'userData': core.Fn.base64(user_data_script),
            'blockDeviceMappings': block_device_mappings,
        },
    )

    # TODO: Replace with proper CDK construct once available
    # TODO: Uses public subnet and default security group
    compute_resources = {
        'type': props['compute_env_type'],
        'allocationStrategy': 'BEST_FIT_PROGRESSIVE',
        'maxvCpus': 128,
        'minvCpus': 0,
        'desiredvCpus': 0,
        'imageId': props['compute_env_ami'],
        'launchTemplate': {
            'launchTemplateName': launch_template.launch_template_name,
            'version': '$Latest',
        },
        'spotIamFleetRole': spotfleet_role.role_arn,
        'instanceRole': batch_instance_profile.instance_profile_name,
        'instanceTypes': ['optimal'],
        'subnets': [vpc.public_subnets[0].subnet_id],
        'securityGroupIds': [vpc.vpc_default_security_group],
        'tags': {
            'Creator': 'Batch',
            'Name': 'BatchWorker',
        },
    }
    batch_comp_env = batch.CfnComputeEnvironment(
        self,
        'UmccriseBatchComputeEnv',
        type='MANAGED',
        service_role=batch_service_role.role_arn,
        compute_resources=compute_resources,
    )

    # TODO: Replace with proper CDK construct once available
    # TODO: job_queue_name could result in a clash, but is currently necessary
    #       as we need a reference for the ENV variables of the lambda.
    #       Could/Should append a unique element/string.
    queue_order = [{
        'computeEnvironment': batch_comp_env.ref,
        'order': 1,
    }]
    job_queue = batch.CfnJobQueue(
        self,
        'UmccriseJobQueue',
        compute_environment_order=queue_order,
        priority=10,
        job_queue_name='umccrise_job_queue',
    )

    ############################################################################
    # Job submission Lambda

    lambda_role = iam.Role(
        self,
        'UmccriseLambdaRole',
        assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
        managed_policies=[
            aws_managed('service-role/AWSLambdaBasicExecutionRole'),
            aws_managed('AWSBatchFullAccess'),  # TODO: restrict!
        ],
    )

    # The Lambda only ever reads, also from the read/write buckets.
    for ro_bucket in ro_buckets:
        ro_bucket.grant_read(lambda_role)
    for rw_bucket in rw_buckets:
        rw_bucket.grant_read(lambda_role)

    lambda_environment = {
        'JOBNAME_PREFIX': "UMCCRISE_",
        'JOBQUEUE': job_queue.job_queue_name,
        'REFDATA_BUCKET': props['refdata_bucket'],
        'DATA_BUCKET': props['data_bucket'],
        'UMCCRISE_MEM': '50000',
        'UMCCRISE_VCPUS': '16',
    }
    lmbda.Function(
        self,
        'UmccriseLambda',
        function_name='umccrise_batch_lambda',
        handler='umccrise.lambda_handler',
        runtime=lmbda.Runtime.PYTHON_3_7,
        code=lmbda.Code.from_asset('lambdas/umccrise'),
        environment=lambda_environment,
        role=lambda_role,
    )
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Provision a single EC2 instance in the default VPC, optionally as spot.

    All sizing and naming comes from CDK context values: ``VAR_VOLUME_SIZE``,
    ``EXTENDED_VOLUME_SIZE``, ``EC2_TYPE``, ``MACHINE_IMAGE``,
    ``INSTANCE_NAME``, ``USE_SPOT_INSTANCE`` and ``MAX_SPOT_PRICE``.

    Args:
        scope: Parent construct.
        id: Construct id, forwarded to the parent initializer.
        **kwargs: Forwarded unchanged to the parent initializer.
    """
    super().__init__(scope, id, **kwargs)

    # Publish the stack name as context so it can be read back below.
    self.node.set_context("STACK_NAME", self.stack_name)

    # Use the account's default VPC with the default subnet selection.
    vpc = ec2.Vpc.from_lookup(self, "VPC", is_default=True)
    vpc_subnets = ec2.SubnetSelection()

    # Access policies for the instance.
    policies = [
        # Read-only access to S3.
        iam.ManagedPolicy.from_aws_managed_policy_name(
            "AmazonS3ReadOnlyAccess"),
        # Read-only access to ECR so docker images can be pulled.
        iam.ManagedPolicy.from_aws_managed_policy_name(
            "AmazonEC2ContainerRegistryReadOnly"),
        # Allows logging in through SSM.
        iam.ManagedPolicy.from_aws_managed_policy_name(
            "AmazonSSMManagedInstanceCore"),
    ]

    # Instance role carrying the policies above.
    role = iam.Role(
        self,
        "EC2Role",
        assumed_by=iam.ServicePrincipal("ec2.amazonaws.com"),
        managed_policies=policies,
    )

    # EBS volume sized by VAR_VOLUME_SIZE, attached as /dev/sdf.
    ebs_var_vol = ec2.BlockDeviceVolume.ebs(
        volume_size=int(self.node.try_get_context("VAR_VOLUME_SIZE")))
    ebs_var_block_device = ec2.BlockDevice(
        device_name="/dev/sdf", volume=ebs_var_vol)

    # EBS volume sized by EXTENDED_VOLUME_SIZE, attached as /dev/sdg.
    ebs_extended_vol = ec2.BlockDeviceVolume.ebs(
        volume_size=int(self.node.try_get_context("EXTENDED_VOLUME_SIZE")))
    ebs_extended_block_device = ec2.BlockDevice(
        device_name="/dev/sdg", volume=ebs_extended_vol)

    # Bootstrap script. According to the original author's note (the script
    # itself is not part of this file), user_data.sh:
    #   1. Installs docker into the ec2 instance
    #   2. Mounts our volume to /mnt/
    #   3. Logs into docker
    # The account id and region placeholders are substituted via Fn::Sub.
    mappings = {
        "__ACCOUNT_ID__": str(self.account),
        "__REGION__": str(self.region),
    }
    with open("user_data/user_data.sh", 'r') as user_data_h:
        user_data_sub = core.Fn.sub(user_data_h.read(), mappings)
    user_data = ec2.UserData.custom(user_data_sub)

    # Instance type and AMI are taken from context.
    instance_type = ec2.InstanceType(
        instance_type_identifier=self.node.try_get_context("EC2_TYPE"))
    machine_image = ec2.GenericLinuxImage({
        self.region: self.node.try_get_context("MACHINE_IMAGE"),
    })

    # Everything except the launch template goes straight into the instance;
    # the launch template is attached afterwards as a property override.
    host = ec2.Instance(
        self,
        id="{}-instance".format(self.node.try_get_context("STACK_NAME")),
        instance_type=instance_type,
        instance_name=self.node.try_get_context("INSTANCE_NAME"),
        machine_image=machine_image,
        vpc=vpc,
        vpc_subnets=vpc_subnets,
        role=role,
        user_data=user_data,
        block_devices=[ebs_var_block_device, ebs_extended_block_device],
    )

    # The context value may be missing (None) or a JSON boolean rather than
    # a string; normalise through str() so neither case raises on .lower().
    use_spot = str(
        self.node.try_get_context("USE_SPOT_INSTANCE")).lower() == 'true'
    if use_spot:
        # Spot pricing via a launch template attached to the instance.
        spot_price = self.node.try_get_context("MAX_SPOT_PRICE")
        market_options = {"MarketType": "spot"}
        if spot_price is not None:
            market_options["SpotOptions"] = {"MaxPrice": spot_price}
        launch_template_data = {"InstanceMarketOptions": market_options}

        launch_template = ec2.CfnLaunchTemplate(self, "LaunchTemplate")
        launch_template.add_property_override(
            "LaunchTemplateData", launch_template_data)
        host.instance.add_property_override(
            "LaunchTemplate", {
                "LaunchTemplateId": launch_template.ref,
                "Version": launch_template.attr_latest_version_number,
            })

    # Export the instance id. Note that the output may be available before
    # the user_data shell script has finished running on the instance.
    core.CfnOutput(self, "Output", value=host.instance_id)
def __init__(self, app, id, target, **kwargs):
    """Create one single-instance auto scaling group per configured instance spec.

    The consolidated context is stored on ``self.config``. A shared security
    group, instance role/profile and cluster placement group are created
    once; every entry of ``self.config["instance_spec"]`` then gets its own
    launch template and auto scaling group.

    Args:
        app: Parent construct, forwarded to the parent initializer.
        id: Construct id, forwarded to the parent initializer.
        target: Passed to ``consolidate_context`` to build ``self.config``.
        **kwargs: Forwarded unchanged to the parent initializer.
    """
    super().__init__(app, id, **kwargs)
    self.config = consolidate_context(self, target)

    # Look up the VPC; the id is read from the "us-west-2" config entry.
    vpc = ec2.Vpc.from_lookup(
        self, "VPC", vpc_id=self.config["vpc_id"]["us-west-2"])
    private_subnets = vpc.private_subnets

    # AMI for the current region.
    machine_image = ec2.MachineImage.generic_linux(
        {self.region: self.config["ami_name"]})

    # Dedicated security group for the DT instances, with the default rules.
    instance_sg = ec2.SecurityGroup(
        self, "EC2SG", description='ec2 SG', vpc=vpc)
    self._add_default_rules(instance_sg, vpc)

    # IAM role and instance profile for the EC2 instances.
    # NOTE(review): the inline statement allows s3:* on the ARN of a managed
    # policy rather than on S3 resources - confirm this is what was intended.
    s3_statement = iam.PolicyStatement(
        actions=["s3:*"],
        resources=["arn:aws:iam::aws:policy/AmazonS3FullAccess"],
    )
    instance_role = iam.Role(
        self,
        "EC2Role",
        assumed_by=iam.ServicePrincipal("ec2.amazonaws.com"),
        inline_policies={
            "extra-permissions":
                iam.PolicyDocument(statements=[s3_statement]),
        },
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                managed_policy_name="ReadOnlyAccess"),
            # Required by SSM StateManager
            iam.ManagedPolicy.from_aws_managed_policy_name(
                managed_policy_name="AmazonSSMManagedInstanceCore"),
        ],
    )
    instance_profile = iam.CfnInstanceProfile(
        self,
        "EC2InstProf",
        path="/",
        roles=[instance_role.role_name],
    )

    # Cluster placement group keeps all DT nodes close to each other, which
    # restricts the deployment to a single AZ.
    placement_group = ec2.CfnPlacementGroup(
        self, "EC2PG", strategy="cluster")

    for key, spec in self.config["instance_spec"].items():
        # Root volume for this spec.
        root_volume = ec2.CfnLaunchTemplate.EbsProperty(
            volume_size=self.config["ebs_volume_size"],
            volume_type="gp2",
        )
        root_device = ec2.CfnLaunchTemplate.BlockDeviceMappingProperty(
            device_name="/dev/xvda",
            ebs=root_volume,
        )
        profile_ref = ec2.CfnLaunchTemplate.IamInstanceProfileProperty(
            arn=instance_profile.attr_arn)
        template_data = ec2.CfnLaunchTemplate.LaunchTemplateDataProperty(
            block_device_mappings=[root_device],
            iam_instance_profile=profile_ref,
            image_id=str(machine_image.get_image(self).image_id),
            instance_type=spec["instance_type"],
            # TODO: we should use SSM Systems Manager rather than native SSH
            key_name=self.config["key_pair"].format(region=self.region),
            security_group_ids=[instance_sg.security_group_id],
        )
        launch_template = ec2.CfnLaunchTemplate(
            self,
            f"EC2LT{key}",
            launch_template_data=template_data,
        )

        # On-demand vs. spot split comes from the spec.
        distribution = autoscaling.CfnAutoScalingGroup.InstancesDistributionProperty(
            on_demand_base_capacity=0,
            on_demand_percentage_above_base_capacity=spec[
                'on_demand_percentage_above_base_capacity'],
            spot_allocation_strategy="lowest-price",
            spot_instance_pools=1,
        )
        template_spec = autoscaling.CfnAutoScalingGroup.LaunchTemplateSpecificationProperty(
            launch_template_id=launch_template.ref,
            version=launch_template.attr_latest_version_number,
        )
        mixed_policy = autoscaling.CfnAutoScalingGroup.MixedInstancesPolicyProperty(
            instances_distribution=distribution,
            launch_template=autoscaling.CfnAutoScalingGroup.LaunchTemplateProperty(
                launch_template_specification=template_spec),
        )
        asg = autoscaling.CfnAutoScalingGroup(
            self,
            f"ASG{key}",
            desired_capacity="1",
            min_size="1",
            max_size="1",
            mixed_instances_policy=mixed_policy,
            # Placement group use restricts us to a single AZ ...
            placement_group=placement_group.ref,
            # ... hence only the first private subnet is used.
            vpc_zone_identifier=[private_subnets[0].subnet_id],
        )

        # Name tag on the ASG; the original note says it is propagated to
        # the underlying EC2 instances.
        asg_name = f'{self.config["prefix"]} ASG {key}'
        core.Tags.of(asg).add("Name", asg_name)
def __init__(self, scope: core.Construct, construct_id: str,
             **kwargs) -> None:
    """Create an EKS cluster whose managed nodegroup uses a custom launch template.

    Builds a new VPC, an admin role assumable by the account root, a
    Kubernetes 1.18 cluster without default capacity, a launch template for
    the worker nodes and a managed nodegroup of two nodes based on it.

    If the ``console_user`` context value is set, that IAM user is mapped to
    the ``system:masters`` group of the cluster.

    Args:
        scope: Parent construct.
        construct_id: Construct id, forwarded to the parent initializer.
        **kwargs: Forwarded unchanged to the parent initializer.
    """
    super().__init__(scope, construct_id, **kwargs)

    # Alternatives kept from the original for reference:
    #   default VPC: ec2.Vpc.from_lookup(self, id='Vpc', is_default=True)
    #   by VPC id:   ec2.Vpc.from_lookup(self, id='Vpc', vpc_id='vpc-0417e46d')
    vpc = ec2.Vpc(self, 'eks-vpc', cidr='10.3.0.0/16', max_azs=3,
                  nat_gateways=1)

    # EKS admin role, assumable by the account root principal.
    eks_master_role = iam.Role(
        self,
        'EksMasterRole',
        role_name='EksAdminRole',
        assumed_by=iam.AccountRootPrincipal(),
    )

    cluster = eks.Cluster(
        self,
        'Cluster',
        vpc=vpc,
        version=eks.KubernetesVersion.V1_18,
        masters_role=eks_master_role,
        default_capacity=0,
    )

    # Conditionally add the AWS console login user to RBAC so the EKS
    # workloads can be browsed. try_get_context yields a falsy value when
    # the key is not set, so a plain truthiness check is sufficient.
    console_user_string = self.node.try_get_context('console_user')
    if console_user_string:
        console_user = iam.User.from_user_name(
            self, 'ConsoleUser', user_name=console_user_string)
        cluster.aws_auth.add_user_mapping(
            console_user, groups=['system:masters'])

    # Node user data: bootstrap.sh must be given the real cluster name (a
    # deploy-time token), not a literal placeholder string.
    user_data = ec2.UserData.for_linux()
    user_data.add_commands(
        'set -o xtrace',
        '/etc/eks/bootstrap.sh {}'.format(cluster.cluster_name),
    )

    lt = ec2.CfnLaunchTemplate(
        self,
        'LT',
        launch_template_data={
            'imageId': eks.EksOptimizedImage().get_image(self).image_id,
            'instanceType': ec2.InstanceType('t3.large').to_string(),
            # camelCase like the sibling keys so it maps onto the
            # LaunchTemplateData UserData property.
            'userData': core.Fn.base64(user_data.render()),
            'tagSpecifications': [{
                'resourceType': 'instance',
                'tags': [
                    {'key': 'Name', 'value': 'MNG'},
                    {'key': 'Foo', 'value': 'Bar'},
                ],
            }],
        },
    )

    # Managed nodegroup based on the launch template above.
    cluster.add_nodegroup_capacity(
        'MNG',
        launch_template_spec={
            'id': lt.ref,
            'version': lt.attr_default_version_number,
        },
        desired_size=2,
    )