Code example #1
 def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
     super().__init__(scope, id, **kwargs)
     # Launch templates
     # https://aws.amazon.com/marketplace/pp/B00O7WM7QW ami-06a46da680048c8ae
     template01=aws_ec2.CfnLaunchTemplate(self, 'template01',
         launch_template_data={'imageId':'ami-06a46da680048c8ae', 
                             'blockDeviceMappings':[{'deviceName':'/dev/sda1','ebs':{'deleteOnTermination':True, 'volumeSize':20, 'volumeType':'gp2'}}],
                             'securityGroupIds':[core.Fn.import_value('publicsecuritygroup01')],
                             'instanceType':'t3.micro'},
         launch_template_name='public01')
     template02=aws_ec2.CfnLaunchTemplate(self, 'template02',
         launch_template_data={'imageId':'ami-06a46da680048c8ae',
                             'blockDeviceMappings':[{'deviceName':'/dev/sda1','ebs':{'deleteOnTermination':True, 'volumeSize':20, 'volumeType':'gp2'}}],
                             'securityGroupIds':[core.Fn.import_value('privatesecuritygroup01')],
                             'instanceType':'t3.micro'},
         launch_template_name='private01')
     # public instance
     instance01=aws_ec2.CfnInstance(self, 'instance01',
         launch_template={'launchTemplateId': template01.ref, 'version': template01.attr_latest_version_number},
         key_name='aws-example-key',
         subnet_id=core.Fn.import_value('publicsubnet01'))
     aws_ec2.CfnEIP(self, 'eip',
         domain='vpc',
         instance_id=instance01.ref,
         tags=[core.CfnTag(key='Name', value='eip01')])
     # private instance
     aws_ec2.CfnInstance(self, 'instance02',
         launch_template={'launchTemplateId': template02.ref, 'version': template02.attr_latest_version_number},
         key_name='aws-example-key',
         subnet_id=core.Fn.import_value('publicsubnet02'))
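As a hedged usage sketch (the stack class name Ec2Stack and the app wiring below are assumptions for illustration, not part of the original snippet), a stack containing the __init__ above would be synthesized like this:

# Hypothetical CDK v1 entry point; Ec2Stack is an assumed name for the
# class whose __init__ is shown above.
from aws_cdk import core

app = core.App()
Ec2Stack(app, 'ec2-instances')
app.synth()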
Code example #2
    def __init__(
        self,
        scope: Construct,
        construct_id: str,
        *,
        deploy_env: str,
        processing_assets_table: aws_dynamodb.Table,
    ):
        # pylint: disable=too-many-locals
        super().__init__(scope, construct_id)

        if deploy_env == "prod":
            instance_types = [
                aws_ec2.InstanceType("c5.xlarge"),
                aws_ec2.InstanceType("c5.2xlarge"),
                aws_ec2.InstanceType("c5.4xlarge"),
                aws_ec2.InstanceType("c5.9xlarge"),
            ]
        else:
            instance_types = [
                aws_ec2.InstanceType("m5.large"),
                aws_ec2.InstanceType("m5.xlarge"),
            ]

        ec2_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            "service-role/AmazonEC2ContainerServiceforEC2Role")

        batch_instance_role = aws_iam.Role(
            self,
            "batch-instance-role",
            assumed_by=aws_iam.ServicePrincipal(
                "ec2.amazonaws.com"),  # type: ignore[arg-type]
            managed_policies=[ec2_policy],
        )
        processing_assets_table.grant_read_write_data(
            batch_instance_role)  # type: ignore[arg-type]

        batch_instance_profile = aws_iam.CfnInstanceProfile(
            self,
            "batch-instance-profile",
            roles=[batch_instance_role.role_name],
        )

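        # AWS Batch merges launch-template user data with its own, so the
        # script must be wrapped in a MIME multi-part archive. The shell
        # part below tells the ECS agent to prefer cached container images
        # when pulling.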
        batch_launch_template_data = textwrap.dedent("""
            MIME-Version: 1.0
            Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="

            --==MYBOUNDARY==
            Content-Type: text/x-shellscript; charset="us-ascii"

            #!/bin/bash
            echo ECS_IMAGE_PULL_BEHAVIOR=prefer-cached >> /etc/ecs/ecs.config

            --==MYBOUNDARY==--
            """)
        launch_template_data = aws_ec2.CfnLaunchTemplate.LaunchTemplateDataProperty(
            user_data=Fn.base64(batch_launch_template_data.strip()))
        cloudformation_launch_template = aws_ec2.CfnLaunchTemplate(
            self,
            "batch-launch-template",
            launch_template_name=f"{deploy_env}-datalake-batch-launch-template",
            launch_template_data=launch_template_data,
        )
        assert cloudformation_launch_template.launch_template_name is not None
        launch_template = aws_batch.LaunchTemplateSpecification(
            launch_template_name=cloudformation_launch_template.launch_template_name)

        # Use the existing VPC in the LINZ AWS account.
        # A VPC with these tags must already exist in the account before deployment;
        # this project does not deploy a VPC itself.
        vpc = aws_ec2.Vpc.from_lookup(
            self,
            "datalake-vpc",
            tags={
                APPLICATION_NAME_TAG_NAME: APPLICATION_NAME,
                "ApplicationLayer": "networking",
            },
        )

        compute_resources = aws_batch.ComputeResources(
            vpc=vpc,
            minv_cpus=0,
            desiredv_cpus=0,
            maxv_cpus=1000,
            instance_types=instance_types,
            instance_role=batch_instance_profile.instance_profile_name,
            allocation_strategy=aws_batch.AllocationStrategy(
                "BEST_FIT_PROGRESSIVE"),
            launch_template=launch_template,
        )
        batch_service_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            "service-role/AWSBatchServiceRole")
        service_role = aws_iam.Role(
            self,
            "batch-service-role",
            assumed_by=aws_iam.ServicePrincipal(
                "batch.amazonaws.com"),  # type: ignore[arg-type]
            managed_policies=[batch_service_policy],
        )
        compute_environment = aws_batch.ComputeEnvironment(
            self,
            "compute-environment",
            compute_resources=compute_resources,
            service_role=service_role,  # type: ignore[arg-type]
        )

        self.job_queue = aws_batch.JobQueue(
            scope,
            f"{construct_id}-job-queue",
            compute_environments=[
                aws_batch.JobQueueComputeEnvironment(
                    compute_environment=compute_environment,
                    order=10  # type: ignore[arg-type]
                ),
            ],
            priority=10,
        )
Code example #3
    def __init__(self, scope: core.Construct, id: str, props,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        ################################################################################
        # Set up permissions
        ro_buckets = set()
        for bucket in props['ro_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(self,
                                                    bucket,
                                                    bucket_name=bucket)
            ro_buckets.add(tmp_bucket)

        rw_buckets = set()
        for bucket in props['rw_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(self,
                                                    bucket,
                                                    bucket_name=bucket)
            rw_buckets.add(tmp_bucket)

        batch_service_role = iam.Role(
            self,
            'BatchServiceRole',
            assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSBatchServiceRole')
            ])

        spotfleet_role = iam.Role(
            self,
            'AmazonEC2SpotFleetRole',
            assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2SpotFleetTaggingRole')
            ])

        # Create role for Batch instances
        batch_instance_role = iam.Role(
            self,
            'BatchInstanceRole',
            role_name='UmccriseBatchInstanceRole',
            assumed_by=iam.CompositePrincipal(
                iam.ServicePrincipal('ec2.amazonaws.com'),
                iam.ServicePrincipal('ecs.amazonaws.com')),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2RoleforSSM'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2ContainerServiceforEC2Role')
            ])
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(actions=[
                "ec2:Describe*", "ec2:AttachVolume", "ec2:CreateVolume",
                "ec2:CreateTags", "ec2:ModifyInstanceAttribute"
            ],
                                resources=["*"]))
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(actions=["ecs:ListClusters"], resources=["*"]))
        for bucket in ro_buckets:
            bucket.grant_read(batch_instance_role)
        for bucket in rw_buckets:
            # restrict writes to paths matching */umccrised/*
            bucket.grant_read_write(batch_instance_role, '*/umccrised/*')

        # Turn the instance role into an Instance Profile
        batch_instance_profile = iam.CfnInstanceProfile(
            self,
            'BatchInstanceProfile',
            instance_profile_name='UmccriseBatchInstanceProfile',
            roles=[batch_instance_role.role_name])

        ################################################################################
        # Minimal networking
        # TODO: import resource created with TF
        vpc = props['vpc']

        ################################################################################
        # Setup Batch compute resources

        # Configure BlockDevice to expand instance disk space (if needed?)
        block_device_mappings = [{
            'deviceName': '/dev/xvdf',
            'ebs': {
                'deleteOnTermination': True,
                'volumeSize': 1024,
                'volumeType': 'gp2'
            }
        }]

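        # The low-level Cfn construct accepts a plain dict here; the
        # camelCase keys are converted to the CloudFormation
        # LaunchTemplateData properties.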
        launch_template = ec2.CfnLaunchTemplate(
            self,
            'UmccriseBatchComputeLaunchTemplate',
            launch_template_name='UmccriseBatchComputeLaunchTemplate',
            launch_template_data={
                'userData': core.Fn.base64(user_data_script),
                'blockDeviceMappings': block_device_mappings
            })

        launch_template_spec = batch.LaunchTemplateSpecification(
            launch_template_name=launch_template.launch_template_name,
            version='$Latest')
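        # version='$Latest' makes Batch resolve the most recent template
        # version each time it launches instances, so template updates take
        # effect without touching the compute environment.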

        my_compute_res = batch.ComputeResources(
            type=batch.ComputeResourceType.SPOT,
            allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
            desiredv_cpus=0,
            maxv_cpus=128,
            minv_cpus=0,
            image=ec2.MachineImage.generic_linux(
                ami_map={'ap-southeast-2': props['compute_env_ami']}),
            launch_template=launch_template_spec,
            spot_fleet_role=spotfleet_role,
            instance_role=batch_instance_profile.instance_profile_name,
            vpc=vpc,
            #compute_resources_tags=core.Tag('Creator', 'Batch')
        )
        # XXX: How to add more than one tag above??
        # core.Tag.add(my_compute_res, 'Foo', 'Bar')

        my_compute_env = batch.ComputeEnvironment(
            self,
            'UmccriseBatchComputeEnv',
            compute_environment_name="cdk-umccrise-batch-compute-env",
            service_role=batch_service_role,
            compute_resources=my_compute_res)

        job_queue = batch.JobQueue(self,
                                   'UmccriseJobQueue',
                                   job_queue_name='cdk-umccrise_job_queue',
                                   compute_environments=[
                                       batch.JobQueueComputeEnvironment(
                                           compute_environment=my_compute_env,
                                           order=1)
                                   ],
                                   priority=10)

        job_container = batch.JobDefinitionContainer(
            image=ecs.ContainerImage.from_registry(
                name=props['container_image']),
            vcpus=2,
            memory_limit_mib=2048,
            command=["/opt/container/umccrise-wrapper.sh", "Ref::vcpus"],
            mount_points=[
                ecs.MountPoint(container_path='/work',
                               read_only=False,
                               source_volume='work'),
                ecs.MountPoint(container_path='/opt/container',
                               read_only=True,
                               source_volume='container')
            ],
            volumes=[
                ecs.Volume(name='container',
                           host=ecs.Host(source_path='/opt/container')),
                ecs.Volume(name='work', host=ecs.Host(source_path='/mnt'))
            ],
            privileged=True)

        job_definition = batch.JobDefinition(
            self,
            'UmccriseJobDefinition',
            job_definition_name='cdk-umccrise-job-definition',
            parameters={'vcpus': '1'},
            container=job_container,
            timeout=core.Duration.hours(5))
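        # At submission time AWS Batch substitutes "Ref::vcpus" in the
        # container command with the job's 'vcpus' parameter (default '1'
        # from the parameters map above).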

        ################################################################################
        # Set up job submission Lambda

        lambda_role = iam.Role(
            self,
            'UmccriseLambdaRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSLambdaBasicExecutionRole'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AWSBatchFullAccess')  # TODO: restrict!
            ])

        for bucket in ro_buckets:
            bucket.grant_read(lambda_role)
        for bucket in rw_buckets:
            bucket.grant_read(lambda_role)

        # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
        #       may need a default JobDefinition to be set up
        lmbda.Function(self,
                       'UmccriseLambda',
                       function_name='umccrise_batch_lambda',
                       handler='umccrise.lambda_handler',
                       runtime=lmbda.Runtime.PYTHON_3_7,
                       code=lmbda.Code.from_asset('lambdas/umccrise'),
                       environment={
                           'JOBNAME_PREFIX': "UMCCRISE_",
                           'JOBQUEUE': job_queue.job_queue_name,
                           'REFDATA_BUCKET': props['refdata_bucket'],
                           'DATA_BUCKET': props['data_bucket'],
                           'UMCCRISE_MEM': '50000',
                           'UMCCRISE_VCPUS': '16'
                       },
                       role=lambda_role)
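For reference, the props dict consumed above has this shape; every key appears in the code, while the values are illustrative placeholders:

# Hedged example of the expected props (all values are placeholders).
# Note: user_data_script used in the launch template is a module-level
# variable, not a prop.
props = {
    'ro_buckets': ['example-refdata-bucket'],
    'rw_buckets': ['example-results-bucket'],
    'vpc': vpc,                                  # an ec2.IVpc object
    'compute_env_ami': 'ami-0123456789abcdef0',  # custom Batch AMI (ap-southeast-2)
    'container_image': 'example/umccrise:latest',
    'refdata_bucket': 'example-refdata-bucket',
    'data_bucket': 'example-data-bucket',
}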
Code example #4
    def _add_compute_resource_launch_template(
        self,
        queue,
        compute_resource,
        instance_type,
        queue_pre_install_action,
        queue_post_install_action,
        queue_lt_security_groups,
        queue_placement_group,
    ):
        # LT network interfaces
        compute_lt_nw_interfaces = [
            ec2.CfnLaunchTemplate.NetworkInterfaceProperty(
                device_index=0,
                associate_public_ip_address=queue.networking.assign_public_ip
                if compute_resource.max_network_interface_count == 1 else
                None,  # parameter not supported for instance types with multiple network interfaces
                interface_type="efa" if compute_resource.efa
                and compute_resource.efa.enabled else None,
                groups=queue_lt_security_groups,
                subnet_id=queue.networking.subnet_ids[0],
            )
        ]
        for device_index in range(
                1, compute_resource.max_network_interface_count):
            compute_lt_nw_interfaces.append(
                ec2.CfnLaunchTemplate.NetworkInterfaceProperty(
                    device_index=device_index,
                    network_card_index=device_index,
                    interface_type="efa" if compute_resource.efa
                    and compute_resource.efa.enabled else None,
                    groups=queue_lt_security_groups,
                    subnet_id=queue.networking.subnet_ids[0],
                ))
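        # Secondary interfaces get network_card_index == device_index so
        # that instance types with multiple network cards (needed for
        # multi-EFA setups) spread one interface per card.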

        instance_market_options = None
        if queue.capacity_type == CapacityType.SPOT:
            instance_market_options = ec2.CfnLaunchTemplate.InstanceMarketOptionsProperty(
                market_type="spot",
                spot_options=ec2.CfnLaunchTemplate.SpotOptionsProperty(
                    spot_instance_type="one-time",
                    instance_interruption_behavior="terminate",
                    max_price=None if compute_resource.spot_price is None else
                    str(compute_resource.spot_price),
                ),
            )
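        # One-time Spot requests that terminate on interruption let the
        # scheduler replace lost capacity with fresh instances instead of
        # waiting for a stopped instance to resume.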

        ec2.CfnLaunchTemplate(
            self.stack_scope,
            f"ComputeServerLaunchTemplate{create_hash_suffix(queue.name + instance_type)}",
            launch_template_name=f"{self.stack_name}-{queue.name}-{instance_type}",
            launch_template_data=ec2.CfnLaunchTemplate.LaunchTemplateDataProperty(
                instance_type=instance_type,
                cpu_options=ec2.CfnLaunchTemplate.CpuOptionsProperty(
                    core_count=compute_resource.vcpus, threads_per_core=1) if
                compute_resource.pass_cpu_options_in_launch_template else None,
                block_device_mappings=get_block_device_mappings(
                    queue.compute_settings.local_storage,
                    self.config.image.os),
                # key_name=,
                network_interfaces=compute_lt_nw_interfaces,
                placement=ec2.CfnLaunchTemplate.PlacementProperty(
                    group_name=queue_placement_group),
                image_id=self.config.image_dict[queue.name],
                ebs_optimized=compute_resource.is_ebs_optimized,
                iam_instance_profile=ec2.CfnLaunchTemplate.IamInstanceProfileProperty(
                    name=self.instance_profiles[queue.name]),
                instance_market_options=instance_market_options,
                user_data=Fn.base64(
                    Fn.sub(
                        get_user_data_content(
                            "../resources/compute_node/user_data.sh"),
                        {
                            **{
                                "EnableEfa":
                                "efa" if compute_resource.efa and compute_resource.efa.enabled else "NONE",
                                "RAIDOptions":
                                get_shared_storage_options_by_type(
                                    self.shared_storage_options, SharedStorageType.RAID),
                                "DisableHyperThreadingManually":
                                "true" if compute_resource.disable_simultaneous_multithreading_manually else "false",
                                "BaseOS":
                                self.config.image.os,
                                "PreInstallScript":
                                queue_pre_install_action.script if queue_pre_install_action else "NONE",
                                "PreInstallArgs":
                                join_shell_args(queue_pre_install_action.args) if queue_pre_install_action and queue_pre_install_action.args else "NONE",
                                "PostInstallScript":
                                queue_post_install_action.script if queue_post_install_action else "NONE",
                                "PostInstallArgs":
                                join_shell_args(queue_post_install_action.args) if queue_post_install_action and queue_post_install_action.args else "NONE",
                                "EFSId":
                                get_shared_storage_ids_by_type(
                                    self.shared_storage_mappings, SharedStorageType.EFS),
                                "EFSOptions":
                                get_shared_storage_options_by_type(
                                    self.shared_storage_options, SharedStorageType.EFS),  # FIXME
                                "FSXId":
                                get_shared_storage_ids_by_type(
                                    self.shared_storage_mappings, SharedStorageType.FSX),
                                "FSXMountName":
                                self.shared_storage_attributes[SharedStorageType.FSX].get(
                                    "MountName", ""),
                                "FSXDNSName":
                                self.shared_storage_attributes[SharedStorageType.FSX].get(
                                    "DNSName", ""),
                                "FSXOptions":
                                get_shared_storage_options_by_type(
                                    self.shared_storage_options, SharedStorageType.FSX),
                                "Scheduler":
                                self.config.scheduling.scheduler,
                                "EphemeralDir":
                                queue.compute_settings.local_storage.ephemeral_volume.mount_dir if queue.compute_settings and queue.compute_settings.local_storage and queue.compute_settings.local_storage.ephemeral_volume else "/scratch",
                                "EbsSharedDirs":
                                get_shared_storage_options_by_type(
                                    self.shared_storage_options, SharedStorageType.EBS),
                                "ClusterDNSDomain":
                                str(self.cluster_hosted_zone.name) if self.cluster_hosted_zone else "",
                                "ClusterHostedZone":
                                str(self.cluster_hosted_zone.ref) if self.cluster_hosted_zone else "",
                                "OSUser":
                                OS_MAPPING[self.config.image.os]["user"],
                                "DynamoDBTable":
                                self.dynamodb_table.ref,
                                "LogGroupName":
                                self.log_group.log_group_name if self.config.monitoring.logs.cloud_watch.enabled else "NONE",
                                "IntelHPCPlatform":
                                "true" if self.config.is_intel_hpc_platform_enabled else "false",
                                "CWLoggingEnabled":
                                "true" if self.config.is_cw_logging_enabled else "false",
                                "QueueName":
                                queue.name,
                                "EnableEfaGdr":
                                "compute" if compute_resource.efa and compute_resource.efa.gdr_support else "NONE",
                                "CustomNodePackage":
                                self.config.custom_node_package or "",
                                "CustomAwsBatchCliPackage":
                                self.config.custom_aws_batch_cli_package or "",
                                "ExtraJson":
                                self.config.extra_chef_attributes,
                            },
                            **get_common_user_data_env(queue, self.config),
                        },
                    )),
                monitoring=ec2.CfnLaunchTemplate.MonitoringProperty(
                    enabled=False),
                tag_specifications=[
                    ec2.CfnLaunchTemplate.TagSpecificationProperty(
                        resource_type="instance",
                        tags=get_default_instance_tags(
                            self.stack_name, self.config, compute_resource,
                            "Compute", self.shared_storage_mappings) + [
                                CfnTag(key=PCLUSTER_QUEUE_NAME_TAG,
                                       value=queue.name)
                            ] + get_custom_tags(self.config),
                    ),
                    ec2.CfnLaunchTemplate.TagSpecificationProperty(
                        resource_type="volume",
                        tags=get_default_volume_tags(
                            self.stack_name, "Compute") + [
                                CfnTag(key=PCLUSTER_QUEUE_NAME_TAG,
                                       value=queue.name)
                            ] + get_custom_tags(self.config),
                    ),
                ],
            ),
        )
Code example #5
    def __init__(self, scope: core.Construct, id: str, props,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        ################################################################################
        # Set up permissions
        ro_buckets = set()
        for bucket in props['ro_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(self,
                                                    bucket,
                                                    bucket_name=bucket)
            ro_buckets.add(tmp_bucket)

        rw_buckets = set()
        for bucket in props['rw_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(self,
                                                    bucket,
                                                    bucket_name=bucket)
            rw_buckets.add(tmp_bucket)

        batch_service_role = iam.Role(
            self,
            'BatchServiceRole',
            assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSBatchServiceRole')
            ])

        spotfleet_role = iam.Role(
            self,
            'AmazonEC2SpotFleetRole',
            assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2SpotFleetTaggingRole')
            ])

        # Create role for Batch instances
        batch_instance_role = iam.Role(
            self,
            'BatchInstanceRole',
            role_name='RnasumBatchInstanceRole',
            assumed_by=iam.CompositePrincipal(
                iam.ServicePrincipal('ec2.amazonaws.com'),
                iam.ServicePrincipal('ecs.amazonaws.com')),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2RoleforSSM'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2ContainerServiceforEC2Role')
            ])
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(actions=[
                "ec2:Describe*", "ec2:AttachVolume", "ec2:CreateVolume",
                "ec2:CreateTags", "ec2:ModifyInstanceAttribute"
            ],
                                resources=["*"]))
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(actions=["ecs:ListClusters"], resources=["*"]))
        for bucket in ro_buckets:
            bucket.grant_read(batch_instance_role)
        for bucket in rw_buckets:
            # TODO: restrict write to paths with */rnasum/*
            bucket.grant_read_write(batch_instance_role)

        # Turn the instance role into an Instance Profile
        batch_instance_profile = iam.CfnInstanceProfile(
            self,
            'BatchInstanceProfile',
            instance_profile_name='RnasumBatchInstanceProfile',
            roles=[batch_instance_role.role_name])

        ################################################################################
        # Minimal networking
        # TODO: import resource created with TF
        vpc = props['vpc']

        ################################################################################
        # Setup Batch compute resources

        # Configure BlockDevice to expand instance disk space (if needed?)
        block_device_mappings = [{
            'deviceName': '/dev/xvdf',
            'ebs': {
                'deleteOnTermination': True,
                'volumeSize': 1024,
                'volumeType': 'gp2'
            }
        }]

        launch_template = ec2.CfnLaunchTemplate(
            self,
            'RnasumBatchComputeLaunchTemplate',
            launch_template_name='RnasumBatchComputeLaunchTemplate',
            launch_template_data={
                # 'userData': core.Fn.base64(user_data_script),   FIXME may not need this for RNAsum case? see job_definition below
                'blockDeviceMappings': block_device_mappings
            })

        launch_template_spec = batch.LaunchTemplateSpecification(
            launch_template_name=launch_template.launch_template_name,
            version='$Latest')

        my_compute_res = batch.ComputeResources(
            type=batch.ComputeResourceType.SPOT,
            allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
            desiredv_cpus=0,
            maxv_cpus=80,
            minv_cpus=0,
            image=ec2.MachineImage.generic_linux(
                ami_map={'ap-southeast-2': props['compute_env_ami']}),
            launch_template=launch_template_spec,
            spot_fleet_role=spotfleet_role,
            instance_role=batch_instance_profile.instance_profile_name,
            vpc=vpc,
            #compute_resources_tags=core.Tag('Creator', 'Batch')
        )
        # XXX: How to add more than one tag above??
        # core.Tag.add(my_compute_res, 'Foo', 'Bar')

        my_compute_env = batch.ComputeEnvironment(
            self,
            'RnasumBatchComputeEnv',
            compute_environment_name="RnasumBatchComputeEnv",
            service_role=batch_service_role,
            compute_resources=my_compute_res)

        job_queue = batch.JobQueue(self,
                                   'RnasumJobQueue',
                                   job_queue_name='rnasum_job_queue',
                                   compute_environments=[
                                       batch.JobQueueComputeEnvironment(
                                           compute_environment=my_compute_env,
                                           order=1)
                                   ],
                                   priority=10)

        # This is the equivalent of
        # https://github.com/umccr/infrastructure/blob/master/terraform/stacks/wts_report/jobs/wts_report.json
        default_container_props = {
            'image': props['container_image'],
            'vcpus': 2,
            'memory': 2048,
            'command': ['/opt/container/WTS-report-wrapper.sh', 'Ref::vcpus'],
            'volumes': [{
                'host': {'sourcePath': '/mnt'},
                'name': 'work'
            }, {
                'host': {'sourcePath': '/opt/container'},
                'name': 'container'
            }],
            'mountPoints': [{
                'containerPath': '/work',
                'readOnly': False,
                'sourceVolume': 'work'
            }, {
                'containerPath': '/opt/container',
                'readOnly': True,
                'sourceVolume': 'container'
            }],
            'readonlyRootFilesystem': False,
            'privileged': True,
            'ulimits': []
        }

        # and the CDK equivalent of
        # https://github.com/umccr/infrastructure/blob/master/terraform/stacks/wts_report/main.tf#L113
        job_definition = batch.CfnJobDefinition(
            self,
            'RnasumJobDefinition',
            job_definition_name='rnasum_job_dev',
            type='container',
            container_properties=default_container_props,
            parameters={
                'vcpus': 1,
            })

        ################################################################################
        # Set up job submission Lambda

        lambda_role = iam.Role(
            self,
            'RnasumLambdaRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSLambdaBasicExecutionRole'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AWSBatchFullAccess')  # TODO: restrict!
            ])

        for bucket in ro_buckets:
            bucket.grant_read(lambda_role)
        for bucket in rw_buckets:
            bucket.grant_read(lambda_role)

        # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
        #       may need a default JobDefinition to be set up
        # and CDK equivalent of
        # https://github.com/umccr/infrastructure/blob/master/terraform/stacks/wts_report/main.tf#L159
        lmbda.Function(self,
                       'RnasumLambda',
                       function_name='rnasum_batch_lambda',
                       handler='trigger_wts_report.lambda_handler',
                       runtime=lmbda.Runtime.PYTHON_3_7,
                       code=lmbda.Code.from_asset('lambdas/'),
                       environment={
                           'JOBNAME_PREFIX': "rnasum_",
                           'JOBQUEUE': job_queue.job_queue_name,
                           'JOBDEF': job_definition.job_definition_name,
                           'REFDATA_BUCKET': props['refdata_bucket'],
                           'DATA_BUCKET': props['data_bucket'],
                           'JOB_MEM': '32000',
                           'JOB_VCPUS': '8',
                           'REF_DATASET': 'PANCAN',
                           'GENOME_BUILD': '38',
                       },
                       role=lambda_role)
Code example #6
    def create_eks(self, vpc, k8sVersionTxt):

        # map input k8s version string to eks.KubernetesVersion object
        k8sVersion = self.getK8sVersion(k8sVersionTxt)

        # initialize bottlerocket arm64 AMI id from SSM parameter store
        bottleRkt_arm64Ami = ssm.StringParameter.value_for_string_parameter(
            self, "/aws/service/bottlerocket/aws-k8s-" + k8sVersionTxt +
            "/arm64/latest/image_id")

        # initialize bottlerocket x86 AMI id from SSM parameter store
        bottleRkt_x86Ami = ssm.StringParameter.value_for_string_parameter(
            self, "/aws/service/bottlerocket/aws-k8s-" + k8sVersionTxt +
            "/x86_64/latest/image_id")

        # create eks cluster
        cluster = eks.Cluster(self,
                              "EKS",
                              vpc=vpc,
                              version=k8sVersion,
                              default_capacity=0)

        # prepare userdata in TOML format
        clusterCertAuthorityData = cluster.cluster_certificate_authority_data
        clusterEndpoint = cluster.cluster_endpoint
        clusterName = cluster.cluster_name
        userdata = "settings.kubernetes.api-server = \"" + \
                    clusterEndpoint + \
                    "\"\nsettings.kubernetes.cluster-certificate = \"" + \
                    clusterCertAuthorityData + \
                    "\"\nsettings.kubernetes.cluster-name = \"" \
                    + clusterName + "\""
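        # Rendered, this is Bottlerocket's TOML settings format, e.g.
        # (placeholder values):
        #   settings.kubernetes.api-server = "https://<cluster endpoint>"
        #   settings.kubernetes.cluster-certificate = "<base64 CA data>"
        #   settings.kubernetes.cluster-name = "<cluster name>"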
        core.CfnOutput(self, "EC2-Instance-UserData", value=userdata)

        # create a launch template for arm64
        launchTemplData = ec2.CfnLaunchTemplate.LaunchTemplateDataProperty(
            image_id=bottleRkt_arm64Ami,
            instance_type="c6g.medium",
            user_data=core.Fn.base64(userdata))

        launchTempl = ec2.CfnLaunchTemplate(
            self,
            id="bottle_arm64_lt",
            launch_template_data=launchTemplData,
            launch_template_name="bottlerocket-arm64-launchTempl")

        launchTemplSpec = eks.LaunchTemplateSpec(
            id=launchTempl.ref,
            version=launchTempl.attr_default_version_number)
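        # eks.LaunchTemplateSpec takes the launch template ID (the ref of
        # the Cfn resource) plus an explicit version; here the template's
        # default version is pinned.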

        # add arm/graviton nodegroup
        ng = cluster.add_nodegroup_capacity(
            "bottle_arm64_ng",
            desired_size=1,
            nodegroup_name="bottlerocket_arm64_ng",
            launch_template_spec=launchTemplSpec)

        # add ssm access and secret access to eks node role
        ng.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSSMManagedInstanceCore"))
        ng.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "SecretsManagerReadWrite"))

        # Now repeat the same steps for x86
        # create a launch template for x86
        launchTemplData = ec2.CfnLaunchTemplate.LaunchTemplateDataProperty(
            image_id=bottleRkt_x86Ami,
            instance_type="c5.large",
            user_data=core.Fn.base64(userdata))
        launchTempl = ec2.CfnLaunchTemplate(
            self,
            id="bottle_x86_lt",
            launch_template_data=launchTemplData,
            launch_template_name="bottlerocket-x86-launchTempl")
        launchTemplSpec = eks.LaunchTemplateSpec(
            id=launchTempl.ref,
            version=launchTempl.attr_default_version_number)

        # add x86 nodegroup
        ng = cluster.add_nodegroup_capacity(
            "bottle_x86_ng",
            desired_size=1,
            nodegroup_name="bottlerocket_x86_ng",
            launch_template_spec=launchTemplSpec)

        # add ssm access and secret access to eks node role
        ng.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSSMManagedInstanceCore"))
        ng.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "SecretsManagerReadWrite"))

        return cluster
Code example #7
    def __init__(self, scope: core.Stack, id: str, **kwargs):
        super().__init__(scope, id, **kwargs)

        # This resource alone will create a private/public subnet in each AZ as well as nat/internet gateway(s)
        self.vpc = aws_ec2.Vpc(
            self,
            "BaseVPC",
            cidr='10.0.0.0/24',
        )

        # Creating ECS Cluster in the VPC created above
        self.ecs_cluster = aws_ecs.Cluster(self,
                                           "ECSCluster",
                                           vpc=self.vpc,
                                           cluster_name="container-demo")

        # Adding service discovery namespace to cluster
        self.ecs_cluster.add_default_cloud_map_namespace(name="service")

        ###### EC2 SPOT CAPACITY PROVIDER SECTION ######

        ## As of today, AWS CDK doesn't support Launch Templates on the AutoScaling construct, hence it
        ## doesn't support Mixed Instances Policy to combine instance types on Auto Scaling and adhere to Spot best practices
        ## In the meantime, CfnLaunchTemplate and CfnAutoScalingGroup resources are used to configure Spot capacity
        ## https://github.com/aws/aws-cdk/issues/6734

        self.ecs_spot_instance_role = aws_iam.Role(
            self,
            "ECSSpotECSInstanceRole",
            assumed_by=aws_iam.ServicePrincipal("ec2.amazonaws.com"),
            managed_policies=[
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    "service-role/AmazonEC2ContainerServiceforEC2Role"),
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    "service-role/AmazonEC2RoleforSSM")
            ])

        self.ecs_spot_instance_profile = aws_iam.CfnInstanceProfile(
            self,
            "ECSSpotInstanceProfile",
            roles=[self.ecs_spot_instance_role.role_name])

        ## This creates a Launch Template for the Auto Scaling group
        self.lt = aws_ec2.CfnLaunchTemplate(
            self,
            "ECSEC2SpotCapacityLaunchTemplate",
            launch_template_data={
                "instanceType": "m5.large",
                "imageId": aws_ssm.StringParameter.value_for_string_parameter(
                    self,
                    "/aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id"
                ),
                "securityGroupIds": [
                    x.security_group_id
                    for x in self.ecs_cluster.connections.security_groups
                ],
                "iamInstanceProfile": {
                    "arn": self.ecs_spot_instance_profile.attr_arn
                },
                ## Here we configure the ECS agent to drain Spot Instances upon catching a Spot Interruption notice from instance metadata
                "userData": core.Fn.base64(
                    core.Fn.sub(
                        "#!/usr/bin/bash\n"
                        "echo ECS_CLUSTER=${cluster_name} >> /etc/ecs/ecs.config\n"
                        "sudo iptables --insert FORWARD 1 --in-interface docker+ --destination 169.254.169.254/32 --jump DROP\n"
                        "sudo service iptables save\n"
                        "echo ECS_ENABLE_SPOT_INSTANCE_DRAINING=true >> /etc/ecs/ecs.config\n"
                        "echo ECS_AWSVPC_BLOCK_IMDS=true >> /etc/ecs/ecs.config\n"
                        "cat /etc/ecs/ecs.config",
                        variables={"cluster_name": self.ecs_cluster.cluster_name}
                    )
                )
            },
            launch_template_name="ECSEC2SpotCapacityLaunchTemplate")

        self.ecs_ec2_spot_mig_asg = aws_autoscaling.CfnAutoScalingGroup(
            self,
            "ECSEC2SpotCapacity",
            min_size="0",
            max_size="10",
            vpc_zone_identifier=[
                x.subnet_id for x in self.vpc.private_subnets
            ],
            mixed_instances_policy={
                "instancesDistribution": {
                    "onDemandAllocationStrategy": "prioritized",
                    "onDemandBaseCapacity": 0,
                    "onDemandPercentageAboveBaseCapacity": 0,
                    "spotAllocationStrategy": "capacity-optimized"
                },
                "launchTemplate": {
                    "launchTemplateSpecification": {
                        "launchTemplateId": self.lt.ref,
                        "version": self.lt.attr_default_version_number
                    },
                    "overrides": [{
                        "instanceType": "m5.large"
                    }, {
                        "instanceType": "m5d.large"
                    }, {
                        "instanceType": "m5a.large"
                    }, {
                        "instanceType": "m5ad.large"
                    }, {
                        "instanceType": "m5n.large"
                    }, {
                        "instanceType": "m5dn.large"
                    }, {
                        "instanceType": "m3.large"
                    }, {
                        "instanceType": "m4.large"
                    }, {
                        "instanceType": "t3.large"
                    }, {
                        "instanceType": "t2.large"
                    }]
                }
            })
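        # With onDemandBaseCapacity=0 and onDemandPercentageAboveBaseCapacity=0
        # every instance in the group is Spot, and the capacity-optimized
        # strategy picks from the instance type overrides with the deepest
        # spare capacity pools.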
        #
        core.Tag.add(self.ecs_ec2_spot_mig_asg, "Name",
                     self.ecs_ec2_spot_mig_asg.node.path)
        core.CfnOutput(self,
                       "EC2SpotAutoScalingGroupName",
                       value=self.ecs_ec2_spot_mig_asg.ref,
                       export_name="EC2SpotASGName")
        #
        ##### END EC2 SPOT CAPACITY PROVIDER SECTION #####

        # Namespace details as CFN output
        self.namespace_outputs = {
            'ARN': self.ecs_cluster.default_cloud_map_namespace.private_dns_namespace_arn,
            'NAME': self.ecs_cluster.default_cloud_map_namespace.private_dns_namespace_name,
            'ID': self.ecs_cluster.default_cloud_map_namespace.private_dns_namespace_id,
        }

        # Cluster Attributes
        self.cluster_outputs = {
            'NAME': self.ecs_cluster.cluster_name,
            'SECGRPS': str(self.ecs_cluster.connections.security_groups)
        }

        # When enabling EC2, we need the security groups "registered" to the cluster for imports in other service stacks
        if self.ecs_cluster.connections.security_groups:
            self.cluster_outputs['SECGRPS'] = str([
                x.security_group_id
                for x in self.ecs_cluster.connections.security_groups
            ][0])

        # Frontend service to backend services on 3000
        self.services_3000_sec_group = aws_ec2.SecurityGroup(
            self,
            "FrontendToBackendSecurityGroup",
            allow_all_outbound=True,
            description="Security group for frontend service to talk to backend services",
            vpc=self.vpc)

        # Allow services attached to this security group to reach each other on port 3000
        self.sec_grp_ingress_self_3000 = aws_ec2.CfnSecurityGroupIngress(
            self,
            "InboundSecGrp3000",
            ip_protocol='TCP',
            source_security_group_id=self.services_3000_sec_group.security_group_id,
            from_port=3000,
            to_port=3000,
            group_id=self.services_3000_sec_group.security_group_id)

        # Creating an EC2 bastion host to perform load test on private backend services
        amzn_linux = aws_ec2.MachineImage.latest_amazon_linux(
            generation=aws_ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
            edition=aws_ec2.AmazonLinuxEdition.STANDARD,
            virtualization=aws_ec2.AmazonLinuxVirt.HVM,
            storage=aws_ec2.AmazonLinuxStorage.GENERAL_PURPOSE)

        # Instance Role/profile that will be attached to the ec2 instance
        # Enabling service role so the EC2 service can use ssm
        role = aws_iam.Role(
            self,
            "InstanceSSM",
            assumed_by=aws_iam.ServicePrincipal("ec2.amazonaws.com"))

        # Attaching the SSM policy to the role so we can use SSM to ssh into the ec2 instance
        role.add_managed_policy(
            aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                "service-role/AmazonEC2RoleforSSM"))

        # Read user data that installs the siege load-testing tool onto the EC2 instance.
        with open("stresstool_user_data.sh") as f:
            user_data = f.read()

        # Instance creation
        self.instance = aws_ec2.Instance(
            self,
            "Instance",
            instance_name="{}-stresstool".format(stack_name),  # stack_name is defined outside this snippet
            instance_type=aws_ec2.InstanceType("t3.medium"),
            machine_image=amzn_linux,
            vpc=self.vpc,
            role=role,
            user_data=aws_ec2.UserData.custom(user_data),
            security_group=self.services_3000_sec_group)

        # All Outputs required for other stacks to build
        core.CfnOutput(self,
                       "NSArn",
                       value=self.namespace_outputs['ARN'],
                       export_name="NSARN")
        core.CfnOutput(self,
                       "NSName",
                       value=self.namespace_outputs['NAME'],
                       export_name="NSNAME")
        core.CfnOutput(self,
                       "NSId",
                       value=self.namespace_outputs['ID'],
                       export_name="NSID")
        core.CfnOutput(self,
                       "FE2BESecGrp",
                       value=self.services_3000_sec_group.security_group_id,
                       export_name="SecGrpId")
        core.CfnOutput(self,
                       "ECSClusterName",
                       value=self.cluster_outputs['NAME'],
                       export_name="ECSClusterName")
        core.CfnOutput(self,
                       "ECSClusterSecGrp",
                       value=self.cluster_outputs['SECGRPS'],
                       export_name="ECSSecGrpList")
        core.CfnOutput(self,
                       "ServicesSecGrp",
                       value=self.services_3000_sec_group.security_group_id,
                       export_name="ServicesSecGrp")
        core.CfnOutput(self,
                       "StressToolEc2Id",
                       value=self.instance.instance_id)
        core.CfnOutput(self,
                       "StressToolEc2Ip",
                       value=self.instance.instance_private_ip)
Code example #8
    def __init__(self, scope: core.Construct, id: str, props, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        dirname = os.path.dirname(__file__)

        ecr_repo = ecr.Repository.from_repository_name(
            self,
            'UmccriseEcrRepo',
            repository_name='umccrise'
        )

        ################################################################################
        # Set up permissions
        ro_buckets = set()
        for bucket in props['ro_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(
                self,
                bucket,
                bucket_name=bucket
            )
            ro_buckets.add(tmp_bucket)

        rw_buckets = set()
        for bucket in props['rw_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(
                self,
                bucket,
                bucket_name=bucket
            )
            rw_buckets.add(tmp_bucket)

        batch_service_role = iam.Role(
            self,
            'BatchServiceRole',
            assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSBatchServiceRole')
            ]
        )

        spotfleet_role = iam.Role(
            self,
            'AmazonEC2SpotFleetRole',
            assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2SpotFleetTaggingRole')
            ]
        )

        # Create role for Batch instances
        batch_instance_role = iam.Role(
            self,
            'BatchInstanceRole',
            role_name='UmccriseBatchInstanceRole',
            assumed_by=iam.CompositePrincipal(
                iam.ServicePrincipal('ec2.amazonaws.com'),
                iam.ServicePrincipal('ecs.amazonaws.com')
            ),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2RoleforSSM'),
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2ContainerServiceforEC2Role')
            ]
        )
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "ec2:Describe*",
                    "ec2:AttachVolume",
                    "ec2:CreateVolume",
                    "ec2:CreateTags",
                    "ec2:ModifyInstanceAttribute"
                ],
                resources=["*"]
            )
        )
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "ecs:ListClusters"
                ],
                resources=["*"]
            )
        )
        for bucket in ro_buckets:
            bucket.grant_read(batch_instance_role)
        for bucket in rw_buckets:
            # restrict writes to paths matching */umccrised/*
            bucket.grant_read_write(batch_instance_role, '*/umccrised/*')

        # Turn the instance role into an Instance Profile
        batch_instance_profile = iam.CfnInstanceProfile(
            self,
            'BatchInstanceProfile',
            instance_profile_name='UmccriseBatchInstanceProfile',
            roles=[batch_instance_role.role_name]
        )

        ################################################################################
        # Network
        # Import common infrastructure (maintained via Terraform)

        # VPC
        vpc = ec2.Vpc.from_lookup(
            self,
            'UmccrMainVpc',
            tags={'Name': 'main-vpc', 'Stack': 'networking'}
        )

        batch_security_group = ec2.SecurityGroup(
            self,
            "BatchSecurityGroup",
            vpc=vpc,
            description="Allow all outbound, no inbound traffic"
        )
        ################################################################################
        # Setup Batch compute resources

        # Configure BlockDevice to expand instance disk space (if needed?)
        block_device_mappings = [
            {
                'deviceName': '/dev/xvdf',
                'ebs': {
                    'deleteOnTermination': True,
                    'encrypted': True,
                    'volumeSize': 2048,
                    'volumeType': 'gp2'
                }
            }
        ]

        # Set up custom user data to configure the Batch instances
        umccrise_wrapper_asset = assets.Asset(
            self,
            'UmccriseWrapperAsset',
            path=os.path.join(dirname, '..', 'assets', "umccrise-wrapper.sh")
        )
        umccrise_wrapper_asset.grant_read(batch_instance_role)

        user_data_asset = assets.Asset(
            self,
            'UserDataAsset',
            path=os.path.join(dirname, '..', 'assets', "batch-user-data.sh")
        )
        user_data_asset.grant_read(batch_instance_role)

        user_data = ec2.UserData.for_linux()
        local_path = user_data.add_s3_download_command(
            bucket=user_data_asset.bucket,
            bucket_key=user_data_asset.s3_object_key
        )
        user_data.add_execute_file_command(
            file_path=local_path,
            arguments=f"s3://{umccrise_wrapper_asset.bucket.bucket_name}/{umccrise_wrapper_asset.s3_object_key}"
        )
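        # add_s3_download_command returns the local file path of the
        # download; add_execute_file_command then runs that script, passing
        # the S3 URL of the umccrise wrapper as its argument.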

        # Generate user data wrapper to comply with LaunchTemplate required MIME multi-part archive format for user data
        mime_wrapper = ec2.UserData.custom('MIME-Version: 1.0')
        mime_wrapper.add_commands('Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="')
        mime_wrapper.add_commands('')
        mime_wrapper.add_commands('--==MYBOUNDARY==')
        mime_wrapper.add_commands('Content-Type: text/x-shellscript; charset="us-ascii"')
        mime_wrapper.add_commands('')
        # install the AWS CLI, as it's unexpectedly missing from the Amazon Linux 2 AMI...
        mime_wrapper.add_commands('yum -y install unzip')
        mime_wrapper.add_commands('cd /opt')
        mime_wrapper.add_commands('curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"')
        mime_wrapper.add_commands('unzip awscliv2.zip')
        mime_wrapper.add_commands('sudo ./aws/install --bin-dir /usr/bin')
        # insert our actual user data payload
        mime_wrapper.add_commands(user_data.render())
        mime_wrapper.add_commands('--==MYBOUNDARY==--')

        launch_template = ec2.CfnLaunchTemplate(
            self,
            'UmccriseBatchComputeLaunchTemplate',
            launch_template_name='UmccriseBatchComputeLaunchTemplate',
            launch_template_data={
                'userData': core.Fn.base64(mime_wrapper.render()),
                'blockDeviceMappings': block_device_mappings
            }
        )

        launch_template_spec = batch.LaunchTemplateSpecification(
            launch_template_name=launch_template.launch_template_name,
            version='$Latest'
        )

        my_compute_res = batch.ComputeResources(
            type=(batch.ComputeResourceType.SPOT if props['compute_env_type'].lower() == 'spot' else batch.ComputeResourceType.ON_DEMAND),
            allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
            desiredv_cpus=0,
            maxv_cpus=320,
            minv_cpus=0,
            image=ec2.MachineImage.generic_linux(ami_map={'ap-southeast-2': props['compute_env_ami']}),
            launch_template=launch_template_spec,
            spot_fleet_role=spotfleet_role,
            instance_role=batch_instance_profile.instance_profile_name,
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(
                subnet_type=ec2.SubnetType.PRIVATE,
                # availability_zones=["ap-southeast-2a"]
            ),
            security_groups=[batch_security_group]
            # compute_resources_tags=core.Tag('Creator', 'Batch')
        )
        # XXX: How to add more than one tag above??
        # https://github.com/aws/aws-cdk/issues/7350
        # core.Tag.add(my_compute_res, 'Foo', 'Bar')
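        # (core.Tags.of(construct).add(key, value) tags a construct tree, but
        #  ComputeResources is a plain props object rather than a construct,
        #  hence the open question above)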

        my_compute_env = batch.ComputeEnvironment(
            self,
            'UmccriseBatchComputeEnv',
            compute_environment_name="cdk-umccr_ise-batch-compute-env",
            service_role=batch_service_role,
            compute_resources=my_compute_res
        )
        # child = my_compute_env.node.default_child
        # child_comp_res = child.compute_resources
        # child_comp_res.tags = "{'Foo': 'Bar'}"

        job_queue = batch.JobQueue(
            self,
            'UmccriseJobQueue',
            job_queue_name='cdk-umccrise_job_queue',
            compute_environments=[
                batch.JobQueueComputeEnvironment(
                    compute_environment=my_compute_env,
                    order=1
                )
            ],
            priority=10
        )

        job_container = batch.JobDefinitionContainer(
            image=ecs.ContainerImage.from_registry(name=props['container_image']),
            vcpus=32,
            memory_limit_mib=100000,
            command=[
                "/opt/container/umccrise-wrapper.sh",
                "Ref::vcpus"
            ],
            mount_points=[
                ecs.MountPoint(
                    container_path='/work',
                    read_only=False,
                    source_volume='work'
                ),
                ecs.MountPoint(
                    container_path='/opt/container',
                    read_only=True,
                    source_volume='container'
                )
            ],
            volumes=[
                ecs.Volume(
                    name='container',
                    host=ecs.Host(
                        source_path='/opt/container'
                    )
                ),
                ecs.Volume(
                    name='work',
                    host=ecs.Host(
                        source_path='/mnt'
                    )
                )
            ],
            privileged=True
        )

        job_definition = batch.JobDefinition(
            self,
            'UmccriseJobDefinition',
            job_definition_name='cdk-umccrise-job-definition',
            parameters={'vcpus': '1'},
            container=job_container,
            timeout=core.Duration.hours(5)
        )

        ################################################################################
        # Set up job submission Lambda

        lambda_role = iam.Role(
            self,
            'UmccriseLambdaRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSLambdaBasicExecutionRole'),
                iam.ManagedPolicy.from_aws_managed_policy_name('AWSBatchFullAccess')  # TODO: restrict!
            ]
        )

        for bucket in ro_buckets:
            bucket.grant_read(lambda_role)
        for bucket in rw_buckets:
            bucket.grant_read(lambda_role)
        ecr_repo.grant(lambda_role, 'ecr:ListImages')

        # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
        #       may need a default JobDefinition to be set up
        lmbda.Function(
            self,
            'UmccriseLambda',
            function_name='umccrise_batch_lambda',
            handler='umccrise.lambda_handler',
            runtime=lmbda.Runtime.PYTHON_3_7,
            code=lmbda.Code.from_asset('lambdas/umccrise'),
            environment={
                'JOBNAME_PREFIX': "UMCCRISE_",
                'JOBQUEUE': job_queue.job_queue_name,
                'UMCCRISE_MEM': '100000',
                'UMCCRISE_VCPUS': '32',
                'JOBDEF': job_definition.job_definition_name,
                'REFDATA_BUCKET': props['refdata_bucket'],
                'INPUT_BUCKET': props['input_bucket'],
                'RESULT_BUCKET': props['result_bucket'],
                'IMAGE_CONFIGURABLE': props['image_configurable']
            },
            role=lambda_role
        )
Code example #9
File: batch.py Project: Tefnet/infrastructure
    def __init__(self, scope: core.Construct, id: str, props,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        ################################################################################
        # Set up permissions
        ro_buckets = set()
        for bucket in props['ro_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(self,
                                                    bucket,
                                                    bucket_name=bucket)
            ro_buckets.add(tmp_bucket)

        rw_buckets = set()
        for bucket in props['rw_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(self,
                                                    bucket,
                                                    bucket_name=bucket)
            rw_buckets.add(tmp_bucket)

        batch_service_role = iam.Role(
            self,
            'BatchServiceRole',
            assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSBatchServiceRole')
            ])

        spotfleet_role = iam.Role(
            self,
            'AmazonEC2SpotFleetRole',
            assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2SpotFleetTaggingRole')
            ])

        # Create role for Batch instances
        batch_instance_role = iam.Role(
            self,
            'BatchInstanceRole',
            role_name='UmccriseBatchInstanceRole',
            assumed_by=iam.CompositePrincipal(
                iam.ServicePrincipal('ec2.amazonaws.com'),
                iam.ServicePrincipal('ecs.amazonaws.com')),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2RoleforSSM'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2ContainerServiceforEC2Role')
            ])
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(actions=[
                "ec2:Describe*", "ec2:AttachVolume", "ec2:CreateVolume",
                "ec2:CreateTags", "ec2:ModifyInstanceAttribute"
            ],
                                resources=["*"]))
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(actions=["ecs:ListClusters"], resources=["*"]))
        for bucket in ro_buckets:
            bucket.grant_read(batch_instance_role)
        for bucket in rw_buckets:
            bucket.grant_read_write(batch_instance_role)

        # Turn the instance role into an instance profile
        batch_instance_profile = iam.CfnInstanceProfile(
            self,
            'BatchInstanceProfile',
            instance_profile_name='UmccriseBatchInstanceProfile',
            roles=[batch_instance_role.role_name])

        ################################################################################
        # Minimal networking
        # TODO: use existing common setup
        # TODO: roll out across all AZs? (Will require more subnets, NATs, ENIs, etc...)
        vpc = ec2.Vpc(self, 'UmccrVpc', cidr="10.2.0.0/16", max_azs=1)

        ################################################################################
        # Setup Batch compute resources

        # Configure BlockDevice to expand instance disk space (if needed?)
        block_device_mappings = [{
            'deviceName': '/dev/xvdf',
            'ebs': {
                'deleteOnTermination': True,
                'volumeSize': 1024,
                'volumeType': 'gp2'
            }
        }]

        launch_template = ec2.CfnLaunchTemplate(
            self,
            'UmccriseBatchComputeLaunchTemplate',
            launch_template_name='UmccriseBatchComputeLaunchTemplate',
            launch_template_data={
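                # 'user_data_script' is not shown in this excerpt; it is assumed to
                # hold the MIME multi-part bootstrap script built earlier in the file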
                'userData': core.Fn.base64(user_data_script),
                'blockDeviceMappings': block_device_mappings
            })

        # TODO: Replace with proper CDK construct once available
        # TODO: Uses public subnet and default security group
        batch_comp_env = batch.CfnComputeEnvironment(
            self,
            'UmccriseBatchComputeEnv',
            type='MANAGED',
            service_role=batch_service_role.role_arn,
            compute_resources={
                'type': props['compute_env_type'],
                'allocationStrategy': 'BEST_FIT_PROGRESSIVE',
                'maxvCpus': 128,
                'minvCpus': 0,
                'desiredvCpus': 0,
                'imageId': props['compute_env_ami'],
                'launchTemplate': {
                    'launchTemplateName': launch_template.launch_template_name,
                    'version': '$Latest'
                },
                'spotIamFleetRole': spotfleet_role.role_arn,
                'instanceRole': batch_instance_profile.instance_profile_name,
                'instanceTypes': ['optimal'],
                'subnets': [vpc.public_subnets[0].subnet_id],
                'securityGroupIds': [vpc.vpc_default_security_group],
                'tags': {
                    'Creator': 'Batch',
                    'Name': 'BatchWorker'
                }
            })

        # TODO: Replace with proper CDK construct once available
        # TODO: job_queue_name could result in a clash, but is currently necessary
        #       as we need a reference for the ENV variables of the lambda
        #       Could/Should append a unique element/string.
        job_queue = batch.CfnJobQueue(self,
                                      'UmccriseJobQueue',
                                      compute_environment_order=[{
                                          'computeEnvironment':
                                          batch_comp_env.ref,
                                          'order':
                                          1
                                      }],
                                      priority=10,
                                      job_queue_name='umccrise_job_queue')

        ################################################################################
        # Set up job submission Lambda

        lambda_role = iam.Role(
            self,
            'UmccriseLambdaRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSLambdaBasicExecutionRole'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AWSBatchFullAccess')  # TODO: restrict!
            ])

        for bucket in ro_buckets:
            bucket.grant_read(lambda_role)
        for bucket in rw_buckets:
            bucket.grant_read(lambda_role)

        lmbda.Function(self,
                       'UmccriseLambda',
                       function_name='umccrise_batch_lambda',
                       handler='umccrise.lambda_handler',
                       runtime=lmbda.Runtime.PYTHON_3_7,
                       code=lmbda.Code.from_asset('lambdas/umccrise'),
                       environment={
                           'JOBNAME_PREFIX': "UMCCRISE_",
                           'JOBQUEUE': job_queue.job_queue_name,
                           'REFDATA_BUCKET': props['refdata_bucket'],
                           'DATA_BUCKET': props['data_bucket'],
                           'UMCCRISE_MEM': '50000',
                           'UMCCRISE_VCPUS': '16'
                       },
                       role=lambda_role)
Code example #10
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Get/set stack name for context
        self.node.set_context("STACK_NAME", self.stack_name)

        # The code that defines your stack goes here
        # Set a vpc
        vpc = ec2.Vpc.from_lookup(self, "VPC", is_default=True)
        vpc_subnets = ec2.SubnetSelection()

        # Set access policies for the instance
        policies = [
            # Read only access for all our s3 buckets
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonS3ReadOnlyAccess"),
            # Set the container registry policy so we can pull docker containers from our ECR repo
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonEC2ContainerRegistryReadOnly"),
            # Allow login via SSM Session Manager
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSSMManagedInstanceCore")
        ]

        # Get role object with set policies
        role = iam.Role(self,
                        "EC2Role",
                        assumed_by=iam.ServicePrincipal("ec2.amazonaws.com"),
                        managed_policies=policies)

        # Get an EBS volume for /var (attached as /dev/sdf)
        ebs_var_vol = ec2.BlockDeviceVolume.ebs(
            volume_size=int(self.node.try_get_context("VAR_VOLUME_SIZE")))
        # Place volume on a block device with the set mount point
        ebs_var_block_device = ec2.BlockDevice(device_name="/dev/sdf",
                                               volume=ebs_var_vol)

        # Get volume - contains a block device volume and a block device
        ebs_extended_vol = ec2.BlockDeviceVolume.ebs(
            volume_size=int(self.node.try_get_context("EXTENDED_VOLUME_SIZE")))
        # Place volume on a block device with a set mount point
        ebs_extended_block_device = ec2.BlockDevice(device_name="/dev/sdg",
                                                    volume=ebs_extended_vol)

        # Run bootstrap
        """
        The code under user_data/user_data.sh completes the following steps
        1. Installs docker into the ec2 instance
        2. Mounts our volume to /mnt/
        3. Logs into docker
        """

        mappings = {
            "__ACCOUNT_ID__": str(self.account),
            "__REGION__": str(self.region)
        }

        with open("user_data/user_data.sh", 'r') as user_data_h:
            # Use a substitution
            user_data_sub = core.Fn.sub(user_data_h.read(), mappings)

        # Import substitution object into user_data set
        user_data = ec2.UserData.custom(user_data_sub)

        # Set instance type from ec2-type in context
        instance_type = ec2.InstanceType(
            instance_type_identifier=self.node.try_get_context("EC2_TYPE"))

        # Get machine type from context
        machine_image = ec2.GenericLinuxImage({
            self.region:
            self.node.try_get_context(
                "MACHINE_IMAGE"),  # Refer to an existing AMI type
        })

        # Assemble the EC2 instance from all of the parameters above,
        # except the LaunchTemplate, which is added as a property override below
        host = ec2.Instance(
            self,
            id="{}-instance".format(self.node.try_get_context("STACK_NAME")),
            instance_type=instance_type,
            instance_name=self.node.try_get_context("INSTANCE_NAME"),
            machine_image=machine_image,
            vpc=vpc,
            vpc_subnets=vpc_subnets,
            role=role,
            user_data=user_data,
            block_devices=[ebs_var_block_device, ebs_extended_block_device],
        )

        if str(self.node.try_get_context("USE_SPOT_INSTANCE")).lower() == 'true':
            # Spot pricing via ec2 fleet
            spot_price = self.node.try_get_context("MAX_SPOT_PRICE")
            market_options = {"MarketType": "spot"}
            if spot_price is not None:
                spot_options = {"MaxPrice": spot_price}
                market_options["SpotOptions"] = spot_options
            launch_template_data = {"InstanceMarketOptions": market_options}
            launch_template = ec2.CfnLaunchTemplate(self, "LaunchTemplate")
            launch_template.add_property_override("LaunchTemplateData",
                                                  launch_template_data)

            host.instance.add_property_override(
                "LaunchTemplate", {
                    "LaunchTemplateId": launch_template.ref,
                    "Version": launch_template.attr_latest_version_number
                })

        # Output the instance ID so the host can be looked up (e.g. to SSH in).
        # Note that the instance may be reported ready before the user_data shell
        # script has completed, so not all of our goodies may be there yet
        core.CfnOutput(self, "Output", value=host.instance_id)
Code example #11
File: cdk_app.py Project: pflashgary/HKO-7
    def __init__(self, app, id, target, **kwargs):
        """ Initializer """
        super().__init__(app, id, **kwargs)

        self.config = consolidate_context(self, target)

        # Fetch VPC info
        vpc = ec2.Vpc.from_lookup(self,
                                  "VPC",
                                  vpc_id=self.config["vpc_id"]["us-west-2"])
        subnets = vpc.private_subnets

        # AMI
        ami = ec2.MachineImage.generic_linux(
            {self.region: self.config["ami_name"]})

        # Create a specific SG for DT instances
        ec2_sg = ec2.SecurityGroup(self,
                                   "EC2SG",
                                   description='ec2 SG',
                                   vpc=vpc)
        # Add default rules for the SG
        self._add_default_rules(ec2_sg, vpc)

        # IAM role and profile for the EC2
        iam_role = iam.Role(
            self,
            "EC2Role",
            assumed_by=iam.ServicePrincipal("ec2.amazonaws.com"),
            inline_policies={
                "extra-permissions":
                iam.PolicyDocument(statements=[
                    iam.PolicyStatement(
                        actions=["s3:*"],
                        # a managed-policy ARN is not a valid resource for S3
                        # actions; grant on S3 ARNs and scope down as needed
                        resources=["arn:aws:s3:::*"],
                    )
                ])
            },
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    managed_policy_name="ReadOnlyAccess"),
                # Required by SSM StateManager
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    managed_policy_name="AmazonSSMManagedInstanceCore"),
            ],
        )
        iam_ip = iam.CfnInstanceProfile(
            self,
            "EC2InstProf",
            path="/",
            roles=[iam_role.role_name],
        )

        # Create a placement group in cluster mode to locate all the DT nodes close
        # to each other. This mode means we restrict ourselves to a single AZ
        placement_group = ec2.CfnPlacementGroup(self,
                                                "EC2PG",
                                                strategy="cluster")

        instance_specs = self.config["instance_spec"]
        for key, spec in instance_specs.items():

            launch_template = ec2.CfnLaunchTemplate(
                self,
                f"EC2LT{key}",
                launch_template_data=ec2.CfnLaunchTemplate.
                LaunchTemplateDataProperty(
                    block_device_mappings=[
                        ec2.CfnLaunchTemplate.BlockDeviceMappingProperty(
                            device_name="/dev/xvda",
                            ebs=ec2.CfnLaunchTemplate.EbsProperty(
                                volume_size=self.config["ebs_volume_size"],
                                volume_type="gp2",
                            ),
                        )
                    ],
                    iam_instance_profile=ec2.CfnLaunchTemplate.
                    IamInstanceProfileProperty(arn=iam_ip.attr_arn),
                    image_id=str(ami.get_image(self).image_id),
                    instance_type=spec["instance_type"],
                    # TODO: we should use SSM Systems Manager rather than native SSH
                    key_name=self.config["key_pair"].format(
                        region=self.region),
                    security_group_ids=[ec2_sg.security_group_id]),
            )

            asg = autoscaling.CfnAutoScalingGroup(
                self,
                f"ASG{key}",
                desired_capacity="1",
                min_size="1",
                max_size="1",
                mixed_instances_policy=autoscaling.CfnAutoScalingGroup.
                MixedInstancesPolicyProperty(
                    instances_distribution=autoscaling.CfnAutoScalingGroup.
                    InstancesDistributionProperty(
                        on_demand_base_capacity=0,
                        on_demand_percentage_above_base_capacity=spec[
                            'on_demand_percentage_above_base_capacity'],
                        spot_allocation_strategy="lowest-price",
                        spot_instance_pools=1,
                    ),
                    launch_template=autoscaling.CfnAutoScalingGroup.
                    LaunchTemplateProperty(
                        launch_template_specification=autoscaling.
                        CfnAutoScalingGroup.
                        LaunchTemplateSpecificationProperty(
                            launch_template_id=launch_template.ref,
                            version=launch_template.attr_latest_version_number,
                        ))),
                # Use placement group, which means we restrict ourselves to a single AZ
                placement_group=placement_group.ref,
                # Restrict to a single subnet because of the placement group
                vpc_zone_identifier=[subnets[0].subnet_id],
                # Set max instance lifetime to 7 days for worker nodes and 30 days for master
            )
            # Add a name to the ASG and it will be propagated to underlying EC2 instances
            tag_name = f'{self.config["prefix"]} ASG {key}'
            core.Tags.of(asg).add("Name", tag_name)
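            # A sketch (not in the original) of the lifetime mentioned in the
            # comment above, set directly on the L1 construct: 7 days, in seconds.
            # The 7-vs-30-day split per node type is left as an assumption.
            asg.max_instance_lifetime = 7 * 24 * 60 * 60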
Code example #12
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # import default vpc
        # vpc = ec2.Vpc.from_lookup(self, id='Vpc', is_default=True)
        # from vpcId
        # vpc = ec2.Vpc.from_lookup(self, id='Vpc', vpc_id='vpc-0417e46d')
        # create vpc
        vpc = ec2.Vpc(self,
                      'eks-vpc',
                      cidr='10.3.0.0/16',
                      max_azs=3,
                      nat_gateways=1)

        # create eks admin role
        eks_master_role = iam.Role(self,
                                   'EksMasterRole',
                                   role_name='EksAdminRole',
                                   assumed_by=iam.AccountRootPrincipal())

        cluster = eks.Cluster(self,
                              'Cluster',
                              vpc=vpc,
                              version=eks.KubernetesVersion.V1_18,
                              masters_role=eks_master_role,
                              default_capacity=0)
        # Conditionally add the AWS console login user to the RBAC so we can browse the EKS workloads
        console_user_string = self.node.try_get_context('console_user')
        if console_user_string:
            console_user = iam.User.from_user_name(
                self, 'ConsoleUser', user_name=console_user_string)
            cluster.aws_auth.add_user_mapping(console_user,
                                              groups=['system:masters'])

        # user data
        user_data = ec2.UserData.for_linux()
        user_data.add_commands(
            'set -o xtrace',
            '/etc/eks/bootstrap.sh {}'.format(cluster.cluster_name))
        lt = ec2.CfnLaunchTemplate(
            self,
            'LT',
            launch_template_data={
                'imageId':
                eks.EksOptimizedImage().get_image(self).image_id,
                'instanceType':
                ec2.InstanceType('t3.large').to_string(),
                'userData':
                core.Fn.base64(user_data.render()),
                'tagSpecifications': [{
                    'resourceType':
                    'instance',
                    'tags': [{
                        'key': 'Name',
                        'value': 'MNG'
                    }, {
                        'key': 'Foo',
                        'value': 'Bar'
                    }]
                }]
            })
        # create eks managed nodegroup
        cluster.add_nodegroup_capacity('MNG',
                                       launch_template_spec={
                                           'id':
                                           lt.ref,
                                           'version':
                                           lt.attr_default_version_number,
                                       },
                                       desired_size=2)
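        # A sketch (not in the original): surface the kubeconfig command for the
        # admin role as a stack output, so the cluster can be reached as
        # system:masters after deploy
        core.CfnOutput(self, 'UpdateKubeconfigCommand',
                       value='aws eks update-kubeconfig --name {} --role-arn {}'.format(
                           cluster.cluster_name, eks_master_role.role_arn))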