Example #1
    def __init__(self, scope: core.Construct, id: str, vpc, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # The code that defines your stack goes here
        ce_resources = batch.ComputeResources(type=batch.ComputeResourceType.SPOT,
                                       bid_percentage=50,
                                       allocation_strategy=batch.AllocationStrategy.SPOT_CAPACITY_OPTIMIZED,
                                       instance_types=[ec2.InstanceType("c5.large"), ec2.InstanceType("c5.xlarge")],
                                       vpc=vpc
                                      )
                                      
        spot_environment = batch.ComputeEnvironment(self, "MySpotEnvironment",
                                                    compute_resources=ce_resources
                                                   )
        queue = batch.JobQueue(self, "BatchQueue",
                               compute_environments=[batch.JobQueueComputeEnvironment(compute_environment=spot_environment, order=1)],
                               priority=1)
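
A queue like this is exercised from outside the stack once deployed. A minimal boto3 sketch, assuming a job definition named 'my-job-def' has been registered separately and the queue's generated name (or ARN) is known:

import boto3

batch_client = boto3.client('batch')
response = batch_client.submit_job(
    jobName='example-spot-job',
    jobQueue='BatchQueue',        # assumed: the queue's resolved name or ARN
    jobDefinition='my-job-def',   # assumed, registered outside this stack
)
print(response['jobId'])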
Example #2
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # ========================
        # VPC
        # ========================
        
        # VPC
        vpc = ec2.Vpc(
            self, 'fetch-and-run-vpc',
            max_azs=2,
            subnet_configuration=[
                ec2.SubnetConfiguration(
                    name='public-subnet',
                    subnet_type=ec2.SubnetType.PUBLIC
                )
            ],
            nat_gateways=0
        )

        # Security Group
        sg = ec2.SecurityGroup(
            self, 'fetch-and-run-sg',
            vpc=vpc,
            description='SG for fetch and run',
            security_group_name='fetch-and-run-sg'
        )

        # Ingress from IP address via HTTP, SSH
        for port in PORTS:
            sg.add_ingress_rule(
                peer=ec2.Peer.ipv4(IP_ADDRESS),
                connection=ec2.Port.tcp(port)   
            )
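
        # The module-level constants used in this example (PORTS, IP_ADDRESS,
        # BATCH_SERVICE_ROLE_ARN, ECS_INSTANCE_ROLE_ARN, BATCH_JOB_ROLE_ARN,
        # ECR_REPOSITORY_NAME, KEY_PAIR) are defined outside this snippet;
        # hypothetical examples of the first two:
        #   PORTS = [22, 80]
        #   IP_ADDRESS = '203.0.113.10/32'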

        # ========================
        # IAM
        # ========================

        '''
        I. Batch Service Role
        - Makes calls to other AWS services on your behalf to
        manage the resources that you use with the service
        '''

        batch_service_role = iam.Role.from_role_arn(
            self, 'batch-service-role',
            role_arn=BATCH_SERVICE_ROLE_ARN
        )

        '''
        II. ECS Instance Role
        - Batch compute environments are populated with ECS container instances,
        which run the ECS container agent locally
        - ECS container agent makes calls to AWS APIs on your behalf
        - Container instances that run the agent require a policy and role for
        these services to know that the agent belongs to you

        - Instance Profile uses the batch instance role name
        - This is fed into the compute environment    
        '''

        batch_instance_role = iam.Role.from_role_arn(
            self, 'batch-instance-role',
            role_arn=ECS_INSTANCE_ROLE_ARN
        )

        instance_profile = iam.CfnInstanceProfile(
            self, 'instance-profile',
            roles=[batch_instance_role.role_name]
        )

        '''
        Job Role
        - Used in the job definition
        - IAM role that the container can assume for AWS permissions
        
        When the fetch_and_run image runs as an AWS Batch job, it fetches the job
        script from Amazon S3. You need an IAM role that the AWS Batch job can use
        to access S3

        Trusted Entity --> AWS service --> Elastic Container Service --> Elastic
        Container Service Task 
        - In the Role's trust relationship, this will be displayed as follows:
        {
            "Version": "2012-10-17",
            "Statement": [
                {
                "Sid": "",
                "Effect": "Allow",
                "Principal": {
                    "Service": "ecs-tasks.amazonaws.com"
                },
                "Action": "sts:AssumeRole"
                }
            ]
        }

        The default is for a new role to be created.
        '''
        batch_job_role = iam.Role.from_role_arn(
            self, 'batch-job-role',
            role_arn=BATCH_JOB_ROLE_ARN
        )
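
        # A hedged alternative (an assumption, not part of the original stack):
        # create the job role in-stack instead of importing it by ARN. The
        # trust policy shown above corresponds to this principal:
        #
        # batch_job_role = iam.Role(
        #     self, 'batch-job-role',
        #     assumed_by=iam.ServicePrincipal('ecs-tasks.amazonaws.com')
        # )
        # batch_job_role.add_to_policy(iam.PolicyStatement(
        #     actions=['s3:GetObject'],
        #     resources=['arn:aws:s3:::my-job-scripts/*']  # hypothetical bucket
        # ))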

        # ========================
        # ECR
        # ========================
        '''
        Repository

        TODO: Evaluate integrating repository into CDK (in this stack or another)
        '''
        ecr_repository = ecr.Repository.from_repository_name(
            self, 'ecr-repository',
            repository_name=ECR_REPOSITORY_NAME
        )
        
        '''
        Container Image
        
        NOTE: We pull the image directly from ECR; it is pushed before the stack is created.
        - Can alternatively create the image from files in the stack (commented out)
        
        TODO: Evaluate the ability to programmatically update the tag.
        - Manually updating the tag follows the approach of pushing the image before stack creation/updates
        - Review adding an alphanumeric tag as opposed to simply 'latest' --> more detail for auditing
        '''
        # image_asset = ecr_assets.DockerImageAsset(
        #     self, 'docker-image',
        #     directory='./fetch-and-run',
        #     file='./Dockerfile'
        # )
        # image = ecs.ContainerImage.from_docker_image_asset(image_asset)

        image = ecs.ContainerImage.from_ecr_repository(
            repository=ecr_repository,
            tag='latest'
        )

        # ========================
        # BATCH
        # ========================

        '''
        I. Compute Environment
        - Execution runtime of submitted batch jobs 
        '''
        compute_environment = batch.ComputeEnvironment(
            self, 'batch-compute-environment',
            compute_environment_name='batch-compute-environment',
            compute_resources=batch.ComputeResources(
                vpc=vpc,
                # BEST_FIT_PROGRESSIVE will select an additional instance type that is large enough to meet the requirements of the jobs in the queue, with a preference for an instance type with a lower cost.
                allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
                compute_resources_tags={
                    "name": "fetch-and-run"
                },
                ec2_key_pair=KEY_PAIR,
                instance_role=instance_profile.attr_arn,
                security_groups=[sg],
                type=batch.ComputeResourceType.ON_DEMAND,
                vpc_subnets=ec2.SubnetSelection(
                    subnet_type=ec2.SubnetType.PUBLIC)
            ),
            service_role=batch_service_role,
        )

        '''
        II. Job Queue
        - Queue where batch jobs can be submitted
        '''

        job_queue = batch.JobQueue(
            self, 'fetch-and-run-queue',
            compute_environments=[
                batch.JobQueueComputeEnvironment(
                    compute_environment=compute_environment,
                    order=1
                )],
            job_queue_name='fetch-and-run-queue'
        )

        '''
        III. Job Definition
        - Group various job properties (image, resource requirements, env variables)
        into a single definition. Definitions are referenced at job submission time.
        
        TODO: Build out functionality for the following:
        - `command` => The command that is passed to the container. If you provide a shell command as a single string, you have to quote command-line arguments
        - `environment` => The environment variables to pass to the container
        - `mount_points` => The mount points for data volumes in your container
        - `volumes` => A list of data volumes used in a job.
        
        NOTE: Can optionally add command, environment variables directly in code
        - Alternatively can reference them in `fetch_and_run.sh`
        '''

        job_definition = batch.JobDefinition(
            self, 'fetch-and-run-job-definition',
            container=batch.JobDefinitionContainer(
                image=image,
                job_role=batch_job_role,
                # The hard limit (in MiB) of memory to present to the container
                memory_limit_mib=500,

                # The number of vCPUs reserved for the container. Each vCPU is equivalent to 1,024 CPU
                vcpus=1,
                user="******"
            )
        )
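
To exercise the stack above, a job is submitted against the queue with the script location passed as environment variables. A minimal boto3 sketch; the job definition name and the BATCH_FILE_TYPE / BATCH_FILE_S3_URL convention of the fetch_and_run container are assumptions here:

import boto3

batch_client = boto3.client('batch')
batch_client.submit_job(
    jobName='fetch-and-run-example',
    jobQueue='fetch-and-run-queue',
    jobDefinition='fetch-and-run',   # assumed job definition name
    containerOverrides={
        'environment': [
            # conventional fetch_and_run variables (assumed), read by the container
            {'name': 'BATCH_FILE_TYPE', 'value': 'script'},
            {'name': 'BATCH_FILE_S3_URL', 'value': 's3://my-bucket/myjob.sh'},
        ]
    },
)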
Example #3
    def __init__(
        self,
        scope: Construct,
        construct_id: str,
        *,
        deploy_env: str,
        processing_assets_table: aws_dynamodb.Table,
    ):
        # pylint: disable=too-many-locals
        super().__init__(scope, construct_id)

        if deploy_env == "prod":
            instance_types = [
                aws_ec2.InstanceType("c5.xlarge"),
                aws_ec2.InstanceType("c5.2xlarge"),
                aws_ec2.InstanceType("c5.4xlarge"),
                aws_ec2.InstanceType("c5.9xlarge"),
            ]
        else:
            instance_types = [
                aws_ec2.InstanceType("m5.large"),
                aws_ec2.InstanceType("m5.xlarge"),
            ]

        ec2_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            "service-role/AmazonEC2ContainerServiceforEC2Role")

        batch_instance_role = aws_iam.Role(
            self,
            "batch-instance-role",
            assumed_by=aws_iam.ServicePrincipal(
                "ec2.amazonaws.com"),  # type: ignore[arg-type]
            managed_policies=[ec2_policy],
        )
        processing_assets_table.grant_read_write_data(
            batch_instance_role)  # type: ignore[arg-type]

        batch_instance_profile = aws_iam.CfnInstanceProfile(
            self,
            "batch-instance-profile",
            roles=[batch_instance_role.role_name],
        )

        batch_launch_template_data = textwrap.dedent("""
            MIME-Version: 1.0
            Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="

            --==MYBOUNDARY==
            Content-Type: text/x-shellscript; charset="us-ascii"

            #!/bin/bash
            echo ECS_IMAGE_PULL_BEHAVIOR=prefer-cached >> /etc/ecs/ecs.config

            --==MYBOUNDARY==--
            """)
        launch_template_data = aws_ec2.CfnLaunchTemplate.LaunchTemplateDataProperty(
            user_data=Fn.base64(batch_launch_template_data.strip()))
        cloudformation_launch_template = aws_ec2.CfnLaunchTemplate(
            self,
            "batch-launch-template",
            launch_template_name=f"{deploy_env}-datalake-batch-launch-template",
            launch_template_data=launch_template_data,
        )
        assert cloudformation_launch_template.launch_template_name is not None
        launch_template = aws_batch.LaunchTemplateSpecification(
            launch_template_name=cloudformation_launch_template.launch_template_name)

        # Use the existing VPC in the LINZ AWS account.
        # A VPC with these tags must already exist in the account before this
        # stack is deployed; this project does not create one.
        vpc = aws_ec2.Vpc.from_lookup(
            self,
            "datalake-vpc",
            tags={
                APPLICATION_NAME_TAG_NAME: APPLICATION_NAME,
                "ApplicationLayer": "networking",
            },
        )

        compute_resources = aws_batch.ComputeResources(
            vpc=vpc,
            minv_cpus=0,
            desiredv_cpus=0,
            maxv_cpus=1000,
            instance_types=instance_types,
            instance_role=batch_instance_profile.instance_profile_name,
            allocation_strategy=aws_batch.AllocationStrategy(
                "BEST_FIT_PROGRESSIVE"),
            launch_template=launch_template,
        )
        batch_service_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            "service-role/AWSBatchServiceRole")
        service_role = aws_iam.Role(
            self,
            "batch-service-role",
            assumed_by=aws_iam.ServicePrincipal(
                "batch.amazonaws.com"),  # type: ignore[arg-type]
            managed_policies=[batch_service_policy],
        )
        compute_environment = aws_batch.ComputeEnvironment(
            self,
            "compute-environment",
            compute_resources=compute_resources,
            service_role=service_role,  # type: ignore[arg-type]
        )

        self.job_queue = aws_batch.JobQueue(
            scope,
            f"{construct_id}-job-queue",
            compute_environments=[
                aws_batch.JobQueueComputeEnvironment(
                    compute_environment=compute_environment,
                    order=10  # type: ignore[arg-type]
                ),
            ],
            priority=10,
        )
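
Hypothetical wiring for this construct; the class name BatchStack, the surrounding app setup, and the imports (from aws_cdk or aws_cdk.core, depending on the CDK version) are all assumptions. Vpc.from_lookup requires an env with an explicit account and region:

app = App()
stack = Stack(app, "datalake",
              env=Environment(account="123456789012", region="ap-southeast-2"))
assets_table = aws_dynamodb.Table(
    stack, "processing-assets",
    partition_key=aws_dynamodb.Attribute(
        name="pk", type=aws_dynamodb.AttributeType.STRING))
BatchStack(stack, "processing", deploy_env="nonprod",
           processing_assets_table=assets_table)
app.synth()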
Example #4
    def __init__(self, scope: core.Construct, id: str, props,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        ################################################################################
        # Set up permissions
        ro_buckets = set()
        for bucket in props['ro_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(self,
                                                    bucket,
                                                    bucket_name=bucket)
            ro_buckets.add(tmp_bucket)

        rw_buckets = set()
        for bucket in props['rw_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(self,
                                                    bucket,
                                                    bucket_name=bucket)
            rw_buckets.add(tmp_bucket)

        batch_service_role = iam.Role(
            self,
            'BatchServiceRole',
            assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSBatchServiceRole')
            ])

        spotfleet_role = iam.Role(
            self,
            'AmazonEC2SpotFleetRole',
            assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2SpotFleetTaggingRole')
            ])

        # Create role for Batch instances
        batch_instance_role = iam.Role(
            self,
            'BatchInstanceRole',
            role_name='UmccriseBatchInstanceRole',
            assumed_by=iam.CompositePrincipal(
                iam.ServicePrincipal('ec2.amazonaws.com'),
                iam.ServicePrincipal('ecs.amazonaws.com')),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2RoleforSSM'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2ContainerServiceforEC2Role')
            ])
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(actions=[
                "ec2:Describe*", "ec2:AttachVolume", "ec2:CreateVolume",
                "ec2:CreateTags", "ec2:ModifyInstanceAttribute"
            ],
                                resources=["*"]))
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(actions=["ecs:ListClusters"], resources=["*"]))
        for bucket in ro_buckets:
            bucket.grant_read(batch_instance_role)
        for bucket in rw_buckets:
            # restrict writes to paths matching */umccrised/*
            bucket.grant_read_write(batch_instance_role, '*/umccrised/*')

        # Turn the instance role into an Instance Profile
        batch_instance_profile = iam.CfnInstanceProfile(
            self,
            'BatchInstanceProfile',
            instance_profile_name='UmccriseBatchInstanceProfile',
            roles=[batch_instance_role.role_name])

        ################################################################################
        # Minimal networking
        # TODO: import resource created with TF
        vpc = props['vpc']

        ################################################################################
        # Setup Batch compute resources

        # Configure BlockDevice to expand instance disk space (if needed?)
        block_device_mappings = [{
            'deviceName': '/dev/xvdf',
            'ebs': {
                'deleteOnTermination': True,
                'volumeSize': 1024,
                'volumeType': 'gp2'
            }
        }]

        launch_template = ec2.CfnLaunchTemplate(
            self,
            'UmccriseBatchComputeLaunchTemplate',
            launch_template_name='UmccriseBatchComputeLaunchTemplate',
            launch_template_data={
                'userData': core.Fn.base64(user_data_script),
                'blockDeviceMappings': block_device_mappings
            })

        launch_template_spec = batch.LaunchTemplateSpecification(
            launch_template_name=launch_template.launch_template_name,
            version='$Latest')

        my_compute_res = batch.ComputeResources(
            type=batch.ComputeResourceType.SPOT,
            allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
            desiredv_cpus=0,
            maxv_cpus=128,
            minv_cpus=0,
            image=ec2.MachineImage.generic_linux(
                ami_map={'ap-southeast-2': props['compute_env_ami']}),
            launch_template=launch_template_spec,
            spot_fleet_role=spotfleet_role,
            instance_role=batch_instance_profile.instance_profile_name,
            vpc=vpc,
            #compute_resources_tags=core.Tag('Creator', 'Batch')
        )
        # XXX: How to add more than one tag above??
        # core.Tag.add(my_compute_res, 'Foo', 'Bar')
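        # One possibility (an assumption; depends on the CDK version): newer
        # v1 releases accept a plain dict here, e.g.
        #   compute_resources_tags={'Creator': 'Batch', 'Foo': 'Bar'}
        # and constructs can also be tagged after creation via
        #   core.Tags.of(my_compute_env).add('Foo', 'Bar')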

        my_compute_env = batch.ComputeEnvironment(
            self,
            'UmccriseBatchComputeEnv',
            compute_environment_name="cdk-umccrise-batch-compute-env",
            service_role=batch_service_role,
            compute_resources=my_compute_res)

        job_queue = batch.JobQueue(self,
                                   'UmccriseJobQueue',
                                   job_queue_name='cdk-umccrise_job_queue',
                                   compute_environments=[
                                       batch.JobQueueComputeEnvironment(
                                           compute_environment=my_compute_env,
                                           order=1)
                                   ],
                                   priority=10)

        job_container = batch.JobDefinitionContainer(
            image=ecs.ContainerImage.from_registry(
                name=props['container_image']),
            vcpus=2,
            memory_limit_mib=2048,
            command=["/opt/container/umccrise-wrapper.sh", "Ref::vcpus"],
            mount_points=[
                ecs.MountPoint(container_path='/work',
                               read_only=False,
                               source_volume='work'),
                ecs.MountPoint(container_path='/opt/container',
                               read_only=True,
                               source_volume='container')
            ],
            volumes=[
                ecs.Volume(name='container',
                           host=ecs.Host(source_path='/opt/container')),
                ecs.Volume(name='work', host=ecs.Host(source_path='/mnt'))
            ],
            privileged=True)

        job_definition = batch.JobDefinition(
            self,
            'UmccriseJobDefinition',
            job_definition_name='cdk-umccrise-job-definition',
            parameters={'vcpus': '1'},
            container=job_container,
            timeout=core.Duration.hours(5))

        ################################################################################
        # Set up job submission Lambda

        lambda_role = iam.Role(
            self,
            'UmccriseLambdaRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSLambdaBasicExecutionRole'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AWSBatchFullAccess')  # TODO: restrict!
            ])

        for bucket in ro_buckets:
            bucket.grant_read(lambda_role)
        for bucket in rw_buckets:
            bucket.grant_read(lambda_role)

        # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
        #       may need a default JobDefinition to be set up
        lmbda.Function(self,
                       'UmccriseLambda',
                       function_name='umccrise_batch_lambda',
                       handler='umccrise.lambda_handler',
                       runtime=lmbda.Runtime.PYTHON_3_7,
                       code=lmbda.Code.from_asset('lambdas/umccrise'),
                       environment={
                           'JOBNAME_PREFIX': "UMCCRISE_",
                           'JOBQUEUE': job_queue.job_queue_name,
                           'REFDATA_BUCKET': props['refdata_bucket'],
                           'DATA_BUCKET': props['data_bucket'],
                           'UMCCRISE_MEM': '50000',
                           'UMCCRISE_VCPUS': '16'
                       },
                       role=lambda_role)
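
The handler wired up above lives in lambdas/umccrise and is not part of this stack; a minimal sketch of what it could look like, with the event payload shape assumed:

import os
import boto3

batch_client = boto3.client('batch')

def lambda_handler(event, context):
    # Hypothetical payload: the caller passes a sample identifier.
    job_name = os.environ['JOBNAME_PREFIX'] + event['sample_id']
    return batch_client.submit_job(
        jobName=job_name,
        jobQueue=os.environ['JOBQUEUE'],
        jobDefinition='cdk-umccrise-job-definition',
        containerOverrides={
            'vcpus': int(os.environ['UMCCRISE_VCPUS']),
            'memory': int(os.environ['UMCCRISE_MEM']),
        },
    )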
Example #5
    def __init__(self, scope: core.Construct, id: str, config_dict,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
        """ Get VPC details """
        vpc = ec2.Vpc.from_lookup(self, "VPC", vpc_id=config_dict['vpc_id'])
        """ Create Security Group for Batch Env """
        batch_security_group = "datalake-batch-security-group"

        createBatchSecurityGroup = ec2.SecurityGroup(
            self,
            "createBatchSecurityGroup",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            "This security group will be used for AWS Batch Compute Env",
            security_group_name=batch_security_group)

        createBatchSecurityGroup.add_ingress_rule(
            peer=ec2.Peer.ipv4("0.0.0.0/0"),
            connection=ec2.Port(protocol=ec2.Protocol.TCP,
                                string_representation="ingress_rule",
                                from_port=22,
                                to_port=22))

        createBatchSecurityGroup.add_egress_rule(
            peer=ec2.Peer.ipv4("0.0.0.0/0"),
            connection=ec2.Port(protocol=ec2.Protocol.TCP,
                                string_representation="egress_rule",
                                from_port=-1,
                                to_port=-1))

        core.CfnOutput(self,
                       "createBatchSecurityGroupId",
                       value=createBatchSecurityGroup.security_group_id)
        """ Create IAM Role for ecsInstance """
        createECSInstanceRole = iam.Role(
            self,
            "createECSInstanceRole",
            assumed_by=iam.ServicePrincipal("ec2.amazonaws.com"),
            description=
            "This instance role will be used by the ECS cluster instances",
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    "AmazonEC2FullAccess"),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    "AmazonS3FullAccess"),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    "AWSBatchFullAccess"),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    "SecretsManagerReadWrite"),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    "AmazonAthenaFullAccess"),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    "service-role/"
                    "AmazonEC2ContainerServiceforEC2Role"),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    "service-role/AWSBatchServiceRole")
            ],
            role_name="datalake-ecsinstance-role")

        createInstanceProfile = iam.CfnInstanceProfile(
            self,
            "createInstanceProfile",
            roles=[createECSInstanceRole.role_name],
            instance_profile_name="datalake-ecsinstance-role")

        useECSInstanceProfile = createInstanceProfile.instance_profile_name

        core.CfnOutput(self,
                       "createECSInstanceRoleName",
                       value=createECSInstanceRole.role_name)
        """ Create Spot Fleet Role """
        createSpotFleetRole = iam.Role(
            self,
            'createSpotFleetRole',
            assumed_by=iam.ServicePrincipal("spotfleet.amazonaws.com"),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    "service-role/AmazonEC2SpotFleetTaggingRole")
            ])

        core.CfnOutput(self,
                       "createSpotFleetRoleName",
                       value=createSpotFleetRole.role_name)

        useSpotFleetRole = createSpotFleetRole.without_policy_updates()
        """ Create Batch Service Role """
        createBatchServiceRole = iam.Role(
            self,
            'createBatchServiceRole',
            assumed_by=iam.ServicePrincipal("batch.amazonaws.com"),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    "service-role/AWSBatchServiceRole")
            ])

        core.CfnOutput(self,
                       "createBatchServiceRoleName",
                       value=createBatchServiceRole.role_name)

        useBatchServiceRole = createBatchServiceRole.without_policy_updates()
        """ Create Compute Environment """

        subnet_1 = ec2.Subnet.from_subnet_attributes(
            self,
            "subnet_1",
            subnet_id=config_dict['SubnetIds'].split(",")[0],
            availability_zone=config_dict['AvailabilityZones'].split(",")[0])
        subnet_2 = ec2.Subnet.from_subnet_attributes(
            self,
            "subnet_2",
            subnet_id=config_dict['SubnetIds'].split(",")[1],
            availability_zone=config_dict['AvailabilityZones'].split(",")[1])

        createBatchComputeEnv = batch.ComputeEnvironment(
            self,
            "createBatchComputeEnv",
            compute_environment_name="datalake-compute-env",
            service_role=useBatchServiceRole,
            compute_resources=batch.ComputeResources(
                vpc=vpc,
                type=batch.ComputeResourceType.SPOT,
                bid_percentage=60,
                desiredv_cpus=0,
                maxv_cpus=100,
                minv_cpus=0,
                security_groups=[createBatchSecurityGroup],
                vpc_subnets=ec2.SubnetSelection(subnets=[subnet_1, subnet_2]),
                instance_role=useECSInstanceProfile,
                spot_fleet_role=useSpotFleetRole,
                # core.Tag.add() returns None; pass a plain dict of tags instead
                compute_resources_tags={'Name': 'Datalake Pipeline Instance'}))

        core.CfnOutput(self,
                       "createBatchComputeEnvName",
                       value=createBatchComputeEnv.compute_environment_name)

        getIComputeEnvObject = batch.ComputeEnvironment.from_compute_environment_arn(
            self,
            "getComputeEnvAtrributes",
            compute_environment_arn=createBatchComputeEnv.compute_environment_arn)
        """ Create Batch Job Queue """
        createBatchJobQueue = batch.JobQueue(
            self,
            "createBatchJobQueue",
            compute_environments=[
                batch.JobQueueComputeEnvironment(
                    compute_environment=getIComputeEnvObject, order=1)
            ],
            enabled=True,
            job_queue_name="datalake-job-queue",
            priority=1)

        core.CfnOutput(self,
                       "createBatchJobQueueName",
                       value=createBatchJobQueue.job_queue_name)
        """ Create ECR Repo for datalake images """
        createECRRepo = ecr.Repository(
            self,
            "createECRRepo",
            repository_name=config_dict['workflow_ecr_repo'])

        core.CfnOutput(self,
                       "createECRRepoName",
                       value=createECRRepo.repository_name)
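
The shape of config_dict is implied by the lookups above; a hypothetical example (all values invented):

config_dict = {
    'vpc_id': 'vpc-0123456789abcdef0',
    'SubnetIds': 'subnet-aaaa1111,subnet-bbbb2222',
    'AvailabilityZones': 'ap-southeast-2a,ap-southeast-2b',
    'workflow_ecr_repo': 'datalake-workflows',
}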
Example #6
    def __init__(self, scope: core.Construct, id: str, props,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        ################################################################################
        # Set up permissions
        ro_buckets = set()
        for bucket in props['ro_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(self,
                                                    bucket,
                                                    bucket_name=bucket)
            ro_buckets.add(tmp_bucket)

        rw_buckets = set()
        for bucket in props['rw_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(self,
                                                    bucket,
                                                    bucket_name=bucket)
            rw_buckets.add(tmp_bucket)

        batch_service_role = iam.Role(
            self,
            'BatchServiceRole',
            assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSBatchServiceRole')
            ])

        spotfleet_role = iam.Role(
            self,
            'AmazonEC2SpotFleetRole',
            assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2SpotFleetTaggingRole')
            ])

        # Create role for Batch instances
        batch_instance_role = iam.Role(
            self,
            'BatchInstanceRole',
            role_name='RnasumBatchInstanceRole',
            assumed_by=iam.CompositePrincipal(
                iam.ServicePrincipal('ec2.amazonaws.com'),
                iam.ServicePrincipal('ecs.amazonaws.com')),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2RoleforSSM'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2ContainerServiceforEC2Role')
            ])
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(actions=[
                "ec2:Describe*", "ec2:AttachVolume", "ec2:CreateVolume",
                "ec2:CreateTags", "ec2:ModifyInstanceAttribute"
            ],
                                resources=["*"]))
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(actions=["ecs:ListClusters"], resources=["*"]))
        for bucket in ro_buckets:
            bucket.grant_read(batch_instance_role)
        for bucket in rw_buckets:
            # TODO: restrict writes to paths matching */rnasum/*
            bucket.grant_read_write(batch_instance_role)

        # Turn the instance role into an Instance Profile
        batch_instance_profile = iam.CfnInstanceProfile(
            self,
            'BatchInstanceProfile',
            instance_profile_name='RnasumBatchInstanceProfile',
            roles=[batch_instance_role.role_name])

        ################################################################################
        # Minimal networking
        # TODO: import resource created with TF
        vpc = props['vpc']

        ################################################################################
        # Setup Batch compute resources

        # Configure BlockDevice to expand instance disk space (if needed?)
        block_device_mappings = [{
            'deviceName': '/dev/xvdf',
            'ebs': {
                'deleteOnTermination': True,
                'volumeSize': 1024,
                'volumeType': 'gp2'
            }
        }]

        launch_template = ec2.CfnLaunchTemplate(
            self,
            'RnasumBatchComputeLaunchTemplate',
            launch_template_name='RnasumBatchComputeLaunchTemplate',
            launch_template_data={
                # 'userData': core.Fn.base64(user_data_script),   FIXME may not need this for RNAsum case? see job_definition below
                'blockDeviceMappings': block_device_mappings
            })

        launch_template_spec = batch.LaunchTemplateSpecification(
            launch_template_name=launch_template.launch_template_name,
            version='$Latest')

        my_compute_res = batch.ComputeResources(
            type=batch.ComputeResourceType.SPOT,
            allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
            desiredv_cpus=0,
            maxv_cpus=80,
            minv_cpus=0,
            image=ec2.MachineImage.generic_linux(
                ami_map={'ap-southeast-2': props['compute_env_ami']}),
            launch_template=launch_template_spec,
            spot_fleet_role=spotfleet_role,
            instance_role=batch_instance_profile.instance_profile_name,
            vpc=vpc,
            #compute_resources_tags=core.Tag('Creator', 'Batch')
        )
        # XXX: How to add more than one tag above??
        # core.Tag.add(my_compute_res, 'Foo', 'Bar')

        my_compute_env = batch.ComputeEnvironment(
            self,
            'RnasumBatchComputeEnv',
            compute_environment_name="RnasumBatchComputeEnv",
            service_role=batch_service_role,
            compute_resources=my_compute_res)

        job_queue = batch.JobQueue(self,
                                   'RnasumJobQueue',
                                   job_queue_name='rnasum_job_queue',
                                   compute_environments=[
                                       batch.JobQueueComputeEnvironment(
                                           compute_environment=my_compute_env,
                                           order=1)
                                   ],
                                   priority=10)

        # This is the equivalent of
        # https://github.com/umccr/infrastructure/blob/master/terraform/stacks/wts_report/jobs/wts_report.json
        default_container_props = {
            'image': props['container_image'],
            'vcpus': 2,
            'memory': 2048,
            'command': ['/opt/container/WTS-report-wrapper.sh', 'Ref::vcpus'],
            'volumes': [
                {'host': {'sourcePath': '/mnt'}, 'name': 'work'},
                {'host': {'sourcePath': '/opt/container'}, 'name': 'container'}
            ],
            'mountPoints': [
                {'containerPath': '/work', 'readOnly': False, 'sourceVolume': 'work'},
                {'containerPath': '/opt/container', 'readOnly': True, 'sourceVolume': 'container'}
            ],
            'readonlyRootFilesystem': False,
            'privileged': True,
            'ulimits': []
        }

        # and CDK equivalent of
        # https://github.com/umccr/infrastructure/blob/master/terraform/stacks/wts_report/main.tf#L113
        job_definition = batch.CfnJobDefinition(
            self,
            'RnasumJobDefinition',
            job_definition_name='rnasum_job_dev',
            type='container',
            container_properties=default_container_props,
            parameters={
                'vcpus': '1',  # Batch parameter values must be strings
            })

        ################################################################################
        # Set up job submission Lambda

        lambda_role = iam.Role(
            self,
            'RnasumLambdaRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSLambdaBasicExecutionRole'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AWSBatchFullAccess')  # TODO: restrict!
            ])

        for bucket in ro_buckets:
            bucket.grant_read(lambda_role)
        for bucket in rw_buckets:
            bucket.grant_read(lambda_role)

        # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
        #       may need a default JobDefinition to be set up
        # and CDK equivalent of
        # https://github.com/umccr/infrastructure/blob/master/terraform/stacks/wts_report/main.tf#L159
        lmbda.Function(self,
                       'RnasumLambda',
                       function_name='rnasum_batch_lambda',
                       handler='trigger_wts_report.lambda_handler',
                       runtime=lmbda.Runtime.PYTHON_3_7,
                       code=lmbda.Code.from_asset('lambdas/'),
                       environment={
                           'JOBNAME_PREFIX': "rnasum_",
                           'JOBQUEUE': job_queue.job_queue_name,
                           'JOBDEF': job_definition.job_definition_name,
                           'REFDATA_BUCKET': props['refdata_bucket'],
                           'DATA_BUCKET': props['data_bucket'],
                           'JOB_MEM': '32000',
                           'JOB_VCPUS': '8',
                           'REF_DATASET': 'PANCAN',
                           'GENOME_BUILD': '38',
                       },
                       role=lambda_role)
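
The 'Ref::vcpus' placeholder in the container command is substituted from the job definition's parameters map, which can be overridden per submission; a hedged boto3 sketch:

import boto3

boto3.client('batch').submit_job(
    jobName='rnasum-example',        # hypothetical job name
    jobQueue='rnasum_job_queue',
    jobDefinition='rnasum_job_dev',
    parameters={'vcpus': '8'},       # replaces "Ref::vcpus" in the command
)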
Example #7
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        stack_role = iam.Role(
            self,
            "SimulationServiceRole",
            assumed_by=iam.ServicePrincipal("batch.amazonaws.com"),
        )

        stack_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AdministratorAccess"))

        job_role = iam.Role(
            self,
            "SimulationJobServiceRole",
            assumed_by=iam.ServicePrincipal("ecs-tasks.amazonaws.com"),
        )

        job_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AdministratorAccess"))

        lambda_role = iam.Role(
            self,
            "SimulationLambdaServiceRole",
            assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
        )

        lambda_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AdministratorAccess"))

        # Create Input S3
        input_bucket = s3.Bucket(self, "InputS3Bucket")

        # Create Output S3
        output_bucket = s3.Bucket(self, "OutputS3Bucket")

        # admin_policy = iam.from_policy_name("AdministratorAccess", "AdministratorAccess")

        job_table = aws_dynamodb.Table(
            self,
            id="JobTable",
            partition_key=aws_dynamodb.Attribute(
                name="PK", type=aws_dynamodb.AttributeType.STRING),
            stream=aws_dynamodb.StreamViewType.NEW_AND_OLD_IMAGES,
            removal_policy=core.RemovalPolicy.DESTROY,
        )

        orchestration_handler_lambda = aws_lambda.Function(
            self,
            id="JobOrchestrationHandler",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            handler="orchestration_handler_lambda.handler",
            code=aws_lambda.Code.from_asset("./simulations_service/functions/"),
        )

        # Give only write access to the post handler
        job_table.grant_write_data(orchestration_handler_lambda)

        # Pass table_name as env variable
        orchestration_handler_lambda.add_environment("TABLE_NAME",
                                                     job_table.table_name)

        # Create lambda function for processing dynamodb streams
        dynamodb_streams_processor_lambda = aws_lambda.Function(
            self,
            id="JobsDynamoDBStreamsProcessor",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            handler="dynamodb_streams_processor_lambda.handler",
            code=aws_lambda.Code.from_asset("./simulations_service/functions/"),
            role=lambda_role,
        )

        # Add dynamo db as lambda event source
        dynamodb_streams_processor_lambda.add_event_source(
            aws_lambda_event_sources.DynamoEventSource(
                job_table,
                starting_position=aws_lambda.StartingPosition.LATEST,
                batch_size=1,
            ))

        dynamodb_streams_processor_lambda.add_environment(
            "S3_OUTPUT_BUCKET", output_bucket.bucket_name)

        dynamodb_streams_processor_lambda.add_environment(
            "TABLE_NAME", job_table.table_name)

        vpc = ec2.Vpc(self, "VPC")

        spot_environment = batch.ComputeEnvironment(
            self,
            "MyComputedEnvironment",
            compute_resources={
                "vpc": vpc,
            },
            service_role=stack_role.without_policy_updates(),
        )

        job_queue = batch.JobQueue(
            self,
            "JobQueue",
            compute_environments=[
                batch.JobQueueComputeEnvironment(
                    compute_environment=spot_environment, order=1)
            ],
        )

        dynamodb_streams_processor_lambda.add_environment(
            "JOB_QUEUE", job_queue.job_queue_name)

        job_definition = batch.JobDefinition(
            self,
            "batch-job-def-from-local",
            container={
                "image":
                ecs.ContainerImage.from_asset("./simulations_service/job/"),
                "memory_limit_mib":
                500,
                "privileged":
                True,
                "job_role":
                job_role,
            },
        )

        dynamodb_streams_processor_lambda.add_environment(
            "JOB_DEFINITION", job_definition.job_definition_name)

        orchestration_handler_lambda.add_event_source(
            aws_lambda_event_sources.S3EventSource(
                bucket=input_bucket,
                events=[s3.EventType.OBJECT_CREATED],
            ))
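
A minimal sketch of the stream-processor handler this stack deploys; only the environment variable names come from the stack above, while the record handling and payload are assumptions:

import os
import boto3

batch_client = boto3.client('batch')

def handler(event, context):
    for record in event['Records']:
        if record['eventName'] != 'INSERT':
            continue  # assumption: only react to newly inserted jobs
        job_pk = record['dynamodb']['Keys']['PK']['S']
        batch_client.submit_job(
            jobName=f'simulation-{job_pk}',
            jobQueue=os.environ['JOB_QUEUE'],
            jobDefinition=os.environ['JOB_DEFINITION'],
            containerOverrides={'environment': [
                {'name': 'JOB_PK', 'value': job_pk},
                {'name': 'S3_OUTPUT_BUCKET', 'value': os.environ['S3_OUTPUT_BUCKET']},
            ]},
        )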
Example #8
    def __init__(self, app: core.Construct, stack_name: str, vpc: aws_ec2.Vpc,
                 security_group: aws_ec2.SecurityGroup):
        super().__init__(scope=app, id=f"{stack_name}-batch")

        batch_role = aws_iam.Role(
            scope=self,
            id=f"batch_role",
            role_name=f"batch_role",
            assumed_by=aws_iam.ServicePrincipal("batch.amazonaws.com"))

        batch_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AWSBatchServiceRole",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole"))

        batch_role.add_to_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["arn:aws:logs:*:*:*"],
                                    actions=[
                                        "logs:CreateLogGroup",
                                        "logs:CreateLogStream",
                                        "logs:PutLogEvents",
                                        "logs:DescribeLogStreams"
                                    ]))

        # Role to attach EC2
        instance_role = aws_iam.Role(
            scope=self,
            id=f"instance_role",
            role_name=f"instance_role_for",
            assumed_by=aws_iam.ServicePrincipal("ec2.amazonaws.com"))

        instance_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AmazonEC2ContainerServiceforEC2Role",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
            ))

        # add policy to access S3
        instance_role.add_to_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=["s3:*"]))

        # add policy to access CloudWatch Logs
        instance_role.add_to_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["arn:aws:logs:*:*:*"],
                                    actions=[
                                        "logs:CreateLogGroup",
                                        "logs:CreateLogStream",
                                        "logs:PutLogEvents",
                                        "logs:DescribeLogStreams"
                                    ]))

        # attach role to EC2
        instance_profile = aws_iam.CfnInstanceProfile(
            scope=self,
            id=f"instance_profile",
            instance_profile_name=f"instance_profile",
            roles=[instance_role.role_name])

        # ===== #
        # batch #
        # ===== #
        batch_compute_resources = aws_batch.ComputeResources(
            vpc=vpc,
            maxv_cpus=4,
            minv_cpus=0,
            security_groups=[security_group],
            instance_role=instance_profile.attr_arn,
            type=aws_batch.ComputeResourceType.SPOT)

        batch_compute_environment = aws_batch.ComputeEnvironment(
            scope=self,
            id="batch_compute_environment",
            compute_environment_name="batch_compute_environment",
            compute_resources=batch_compute_resources,
            service_role=batch_role)

        job_role = aws_iam.Role(
            scope=self,
            id=f"job_role",
            role_name=f"job_role",
            assumed_by=aws_iam.ServicePrincipal("ecs-tasks.amazonaws.com"))

        job_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AmazonECSTaskExecutionRolePolicy",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
            ))

        job_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AmazonS3FullAccess",
                managed_policy_arn="arn:aws:iam::aws:policy/AmazonS3FullAccess"
            ))

        job_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"CloudWatchLogsFullAccess",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/CloudWatchLogsFullAccess"))

        self.batch_job_queue = aws_batch.JobQueue(
            scope=self,
            id=f"job_queue",
            job_queue_name=f"job_queue",
            compute_environments=[
                aws_batch.JobQueueComputeEnvironment(
                    compute_environment=batch_compute_environment, order=1)
            ],
            priority=1)

        # Build the Docker image and push it to ECR as a CDK image asset
        ecr_repository = aws_ecr_assets.DockerImageAsset(
            scope=self,
            id="ecr_image",
            directory="./docker",
            repository_name="repository")

        # get image from ECR
        container_image = aws_ecs.ContainerImage.from_ecr_repository(
            repository=ecr_repository.repository)

        # Job definition; passes `S3_BUCKET` to the container as an
        # environment variable.
        self.batch_job_definition = aws_batch.JobDefinition(
            scope=self,
            id=f"job_definition",
            job_definition_name=f"job_definition",
            container=aws_batch.JobDefinitionContainer(
                image=container_image,
                environment={"S3_BUCKET": f"{S3_BUCKET}"},
                job_role=job_role,
                vcpus=1,
                memory_limit_mib=1024))
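
This stack takes an existing VPC and security group as constructor arguments; hypothetical wiring in an app entry point (the NetworkStack name and its attributes are assumptions):

app = core.App()
network = NetworkStack(app, stack_name="sim")   # assumed stack exposing .vpc and .security_group
batch_stack = BatchStack(app, stack_name="sim",
                         vpc=network.vpc,
                         security_group=network.security_group)  # class name assumed
app.synth()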
Example #9
    def __init__(self, scope: core.Construct, id: str, props, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        dirname = os.path.dirname(__file__)

        ecr_repo = ecr.Repository.from_repository_name(
            self,
            'UmccriseEcrRepo',
            repository_name='umccrise'
        )

        ################################################################################
        # Set up permissions
        ro_buckets = set()
        for bucket in props['ro_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(
                self,
                bucket,
                bucket_name=bucket
            )
            ro_buckets.add(tmp_bucket)

        rw_buckets = set()
        for bucket in props['rw_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(
                self,
                bucket,
                bucket_name=bucket
            )
            rw_buckets.add(tmp_bucket)

        batch_service_role = iam.Role(
            self,
            'BatchServiceRole',
            assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSBatchServiceRole')
            ]
        )

        spotfleet_role = iam.Role(
            self,
            'AmazonEC2SpotFleetRole',
            assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2SpotFleetTaggingRole')
            ]
        )

        # Create role for Batch instances
        batch_instance_role = iam.Role(
            self,
            'BatchInstanceRole',
            role_name='UmccriseBatchInstanceRole',
            assumed_by=iam.CompositePrincipal(
                iam.ServicePrincipal('ec2.amazonaws.com'),
                iam.ServicePrincipal('ecs.amazonaws.com')
            ),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2RoleforSSM'),
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2ContainerServiceforEC2Role')
            ]
        )
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "ec2:Describe*",
                    "ec2:AttachVolume",
                    "ec2:CreateVolume",
                    "ec2:CreateTags",
                    "ec2:ModifyInstanceAttribute"
                ],
                resources=["*"]
            )
        )
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "ecs:ListClusters"
                ],
                resources=["*"]
            )
        )
        for bucket in ro_buckets:
            bucket.grant_read(batch_instance_role)
        for bucket in rw_buckets:
            # restrict writes to paths matching */umccrised/*
            bucket.grant_read_write(batch_instance_role, '*/umccrised/*')

        # Turn the instance role into an Instance Profile
        batch_instance_profile = iam.CfnInstanceProfile(
            self,
            'BatchInstanceProfile',
            instance_profile_name='UmccriseBatchInstanceProfile',
            roles=[batch_instance_role.role_name]
        )

        ################################################################################
        # Network
        # (Import common infrastructure, maintained via Terraform)

        # VPC
        vpc = ec2.Vpc.from_lookup(
            self,
            'UmccrMainVpc',
            tags={'Name': 'main-vpc', 'Stack': 'networking'}
        )

        batch_security_group = ec2.SecurityGroup(
            self,
            "BatchSecurityGroup",
            vpc=vpc,
            description="Allow all outbound, no inbound traffic"
        )
        ################################################################################
        # Setup Batch compute resources

        # Configure BlockDevice to expand instance disk space (if needed?)
        block_device_mappings = [
            {
                'deviceName': '/dev/xvdf',
                'ebs': {
                    'deleteOnTermination': True,
                    'encrypted': True,
                    'volumeSize': 2048,
                    'volumeType': 'gp2'
                }
            }
        ]

        # Set up custom user data to configure the Batch instances
        umccrise_wrapper_asset = assets.Asset(
            self,
            'UmccriseWrapperAsset',
            path=os.path.join(dirname, '..', 'assets', "umccrise-wrapper.sh")
        )
        umccrise_wrapper_asset.grant_read(batch_instance_role)

        user_data_asset = assets.Asset(
            self,
            'UserDataAsset',
            path=os.path.join(dirname, '..', 'assets', "batch-user-data.sh")
        )
        user_data_asset.grant_read(batch_instance_role)

        user_data = ec2.UserData.for_linux()
        local_path = user_data.add_s3_download_command(
            bucket=user_data_asset.bucket,
            bucket_key=user_data_asset.s3_object_key
        )
        user_data.add_execute_file_command(
            file_path=local_path,
            arguments=f"s3://{umccrise_wrapper_asset.bucket.bucket_name}/{umccrise_wrapper_asset.s3_object_key}"
        )

        # Generate a user data wrapper to comply with the MIME multi-part archive format that launch templates require for user data
        mime_wrapper = ec2.UserData.custom('MIME-Version: 1.0')
        mime_wrapper.add_commands('Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="')
        mime_wrapper.add_commands('')
        mime_wrapper.add_commands('--==MYBOUNDARY==')
        mime_wrapper.add_commands('Content-Type: text/x-shellscript; charset="us-ascii"')
        mime_wrapper.add_commands('')
        # install the AWS CLI, as it's unexpectedly missing from the Amazon Linux 2 AMI...
        mime_wrapper.add_commands('yum -y install unzip')
        mime_wrapper.add_commands('cd /opt')
        mime_wrapper.add_commands('curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"')
        mime_wrapper.add_commands('unzip awscliv2.zip')
        mime_wrapper.add_commands('sudo ./aws/install --bin-dir /usr/bin')
        # insert our actual user data payload
        mime_wrapper.add_commands(user_data.render())
        mime_wrapper.add_commands('--==MYBOUNDARY==--')

        launch_template = ec2.CfnLaunchTemplate(
            self,
            'UmccriseBatchComputeLaunchTemplate',
            launch_template_name='UmccriseBatchComputeLaunchTemplate',
            launch_template_data={
                'userData': core.Fn.base64(mime_wrapper.render()),
                'blockDeviceMappings': block_device_mappings
            }
        )

        launch_template_spec = batch.LaunchTemplateSpecification(
            launch_template_name=launch_template.launch_template_name,
            version='$Latest'
        )

        my_compute_res = batch.ComputeResources(
            type=(batch.ComputeResourceType.SPOT if props['compute_env_type'].lower() == 'spot' else batch.ComputeResourceType.ON_DEMAND),
            allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
            desiredv_cpus=0,
            maxv_cpus=320,
            minv_cpus=0,
            image=ec2.MachineImage.generic_linux(ami_map={'ap-southeast-2': props['compute_env_ami']}),
            launch_template=launch_template_spec,
            spot_fleet_role=spotfleet_role,
            instance_role=batch_instance_profile.instance_profile_name,
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(
                subnet_type=ec2.SubnetType.PRIVATE,
                # availability_zones=["ap-southeast-2a"]
            ),
            security_groups=[batch_security_group]
            # compute_resources_tags=core.Tag('Creator', 'Batch')
        )
        # XXX: How to add more than one tag above??
        # https://github.com/aws/aws-cdk/issues/7350
        # core.Tag.add(my_compute_res, 'Foo', 'Bar')

        my_compute_env = batch.ComputeEnvironment(
            self,
            'UmccriseBatchComputeEnv',
            compute_environment_name="cdk-umccr_ise-batch-compute-env",
            service_role=batch_service_role,
            compute_resources=my_compute_res
        )
        # child = my_compute_env.node.default_child
        # child_comp_res = child.compute_resources
        # child_comp_res.tags = "{'Foo': 'Bar'}"

        job_queue = batch.JobQueue(
            self,
            'UmccriseJobQueue',
            job_queue_name='cdk-umccrise_job_queue',
            compute_environments=[
                batch.JobQueueComputeEnvironment(
                    compute_environment=my_compute_env,
                    order=1
                )
            ],
            priority=10
        )

        job_container = batch.JobDefinitionContainer(
            image=ecs.ContainerImage.from_registry(name=props['container_image']),
            vcpus=32,
            memory_limit_mib=100000,
            command=[
                "/opt/container/umccrise-wrapper.sh",
                "Ref::vcpus"
            ],
            mount_points=[
                ecs.MountPoint(
                    container_path='/work',
                    read_only=False,
                    source_volume='work'
                ),
                ecs.MountPoint(
                    container_path='/opt/container',
                    read_only=True,
                    source_volume='container'
                )
            ],
            volumes=[
                ecs.Volume(
                    name='container',
                    host=ecs.Host(
                        source_path='/opt/container'
                    )
                ),
                ecs.Volume(
                    name='work',
                    host=ecs.Host(
                        source_path='/mnt'
                    )
                )
            ],
            privileged=True
        )

        job_definition = batch.JobDefinition(
            self,
            'UmccriseJobDefinition',
            job_definition_name='cdk-umccrise-job-definition',
            parameters={'vcpus': '1'},
            container=job_container,
            timeout=core.Duration.hours(5)
        )
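        # At submit time AWS Batch replaces Ref::vcpus in the container command
        # with the job's `parameters` value ('1' unless overridden per job), so
        # callers can tune the vCPU argument without a new job definition.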

        ################################################################################
        # Set up job submission Lambda

        lambda_role = iam.Role(
            self,
            'UmccriseLambdaRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSLambdaBasicExecutionRole'),
                iam.ManagedPolicy.from_aws_managed_policy_name('AWSBatchFullAccess')  # TODO: restrict!
            ]
        )

        for bucket in ro_buckets:
            bucket.grant_read(lambda_role)
        for bucket in rw_buckets:
            bucket.grant_read(lambda_role)
        ecr_repo.grant(lambda_role, 'ecr:ListImages')

        # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
        #       may need a default JobDefinition to be set up
        lmbda.Function(
            self,
            'UmccriseLambda',
            function_name='umccrise_batch_lambda',
            handler='umccrise.lambda_handler',
            runtime=lmbda.Runtime.PYTHON_3_7,
            code=lmbda.Code.from_asset('lambdas/umccrise'),
            environment={
                'JOBNAME_PREFIX': "UMCCRISE_",
                'JOBQUEUE': job_queue.job_queue_name,
                'UMCCRISE_MEM': '100000',
                'UMCCRISE_VCPUS': '32',
                'JOBDEF': job_definition.job_definition_name,
                'REFDATA_BUCKET': props['refdata_bucket'],
                'INPUT_BUCKET': props['input_bucket'],
                'RESULT_BUCKET': props['result_bucket'],
                'IMAGE_CONFIGURABLE': props['image_configurable']
            },
            role=lambda_role
        )
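        # A minimal sketch of what the lambdas/umccrise handler might look like
        # (hypothetical; the actual handler is a separate asset and the event
        # keys are assumptions). It reads the environment defined above and
        # submits a Batch job via boto3:
        #
        #   import os
        #   import boto3
        #
        #   batch_client = boto3.client('batch')
        #
        #   def lambda_handler(event, context):
        #       vcpus = os.environ['UMCCRISE_VCPUS']
        #       return batch_client.submit_job(
        #           jobName=os.environ['JOBNAME_PREFIX'] + event['job_suffix'],
        #           jobQueue=os.environ['JOBQUEUE'],
        #           jobDefinition=os.environ['JOBDEF'],
        #           parameters={'vcpus': vcpus},  # feeds the Ref::vcpus placeholder
        #           containerOverrides={
        #               'vcpus': int(vcpus),
        #               'memory': int(os.environ['UMCCRISE_MEM'])
        #           }
        #       )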
    def __init__(self,
                 scope: core.Construct,
                 id: str,
                 CurrentVPC="default",
                 TargetS3="default",
                 UserName="******",
                 **kwargs):
        super().__init__(scope, id, **kwargs)

        self.job_queue = {}

        # batch service role
        self.batch_service_role = _iam.Role(
            self,
            'BatchServiceRole',
            assumed_by=_iam.ServicePrincipal('batch.amazonaws.com'),
            managed_policies=[
                _iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSBatchServiceRole')
            ])

        # EC2 role with policies that allow Batch compute instances to read objects from the S3 bucket
        self.batch_compute_role = _iam.Role(
            self,
            'BatchComputeRole',
            assumed_by=_iam.CompositePrincipal(
                _iam.ServicePrincipal('ec2.amazonaws.com'),
                _iam.ServicePrincipal('ecs.amazonaws.com')),
            managed_policies=[
                _iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2RoleforSSM'),
                _iam.ManagedPolicy.from_aws_managed_policy_name(
                    "service-role/AmazonEC2ContainerServiceforEC2Role"),
                _iam.ManagedPolicy.from_aws_managed_policy_name(
                    "CloudWatchLogsFullAccess")
            ])

        TargetS3.grant_read_write(self.batch_compute_role)

        self.batch_compute_instance_profile = _iam.CfnInstanceProfile(
            self,
            'BatchInstanceProfile' + UserName,
            instance_profile_name='BatchInstanceProfile-' + UserName,
            roles=[self.batch_compute_role.role_name])

        self.ComputeENV = _batch.ComputeEnvironment(
            self,
            "ComputeENV",
            service_role=self.batch_service_role,
            compute_resources={
                "vpc": CurrentVPC,
                "instance_types": [
                    _ec2.InstanceType("c5"),
                    _ec2.InstanceType("m5")
                ],
                "maxv_cpus": 128,
                "minv_cpus": 0,
                "type": _batch.ComputeResourceType.SPOT,
                "allocation_strategy": _batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
                "instance_role": self.batch_compute_instance_profile.instance_profile_name
            })

        self.ComputeQueue = _batch.JobQueue(
            self,
            "ComputeQueue",
            priority=1,
            compute_environments=[
                _batch.JobQueueComputeEnvironment(
                    compute_environment=self.ComputeENV, order=1)
            ])
        self.job_queue["ComputeQueue"] = self.ComputeQueue
Example #11
0
    def __init__(
        self,
        scope: core.Construct,
        id: str,
        cidr_block: str,
        platform_identifier: str = 'covariate-ingest',
        **kwargs
    ) -> None:
        super().__init__(scope, id, **kwargs)

        self.lambda_function_role_name = f'{platform_identifier}-lambda-function'
        self.node.set_context('lambda_function_role_name', self.lambda_function_role_name)

        self.batch_job_role_name = f'{platform_identifier}-batch-job'
        self.node.set_context('batch_job_role_name', self.batch_job_role_name)

        self.vpc = ec2.Vpc(
            self,
            "vpc",
            enable_dns_hostnames=True,
            enable_dns_support=True,
            flow_logs={
                "default":
                    ec2.FlowLogOptions(
                        destination=ec2.FlowLogDestination.to_cloud_watch_logs()
                    )
            },
            # max_azs=99,  # Means use all AZs
            max_azs=3,
            cidr=cidr_block,
            # subnet_configuration creates one subnet per entry in each AZ;
            # with max_azs=3 in us-east-1 that is 3 public and 3 private subnets.
            subnet_configuration=[
                ec2.SubnetConfiguration(
                    name="Public",
                    cidr_mask=24,
                    subnet_type=ec2.SubnetType.PUBLIC,
                ),
                ec2.SubnetConfiguration(
                    subnet_type=ec2.SubnetType.PRIVATE,
                    name="Private",
                    cidr_mask=20
                )
            ],
            gateway_endpoints={
                "S3":
                    ec2.GatewayVpcEndpointOptions(
                        service=ec2.GatewayVpcEndpointAwsService.S3
                    )
            },
        )
        self.vpc.add_interface_endpoint(
            "EcrDockerEndpoint",
            service=ec2.InterfaceVpcEndpointAwsService.ECR_DOCKER
        )
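        # The S3 gateway endpoint and the ECR Docker interface endpoint keep most
        # image-pull traffic off the NAT gateways (ECR stores image layers in S3);
        # note that ECR auth calls still need the ecr.api endpoint or NAT.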

        # Public NACL
        self.nacl_public = ec2.NetworkAcl(
            self,
            "nacl_public",
            vpc=self.vpc,
            subnet_selection=ec2.SubnetSelection(
                subnet_type=ec2.SubnetType.PUBLIC
            )
        )
        self.nacl_public.add_entry(
            "in-rule",
            rule_number=95,
            cidr=ec2.AclCidr.any_ipv4(),
            rule_action=ec2.Action.ALLOW,
            direction=ec2.TrafficDirection.INGRESS,
            traffic=ec2.AclTraffic.tcp_port_range(start_port=0, end_port=65535)
        )
        self.nacl_public.add_entry(
            "out-rule",
            rule_number=95,
            cidr=ec2.AclCidr.any_ipv4(),
            rule_action=ec2.Action.ALLOW,
            direction=ec2.TrafficDirection.EGRESS,
            traffic=ec2.AclTraffic.tcp_port_range(start_port=0, end_port=65535)
        )

        # Private NACL
        self.nacl_private = ec2.NetworkAcl(
            self,
            "nacl_private",
            vpc=self.vpc,
            subnet_selection=ec2.SubnetSelection(
                subnet_type=ec2.SubnetType.PRIVATE
            )
        )
        self.nacl_private.add_entry(
            "in-rule",
            rule_number=95,
            cidr=ec2.AclCidr.any_ipv4(),
            rule_action=ec2.Action.ALLOW,
            direction=ec2.TrafficDirection.INGRESS,
            traffic=ec2.AclTraffic.tcp_port_range(start_port=0, end_port=65432)
        )
        self.nacl_private.add_entry(
            "out-rule",
            rule_number=95,
            cidr=ec2.AclCidr.any_ipv4(),
            rule_action=ec2.Action.ALLOW,
            direction=ec2.TrafficDirection.EGRESS,
            traffic=ec2.AclTraffic.tcp_port_range(start_port=0, end_port=65432)
        )
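        # NACLs are stateless, so unlike security groups the return traffic must
        # be allowed explicitly; hence the paired ingress/egress entries above.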

        # Add Batch Compute Envs
        cpu_instances = [
            ec2.InstanceType('c5.large'),
            ec2.InstanceType('c5.xlarge'),
            ec2.InstanceType('c5.2xlarge'),
            ec2.InstanceType('c5.4xlarge'),
            ec2.InstanceType('m5.large'),
            ec2.InstanceType('m5.xlarge'),
            ec2.InstanceType('m5.2xlarge'),
            ec2.InstanceType('m5.4xlarge'),
        ]

        self.cpu_on_demand = batch.ComputeEnvironment(
            self,
            'batch-cpu-on-demand',
            managed=True,
            enabled=True,
            compute_resources=batch.ComputeResources(
                vpc=self.vpc,  # Will select only private subnets.
                type=batch.ComputeResourceType.ON_DEMAND,
                allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
                minv_cpus=0,
                maxv_cpus=640,
                desiredv_cpus=0,
                instance_types=cpu_instances,
                image=ecs.EcsOptimizedImage.amazon_linux2(
                    hardware_type=ecs.AmiHardwareType.STANDARD
                ),
            ),
        )

        self.cpu_spot = batch.ComputeEnvironment(
            self,
            'batch-cpu-spot',
            managed=True,
            enabled=True,
            compute_resources=batch.ComputeResources(
                vpc=self.vpc,  # Will select only private subnets.
                type=batch.ComputeResourceType.SPOT,
                allocation_strategy=batch.AllocationStrategy.SPOT_CAPACITY_OPTIMIZED,
                bid_percentage=80,
                minv_cpus=0,
                maxv_cpus=640,
                desiredv_cpus=0,
                instance_types=cpu_instances,
                image=ecs.EcsOptimizedImage.amazon_linux2(
                    hardware_type=ecs.AmiHardwareType.STANDARD
                ),
            ),
        )

        self.cpu_spot_first = batch.JobQueue(
            self,
            'cpu-spot-first',
            job_queue_name=f'{platform_identifier}-cpu-queue',
            compute_environments=[
                batch.JobQueueComputeEnvironment(
                    compute_environment=self.cpu_spot, order=1
                ),
                batch.JobQueueComputeEnvironment(
                    compute_environment=self.cpu_on_demand, order=2
                ),
            ],
            enabled=True,
            priority=10
        )
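        # The queue tries compute environments in ascending `order`: spot first,
        # falling back to on-demand capacity when spot cannot be allocated.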

        self.lambda_function_role = iam.Role(
            self,
            'lambda-function-role',
            role_name=self.lambda_function_role_name,
            description='',
            assumed_by=iam.ServicePrincipal(service='lambda.amazonaws.com'),
        )
        

        self.batch_job_role = iam.Role(
            self,
            'batch-job-role',
            role_name=self.batch_job_role_name,
            description='',
            assumed_by=iam.ServicePrincipal(service='ecs-tasks.amazonaws.com'),
        )

        self.intermediate_bucket = s3.Bucket(
            self,
            f'{platform_identifier}-data-bucket',
            bucket_name=f'{platform_identifier}-data-dev',
            block_public_access=s3.BlockPublicAccess(
                block_public_acls=False,
                block_public_policy=False,
                ignore_public_acls=False,
                restrict_public_buckets=False
            ),
        )
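        # NOTE: all four public access blocks are disabled above, which allows
        # public ACLs and policies on this bucket; unless public access is
        # intended, s3.BlockPublicAccess.BLOCK_ALL is the safer default.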
        self.intermediate_bucket.grant_read_write(self.lambda_function_role)
        self.intermediate_bucket.grant_read_write(self.batch_job_role)

        cluster = ecs.Cluster(
            self, 
            "covar-api-cluster",
            cluster_name='covar-service-cluster',
            vpc=self.vpc
        )
Example #12
0
    def __init__(self, app: core.App, stack_name: str, stack_env: str):
        super().__init__(scope=app, id=f"{stack_name}-{stack_env}")

        # CIDR
        cidr = "192.168.0.0/24"

        # === #
        # vpc #
        # === #
        vpc = aws_ec2.Vpc(
            self,
            id=f"{stack_name}-{stack_env}-vpc",
            cidr=cidr,
            subnet_configuration=[
                # Public Subnet
                aws_ec2.SubnetConfiguration(
                    cidr_mask=28,
                    name=f"{stack_name}-{stack_env}-public",
                    subnet_type=aws_ec2.SubnetType.PUBLIC,
                )
            ],
        )

        security_group = aws_ec2.SecurityGroup(
            self,
            id=f'security-group-for-{stack_name}-{stack_env}',
            vpc=vpc,
            security_group_name=f'security-group-for-{stack_name}-{stack_env}',
            allow_all_outbound=True)

        batch_role = aws_iam.Role(
            scope=self,
            id=f"batch_role_for_{stack_name}-{stack_env}",
            role_name=f"batch_role_for_{stack_name}-{stack_env}",
            assumed_by=aws_iam.ServicePrincipal("batch.amazonaws.com"))

        batch_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AWSBatchServiceRole-{stack_env}",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole"))

        batch_role.add_to_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["arn:aws:logs:*:*:*"],
                                    actions=[
                                        "logs:CreateLogGroup",
                                        "logs:CreateLogStream",
                                        "logs:PutLogEvents",
                                        "logs:DescribeLogStreams"
                                    ]))

        # Role to attach to EC2 instances
        instance_role = aws_iam.Role(
            scope=self,
            id=f"instance_role_for_{stack_name}-{stack_env}",
            role_name=f"instance_role_for_{stack_name}-{stack_env}",
            assumed_by=aws_iam.ServicePrincipal("ec2.amazonaws.com"))

        instance_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AmazonEC2ContainerServiceforEC2Role-{stack_env}",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
            ))

        # add policy to access S3
        instance_role.add_to_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=["s3:*"]))

        # add policy to access CloudWatch Logs
        instance_role.add_to_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["arn:aws:logs:*:*:*"],
                                    actions=[
                                        "logs:CreateLogGroup",
                                        "logs:CreateLogStream",
                                        "logs:PutLogEvents",
                                        "logs:DescribeLogStreams"
                                    ]))

        # attach role to EC2
        instance_profile = aws_iam.CfnInstanceProfile(
            scope=self,
            id=f"instance_profile_for_{stack_name}-{stack_env}",
            instance_profile_name=
            f"instance_profile_for_{stack_name}-{stack_env}",
            roles=[instance_role.role_name])

        # ===== #
        # batch #
        # ===== #
        batch_compute_resources = aws_batch.ComputeResources(
            vpc=vpc,
            maxv_cpus=4,
            minv_cpus=0,
            security_groups=[security_group],
            instance_role=instance_profile.attr_arn,
            type=aws_batch.ComputeResourceType.SPOT)

        batch_compute_environment = aws_batch.ComputeEnvironment(
            scope=self,
            id=f"ProjectEnvironment-{stack_env}",
            compute_environment_name=f"ProjectEnvironmentBatch-{stack_env}",
            compute_resources=batch_compute_resources,
            service_role=batch_role)

        job_role = aws_iam.Role(
            scope=self,
            id=f"job_role_{stack_name}-{stack_env}",
            role_name=f"job_role_{stack_name}-{stack_env}",
            assumed_by=aws_iam.ServicePrincipal("ecs-tasks.amazonaws.com"))

        job_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AmazonECSTaskExecutionRolePolicy_{stack_name}-{stack_env}",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
            ))

        job_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AmazonS3FullAccess_{stack_name}-{stack_env}",
                managed_policy_arn="arn:aws:iam::aws:policy/AmazonS3FullAccess"
            ))

        job_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"CloudWatchLogsFullAccess_{stack_name}-{stack_env}",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/CloudWatchLogsFullAccess"))

        batch_job_queue = aws_batch.JobQueue(
            scope=self,
            id=f"job_queue_for_{stack_name}-{stack_env}",
            job_queue_name=f"job_queue_for_{stack_name}-{stack_env}",
            compute_environments=[
                aws_batch.JobQueueComputeEnvironment(
                    compute_environment=batch_compute_environment, order=1)
            ],
            priority=1)

        # ECR repository
        ecr_repository = aws_ecr_assets.DockerImageAsset(
            scope=self,
            id=f"ecr_image_{stack_env}",
            directory="./docker",
            repository_name=f"repository_for_{stack_env}")

        # get image from ECR
        container_image = aws_ecs.ContainerImage.from_ecr_repository(
            repository=ecr_repository.repository)

        # job definition
        # `S3_BUCKET` is passed to the container as an environment variable.
        batch_job_definition = aws_batch.JobDefinition(
            scope=self,
            id=f"job_definition_for_{stack_env}",
            job_definition_name=f"job_definition_for_{stack_env}",
            container=aws_batch.JobDefinitionContainer(
                image=container_image,
                environment={"S3_BUCKET": f"{S3_BUCKET}"},
                job_role=job_role,
                vcpus=1,
                memory_limit_mib=1024))

        # ============= #
        # StepFunctions #
        # ============= #
        # Ref::{keyword} placeholders are replaced with values from the Step Functions input
        command_overrides = ["python", "__init__.py", "--time", "Ref::time"]

        batch_task = aws_sfn_tasks.BatchSubmitJob(
            scope=self,
            id=f"batch_job_{stack_env}",
            job_definition=batch_job_definition,
            job_name=f"batch_job_{stack_env}_today",
            job_queue=batch_job_queue,
            container_overrides=aws_sfn_tasks.BatchContainerOverrides(
                command=command_overrides),
            payload=aws_sfn.TaskInput.from_object({"time.$": "$.time"}))
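        # BatchSubmitJob passes `payload` through as the SubmitJob `parameters`,
        # so the state input's $.time value replaces Ref::time in the command.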

        # the state machine definition consists of a single step
        definition = batch_task

        sfn_daily_process = aws_sfn.StateMachine(
            scope=self,
            id=f"YourProjectSFn-{stack_env}",
            definition=definition)

        # ================ #
        # CloudWatch Event #
        # ================ #

        # Run every day at 21:30 JST (12:30 UTC)
        # See https://docs.aws.amazon.com/lambda/latest/dg/tutorial-scheduled-events-schedule-expressions.html
        events_daily_process = aws_events.Rule(
            scope=self,
            id=f"DailySFnProcess-{stack_env}",
            schedule=aws_events.Schedule.cron(minute="30",
                                              hour="12",
                                              month='*',
                                              day="*",
                                              year='*'),
        )
        events_daily_process.add_target(
            aws_events_targets.SfnStateMachine(sfn_daily_process))
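        # Scheduled EventBridge events include a built-in `time` field (the
        # event's ISO-8601 timestamp), which is what $.time resolves to here.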