Example #1
    def __init__(self, scope: core.Construct, id: str, props,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        ################################################################################
        # Set up permissions
        ro_buckets = set()
        for bucket in props['ro_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(self,
                                                    bucket,
                                                    bucket_name=bucket)
            ro_buckets.add(tmp_bucket)

        rw_buckets = set()
        for bucket in props['rw_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(self,
                                                    bucket,
                                                    bucket_name=bucket)
            rw_buckets.add(tmp_bucket)

        batch_service_role = iam.Role(
            self,
            'BatchServiceRole',
            assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSBatchServiceRole')
            ])

        spotfleet_role = iam.Role(
            self,
            'AmazonEC2SpotFleetRole',
            assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2SpotFleetTaggingRole')
            ])

        # Create role for Batch instances
        batch_instance_role = iam.Role(
            self,
            'BatchInstanceRole',
            role_name='UmccriseBatchInstanceRole',
            assumed_by=iam.CompositePrincipal(
                iam.ServicePrincipal('ec2.amazonaws.com'),
                iam.ServicePrincipal('ecs.amazonaws.com')),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2RoleforSSM'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2ContainerServiceforEC2Role')
            ])
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(actions=[
                "ec2:Describe*", "ec2:AttachVolume", "ec2:CreateVolume",
                "ec2:CreateTags", "ec2:ModifyInstanceAttribute"
            ],
                                resources=["*"]))
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(actions=["ecs:ListClusters"], resources=["*"]))
        for bucket in ro_buckets:
            bucket.grant_read(batch_instance_role)
        for bucket in rw_buckets:
            # restrict writes to paths matching */umccrised/*
            bucket.grant_read_write(batch_instance_role, '*/umccrised/*')

        # Turn the instance role into an Instance Profile
        batch_instance_profile = iam.CfnInstanceProfile(
            self,
            'BatchInstanceProfile',
            instance_profile_name='UmccriseBatchInstanceProfile',
            roles=[batch_instance_role.role_name])

        ################################################################################
        # Minimal networking
        # TODO: import the resource created with Terraform
        vpc = props['vpc']

        ################################################################################
        # Setup Batch compute resources

        # Configure a block device mapping to expand the instance disk space (if needed)
        block_device_mappings = [{
            'deviceName': '/dev/xvdf',
            'ebs': {
                'deleteOnTermination': True,
                'volumeSize': 1024,
                'volumeType': 'gp2'
            }
        }]
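        # NOTE: `user_data_script` below is referenced but not defined in this
        # excerpt; it is assumed to be built earlier in the stack (Example #4
        # shows a full MIME-wrapped user data construction).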

        launch_template = ec2.CfnLaunchTemplate(
            self,
            'UmccriseBatchComputeLaunchTemplate',
            launch_template_name='UmccriseBatchComputeLaunchTemplate',
            launch_template_data={
                'userData': core.Fn.base64(user_data_script),
                'blockDeviceMappings': block_device_mappings
            })

        launch_template_spec = batch.LaunchTemplateSpecification(
            launch_template_name=launch_template.launch_template_name,
            version='$Latest')

        my_compute_res = batch.ComputeResources(
            type=batch.ComputeResourceType.SPOT,
            allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
            desiredv_cpus=0,
            maxv_cpus=128,
            minv_cpus=0,
            image=ec2.MachineImage.generic_linux(
                ami_map={'ap-southeast-2': props['compute_env_ami']}),
            launch_template=launch_template_spec,
            spot_fleet_role=spotfleet_role,
            instance_role=batch_instance_profile.instance_profile_name,
            vpc=vpc,
            #compute_resources_tags=core.Tag('Creator', 'Batch')
        )
        # XXX: How to add more than one tag above??
        # core.Tag.add(my_compute_res, 'Foo', 'Bar')

        my_compute_env = batch.ComputeEnvironment(
            self,
            'UmccriseBatchComputeEnv',
            compute_environment_name="cdk-umccrise-batch-compute-env",
            service_role=batch_service_role,
            compute_resources=my_compute_res)

        job_queue = batch.JobQueue(self,
                                   'UmccriseJobQueue',
                                   job_queue_name='cdk-umccrise_job_queue',
                                   compute_environments=[
                                       batch.JobQueueComputeEnvironment(
                                           compute_environment=my_compute_env,
                                           order=1)
                                   ],
                                   priority=10)

        job_container = batch.JobDefinitionContainer(
            image=ecs.ContainerImage.from_registry(
                name=props['container_image']),
            vcpus=2,
            memory_limit_mib=2048,
            command=["/opt/container/umccrise-wrapper.sh", "Ref::vcpus"],
            mount_points=[
                ecs.MountPoint(container_path='/work',
                               read_only=False,
                               source_volume='work'),
                ecs.MountPoint(container_path='/opt/container',
                               read_only=True,
                               source_volume='container')
            ],
            volumes=[
                ecs.Volume(name='container',
                           host=ecs.Host(source_path='/opt/container')),
                ecs.Volume(name='work', host=ecs.Host(source_path='/mnt'))
            ],
            privileged=True)

        job_definition = batch.JobDefinition(
            self,
            'UmccriseJobDefinition',
            job_definition_name='cdk-umccrise-job-definition',
            parameters={'vcpus': '1'},
            container=job_container,
            timeout=core.Duration.hours(5))

        ################################################################################
        # Set up job submission Lambda

        lambda_role = iam.Role(
            self,
            'UmccriseLambdaRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSLambdaBasicExecutionRole'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AWSBatchFullAccess')  # TODO: restrict!
            ])

        for bucket in ro_buckets:
            bucket.grant_read(lambda_role)
        for bucket in rw_buckets:
            bucket.grant_read(lambda_role)

        # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
        #       may need a default JobDefinition to be set up
        lmbda.Function(self,
                       'UmccriseLambda',
                       function_name='umccrise_batch_lambda',
                       handler='umccrise.lambda_handler',
                       runtime=lmbda.Runtime.PYTHON_3_7,
                       code=lmbda.Code.from_asset('lambdas/umccrise'),
                       environment={
                           'JOBNAME_PREFIX': "UMCCRISE_",
                           'JOBQUEUE': job_queue.job_queue_name,
                           'REFDATA_BUCKET': props['refdata_bucket'],
                           'DATA_BUCKET': props['data_bucket'],
                           'UMCCRISE_MEM': '50000',
                           'UMCCRISE_VCPUS': '16'
                       },
                       role=lambda_role)
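
The snippet above assumes the usual CDK v1 module aliases. A minimal import header consistent with the names used in this example (the alias choices are an assumption, not taken from the original source) might look like:

    from aws_cdk import (
        core,
        aws_batch as batch,
        aws_ec2 as ec2,
        aws_ecs as ecs,
        aws_iam as iam,
        aws_lambda as lmbda,
        aws_s3 as s3,
    )
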
Example #2
    def __init__(self, scope: core.Stack, id: str, cluster, vpc, worker,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
        self.cluster = cluster
        self.vpc = vpc
        self.worker = worker

        # Build a custom image for the Jenkins master.
        self.container_image = ecr.DockerImageAsset(
            self, "JenkinsMasterDockerImage", directory='./docker/master/')

        if (config['DEFAULT']['fargate_enabled'] == "yes"
                or config['DEFAULT']['ec2_enabled'] != "yes"):
            # Task definition details to define the Jenkins master container
            self.jenkins_task = ecs_patterns.ApplicationLoadBalancedTaskImageOptions(
                # image=ecs.ContainerImage.from_ecr_repository(self.container_image.repository),
                image=ecs.ContainerImage.from_docker_image_asset(
                    self.container_image),
                container_port=8080,
                enable_logging=True,
                environment={
                    # https://github.com/jenkinsci/docker/blob/master/README.md#passing-jvm-parameters
                    'JAVA_OPTS':
                    '-Djenkins.install.runSetupWizard=false',
                    # https://github.com/jenkinsci/configuration-as-code-plugin/blob/master/README.md#getting-started
                    'CASC_JENKINS_CONFIG':
                    '/config-as-code.yaml',
                    'network_stack':
                    self.vpc.stack_name,
                    'cluster_stack':
                    self.cluster.stack_name,
                    'worker_stack':
                    self.worker.stack_name,
                    'cluster_arn':
                    self.cluster.cluster.cluster_arn,
                    'aws_region':
                    config['DEFAULT']['region'],
                    'jenkins_url':
                    config['DEFAULT']['jenkins_url'],
                    'subnet_ids':
                    ",".join(
                        [x.subnet_id for x in self.vpc.vpc.private_subnets]),
                    'security_group_ids':
                    self.worker.worker_security_group.security_group_id,
                    'execution_role_arn':
                    self.worker.worker_execution_role.role_arn,
                    'task_role_arn':
                    self.worker.worker_task_role.role_arn,
                    'worker_log_group':
                    self.worker.worker_logs_group.log_group_name,
                    'worker_log_stream_prefix':
                    self.worker.worker_log_stream.log_stream_name
                },
            )

            # Create the Jenkins master service
            self.jenkins_master_service_main = ecs_patterns.ApplicationLoadBalancedFargateService(
                self,
                "JenkinsMasterService",
                cpu=int(config['DEFAULT']['fargate_cpu']),
                memory_limit_mib=int(
                    config['DEFAULT']['fargate_memory_limit_mib']),
                cluster=self.cluster.cluster,
                desired_count=1,
                enable_ecs_managed_tags=True,
                task_image_options=self.jenkins_task,
                cloud_map_options=ecs.CloudMapOptions(
                    name="master", dns_record_type=sd.DnsRecordType('A')))

            self.jenkins_master_service = self.jenkins_master_service_main.service
            self.jenkins_master_task = self.jenkins_master_service.task_definition

        if config['DEFAULT']['ec2_enabled'] == "yes":
            self.jenkins_load_balancer = elb.ApplicationLoadBalancer(
                self,
                "JenkinsMasterELB",
                vpc=self.vpc.vpc,
                internet_facing=True,
            )

            self.listener = self.jenkins_load_balancer.add_listener("Listener",
                                                                    port=80)

            self.jenkins_master_task = ecs.Ec2TaskDefinition(
                self,
                "JenkinsMasterTaskDef",
                network_mode=ecs.NetworkMode.AWS_VPC,
                volumes=[
                    ecs.Volume(name="efs_mount",
                               host=ecs.Host(source_path='/mnt/efs'))
                ],
            )

            self.jenkins_master_task.add_container(
                "JenkinsMasterContainer",
                image=ecs.ContainerImage.from_ecr_repository(
                    self.container_image.repository),
                cpu=int(config['DEFAULT']['ec2_cpu']),
                memory_limit_mib=int(
                    config['DEFAULT']['ec2_memory_limit_mib']),
                environment={
                    # https://github.com/jenkinsci/docker/blob/master/README.md#passing-jvm-parameters
                    'JAVA_OPTS':
                    '-Djenkins.install.runSetupWizard=false',
                    'CASC_JENKINS_CONFIG':
                    '/config-as-code.yaml',
                    'network_stack':
                    self.vpc.stack_name,
                    'cluster_stack':
                    self.cluster.stack_name,
                    'worker_stack':
                    self.worker.stack_name,
                    'cluster_arn':
                    self.cluster.cluster.cluster_arn,
                    'aws_region':
                    config['DEFAULT']['region'],
                    'jenkins_url':
                    config['DEFAULT']['jenkins_url'],
                    'subnet_ids':
                    ",".join(
                        [x.subnet_id for x in self.vpc.vpc.private_subnets]),
                    'security_group_ids':
                    self.worker.worker_security_group.security_group_id,
                    'execution_role_arn':
                    self.worker.worker_execution_role.role_arn,
                    'task_role_arn':
                    self.worker.worker_task_role.role_arn,
                    'worker_log_group':
                    self.worker.worker_logs_group.log_group_name,
                    'worker_log_stream_prefix':
                    self.worker.worker_log_stream.log_stream_name
                },
                logging=ecs.LogDriver.aws_logs(
                    stream_prefix="JenkinsMaster",
                    log_retention=logs.RetentionDays.ONE_WEEK),
            )

            self.jenkins_master_task.default_container.add_mount_points(
                ecs.MountPoint(container_path='/var/jenkins_home',
                               source_volume="efs_mount",
                               read_only=False))

            self.jenkins_master_task.default_container.add_port_mappings(
                ecs.PortMapping(container_port=8080, host_port=8080))

            self.jenkins_master_service = ecs.Ec2Service(
                self,
                "EC2MasterService",
                task_definition=self.jenkins_master_task,
                cloud_map_options=ecs.CloudMapOptions(
                    name="master", dns_record_type=sd.DnsRecordType('A')),
                desired_count=1,
                min_healthy_percent=0,
                max_healthy_percent=100,
                enable_ecs_managed_tags=True,
                cluster=self.cluster.cluster,
            )

            self.target_group = self.listener.add_targets(
                "JenkinsMasterTarget",
                port=80,
                targets=[
                    self.jenkins_master_service.load_balancer_target(
                        container_name=self.jenkins_master_task.
                        default_container.container_name,
                        container_port=8080,
                    )
                ],
                deregistration_delay=core.Duration.seconds(10))

        # Open port 50000 for master <--> worker communications
        self.jenkins_master_service.task_definition.default_container.add_port_mappings(
            ecs.PortMapping(container_port=50000, host_port=50000))

        # Allow inbound from the Worker security group on the Jenkins agent port (50000)
        self.jenkins_master_service.connections.allow_from(
            other=self.worker.worker_security_group,
            port_range=ec2.Port(protocol=ec2.Protocol.TCP,
                                string_representation='Master to Worker 50000',
                                from_port=50000,
                                to_port=50000))

        # Allow inbound from the Worker security group on the web UI port (8080)
        self.jenkins_master_service.connections.allow_from(
            other=self.worker.worker_security_group,
            port_range=ec2.Port(protocol=ec2.Protocol.TCP,
                                string_representation='Master to Worker 8080',
                                from_port=8080,
                                to_port=8080))

        # IAM statements to allow the Jenkins ECS plugin to talk to ECS and the Jenkins cluster #
        self.jenkins_master_task.add_to_task_role_policy(
            iam.PolicyStatement(
                actions=[
                    "ecs:RegisterTaskDefinition",
                    "ecs:DeregisterTaskDefinition", "ecs:ListClusters",
                    "ecs:DescribeContainerInstances",
                    "ecs:ListTaskDefinitions", "ecs:DescribeTaskDefinition",
                    "ecs:DescribeTasks"
                ],
                resources=["*"],
            ))

        self.jenkins_master_task.add_to_task_role_policy(
            iam.PolicyStatement(actions=["ecs:ListContainerInstances"],
                                resources=[self.cluster.cluster.cluster_arn]))

        self.jenkins_master_task.add_to_task_role_policy(
            iam.PolicyStatement(
                actions=["ecs:RunTask"],
                resources=[
                    "arn:aws:ecs:{0}:{1}:task-definition/fargate-workers*".
                    format(
                        self.region,
                        self.account,
                    )
                ]))

        self.jenkins_master_task.add_to_task_role_policy(
            iam.PolicyStatement(actions=["ecs:StopTask"],
                                resources=[
                                    "arn:aws:ecs:{0}:{1}:task/*".format(
                                        self.region, self.account)
                                ],
                                conditions={
                                    "ForAnyValue:ArnEquals": {
                                        "ecs:cluster":
                                        self.cluster.cluster.cluster_arn
                                    }
                                }))

        self.jenkins_master_task.add_to_task_role_policy(
            iam.PolicyStatement(actions=["iam:PassRole"],
                                resources=[
                                    self.worker.worker_task_role.role_arn,
                                    self.worker.worker_execution_role.role_arn
                                ]))
        # END OF JENKINS ECS PLUGIN IAM POLICIES #
        self.jenkins_master_task.add_to_task_role_policy(
            iam.PolicyStatement(
                actions=["*"],
                resources=[self.worker.worker_logs_group.log_group_arn]))
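
This example reads its settings from a module-level `config` mapping with a `DEFAULT` section. A minimal sketch of how such a mapping could be loaded with the standard library (the file name is an assumption; the keys are inferred from the lookups above):

    import configparser

    config = configparser.ConfigParser()
    config.read('config.ini')  # hypothetical path
    # Keys read by the stack: fargate_enabled, ec2_enabled, fargate_cpu,
    # fargate_memory_limit_mib, ec2_cpu, ec2_memory_limit_mib, region,
    # jenkins_url
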
Example #3
    def __init__(self, scope: core.Construct, id: str, vpc: ec2.Vpc, cluster: ecs.Cluster, repository: ecr.Repository, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        namespace = servicediscovery.PrivateDnsNamespace(
            scope=self,
            id="PRIVATE-DNS",
            vpc=vpc,
            name="private",
            description="a private dns"
        )

        sg = ec2.SecurityGroup(
            scope=self,
            id="SG",
            vpc=vpc,
            allow_all_outbound=True,
            description="open 9200 and 9300 ports",
            security_group_name="es-group"
        )
        sg.add_ingress_rule(
            peer=ec2.Peer.any_ipv4(),
            connection=ec2.Port.tcp(port=9200),
        )
        sg.add_ingress_rule(
            peer=ec2.Peer.any_ipv4(),
            connection=ec2.Port.tcp(port=9300),
        )
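        # 9200 is the Elasticsearch REST API port; 9300 is the inter-node
        # transport port used for cluster communication.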

        #####################################################
        elastic_task_def = ecs.Ec2TaskDefinition(
            scope=self,
            id="ES-TASK-DEF",
            network_mode=ecs.NetworkMode.AWS_VPC,
            volumes=[ecs.Volume(
                name="esdata",
                host=ecs.Host(source_path="/usr/share/elasticsearch/data"),
            )],
        )

        elastic = ecs.ContainerDefinition(
            scope=self,
            id=constants.ES_CONTAINER_NAME,
            start_timeout=core.Duration.seconds(amount=30),
            task_definition=elastic_task_def,
            memory_limit_mib=4500,
            essential=True,
            image=ecs.ContainerImage.from_ecr_repository(
                repository=repository, tag='latest'),
            environment={
                "cluster.name": constants.ES_CLUSTER_NAME,
                "bootstrap.memory_lock": "true",
                # "discovery.zen.ping.unicast.hosts": "elasticsearch",
                "node.name": constants.ES_CONTAINER_NAME,
                "node.master": "true",
                "node.data": "true",
                "ES_JAVA_OPTS": "-Xms4g -Xmx4g",
            },
            logging=ecs.AwsLogDriver(
                stream_prefix="ES",
                log_retention=logs.RetentionDays.ONE_DAY,
            ),
        )
        elastic.add_ulimits(ecs.Ulimit(
            name=ecs.UlimitName.NOFILE, hard_limit=65535, soft_limit=65535))
        elastic.add_ulimits(ecs.Ulimit(
            name=ecs.UlimitName.MEMLOCK, hard_limit=-1, soft_limit=-1))

        elastic.add_port_mappings(ecs.PortMapping(container_port=9200))
        elastic.add_port_mappings(ecs.PortMapping(container_port=9300))

        elastic.add_mount_points(ecs.MountPoint(
            container_path="/usr/share/elasticsearch/data",
            source_volume="esdata",
            read_only=False,
        ))
        # elastic.add_volumes_from(ecs.VolumeFrom(
        #     source_container="esdata",
        #     read_only=False,
        #     ))

        es_service = ecs.Ec2Service(
            scope=self,
            id="ES-SERVICE",
            cluster=cluster,
            task_definition=elastic_task_def,
            desired_count=1,
            service_name="ES",
            security_group=sg,
        )

        es_lb = elbv2.ApplicationLoadBalancer(
            scope=self,
            id="ES-ELB",
            vpc=vpc,
            internet_facing=True,
        )
        es_listener = es_lb.add_listener(
            id="ES-LISTENER",
            port=80,
        )
        es_service.register_load_balancer_targets(
            ecs.EcsTarget(
                new_target_group_id="ES-GRP",
                container_name=elastic.container_name,
                listener=ecs.ListenerConfig.application_listener(
                    listener=es_listener,
                    protocol=elbv2.ApplicationProtocol.HTTP),
            ))

        service = es_service.enable_cloud_map(
            cloud_map_namespace=namespace,
            dns_record_type=servicediscovery.DnsRecordType.A,
            # dns_ttl=core.Duration.seconds(amount=30),
            failure_threshold=1,
            name="elastic",
        )

        core.CfnOutput(
            scope=self,
            id="DNS-ES",
            value=es_lb.load_balancer_dns_name,
        )

        #####################################################

        node_task_def = ecs.Ec2TaskDefinition(
            scope=self,
            id="NODE-TASK-DEF",
            network_mode=ecs.NetworkMode.AWS_VPC,
            volumes=[ecs.Volume(
                name="esdata",
                host=ecs.Host(source_path="/usr/share/elasticsearch/data"),
            )],
        )

        node = ecs.ContainerDefinition(
            scope=self,
            id=constants.ES_NODE_CONTAINER_NAME,
            start_timeout=core.Duration.seconds(amount=40),
            task_definition=node_task_def,
            memory_limit_mib=4500,
            essential=True,
            image=ecs.ContainerImage.from_ecr_repository(
                repository=repository, tag='latest'),
            environment={
                "cluster.name": constants.ES_CLUSTER_NAME,
                "bootstrap.memory_lock": "true",
                "discovery.zen.ping.unicast.hosts": "elastic.private",
                "node.name": constants.ES_NODE_CONTAINER_NAME,
                "node.master": "false",
                "node.data": "true",
                "ES_JAVA_OPTS": "-Xms4g -Xmx4g",
            },
            logging=ecs.LogDrivers.aws_logs(
                stream_prefix="NODE",
                log_retention=logs.RetentionDays.ONE_DAY,
            ))

        node.add_port_mappings(ecs.PortMapping(container_port=9200))
        node.add_port_mappings(ecs.PortMapping(container_port=9300))

        node.add_ulimits(ecs.Ulimit(
            name=ecs.UlimitName.NOFILE, hard_limit=65536, soft_limit=65536))
        node.add_ulimits(ecs.Ulimit(
            name=ecs.UlimitName.MEMLOCK, hard_limit=-1, soft_limit=-1))
        node.add_mount_points(ecs.MountPoint(
            container_path="/usr/share/elasticsearch/data",
            source_volume="esdata",
            read_only=False,
        ))

        node_service = ecs.Ec2Service(
            scope=self,
            id="ES-NODE-SERVICE",
            cluster=cluster,
            task_definition=node_task_def,
            desired_count=1,
            service_name="NODE",
            security_group=sg,
        )

        node_lb = elbv2.ApplicationLoadBalancer(
            scope=self,
            id="NODE-ELB",
            vpc=vpc,
            internet_facing=True,
        )
        node_listener = node_lb.add_listener(
            id="NODE-LISTENER",
            port=80,
        )
        node_service.register_load_balancer_targets(
            ecs.EcsTarget(
                new_target_group_id="NODE-GRP",
                container_name=node.container_name,
                listener=ecs.ListenerConfig.application_listener(
                    listener=node_listener,
                    protocol=elbv2.ApplicationProtocol.HTTP),
            ))
        core.CfnOutput(
            scope=self,
            id="DNS-NODE",
            value=node_lb.load_balancer_dns_name,
        )
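
The example references a project-local `constants` module for cluster and container names. A stand-in with placeholder values (the values are assumptions; only the attribute names matter to the stack) could be:

    # constants.py
    ES_CLUSTER_NAME = "es-cluster"
    ES_CONTAINER_NAME = "elastic"
    ES_NODE_CONTAINER_NAME = "es-node"
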
Example #4
    def __init__(self, scope: core.Construct, id: str, props, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        dirname = os.path.dirname(__file__)

        ecr_repo = ecr.Repository.from_repository_name(
            self,
            'UmccriseEcrRepo',
            repository_name='umccrise'
        )

        ################################################################################
        # Set up permissions
        ro_buckets = set()
        for bucket in props['ro_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(
                self,
                bucket,
                bucket_name=bucket
            )
            ro_buckets.add(tmp_bucket)

        rw_buckets = set()
        for bucket in props['rw_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(
                self,
                bucket,
                bucket_name=bucket
            )
            rw_buckets.add(tmp_bucket)

        batch_service_role = iam.Role(
            self,
            'BatchServiceRole',
            assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSBatchServiceRole')
            ]
        )

        spotfleet_role = iam.Role(
            self,
            'AmazonEC2SpotFleetRole',
            assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2SpotFleetTaggingRole')
            ]
        )

        # Create role for Batch instances
        batch_instance_role = iam.Role(
            self,
            'BatchInstanceRole',
            role_name='UmccriseBatchInstanceRole',
            assumed_by=iam.CompositePrincipal(
                iam.ServicePrincipal('ec2.amazonaws.com'),
                iam.ServicePrincipal('ecs.amazonaws.com')
            ),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2RoleforSSM'),
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2ContainerServiceforEC2Role')
            ]
        )
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "ec2:Describe*",
                    "ec2:AttachVolume",
                    "ec2:CreateVolume",
                    "ec2:CreateTags",
                    "ec2:ModifyInstanceAttribute"
                ],
                resources=["*"]
            )
        )
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "ecs:ListClusters"
                ],
                resources=["*"]
            )
        )
        for bucket in ro_buckets:
            bucket.grant_read(batch_instance_role)
        for bucket in rw_buckets:
            # restrict writes to paths matching */umccrised/*
            bucket.grant_read_write(batch_instance_role, '*/umccrised/*')

        # Turn the instance role into an Instance Profile
        batch_instance_profile = iam.CfnInstanceProfile(
            self,
            'BatchInstanceProfile',
            instance_profile_name='UmccriseBatchInstanceProfile',
            roles=[batch_instance_role.role_name]
        )

        ################################################################################
        # Network
        # Import common infrastructure (maintained via Terraform)

        # VPC
        vpc = ec2.Vpc.from_lookup(
            self,
            'UmccrMainVpc',
            tags={'Name': 'main-vpc', 'Stack': 'networking'}
        )

        batch_security_group = ec2.SecurityGroup(
            self,
            "BatchSecurityGroup",
            vpc=vpc,
            description="Allow all outbound, no inbound traffic"
        )
        ################################################################################
        # Setup Batch compute resources

        # Configure a block device mapping to expand the instance disk space (if needed)
        block_device_mappings = [
            {
                'deviceName': '/dev/xvdf',
                'ebs': {
                    'deleteOnTermination': True,
                    'encrypted': True,
                    'volumeSize': 2048,
                    'volumeType': 'gp2'
                }
            }
        ]

        # Set up custom user data to configure the Batch instances
        umccrise_wrapper_asset = assets.Asset(
            self,
            'UmccriseWrapperAsset',
            path=os.path.join(dirname, '..', 'assets', "umccrise-wrapper.sh")
        )
        umccrise_wrapper_asset.grant_read(batch_instance_role)

        user_data_asset = assets.Asset(
            self,
            'UserDataAsset',
            path=os.path.join(dirname, '..', 'assets', "batch-user-data.sh")
        )
        user_data_asset.grant_read(batch_instance_role)

        user_data = ec2.UserData.for_linux()
        local_path = user_data.add_s3_download_command(
            bucket=user_data_asset.bucket,
            bucket_key=user_data_asset.s3_object_key
        )
        user_data.add_execute_file_command(
            file_path=local_path,
            arguments=f"s3://{umccrise_wrapper_asset.bucket.bucket_name}/{umccrise_wrapper_asset.s3_object_key}"
        )

        # Wrap the user data in the MIME multi-part archive format that launch templates require
        mime_wrapper = ec2.UserData.custom('MIME-Version: 1.0')
        mime_wrapper.add_commands('Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="')
        mime_wrapper.add_commands('')
        mime_wrapper.add_commands('--==MYBOUNDARY==')
        mime_wrapper.add_commands('Content-Type: text/x-shellscript; charset="us-ascii"')
        mime_wrapper.add_commands('')
        # install the AWS CLI, as it's unexpectedly missing from the Amazon Linux 2 AMI...
        mime_wrapper.add_commands('yum -y install unzip')
        mime_wrapper.add_commands('cd /opt')
        mime_wrapper.add_commands('curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"')
        mime_wrapper.add_commands('unzip awscliv2.zip')
        mime_wrapper.add_commands('sudo ./aws/install --bin-dir /usr/bin')
        # insert our actual user data payload
        mime_wrapper.add_commands(user_data.render())
        mime_wrapper.add_commands('--==MYBOUNDARY==--')

        launch_template = ec2.CfnLaunchTemplate(
            self,
            'UmccriseBatchComputeLaunchTemplate',
            launch_template_name='UmccriseBatchComputeLaunchTemplate',
            launch_template_data={
                'userData': core.Fn.base64(mime_wrapper.render()),
                'blockDeviceMappings': block_device_mappings
            }
        )

        launch_template_spec = batch.LaunchTemplateSpecification(
            launch_template_name=launch_template.launch_template_name,
            version='$Latest'
        )

        my_compute_res = batch.ComputeResources(
            type=(batch.ComputeResourceType.SPOT if props['compute_env_type'].lower() == 'spot' else batch.ComputeResourceType.ON_DEMAND),
            allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
            desiredv_cpus=0,
            maxv_cpus=320,
            minv_cpus=0,
            image=ec2.MachineImage.generic_linux(ami_map={'ap-southeast-2': props['compute_env_ami']}),
            launch_template=launch_template_spec,
            spot_fleet_role=spotfleet_role,
            instance_role=batch_instance_profile.instance_profile_name,
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(
                subnet_type=ec2.SubnetType.PRIVATE,
                # availability_zones=["ap-southeast-2a"]
            ),
            security_groups=[batch_security_group]
            # compute_resources_tags=core.Tag('Creator', 'Batch')
        )
        # XXX: How to add more than one tag above??
        # https://github.com/aws/aws-cdk/issues/7350
        # core.Tag.add(my_compute_res, 'Foo', 'Bar')

        my_compute_env = batch.ComputeEnvironment(
            self,
            'UmccriseBatchComputeEnv',
            compute_environment_name="cdk-umccr_ise-batch-compute-env",
            service_role=batch_service_role,
            compute_resources=my_compute_res
        )
        # child = my_compute_env.node.default_child
        # child_comp_res = child.compute_resources
        # child_comp_res.tags = "{'Foo': 'Bar'}"

        job_queue = batch.JobQueue(
            self,
            'UmccriseJobQueue',
            job_queue_name='cdk-umccrise_job_queue',
            compute_environments=[
                batch.JobQueueComputeEnvironment(
                    compute_environment=my_compute_env,
                    order=1
                )
            ],
            priority=10
        )

        job_container = batch.JobDefinitionContainer(
            image=ecs.ContainerImage.from_registry(name=props['container_image']),
            vcpus=32,
            memory_limit_mib=100000,
            command=[
                "/opt/container/umccrise-wrapper.sh",
                "Ref::vcpus"
            ],
            mount_points=[
                ecs.MountPoint(
                    container_path='/work',
                    read_only=False,
                    source_volume='work'
                ),
                ecs.MountPoint(
                    container_path='/opt/container',
                    read_only=True,
                    source_volume='container'
                )
            ],
            volumes=[
                ecs.Volume(
                    name='container',
                    host=ecs.Host(
                        source_path='/opt/container'
                    )
                ),
                ecs.Volume(
                    name='work',
                    host=ecs.Host(
                        source_path='/mnt'
                    )
                )
            ],
            privileged=True
        )

        job_definition = batch.JobDefinition(
            self,
            'UmccriseJobDefinition',
            job_definition_name='cdk-umccrise-job-definition',
            parameters={'vcpus': '1'},
            container=job_container,
            timeout=core.Duration.hours(5)
        )

        ################################################################################
        # Set up job submission Lambda

        lambda_role = iam.Role(
            self,
            'UmccriseLambdaRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSLambdaBasicExecutionRole'),
                iam.ManagedPolicy.from_aws_managed_policy_name('AWSBatchFullAccess')  # TODO: restrict!
            ]
        )

        for bucket in ro_buckets:
            bucket.grant_read(lambda_role)
        for bucket in rw_buckets:
            bucket.grant_read(lambda_role)
        ecr_repo.grant(lambda_role, 'ecr:ListImages')

        # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
        #       may need a default JobDefinition to be set up
        lmbda.Function(
            self,
            'UmccriseLambda',
            function_name='umccrise_batch_lambda',
            handler='umccrise.lambda_handler',
            runtime=lmbda.Runtime.PYTHON_3_7,
            code=lmbda.Code.from_asset('lambdas/umccrise'),
            environment={
                'JOBNAME_PREFIX': "UMCCRISE_",
                'JOBQUEUE': job_queue.job_queue_name,
                'UMCCRISE_MEM': '100000',
                'UMCCRISE_VCPUS': '32',
                'JOBDEF': job_definition.job_definition_name,
                'REFDATA_BUCKET': props['refdata_bucket'],
                'INPUT_BUCKET': props['input_bucket'],
                'RESULT_BUCKET': props['result_bucket'],
                'IMAGE_CONFIGURABLE': props['image_configurable']
            },
            role=lambda_role
        )
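
For context, a hedged sketch of an app entry point for the stack above; the class name and every value shown are placeholders, not the project's real configuration (`Vpc.from_lookup` requires an explicit account/region in `env`):

    app = core.App()
    UmccriseStack(  # hypothetical name for the construct defined above
        app,
        'umccrise-batch',
        props={
            'ro_buckets': ['example-refdata-bucket'],
            'rw_buckets': ['example-results-bucket'],
            'refdata_bucket': 'example-refdata-bucket',
            'input_bucket': 'example-input-bucket',
            'result_bucket': 'example-results-bucket',
            'container_image': 'example/umccrise:latest',
            'compute_env_ami': 'ami-0123456789abcdef0',  # placeholder AMI ID
            'compute_env_type': 'spot',
            'image_configurable': 'yes',
        },
        env=core.Environment(account='123456789012', region='ap-southeast-2'),
    )
    app.synth()
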
Example #5
    def __init__(self, scope: core.Construct, id_: str, props,
                 **kwargs) -> None:
        super().__init__(scope, id_, **kwargs)

        namespace = props['namespace']
        htsget_refserver_ecr_repo: ecr.Repository = props['ecr_repo']
        htsget_refserver_image_tag = props['htsget_refserver_image_tag']
        cors_allowed_origins = props['cors_allowed_origins']

        # --- Query deployment env specific config from SSM Parameter Store

        cert_apse2_arn = ssm.StringParameter.from_string_parameter_name(
            self,
            "SSLCertAPSE2ARN",
            string_parameter_name="/htsget/acm/apse2_arn",
        )
        cert_apse2 = acm.Certificate.from_certificate_arn(
            self,
            "SSLCertAPSE2",
            certificate_arn=cert_apse2_arn.string_value,
        )

        hosted_zone_id = ssm.StringParameter.from_string_parameter_name(
            self, "HostedZoneID", string_parameter_name="hosted_zone_id")
        hosted_zone_name = ssm.StringParameter.from_string_parameter_name(
            self, "HostedZoneName", string_parameter_name="hosted_zone_name")

        domain_name = ssm.StringParameter.from_string_parameter_name(
            self,
            "DomainName",
            string_parameter_name="/htsget/domain",
        )

        # --- Cognito parameters come from the data portal Terraform stack

        cog_user_pool_id = ssm.StringParameter.from_string_parameter_name(
            self,
            "CogUserPoolID",
            string_parameter_name="/data_portal/client/cog_user_pool_id",
        )

        cog_app_client_id_stage = ssm.StringParameter.from_string_parameter_name(
            self,
            "CogAppClientIDStage",
            string_parameter_name="/data_portal/client/cog_app_client_id_stage",
        )

        cog_app_client_id_local = ssm.StringParameter.from_string_parameter_name(
            self,
            "CogAppClientIDLocal",
            string_parameter_name="/data_portal/client/cog_app_client_id_local",
        )

        # --- Query main VPC and setup Security Groups

        vpc = ec2.Vpc.from_lookup(
            self,
            "VPC",
            vpc_name="main-vpc",
            tags={
                'Stack': "networking",
            },
        )
        private_subnets = ec2.SubnetSelection(
            subnet_type=ec2.SubnetType.PRIVATE,
            availability_zones=["ap-southeast-2a"],
        )

        sg_elb = ec2.SecurityGroup(
            self,
            "ELBSecurityGroup",
            vpc=vpc,
            description=f"Security Group for ELB in {namespace} stack",
            security_group_name=f"{namespace} ELB Security Group",
            allow_all_outbound=False,
        )
        sg_elb.add_ingress_rule(peer=ec2.Peer.any_ipv4(),
                                connection=ec2.Port.tcp(80),
                                description="Allow http inbound within VPC")

        sg_ecs_service = ec2.SecurityGroup(
            self,
            "ECSServiceSecurityGroup",
            vpc=vpc,
            description=f"Security Group for ECS Service in {namespace} stack",
            security_group_name=f"{namespace} ECS Security Group",
        )
        sg_ecs_service.add_ingress_rule(
            peer=sg_elb,
            connection=ec2.Port.tcp(3000),
            description="Allow traffic from Load balancer to ECS service")

        # --- Setup ECS Fargate cluster

        config_vol = ecs.Volume(
            name="config-vol",
            host=ecs.Host(),
        )

        task_execution_role = iam.Role(
            self,
            "ecsTaskExecutionRole",
            assumed_by=iam.ServicePrincipal("ecs-tasks.amazonaws.com"))
        task_execution_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "s3:GetBucketLocation",
                    "s3:GetObject",
                    "s3:ListBucket",
                    "s3:ListBucketMultipartUploads",
                    "s3:ListMultipartUploadParts",
                    "s3:GetObjectTagging",
                    "s3:GetObjectVersionTagging",
                    "logs:CreateLogStream",
                    "logs:PutLogEvents",
                    "ssm:GetParameterHistory",
                    "ssm:GetParametersByPath",
                    "ssm:GetParameters",
                    "ssm:GetParameter",
                ],
                resources=["*"],
            ))
        task_execution_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonECSTaskExecutionRolePolicy'))

        task = ecs.FargateTaskDefinition(
            self,
            f"{namespace}-task",
            cpu=512,
            memory_limit_mib=1024,
            volumes=[config_vol],
            task_role=task_execution_role,
            execution_role=task_execution_role,
        )

        cmd_ssm = "ssm get-parameter --name '/htsget/refserver/config' --output text --query Parameter.Value"
        sidecar_container: ecs.ContainerDefinition = task.add_container(
            f"{namespace}-sidecar",
            image=ecs.ContainerImage.from_registry(
                "quay.io/victorskl/aws-cli:2.1.3"),
            essential=False,
            entry_point=[
                "/bin/bash",
                "-c",
                f"aws {cmd_ssm} > config.json",
            ],
            logging=ecs.LogDriver.aws_logs(stream_prefix=f"{namespace}", ),
        )
        sidecar_container.add_mount_points(
            ecs.MountPoint(
                container_path="/aws",
                read_only=False,
                source_volume=config_vol.name,
            ))
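        # The sidecar is marked essential=False so it can run once, pull the
        # refserver config out of SSM into the shared volume, and exit; the
        # main container below waits on it via the COMPLETE dependency.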

        main_container: ecs.ContainerDefinition = task.add_container(
            namespace,
            image=ecs.ContainerImage.from_ecr_repository(
                repository=htsget_refserver_ecr_repo,
                tag=htsget_refserver_image_tag,
            ),
            essential=True,
            command=[
                "./htsget-refserver", "-config",
                "/usr/src/app/config/config.json"
            ],
            logging=ecs.LogDriver.aws_logs(stream_prefix=f"{namespace}", ),
        )
        main_container.add_port_mappings(
            ecs.PortMapping(
                container_port=3000,
                protocol=ecs.Protocol.TCP,
            ))
        main_container.add_mount_points(
            ecs.MountPoint(
                container_path="/usr/src/app/config",
                read_only=True,
                source_volume=config_vol.name,
            ))
        main_container.add_container_dependencies(
            ecs.ContainerDependency(
                container=sidecar_container,
                condition=ecs.ContainerDependencyCondition.COMPLETE,
            ))

        cluster = ecs.Cluster(self, f"{namespace}-cluster", vpc=vpc)

        service = ecs.FargateService(
            self,
            f"{namespace}-service",
            platform_version=ecs.FargatePlatformVersion.VERSION1_4,
            task_definition=task,
            cluster=cluster,
            vpc_subnets=private_subnets,
            desired_count=1,
            security_groups=[
                sg_ecs_service,
            ],
        )

        # --- Setup Application Load Balancer in front of ECS cluster

        lb = elbv2.ApplicationLoadBalancer(
            self,
            f"{namespace}-lb",
            vpc=vpc,
            internet_facing=False,
            security_group=sg_elb,
            deletion_protection=True,
        )
        http_listener = lb.add_listener(
            "HttpLBListener",
            port=80,
        )
        health_check = elbv2.HealthCheck(interval=core.Duration.seconds(30),
                                         path="/reads/service-info",
                                         timeout=core.Duration.seconds(5))
        http_listener.add_targets(
            "LBtoECS",
            port=3000,
            protocol=elbv2.ApplicationProtocol.HTTP,
            targets=[service],
            health_check=health_check,
        )
        core.CfnOutput(self,
                       "LoadBalancerDNS",
                       value=lb.load_balancer_dns_name)

        # --- Setup APIGatewayv2 HttpApi using VpcLink private integration to ALB/ECS in private subnets

        vpc_link = apigwv2.VpcLink(self,
                                   f"{namespace}-VpcLink",
                                   vpc=vpc,
                                   security_groups=[
                                       sg_ecs_service,
                                       sg_elb,
                                   ])
        self.apigwv2_alb_integration = apigwv2i.HttpAlbIntegration(
            listener=http_listener,
            vpc_link=vpc_link,
        )
        custom_domain = apigwv2.DomainName(
            self,
            "CustomDomain",
            certificate=cert_apse2,
            domain_name=domain_name.string_value,
        )
        self.http_api = apigwv2.HttpApi(
            self,
            f"{namespace}-apigw",
            default_domain_mapping=apigwv2.DomainMappingOptions(
                domain_name=custom_domain),
            cors_preflight=apigwv2.CorsPreflightOptions(
                allow_origins=cors_allowed_origins,
                allow_headers=["*"],
                allow_methods=[
                    apigwv2.CorsHttpMethod.ANY,
                ],
                allow_credentials=True,
            ))
        core.CfnOutput(self, "ApiEndpoint", value=self.http_api.api_endpoint)

        # --- Setup DNS for the custom domain

        hosted_zone = route53.HostedZone.from_hosted_zone_attributes(
            self,
            "HostedZone",
            hosted_zone_id=hosted_zone_id.string_value,
            zone_name=hosted_zone_name.string_value,
        )
        route53.ARecord(
            self,
            "ApiCustomDomainAlias",
            zone=hosted_zone,
            record_name="htsget",
            target=route53.RecordTarget.from_alias(
                route53t.ApiGatewayv2DomainProperties(
                    regional_domain_name=custom_domain.regional_domain_name,
                    regional_hosted_zone_id=custom_domain.
                    regional_hosted_zone_id)),
        )
        core.CfnOutput(
            self,
            "HtsgetEndpoint",
            value=custom_domain.name,
        )

        cognito_authzr = apigwv2.CfnAuthorizer(
            self,
            "CognitoAuthorizer",
            api_id=self.http_api.http_api_id,
            authorizer_type="JWT",
            identity_source=[
                "$request.header.Authorization",
            ],
            name="CognitoAuthorizer",
            jwt_configuration=apigwv2.CfnAuthorizer.JWTConfigurationProperty(
                audience=[
                    cog_app_client_id_stage.string_value,
                    cog_app_client_id_local.string_value,
                ],
                issuer=
                f"https://cognito-idp.{self.region}.amazonaws.com/{cog_user_pool_id.string_value}"
            ))

        # Add catch all routes
        rt_catchall = apigwv2.HttpRoute(
            self,
            "CatchallRoute",
            http_api=self.http_api,
            route_key=apigwv2.HttpRouteKey.with_(
                path="/{proxy+}", method=apigwv2.HttpMethod.GET),
            integration=self.apigwv2_alb_integration)
        rt_catchall_cfn: apigwv2.CfnRoute = rt_catchall.node.default_child
        rt_catchall_cfn.authorizer_id = cognito_authzr.ref
        rt_catchall_cfn.authorization_type = "JWT"

        # Comment this out to opt out of setting up the experimental Passport + htsget support
        self.setup_ga4gh_passport()
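
As with the earlier examples, the import header is not shown. A set of CDK v1 aliases consistent with the names used above (the alias choices are an assumption) would be:

    from aws_cdk import (
        core,
        aws_apigatewayv2 as apigwv2,
        aws_apigatewayv2_integrations as apigwv2i,
        aws_certificatemanager as acm,
        aws_ec2 as ec2,
        aws_ecr as ecr,
        aws_ecs as ecs,
        aws_elasticloadbalancingv2 as elbv2,
        aws_iam as iam,
        aws_route53 as route53,
        aws_route53_targets as route53t,
        aws_ssm as ssm,
    )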