def authorize_output_bucket(self,
                                bucket: s3.Bucket,
                                objects_key_pattern: Optional[str] = None):
        if self._rehydrated and not self._mutable_instance_role:
            raise ReadOnlyEMRProfileError()

        bucket.grant_read_write(self._roles.instance_role,
                                objects_key_pattern).assert_success()
        return self
Example #2
0
    def add_endpoint(self, bucket: s3.Bucket, fn: Function):
        # create the queue
        queue = sqs.Queue(self,
                          f'{fn.id_prefix}Queue',
                          dead_letter_queue=sqs.DeadLetterQueue(
                              max_receive_count=5,
                              queue=sqs.Queue(
                                  self,
                                  f'{fn.id_prefix}DLQ',
                                  queue_name=f'{fn.queue_name}-dlq')),
                          queue_name=fn.queue_name)

        # create the receiver function
        # add the queue url as an environment variable
        receiver_function = lambda_.Function(
            self,
            f'{fn.id_prefix}ReceiverFunction',
            code=fn.function_code,
            environment={'QUEUE_URL': queue.queue_url},
            function_name=f'{fn.function_name_prefix}-receiver',
            handler=fn.receiver_function_handler,
            layers=[fn.function_dependencies_layer],
            # memory_size=256,
            runtime=lambda_.Runtime.PYTHON_3_8)

        # allow the receiver function to enqueue messages
        queue.grant_send_messages(receiver_function)

        # route requests to the receiver lambda
        self.api.add_routes(integration=apigw.LambdaProxyIntegration(
            handler=receiver_function),
                            methods=[fn.api_method],
                            path=fn.api_path)

        # create the handler function
        # add the bucket name as an environment variable
        handler_function = lambda_.Function(
            self,
            f'{fn.id_prefix}HandlerFunction',
            code=fn.function_code,
            environment={'BUCKET_NAME': bucket.bucket_name},
            function_name=f'{fn.function_name_prefix}-handler',
            handler=fn.handler_function_handler,
            layers=[fn.function_dependencies_layer],
            # memory_size=256,
            runtime=lambda_.Runtime.PYTHON_3_8)

        # add the queue as a trigger for the handler function
        handler_function.add_event_source(SqsEventSource(queue))

        # allow the handler function to access the bucket
        bucket.grant_read_write(handler_function)
    def __init__(self, scope: cdk.Construct, construct_id: str, config, vpc: IVpc, instance: IInstance, neo4j_user_secret: ISecret, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        bucket = Bucket(self, "s3-bucket-altimeter", 
            bucket_name=config["s3_bucket"],
            encryption=BucketEncryption.UNENCRYPTED, #.S3_MANAGED, # Disable encryption since it's not really required and it conflicts with SCP guardrails set by Control Tower on the Audit account.
            block_public_access=BlockPublicAccess.BLOCK_ALL
        )

        cluster = Cluster(self, "ecs-cluster-altimeter", 
            cluster_name="ecsclstr-altimeter--default",
            vpc=vpc               
        )

        task_role = Role(self, "iam-role-altimeter-task-role",
            assumed_by=ServicePrincipal("ecs-tasks.amazonaws.com"),
            # It appears that within the account where the scanner is running, the task role is (partially) used for scanning resources (rather than the altimeter-scanner-access role).      
            managed_policies=[
                ManagedPolicy.from_aws_managed_policy_name('SecurityAudit'),
                ManagedPolicy.from_aws_managed_policy_name('job-function/ViewOnlyAccess')
            ]
        )

        task_definition = FargateTaskDefinition(self, "ecs-fgtd-altimeter",
            task_role=task_role,
            memory_limit_mib=self.MEMORY_LIMIT,
            cpu=self.CPU
        )

        docker_path = os.path.join(os.path.curdir,"..")

        image_asset = DockerImageAsset(self, 'ecr-assets-dia-altimeter', 
            directory=docker_path,
            file="scanner.Dockerfile"
        )            

        task_definition.add_container("ecs-container-altimeter",            
            image= ContainerImage.from_docker_image_asset(image_asset),
            # memory_limit_mib=self.MEMORY_LIMIT,
            # cpu=self.CPU,
            environment= {
                "CONFIG_PATH": config["altimeter_config_path"],
                "S3_BUCKET": config["s3_bucket"]
            },
            logging= AwsLogDriver(
                stream_prefix= 'altimeter',
                log_retention= RetentionDays.TWO_WEEKS
            )
        )

        task_definition.add_to_task_role_policy(PolicyStatement(
            resources=["arn:aws:iam::*:role/"+config["account_execution_role"]],
            actions=['sts:AssumeRole']
        ))

        task_definition.add_to_task_role_policy(PolicyStatement(
            resources=[
                "arn:aws:s3:::"+config["s3_bucket"],
                "arn:aws:s3:::"+config["s3_bucket"]+"/*"
            ],
            actions=["s3:GetObject*",
                "s3:GetBucket*",
                "s3:List*",
                "s3:DeleteObject*",
                "s3:PutObject",
                "s3:Abort*",
                "s3:PutObjectTagging"]
        ))

        # Grant the ability to record the stdout to CloudWatch Logs
        # TODO: Refine
        task_definition.add_to_task_role_policy(PolicyStatement(
            resources=["*"],
            actions=['logs:*']
        ))

        # Trigger task every 24 hours
        Rule(self, "events-rule-altimeter-daily-scan",
            rule_name="evrule--altimeter-daily-scan",
            schedule=Schedule.cron(hour="0", minute="0"),
            description="Daily altimeter scan",
            targets=[EcsTask(
                task_definition=task_definition,
                cluster=cluster,
                subnet_selection=SubnetSelection(subnet_type=SubnetType.PRIVATE)
            )]
        )

        # Trigger task manually via event
        Rule(self, "events-rule-altimeter-manual-scan",
            rule_name="evrule--altimeter-manual-scan",
            event_pattern=EventPattern(source=['altimeter']), 
            description="Manual altimeter scan",
            targets=[EcsTask(
                task_definition=task_definition,
                cluster=cluster,
                subnet_selection=SubnetSelection(subnet_type=SubnetType.PRIVATE)
            )]
        )        


        # Don't put Neo4j Importer lambda in a separate stack since it causes a circular reference with the S3 event source, and using an imported bucket as event source is not possible (you need a Bucket, not an IBucket)



        neo4j_importer_function = PythonFunction(self, 'lambda-function-neo4j-importer',
            function_name="function-altimeter--neo4j-importer",             
            entry="../neo4j-importer",
            index="app.py",
            handler="lambda_handler",
            runtime=Runtime.PYTHON_3_8,
            memory_size=256,
            timeout=cdk.Duration.seconds(60),
            vpc=vpc,
            vpc_subnets=SubnetSelection(subnets=vpc.select_subnets(subnet_group_name='Private').subnets),
            environment={
                "neo4j_address": instance.instance_private_ip,
                "neo4j_user_secret_name": neo4j_user_secret.secret_name
            }
        )

        neo4j_importer_function.add_event_source(
            S3EventSource(bucket,
                events= [EventType.OBJECT_CREATED, EventType.OBJECT_REMOVED],
                filters= [ { "prefix": "raw/", "suffix": ".rdf"}]
            )
        )

        # Grant lambda read/write access to the S3 bucket for reading raw rdf, writing prepared rdf and generating signed uri
        bucket.grant_read_write(neo4j_importer_function.role)
        # Grant lambda read access to the neo4j user secret
        neo4j_user_secret.grant_read(neo4j_importer_function.role)
Example #4
0
    def __init__(
        self,
        scope: Construct,
        stack_id: str,
        *,
        botocore_lambda_layer: aws_lambda_python.PythonLayerVersion,
        env_name: str,
        storage_bucket: aws_s3.Bucket,
        validation_results_table: Table,
    ) -> None:
        # pylint: disable=too-many-locals, too-many-statements

        super().__init__(scope, stack_id)

        ############################################################################################
        # PROCESSING ASSETS TABLE
        processing_assets_table = Table(
            self,
            f"{env_name}-processing-assets",
            env_name=env_name,
            parameter_name=ParameterName.PROCESSING_ASSETS_TABLE_NAME,
            sort_key=aws_dynamodb.Attribute(name="sk", type=aws_dynamodb.AttributeType.STRING),
        )

        ############################################################################################
        # BATCH JOB DEPENDENCIES
        batch_job_queue = BatchJobQueue(
            self,
            "batch-job-queue",
            env_name=env_name,
            processing_assets_table=processing_assets_table,
        ).job_queue

        s3_read_only_access_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            "AmazonS3ReadOnlyAccess"
        )

        ############################################################################################
        # UPDATE CATALOG UPDATE MESSAGE QUEUE

        dead_letter_queue = aws_sqs.Queue(
            self,
            "dead-letter-queue",
            visibility_timeout=LAMBDA_TIMEOUT,
        )

        self.message_queue = aws_sqs.Queue(
            self,
            "update-catalog-message-queue",
            visibility_timeout=LAMBDA_TIMEOUT,
            dead_letter_queue=aws_sqs.DeadLetterQueue(max_receive_count=3, queue=dead_letter_queue),
        )
        self.message_queue_name_parameter = aws_ssm.StringParameter(
            self,
            "update-catalog-message-queue-name",
            string_value=self.message_queue.queue_name,
            description=f"Update Catalog Message Queue Name for {env_name}",
            parameter_name=ParameterName.UPDATE_CATALOG_MESSAGE_QUEUE_NAME.value,
        )

        populate_catalog_lambda = BundledLambdaFunction(
            self,
            "populate-catalog-bundled-lambda-function",
            directory="populate_catalog",
            extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
            botocore_lambda_layer=botocore_lambda_layer,
        )

        self.message_queue.grant_consume_messages(populate_catalog_lambda)
        populate_catalog_lambda.add_event_source(
            SqsEventSource(self.message_queue, batch_size=1)  # type: ignore[arg-type]
        )

        ############################################################################################
        # STATE MACHINE TASKS

        check_stac_metadata_task = LambdaTask(
            self,
            "check-stac-metadata-task",
            directory="check_stac_metadata",
            botocore_lambda_layer=botocore_lambda_layer,
            extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
        )
        assert check_stac_metadata_task.lambda_function.role
        check_stac_metadata_task.lambda_function.role.add_managed_policy(
            policy=s3_read_only_access_policy
        )

        for table in [processing_assets_table, validation_results_table]:
            table.grant_read_write_data(check_stac_metadata_task.lambda_function)
            table.grant(
                check_stac_metadata_task.lambda_function,
                "dynamodb:DescribeTable",
            )

        content_iterator_task = LambdaTask(
            self,
            "content-iterator-task",
            directory="content_iterator",
            botocore_lambda_layer=botocore_lambda_layer,
            result_path=f"$.{CONTENT_KEY}",
            extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
        )

        check_files_checksums_directory = "check_files_checksums"
        check_files_checksums_default_payload_object = {
            f"{DATASET_ID_KEY}.$": f"$.{DATASET_ID_KEY}",
            f"{VERSION_ID_KEY}.$": f"$.{VERSION_ID_KEY}",
            f"{METADATA_URL_KEY}.$": f"$.{METADATA_URL_KEY}",
            f"{FIRST_ITEM_KEY}.$": f"$.{CONTENT_KEY}.{FIRST_ITEM_KEY}",
            f"{ASSETS_TABLE_NAME_KEY}.$": f"$.{CONTENT_KEY}.{ASSETS_TABLE_NAME_KEY}",
            f"{RESULTS_TABLE_NAME_KEY}.$": f"$.{CONTENT_KEY}.{RESULTS_TABLE_NAME_KEY}",
        }
        check_files_checksums_single_task = BatchSubmitJobTask(
            self,
            "check-files-checksums-single-task",
            env_name=env_name,
            directory=check_files_checksums_directory,
            s3_policy=s3_read_only_access_policy,
            job_queue=batch_job_queue,
            payload_object=check_files_checksums_default_payload_object,
            container_overrides_command=[
                "--dataset-id",
                f"Ref::{DATASET_ID_KEY}",
                "--version-id",
                f"Ref::{VERSION_ID_KEY}",
                "--first-item",
                f"Ref::{FIRST_ITEM_KEY}",
                "--assets-table-name",
                f"Ref::{ASSETS_TABLE_NAME_KEY}",
                "--results-table-name",
                f"Ref::{RESULTS_TABLE_NAME_KEY}",
            ],
        )
        array_size = int(
            aws_stepfunctions.JsonPath.number_at(f"$.{CONTENT_KEY}.{ITERATION_SIZE_KEY}")
        )
        check_files_checksums_array_task = BatchSubmitJobTask(
            self,
            "check-files-checksums-array-task",
            env_name=env_name,
            directory=check_files_checksums_directory,
            s3_policy=s3_read_only_access_policy,
            job_queue=batch_job_queue,
            payload_object=check_files_checksums_default_payload_object,
            container_overrides_command=[
                "--dataset-id",
                f"Ref::{DATASET_ID_KEY}",
                "--version-id",
                f"Ref::{VERSION_ID_KEY}",
                "--first-item",
                f"Ref::{FIRST_ITEM_KEY}",
                "--assets-table-name",
                f"Ref::{ASSETS_TABLE_NAME_KEY}",
                "--results-table-name",
                f"Ref::{RESULTS_TABLE_NAME_KEY}",
            ],
            array_size=array_size,
        )

        for reader in [
            content_iterator_task.lambda_function,
            check_files_checksums_single_task.job_role,
            check_files_checksums_array_task.job_role,
        ]:
            processing_assets_table.grant_read_data(reader)  # type: ignore[arg-type]
            processing_assets_table.grant(
                reader, "dynamodb:DescribeTable"  # type: ignore[arg-type]
            )

        for writer in [
            check_files_checksums_single_task.job_role,
            check_files_checksums_array_task.job_role,
        ]:
            validation_results_table.grant_read_write_data(writer)  # type: ignore[arg-type]
            validation_results_table.grant(
                writer, "dynamodb:DescribeTable"  # type: ignore[arg-type]
            )

        validation_summary_task = LambdaTask(
            self,
            "validation-summary-task",
            directory="validation_summary",
            botocore_lambda_layer=botocore_lambda_layer,
            result_path=f"$.{VALIDATION_KEY}",
            extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
        )
        validation_results_table.grant_read_data(validation_summary_task.lambda_function)
        validation_results_table.grant(
            validation_summary_task.lambda_function, "dynamodb:DescribeTable"
        )

        import_dataset_role = aws_iam.Role(
            self,
            "import-dataset",
            assumed_by=aws_iam.ServicePrincipal(  # type: ignore[arg-type]
                "batchoperations.s3.amazonaws.com"
            ),
        )

        import_asset_file_function = ImportFileFunction(
            self,
            directory="import_asset_file",
            invoker=import_dataset_role,
            env_name=env_name,
            botocore_lambda_layer=botocore_lambda_layer,
        )
        import_metadata_file_function = ImportFileFunction(
            self,
            directory="import_metadata_file",
            invoker=import_dataset_role,
            env_name=env_name,
            botocore_lambda_layer=botocore_lambda_layer,
        )

        import_dataset_task = LambdaTask(
            self,
            "import-dataset-task",
            directory="import_dataset",
            botocore_lambda_layer=botocore_lambda_layer,
            result_path=f"$.{IMPORT_DATASET_KEY}",
            extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
        )

        import_dataset_task.lambda_function.add_to_role_policy(
            aws_iam.PolicyStatement(
                resources=[import_dataset_role.role_arn],
                actions=["iam:PassRole"],
            ),
        )
        import_dataset_task.lambda_function.add_to_role_policy(
            aws_iam.PolicyStatement(resources=["*"], actions=["s3:CreateJob"])
        )

        for table in [processing_assets_table]:
            table.grant_read_data(import_dataset_task.lambda_function)
            table.grant(import_dataset_task.lambda_function, "dynamodb:DescribeTable")

        # Import status check
        wait_before_upload_status_check = Wait(
            self,
            "wait-before-upload-status-check",
            time=WaitTime.duration(Duration.seconds(10)),
        )
        upload_status_task = LambdaTask(
            self,
            "upload-status",
            directory="upload_status",
            botocore_lambda_layer=botocore_lambda_layer,
            result_path="$.upload_status",
            extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
        )
        validation_results_table.grant_read_data(upload_status_task.lambda_function)
        validation_results_table.grant(upload_status_task.lambda_function, "dynamodb:DescribeTable")

        upload_status_task.lambda_function.add_to_role_policy(ALLOW_DESCRIBE_ANY_S3_JOB)

        # Parameters
        import_asset_file_function_arn_parameter = aws_ssm.StringParameter(
            self,
            "import asset file function arn",
            string_value=import_asset_file_function.function_arn,
            description=f"Import asset file function ARN for {env_name}",
            parameter_name=ParameterName.PROCESSING_IMPORT_ASSET_FILE_FUNCTION_TASK_ARN.value,
        )
        import_metadata_file_function_arn_parameter = aws_ssm.StringParameter(
            self,
            "import metadata file function arn",
            string_value=import_metadata_file_function.function_arn,
            description=f"Import metadata file function ARN for {env_name}",
            parameter_name=ParameterName.PROCESSING_IMPORT_METADATA_FILE_FUNCTION_TASK_ARN.value,
        )

        import_dataset_role_arn_parameter = aws_ssm.StringParameter(
            self,
            "import dataset role arn",
            string_value=import_dataset_role.role_arn,
            description=f"Import dataset role ARN for {env_name}",
            parameter_name=ParameterName.PROCESSING_IMPORT_DATASET_ROLE_ARN.value,
        )

        update_dataset_catalog = LambdaTask(
            self,
            "update-dataset-catalog",
            directory="update_dataset_catalog",
            botocore_lambda_layer=botocore_lambda_layer,
            extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
        )
        self.message_queue.grant_send_messages(update_dataset_catalog.lambda_function)

        for storage_writer in [
            import_dataset_role,
            import_dataset_task.lambda_function,
            import_asset_file_function,
            import_metadata_file_function,
            populate_catalog_lambda,
            update_dataset_catalog.lambda_function,
        ]:
            storage_bucket.grant_read_write(storage_writer)  # type: ignore[arg-type]

        grant_parameter_read_access(
            {
                import_asset_file_function_arn_parameter: [import_dataset_task.lambda_function],
                import_dataset_role_arn_parameter: [import_dataset_task.lambda_function],
                import_metadata_file_function_arn_parameter: [import_dataset_task.lambda_function],
                processing_assets_table.name_parameter: [
                    check_stac_metadata_task.lambda_function,
                    content_iterator_task.lambda_function,
                    import_dataset_task.lambda_function,
                ],
                validation_results_table.name_parameter: [
                    check_stac_metadata_task.lambda_function,
                    content_iterator_task.lambda_function,
                    validation_summary_task.lambda_function,
                    upload_status_task.lambda_function,
                ],
                self.message_queue_name_parameter: [update_dataset_catalog.lambda_function],
            }
        )

        success_task = aws_stepfunctions.Succeed(self, "success")
        upload_failure = aws_stepfunctions.Fail(self, "upload failure")
        validation_failure = aws_stepfunctions.Succeed(self, "validation failure")

        ############################################################################################
        # STATE MACHINE
        dataset_version_creation_definition = (
            check_stac_metadata_task.next(content_iterator_task)
            .next(
                aws_stepfunctions.Choice(  # type: ignore[arg-type]
                    self, "check_files_checksums_maybe_array"
                )
                .when(
                    aws_stepfunctions.Condition.number_equals(
                        f"$.{CONTENT_KEY}.{ITERATION_SIZE_KEY}", 1
                    ),
                    check_files_checksums_single_task.batch_submit_job,
                )
                .otherwise(check_files_checksums_array_task.batch_submit_job)
                .afterwards()
            )
            .next(
                aws_stepfunctions.Choice(self, "content_iteration_finished")
                .when(
                    aws_stepfunctions.Condition.number_equals(
                        f"$.{CONTENT_KEY}.{NEXT_ITEM_KEY}", -1
                    ),
                    validation_summary_task.next(
                        aws_stepfunctions.Choice(  # type: ignore[arg-type]
                            self, "validation_successful"
                        )
                        .when(
                            aws_stepfunctions.Condition.boolean_equals(
                                f"$.{VALIDATION_KEY}.{SUCCESS_KEY}", True
                            ),
                            import_dataset_task.next(
                                wait_before_upload_status_check  # type: ignore[arg-type]
                            )
                            .next(upload_status_task)
                            .next(
                                aws_stepfunctions.Choice(
                                    self, "import_completed"  # type: ignore[arg-type]
                                )
                                .when(
                                    aws_stepfunctions.Condition.and_(
                                        aws_stepfunctions.Condition.string_equals(
                                            f"$.upload_status.{ASSET_UPLOAD_KEY}.status", "Complete"
                                        ),
                                        aws_stepfunctions.Condition.string_equals(
                                            f"$.upload_status.{METADATA_UPLOAD_KEY}.status",
                                            "Complete",
                                        ),
                                    ),
                                    update_dataset_catalog.next(
                                        success_task  # type: ignore[arg-type]
                                    ),
                                )
                                .when(
                                    aws_stepfunctions.Condition.or_(
                                        aws_stepfunctions.Condition.string_equals(
                                            f"$.upload_status.{ASSET_UPLOAD_KEY}.status",
                                            "Cancelled",
                                        ),
                                        aws_stepfunctions.Condition.string_equals(
                                            f"$.upload_status.{ASSET_UPLOAD_KEY}.status", "Failed"
                                        ),
                                        aws_stepfunctions.Condition.string_equals(
                                            f"$.upload_status.{METADATA_UPLOAD_KEY}.status",
                                            "Cancelled",
                                        ),
                                        aws_stepfunctions.Condition.string_equals(
                                            f"$.upload_status.{METADATA_UPLOAD_KEY}.status",
                                            "Failed",
                                        ),
                                    ),
                                    upload_failure,  # type: ignore[arg-type]
                                )
                                .otherwise(
                                    wait_before_upload_status_check  # type: ignore[arg-type]
                                )
                            ),
                        )
                        .otherwise(validation_failure)  # type: ignore[arg-type]
                    ),
                )
                .otherwise(content_iterator_task)
            )
        )

        self.state_machine = aws_stepfunctions.StateMachine(
            self,
            f"{env_name}-dataset-version-creation",
            definition=dataset_version_creation_definition,  # type: ignore[arg-type]
        )

        self.state_machine_parameter = aws_ssm.StringParameter(
            self,
            "state machine arn",
            description=f"State machine ARN for {env_name}",
            parameter_name=ParameterName.PROCESSING_DATASET_VERSION_CREATION_STEP_FUNCTION_ARN.value,  # pylint:disable=line-too-long
            string_value=self.state_machine.state_machine_arn,
        )

        Tags.of(self).add("ApplicationLayer", "processing")  # type: ignore[arg-type]
Example #5
0
def create_task_definition(
    scope: core.Construct,
    ecr_repository: ecr.Repository,
    log_group: logs.LogGroup,
    policy: iam.Policy,
    s3_bucket: s3.Bucket,
    stack_name: str,
    service_name: str,
    config: StackConfig,
    role: str = None,
    command: list = None,
):
    task_definition = ecs.FargateTaskDefinition(
        scope, f'TaskDefinition-{role}',
        cpu=512,
        memory_limit_mib=1024,
        family=service_name,
    )

    container_props = dict()
    if command:
        container_props['command'] = command

    app_container = task_definition.add_container(
        'container',
        image=ecs.ContainerImage.from_ecr_repository(ecr_repository),
        logging=ecs.LogDrivers.aws_logs(
            stream_prefix=service_name,
            log_group=log_group
        ),
        environment={
            'AWS_REGION': scope.region,
            'DD_ENV': config.stack_label,
            'DD_API_KEY': config.datadog_api_key,
            'DD_SERVICE': role,
            'DD_VERSION': '1',  # TODO calculate in the building
            'STACK_NAME': stack_name,
            'DD_APM_ENABLED': 'true',
            'DD_AGENT_HOST': '0.0.0.0',
            'DD_TRACE_AGENT_PORT': '8126',
        },
        **container_props,
    )
    app_container.add_port_mappings(ecs.PortMapping(container_port=8000))
    task_definition.task_role.attach_inline_policy(policy)

    #
    #  D A T A D O G
    #
    if config.datadog_api_key:
        datadog_container = task_definition.add_container(
            'datadog-agent',
            image=ecs.ContainerImage.from_registry('datadog/agent:latest'),
            memory_limit_mib=256,
            cpu=12,
            logging=ecs.LogDrivers.aws_logs(
                stream_prefix=stack_name,
                log_group=log_group
            ),
            environment={
                'AWS_REGION': scope.region,
                'DD_API_KEY': config.datadog_api_key,
                'DD_APM_ENABLED': 'true',
                'DD_APM_NON_LOCAL_TRAFFIC': 'true',
                'DD_APM_RECEIVER_PORT': '8126',
                'DD_DOGSTATSD_NON_LOCAL_TRAFFIC': 'true',
                'DD_DOGSTATSD_PORT': '8125',
                'ECS_FARGATE': 'true',
            },
        )
        datadog_container.add_port_mappings(
            ecs.PortMapping(container_port=8126, protocol=ecs.Protocol.TCP)
        )
        datadog_container.add_port_mappings(
            ecs.PortMapping(container_port=8125, protocol=ecs.Protocol.UDP)
        )

    task_definition.task_role.attach_inline_policy(policy)
    s3_bucket.grant_read_write(task_definition.task_role)
    return task_definition
Example #6
0
class ApiStack(Stack):
    vpc: Vpc = None
    db: DatabaseInstance = None
    job_processing_queues: List[Queue] = None
    app_bucket: Bucket = None
    pages_bucket: Bucket = None
    domain_name: str = ""

    def __init__(
        self,
        scope: App,
        id: str,
        envs: EnvSettings,
        components: ComponentsStack,
        base_resources: BaseResources,
    ):
        super().__init__(scope, id)

        self.db_secret_arn = Fn.import_value(
            BaseResources.get_database_secret_arn_output_export_name(envs))

        self.job_processing_queues = components.data_processing_queues
        self.vpc = base_resources.vpc
        self.db = base_resources.db

        self.app_bucket = Bucket(self, "App", versioned=True)

        if self.app_bucket.bucket_arn:
            CfnOutput(
                self,
                id="AppBucketOutput",
                export_name=self.get_app_bucket_arn_output_export_name(envs),
                value=self.app_bucket.bucket_arn,
            )

        self.pages_bucket = Bucket(self, "Pages", public_read_access=True)

        self.domain_name = StringParameter.from_string_parameter_name(
            self,
            "DomainNameParameter",
            string_parameter_name="/schema-cms-app/DOMAIN_NAME").string_value

        self.certificate_arn = StringParameter.from_string_parameter_name(
            self,
            "CertificateArnParameter",
            string_parameter_name="/schema-cms-app/CERTIFICATE_ARN"
        ).string_value

        django_secret = Secret(self,
                               "DjangoSecretKey",
                               secret_name="SCHEMA_CMS_DJANGO_SECRET_KEY")
        lambda_auth_token_secret = Secret(
            self,
            "LambdaAuthToken",
            secret_name="SCHEMA_CMS_LAMBDA_AUTH_TOKEN")

        if lambda_auth_token_secret.secret_arn:
            CfnOutput(
                self,
                id="lambdaAuthTokenArnOutput",
                export_name=self.get_lambda_auth_token_arn_output_export_name(
                    envs),
                value=lambda_auth_token_secret.secret_arn,
            )

        self.django_secret_key = EcsSecret.from_secrets_manager(django_secret)
        self.lambda_auth_token = EcsSecret.from_secrets_manager(
            lambda_auth_token_secret)

        tag_from_context = self.node.try_get_context("app_image_tag")
        tag = tag_from_context if tag_from_context != "undefined" else None

        api_image = ContainerImage.from_ecr_repository(
            repository=Repository.from_repository_name(
                self,
                id="BackendRepository",
                repository_name=BaseECR.get_backend_repository_name(envs)),
            tag=tag,
        )
        nginx_image = ContainerImage.from_ecr_repository(
            repository=Repository.from_repository_name(
                self,
                id="NginxRepository",
                repository_name=BaseECR.get_nginx_repository_name(envs)),
            tag=tag,
        )

        self.api = ApplicationLoadBalancedFargateService(
            self,
            "ApiService",
            service_name=f"{envs.project_name}-api-service",
            cluster=Cluster.from_cluster_attributes(
                self,
                id="WorkersCluster",
                cluster_name="schema-ecs-cluster",
                vpc=self.vpc,
                security_groups=[],
            ),
            task_image_options=ApplicationLoadBalancedTaskImageOptions(
                image=nginx_image,
                container_name="nginx",
                container_port=80,
                enable_logging=True,
            ),
            desired_count=1,
            cpu=512,
            memory_limit_mib=1024,
            certificate=Certificate.from_certificate_arn(
                self, "Cert", certificate_arn=self.certificate_arn),
            domain_name=self.domain_name,
            domain_zone=PrivateHostedZone(
                self,
                "zone",
                vpc=self.vpc,
                zone_name=self.domain_name,
            ),
        )

        self.api.task_definition.add_container(
            "backend",
            image=api_image,
            command=[
                "sh", "-c",
                "/bin/chamber exec $CHAMBER_SERVICE_NAME -- ./scripts/run.sh"
            ],
            logging=AwsLogDriver(stream_prefix="backend-container"),
            environment={
                "POSTGRES_DB": envs.data_base_name,
                "AWS_STORAGE_BUCKET_NAME": self.app_bucket.bucket_name,
                "AWS_STORAGE_PAGES_BUCKET_NAME": self.pages_bucket.bucket_name,
                "SQS_WORKER_QUEUE_URL":
                self.job_processing_queues[0].queue_url,
                "SQS_WORKER_EXT_QUEUE_URL":
                self.job_processing_queues[1].queue_url,
                "SQS_WORKER_MAX_QUEUE_URL":
                self.job_processing_queues[2].queue_url,
                "CHAMBER_SERVICE_NAME": "schema-cms-app",
                "CHAMBER_KMS_KEY_ALIAS": envs.project_name,
            },
            secrets={
                "DB_CONNECTION":
                EcsSecret.from_secrets_manager(
                    Secret.from_secret_arn(self,
                                           id="DbSecret",
                                           secret_arn=self.db_secret_arn)),
                "DJANGO_SECRET_KEY":
                self.django_secret_key,
                "LAMBDA_AUTH_TOKEN":
                self.lambda_auth_token,
            },
            cpu=512,
            memory_limit_mib=1024,
        )

        self.django_secret_key.grant_read(
            self.api.service.task_definition.task_role)

        self.app_bucket.grant_read_write(
            self.api.service.task_definition.task_role)
        self.pages_bucket.grant_read_write(
            self.api.service.task_definition.task_role)

        for queue in self.job_processing_queues:
            queue.grant_send_messages(
                self.api.service.task_definition.task_role)

        self.api.service.connections.allow_to(self.db.connections,
                                              Port.tcp(5432))
        self.api.task_definition.add_to_task_role_policy(
            PolicyStatement(
                actions=["ses:SendRawEmail", "ses:SendBulkTemplatedEmail"],
                resources=["*"],
            ))

        self.api.task_definition.add_to_task_role_policy(
            PolicyStatement(
                actions=[
                    "kms:Get*", "kms:Describe*", "kms:List*", "kms:Decrypt"
                ],
                resources=[
                    Fn.import_value(
                        BaseKMS.get_kms_arn_output_export_name(envs))
                ],
            ))

        self.api.task_definition.add_to_task_role_policy(
            PolicyStatement(actions=["ssm:DescribeParameters"],
                            resources=["*"]))

        self.api.task_definition.add_to_task_role_policy(
            PolicyStatement(
                actions=["ssm:GetParameters*"],
                resources=[
                    f"arn:aws:ssm:{self.region}:{self.account}:parameter/schema-cms-app/*"
                ],
            ))

    def grant_secret_access(self, secret):
        secret.grant_read(self.api.service.task_definition.task_role)

    @staticmethod
    def get_app_bucket_arn_output_export_name(envs: EnvSettings):
        return f"{envs.project_name}-appBucketArn"

    @staticmethod
    def get_lambda_auth_token_arn_output_export_name(envs: EnvSettings):
        return f"{envs.project_name}-lambdaAuthTokenArn"
    def __init__(  # pylint: disable=too-many-arguments
        self,
        scope: Construct,
        stack_id: str,
        *,
        botocore_lambda_layer: aws_lambda_python.PythonLayerVersion,
        datasets_table: Table,
        deploy_env: str,
        storage_bucket: aws_s3.Bucket,
        storage_bucket_parameter: aws_ssm.StringParameter,
        validation_results_table: Table,
        **kwargs: Any,
    ) -> None:
        # pylint: disable=too-many-locals
        super().__init__(scope, stack_id, **kwargs)

        ############################################################################################
        # PROCESSING ASSETS TABLE
        processing_assets_table = Table(
            self,
            f"{deploy_env}-processing-assets",
            deploy_env=deploy_env,
            parameter_name=ParameterName.PROCESSING_ASSETS_TABLE_NAME,
            sort_key=aws_dynamodb.Attribute(
                name="sk", type=aws_dynamodb.AttributeType.STRING),
        )

        ############################################################################################
        # BATCH JOB DEPENDENCIES
        batch_job_queue = BatchJobQueue(
            self,
            "batch-job-queue",
            deploy_env=deploy_env,
            processing_assets_table=processing_assets_table,
        ).job_queue

        s3_read_only_access_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
            "AmazonS3ReadOnlyAccess")

        ############################################################################################
        # STATE MACHINE TASKS

        check_stac_metadata_task = LambdaTask(
            self,
            "check-stac-metadata-task",
            directory="check_stac_metadata",
            botocore_lambda_layer=botocore_lambda_layer,
            extra_environment={"DEPLOY_ENV": deploy_env},
        )
        assert check_stac_metadata_task.lambda_function.role
        check_stac_metadata_task.lambda_function.role.add_managed_policy(
            policy=s3_read_only_access_policy)

        for table in [processing_assets_table, validation_results_table]:
            table.grant_read_write_data(
                check_stac_metadata_task.lambda_function)
            table.grant(
                check_stac_metadata_task.lambda_function,
                "dynamodb:DescribeTable",
            )

        content_iterator_task = LambdaTask(
            self,
            "content-iterator-task",
            directory="content_iterator",
            botocore_lambda_layer=botocore_lambda_layer,
            result_path="$.content",
            extra_environment={"DEPLOY_ENV": deploy_env},
        )

        check_files_checksums_directory = "check_files_checksums"
        check_files_checksums_default_payload_object = {
            "dataset_id.$": "$.dataset_id",
            "version_id.$": "$.version_id",
            "metadata_url.$": "$.metadata_url",
            "first_item.$": "$.content.first_item",
            "assets_table_name.$": "$.content.assets_table_name",
            "results_table_name.$": "$.content.results_table_name",
        }
        check_files_checksums_single_task = BatchSubmitJobTask(
            self,
            "check-files-checksums-single-task",
            deploy_env=deploy_env,
            directory=check_files_checksums_directory,
            s3_policy=s3_read_only_access_policy,
            job_queue=batch_job_queue,
            payload_object=check_files_checksums_default_payload_object,
            container_overrides_command=[
                "--dataset-id",
                "Ref::dataset_id",
                "--version-id",
                "Ref::version_id",
                "--first-item",
                "Ref::first_item",
                "--assets-table-name",
                "Ref::assets_table_name",
                "--results-table-name",
                "Ref::results_table_name",
            ],
        )
        array_size = int(
            aws_stepfunctions.JsonPath.number_at("$.content.iteration_size"))
        check_files_checksums_array_task = BatchSubmitJobTask(
            self,
            "check-files-checksums-array-task",
            deploy_env=deploy_env,
            directory=check_files_checksums_directory,
            s3_policy=s3_read_only_access_policy,
            job_queue=batch_job_queue,
            payload_object=check_files_checksums_default_payload_object,
            container_overrides_command=[
                "--dataset-id",
                "Ref::dataset_id",
                "--version-id",
                "Ref::version_id",
                "--first-item",
                "Ref::first_item",
                "--assets-table-name",
                "Ref::assets_table_name",
                "--results-table-name",
                "Ref::results_table_name",
            ],
            array_size=array_size,
        )

        for reader in [
                content_iterator_task.lambda_function,
                check_files_checksums_single_task.job_role,
                check_files_checksums_array_task.job_role,
        ]:
            processing_assets_table.grant_read_data(
                reader)  # type: ignore[arg-type]
            processing_assets_table.grant(
                reader,
                "dynamodb:DescribeTable"  # type: ignore[arg-type]
            )

        for writer in [
                check_files_checksums_single_task.job_role,
                check_files_checksums_array_task.job_role,
        ]:
            validation_results_table.grant_read_write_data(
                writer)  # type: ignore[arg-type]
            validation_results_table.grant(
                writer,
                "dynamodb:DescribeTable"  # type: ignore[arg-type]
            )

        validation_summary_task = LambdaTask(
            self,
            "validation-summary-task",
            directory="validation_summary",
            botocore_lambda_layer=botocore_lambda_layer,
            result_path="$.validation",
            extra_environment={"DEPLOY_ENV": deploy_env},
        )
        validation_results_table.grant_read_data(
            validation_summary_task.lambda_function)
        validation_results_table.grant(validation_summary_task.lambda_function,
                                       "dynamodb:DescribeTable")

        validation_failure_lambda_invoke = LambdaTask(
            self,
            "validation-failure-task",
            directory="validation_failure",
            botocore_lambda_layer=botocore_lambda_layer,
            result_path=aws_stepfunctions.JsonPath.DISCARD,
        ).lambda_invoke

        import_dataset_role = aws_iam.Role(
            self,
            "import-dataset",
            assumed_by=aws_iam.ServicePrincipal(  # type: ignore[arg-type]
                "batchoperations.s3.amazonaws.com"),
        )

        import_asset_file_function = ImportFileFunction(
            self,
            directory="import_asset_file",
            invoker=import_dataset_role,
            deploy_env=deploy_env,
            botocore_lambda_layer=botocore_lambda_layer,
        )
        import_metadata_file_function = ImportFileFunction(
            self,
            directory="import_metadata_file",
            invoker=import_dataset_role,
            deploy_env=deploy_env,
            botocore_lambda_layer=botocore_lambda_layer,
        )

        for storage_writer in [
                import_dataset_role,
                import_asset_file_function.role,
                import_metadata_file_function.role,
        ]:
            storage_bucket.grant_read_write(
                storage_writer)  # type: ignore[arg-type]

        import_dataset_task = LambdaTask(
            self,
            "import-dataset-task",
            directory="import_dataset",
            botocore_lambda_layer=botocore_lambda_layer,
            result_path="$.import_dataset",
            extra_environment={"DEPLOY_ENV": deploy_env},
        )

        assert import_dataset_task.lambda_function.role is not None
        import_dataset_task.lambda_function.role.add_to_policy(
            aws_iam.PolicyStatement(
                resources=[import_dataset_role.role_arn],
                actions=["iam:PassRole"],
            ), )
        import_dataset_task.lambda_function.role.add_to_policy(
            aws_iam.PolicyStatement(resources=["*"], actions=["s3:CreateJob"]))

        storage_bucket.grant_read_write(import_dataset_task.lambda_function)

        for table in [datasets_table, processing_assets_table]:
            table.grant_read_data(import_dataset_task.lambda_function)
            table.grant(import_dataset_task.lambda_function,
                        "dynamodb:DescribeTable")

        # Parameters
        import_asset_file_function_arn_parameter = aws_ssm.StringParameter(
            self,
            "import asset file function arn",
            string_value=import_asset_file_function.function_arn,
            description=f"Import asset file function ARN for {deploy_env}",
            parameter_name=ParameterName.
            PROCESSING_IMPORT_ASSET_FILE_FUNCTION_TASK_ARN.value,
        )
        import_metadata_file_function_arn_parameter = aws_ssm.StringParameter(
            self,
            "import metadata file function arn",
            string_value=import_metadata_file_function.function_arn,
            description=f"Import metadata file function ARN for {deploy_env}",
            parameter_name=ParameterName.
            PROCESSING_IMPORT_METADATA_FILE_FUNCTION_TASK_ARN.value,
        )

        import_dataset_role_arn_parameter = aws_ssm.StringParameter(
            self,
            "import dataset role arn",
            string_value=import_dataset_role.role_arn,
            description=f"Import dataset role ARN for {deploy_env}",
            parameter_name=ParameterName.PROCESSING_IMPORT_DATASET_ROLE_ARN.
            value,
        )

        grant_parameter_read_access({
            datasets_table.name_parameter:
            [import_dataset_task.lambda_function],
            import_asset_file_function_arn_parameter:
            [import_dataset_task.lambda_function],
            import_dataset_role_arn_parameter:
            [import_dataset_task.lambda_function],
            import_metadata_file_function_arn_parameter:
            [import_dataset_task.lambda_function],
            processing_assets_table.name_parameter: [
                check_stac_metadata_task.lambda_function.role,
                content_iterator_task.lambda_function,
                import_dataset_task.lambda_function,
            ],
            storage_bucket_parameter: [
                import_dataset_task.lambda_function,
            ],
            validation_results_table.name_parameter: [
                check_stac_metadata_task.lambda_function.role,
                validation_summary_task.lambda_function,
                content_iterator_task.lambda_function,
            ],
        })

        success_task = aws_stepfunctions.Succeed(self, "success")

        ############################################################################################
        # STATE MACHINE
        dataset_version_creation_definition = (
            check_stac_metadata_task.lambda_invoke.next(
                content_iterator_task.lambda_invoke).next(
                    aws_stepfunctions.Choice(  # type: ignore[arg-type]
                        self, "check_files_checksums_maybe_array").when(
                            aws_stepfunctions.Condition.number_equals(
                                "$.content.iteration_size", 1),
                            check_files_checksums_single_task.batch_submit_job,
                        ).otherwise(check_files_checksums_array_task.
                                    batch_submit_job).afterwards()).
            next(
                aws_stepfunctions.Choice(
                    self, "content_iteration_finished").when(
                        aws_stepfunctions.Condition.number_equals(
                            "$.content.next_item", -1),
                        validation_summary_task.lambda_invoke.next(
                            aws_stepfunctions.Choice(  # type: ignore[arg-type]
                                self, "validation_successful").when(
                                    aws_stepfunctions.Condition.boolean_equals(
                                        "$.validation.success", True),
                                    import_dataset_task.lambda_invoke.next(
                                        success_task  # type: ignore[arg-type]
                                    ),
                                ).otherwise(validation_failure_lambda_invoke)),
                    ).otherwise(content_iterator_task.lambda_invoke)))

        self.state_machine = aws_stepfunctions.StateMachine(
            self,
            f"{deploy_env}-dataset-version-creation",
            definition=
            dataset_version_creation_definition,  # type: ignore[arg-type]
        )

        self.state_machine_parameter = aws_ssm.StringParameter(
            self,
            "state machine arn",
            description=f"State machine ARN for {deploy_env}",
            parameter_name=ParameterName.
            PROCESSING_DATASET_VERSION_CREATION_STEP_FUNCTION_ARN.value,  # pylint:disable=line-too-long
            string_value=self.state_machine.state_machine_arn,
        )

        Tags.of(self).add("ApplicationLayer",
                          "processing")  # type: ignore[arg-type]