def authorize_output_bucket(self, bucket: s3.Bucket, objects_key_pattern: Optional[str] = None):
    if self._rehydrated and not self._mutable_instance_role:
        raise ReadOnlyEMRProfileError()
    bucket.grant_read_write(self._roles.instance_role, objects_key_pattern).assert_success()
    return self
def add_endpoint(self, bucket: s3.Bucket, fn: Function):
    # create the queue
    queue = sqs.Queue(
        self, f'{fn.id_prefix}Queue',
        dead_letter_queue=sqs.DeadLetterQueue(
            max_receive_count=5,
            queue=sqs.Queue(self, f'{fn.id_prefix}DLQ', queue_name=f'{fn.queue_name}-dlq')),
        queue_name=fn.queue_name)

    # create the receiver function
    # add the queue url as an environment variable
    receiver_function = lambda_.Function(
        self, f'{fn.id_prefix}ReceiverFunction',
        code=fn.function_code,
        environment={'QUEUE_URL': queue.queue_url},
        function_name=f'{fn.function_name_prefix}-receiver',
        handler=fn.receiver_function_handler,
        layers=[fn.function_dependencies_layer],
        # memory_size=256,
        runtime=lambda_.Runtime.PYTHON_3_8)

    # allow the receiver function to enqueue messages
    queue.grant_send_messages(receiver_function)

    # route requests to the receiver lambda
    self.api.add_routes(
        integration=apigw.LambdaProxyIntegration(handler=receiver_function),
        methods=[fn.api_method],
        path=fn.api_path)

    # create the handler function
    # add the bucket name as an environment variable
    handler_function = lambda_.Function(
        self, f'{fn.id_prefix}HandlerFunction',
        code=fn.function_code,
        environment={'BUCKET_NAME': bucket.bucket_name},
        function_name=f'{fn.function_name_prefix}-handler',
        handler=fn.handler_function_handler,
        layers=[fn.function_dependencies_layer],
        # memory_size=256,
        runtime=lambda_.Runtime.PYTHON_3_8)

    # add the queue as a trigger for the handler function
    handler_function.add_event_source(SqsEventSource(queue))

    # allow the handler function to access the bucket
    bucket.grant_read_write(handler_function)
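# Note: the `Function` value consumed by add_endpoint above is defined elsewhere in this
# project. The sketch below is an assumption inferred purely from the attributes the method
# reads (it is not the project's actual class); it only illustrates the expected shape.
from dataclasses import dataclass
from typing import Any

from aws_cdk import aws_lambda as lambda_


@dataclass
class Function:
    id_prefix: str                      # prefix used for the CDK construct IDs
    queue_name: str                     # SQS queue name (the DLQ gets a "-dlq" suffix)
    function_name_prefix: str           # base name for the "-receiver"/"-handler" functions
    function_code: lambda_.Code         # code asset shared by both functions
    function_dependencies_layer: lambda_.ILayerVersion
    receiver_function_handler: str      # e.g. "receiver.handler" (example value)
    handler_function_handler: str       # e.g. "handler.handler" (example value)
    api_method: Any                     # HTTP method routed to the receiver
    api_path: str                       # HTTP path routed to the receiver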
def __init__(self, scope: cdk.Construct, construct_id: str, config, vpc: IVpc,
             instance: IInstance, neo4j_user_secret: ISecret, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    bucket = Bucket(
        self, "s3-bucket-altimeter",
        bucket_name=config["s3_bucket"],
        # Disable encryption since it's not really required and it conflicts with SCP
        # guardrails set by Control Tower on the Audit account.
        encryption=BucketEncryption.UNENCRYPTED,  # .S3_MANAGED
        block_public_access=BlockPublicAccess.BLOCK_ALL
    )

    cluster = Cluster(
        self, "ecs-cluster-altimeter",
        cluster_name="ecsclstr-altimeter--default",
        vpc=vpc
    )

    # It appears that within the account where the scanner is running, the task role is
    # (partially) used for scanning resources (rather than the altimeter-scanner-access role).
    task_role = Role(
        self, "iam-role-altimeter-task-role",
        assumed_by=ServicePrincipal("ecs-tasks.amazonaws.com"),
        managed_policies=[
            ManagedPolicy.from_aws_managed_policy_name('SecurityAudit'),
            ManagedPolicy.from_aws_managed_policy_name('job-function/ViewOnlyAccess')
        ]
    )

    task_definition = FargateTaskDefinition(
        self, "ecs-fgtd-altimeter",
        task_role=task_role,
        memory_limit_mib=self.MEMORY_LIMIT,
        cpu=self.CPU
    )

    docker_path = os.path.join(os.path.curdir, "..")
    image_asset = DockerImageAsset(
        self, 'ecr-assets-dia-altimeter',
        directory=docker_path,
        file="scanner.Dockerfile"
    )

    task_definition.add_container(
        "ecs-container-altimeter",
        image=ContainerImage.from_docker_image_asset(image_asset),
        # memory_limit_mib=self.MEMORY_LIMIT,
        # cpu=self.CPU,
        environment={
            "CONFIG_PATH": config["altimeter_config_path"],
            "S3_BUCKET": config["s3_bucket"]
        },
        logging=AwsLogDriver(
            stream_prefix='altimeter',
            log_retention=RetentionDays.TWO_WEEKS
        )
    )

    task_definition.add_to_task_role_policy(PolicyStatement(
        resources=["arn:aws:iam::*:role/" + config["account_execution_role"]],
        actions=['sts:AssumeRole']
    ))

    task_definition.add_to_task_role_policy(PolicyStatement(
        resources=[
            "arn:aws:s3:::" + config["s3_bucket"],
            "arn:aws:s3:::" + config["s3_bucket"] + "/*"
        ],
        actions=["s3:GetObject*",
                 "s3:GetBucket*",
                 "s3:List*",
                 "s3:DeleteObject*",
                 "s3:PutObject",
                 "s3:Abort*",
                 "s3:PutObjectTagging"]
    ))

    # Grant the ability to record the stdout to CloudWatch Logs
    # TODO: Refine
    task_definition.add_to_task_role_policy(PolicyStatement(
        resources=["*"],
        actions=['logs:*']
    ))

    # Trigger task every 24 hours
    Rule(
        self, "events-rule-altimeter-daily-scan",
        rule_name="evrule--altimeter-daily-scan",
        schedule=Schedule.cron(hour="0", minute="0"),
        description="Daily altimeter scan",
        targets=[EcsTask(
            task_definition=task_definition,
            cluster=cluster,
            subnet_selection=SubnetSelection(subnet_type=SubnetType.PRIVATE)
        )]
    )

    # Trigger task manually via event
    Rule(
        self, "events-rule-altimeter-manual-scan",
        rule_name="evrule--altimeter-manual-scan",
        event_pattern=EventPattern(source=['altimeter']),
        description="Manual altimeter scan",
        targets=[EcsTask(
            task_definition=task_definition,
            cluster=cluster,
            subnet_selection=SubnetSelection(subnet_type=SubnetType.PRIVATE)
        )]
    )

    # Don't put the Neo4j Importer lambda in a separate stack since it causes a circular
    # reference with the S3 event source, and using an imported bucket as event source is
    # not possible (you need a Bucket, not an IBucket).
    neo4j_importer_function = PythonFunction(
        self, 'lambda-function-neo4j-importer',
        function_name="function-altimeter--neo4j-importer",
        entry="../neo4j-importer",
        index="app.py",
        handler="lambda_handler",
        runtime=Runtime.PYTHON_3_8,
        memory_size=256,
        timeout=cdk.Duration.seconds(60),
        vpc=vpc,
        vpc_subnets=SubnetSelection(
            subnets=vpc.select_subnets(subnet_group_name='Private').subnets),
        environment={
            "neo4j_address": instance.instance_private_ip,
            "neo4j_user_secret_name": neo4j_user_secret.secret_name
        }
    )

    neo4j_importer_function.add_event_source(
        S3EventSource(
            bucket,
            events=[EventType.OBJECT_CREATED, EventType.OBJECT_REMOVED],
            filters=[{"prefix": "raw/", "suffix": ".rdf"}]
        )
    )

    # Grant lambda read/write access to the S3 bucket for reading raw rdf,
    # writing prepared rdf and generating signed uri
    bucket.grant_read_write(neo4j_importer_function.role)
    # Grant lambda read access to the neo4j user secret
    neo4j_user_secret.grant_read(neo4j_importer_function.role)
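# Hedged usage sketch: the "events-rule-altimeter-manual-scan" rule above matches EventBridge
# events whose source is "altimeter", so a scan can be started on demand by publishing such an
# event. The detail-type and detail payload below are illustrative assumptions, not values the
# stack requires.
import json

import boto3

events_client = boto3.client("events")
events_client.put_events(
    Entries=[
        {
            "Source": "altimeter",
            "DetailType": "manual-scan",
            "Detail": json.dumps({}),
        }
    ]
)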
def __init__(
    self,
    scope: Construct,
    stack_id: str,
    *,
    botocore_lambda_layer: aws_lambda_python.PythonLayerVersion,
    env_name: str,
    storage_bucket: aws_s3.Bucket,
    validation_results_table: Table,
) -> None:  # pylint: disable=too-many-locals, too-many-statements
    super().__init__(scope, stack_id)

    ############################################################################################
    # PROCESSING ASSETS TABLE
    processing_assets_table = Table(
        self,
        f"{env_name}-processing-assets",
        env_name=env_name,
        parameter_name=ParameterName.PROCESSING_ASSETS_TABLE_NAME,
        sort_key=aws_dynamodb.Attribute(name="sk", type=aws_dynamodb.AttributeType.STRING),
    )

    ############################################################################################
    # BATCH JOB DEPENDENCIES
    batch_job_queue = BatchJobQueue(
        self,
        "batch-job-queue",
        env_name=env_name,
        processing_assets_table=processing_assets_table,
    ).job_queue

    s3_read_only_access_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
        "AmazonS3ReadOnlyAccess"
    )

    ############################################################################################
    # UPDATE CATALOG UPDATE MESSAGE QUEUE
    dead_letter_queue = aws_sqs.Queue(
        self,
        "dead-letter-queue",
        visibility_timeout=LAMBDA_TIMEOUT,
    )

    self.message_queue = aws_sqs.Queue(
        self,
        "update-catalog-message-queue",
        visibility_timeout=LAMBDA_TIMEOUT,
        dead_letter_queue=aws_sqs.DeadLetterQueue(max_receive_count=3, queue=dead_letter_queue),
    )
    self.message_queue_name_parameter = aws_ssm.StringParameter(
        self,
        "update-catalog-message-queue-name",
        string_value=self.message_queue.queue_name,
        description=f"Update Catalog Message Queue Name for {env_name}",
        parameter_name=ParameterName.UPDATE_CATALOG_MESSAGE_QUEUE_NAME.value,
    )

    populate_catalog_lambda = BundledLambdaFunction(
        self,
        "populate-catalog-bundled-lambda-function",
        directory="populate_catalog",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
        botocore_lambda_layer=botocore_lambda_layer,
    )

    self.message_queue.grant_consume_messages(populate_catalog_lambda)
    populate_catalog_lambda.add_event_source(
        SqsEventSource(self.message_queue, batch_size=1)  # type: ignore[arg-type]
    )

    ############################################################################################
    # STATE MACHINE TASKS
    check_stac_metadata_task = LambdaTask(
        self,
        "check-stac-metadata-task",
        directory="check_stac_metadata",
        botocore_lambda_layer=botocore_lambda_layer,
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    assert check_stac_metadata_task.lambda_function.role
    check_stac_metadata_task.lambda_function.role.add_managed_policy(
        policy=s3_read_only_access_policy
    )

    for table in [processing_assets_table, validation_results_table]:
        table.grant_read_write_data(check_stac_metadata_task.lambda_function)
        table.grant(
            check_stac_metadata_task.lambda_function,
            "dynamodb:DescribeTable",
        )

    content_iterator_task = LambdaTask(
        self,
        "content-iterator-task",
        directory="content_iterator",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path=f"$.{CONTENT_KEY}",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )

    check_files_checksums_directory = "check_files_checksums"
    check_files_checksums_default_payload_object = {
        f"{DATASET_ID_KEY}.$": f"$.{DATASET_ID_KEY}",
        f"{VERSION_ID_KEY}.$": f"$.{VERSION_ID_KEY}",
        f"{METADATA_URL_KEY}.$": f"$.{METADATA_URL_KEY}",
        f"{FIRST_ITEM_KEY}.$": f"$.{CONTENT_KEY}.{FIRST_ITEM_KEY}",
        f"{ASSETS_TABLE_NAME_KEY}.$": f"$.{CONTENT_KEY}.{ASSETS_TABLE_NAME_KEY}",
        f"{RESULTS_TABLE_NAME_KEY}.$": f"$.{CONTENT_KEY}.{RESULTS_TABLE_NAME_KEY}",
    }
    check_files_checksums_single_task = BatchSubmitJobTask(
        self,
        "check-files-checksums-single-task",
        env_name=env_name,
        directory=check_files_checksums_directory,
        s3_policy=s3_read_only_access_policy,
        job_queue=batch_job_queue,
        payload_object=check_files_checksums_default_payload_object,
        container_overrides_command=[
            "--dataset-id",
            f"Ref::{DATASET_ID_KEY}",
            "--version-id",
            f"Ref::{VERSION_ID_KEY}",
            "--first-item",
            f"Ref::{FIRST_ITEM_KEY}",
            "--assets-table-name",
            f"Ref::{ASSETS_TABLE_NAME_KEY}",
            "--results-table-name",
            f"Ref::{RESULTS_TABLE_NAME_KEY}",
        ],
    )

    array_size = int(
        aws_stepfunctions.JsonPath.number_at(f"$.{CONTENT_KEY}.{ITERATION_SIZE_KEY}")
    )
    check_files_checksums_array_task = BatchSubmitJobTask(
        self,
        "check-files-checksums-array-task",
        env_name=env_name,
        directory=check_files_checksums_directory,
        s3_policy=s3_read_only_access_policy,
        job_queue=batch_job_queue,
        payload_object=check_files_checksums_default_payload_object,
        container_overrides_command=[
            "--dataset-id",
            f"Ref::{DATASET_ID_KEY}",
            "--version-id",
            f"Ref::{VERSION_ID_KEY}",
            "--first-item",
            f"Ref::{FIRST_ITEM_KEY}",
            "--assets-table-name",
            f"Ref::{ASSETS_TABLE_NAME_KEY}",
            "--results-table-name",
            f"Ref::{RESULTS_TABLE_NAME_KEY}",
        ],
        array_size=array_size,
    )

    for reader in [
        content_iterator_task.lambda_function,
        check_files_checksums_single_task.job_role,
        check_files_checksums_array_task.job_role,
    ]:
        processing_assets_table.grant_read_data(reader)  # type: ignore[arg-type]
        processing_assets_table.grant(
            reader, "dynamodb:DescribeTable"  # type: ignore[arg-type]
        )

    for writer in [
        check_files_checksums_single_task.job_role,
        check_files_checksums_array_task.job_role,
    ]:
        validation_results_table.grant_read_write_data(writer)  # type: ignore[arg-type]
        validation_results_table.grant(
            writer, "dynamodb:DescribeTable"  # type: ignore[arg-type]
        )

    validation_summary_task = LambdaTask(
        self,
        "validation-summary-task",
        directory="validation_summary",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path=f"$.{VALIDATION_KEY}",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    validation_results_table.grant_read_data(validation_summary_task.lambda_function)
    validation_results_table.grant(
        validation_summary_task.lambda_function, "dynamodb:DescribeTable"
    )

    import_dataset_role = aws_iam.Role(
        self,
        "import-dataset",
        assumed_by=aws_iam.ServicePrincipal(  # type: ignore[arg-type]
            "batchoperations.s3.amazonaws.com"
        ),
    )

    import_asset_file_function = ImportFileFunction(
        self,
        directory="import_asset_file",
        invoker=import_dataset_role,
        env_name=env_name,
        botocore_lambda_layer=botocore_lambda_layer,
    )
    import_metadata_file_function = ImportFileFunction(
        self,
        directory="import_metadata_file",
        invoker=import_dataset_role,
        env_name=env_name,
        botocore_lambda_layer=botocore_lambda_layer,
    )

    import_dataset_task = LambdaTask(
        self,
        "import-dataset-task",
        directory="import_dataset",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path=f"$.{IMPORT_DATASET_KEY}",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    import_dataset_task.lambda_function.add_to_role_policy(
        aws_iam.PolicyStatement(
            resources=[import_dataset_role.role_arn],
            actions=["iam:PassRole"],
        ),
    )
    import_dataset_task.lambda_function.add_to_role_policy(
        aws_iam.PolicyStatement(resources=["*"], actions=["s3:CreateJob"])
    )

    for table in [processing_assets_table]:
        table.grant_read_data(import_dataset_task.lambda_function)
        table.grant(import_dataset_task.lambda_function, "dynamodb:DescribeTable")

    # Import status check
    wait_before_upload_status_check = Wait(
        self,
        "wait-before-upload-status-check",
        time=WaitTime.duration(Duration.seconds(10)),
    )
    upload_status_task = LambdaTask(
        self,
        "upload-status",
        directory="upload_status",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path="$.upload_status",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    validation_results_table.grant_read_data(upload_status_task.lambda_function)
    validation_results_table.grant(upload_status_task.lambda_function, "dynamodb:DescribeTable")

    upload_status_task.lambda_function.add_to_role_policy(ALLOW_DESCRIBE_ANY_S3_JOB)

    # Parameters
    import_asset_file_function_arn_parameter = aws_ssm.StringParameter(
        self,
        "import asset file function arn",
        string_value=import_asset_file_function.function_arn,
        description=f"Import asset file function ARN for {env_name}",
        parameter_name=ParameterName.PROCESSING_IMPORT_ASSET_FILE_FUNCTION_TASK_ARN.value,
    )
    import_metadata_file_function_arn_parameter = aws_ssm.StringParameter(
        self,
        "import metadata file function arn",
        string_value=import_metadata_file_function.function_arn,
        description=f"Import metadata file function ARN for {env_name}",
        parameter_name=ParameterName.PROCESSING_IMPORT_METADATA_FILE_FUNCTION_TASK_ARN.value,
    )
    import_dataset_role_arn_parameter = aws_ssm.StringParameter(
        self,
        "import dataset role arn",
        string_value=import_dataset_role.role_arn,
        description=f"Import dataset role ARN for {env_name}",
        parameter_name=ParameterName.PROCESSING_IMPORT_DATASET_ROLE_ARN.value,
    )

    update_dataset_catalog = LambdaTask(
        self,
        "update-dataset-catalog",
        directory="update_dataset_catalog",
        botocore_lambda_layer=botocore_lambda_layer,
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    self.message_queue.grant_send_messages(update_dataset_catalog.lambda_function)

    for storage_writer in [
        import_dataset_role,
        import_dataset_task.lambda_function,
        import_asset_file_function,
        import_metadata_file_function,
        populate_catalog_lambda,
        update_dataset_catalog.lambda_function,
    ]:
        storage_bucket.grant_read_write(storage_writer)  # type: ignore[arg-type]

    grant_parameter_read_access(
        {
            import_asset_file_function_arn_parameter: [import_dataset_task.lambda_function],
            import_dataset_role_arn_parameter: [import_dataset_task.lambda_function],
            import_metadata_file_function_arn_parameter: [import_dataset_task.lambda_function],
            processing_assets_table.name_parameter: [
                check_stac_metadata_task.lambda_function,
                content_iterator_task.lambda_function,
                import_dataset_task.lambda_function,
            ],
            validation_results_table.name_parameter: [
                check_stac_metadata_task.lambda_function,
                content_iterator_task.lambda_function,
                validation_summary_task.lambda_function,
                upload_status_task.lambda_function,
            ],
            self.message_queue_name_parameter: [update_dataset_catalog.lambda_function],
        }
    )

    success_task = aws_stepfunctions.Succeed(self, "success")
    upload_failure = aws_stepfunctions.Fail(self, "upload failure")
    validation_failure = aws_stepfunctions.Succeed(self, "validation failure")

    ############################################################################################
    # STATE MACHINE
    dataset_version_creation_definition = (
        check_stac_metadata_task.next(content_iterator_task)
        .next(
            aws_stepfunctions.Choice(  # type: ignore[arg-type]
                self, "check_files_checksums_maybe_array"
            )
            .when(
                aws_stepfunctions.Condition.number_equals(
                    f"$.{CONTENT_KEY}.{ITERATION_SIZE_KEY}", 1
                ),
                check_files_checksums_single_task.batch_submit_job,
            )
            .otherwise(check_files_checksums_array_task.batch_submit_job)
            .afterwards()
        )
        .next(
            aws_stepfunctions.Choice(self, "content_iteration_finished")
            .when(
                aws_stepfunctions.Condition.number_equals(
                    f"$.{CONTENT_KEY}.{NEXT_ITEM_KEY}", -1
                ),
                validation_summary_task.next(
                    aws_stepfunctions.Choice(  # type: ignore[arg-type]
                        self, "validation_successful"
                    )
                    .when(
                        aws_stepfunctions.Condition.boolean_equals(
                            f"$.{VALIDATION_KEY}.{SUCCESS_KEY}", True
                        ),
                        import_dataset_task.next(
                            wait_before_upload_status_check  # type: ignore[arg-type]
                        )
                        .next(upload_status_task)
                        .next(
                            aws_stepfunctions.Choice(
                                self, "import_completed"  # type: ignore[arg-type]
                            )
                            .when(
                                aws_stepfunctions.Condition.and_(
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{ASSET_UPLOAD_KEY}.status", "Complete"
                                    ),
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{METADATA_UPLOAD_KEY}.status",
                                        "Complete",
                                    ),
                                ),
                                update_dataset_catalog.next(
                                    success_task  # type: ignore[arg-type]
                                ),
                            )
                            .when(
                                aws_stepfunctions.Condition.or_(
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{ASSET_UPLOAD_KEY}.status",
                                        "Cancelled",
                                    ),
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{ASSET_UPLOAD_KEY}.status", "Failed"
                                    ),
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{METADATA_UPLOAD_KEY}.status",
                                        "Cancelled",
                                    ),
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{METADATA_UPLOAD_KEY}.status",
                                        "Failed",
                                    ),
                                ),
                                upload_failure,  # type: ignore[arg-type]
                            )
                            .otherwise(
                                wait_before_upload_status_check  # type: ignore[arg-type]
                            )
                        ),
                    )
                    .otherwise(validation_failure)  # type: ignore[arg-type]
                ),
            )
            .otherwise(content_iterator_task)
        )
    )

    self.state_machine = aws_stepfunctions.StateMachine(
        self,
        f"{env_name}-dataset-version-creation",
        definition=dataset_version_creation_definition,  # type: ignore[arg-type]
    )

    self.state_machine_parameter = aws_ssm.StringParameter(
        self,
        "state machine arn",
        description=f"State machine ARN for {env_name}",
        parameter_name=ParameterName.PROCESSING_DATASET_VERSION_CREATION_STEP_FUNCTION_ARN.value,  # pylint:disable=line-too-long
        string_value=self.state_machine.state_machine_arn,
    )

    Tags.of(self).add("ApplicationLayer", "processing")  # type: ignore[arg-type]
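# Hedged usage sketch: the state machine above is driven by an execution input carrying the
# dataset/version/metadata-URL fields referenced through the *_KEY constants. The literal key
# names and the state machine ARN below are assumptions for illustration only.
import json

import boto3

boto3.client("stepfunctions").start_execution(
    stateMachineArn="arn:aws:states:us-east-1:123456789012:stateMachine:env-dataset-version-creation",
    input=json.dumps(
        {
            "dataset_id": "example-dataset",
            "version_id": "example-version",
            "metadata_url": "s3://example-storage-bucket/example-dataset/catalog.json",
        }
    ),
)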
def create_task_definition(
    scope: core.Construct,
    ecr_repository: ecr.Repository,
    log_group: logs.LogGroup,
    policy: iam.Policy,
    s3_bucket: s3.Bucket,
    stack_name: str,
    service_name: str,
    config: StackConfig,
    role: str = None,
    command: list = None,
):
    task_definition = ecs.FargateTaskDefinition(
        scope,
        f'TaskDefinition-{role}',
        cpu=512,
        memory_limit_mib=1024,
        family=service_name,
    )

    container_props = dict()
    if command:
        container_props['command'] = command

    app_container = task_definition.add_container(
        'container',
        image=ecs.ContainerImage.from_ecr_repository(ecr_repository),
        logging=ecs.LogDrivers.aws_logs(
            stream_prefix=service_name, log_group=log_group
        ),
        environment={
            'AWS_REGION': scope.region,
            'DD_ENV': config.stack_label,
            'DD_API_KEY': config.datadog_api_key,
            'DD_SERVICE': role,
            'DD_VERSION': '1',  # TODO calculate in the building
            'STACK_NAME': stack_name,
            'DD_APM_ENABLED': 'true',
            'DD_AGENT_HOST': '0.0.0.0',
            'DD_TRACE_AGENT_PORT': '8126',
        },
        **container_props,
    )
    app_container.add_port_mappings(ecs.PortMapping(container_port=8000))

    task_definition.task_role.attach_inline_policy(policy)

    #
    # D A T A D O G
    #
    if config.datadog_api_key:
        datadog_container = task_definition.add_container(
            'datadog-agent',
            image=ecs.ContainerImage.from_registry('datadog/agent:latest'),
            memory_limit_mib=256,
            cpu=12,
            logging=ecs.LogDrivers.aws_logs(
                stream_prefix=stack_name, log_group=log_group
            ),
            environment={
                'AWS_REGION': scope.region,
                'DD_API_KEY': config.datadog_api_key,
                'DD_APM_ENABLED': 'true',
                'DD_APM_NON_LOCAL_TRAFFIC': 'true',
                'DD_APM_RECEIVER_PORT': '8126',
                'DD_DOGSTATSD_NON_LOCAL_TRAFFIC': 'true',
                'DD_DOGSTATSD_PORT': '8125',
                'ECS_FARGATE': 'true',
            },
        )
        datadog_container.add_port_mappings(
            ecs.PortMapping(container_port=8126, protocol=ecs.Protocol.TCP)
        )
        datadog_container.add_port_mappings(
            ecs.PortMapping(container_port=8125, protocol=ecs.Protocol.UDP)
        )

    task_definition.task_role.attach_inline_policy(policy)
    s3_bucket.grant_read_write(task_definition.task_role)

    return task_definition
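# Note: `StackConfig` is defined elsewhere in this project. A minimal sketch of the fields
# create_task_definition actually reads (an assumption based on usage, not the real class):
from dataclasses import dataclass


@dataclass
class StackConfig:
    stack_label: str       # exported to the app container as DD_ENV
    datadog_api_key: str   # when non-empty, the Datadog agent sidecar container is added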
class ApiStack(Stack):
    vpc: Vpc = None
    db: DatabaseInstance = None
    job_processing_queues: List[Queue] = None
    app_bucket: Bucket = None
    pages_bucket: Bucket = None
    domain_name: str = ""

    def __init__(
        self,
        scope: App,
        id: str,
        envs: EnvSettings,
        components: ComponentsStack,
        base_resources: BaseResources,
    ):
        super().__init__(scope, id)

        self.db_secret_arn = Fn.import_value(
            BaseResources.get_database_secret_arn_output_export_name(envs)
        )
        self.job_processing_queues = components.data_processing_queues
        self.vpc = base_resources.vpc
        self.db = base_resources.db

        self.app_bucket = Bucket(self, "App", versioned=True)

        if self.app_bucket.bucket_arn:
            CfnOutput(
                self,
                id="AppBucketOutput",
                export_name=self.get_app_bucket_arn_output_export_name(envs),
                value=self.app_bucket.bucket_arn,
            )

        self.pages_bucket = Bucket(self, "Pages", public_read_access=True)

        self.domain_name = StringParameter.from_string_parameter_name(
            self, "DomainNameParameter", string_parameter_name="/schema-cms-app/DOMAIN_NAME"
        ).string_value
        self.certificate_arn = StringParameter.from_string_parameter_name(
            self, "CertificateArnParameter", string_parameter_name="/schema-cms-app/CERTIFICATE_ARN"
        ).string_value

        django_secret = Secret(self, "DjangoSecretKey", secret_name="SCHEMA_CMS_DJANGO_SECRET_KEY")
        lambda_auth_token_secret = Secret(
            self, "LambdaAuthToken", secret_name="SCHEMA_CMS_LAMBDA_AUTH_TOKEN"
        )

        if lambda_auth_token_secret.secret_arn:
            CfnOutput(
                self,
                id="lambdaAuthTokenArnOutput",
                export_name=self.get_lambda_auth_token_arn_output_export_name(envs),
                value=lambda_auth_token_secret.secret_arn,
            )

        self.django_secret_key = EcsSecret.from_secrets_manager(django_secret)
        self.lambda_auth_token = EcsSecret.from_secrets_manager(lambda_auth_token_secret)

        tag_from_context = self.node.try_get_context("app_image_tag")
        tag = tag_from_context if tag_from_context != "undefined" else None

        api_image = ContainerImage.from_ecr_repository(
            repository=Repository.from_repository_name(
                self,
                id="BackendRepository",
                repository_name=BaseECR.get_backend_repository_name(envs),
            ),
            tag=tag,
        )
        nginx_image = ContainerImage.from_ecr_repository(
            repository=Repository.from_repository_name(
                self,
                id="NginxRepository",
                repository_name=BaseECR.get_nginx_repository_name(envs),
            ),
            tag=tag,
        )

        self.api = ApplicationLoadBalancedFargateService(
            self,
            "ApiService",
            service_name=f"{envs.project_name}-api-service",
            cluster=Cluster.from_cluster_attributes(
                self,
                id="WorkersCluster",
                cluster_name="schema-ecs-cluster",
                vpc=self.vpc,
                security_groups=[],
            ),
            task_image_options=ApplicationLoadBalancedTaskImageOptions(
                image=nginx_image,
                container_name="nginx",
                container_port=80,
                enable_logging=True,
            ),
            desired_count=1,
            cpu=512,
            memory_limit_mib=1024,
            certificate=Certificate.from_certificate_arn(
                self, "Cert", certificate_arn=self.certificate_arn
            ),
            domain_name=self.domain_name,
            domain_zone=PrivateHostedZone(
                self, "zone", vpc=self.vpc, zone_name=self.domain_name
            ),
        )

        self.api.task_definition.add_container(
            "backend",
            image=api_image,
            command=["sh", "-c", "/bin/chamber exec $CHAMBER_SERVICE_NAME -- ./scripts/run.sh"],
            logging=AwsLogDriver(stream_prefix="backend-container"),
            environment={
                "POSTGRES_DB": envs.data_base_name,
                "AWS_STORAGE_BUCKET_NAME": self.app_bucket.bucket_name,
                "AWS_STORAGE_PAGES_BUCKET_NAME": self.pages_bucket.bucket_name,
                "SQS_WORKER_QUEUE_URL": self.job_processing_queues[0].queue_url,
                "SQS_WORKER_EXT_QUEUE_URL": self.job_processing_queues[1].queue_url,
                "SQS_WORKER_MAX_QUEUE_URL": self.job_processing_queues[2].queue_url,
                "CHAMBER_SERVICE_NAME": "schema-cms-app",
                "CHAMBER_KMS_KEY_ALIAS": envs.project_name,
            },
            secrets={
                "DB_CONNECTION": EcsSecret.from_secrets_manager(
                    Secret.from_secret_arn(self, id="DbSecret", secret_arn=self.db_secret_arn)
                ),
                "DJANGO_SECRET_KEY": self.django_secret_key,
                "LAMBDA_AUTH_TOKEN": self.lambda_auth_token,
            },
            cpu=512,
            memory_limit_mib=1024,
        )

        self.django_secret_key.grant_read(self.api.service.task_definition.task_role)

        self.app_bucket.grant_read_write(self.api.service.task_definition.task_role)
        self.pages_bucket.grant_read_write(self.api.service.task_definition.task_role)

        for queue in self.job_processing_queues:
            queue.grant_send_messages(self.api.service.task_definition.task_role)

        self.api.service.connections.allow_to(self.db.connections, Port.tcp(5432))

        self.api.task_definition.add_to_task_role_policy(
            PolicyStatement(
                actions=["ses:SendRawEmail", "ses:SendBulkTemplatedEmail"],
                resources=["*"],
            )
        )
        self.api.task_definition.add_to_task_role_policy(
            PolicyStatement(
                actions=["kms:Get*", "kms:Describe*", "kms:List*", "kms:Decrypt"],
                resources=[Fn.import_value(BaseKMS.get_kms_arn_output_export_name(envs))],
            )
        )
        self.api.task_definition.add_to_task_role_policy(
            PolicyStatement(actions=["ssm:DescribeParameters"], resources=["*"])
        )
        self.api.task_definition.add_to_task_role_policy(
            PolicyStatement(
                actions=["ssm:GetParameters*"],
                resources=[
                    f"arn:aws:ssm:{self.region}:{self.account}:parameter/schema-cms-app/*"
                ],
            )
        )

    def grant_secret_access(self, secret):
        secret.grant_read(self.api.service.task_definition.task_role)

    @staticmethod
    def get_app_bucket_arn_output_export_name(envs: EnvSettings):
        return f"{envs.project_name}-appBucketArn"

    @staticmethod
    def get_lambda_auth_token_arn_output_export_name(envs: EnvSettings):
        return f"{envs.project_name}-lambdaAuthTokenArn"
def __init__(  # pylint: disable=too-many-arguments
    self,
    scope: Construct,
    stack_id: str,
    *,
    botocore_lambda_layer: aws_lambda_python.PythonLayerVersion,
    datasets_table: Table,
    deploy_env: str,
    storage_bucket: aws_s3.Bucket,
    storage_bucket_parameter: aws_ssm.StringParameter,
    validation_results_table: Table,
    **kwargs: Any,
) -> None:  # pylint: disable=too-many-locals
    super().__init__(scope, stack_id, **kwargs)

    ############################################################################################
    # PROCESSING ASSETS TABLE
    processing_assets_table = Table(
        self,
        f"{deploy_env}-processing-assets",
        deploy_env=deploy_env,
        parameter_name=ParameterName.PROCESSING_ASSETS_TABLE_NAME,
        sort_key=aws_dynamodb.Attribute(name="sk", type=aws_dynamodb.AttributeType.STRING),
    )

    ############################################################################################
    # BATCH JOB DEPENDENCIES
    batch_job_queue = BatchJobQueue(
        self,
        "batch-job-queue",
        deploy_env=deploy_env,
        processing_assets_table=processing_assets_table,
    ).job_queue

    s3_read_only_access_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
        "AmazonS3ReadOnlyAccess"
    )

    ############################################################################################
    # STATE MACHINE TASKS
    check_stac_metadata_task = LambdaTask(
        self,
        "check-stac-metadata-task",
        directory="check_stac_metadata",
        botocore_lambda_layer=botocore_lambda_layer,
        extra_environment={"DEPLOY_ENV": deploy_env},
    )
    assert check_stac_metadata_task.lambda_function.role
    check_stac_metadata_task.lambda_function.role.add_managed_policy(
        policy=s3_read_only_access_policy
    )

    for table in [processing_assets_table, validation_results_table]:
        table.grant_read_write_data(check_stac_metadata_task.lambda_function)
        table.grant(
            check_stac_metadata_task.lambda_function,
            "dynamodb:DescribeTable",
        )

    content_iterator_task = LambdaTask(
        self,
        "content-iterator-task",
        directory="content_iterator",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path="$.content",
        extra_environment={"DEPLOY_ENV": deploy_env},
    )

    check_files_checksums_directory = "check_files_checksums"
    check_files_checksums_default_payload_object = {
        "dataset_id.$": "$.dataset_id",
        "version_id.$": "$.version_id",
        "metadata_url.$": "$.metadata_url",
        "first_item.$": "$.content.first_item",
        "assets_table_name.$": "$.content.assets_table_name",
        "results_table_name.$": "$.content.results_table_name",
    }
    check_files_checksums_single_task = BatchSubmitJobTask(
        self,
        "check-files-checksums-single-task",
        deploy_env=deploy_env,
        directory=check_files_checksums_directory,
        s3_policy=s3_read_only_access_policy,
        job_queue=batch_job_queue,
        payload_object=check_files_checksums_default_payload_object,
        container_overrides_command=[
            "--dataset-id",
            "Ref::dataset_id",
            "--version-id",
            "Ref::version_id",
            "--first-item",
            "Ref::first_item",
            "--assets-table-name",
            "Ref::assets_table_name",
            "--results-table-name",
            "Ref::results_table_name",
        ],
    )

    array_size = int(aws_stepfunctions.JsonPath.number_at("$.content.iteration_size"))
    check_files_checksums_array_task = BatchSubmitJobTask(
        self,
        "check-files-checksums-array-task",
        deploy_env=deploy_env,
        directory=check_files_checksums_directory,
        s3_policy=s3_read_only_access_policy,
        job_queue=batch_job_queue,
        payload_object=check_files_checksums_default_payload_object,
        container_overrides_command=[
            "--dataset-id",
            "Ref::dataset_id",
            "--version-id",
            "Ref::version_id",
            "--first-item",
            "Ref::first_item",
            "--assets-table-name",
            "Ref::assets_table_name",
            "--results-table-name",
            "Ref::results_table_name",
        ],
        array_size=array_size,
    )

    for reader in [
        content_iterator_task.lambda_function,
        check_files_checksums_single_task.job_role,
        check_files_checksums_array_task.job_role,
    ]:
        processing_assets_table.grant_read_data(reader)  # type: ignore[arg-type]
        processing_assets_table.grant(
            reader, "dynamodb:DescribeTable"  # type: ignore[arg-type]
        )

    for writer in [
        check_files_checksums_single_task.job_role,
        check_files_checksums_array_task.job_role,
    ]:
        validation_results_table.grant_read_write_data(writer)  # type: ignore[arg-type]
        validation_results_table.grant(
            writer, "dynamodb:DescribeTable"  # type: ignore[arg-type]
        )

    validation_summary_task = LambdaTask(
        self,
        "validation-summary-task",
        directory="validation_summary",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path="$.validation",
        extra_environment={"DEPLOY_ENV": deploy_env},
    )
    validation_results_table.grant_read_data(validation_summary_task.lambda_function)
    validation_results_table.grant(
        validation_summary_task.lambda_function, "dynamodb:DescribeTable"
    )

    validation_failure_lambda_invoke = LambdaTask(
        self,
        "validation-failure-task",
        directory="validation_failure",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path=aws_stepfunctions.JsonPath.DISCARD,
    ).lambda_invoke

    import_dataset_role = aws_iam.Role(
        self,
        "import-dataset",
        assumed_by=aws_iam.ServicePrincipal(  # type: ignore[arg-type]
            "batchoperations.s3.amazonaws.com"
        ),
    )

    import_asset_file_function = ImportFileFunction(
        self,
        directory="import_asset_file",
        invoker=import_dataset_role,
        deploy_env=deploy_env,
        botocore_lambda_layer=botocore_lambda_layer,
    )
    import_metadata_file_function = ImportFileFunction(
        self,
        directory="import_metadata_file",
        invoker=import_dataset_role,
        deploy_env=deploy_env,
        botocore_lambda_layer=botocore_lambda_layer,
    )

    for storage_writer in [
        import_dataset_role,
        import_asset_file_function.role,
        import_metadata_file_function.role,
    ]:
        storage_bucket.grant_read_write(storage_writer)  # type: ignore[arg-type]

    import_dataset_task = LambdaTask(
        self,
        "import-dataset-task",
        directory="import_dataset",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path="$.import_dataset",
        extra_environment={"DEPLOY_ENV": deploy_env},
    )
    assert import_dataset_task.lambda_function.role is not None
    import_dataset_task.lambda_function.role.add_to_policy(
        aws_iam.PolicyStatement(
            resources=[import_dataset_role.role_arn],
            actions=["iam:PassRole"],
        ),
    )
    import_dataset_task.lambda_function.role.add_to_policy(
        aws_iam.PolicyStatement(resources=["*"], actions=["s3:CreateJob"])
    )
    storage_bucket.grant_read_write(import_dataset_task.lambda_function)

    for table in [datasets_table, processing_assets_table]:
        table.grant_read_data(import_dataset_task.lambda_function)
        table.grant(import_dataset_task.lambda_function, "dynamodb:DescribeTable")

    # Parameters
    import_asset_file_function_arn_parameter = aws_ssm.StringParameter(
        self,
        "import asset file function arn",
        string_value=import_asset_file_function.function_arn,
        description=f"Import asset file function ARN for {deploy_env}",
        parameter_name=ParameterName.PROCESSING_IMPORT_ASSET_FILE_FUNCTION_TASK_ARN.value,
    )
    import_metadata_file_function_arn_parameter = aws_ssm.StringParameter(
        self,
        "import metadata file function arn",
        string_value=import_metadata_file_function.function_arn,
        description=f"Import metadata file function ARN for {deploy_env}",
        parameter_name=ParameterName.PROCESSING_IMPORT_METADATA_FILE_FUNCTION_TASK_ARN.value,
    )
    import_dataset_role_arn_parameter = aws_ssm.StringParameter(
        self,
        "import dataset role arn",
        string_value=import_dataset_role.role_arn,
        description=f"Import dataset role ARN for {deploy_env}",
        parameter_name=ParameterName.PROCESSING_IMPORT_DATASET_ROLE_ARN.value,
    )

    grant_parameter_read_access(
        {
            datasets_table.name_parameter: [import_dataset_task.lambda_function],
            import_asset_file_function_arn_parameter: [import_dataset_task.lambda_function],
            import_dataset_role_arn_parameter: [import_dataset_task.lambda_function],
            import_metadata_file_function_arn_parameter: [import_dataset_task.lambda_function],
            processing_assets_table.name_parameter: [
                check_stac_metadata_task.lambda_function.role,
                content_iterator_task.lambda_function,
                import_dataset_task.lambda_function,
            ],
            storage_bucket_parameter: [
                import_dataset_task.lambda_function,
            ],
            validation_results_table.name_parameter: [
                check_stac_metadata_task.lambda_function.role,
                validation_summary_task.lambda_function,
                content_iterator_task.lambda_function,
            ],
        }
    )

    success_task = aws_stepfunctions.Succeed(self, "success")

    ############################################################################################
    # STATE MACHINE
    dataset_version_creation_definition = (
        check_stac_metadata_task.lambda_invoke.next(content_iterator_task.lambda_invoke)
        .next(
            aws_stepfunctions.Choice(  # type: ignore[arg-type]
                self, "check_files_checksums_maybe_array"
            )
            .when(
                aws_stepfunctions.Condition.number_equals("$.content.iteration_size", 1),
                check_files_checksums_single_task.batch_submit_job,
            )
            .otherwise(check_files_checksums_array_task.batch_submit_job)
            .afterwards()
        )
        .next(
            aws_stepfunctions.Choice(self, "content_iteration_finished")
            .when(
                aws_stepfunctions.Condition.number_equals("$.content.next_item", -1),
                validation_summary_task.lambda_invoke.next(
                    aws_stepfunctions.Choice(  # type: ignore[arg-type]
                        self, "validation_successful"
                    )
                    .when(
                        aws_stepfunctions.Condition.boolean_equals("$.validation.success", True),
                        import_dataset_task.lambda_invoke.next(
                            success_task  # type: ignore[arg-type]
                        ),
                    )
                    .otherwise(validation_failure_lambda_invoke)
                ),
            )
            .otherwise(content_iterator_task.lambda_invoke)
        )
    )

    self.state_machine = aws_stepfunctions.StateMachine(
        self,
        f"{deploy_env}-dataset-version-creation",
        definition=dataset_version_creation_definition,  # type: ignore[arg-type]
    )

    self.state_machine_parameter = aws_ssm.StringParameter(
        self,
        "state machine arn",
        description=f"State machine ARN for {deploy_env}",
        parameter_name=ParameterName.PROCESSING_DATASET_VERSION_CREATION_STEP_FUNCTION_ARN.value,  # pylint:disable=line-too-long
        string_value=self.state_machine.state_machine_arn,
    )

    Tags.of(self).add("ApplicationLayer", "processing")  # type: ignore[arg-type]