def _create_lambdas(self):
    clean_pycache()

    for root, dirs, files in os.walk(LAMBDAS_DIR):
        for f in files:
            if f != "__init__.py":
                continue

            parent_folder = os.path.basename(os.path.dirname(root))
            lambda_folder = os.path.basename(root)
            name = f"{parent_folder}-{lambda_folder}"
            lambda_config = self.lambdas_config[name]

            layers = []
            for layer_name in lambda_config["layers"]:
                layers.append(self.layers[layer_name])

            lambda_role = Role(
                self, f"{name}_role",
                assumed_by=ServicePrincipal(service="lambda.amazonaws.com")
            )
            for policy in lambda_config["policies"]:
                lambda_role.add_to_policy(policy)
            lambda_role.add_managed_policy(
                ManagedPolicy.from_aws_managed_policy_name(
                    "service-role/AWSLambdaBasicExecutionRole"))

            lambda_args = {
                "code": Code.from_asset(root),
                "handler": "__init__.handle",
                "runtime": Runtime.PYTHON_3_8,
                "layers": layers,
                "function_name": name,
                "environment": lambda_config["variables"],
                "role": lambda_role,
                "timeout": Duration.seconds(lambda_config["timeout"]),
                "memory_size": lambda_config["memory"],
            }
            if "concurrent_executions" in lambda_config:
                lambda_args["reserved_concurrent_executions"] = lambda_config["concurrent_executions"]

            self.lambdas[name] = Function(self, name, **lambda_args)

    self.lambdas["sqs_handlers-post_anime"].add_event_source(SqsEventSource(self.post_anime_queue))

    Rule(
        self, "titles_updater",
        schedule=Schedule.cron(hour="2", minute="10"),
        targets=[LambdaFunction(self.lambdas["crons-titles_updater"])]
    )
    Rule(
        self, "episodes_updater",
        schedule=Schedule.cron(hour="4", minute="10"),
        targets=[LambdaFunction(self.lambdas["crons-episodes_updater"])]
    )
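The loop above reads per-function settings from self.lambdas_config, keyed by "<parent_folder>-<lambda_folder>". The mapping itself is not shown in this example; a minimal sketch of one assumed entry, using only the keys the code actually looks up ("layers", "policies", "variables", "timeout", "memory", and the optional "concurrent_executions"), could look like:

# Hypothetical shape of a lambdas_config entry; key names come from the lookups above,
# the concrete values are purely illustrative.
lambdas_config = {
    "sqs_handlers-post_anime": {
        "layers": ["common"],                  # layer names resolved through self.layers
        "policies": [],                        # iam.PolicyStatement objects added to the role
        "variables": {"LOG_LEVEL": "INFO"},    # Lambda environment variables
        "timeout": 60,                         # seconds
        "memory": 256,                         # MB
        "concurrent_executions": 5,            # optional reserved concurrency
    },
}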
def __init__(self, scope: core.Construct, _id: str, **kwargs) -> None:
    super().__init__(scope, _id, **kwargs)

    ddb_file_list = ddb.Table(self, "ddb",
                              partition_key=ddb.Attribute(name="Key", type=ddb.AttributeType.STRING),
                              billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

    sqs_queue_DLQ = sqs.Queue(self, "sqs_DLQ",
                              visibility_timeout=core.Duration.minutes(15),
                              retention_period=core.Duration.days(14))
    sqs_queue = sqs.Queue(self, "sqs_queue",
                          visibility_timeout=core.Duration.minutes(15),
                          retention_period=core.Duration.days(14),
                          dead_letter_queue=sqs.DeadLetterQueue(
                              max_receive_count=100,
                              queue=sqs_queue_DLQ))

    handler = lam.Function(self, "lambdaFunction",
                           code=lam.Code.asset("./lambda"),
                           handler="lambda_function.lambda_handler",
                           runtime=lam.Runtime.PYTHON_3_8,
                           memory_size=1024,
                           timeout=core.Duration.minutes(15),
                           environment={
                               'table_name': ddb_file_list.table_name,
                               'queue_name': sqs_queue.queue_name,
                               'Des_bucket_default': Des_bucket_default,
                               'Des_prefix_default': Des_prefix_default,
                               'Des_region': Des_region,
                               'StorageClass': StorageClass,
                               'aws_access_key_id': aws_access_key_id,
                               'aws_secret_access_key': aws_secret_access_key
                           })

    ddb_file_list.grant_read_write_data(handler)
    handler.add_event_source(SqsEventSource(sqs_queue))

    s3bucket = s3.Bucket(self, "s3bucket")
    s3bucket.grant_read(handler)
    s3bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                    s3n.SqsDestination(sqs_queue))

    # You can import an existing bucket and grant access to the Lambda function:
    # exist_s3bucket = s3.Bucket.from_bucket_name(self, "import_bucket",
    #                                             bucket_name="your_bucket_name")
    # exist_s3bucket.grant_read(handler)
    # But you have to add SQS as the imported bucket's event notification manually;
    # that isn't supported by CloudFormation.
    # A workaround is to add on_cloud_trail_event for the bucket, but that triggers CloudTrail first.
    # Because the bucket is imported, you have to create the bucket-event-to-SQS trigger by hand
    # and set the SQS permission that allows the bucket to send to the queue.

    core.CfnOutput(self, "DynamoDB_Table", value=ddb_file_list.table_name)
    core.CfnOutput(self, "SQS_Job_Queue", value=sqs_queue.queue_name)
    core.CfnOutput(self, "SQS_Job_Queue_DLQ", value=sqs_queue_DLQ.queue_name)
    core.CfnOutput(self, "Worker_Lambda_Function", value=handler.function_name)
    core.CfnOutput(self, "New_S3_Bucket", value=s3bucket.bucket_name)
def add_endpoint(self, bucket: s3.Bucket, fn: Function):
    # create the queue
    queue = sqs.Queue(
        self, f'{fn.id_prefix}Queue',
        dead_letter_queue=sqs.DeadLetterQueue(
            max_receive_count=5,
            queue=sqs.Queue(self, f'{fn.id_prefix}DLQ',
                            queue_name=f'{fn.queue_name}-dlq')),
        queue_name=fn.queue_name)

    # create the receiver function
    # add the queue url as an environment variable
    receiver_function = lambda_.Function(
        self, f'{fn.id_prefix}ReceiverFunction',
        code=fn.function_code,
        environment={'QUEUE_URL': queue.queue_url},
        function_name=f'{fn.function_name_prefix}-receiver',
        handler=fn.receiver_function_handler,
        layers=[fn.function_dependencies_layer],
        # memory_size=256,
        runtime=lambda_.Runtime.PYTHON_3_8)

    # allow the receiver function to enqueue messages
    queue.grant_send_messages(receiver_function)

    # route requests to the receiver lambda
    self.api.add_routes(
        integration=apigw.LambdaProxyIntegration(handler=receiver_function),
        methods=[fn.api_method],
        path=fn.api_path)

    # create the handler function
    # add the bucket name as an environment variable
    handler_function = lambda_.Function(
        self, f'{fn.id_prefix}HandlerFunction',
        code=fn.function_code,
        environment={'BUCKET_NAME': bucket.bucket_name},
        function_name=f'{fn.function_name_prefix}-handler',
        handler=fn.handler_function_handler,
        layers=[fn.function_dependencies_layer],
        # memory_size=256,
        runtime=lambda_.Runtime.PYTHON_3_8)

    # add the queue as a trigger for the handler function
    handler_function.add_event_source(SqsEventSource(queue))

    # allow the handler function to access the bucket
    bucket.grant_read_write(handler_function)
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    self._account_id = os.environ["CDK_DEFAULT_ACCOUNT"]
    self._region = os.environ["CDK_DEFAULT_REGION"]

    self._queue = _sqs.Queue(
        self, "ApigwV2SqsLambdaQueue",
        visibility_timeout=Duration.seconds(300),
    )
    self._sqs_event_source = SqsEventSource(self._queue)

    self._fn = _lambda.Function(
        self, 'SqsMessageHandler',
        runtime=_lambda.Runtime.PYTHON_3_8,
        handler='app.handler',
        code=_lambda.Code.from_asset(path='src'),
        timeout=Duration.minutes(3),
        memory_size=128,
        environment={
            'REGION': self._region,
            'ACCOUNT_ID': self._account_id
        },
    )
    self._fn.add_event_source(self._sqs_event_source)

    self._http_api = self._create_apigw_v2()
    self._integration_role = self._create_apigw_to_sqs_role()
    self._send_msg_route = self._create_sqs_send_msg_route()

    # Enable Auto Deploy
    self._stage = self._create_stage()

    # Outputs
    CfnOutput(self, "API Endpoint",
              description="API Endpoint",
              value=self._http_api.attr_api_endpoint)
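The stack above wires the queue to app.handler in src/, but the handler code itself is not part of this example. A minimal sketch of what such a handler could look like, assuming it only iterates over the standard SQS event records and logs their bodies:

# Hypothetical src/app.py behind the 'SqsMessageHandler' function above; the actual
# handler code isn't shown in this example, so this is an illustrative sketch only.
import json
import logging

logger = logging.getLogger()
logger.setLevel(logging.INFO)

def handler(event, context):
    # An SQS-triggered Lambda receives a batch of messages under "Records".
    records = event.get("Records", [])
    for record in records:
        logger.info("Message %s: %s", record.get("messageId"), record["body"])
    return {"statusCode": 200, "body": json.dumps({"processed": len(records)})}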
def _create_lambda_fn(self, envs: EnvSettings, memory_size: int, queue: Queue):
    is_app_only = self.node.try_get_context("is_app_only")

    if is_app_only == "true":
        code = Code.from_asset(path="../backend/functions/worker/.serverless/main.zip")
    else:
        code = Code.from_cfn_parameters()

    function = Function(
        self,
        f"data-processing-worker-{memory_size}",
        function_name=f"{envs.project_name}-data-processing-{memory_size}",
        code=code,
        runtime=Runtime.PYTHON_3_8,
        handler="handler.main",
        environment={
            "AWS_STORAGE_BUCKET_NAME": self.app_bucket.bucket_name,
            "IMAGE_SCRAPING_FETCH_TIMEOUT": "15",
            "AWS_IMAGE_STORAGE_BUCKET_NAME": self.resize_lambda_image_bucket.bucket_name,
            "AWS_IMAGE_STATIC_URL": self.resize_lambda_image_bucket.bucket_website_url,
            "BACKEND_URL": self.backend_url,
            "LAMBDA_AUTH_TOKEN": self.lambda_auth_token.secret_value.to_string(),
        },
        memory_size=memory_size,
        timeout=Duration.seconds(300),
        tracing=Tracing.ACTIVE,
    )

    function.add_event_source(SqsEventSource(queue, batch_size=1))

    self.app_bucket.grant_read_write(function.role)
    self.resize_lambda_image_bucket.grant_read_write(function.role)

    return function, code
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    queue = _sqs.Queue(self, "queue", queue_name="queue")

    table = _dynamodb.Table(
        self, "table",
        partition_key=_dynamodb.Attribute(name="id", type=_dynamodb.AttributeType.NUMBER))

    publisherFunction = _lambda.Function(
        self, 'publisher',
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.Code.asset('projectOne/publisher'),
        handler='publisher.handler',
        environment={"QUEUE_URL": queue.queue_url}
    )

    apiGateway = _apigateway.RestApi(
        self, "api",
        deploy_options=_apigateway.StageOptions(stage_name="dev")
    )
    lambda_integration = _apigateway.LambdaIntegration(publisherFunction)
    apiGateway.root.add_method('GET', lambda_integration)

    subscriberFunction = _lambda.Function(
        self, 'subscriber',
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.Code.asset('projectOne/subscriber'),
        handler='subscriber.handler',
        environment={
            "QUEUE_URL": queue.queue_url,
            "TABLE_NAME": table.table_name
        },
    )
    subscriberFunction.add_event_source(SqsEventSource(queue, batch_size=10))

    queue.grant_send_messages(publisherFunction)
    table.grant(subscriberFunction, "dynamodb:PutItem")
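The publisher and subscriber handler code under projectOne/ is not included in this example. As a hedged illustration of the flow this stack sets up, a subscriber along the following lines would read the batched SQS records and write them to the table named in TABLE_NAME (module layout and record shape are assumptions, not the original code):

# Hypothetical projectOne/subscriber/subscriber.py; the real handler isn't part of
# this example, so this only illustrates the SQS-to-DynamoDB flow the stack wires up.
import json
import os

import boto3

table = boto3.resource("dynamodb").Table(os.environ["TABLE_NAME"])

def handler(event, context):
    # SQS delivers up to batch_size=10 messages per invocation under "Records".
    for record in event.get("Records", []):
        item = json.loads(record["body"])
        # The table's partition key is a NUMBER attribute named "id" (see the stack above).
        table.put_item(Item=item)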
def __init__(
    self,
    scope: Construct,
    stack_id: str,
    *,
    botocore_lambda_layer: aws_lambda_python.PythonLayerVersion,
    env_name: str,
    storage_bucket: aws_s3.Bucket,
    validation_results_table: Table,
) -> None:
    # pylint: disable=too-many-locals, too-many-statements
    super().__init__(scope, stack_id)

    ############################################################################################
    # PROCESSING ASSETS TABLE
    processing_assets_table = Table(
        self,
        f"{env_name}-processing-assets",
        env_name=env_name,
        parameter_name=ParameterName.PROCESSING_ASSETS_TABLE_NAME,
        sort_key=aws_dynamodb.Attribute(name="sk", type=aws_dynamodb.AttributeType.STRING),
    )

    ############################################################################################
    # BATCH JOB DEPENDENCIES
    batch_job_queue = BatchJobQueue(
        self,
        "batch-job-queue",
        env_name=env_name,
        processing_assets_table=processing_assets_table,
    ).job_queue

    s3_read_only_access_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
        "AmazonS3ReadOnlyAccess"
    )

    ############################################################################################
    # UPDATE CATALOG UPDATE MESSAGE QUEUE
    dead_letter_queue = aws_sqs.Queue(
        self,
        "dead-letter-queue",
        visibility_timeout=LAMBDA_TIMEOUT,
    )
    self.message_queue = aws_sqs.Queue(
        self,
        "update-catalog-message-queue",
        visibility_timeout=LAMBDA_TIMEOUT,
        dead_letter_queue=aws_sqs.DeadLetterQueue(max_receive_count=3, queue=dead_letter_queue),
    )
    self.message_queue_name_parameter = aws_ssm.StringParameter(
        self,
        "update-catalog-message-queue-name",
        string_value=self.message_queue.queue_name,
        description=f"Update Catalog Message Queue Name for {env_name}",
        parameter_name=ParameterName.UPDATE_CATALOG_MESSAGE_QUEUE_NAME.value,
    )

    populate_catalog_lambda = BundledLambdaFunction(
        self,
        "populate-catalog-bundled-lambda-function",
        directory="populate_catalog",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
        botocore_lambda_layer=botocore_lambda_layer,
    )

    self.message_queue.grant_consume_messages(populate_catalog_lambda)
    populate_catalog_lambda.add_event_source(
        SqsEventSource(self.message_queue, batch_size=1)  # type: ignore[arg-type]
    )

    ############################################################################################
    # STATE MACHINE TASKS
    check_stac_metadata_task = LambdaTask(
        self,
        "check-stac-metadata-task",
        directory="check_stac_metadata",
        botocore_lambda_layer=botocore_lambda_layer,
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    assert check_stac_metadata_task.lambda_function.role
    check_stac_metadata_task.lambda_function.role.add_managed_policy(
        policy=s3_read_only_access_policy
    )

    for table in [processing_assets_table, validation_results_table]:
        table.grant_read_write_data(check_stac_metadata_task.lambda_function)
        table.grant(
            check_stac_metadata_task.lambda_function,
            "dynamodb:DescribeTable",
        )

    content_iterator_task = LambdaTask(
        self,
        "content-iterator-task",
        directory="content_iterator",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path=f"$.{CONTENT_KEY}",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )

    check_files_checksums_directory = "check_files_checksums"
    check_files_checksums_default_payload_object = {
        f"{DATASET_ID_KEY}.$": f"$.{DATASET_ID_KEY}",
        f"{VERSION_ID_KEY}.$": f"$.{VERSION_ID_KEY}",
        f"{METADATA_URL_KEY}.$": f"$.{METADATA_URL_KEY}",
        f"{FIRST_ITEM_KEY}.$": f"$.{CONTENT_KEY}.{FIRST_ITEM_KEY}",
        f"{ASSETS_TABLE_NAME_KEY}.$": f"$.{CONTENT_KEY}.{ASSETS_TABLE_NAME_KEY}",
        f"{RESULTS_TABLE_NAME_KEY}.$": f"$.{CONTENT_KEY}.{RESULTS_TABLE_NAME_KEY}",
    }
    check_files_checksums_single_task = BatchSubmitJobTask(
        self,
        "check-files-checksums-single-task",
        env_name=env_name,
        directory=check_files_checksums_directory,
        s3_policy=s3_read_only_access_policy,
        job_queue=batch_job_queue,
        payload_object=check_files_checksums_default_payload_object,
        container_overrides_command=[
            "--dataset-id", f"Ref::{DATASET_ID_KEY}",
            "--version-id", f"Ref::{VERSION_ID_KEY}",
            "--first-item", f"Ref::{FIRST_ITEM_KEY}",
            "--assets-table-name", f"Ref::{ASSETS_TABLE_NAME_KEY}",
            "--results-table-name", f"Ref::{RESULTS_TABLE_NAME_KEY}",
        ],
    )
    array_size = int(
        aws_stepfunctions.JsonPath.number_at(f"$.{CONTENT_KEY}.{ITERATION_SIZE_KEY}")
    )
    check_files_checksums_array_task = BatchSubmitJobTask(
        self,
        "check-files-checksums-array-task",
        env_name=env_name,
        directory=check_files_checksums_directory,
        s3_policy=s3_read_only_access_policy,
        job_queue=batch_job_queue,
        payload_object=check_files_checksums_default_payload_object,
        container_overrides_command=[
            "--dataset-id", f"Ref::{DATASET_ID_KEY}",
            "--version-id", f"Ref::{VERSION_ID_KEY}",
            "--first-item", f"Ref::{FIRST_ITEM_KEY}",
            "--assets-table-name", f"Ref::{ASSETS_TABLE_NAME_KEY}",
            "--results-table-name", f"Ref::{RESULTS_TABLE_NAME_KEY}",
        ],
        array_size=array_size,
    )

    for reader in [
        content_iterator_task.lambda_function,
        check_files_checksums_single_task.job_role,
        check_files_checksums_array_task.job_role,
    ]:
        processing_assets_table.grant_read_data(reader)  # type: ignore[arg-type]
        processing_assets_table.grant(
            reader, "dynamodb:DescribeTable"  # type: ignore[arg-type]
        )

    for writer in [
        check_files_checksums_single_task.job_role,
        check_files_checksums_array_task.job_role,
    ]:
        validation_results_table.grant_read_write_data(writer)  # type: ignore[arg-type]
        validation_results_table.grant(
            writer, "dynamodb:DescribeTable"  # type: ignore[arg-type]
        )

    validation_summary_task = LambdaTask(
        self,
        "validation-summary-task",
        directory="validation_summary",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path=f"$.{VALIDATION_KEY}",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    validation_results_table.grant_read_data(validation_summary_task.lambda_function)
    validation_results_table.grant(
        validation_summary_task.lambda_function, "dynamodb:DescribeTable"
    )

    import_dataset_role = aws_iam.Role(
        self,
        "import-dataset",
        assumed_by=aws_iam.ServicePrincipal(  # type: ignore[arg-type]
            "batchoperations.s3.amazonaws.com"
        ),
    )

    import_asset_file_function = ImportFileFunction(
        self,
        directory="import_asset_file",
        invoker=import_dataset_role,
        env_name=env_name,
        botocore_lambda_layer=botocore_lambda_layer,
    )
    import_metadata_file_function = ImportFileFunction(
        self,
        directory="import_metadata_file",
        invoker=import_dataset_role,
        env_name=env_name,
        botocore_lambda_layer=botocore_lambda_layer,
    )

    import_dataset_task = LambdaTask(
        self,
        "import-dataset-task",
        directory="import_dataset",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path=f"$.{IMPORT_DATASET_KEY}",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    import_dataset_task.lambda_function.add_to_role_policy(
        aws_iam.PolicyStatement(
            resources=[import_dataset_role.role_arn],
            actions=["iam:PassRole"],
        ),
    )
    import_dataset_task.lambda_function.add_to_role_policy(
        aws_iam.PolicyStatement(resources=["*"], actions=["s3:CreateJob"])
    )

    for table in [processing_assets_table]:
        table.grant_read_data(import_dataset_task.lambda_function)
        table.grant(import_dataset_task.lambda_function, "dynamodb:DescribeTable")

    # Import status check
    wait_before_upload_status_check = Wait(
        self,
        "wait-before-upload-status-check",
        time=WaitTime.duration(Duration.seconds(10)),
    )
    upload_status_task = LambdaTask(
        self,
        "upload-status",
        directory="upload_status",
        botocore_lambda_layer=botocore_lambda_layer,
        result_path="$.upload_status",
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    validation_results_table.grant_read_data(upload_status_task.lambda_function)
    validation_results_table.grant(upload_status_task.lambda_function, "dynamodb:DescribeTable")

    upload_status_task.lambda_function.add_to_role_policy(ALLOW_DESCRIBE_ANY_S3_JOB)

    # Parameters
    import_asset_file_function_arn_parameter = aws_ssm.StringParameter(
        self,
        "import asset file function arn",
        string_value=import_asset_file_function.function_arn,
        description=f"Import asset file function ARN for {env_name}",
        parameter_name=ParameterName.PROCESSING_IMPORT_ASSET_FILE_FUNCTION_TASK_ARN.value,
    )
    import_metadata_file_function_arn_parameter = aws_ssm.StringParameter(
        self,
        "import metadata file function arn",
        string_value=import_metadata_file_function.function_arn,
        description=f"Import metadata file function ARN for {env_name}",
        parameter_name=ParameterName.PROCESSING_IMPORT_METADATA_FILE_FUNCTION_TASK_ARN.value,
    )
    import_dataset_role_arn_parameter = aws_ssm.StringParameter(
        self,
        "import dataset role arn",
        string_value=import_dataset_role.role_arn,
        description=f"Import dataset role ARN for {env_name}",
        parameter_name=ParameterName.PROCESSING_IMPORT_DATASET_ROLE_ARN.value,
    )

    update_dataset_catalog = LambdaTask(
        self,
        "update-dataset-catalog",
        directory="update_dataset_catalog",
        botocore_lambda_layer=botocore_lambda_layer,
        extra_environment={ENV_NAME_VARIABLE_NAME: env_name},
    )
    self.message_queue.grant_send_messages(update_dataset_catalog.lambda_function)

    for storage_writer in [
        import_dataset_role,
        import_dataset_task.lambda_function,
        import_asset_file_function,
        import_metadata_file_function,
        populate_catalog_lambda,
        update_dataset_catalog.lambda_function,
    ]:
        storage_bucket.grant_read_write(storage_writer)  # type: ignore[arg-type]

    grant_parameter_read_access(
        {
            import_asset_file_function_arn_parameter: [import_dataset_task.lambda_function],
            import_dataset_role_arn_parameter: [import_dataset_task.lambda_function],
            import_metadata_file_function_arn_parameter: [import_dataset_task.lambda_function],
            processing_assets_table.name_parameter: [
                check_stac_metadata_task.lambda_function,
                content_iterator_task.lambda_function,
                import_dataset_task.lambda_function,
            ],
            validation_results_table.name_parameter: [
                check_stac_metadata_task.lambda_function,
                content_iterator_task.lambda_function,
                validation_summary_task.lambda_function,
                upload_status_task.lambda_function,
            ],
            self.message_queue_name_parameter: [update_dataset_catalog.lambda_function],
        }
    )

    success_task = aws_stepfunctions.Succeed(self, "success")
    upload_failure = aws_stepfunctions.Fail(self, "upload failure")
    validation_failure = aws_stepfunctions.Succeed(self, "validation failure")

    ############################################################################################
    # STATE MACHINE
    dataset_version_creation_definition = (
        check_stac_metadata_task.next(content_iterator_task)
        .next(
            aws_stepfunctions.Choice(  # type: ignore[arg-type]
                self, "check_files_checksums_maybe_array"
            )
            .when(
                aws_stepfunctions.Condition.number_equals(
                    f"$.{CONTENT_KEY}.{ITERATION_SIZE_KEY}", 1
                ),
                check_files_checksums_single_task.batch_submit_job,
            )
            .otherwise(check_files_checksums_array_task.batch_submit_job)
            .afterwards()
        )
        .next(
            aws_stepfunctions.Choice(self, "content_iteration_finished")
            .when(
                aws_stepfunctions.Condition.number_equals(
                    f"$.{CONTENT_KEY}.{NEXT_ITEM_KEY}", -1
                ),
                validation_summary_task.next(
                    aws_stepfunctions.Choice(  # type: ignore[arg-type]
                        self, "validation_successful"
                    )
                    .when(
                        aws_stepfunctions.Condition.boolean_equals(
                            f"$.{VALIDATION_KEY}.{SUCCESS_KEY}", True
                        ),
                        import_dataset_task.next(
                            wait_before_upload_status_check  # type: ignore[arg-type]
                        )
                        .next(upload_status_task)
                        .next(
                            aws_stepfunctions.Choice(
                                self, "import_completed"  # type: ignore[arg-type]
                            )
                            .when(
                                aws_stepfunctions.Condition.and_(
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{ASSET_UPLOAD_KEY}.status", "Complete"
                                    ),
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{METADATA_UPLOAD_KEY}.status",
                                        "Complete",
                                    ),
                                ),
                                update_dataset_catalog.next(
                                    success_task  # type: ignore[arg-type]
                                ),
                            )
                            .when(
                                aws_stepfunctions.Condition.or_(
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{ASSET_UPLOAD_KEY}.status",
                                        "Cancelled",
                                    ),
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{ASSET_UPLOAD_KEY}.status", "Failed"
                                    ),
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{METADATA_UPLOAD_KEY}.status",
                                        "Cancelled",
                                    ),
                                    aws_stepfunctions.Condition.string_equals(
                                        f"$.upload_status.{METADATA_UPLOAD_KEY}.status",
                                        "Failed",
                                    ),
                                ),
                                upload_failure,  # type: ignore[arg-type]
                            )
                            .otherwise(
                                wait_before_upload_status_check  # type: ignore[arg-type]
                            )
                        ),
                    )
                    .otherwise(validation_failure)  # type: ignore[arg-type]
                ),
            )
            .otherwise(content_iterator_task)
        )
    )

    self.state_machine = aws_stepfunctions.StateMachine(
        self,
        f"{env_name}-dataset-version-creation",
        definition=dataset_version_creation_definition,  # type: ignore[arg-type]
    )

    self.state_machine_parameter = aws_ssm.StringParameter(
        self,
        "state machine arn",
        description=f"State machine ARN for {env_name}",
        parameter_name=ParameterName.PROCESSING_DATASET_VERSION_CREATION_STEP_FUNCTION_ARN.value,  # pylint:disable=line-too-long
        string_value=self.state_machine.state_machine_arn,
    )

    Tags.of(self).add("ApplicationLayer", "processing")  # type: ignore[arg-type]
def __init__(self, scope: core.Construct, _id: str, **kwargs) -> None:
    super().__init__(scope, _id, **kwargs)

    # Setup SSM parameters for credentials, bucket_para and ignore_list
    ssm_credential_para = ssm.StringParameter.from_secure_string_parameter_attributes(
        self, "ssm_parameter_credentials",
        parameter_name=ssm_parameter_credentials,
        version=1)

    ssm_bucket_para = ssm.StringParameter(self, "s3bucket_serverless",
                                          string_value=json.dumps(bucket_para, indent=4))

    ssm_parameter_ignore_list = ssm.StringParameter(self, "s3_migrate_ignore_list",
                                                    string_value=ignore_list)

    # Setup DynamoDB
    ddb_file_list = ddb.Table(self, "s3migrate_serverless",
                              partition_key=ddb.Attribute(name="Key", type=ddb.AttributeType.STRING),
                              billing_mode=ddb.BillingMode.PAY_PER_REQUEST)
    ddb_file_list.add_global_secondary_index(
        partition_key=ddb.Attribute(name="desBucket", type=ddb.AttributeType.STRING),
        index_name="desBucket-index",
        projection_type=ddb.ProjectionType.INCLUDE,
        non_key_attributes=["desKey", "versionId"])

    # Setup SQS
    sqs_queue_DLQ = sqs.Queue(self, "s3migrate_serverless_Q_DLQ",
                              visibility_timeout=core.Duration.minutes(15),
                              retention_period=core.Duration.days(14))
    sqs_queue = sqs.Queue(self, "s3migrate_serverless_Q",
                          visibility_timeout=core.Duration.minutes(15),
                          retention_period=core.Duration.days(14),
                          dead_letter_queue=sqs.DeadLetterQueue(
                              max_receive_count=60,
                              queue=sqs_queue_DLQ))

    # Setup API for Lambda to get its IP address (for debugging network routing)
    checkip = api.RestApi(
        self, "lambda-checkip-api",
        cloud_watch_role=True,
        deploy=True,
        description="For Lambda to get its IP address",
        default_integration=api.MockIntegration(
            integration_responses=[
                api.IntegrationResponse(
                    status_code="200",
                    response_templates={"application/json": "$context.identity.sourceIp"})
            ],
            request_templates={"application/json": '{"statusCode": 200}'}),
        endpoint_types=[api.EndpointType.REGIONAL])
    checkip.root.add_method("GET", method_responses=[
        api.MethodResponse(status_code="200",
                           response_models={"application/json": api.Model.EMPTY_MODEL})
    ])

    # Setup Lambda functions
    handler = lam.Function(self, "s3-migrate-worker",
                           code=lam.Code.asset("./lambda"),
                           handler="lambda_function_worker.lambda_handler",
                           runtime=lam.Runtime.PYTHON_3_8,
                           memory_size=1024,
                           timeout=core.Duration.minutes(15),
                           tracing=lam.Tracing.ACTIVE,
                           environment={
                               'table_queue_name': ddb_file_list.table_name,
                               'Des_bucket_default': Des_bucket_default,
                               'Des_prefix_default': Des_prefix_default,
                               'StorageClass': StorageClass,
                               'checkip_url': checkip.url,
                               'ssm_parameter_credentials': ssm_parameter_credentials,
                               'JobType': JobType,
                               'MaxRetry': MaxRetry,
                               'MaxThread': MaxThread,
                               'MaxParallelFile': MaxParallelFile,
                               'JobTimeout': JobTimeout,
                               'UpdateVersionId': UpdateVersionId,
                               'GetObjectWithVersionId': GetObjectWithVersionId
                           })

    handler_jobsender = lam.Function(
        self, "s3-migrate-jobsender",
        code=lam.Code.asset("./lambda"),
        handler="lambda_function_jobsender.lambda_handler",
        runtime=lam.Runtime.PYTHON_3_8,
        memory_size=1024,
        timeout=core.Duration.minutes(15),
        tracing=lam.Tracing.ACTIVE,
        environment={
            'table_queue_name': ddb_file_list.table_name,
            'StorageClass': StorageClass,
            'checkip_url': checkip.url,
            'sqs_queue': sqs_queue.queue_name,
            'ssm_parameter_credentials': ssm_parameter_credentials,
            'ssm_parameter_ignore_list': ssm_parameter_ignore_list.parameter_name,
            'ssm_parameter_bucket': ssm_bucket_para.parameter_name,
            'JobType': JobType,
            'MaxRetry': MaxRetry,
            'JobsenderCompareVersionId': JobsenderCompareVersionId
        })

    # Allow Lambda to read/write DDB and SQS
    ddb_file_list.grant_read_write_data(handler)
    ddb_file_list.grant_read_write_data(handler_jobsender)
    sqs_queue.grant_send_messages(handler_jobsender)

    # SQS triggers the Lambda worker
    handler.add_event_source(SqsEventSource(sqs_queue, batch_size=1))

    # Option 1: Create an S3 bucket; all new objects in this bucket will be transmitted by the Lambda worker
    s3bucket = s3.Bucket(self, "s3_new_migrate")
    s3bucket.grant_read(handler)
    s3bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                    s3n.SqsDestination(sqs_queue))

    # Option 2: Allow existing S3 buckets to be read by the Lambda functions.
    # The jobsender Lambda scans and compares these buckets and triggers worker Lambdas to transmit.
    bucket_name = ''
    for b in bucket_para:
        if bucket_name != b['src_bucket']:  # Skip if the same bucket is listed more than once
            bucket_name = b['src_bucket']
            s3exist_bucket = s3.Bucket.from_bucket_name(
                self,
                bucket_name,  # use the bucket name as the construct id
                bucket_name=bucket_name)
            if JobType == 'PUT':
                s3exist_bucket.grant_read(handler_jobsender)
                s3exist_bucket.grant_read(handler)
            else:  # 'GET' mode
                s3exist_bucket.grant_read_write(handler_jobsender)
                s3exist_bucket.grant_read_write(handler)

    # Allow Lambda to read SSM parameters
    ssm_bucket_para.grant_read(handler_jobsender)
    ssm_credential_para.grant_read(handler)
    ssm_credential_para.grant_read(handler_jobsender)
    ssm_parameter_ignore_list.grant_read(handler_jobsender)

    # Schedule cron event to trigger the jobsender Lambda every hour:
    event.Rule(self, 'cron_trigger_jobsender',
               schedule=event.Schedule.rate(core.Duration.hours(1)),
               targets=[target.LambdaFunction(handler_jobsender)])

    # TODO: Trigger the event immediately; add a custom resource Lambda to invoke handler_jobsender

    # Create Lambda log filters to build network traffic metrics
    handler.log_group.add_metric_filter(
        "Completed-bytes",
        metric_name="Completed-bytes",
        metric_namespace="s3_migrate",
        metric_value="$bytes",
        filter_pattern=logs.FilterPattern.literal('[info, date, sn, p="--->Complete", bytes, key]'))
    handler.log_group.add_metric_filter(
        "Uploading-bytes",
        metric_name="Uploading-bytes",
        metric_namespace="s3_migrate",
        metric_value="$bytes",
        filter_pattern=logs.FilterPattern.literal('[info, date, sn, p="--->Uploading", bytes, key]'))
    handler.log_group.add_metric_filter(
        "Downloading-bytes",
        metric_name="Downloading-bytes",
        metric_namespace="s3_migrate",
        metric_value="$bytes",
        filter_pattern=logs.FilterPattern.literal('[info, date, sn, p="--->Downloading", bytes, key]'))
    handler.log_group.add_metric_filter(
        "MaxMemoryUsed",
        metric_name="MaxMemoryUsed",
        metric_namespace="s3_migrate",
        metric_value="$memory",
        filter_pattern=logs.FilterPattern.literal(
            '[head="REPORT", a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, '
            'a13, a14, a15, a16, memory, MB="MB", rest]'))

    lambda_metric_Complete = cw.Metric(namespace="s3_migrate",
                                       metric_name="Completed-bytes",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))
    lambda_metric_Upload = cw.Metric(namespace="s3_migrate",
                                     metric_name="Uploading-bytes",
                                     statistic="Sum",
                                     period=core.Duration.minutes(1))
    lambda_metric_Download = cw.Metric(namespace="s3_migrate",
                                       metric_name="Downloading-bytes",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))
    lambda_metric_MaxMemoryUsed = cw.Metric(namespace="s3_migrate",
                                            metric_name="MaxMemoryUsed",
                                            statistic="Maximum",
                                            period=core.Duration.minutes(1))

    handler.log_group.add_metric_filter(
        "ERROR",
        metric_name="ERROR-Logs",
        metric_namespace="s3_migrate",
        metric_value="1",
        filter_pattern=logs.FilterPattern.literal('"ERROR"'))
    handler.log_group.add_metric_filter(
        "WARNING",
        metric_name="WARNING-Logs",
        metric_namespace="s3_migrate",
        metric_value="1",
        filter_pattern=logs.FilterPattern.literal('"WARNING"'))
    # Task timed out
    handler.log_group.add_metric_filter(
        "TIMEOUT",
        metric_name="TIMEOUT-Logs",
        metric_namespace="s3_migrate",
        metric_value="1",
        filter_pattern=logs.FilterPattern.literal('"Task timed out"'))

    log_metric_ERROR = cw.Metric(namespace="s3_migrate",
                                 metric_name="ERROR-Logs",
                                 statistic="Sum",
                                 period=core.Duration.minutes(1))
    log_metric_WARNING = cw.Metric(namespace="s3_migrate",
                                   metric_name="WARNING-Logs",
                                   statistic="Sum",
                                   period=core.Duration.minutes(1))
    log_metric_TIMEOUT = cw.Metric(namespace="s3_migrate",
                                   metric_name="TIMEOUT-Logs",
                                   statistic="Sum",
                                   period=core.Duration.minutes(1))

    # Dashboard to monitor SQS and Lambda
    board = cw.Dashboard(self, "s3_migrate_serverless")

    board.add_widgets(
        cw.GraphWidget(title="Lambda-NETWORK",
                       left=[lambda_metric_Download, lambda_metric_Upload, lambda_metric_Complete]),
        cw.GraphWidget(title="Lambda-concurrent",
                       left=[handler.metric(metric_name="ConcurrentExecutions",
                                            period=core.Duration.minutes(1))]),
        cw.GraphWidget(title="Lambda-invocations/errors/throttles",
                       left=[handler.metric_invocations(period=core.Duration.minutes(1)),
                             handler.metric_errors(period=core.Duration.minutes(1)),
                             handler.metric_throttles(period=core.Duration.minutes(1))]),
        cw.GraphWidget(title="Lambda-duration",
                       left=[handler.metric_duration(period=core.Duration.minutes(1))]),
    )
    board.add_widgets(
        cw.GraphWidget(title="Lambda_MaxMemoryUsed(MB)",
                       left=[lambda_metric_MaxMemoryUsed]),
        cw.GraphWidget(title="ERROR/WARNING Logs",
                       left=[log_metric_ERROR],
                       right=[log_metric_WARNING, log_metric_TIMEOUT]),
        cw.GraphWidget(title="SQS-Jobs",
                       left=[sqs_queue.metric_approximate_number_of_messages_visible(
                                 period=core.Duration.minutes(1)),
                             sqs_queue.metric_approximate_number_of_messages_not_visible(
                                 period=core.Duration.minutes(1))]),
        cw.SingleValueWidget(
            title="Running/Waiting and Dead Jobs",
            metrics=[sqs_queue.metric_approximate_number_of_messages_not_visible(
                         period=core.Duration.minutes(1)),
                     sqs_queue.metric_approximate_number_of_messages_visible(
                         period=core.Duration.minutes(1)),
                     sqs_queue_DLQ.metric_approximate_number_of_messages_not_visible(
                         period=core.Duration.minutes(1)),
                     sqs_queue_DLQ.metric_approximate_number_of_messages_visible(
                         period=core.Duration.minutes(1))],
            height=6))

    # Alarm for the DLQ
    alarm_DLQ = cw.Alarm(
        self, "SQS_DLQ",
        metric=sqs_queue_DLQ.metric_approximate_number_of_messages_visible(),
        threshold=0,
        comparison_operator=cw.ComparisonOperator.GREATER_THAN_THRESHOLD,
        evaluation_periods=1,
        datapoints_to_alarm=1)
    alarm_topic = sns.Topic(self, "SQS queue-DLQ has dead letter")
    alarm_topic.add_subscription(subscription=sub.EmailSubscription(alarm_email))
    alarm_DLQ.add_alarm_action(action.SnsAction(alarm_topic))

    core.CfnOutput(self, "Dashboard", value="CloudWatch Dashboard name s3_migrate_serverless")
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # The code that defines your stack goes here

    ###########################################################################
    # AWS SECRETS MANAGER - Templated secret
    ###########################################################################
    # templated_secret = aws_secretsmanager.Secret(self, "TemplatedSecret",
    #     generate_secret_string=aws_secretsmanager.SecretStringGenerator(
    #         secret_string_template="{\"username\":\"cleanbox\"}",
    #         generate_string_key="password"
    #     )
    # )

    ###########################################################################
    # CUSTOM CLOUDFORMATION RESOURCE
    ###########################################################################
    # customlambda = aws_lambda.Function(self, 'customconfig',
    #     handler='customconfig.on_event',
    #     runtime=aws_lambda.Runtime.PYTHON_3_7,
    #     code=aws_lambda.Code.asset('customconfig'),
    # )
    # customlambda_statement = aws_iam.PolicyStatement(actions=["events:PutRule"], conditions=None, effect=None, not_actions=None, not_principals=None, not_resources=None, principals=None, resources=["*"], sid=None)
    # customlambda.add_to_role_policy(statement=customlambda_statement)

    # my_provider = cr.Provider(self, "MyProvider",
    #     on_event_handler=customlambda,
    #     # is_complete_handler=is_complete,  # optional async "waiter"
    #     log_retention=logs.RetentionDays.SIX_MONTHS
    # )
    # CustomResource(self, 'customconfigresource', service_token=my_provider.service_token)

    ###########################################################################
    # AWS LAMBDA FUNCTIONS
    ###########################################################################
    sqs_to_elastic_cloud = aws_lambda.Function(
        self, 'sqs_to_elastic_cloud',
        handler='sqs_to_elastic_cloud.lambda_handler',
        runtime=aws_lambda.Runtime.PYTHON_3_7,
        code=aws_lambda.Code.asset('sqs_to_elastic_cloud'),
        memory_size=4096,
        timeout=core.Duration.seconds(300),
        log_retention=logs.RetentionDays.ONE_DAY)

    sqs_to_elasticsearch_service = aws_lambda.Function(
        self, 'sqs_to_elasticsearch_service',
        handler='sqs_to_elasticsearch_service.lambda_handler',
        runtime=aws_lambda.Runtime.PYTHON_3_7,
        code=aws_lambda.Code.asset('sqs_to_elasticsearch_service'),
        memory_size=4096,
        timeout=core.Duration.seconds(300),
        log_retention=logs.RetentionDays.ONE_DAY)

    # sqs_to_elasticsearch_service.add_environment("kinesis_firehose_name", "-")
    # sqs_to_elastic_cloud.add_environment("index_name", "-")

    ###########################################################################
    # AWS LAMBDA PERMISSIONS
    ###########################################################################
    # sqs_to_elasticsearch_service_permission = aws_lambda.Permission(*, principal, action=None, event_source_token=None, scope=None, source_account=None, source_arn=None)

    ###########################################################################
    # AMAZON S3 BUCKETS
    ###########################################################################
    access_log_bucket = aws_s3.Bucket(self, "access_log_bucket")
    kinesis_log_bucket = aws_s3.Bucket(self, "kinesis_log_bucket")

    ###########################################################################
    # LAMBDA SUPPLEMENTAL POLICIES
    ###########################################################################
    lambda_supplemental_policy_statement = aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        actions=["s3:Get*", "s3:Head*", "s3:List*", "firehose:*"],
        resources=["*"])

    sqs_to_elastic_cloud.add_to_role_policy(lambda_supplemental_policy_statement)
    sqs_to_elasticsearch_service.add_to_role_policy(lambda_supplemental_policy_statement)

    ###########################################################################
    # AWS SNS TOPICS
    ###########################################################################
    access_log_topic = aws_sns.Topic(self, "access_log_topic")

    ###########################################################################
    # ADD AMAZON S3 BUCKET NOTIFICATIONS
    ###########################################################################
    access_log_bucket.add_event_notification(
        aws_s3.EventType.OBJECT_CREATED,
        aws_s3_notifications.SnsDestination(access_log_topic))

    ###########################################################################
    # AWS SQS QUEUES
    ###########################################################################
    sqs_to_elasticsearch_service_queue_iqueue = aws_sqs.Queue(
        self, "sqs_to_elasticsearch_service_queue_dlq")
    sqs_to_elasticsearch_service_queue_dlq = aws_sqs.DeadLetterQueue(
        max_receive_count=10, queue=sqs_to_elasticsearch_service_queue_iqueue)
    sqs_to_elasticsearch_service_queue = aws_sqs.Queue(
        self, "sqs_to_elasticsearch_service_queue",
        visibility_timeout=core.Duration.seconds(301),
        dead_letter_queue=sqs_to_elasticsearch_service_queue_dlq)

    sqs_to_elastic_cloud_queue_iqueue = aws_sqs.Queue(
        self, "sqs_to_elastic_cloud_queue_dlq")
    sqs_to_elastic_cloud_queue_dlq = aws_sqs.DeadLetterQueue(
        max_receive_count=10, queue=sqs_to_elastic_cloud_queue_iqueue)
    sqs_to_elastic_cloud_queue = aws_sqs.Queue(
        self, "sqs_to_elastic_cloud_queue",
        visibility_timeout=core.Duration.seconds(301),
        dead_letter_queue=sqs_to_elastic_cloud_queue_dlq)

    ###########################################################################
    # AWS SNS TOPIC SUBSCRIPTIONS
    ###########################################################################
    access_log_topic.add_subscription(
        aws_sns_subscriptions.SqsSubscription(sqs_to_elastic_cloud_queue))
    access_log_topic.add_subscription(
        aws_sns_subscriptions.SqsSubscription(sqs_to_elasticsearch_service_queue))

    ###########################################################################
    # AWS LAMBDA SQS EVENT SOURCE
    ###########################################################################
    sqs_to_elastic_cloud.add_event_source(
        SqsEventSource(sqs_to_elastic_cloud_queue, batch_size=10))
    sqs_to_elasticsearch_service.add_event_source(
        SqsEventSource(sqs_to_elasticsearch_service_queue, batch_size=10))

    ###########################################################################
    # AWS ELASTICSEARCH DOMAIN
    ###########################################################################

    ###########################################################################
    # AWS ELASTICSEARCH DOMAIN ACCESS POLICY
    ###########################################################################
    this_aws_account = aws_iam.AccountPrincipal(account_id="012345678912")
    # s3_to_elasticsearch_access_logs_domain_access_policy_statement = aws_iam.PolicyStatement(
    #     principals=[this_aws_account],
    #     effect=aws_iam.Effect.ALLOW,
    #     actions=["es:*"],
    #     resources=["*"]
    # )
    # s3_to_elasticsearch_access_logs_domain_access_policy_statement_list = []
    # s3_to_elasticsearch_access_logs_domain_access_policy_statement_list.append(s3_to_elasticsearch_access_logs_domain_access_policy_statement)

    s3_to_elasticsearch_access_logs_domain = aws_elasticsearch.Domain(
        self, "s3-to-elasticsearch-access-logs-domain",
        # access_policies=s3_to_elasticsearch_access_logs_domain_access_policy_statement_list,
        version=aws_elasticsearch.ElasticsearchVersion.V7_1,
        capacity={
            "master_nodes": 3,
            "data_nodes": 4
        },
        ebs={"volume_size": 100},
        zone_awareness={"availability_zone_count": 2},
        logging={
            "slow_search_log_enabled": True,
            "app_log_enabled": True,
            "slow_index_log_enabled": True
        })

    ###########################################################################
    # AMAZON COGNITO USER POOL
    ###########################################################################
    s3_to_elasticsearch_user_pool = aws_cognito.UserPool(
        self, "s3-to-elasticsearch-access-logs-pool",
        account_recovery=None,
        auto_verify=None,
        custom_attributes=None,
        email_settings=None,
        enable_sms_role=None,
        lambda_triggers=None,
        mfa=None,
        mfa_second_factor=None,
        password_policy=None,
        self_sign_up_enabled=None,
        sign_in_aliases=aws_cognito.SignInAliases(email=True,
                                                  phone=None,
                                                  preferred_username=None,
                                                  username=True),
        sign_in_case_sensitive=None,
        sms_role=None,
        sms_role_external_id=None,
        standard_attributes=None,
        user_invitation=None,
        user_pool_name=None,
        user_verification=None)

    ###########################################################################
    # AMAZON KINESIS FIREHOSE STREAM
    ###########################################################################
    # kinesis_policy_statement = aws_iam.PolicyStatement(
    #     effect=aws_iam.Effect.ALLOW,
    #     # actions=["es:*", "s3:*", "kms:*", "kinesis:*", "lambda:*"],
    #     actions=["*"],
    #     resources=["*"]
    # )
    # kinesis_policy_document = aws_iam.PolicyDocument()
    # kinesis_policy_document.add_statements(kinesis_policy_statement)

    kinesis_firehose_stream_role = aws_iam.Role(
        self, "BaseVPCIAMLogRole",
        assumed_by=aws_iam.ServicePrincipal('firehose.amazonaws.com'),
        role_name=None,
        inline_policies={
            "AllowLogAccess":
            aws_iam.PolicyDocument(
                assign_sids=False,
                statements=[
                    aws_iam.PolicyStatement(
                        actions=[
                            '*', 'es:*', 'logs:PutLogEvents',
                            'logs:DescribeLogGroups', 'logs:DescribeLogStreams'
                        ],
                        effect=aws_iam.Effect('ALLOW'),
                        resources=['*'])
                ])
        })

    RetryOptions = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchRetryOptionsProperty(
        duration_in_seconds=300)
    s3_configuration = aws_kinesisfirehose.CfnDeliveryStream.S3DestinationConfigurationProperty(
        bucket_arn=kinesis_log_bucket.bucket_arn,
        role_arn=kinesis_firehose_stream_role.role_arn)

    ElasticsearchDestinationConfiguration = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchDestinationConfigurationProperty(
        # "BufferingHints" : ElasticsearchBufferingHints,
        # "CloudWatchLoggingOptions" : CloudWatchLoggingOptions,
        # "ClusterEndpoint" : String,
        domain_arn=s3_to_elasticsearch_access_logs_domain.domain_arn,
        index_name="s3-to-elasticsearch-accesslogs",
        index_rotation_period="OneDay",
        # "ProcessingConfiguration" : ProcessingConfiguration,
        retry_options=RetryOptions,
        role_arn=kinesis_firehose_stream_role.role_arn,
        # "S3BackupMode" : String,
        s3_configuration=s3_configuration
        # "TypeName" : String
        # "VpcConfiguration" : VpcConfiguration
    )

    kinesis_firehose_stream = aws_kinesisfirehose.CfnDeliveryStream(
        self, "kinesis_firehose_stream",
        delivery_stream_encryption_configuration_input=None,
        delivery_stream_name=None,
        delivery_stream_type=None,
        elasticsearch_destination_configuration=ElasticsearchDestinationConfiguration,
        extended_s3_destination_configuration=None,
        http_endpoint_destination_configuration=None,
        kinesis_stream_source_configuration=None,
        redshift_destination_configuration=None,
        s3_destination_configuration=None,
        splunk_destination_configuration=None,
        tags=None)

    sqs_to_elasticsearch_service.add_environment("FIREHOSE_NAME", kinesis_firehose_stream.ref)
    sqs_to_elasticsearch_service.add_environment("QUEUEURL", sqs_to_elasticsearch_service_queue.queue_url)
    sqs_to_elasticsearch_service.add_environment("DEBUG", "False")

    sqs_to_elastic_cloud.add_environment("ELASTICCLOUD_SECRET_NAME", "-")
    sqs_to_elastic_cloud.add_environment("ELASTIC_CLOUD_ID", "-")
    sqs_to_elastic_cloud.add_environment("ELASTIC_CLOUD_PASSWORD", "-")
    sqs_to_elastic_cloud.add_environment("ELASTIC_CLOUD_USERNAME", "-")
    sqs_to_elastic_cloud.add_environment("QUEUEURL", sqs_to_elastic_cloud_queue.queue_url)
    sqs_to_elastic_cloud.add_environment("DEBUG", "False")
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # The code that defines your stack goes here

    ###########################################################################
    # AWS SECRETS MANAGER - Templated secret
    ###########################################################################
    # templated_secret = aws_secretsmanager.Secret(self, "TemplatedSecret",
    #     generate_secret_string=aws_secretsmanager.SecretStringGenerator(
    #         secret_string_template="{\"username\":\"cleanbox\"}",
    #         generate_string_key="password"
    #     )
    # )

    ###########################################################################
    # CUSTOM CLOUDFORMATION RESOURCE
    ###########################################################################
    # customlambda = aws_lambda.Function(self, 'customconfig',
    #     handler='customconfig.on_event',
    #     runtime=aws_lambda.Runtime.PYTHON_3_7,
    #     code=aws_lambda.Code.asset('customconfig'),
    # )
    # customlambda_statement = aws_iam.PolicyStatement(actions=["events:PutRule"], conditions=None, effect=None, not_actions=None, not_principals=None, not_resources=None, principals=None, resources=["*"], sid=None)
    # customlambda.add_to_role_policy(statement=customlambda_statement)

    # my_provider = cr.Provider(self, "MyProvider",
    #     on_event_handler=customlambda,
    #     # is_complete_handler=is_complete,  # optional async "waiter"
    #     log_retention=logs.RetentionDays.SIX_MONTHS
    # )
    # CustomResource(self, 'customconfigresource', service_token=my_provider.service_token)

    ###########################################################################
    # AWS LAMBDA FUNCTIONS
    ###########################################################################
    sqs_to_elastic_cloud = aws_lambda.Function(
        self, 'sqs_to_elastic_cloud',
        handler='sqs_to_elastic_cloud.lambda_handler',
        runtime=aws_lambda.Runtime.PYTHON_3_7,
        code=aws_lambda.Code.asset('sqs_to_elastic_cloud'),
        memory_size=4096,
        timeout=core.Duration.seconds(301),
        log_retention=logs.RetentionDays.ONE_DAY)

    sqs_to_elasticsearch_service = aws_lambda.Function(
        self, 'sqs_to_elasticsearch_service',
        handler='sqs_to_elasticsearch_service.lambda_handler',
        runtime=aws_lambda.Runtime.PYTHON_3_7,
        code=aws_lambda.Code.asset('sqs_to_elasticsearch_service'),
        memory_size=4096,
        timeout=core.Duration.seconds(301),
        log_retention=logs.RetentionDays.ONE_DAY)

    ###########################################################################
    # AMAZON S3 BUCKETS
    ###########################################################################
    cloudtrail_log_bucket = aws_s3.Bucket(self, "cloudtrail_log_bucket")

    ###########################################################################
    # LAMBDA SUPPLEMENTAL POLICIES
    ###########################################################################
    lambda_supplemental_policy_statement = aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        actions=["s3:Get*", "s3:Head*", "s3:List*", "firehose:*", "es:*"],
        resources=["*"])

    sqs_to_elastic_cloud.add_to_role_policy(lambda_supplemental_policy_statement)
    sqs_to_elasticsearch_service.add_to_role_policy(lambda_supplemental_policy_statement)

    ###########################################################################
    # AWS SNS TOPICS
    ###########################################################################
    cloudtrail_log_topic = aws_sns.Topic(self, "cloudtrail_log_topic")

    ###########################################################################
    # ADD AMAZON S3 BUCKET NOTIFICATIONS
    ###########################################################################
    cloudtrail_log_bucket.add_event_notification(
        aws_s3.EventType.OBJECT_CREATED,
        aws_s3_notifications.SnsDestination(cloudtrail_log_topic))

    ###########################################################################
    # AWS SQS QUEUES
    ###########################################################################
    sqs_to_elasticsearch_service_queue_iqueue = aws_sqs.Queue(
        self, "sqs_to_elasticsearch_service_queue_dlq")
    sqs_to_elasticsearch_service_queue_dlq = aws_sqs.DeadLetterQueue(
        max_receive_count=10, queue=sqs_to_elasticsearch_service_queue_iqueue)
    sqs_to_elasticsearch_service_queue = aws_sqs.Queue(
        self, "sqs_to_elasticsearch_service_queue",
        visibility_timeout=core.Duration.seconds(300),
        dead_letter_queue=sqs_to_elasticsearch_service_queue_dlq)

    sqs_to_elastic_cloud_queue_iqueue = aws_sqs.Queue(
        self, "sqs_to_elastic_cloud_queue_dlq")
    sqs_to_elastic_cloud_queue_dlq = aws_sqs.DeadLetterQueue(
        max_receive_count=10, queue=sqs_to_elastic_cloud_queue_iqueue)
    sqs_to_elastic_cloud_queue = aws_sqs.Queue(
        self, "sqs_to_elastic_cloud_queue",
        visibility_timeout=core.Duration.seconds(300),
        dead_letter_queue=sqs_to_elastic_cloud_queue_dlq)

    ###########################################################################
    # AWS SNS TOPIC SUBSCRIPTIONS
    ###########################################################################
    cloudtrail_log_topic.add_subscription(
        aws_sns_subscriptions.SqsSubscription(sqs_to_elastic_cloud_queue))
    cloudtrail_log_topic.add_subscription(
        aws_sns_subscriptions.SqsSubscription(sqs_to_elasticsearch_service_queue))

    ###########################################################################
    # AWS LAMBDA SQS EVENT SOURCE
    ###########################################################################
    sqs_to_elastic_cloud.add_event_source(
        SqsEventSource(sqs_to_elastic_cloud_queue, batch_size=10))
    sqs_to_elasticsearch_service.add_event_source(
        SqsEventSource(sqs_to_elasticsearch_service_queue, batch_size=10))

    ###########################################################################
    # AWS ELASTICSEARCH DOMAIN
    ###########################################################################

    ###########################################################################
    # AWS ELASTICSEARCH DOMAIN ACCESS POLICY
    ###########################################################################
    this_aws_account = aws_iam.AccountPrincipal(account_id="012345678912")

    s3_to_elasticsearch_cloudtrail_logs_domain = aws_elasticsearch.Domain(
        self, "s3-to-elasticsearch-cloudtrail-logs-domain",
        version=aws_elasticsearch.ElasticsearchVersion.V7_1,
        capacity={
            "master_nodes": 3,
            "data_nodes": 4
        },
        ebs={"volume_size": 100},
        zone_awareness={"availability_zone_count": 2},
        logging={
            "slow_search_log_enabled": True,
            "app_log_enabled": True,
            "slow_index_log_enabled": True
        })

    ###########################################################################
    # AMAZON COGNITO USER POOL
    ###########################################################################
    s3_to_elasticsearch_user_pool = aws_cognito.UserPool(
        self, "s3-to-elasticsearch-cloudtrial-logs-pool",
        account_recovery=None,
        auto_verify=None,
        custom_attributes=None,
        email_settings=None,
        enable_sms_role=None,
        lambda_triggers=None,
        mfa=None,
        mfa_second_factor=None,
        password_policy=None,
        self_sign_up_enabled=None,
        sign_in_aliases=aws_cognito.SignInAliases(email=True,
                                                  phone=None,
                                                  preferred_username=None,
                                                  username=True),
        sign_in_case_sensitive=None,
        sms_role=None,
        sms_role_external_id=None,
        standard_attributes=None,
        user_invitation=None,
        user_pool_name=None,
        user_verification=None)

    sqs_to_elasticsearch_service.add_environment(
        "ELASTICSEARCH_HOST", s3_to_elasticsearch_cloudtrail_logs_domain.domain_endpoint)
    sqs_to_elasticsearch_service.add_environment(
        "QUEUEURL", sqs_to_elasticsearch_service_queue.queue_url)
    sqs_to_elasticsearch_service.add_environment("DEBUG", "False")

    sqs_to_elastic_cloud.add_environment("ELASTICCLOUD_SECRET_NAME", "-")
    sqs_to_elastic_cloud.add_environment("ELASTIC_CLOUD_ID", "-")
    sqs_to_elastic_cloud.add_environment("ELASTIC_CLOUD_PASSWORD", "-")
    sqs_to_elastic_cloud.add_environment("ELASTIC_CLOUD_USERNAME", "-")
    sqs_to_elastic_cloud.add_environment("QUEUEURL", sqs_to_elastic_cloud_queue.queue_url)
    sqs_to_elastic_cloud.add_environment("DEBUG", "False")

    ###########################################################################
    # AWS CLOUDTRAIL TRAIL
    ###########################################################################
    allevents_trail = aws_cloudtrail.Trail(
        self, "allevents_trail",
        bucket=cloudtrail_log_bucket,
        cloud_watch_log_group=None,
        cloud_watch_logs_retention=None,
        enable_file_validation=None,
        encryption_key=None,
        include_global_service_events=None,
        is_multi_region_trail=True,
        kms_key=None,
        management_events=aws_cloudtrail.ReadWriteType("ALL"),
        s3_key_prefix=None,
        send_to_cloud_watch_logs=False,
        sns_topic=None,
        trail_name=None)
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) ########################################################################### # AWS LAMBDA FUNCTIONS ########################################################################### parse_image_list_file = aws_lambda.Function( self, 'parse_image_list_file', handler='parse_image_list_file.lambda_handler', runtime=aws_lambda.Runtime.PYTHON_3_7, code=aws_lambda.Code.asset('parse_image_list_file'), memory_size=10240, timeout=core.Duration.seconds(300), log_retention=aws_logs.RetentionDays.ONE_DAY) list_objects = aws_lambda.Function( self, 'list_objects', handler='list_objects.lambda_handler', runtime=aws_lambda.Runtime.PYTHON_3_7, code=aws_lambda.Code.asset('list_objects'), memory_size=4096, timeout=core.Duration.seconds(300), log_retention=aws_logs.RetentionDays.ONE_DAY) get_size_and_store = aws_lambda.Function( self, 'get_size_and_store', handler='get_size_and_store.lambda_handler', runtime=aws_lambda.Runtime.PYTHON_3_7, code=aws_lambda.Code.asset('get_size_and_store'), memory_size=4096, timeout=core.Duration.seconds(300), log_retention=aws_logs.RetentionDays.ONE_DAY) ########################################################################### # AMAZON S3 BUCKETS ########################################################################### images_bucket = aws_s3.Bucket(self, "images_bucket") ########################################################################### # LAMBDA SUPPLEMENTAL POLICIES ########################################################################### lambda_supplemental_policy_statement = aws_iam.PolicyStatement( effect=aws_iam.Effect.ALLOW, actions=["s3:Get*", "s3:Head*", "s3:List*", "sqs:*", "es:*"], resources=["*"]) parse_image_list_file.add_to_role_policy( lambda_supplemental_policy_statement) list_objects.add_to_role_policy(lambda_supplemental_policy_statement) get_size_and_store.add_to_role_policy( lambda_supplemental_policy_statement) ########################################################################### # AWS SNS TOPICS ########################################################################### # notification_topic = aws_sns.Topic(self, "notification_topic") ########################################################################### # ADD AMAZON S3 BUCKET NOTIFICATIONS ########################################################################### images_bucket.add_event_notification( aws_s3.EventType.OBJECT_CREATED, aws_s3_notifications.LambdaDestination(parse_image_list_file)) ########################################################################### # AWS SQS QUEUES ########################################################################### comprehend_queue_iqueue = aws_sqs.Queue(self, "comprehend_queue_iqueue") comprehend_queue_iqueue_dlq = aws_sqs.DeadLetterQueue( max_receive_count=10, queue=comprehend_queue_iqueue) comprehend_queue = aws_sqs.Queue( self, "comprehend_queue", visibility_timeout=core.Duration.seconds(301), dead_letter_queue=comprehend_queue_iqueue_dlq) rekognition_queue_iqueue = aws_sqs.Queue(self, "rekognition_queue_iqueue") rekognition_queue_dlq = aws_sqs.DeadLetterQueue( max_receive_count=10, queue=rekognition_queue_iqueue) rekognition_queue = aws_sqs.Queue( self, "rekognition_queue", visibility_timeout=core.Duration.seconds(301), dead_letter_queue=rekognition_queue_dlq) object_queue_iqueue = aws_sqs.Queue(self, "object_queue_iqueue") object_queue_dlq = aws_sqs.DeadLetterQueue(max_receive_count=10, queue=object_queue_iqueue) object_queue = 
aws_sqs.Queue( self, "object_queue", visibility_timeout=core.Duration.seconds(301), dead_letter_queue=object_queue_dlq) ########################################################################### # AWS LAMBDA SQS EVENT SOURCE ########################################################################### get_size_and_store.add_event_source( SqsEventSource(object_queue, batch_size=10)) ########################################################################### # AWS ELASTICSEARCH DOMAIN ########################################################################### s3workflow_domain = aws_elasticsearch.Domain( self, "s3workflow_domain", version=aws_elasticsearch.ElasticsearchVersion.V7_1, capacity={ "master_nodes": 3, "data_nodes": 4 }, ebs={"volume_size": 100}, zone_awareness={"availability_zone_count": 2}, logging={ "slow_search_log_enabled": True, "app_log_enabled": True, "slow_index_log_enabled": True }) ########################################################################### # AMAZON COGNITO USER POOL ########################################################################### s3workflow_pool = aws_cognito.UserPool( self, "s3workflow-pool", account_recovery=None, auto_verify=None, custom_attributes=None, email_settings=None, enable_sms_role=None, lambda_triggers=None, mfa=None, mfa_second_factor=None, password_policy=None, self_sign_up_enabled=None, sign_in_aliases=aws_cognito.SignInAliases(email=True, phone=None, preferred_username=None, username=True), sign_in_case_sensitive=None, sms_role=None, sms_role_external_id=None, standard_attributes=None, user_invitation=None, user_pool_name=None, user_verification=None) ########################################################################### # AMAZON VPC ########################################################################### vpc = aws_ec2.Vpc(self, "s3workflowVPC", max_azs=3) # default is all AZs in region ########################################################################### # AMAZON ECS CLUSTER ########################################################################### cluster = aws_ecs.Cluster(self, "s3", vpc=vpc) ########################################################################### # AMAZON ECS Repositories ########################################################################### rekognition_repository = aws_ecr.Repository( self, "rekognition_repository", image_scan_on_push=True, removal_policy=core.RemovalPolicy("DESTROY")) comprehend_repository = aws_ecr.Repository( self, "comprehend_repository", image_scan_on_push=True, removal_policy=core.RemovalPolicy("DESTROY")) ########################################################################### # AMAZON ECS Roles and Policies ########################################################################### task_execution_policy_statement = aws_iam.PolicyStatement( effect=aws_iam.Effect.ALLOW, actions=[ "logs:*", "ecs:*", "ec2:*", "elasticloadbalancing:*", "ecr:*" ], resources=["*"]) task_execution_policy_document = aws_iam.PolicyDocument() task_execution_policy_document.add_statements( task_execution_policy_statement) task_execution_policy = aws_iam.Policy( self, "task_execution_policy", document=task_execution_policy_document) task_execution_role = aws_iam.Role( self, "task_execution_role", assumed_by=aws_iam.ServicePrincipal('ecs-tasks.amazonaws.com')) task_execution_role.attach_inline_policy(task_execution_policy) task_policy_statement = aws_iam.PolicyStatement( effect=aws_iam.Effect.ALLOW, actions=[ "logs:*", "xray:*", "sqs:*", "s3:*", "rekognition:*", "comprehend:*", "es:*" 
], resources=["*"]) task_policy_document = aws_iam.PolicyDocument() task_policy_document.add_statements(task_policy_statement) task_policy = aws_iam.Policy(self, "task_policy", document=task_policy_document) task_role = aws_iam.Role( self, "task_role", assumed_by=aws_iam.ServicePrincipal('ecs-tasks.amazonaws.com')) task_role.attach_inline_policy(task_policy) ########################################################################### # AMAZON ECS Task definitions ########################################################################### rekognition_task_definition = aws_ecs.TaskDefinition( self, "rekognition_task_definition", compatibility=aws_ecs.Compatibility("FARGATE"), cpu="1024", # ipc_mode=None, memory_mib="2048", network_mode=aws_ecs.NetworkMode("AWS_VPC"), # pid_mode=None, #Not supported in Fargate and Windows containers # placement_constraints=None, execution_role=task_execution_role, # family=None, # proxy_configuration=None, task_role=task_role # volumes=None ) comprehend_task_definition = aws_ecs.TaskDefinition( self, "comprehend_task_definition", compatibility=aws_ecs.Compatibility("FARGATE"), cpu="1024", # ipc_mode=None, memory_mib="2048", network_mode=aws_ecs.NetworkMode("AWS_VPC"), # pid_mode=None, #Not supported in Fargate and Windows containers # placement_constraints=None, execution_role=task_execution_role, # family=None, # proxy_configuration=None, task_role=task_role # volumes=None ) ########################################################################### # AMAZON ECS Images ########################################################################### rekognition_ecr_image = aws_ecs.EcrImage( repository=rekognition_repository, tag="latest") comprehend_ecr_image = aws_ecs.EcrImage( repository=comprehend_repository, tag="latest") ########################################################################### # ENVIRONMENT VARIABLES ########################################################################### environment_variables = {} environment_variables["COMPREHEND_QUEUE"] = comprehend_queue.queue_url environment_variables[ "REKOGNITION_QUEUE"] = rekognition_queue.queue_url environment_variables["IMAGES_BUCKET"] = images_bucket.bucket_name environment_variables[ "ELASTICSEARCH_HOST"] = s3workflow_domain.domain_endpoint parse_image_list_file.add_environment( "ELASTICSEARCH_HOST", s3workflow_domain.domain_endpoint) parse_image_list_file.add_environment("QUEUEURL", rekognition_queue.queue_url) parse_image_list_file.add_environment("DEBUG", "False") parse_image_list_file.add_environment("BUCKET", "-") parse_image_list_file.add_environment("KEY", "-") list_objects.add_environment("QUEUEURL", object_queue.queue_url) list_objects.add_environment("ELASTICSEARCH_HOST", s3workflow_domain.domain_endpoint) list_objects.add_environment("S3_BUCKET_NAME", images_bucket.bucket_name) list_objects.add_environment("S3_BUCKET_PREFIX", "images/") list_objects.add_environment("S3_BUCKET_SUFFIX", "") list_objects.add_environment("LOGGING_LEVEL", "INFO") get_size_and_store.add_environment("QUEUEURL", object_queue.queue_url) get_size_and_store.add_environment("ELASTICSEARCH_HOST", s3workflow_domain.domain_endpoint) get_size_and_store.add_environment("S3_BUCKET_NAME", images_bucket.bucket_name) get_size_and_store.add_environment("S3_BUCKET_PREFIX", "images/") get_size_and_store.add_environment("S3_BUCKET_SUFFIX", "") get_size_and_store.add_environment("LOGGING_LEVEL", "INFO") ########################################################################### # ECS Log Drivers 
###########################################################################
rekognition_task_log_driver = aws_ecs.LogDriver.aws_logs( stream_prefix="s3workflow", log_retention=aws_logs.RetentionDays("ONE_DAY"))
comprehend_task_log_driver = aws_ecs.LogDriver.aws_logs( stream_prefix="s3workflow", log_retention=aws_logs.RetentionDays("ONE_DAY"))
###########################################################################
# ECS Containers
###########################################################################
rekognition_task_definition.add_container( "rekognition_task_definition", image=rekognition_ecr_image, memory_reservation_mib=1024, environment=environment_variables, logging=rekognition_task_log_driver)
comprehend_task_definition.add_container( "comprehend_task_definition", image=comprehend_ecr_image, memory_reservation_mib=1024, environment=environment_variables, logging=comprehend_task_log_driver)
###########################################################################
# AWS ROUTE53 HOSTED ZONE
###########################################################################
hosted_zone = aws_route53.HostedZone( self, "hosted_zone", zone_name="s3workflow.com", comment="private hosted zone for s3workflow system")
hosted_zone.add_vpc(vpc)
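The stack above creates three nearly identical queue-plus-DLQ pairs, each with a 301-second visibility timeout (one second longer than the 300-second Lambda timeouts). A small helper could factor out that repetition; a minimal sketch follows, where the helper name and defaults are assumptions for illustration only.

from aws_cdk import core, aws_sqs


def make_queue_with_dlq(scope: core.Construct, name: str,
                        visibility_timeout_seconds: int = 301,
                        max_receive_count: int = 10) -> aws_sqs.Queue:
    """Create a queue whose repeatedly failed messages land in a companion DLQ."""
    dlq = aws_sqs.Queue(scope, f"{name}_iqueue")
    return aws_sqs.Queue(
        scope, name,
        # keep the visibility timeout slightly longer than the consumer's timeout
        visibility_timeout=core.Duration.seconds(visibility_timeout_seconds),
        dead_letter_queue=aws_sqs.DeadLetterQueue(
            max_receive_count=max_receive_count, queue=dlq))

# e.g. object_queue = make_queue_with_dlq(self, "object_queue")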
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) # The code that defines your stack goes here # ********* SNS Topics ************* jobCompletionTopic = sns.Topic(self, "JobCompletion") # **********IAM Roles****************************** textractServiceRole = iam.Role( self, "TextractServiceRole", assumed_by=iam.ServicePrincipal("textract.amazonaws.com"), ) textractServiceRole.add_to_policy( iam.PolicyStatement( effect=iam.Effect.ALLOW, resources=[jobCompletionTopic.topic_arn], actions=["sns:Publish"], )) comprehendServiceRole = iam.Role( self, "ComprehendServiceRole", assumed_by=iam.ServicePrincipal("comprehend.amazonaws.com"), ) comprehendServiceRole.add_to_policy( iam.PolicyStatement( effect=iam.Effect.ALLOW, resources=["*"], actions=[ "comprehend:*", "s3:ListAllMyBuckets", "s3:ListBucket", "s3:GetBucketLocation", "iam:ListRoles", "iam:GetRole", ], )) # **********S3 Batch Operations Role****************************** s3BatchOperationsRole = iam.Role( self, "S3BatchOperationsRole", assumed_by=iam.ServicePrincipal( "batchoperations.s3.amazonaws.com"), ) # **********S3 Bucket****************************** # S3 bucket for input documents and output contentBucket = s3.Bucket(self, "DocumentsBucket", versioned=False) existingContentBucket = s3.Bucket(self, "ExistingDocumentsBucket", versioned=False) existingContentBucket.grant_read_write(s3BatchOperationsRole) inventoryAndLogsBucket = s3.Bucket(self, "InventoryAndLogsBucket", versioned=False) inventoryAndLogsBucket.grant_read_write(s3BatchOperationsRole) # **********DynamoDB Table************************* # DynamoDB table with links to output in S3 outputTable = dynamodb.Table( self, "OutputTable", partition_key={ "name": "documentId", "type": dynamodb.AttributeType.STRING, }, sort_key={ "name": "outputType", "type": dynamodb.AttributeType.STRING, }, ) # DynamoDB table with links to output in S3 documentsTable = dynamodb.Table( self, "DocumentsTable", partition_key={ "name": "documentId", "type": dynamodb.AttributeType.STRING, }, stream=dynamodb.StreamViewType.NEW_IMAGE, ) # **********SQS Queues***************************** # DLQ (Dead Letter Queue) dlq = sqs.Queue( self, "DLQ", visibility_timeout=core.Duration.seconds(30), retention_period=core.Duration.seconds(1209600), ) # Input Queue for sync jobs syncJobsQueue = sqs.Queue( self, "SyncJobs", visibility_timeout=core.Duration.seconds(30), retention_period=core.Duration.seconds(1209600), dead_letter_queue={ "queue": dlq, "max_receive_count": 50 }, ) # Input Queue for async jobs asyncJobsQueue = sqs.Queue( self, "AsyncJobs", visibility_timeout=core.Duration.seconds(30), retention_period=core.Duration.seconds(1209600), dead_letter_queue={ "queue": dlq, "max_receive_count": 50 }, ) # Queue jobResultsQueue = sqs.Queue( self, "JobResults", visibility_timeout=core.Duration.seconds(900), retention_period=core.Duration.seconds(1209600), dead_letter_queue={ "queue": dlq, "max_receive_count": 50 }, ) # Trigger # jobCompletionTopic.subscribeQueue(jobResultsQueue) jobCompletionTopic.add_subscription( snsSubscriptions.SqsSubscription(jobResultsQueue)) # **********Lambda Functions****************************** # Helper Layer with helper functions helperLayer = _lambda.LayerVersion( self, "HelperLayer", code=_lambda.Code.from_asset("awscdk/lambda/helper"), compatible_runtimes=[_lambda.Runtime.PYTHON_3_7], license="Apache-2.0", description="Helper layer.", ) # Textractor helper layer textractorLayer = _lambda.LayerVersion( self, "Textractor", 
code=_lambda.Code.from_asset("awscdk/lambda/textractor"), compatible_runtimes=[_lambda.Runtime.PYTHON_3_7], license="Apache-2.0", description="Textractor layer.", ) # ----------------------------------------------------------- # S3 Event processor s3Processor = _lambda.Function( self, "S3Processor", runtime=_lambda.Runtime.PYTHON_3_7, code=_lambda.Code.from_asset("awscdk/lambda/s3processor"), handler="lambda_function.lambda_handler", environment={ "SYNC_QUEUE_URL": syncJobsQueue.queue_url, "ASYNC_QUEUE_URL": asyncJobsQueue.queue_url, "DOCUMENTS_TABLE": documentsTable.table_name, "OUTPUT_TABLE": outputTable.table_name, }, ) # Layer s3Processor.add_layers(helperLayer) # Trigger s3Processor.add_event_source( S3EventSource(contentBucket, events=[s3.EventType.OBJECT_CREATED])) # Permissions documentsTable.grant_read_write_data(s3Processor) syncJobsQueue.grant_send_messages(s3Processor) asyncJobsQueue.grant_send_messages(s3Processor) # ------------------------------------------------------------ # S3 Batch Operations Event processor s3BatchProcessor = _lambda.Function( self, "S3BatchProcessor", runtime=_lambda.Runtime.PYTHON_3_7, code=_lambda.Code.from_asset("awscdk/lambda/s3batchprocessor"), handler="lambda_function.lambda_handler", environment={ "DOCUMENTS_TABLE": documentsTable.table_name, "OUTPUT_TABLE": outputTable.table_name, }, reserved_concurrent_executions=1, ) # Layer s3BatchProcessor.add_layers(helperLayer) # Permissions documentsTable.grant_read_write_data(s3BatchProcessor) s3BatchProcessor.grant_invoke(s3BatchOperationsRole) s3BatchOperationsRole.add_to_policy( iam.PolicyStatement(actions=["lambda:*"], resources=["*"])) # ------------------------------------------------------------ # Document processor (Router to Sync/Async Pipeline) documentProcessor = _lambda.Function( self, "TaskProcessor", runtime=_lambda.Runtime.PYTHON_3_7, code=_lambda.Code.from_asset("awscdk/lambda/documentprocessor"), handler="lambda_function.lambda_handler", environment={ "SYNC_QUEUE_URL": syncJobsQueue.queue_url, "ASYNC_QUEUE_URL": asyncJobsQueue.queue_url, }, ) # Layer documentProcessor.add_layers(helperLayer) # Trigger documentProcessor.add_event_source( DynamoEventSource( documentsTable, starting_position=_lambda.StartingPosition.TRIM_HORIZON, )) # Permissions documentsTable.grant_read_write_data(documentProcessor) syncJobsQueue.grant_send_messages(documentProcessor) asyncJobsQueue.grant_send_messages(documentProcessor) # ------------------------------------------------------------ # Sync Jobs Processor (Process jobs using sync APIs) syncProcessor = _lambda.Function( self, "SyncProcessor", runtime=_lambda.Runtime.PYTHON_3_7, code=_lambda.Code.from_asset("awscdk/lambda/documentprocessor"), handler="lambda_function.lambda_handler", environment={ "OUTPUT_TABLE": outputTable.table_name, "DOCUMENTS_TABLE": documentsTable.table_name, "AWS_DATA_PATH": "models", }, reserved_concurrent_executions=1, timeout=core.Duration.seconds(25), ) # Layer syncProcessor.add_layers(helperLayer) syncProcessor.add_layers(textractorLayer) # Trigger syncProcessor.add_event_source( SqsEventSource(syncJobsQueue, batch_size=1)) # Permissions contentBucket.grant_read_write(syncProcessor) existingContentBucket.grant_read_write(syncProcessor) outputTable.grant_read_write_data(syncProcessor) documentsTable.grant_read_write_data(syncProcessor) syncProcessor.add_to_role_policy( iam.PolicyStatement(actions=["textract:*"], resources=["*"])) # ------------------------------------------------------------ # Async Job Processor (Start jobs 
using Async APIs) asyncProcessor = _lambda.Function( self, "ASyncProcessor", runtime=_lambda.Runtime.PYTHON_3_7, code=_lambda.Code.from_asset("awscdk/lambda/asyncprocessor"), handler="lambda_function.lambda_handler", environment={ "ASYNC_QUEUE_URL": asyncJobsQueue.queue_url, "SNS_TOPIC_ARN": jobCompletionTopic.topic_arn, "SNS_ROLE_ARN": textractServiceRole.role_arn, "AWS_DATA_PATH": "models", }, reserved_concurrent_executions=1, timeout=core.Duration.seconds(60), ) # asyncProcessor.addEnvironment("SNS_TOPIC_ARN", textractServiceRole.topic_arn) # Layer asyncProcessor.add_layers(helperLayer) # Triggers # Run async job processor every 5 minutes # Enable code below after test deploy rule = events.Rule( self, "Rule", schedule=events.Schedule.expression("rate(2 minutes)")) rule.add_target(LambdaFunction(asyncProcessor)) # Run when a job is successfully complete asyncProcessor.add_event_source(SnsEventSource(jobCompletionTopic)) # Permissions contentBucket.grant_read(asyncProcessor) existingContentBucket.grant_read_write(asyncProcessor) asyncJobsQueue.grant_consume_messages(asyncProcessor) asyncProcessor.add_to_role_policy( iam.PolicyStatement( actions=["iam:PassRole"], resources=[textractServiceRole.role_arn], )) asyncProcessor.add_to_role_policy( iam.PolicyStatement(actions=["textract:*"], resources=["*"])) # ------------------------------------------------------------ # Async Jobs Results Processor jobResultProcessor = _lambda.Function( self, "JobResultProcessor", runtime=_lambda.Runtime.PYTHON_3_7, code=_lambda.Code.from_asset("awscdk/lambda/jobresultprocessor"), handler="lambda_function.lambda_handler", memory_size=2000, reserved_concurrent_executions=50, timeout=core.Duration.seconds(900), environment={ "OUTPUT_TABLE": outputTable.table_name, "DOCUMENTS_TABLE": documentsTable.table_name, "AWS_DATA_PATH": "models", }, ) # Layer jobResultProcessor.add_layers(helperLayer) jobResultProcessor.add_layers(textractorLayer) # Triggers jobResultProcessor.add_event_source( SqsEventSource(jobResultsQueue, batch_size=1)) # Permissions outputTable.grant_read_write_data(jobResultProcessor) documentsTable.grant_read_write_data(jobResultProcessor) contentBucket.grant_read_write(jobResultProcessor) existingContentBucket.grant_read_write(jobResultProcessor) jobResultProcessor.add_to_role_policy( iam.PolicyStatement(actions=["textract:*", "comprehend:*"], resources=["*"])) # -------------- # PDF Generator pdfGenerator = _lambda.Function( self, "PdfGenerator", runtime=_lambda.Runtime.JAVA_8, code=_lambda.Code.from_asset("awscdk/lambda/pdfgenerator"), handler="DemoLambdaV2::handleRequest", memory_size=3000, timeout=core.Duration.seconds(900), ) contentBucket.grant_read_write(pdfGenerator) existingContentBucket.grant_read_write(pdfGenerator) pdfGenerator.grant_invoke(syncProcessor) pdfGenerator.grant_invoke(asyncProcessor)
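The async path above depends on Textract publishing job-completion messages to jobCompletionTopic, which fans out to jobResultsQueue and from there to jobResultProcessor. A minimal sketch of that SNS to SQS to Lambda chain, with placeholder construct ids and an assumed asset path (not the original code), in case the wiring is easier to follow in isolation:

from aws_cdk import core, aws_sns as sns, aws_sqs as sqs, aws_lambda as _lambda
from aws_cdk import aws_sns_subscriptions as snsSubscriptions
from aws_cdk.aws_lambda_event_sources import SqsEventSource


def build_completion_chain(scope: core.Construct) -> None:
    topic = sns.Topic(scope, "CompletionTopic")
    results_queue = sqs.Queue(
        scope, "ResultsQueue",
        visibility_timeout=core.Duration.seconds(900))
    # fan the topic out to a queue so results survive consumer failures
    topic.add_subscription(snsSubscriptions.SqsSubscription(results_queue))
    consumer = _lambda.Function(
        scope, "ResultsConsumer",
        runtime=_lambda.Runtime.PYTHON_3_7,
        handler="lambda_function.lambda_handler",
        code=_lambda.Code.from_asset("awscdk/lambda/consumer"))  # assumed path
    # batch_size=1 mirrors the one-job-at-a-time processing above
    consumer.add_event_source(SqsEventSource(results_queue, batch_size=1))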
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) # SQS queue state_change_sqs = Queue( self, "state_change_sqs", visibility_timeout=core.Duration.seconds(60) ) # Dynamodb Tables # EC2 state changes tb_states = Table( self, "ec2_states", partition_key=Attribute(name="instance-id", type=AttributeType.STRING), sort_key=Attribute( name="time", type=AttributeType.STRING ), billing_mode=BillingMode.PAY_PER_REQUEST, removal_policy=core.RemovalPolicy.DESTROY, stream=StreamViewType.NEW_IMAGE) # EC2 inventory tb_inventory = Table( self, "ec2_inventory", partition_key=Attribute(name="instance-id", type=AttributeType.STRING), sort_key=Attribute( name="time", type=AttributeType.STRING ), billing_mode=BillingMode.PAY_PER_REQUEST, removal_policy=core.RemovalPolicy.DESTROY, stream=StreamViewType.KEYS_ONLY) # IAM policies - AWS managed basic_exec = ManagedPolicy.from_aws_managed_policy_name("service-role/AWSLambdaBasicExecutionRole") sqs_access = ManagedPolicy(self, "LambdaSQSExecution", statements=[ PolicyStatement( effect=Effect.ALLOW, actions=[ "sqs:ReceiveMessage", "sqs:DeleteMessage", "sqs:GetQueueAttributes" ], resources=[state_change_sqs.queue_arn] )]) # IAM Policies pol_ec2_states_ro = ManagedPolicy(self, "pol_EC2StatesReadOnly", statements=[ PolicyStatement( effect=Effect.ALLOW, actions=[ "dynamodb:DescribeStream", "dynamodb:GetRecords", "dynamodb:GetItem", "dynamodb:GetShardIterator", "dynamodb:ListStreams" ], resources=[tb_states.table_arn] )]) pol_ec2_states_rwd = ManagedPolicy( self, "pol_EC2StatesWriteDelete", statements=[ PolicyStatement( effect=Effect.ALLOW, actions=[ "dynamodb:DeleteItem", "dynamodb:DescribeTable", "dynamodb:PutItem", "dynamodb:Query", "dynamodb:UpdateItem" ], resources=[tb_states.table_arn] )]) pol_ec2_inventory_full = ManagedPolicy( self, "pol_EC2InventoryFullAccess", statements=[ PolicyStatement( effect=Effect.ALLOW, actions=[ "dynamodb:DeleteItem", "dynamodb:DescribeTable", "dynamodb:GetItem", "dynamodb:PutItem", "dynamodb:Query", "dynamodb:UpdateItem" ], resources=[tb_inventory.table_arn] )]) pol_lambda_describe_ec2 = ManagedPolicy( self, "pol_LambdaDescribeEC2", statements=[ PolicyStatement( effect=Effect.ALLOW, actions=[ "ec2:Describe*" ], resources=["*"] )]) # IAM Roles rl_event_capture = Role( self, 'rl_state_capture', assumed_by=ServicePrincipal('lambda.amazonaws.com'), managed_policies=[basic_exec, sqs_access, pol_ec2_states_rwd] ) rl_event_processor = Role( self, 'rl_state_processor', assumed_by=ServicePrincipal('lambda.amazonaws.com'), managed_policies=[ basic_exec, pol_ec2_states_ro, pol_ec2_states_rwd, pol_ec2_inventory_full, pol_lambda_describe_ec2]) # event capture lambda lambda_event_capture = Function( self, "lambda_event_capture", handler="event_capture.handler", runtime=Runtime.PYTHON_3_7, code=Code.asset('event_capture'), role=rl_event_capture, events=[SqsEventSource(state_change_sqs)], environment={"state_table": tb_states.table_name} ) # event processor lambda lambda_event_processor = Function( self, "lambda_event_processor", handler="event_processor.handler", runtime=Runtime.PYTHON_3_7, code=Code.asset('event_processor'), role=rl_event_processor, events=[ DynamoEventSource( tb_states, starting_position=StartingPosition.LATEST) ], environment={ "inventory_table": tb_inventory.table_name, } ) # Cloudwatch Event event_ec2_change = Rule( self, "ec2_state_change", description="trigger on ec2 start, stop and terminate instances", event_pattern=EventPattern( source=["aws.ec2"], detail_type=["EC2 
Instance State-change Notification"], detail={ "state": [ "running", "stopped", "terminated"] } ), targets=[aws_events_targets.SqsQueue(state_change_sqs)] ) # Outputs core.CfnOutput(self, "rl_state_capture_arn", value=rl_event_capture.role_arn) core.CfnOutput(self, "rl_state_processor_arn", value=rl_event_processor.role_arn) core.CfnOutput(self, "tb_states_arn", value=tb_states.table_arn) core.CfnOutput(self, "tb_inventory_arn", value=tb_inventory.table_arn) core.CfnOutput(self, "sqs_state_change", value=state_change_sqs.queue_arn)
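The event_capture handler referenced above is not included in this excerpt. A hedged sketch of what such a handler might do, assuming the EventBridge event arrives unchanged as each SQS record body and that the table name comes from the state_table environment variable set above:

import json
import os

import boto3

ddb = boto3.resource("dynamodb")


def handler(event, context):
    """Persist each EC2 state-change event into the ec2_states table."""
    table = ddb.Table(os.environ["state_table"])
    for record in event["Records"]:
        state_event = json.loads(record["body"])  # EventBridge envelope
        table.put_item(Item={
            "instance-id": state_event["detail"]["instance-id"],
            "time": state_event["time"],
            "state": state_event["detail"]["state"],
        })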
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) """ Create Lambda Layer The packages should be stored in `python/lib/python3.7/site-packages` which translates to `/opt/python/lib/python3.7/site-packages` in AWS Lambda Refer here: https://stackoverflow.com/a/58702328/7999204 """ python_deps_layer = LayerVersion( self, "PythonDepsLayer", code=Code.from_asset("./python-deps-layer"), compatible_runtimes=[PYTHON_RUNTIME], description="A layer that contains Python Dependencies", ) """ Create DynamoDB Tables """ poll_table = Table( self, "PollTable", partition_key=Attribute(name="id", type=AttributeType.STRING), sort_key=Attribute(name="SK", type=AttributeType.STRING), read_capacity=10, write_capacity=10, stream=StreamViewType.NEW_IMAGE, ) # DynamoDB Lambda consumer worker aggregate_votes_function = Function( self, "AggregateVotesLambda", handler="ddb_stream.aggregate_vote_table", runtime=PYTHON_RUNTIME, code=Code.asset("./backend"), layers=[python_deps_layer], timeout=core.Duration.seconds(30), ) aggregate_votes_function.add_environment("POLL_TABLE", poll_table.table_name) # DynamoDB Stream (Lambda Event Source) poll_table.grant_stream_read(aggregate_votes_function) poll_table.grant_read_write_data(aggregate_votes_function) ddb_aggregate_votes_event_source = DynamoEventSource( poll_table, starting_position=StartingPosition.LATEST ) aggregate_votes_function.add_event_source(ddb_aggregate_votes_event_source) # DynamoDB main_page GSI poll_table.add_global_secondary_index( partition_key=Attribute(name="PK2", type=AttributeType.STRING), projection_type=ProjectionType.INCLUDE, index_name=MAIN_PAGE_GSI, non_key_attributes=["date", "question", "result"], ) """ Create AWS Cognito User Pool """ self.users = UserPool(self, "vote-user") """ HTTP API API Gateway with CORS """ api = HttpApi( self, "VoteHttpApi", cors_preflight={ "allow_headers": ["*"], "allow_methods": [ HttpMethod.GET, HttpMethod.HEAD, HttpMethod.OPTIONS, HttpMethod.POST, ], "allow_origins": ["*"], "max_age": core.Duration.days(10), }, ) """ HTTP API Lambda functions """ get_all_votes_function = api_lambda_function( self, "GetAllVoteLambda", "api.get_all_votes", api, "/vote", GET, [python_deps_layer], [poll_table], ) poll_table.grant_read_data(get_all_votes_function) get_vote_function = api_lambda_function( self, "GetVoteLambda", "api.get_vote_by_id", api, "/vote/{vote_id}", GET, [python_deps_layer], [poll_table], ) poll_table.grant_read_data(get_vote_function) create_poll_function = api_lambda_function( self, "CreatePollLambda", "api.create_poll", api, "/vote", POST, [python_deps_layer], [poll_table], ) poll_table.grant_write_data(create_poll_function) post_vote_function = api_lambda_function( self, "PostVoteLambda", "api.vote", api, "/vote/{vote_id}", POST, [python_deps_layer], [poll_table], ) """ Create SQS Queues """ voting_queue = Queue(self, "voting-queue") # SQS Consumer worker voting_to_ddb_function = Function( self, "VotingToDDBLambda", handler="sqs_worker.insert_to_vote_db_table", runtime=PYTHON_RUNTIME, code=Code.asset("./backend"), layers=[python_deps_layer], ) voting_to_ddb_function.add_environment("POLL_TABLE", poll_table.table_name) # SQS Queue to Lambda trigger mapping voting_to_ddb_event_source = SqsEventSource(voting_queue) voting_to_ddb_function.add_event_source(voting_to_ddb_event_source) poll_table.grant_read_write_data(voting_to_ddb_function) voting_queue.grant_send_messages(post_vote_function) post_vote_function.add_environment("VOTING_QUEUE_URL", 
voting_queue.queue_url) core.CfnOutput(self, "api-domain", value=api.url)
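api_lambda_function() is called repeatedly above but is not defined in this excerpt. A rough sketch of what the helper might look like, assuming the experimental aws_apigatewayv2_integrations module from CDK v1; the runtime, timeout, and environment-variable name are guesses for illustration (the original presumably uses its module-level PYTHON_RUNTIME constant):

from aws_cdk import core
from aws_cdk.aws_apigatewayv2 import HttpApi, HttpMethod
from aws_cdk.aws_apigatewayv2_integrations import LambdaProxyIntegration
from aws_cdk.aws_lambda import Code, Function, Runtime


def api_lambda_function(scope, construct_id: str, handler: str, api: HttpApi,
                        path: str, method: HttpMethod, layers, tables) -> Function:
    function = Function(
        scope, construct_id,
        handler=handler,
        runtime=Runtime.PYTHON_3_8,
        code=Code.asset("./backend"),
        layers=layers,
        timeout=core.Duration.seconds(30))
    # expose the table name so the handler can look it up at runtime
    for table in tables:
        function.add_environment("POLL_TABLE", table.table_name)
    api.add_routes(path=path,
                   methods=[method],
                   integration=LambdaProxyIntegration(handler=function))
    return function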
def create_all_lambdas(self) -> None: """ Create all lambda functions and associated triggers """ self.create_lambda( id="process_new_scene_lambda", code=aws_lambda.Code.from_asset( path="cbers2stac/process_new_scene_queue"), handler="code.handler", runtime=aws_lambda.Runtime.PYTHON_3_7, environment={ **self.lambdas_env_, **{ "SNS_TARGET_ARN": self.topics_["stac_item_topic"].topic_arn, "SNS_RECONCILE_TARGET_ARN": self.topics_["reconcile_stac_item_topic"].topic_arn, # This is used for testing, number of messages read from queue # when manually invoking lambda "MESSAGE_BATCH_SIZE": "1", }, }, timeout=core.Duration.seconds(55), dead_letter_queue=self.queues_["process_new_scenes_queue_dlq"], layers=[self.layers_["common_layer"]], description="Process new scenes from quicklook queue", ) self.lambdas_["process_new_scene_lambda"].add_event_source( SqsEventSource(queue=self.queues_["new_scenes_queue"], batch_size=10)) # See comment below on using from_bucket_name to # create a CDK bucket read_cbers_pds_permissions = iam.PolicyStatement( actions=["s3:ListObjectsV2", "s3:ListBucket", "s3:Get*"], resources=[ "arn:aws:s3:::cbers-pds", "arn:aws:s3:::cbers-pds/*", ], ) self.lambdas_["process_new_scene_lambda"].add_to_role_policy( read_cbers_pds_permissions) self.create_lambda( id="generate_catalog_levels_to_be_updated_lambda", code=aws_lambda.Code.from_asset( path="cbers2stac/generate_catalog_levels_to_be_updated"), handler="code.handler", runtime=aws_lambda.Runtime.PYTHON_3_7, environment={ **self.lambdas_env_, **{ "CATALOG_PREFIX_UPDATE_QUEUE": self.queues_["catalog_prefix_update_queue"].queue_url }, }, timeout=core.Duration.seconds(900), dead_letter_queue=self.queues_["dead_letter_queue"], layers=[self.layers_["common_layer"]], description="Generate levels into output table from input table", ) self.create_lambda( id="update_catalog_prefix_lambda", code=aws_lambda.Code.from_asset( path="cbers2stac/update_catalog_tree"), handler="code.trigger_handler", runtime=aws_lambda.Runtime.PYTHON_3_7, environment={ **self.lambdas_env_, }, timeout=core.Duration.seconds(55), dead_letter_queue=self.queues_["dead_letter_queue"], layers=[self.layers_["common_layer"]], description="Update catalog from prefix", ) self.lambdas_["update_catalog_prefix_lambda"].add_event_source( SqsEventSource(queue=self.queues_["catalog_prefix_update_queue"], batch_size=10)) self.create_lambda( id="populate_reconcile_queue_lambda", code=aws_lambda.Code.from_asset( path="cbers2stac/populate_reconcile_queue"), handler="code.handler", runtime=aws_lambda.Runtime.PYTHON_3_7, environment={ **self.lambdas_env_, **{ "RECONCILE_QUEUE": self.queues_["reconcile_queue"].queue_url }, }, timeout=core.Duration.seconds(300), dead_letter_queue=self.queues_["dead_letter_queue"], layers=[self.layers_["common_layer"]], description= "Populates reconcile queue with S3 keys from a common prefix", ) # I'm using the bucket ARN directly here just to make sure that I don't # mess with the cbers-pds bucket... 
creating it from_bucket_name should # be safe but I'll not take my chances # cbers_pds_bucket = s3.Bucket.from_bucket_name(self, "cbers-pds", "cbers-pds") list_cbers_pds_permissions = iam.PolicyStatement( actions=["s3:ListObjectsV2", "s3:ListBucket"], resources=[ "arn:aws:s3:::cbers-pds", "arn:aws:s3:::cbers-pds/*", ], ) self.lambdas_["populate_reconcile_queue_lambda"].add_to_role_policy( list_cbers_pds_permissions) self.create_lambda( id="consume_reconcile_queue_lambda", code=aws_lambda.Code.from_asset( path="cbers2stac/consume_reconcile_queue"), handler="code.handler", runtime=aws_lambda.Runtime.PYTHON_3_7, environment={ **self.lambdas_env_, **{ "NEW_SCENES_QUEUE": self.queues_["new_scenes_queue"].queue_url }, }, timeout=core.Duration.seconds(900), dead_letter_queue=self.queues_["consume_reconcile_queue_dlq"], layers=[self.layers_["common_layer"]], description="Consume dirs from reconcile queue, populating " "new_scenes_queue with quicklooks to be processed", ) self.lambdas_["consume_reconcile_queue_lambda"].add_to_role_policy( list_cbers_pds_permissions) self.lambdas_["consume_reconcile_queue_lambda"].add_event_source( SqsEventSource(queue=self.queues_["reconcile_queue"], batch_size=5)) # Section with lambdas used to support STAC API. Specific lambdas integrated # with API GW are defined in create_api_lambdas() if settings.enable_api: self.create_lambda( id="create_elastic_index_lambda", code=aws_lambda.Code.from_asset( path="cbers2stac/elasticsearch"), handler="es.create_stac_index_handler", runtime=aws_lambda.Runtime.PYTHON_3_7, environment={ **self.lambdas_env_, }, layers=[self.layers_["common_layer"]], timeout=core.Duration.seconds(30), dead_letter_queue=self.queues_["dead_letter_queue"], description="Create Elasticsearch stac index", ) self.create_lambda( id="insert_into_elastic_lambda", code=aws_lambda.Code.from_asset( path="cbers2stac/elasticsearch"), handler="es.create_documents_handler", runtime=aws_lambda.Runtime.PYTHON_3_7, environment={ **self.lambdas_env_, **{ "ES_STRIPPED": "YES", "BULK_CALLS": "1", "BULK_SIZE": "10" }, }, layers=[self.layers_["common_layer"]], timeout=core.Duration.seconds(30), dead_letter_queue=self.queues_["dead_letter_queue"], # Concurrent executions tuned to work with t2.small.elasticsearch reserved_concurrent_executions=5, description="Consume STAC items from queue, inserting into ES", ) self.lambdas_["insert_into_elastic_lambda"].add_event_source( SqsEventSource( queue=self.queues_["insert_into_elasticsearch_queue"], batch_size=10)) self.create_lambda( id="consume_stac_reconcile_queue_lambda", code=aws_lambda.Code.from_asset( path="cbers2stac/reindex_stac_items"), handler="code.consume_stac_reconcile_queue_handler", runtime=aws_lambda.Runtime.PYTHON_3_7, environment=self.lambdas_env_, layers=[self.layers_["common_layer"]], timeout=core.Duration.seconds(900), description="Reindex STAC items from a prefix", ) # Batch size changed from 5 to 2 to reduce the lambda work and increase # the chances to make it fit within the 900s limit. 
self.lambdas_[ "consume_stac_reconcile_queue_lambda"].add_event_source( SqsEventSource(queue=self.queues_["stac_reconcile_queue"], batch_size=2)) self.create_lambda( id="populate_stac_reconcile_queue_lambda", code=aws_lambda.Code.from_asset( path="cbers2stac/reindex_stac_items"), handler="code.populate_stac_reconcile_queue_handler", runtime=aws_lambda.Runtime.PYTHON_3_7, environment={ **self.lambdas_env_, }, timeout=core.Duration.seconds(300), dead_letter_queue=self.queues_["dead_letter_queue"], layers=[self.layers_["common_layer"]], description= "Populates reconcile queue with STAC items from a common prefix", )
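create_lambda() is used throughout this method but its definition is not part of this excerpt. A minimal sketch of the shape it plausibly has, written as a method on the same stack class; the only assumption is that it stores each function in self.lambdas_ under its id, which is how the rest of the code looks functions up:

from aws_cdk import aws_lambda


def create_lambda(self, id: str, **kwargs) -> None:
    """Create a function from the given kwargs and register it for later lookup."""
    self.lambdas_[id] = aws_lambda.Function(self, id, **kwargs)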
def __init__(self, scope: cdk.Construct, construct_id: str, **kwargs) -> None: super().__init__(scope, construct_id, **kwargs) S3policy = iam.PolicyStatement(actions=['s3:*'], resources=['*']) SQSpolicy = iam.PolicyStatement(actions=['sqs:*'], resources=['*']) Rekpolicy = iam.PolicyStatement(actions=['rekognition:*'], resources=['*']) rds_lambda_role = iam.Role( scope=self, id='cdk-lambda-role', assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'), role_name='cdk-lambda-role', managed_policies=[ iam.ManagedPolicy.from_aws_managed_policy_name( 'service-role/AWSLambdaVPCAccessExecutionRole'), iam.ManagedPolicy.from_aws_managed_policy_name( 'service-role/AWSLambdaBasicExecutionRole') ]) policystatement = iam.PolicyStatement(resources=["*"], actions=["sns:Publish"], effect=iam.Effect.ALLOW) token = cdk.SecretValue.plain_text("") #github personal token amplify_app = amp.App( self, "MyApp", source_code_provider=amp.GitHubSourceCodeProvider( owner="swen-514-614-spring2021", repository="term-project--team-9", oauth_token=token)) amplify_app.add_branch("main") # matt beef bucket picbucket = s3.Bucket(self, "bucket1", bucket_name='bucketswen614', versioned=False, removal_policy=cdk.RemovalPolicy.DESTROY, auto_delete_objects=True) # matt lambda fifosendfunction = _lambda.Function( self, "lambda_function1", code=_lambda.Code.asset(os.path.join(dirname, "send_to_fifo_queue")), runtime=_lambda.Runtime.PYTHON_3_7, handler="lambda-handler.main", function_name="sendtofifoqueue") # remember to add role= to funciton fifosendfunction.add_to_role_policy(S3policy) fifosendfunction.add_to_role_policy(SQSpolicy) # notification for lambda to activate when file gets put into bucket notification = s3n.LambdaDestination(fifosendfunction) picbucket.add_event_notification(s3.EventType.OBJECT_CREATED_PUT, notification) # matt queue queueP = sqs.Queue(self, "Queue", queue_name="picturequeue.fifo", fifo=True, content_based_deduplication=True) # matt lambda make it so this is activated by message sent to queue above then send it forward to bean bucket function_rekognition = _lambda.Function( self, "lambda_function2", code=_lambda.Code.asset( os.path.join(dirname, "send_to_rekognition")), runtime=_lambda.Runtime.PYTHON_3_7, handler="lambda-handler.main", function_name="detect_labels") function_rekognition.add_to_role_policy(S3policy) function_rekognition.add_to_role_policy(SQSpolicy) function_rekognition.add_to_role_policy(Rekpolicy) event_source = function_rekognition.add_event_source( SqsEventSource(queueP)) # event_source = functionbean.add_event_source(SqsEventSource(queueP)) # event_source_id = event_source.event_source_id #VPC for RDS vpc = ec2.Vpc(self, "VPC", max_azs=2) #Sets password using secretmanageer password = cdk.SecretValue.plain_text("swen614Team9") #Creates RDS using POSTGRESQL myrds = rds.DatabaseInstance( self, "RDS", database_name="CCDatabase", engine=rds.DatabaseInstanceEngine.postgres( version=rds.PostgresEngineVersion.VER_12_5), instance_type=ec2.InstanceType.of(ec2.InstanceClass.BURSTABLE2, ec2.InstanceSize.MICRO), vpc=vpc, storage_type=rds.StorageType.GP2, allocated_storage=20, credentials=rds.Credentials.from_password('team9', password), vpc_subnets={"subnet_type": ec2.SubnetType.PUBLIC}) myrds.connections.allow_default_port_from_any_ipv4('5432') # FIFO Queue going into database queueDB = sqs.Queue(self, "DBQueue", queue_name="dbqueue.fifo", fifo=True, content_based_deduplication=True) # lambda for DB queue fifoDBsendfunction = _lambda.Function( self, "lambda_function3", 
code=_lambda.Code.asset(os.path.join(dirname, "send_to_db")), runtime=_lambda.Runtime.PYTHON_3_7, handler="lambda-handler.main", role=rds_lambda_role, function_name="sendtodb", environment={'DB_HOST': myrds.db_instance_endpoint_address}) fifoDBsendfunction.add_to_role_policy(SQSpolicy) # attaches DBFIFO to the lambda event_source1 = fifoDBsendfunction.add_event_source( SqsEventSource(queueDB)) # lambda for DB get DBgetfunction = _lambda.Function( self, "lambda_function4", code=_lambda.Code.asset(os.path.join(dirname, "get_capacity_rds")), runtime=_lambda.Runtime.PYTHON_3_7, handler="lambda-handler.main", role=rds_lambda_role, function_name="getfromdb", environment={'DB_HOST': myrds.db_instance_endpoint_address}) DBgetfunction.add_to_role_policy(policystatement) api_gateway = apigw.LambdaRestApi( self, 'Endpoint', handler=DBgetfunction, ) get_widgets_integration = apigw.LambdaIntegration( DBgetfunction, request_templates={"application/json": '{ "statusCode": "200" }'}) api_gateway.root.add_method("GET", get_widgets_integration) # GET / cdk.CfnOutput(self, 'frontend', value="https://main." + amplify_app.default_domain) cdk.CfnOutput(self, 'rdsendpoint', value=myrds.db_instance_endpoint_address)
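Both queues above are FIFO queues, so any producer (for example the sendtofifoqueue Lambda) must supply a MessageGroupId; with content_based_deduplication=True an explicit MessageDeduplicationId is optional. A hedged boto3 sketch of such a producer, not taken from the project's handlers:

import json

import boto3

sqs_client = boto3.client("sqs")


def send_to_fifo(queue_url: str, payload: dict) -> None:
    """Enqueue a message on a FIFO queue; the group id is the ordering unit."""
    sqs_client.send_message(
        QueueUrl=queue_url,
        MessageBody=json.dumps(payload),
        MessageGroupId="pictures",  # assumed group name for illustration
    )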
def __init__(self, scope: core.Construct, _id: str, **kwargs) -> None: super().__init__(scope, _id, **kwargs)
ddb_file_list = ddb.Table(self, "ddb", partition_key=ddb.Attribute(name="Key", type=ddb.AttributeType.STRING), billing_mode=ddb.BillingMode.PAY_PER_REQUEST)
sqs_queue_DLQ = sqs.Queue(self, "sqs_DLQ", visibility_timeout=core.Duration.minutes(15), retention_period=core.Duration.days(14) )
sqs_queue = sqs.Queue(self, "sqs_queue", visibility_timeout=core.Duration.minutes(15), retention_period=core.Duration.days(14), dead_letter_queue=sqs.DeadLetterQueue( max_receive_count=100, queue=sqs_queue_DLQ ) )
handler = lam.Function(self, "lambdaFunction", code=lam.Code.asset("./lambda"), handler="lambda_function.lambda_handler", runtime=lam.Runtime.PYTHON_3_8, memory_size=1024, timeout=core.Duration.minutes(15), tracing=lam.Tracing.ACTIVE, environment={ 'table_queue_name': ddb_file_list.table_name, 'Des_bucket_default': Des_bucket_default, 'Des_prefix_default': Des_prefix_default, 'StorageClass': StorageClass, 'aws_access_key_id': aws_access_key_id, 'aws_secret_access_key': aws_secret_access_key, 'aws_access_key_region': aws_access_key_region })
ddb_file_list.grant_read_write_data(handler) handler.add_event_source(SqsEventSource(sqs_queue))
s3bucket = s3.Bucket(self, "s3bucket") s3bucket.grant_read(handler) s3bucket.add_event_notification(s3.EventType.OBJECT_CREATED, s3n.SqsDestination(sqs_queue))
# You can import an existing bucket and grant access to lambda
# exist_s3bucket = s3.Bucket.from_bucket_name(self, "import_bucket", # bucket_name="your_bucket_name")
# exist_s3bucket.grant_read(handler)
# But you have to add the SQS event notification to the imported bucket manually; CloudFormation doesn't support it on imported buckets
# A workaround is to use on_cloud_trail_event for the bucket, but that triggers CloudTrail first
# Because the bucket is imported, the bucket-to-SQS event trigger and the SQS permission that allows the bucket to send to it must be created manually
core.CfnOutput(self, "DynamoDB_Table", value=ddb_file_list.table_name) core.CfnOutput(self, "SQS_Job_Queue", value=sqs_queue.queue_name) core.CfnOutput(self, "SQS_Job_Queue_DLQ", value=sqs_queue_DLQ.queue_name) core.CfnOutput(self, "Worker_Lambda_Function", value=handler.function_name) core.CfnOutput(self, "New_S3_Bucket", value=s3bucket.bucket_name)
# Create Lambda logs filters to build network traffic metrics
handler.log_group.add_metric_filter("Complete-bytes", metric_name="Complete-bytes", metric_namespace="s3_migrate", metric_value="$bytes", filter_pattern=logs.FilterPattern.literal( '[info, date, sn, p="--->Complete", bytes, key]'))
handler.log_group.add_metric_filter("Uploading-bytes", metric_name="Uploading-bytes", metric_namespace="s3_migrate", metric_value="$bytes", filter_pattern=logs.FilterPattern.literal( '[info, date, sn, p="--->Uploading", bytes, key]'))
handler.log_group.add_metric_filter("Downloading-bytes", metric_name="Downloading-bytes", metric_namespace="s3_migrate", metric_value="$bytes", filter_pattern=logs.FilterPattern.literal( '[info, date, sn, p="--->Downloading", bytes, key]'))
lambda_metric_Complete = cw.Metric(namespace="s3_migrate", metric_name="Complete-bytes", statistic="Sum", period=core.Duration.minutes(1))
lambda_metric_Upload = cw.Metric(namespace="s3_migrate", metric_name="Uploading-bytes", statistic="Sum", period=core.Duration.minutes(1))
lambda_metric_Download = cw.Metric(namespace="s3_migrate", metric_name="Downloading-bytes", statistic="Sum", period=core.Duration.minutes(1))
handler.log_group.add_metric_filter("ERROR", metric_name="ERROR-Logs", metric_namespace="s3_migrate",
metric_value="1", filter_pattern=logs.FilterPattern.literal( '"ERROR"')) handler.log_group.add_metric_filter("WARNING", metric_name="WARNING-Logs", metric_namespace="s3_migrate", metric_value="1", filter_pattern=logs.FilterPattern.literal( '"WARNING"')) log_metric_ERROR = cw.Metric(namespace="s3_migrate", metric_name="ERROR-Logs", statistic="Sum", period=core.Duration.minutes(1)) log_metric_WARNING = cw.Metric(namespace="s3_migrate", metric_name="WARNING-Logs", statistic="Sum", period=core.Duration.minutes(1)) # Dashboard to monitor SQS and Lambda board = cw.Dashboard(self, "s3_migrate", dashboard_name="s3_migrate_serverless") board.add_widgets(cw.GraphWidget(title="Lambda-NETWORK", left=[lambda_metric_Download, lambda_metric_Upload, lambda_metric_Complete]), # TODO: here monitor all lambda concurrency not just the working one. Limitation from CDK # Lambda now supports monitor single lambda concurrency, will change this after CDK support cw.GraphWidget(title="Lambda-all-concurrent", left=[handler.metric_all_concurrent_executions(period=core.Duration.minutes(1))]), cw.GraphWidget(title="Lambda-invocations/errors/throttles", left=[handler.metric_invocations(period=core.Duration.minutes(1)), handler.metric_errors(period=core.Duration.minutes(1)), handler.metric_throttles(period=core.Duration.minutes(1))]), cw.GraphWidget(title="Lambda-duration", left=[handler.metric_duration(period=core.Duration.minutes(1))]), ) board.add_widgets(cw.GraphWidget(title="SQS-Jobs", left=[sqs_queue.metric_approximate_number_of_messages_visible( period=core.Duration.minutes(1) ), sqs_queue.metric_approximate_number_of_messages_not_visible( period=core.Duration.minutes(1) )]), cw.GraphWidget(title="SQS-DeadLetterQueue", left=[sqs_queue_DLQ.metric_approximate_number_of_messages_visible( period=core.Duration.minutes(1) ), sqs_queue_DLQ.metric_approximate_number_of_messages_not_visible( period=core.Duration.minutes(1) )]), cw.GraphWidget(title="ERROR/WARNING Logs", left=[log_metric_ERROR], right=[log_metric_WARNING]), cw.SingleValueWidget(title="Running/Waiting and Dead Jobs", metrics=[sqs_queue.metric_approximate_number_of_messages_not_visible( period=core.Duration.minutes(1) ), sqs_queue.metric_approximate_number_of_messages_visible( period=core.Duration.minutes(1) ), sqs_queue_DLQ.metric_approximate_number_of_messages_not_visible( period=core.Duration.minutes(1) ), sqs_queue_DLQ.metric_approximate_number_of_messages_visible( period=core.Duration.minutes(1) )], height=6) ) # Alarm for queue - DLQ alarm_DLQ = cw.Alarm(self, "SQS_DLQ", alarm_name="s3-migration-serverless-SQS Dead Letter Queue", metric=sqs_queue_DLQ.metric_approximate_number_of_messages_visible(), threshold=0, comparison_operator=cw.ComparisonOperator.GREATER_THAN_THRESHOLD, evaluation_periods=1, datapoints_to_alarm=1) alarm_topic = sns.Topic(self, "SQS queue-DLQ has dead letter") alarm_topic.add_subscription(subscription=sub.EmailSubscription(alarm_email)) alarm_DLQ.add_alarm_action(action.SnsAction(alarm_topic)) # Alarm for queue empty, i.e. 
no visible messages and no in-flight (not-visible) messages # metric_all_message = cw.MathExpression( # expression="a + b", # label="empty_queue_expression", # using_metrics={ # "a": sqs_queue.metric_approximate_number_of_messages_visible(), # "b": sqs_queue.metric_approximate_number_of_messages_not_visible() # } # ) # alarm_0 = cw.Alarm(self, "SQSempty", # alarm_name="SQS queue empty-Serverless", # metric=metric_all_message, # threshold=0, # comparison_operator=cw.ComparisonOperator.LESS_THAN_OR_EQUAL_TO_THRESHOLD, # evaluation_periods=3, # datapoints_to_alarm=3, # treat_missing_data=cw.TreatMissingData.IGNORE # ) # alarm_topic = sns.Topic(self, "SQS queue empty-Serverless") # alarm_topic.add_subscription(subscription=sub.EmailSubscription(alarm_email)) # alarm_0.add_alarm_action(action.SnsAction(alarm_topic)) # core.CfnOutput(self, "Alarm", value="CloudWatch SQS queue empty Alarm for Serverless: " + alarm_email) core.CfnOutput(self, "Dashboard", value="CloudWatch Dashboard name s3_migrate_serverless")
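The metric filters above use space-delimited patterns such as '[info, date, sn, p="--->Complete", bytes, key]', which publish the fifth field ($bytes) only when the fourth field equals "--->Complete". That only works if the worker's log lines contain six whitespace-separated fields. A hedged sketch of a log call that would produce a matching line once Lambda prepends its level, timestamp, and request id; the real worker's log format may differ:

import logging

logger = logging.getLogger()
logger.setLevel(logging.INFO)


def log_complete(transferred_bytes: int, key: str) -> None:
    # becomes e.g. "[INFO] 2021-03-01T00:00:00Z <request-id> --->Complete 1048576 images/cat.jpg"
    logger.info("--->Complete %d %s", transferred_bytes, key)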
def __init__(self, scope: core.Construct, stack_id: str, *, env: core.Environment, api_method: apigw.HttpMethod, api_path: str, function_code: lambda_.Code, function_dependencies_layer: lambda_.LayerVersion, function_name_prefix: str, handler_function_handler: str, receiver_function_handler: str, queue_name: str, **kwargs): super().__init__(scope, stack_id, env=env, **kwargs) # create the queue self.queue = sqs.Queue( self, 'Queue', dead_letter_queue=sqs.DeadLetterQueue( max_receive_count=1, queue=sqs.Queue( self, f'DLQ', queue_name=f'{queue_name}-dlq')), queue_name=queue_name) # create the receiver function # add the queue url as an environment variable self.receiver_function = lambda_.Function( self, 'ReceiverFunction', code=function_code, environment={'QUEUE_URL': self.queue.queue_url}, function_name=f'{function_name_prefix}-receiver', handler=receiver_function_handler, layers=[function_dependencies_layer], # memory_size=256, runtime=lambda_.Runtime.PYTHON_3_8) # allow the receiver function to enqueue messages self.queue.grant_send_messages(self.receiver_function) # route requests to the receiver lambda # (with a circular dependency, so never mind) # api.add_routes( # integration=apigw.LambdaProxyIntegration( # handler=self.receiver_function), # methods=[api_method], # path=api_path) # route requests to the receiver lambda # (without creating a circular dependency?) # integration = apigw.CfnIntegration( # self, 'Integration', # api_id=api.http_api_id, # integration_type='AWS_PROXY', # integration_uri=self.receiver_function.function_arn, # payload_format_version='2.0') # apigw.CfnRoute(self, 'Route', # api_id=api.http_api_id, # route_key=f'{api_method.value} {api_path}', # target=f'integrations/{integration.ref}') # # trigger the lambda with those routed requests # lambda_.CfnEventSourceMapping( # self, 'Mappping', # event_source_arn=f'arn:aws:execute-api:{env.region}:{env.account}:{api.http_api_id}/*/*{api_path}', # function_name=self.receiver_function.function_arn) # create the handler function self.handler_function = lambda_.Function( self, 'HandlerFunction', code=function_code, function_name=f'{function_name_prefix}-handler', handler=handler_function_handler, layers=[function_dependencies_layer], # memory_size=256, runtime=lambda_.Runtime.PYTHON_3_8) # add the queue as a trigger for the handler function self.handler_function.add_event_source(SqsEventSource(self.queue))
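The receiver's code is passed in through function_code and is not shown in this excerpt. A minimal sketch of a handler compatible with the wiring above, assuming the API request body should simply be forwarded to the queue whose URL is exposed through the QUEUE_URL environment variable:

import json
import os

import boto3

sqs_client = boto3.client("sqs")


def receiver_handler(event, context):
    """Forward the incoming request body to the work queue and acknowledge."""
    sqs_client.send_message(
        QueueUrl=os.environ["QUEUE_URL"],
        MessageBody=event.get("body") or json.dumps(event),
    )
    return {"statusCode": 202, "body": "queued"}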