def __init__(self, scope: core.Construct, _id: str, bucket_para, **kwargs) -> None:
    super().__init__(scope, _id, **kwargs)

    self.ddb_file_list = ddb.Table(
        self, "s3_migrate_ddb",
        partition_key=ddb.Attribute(name="Key", type=ddb.AttributeType.STRING),
        billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

    self.sqs_queue_DLQ = sqs.Queue(
        self, "s3_migrate_sqs_DLQ",
        visibility_timeout=core.Duration.hours(1),
        retention_period=core.Duration.days(14))
    self.sqs_queue = sqs.Queue(
        self, "s3_migrate_sqs_queue",
        visibility_timeout=core.Duration.hours(1),
        retention_period=core.Duration.days(14),
        dead_letter_queue=sqs.DeadLetterQueue(
            max_receive_count=24,
            queue=self.sqs_queue_DLQ))

    self.ssm_bucket_para = ssm.StringParameter(
        self, "s3_migrate_bucket_para",
        string_value=json.dumps(bucket_para, indent=4))

    # You need to manually set up ssm_credential_para in SSM Parameter Store before deploying CDK.
    # The parameter is imported here; MIND THAT THE VERSION NUMBER MUST MATCH EXACTLY!!!
    self.ssm_credential_para = ssm.StringParameter.from_secure_string_parameter_attributes(
        self, "ssm_parameter_credentials",
        parameter_name=ssm_parameter_credentials,
        version=1)

    # Create a new S3 bucket; new objects in this bucket will trigger SQS jobs.
    # This is not for existing S3 buckets: jobsender will scan existing buckets and create SQS jobs.
    self.s3bucket = s3.Bucket(self, "s3_migrate_bucket")
    self.s3bucket.add_event_notification(
        s3.EventType.OBJECT_CREATED,
        s3n.SqsDestination(self.sqs_queue))

    # Deploy code
    self.s3_deploy = s3.Bucket(self, "s3_migrate_deploybucket")
    s3d.BucketDeployment(
        self, "deploy_code",
        sources=[s3d.Source.asset("./code")],
        destination_bucket=self.s3_deploy)

    core.CfnOutput(self, 'NewS3Bucket_MigrateObjects', value=self.s3bucket.bucket_name)
    core.CfnOutput(self, 'NewS3Bucket_deploy_code', value=self.s3_deploy.bucket_name)
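# A minimal sketch of the manual SSM setup step referenced above, assuming the
# credentials are stored as one JSON blob (the exact payload shape is this
# project's convention and is not shown here). The parameter name must match the
# ssm_parameter_credentials value the stack imports, and the returned version is
# what from_secure_string_parameter_attributes(version=...) must be set to.
import json
import boto3

def put_credential_parameter(name: str, credentials: dict) -> int:
    """Create/overwrite the SecureString parameter; return its version number."""
    ssm_client = boto3.client("ssm")
    resp = ssm_client.put_parameter(
        Name=name,
        Value=json.dumps(credentials),
        Type="SecureString",
        Overwrite=True)
    return resp["Version"]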
def __init__(self, scope: core.Construct, _id: str, **kwargs) -> None:
    super().__init__(scope, _id, **kwargs)

    ddb_file_list = ddb.Table(self, "ddb",
                              partition_key=ddb.Attribute(name="Key", type=ddb.AttributeType.STRING),
                              billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

    sqs_queue_DLQ = sqs.Queue(self, "sqs_DLQ",
                              visibility_timeout=core.Duration.minutes(15),
                              retention_period=core.Duration.days(14))
    sqs_queue = sqs.Queue(self, "sqs_queue",
                          visibility_timeout=core.Duration.minutes(15),
                          retention_period=core.Duration.days(14),
                          dead_letter_queue=sqs.DeadLetterQueue(
                              max_receive_count=100,
                              queue=sqs_queue_DLQ))

    handler = lam.Function(self, "lambdaFunction",
                           code=lam.Code.asset("./lambda"),
                           handler="lambda_function.lambda_handler",
                           runtime=lam.Runtime.PYTHON_3_8,
                           memory_size=1024,
                           timeout=core.Duration.minutes(15),
                           environment={
                               'table_name': ddb_file_list.table_name,
                               'queue_name': sqs_queue.queue_name,
                               'Des_bucket_default': Des_bucket_default,
                               'Des_prefix_default': Des_prefix_default,
                               'Des_region': Des_region,
                               'StorageClass': StorageClass,
                               'aws_access_key_id': aws_access_key_id,
                               'aws_secret_access_key': aws_secret_access_key
                           })

    ddb_file_list.grant_read_write_data(handler)
    handler.add_event_source(SqsEventSource(sqs_queue))

    s3bucket = s3.Bucket(self, "s3bucket")
    s3bucket.grant_read(handler)
    s3bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                    s3n.SqsDestination(sqs_queue))

    # You can import an existing bucket and grant access to the Lambda:
    # exist_s3bucket = s3.Bucket.from_bucket_name(self, "import_bucket",
    #                                             bucket_name="your_bucket_name")
    # exist_s3bucket.grant_read(handler)
    # But you have to add SQS as the imported bucket's event notification manually;
    # that isn't supported by CloudFormation. A workaround is to add
    # on_cloud_trail_event for the bucket, but that triggers CloudTrail first.
    # Because the bucket is imported, you must manually create the bucket event
    # that triggers SQS and set the SQS permission allowing this bucket to trigger it.

    core.CfnOutput(self, "DynamoDB_Table", value=ddb_file_list.table_name)
    core.CfnOutput(self, "SQS_Job_Queue", value=sqs_queue.queue_name)
    core.CfnOutput(self, "SQS_Job_Queue_DLQ", value=sqs_queue_DLQ.queue_name)
    core.CfnOutput(self, "Worker_Lambda_Function", value=handler.function_name)
    core.CfnOutput(self, "New_S3_Bucket", value=s3bucket.bucket_name)
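# A hedged sketch of the CloudTrail workaround mentioned above, written in the
# same commented-out style (CDK v1 API). Bucket.on_cloud_trail_put_object creates
# an EventBridge rule backed by CloudTrail data events, so a Trail capturing the
# bucket's data events must already exist; "import_bucket", "your_bucket_name",
# and the aws_events_targets import alias are placeholders/assumptions.
# import aws_cdk.aws_events_targets as targets
# exist_s3bucket = s3.Bucket.from_bucket_name(self, "import_bucket",
#                                             bucket_name="your_bucket_name")
# exist_s3bucket.on_cloud_trail_put_object(
#     "imported_bucket_put",
#     target=targets.SqsQueue(sqs_queue))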
def storage(self):
    bucket = _s3.Bucket(self, "Bucket",
                        removal_policy=core.RemovalPolicy.DESTROY)
    notification_queue = _sqs.Queue(self, "NotificationQueue")
    bucket.add_object_created_notification(
        dest=_s3_notifications.SqsDestination(notification_queue))
    return bucket, notification_queue
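# A hedged usage sketch for the helper above: called from a stack's __init__,
# it returns both constructs so the caller can wire a consumer onto the queue
# (worker_fn is hypothetical and not part of the original snippet).
# bucket, notification_queue = self.storage()
# notification_queue.grant_consume_messages(worker_fn)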
def __init__(self, scope: core.Construct, id: str, sqsCfn, s3_loc_up, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)
    self.PREFIX = id

    self.sqs_queue = sqs.Queue.from_queue_arn(
        self, "QueuefromCfn",
        f"arn:aws:sqs:us-east-1:{core.Aws.ACCOUNT_ID}:{sqsCfn.queue_name}")
    s3_loc_up.add_object_created_notification(
        aws_s3_notifications.SqsDestination(self.sqs_queue),
        _s3.NotificationKeyFilter(
            prefix='stdized-data/comprehend_results/csv/',
            suffix='.csv'))
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # This will create the S3 bucket in AWS
    bucket = s3.Bucket(self, "ssl_s3_bucket_raw")
    # This will create the SQS queue in AWS
    queue = sqs.Queue(self, "ssl_sqs_event_queue")

    # Create an S3 notification object which points to SQS
    notification = aws_s3_notifications.SqsDestination(queue)
    filter1 = s3.NotificationKeyFilter(prefix="home/")
    # Attach the notification event to the S3 bucket
    bucket.add_event_notification(s3.EventType.OBJECT_CREATED, notification, filter1)
def __init__(self, scope: core.Construct, _id: str, bucket_para, **kwargs) -> None:
    super().__init__(scope, _id, **kwargs)

    self.ddb_file_list = ddb.Table(
        self, "ddb",
        table_name=table_queue_name,
        partition_key=ddb.Attribute(name="Key", type=ddb.AttributeType.STRING),
        billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

    self.sqs_queue_DLQ = sqs.Queue(
        self, "sqs_DLQ",
        queue_name=table_queue_name + "-DLQ",
        visibility_timeout=core.Duration.hours(1),
        retention_period=core.Duration.days(14))
    self.sqs_queue = sqs.Queue(
        self, "sqs_queue",
        queue_name=table_queue_name,
        visibility_timeout=core.Duration.hours(1),
        retention_period=core.Duration.days(14),
        dead_letter_queue=sqs.DeadLetterQueue(
            max_receive_count=24,
            queue=self.sqs_queue_DLQ))

    self.ssm_bucket_para = ssm.StringParameter(
        self, "para-bucket",
        string_value=json.dumps(bucket_para),
        parameter_name=ssm_parameter_bucket)

    # You must manually create ssm_credential_para in SSM Parameter Store first,
    # then import it here. Mind that the version number must match exactly!!!
    self.ssm_credential_para = ssm.StringParameter.from_secure_string_parameter_attributes(
        self, "ssm_parameter_credentials",
        parameter_name=ssm_parameter_credentials,
        version=2)

    # Create a new S3 bucket; new objects in it trigger SQS to start migration jobs.
    # Existing S3 buckets are not configured here: jobsender scans them and generates SQS job tasks.
    self.s3bucket = s3.Bucket(self, "newbucket")
    self.s3bucket.add_event_notification(
        s3.EventType.OBJECT_CREATED,
        s3n.SqsDestination(self.sqs_queue))
def __init__(self, app: App, id: str) -> None:
    super().__init__(app, id)

    # SQS queue
    queue = sqs.Queue(self, 's3-to-sqs-test')

    bucket = s3.Bucket(self, "MyBucket")
    bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                  s3n.SqsDestination(queue))

    # Output information about the created resources
    CfnOutput(self, 'sqsQueueUrl',
              value=queue.queue_url,
              description='The URL of the SQS queue')
    CfnOutput(self, 'bucketName',
              value=bucket.bucket_name,
              description='The name of the bucket created')
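# A hedged consumer-side sketch for the pattern above: S3 delivers event
# notifications to the queue as JSON documents with a "Records" array, each
# record carrying the bucket name and the URL-encoded object key. The queue URL
# comes from the sqsQueueUrl stack output; the function name is illustrative.
import json
import urllib.parse
import boto3

def poll_s3_events(queue_url: str) -> None:
    sqs_client = boto3.client("sqs")
    resp = sqs_client.receive_message(QueueUrl=queue_url,
                                      MaxNumberOfMessages=10,
                                      WaitTimeSeconds=20)
    for msg in resp.get("Messages", []):
        body = json.loads(msg["Body"])
        # S3's initial "s3:TestEvent" has no Records key, hence the .get()
        for record in body.get("Records", []):
            bucket = record["s3"]["bucket"]["name"]
            key = urllib.parse.unquote_plus(record["s3"]["object"]["key"])
            print(f"new object: s3://{bucket}/{key}")
        sqs_client.delete_message(QueueUrl=queue_url,
                                  ReceiptHandle=msg["ReceiptHandle"])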
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # Create Role
    S3SqsKmsSampleStack.__Role = IamService.create_role(self)
    # Get the KMS policy document
    kms_policy_document = IamService.get_kms_policy_documents(self)

    kms_key = kms.Key(self, id='ssl_s3_sqs_kms_key',
                      alias='sslS3SqsKmsKey',
                      description='This is the KMS key',
                      enabled=True,
                      enable_key_rotation=True,
                      policy=kms_policy_document)

    # This will create the S3 bucket in AWS
    bucket = s3.Bucket(self, "ssl_s3_bucket_raw_kms",
                       bucket_name="ssl-s3-bucket-kms-raw",
                       encryption=s3.BucketEncryption.KMS,
                       encryption_key=kms_key)
    # This will create the SQS queue in AWS
    queue = sqs.Queue(self, "ssl_sqs_event_queue",
                      queue_name="ssl-sqs-kms-event-queue",
                      encryption=sqs.QueueEncryption.KMS,
                      encryption_master_key=kms_key)

    # queue.node.add_dependency(kms_key)
    bucket.node.add_dependency(queue, kms_key)

    # Create an S3 notification object which points to SQS
    notification = aws_s3_notifications.SqsDestination(queue)
    filter1 = s3.NotificationKeyFilter(prefix="home/")
    # Attach the notification event to the S3 bucket
    bucket.add_event_notification(s3.EventType.OBJECT_CREATED, notification, filter1)
def create_events(self, services):
    # kickoff_notification = aws_s3_notifications.LambdaDestination(services["lambda"]["kickoff"])
    extensions = [
        "pdf", "pDf", "pDF", "pdF", "PDF", "Pdf",
        "png", "pNg", "pNG", "pnG", "PNG", "Png",
        "jpg", "jPg", "jPG", "jpG", "JPG", "Jpg"
    ]
    for extension in extensions:
        services["main_s3_bucket"].add_event_notification(
            aws_s3.EventType.OBJECT_CREATED,
            aws_s3_notifications.SqsDestination(services["sf_sqs"]),
            aws_s3.NotificationKeyFilter(prefix="uploads/", suffix=extension)
        )

    services["lambda"]["kickoff"].add_event_source(
        aws_lambda_event_sources.SqsEventSource(
            services["sf_sqs"],
            batch_size=1
        )
    )
    services["lambda"]["analyzepdf"].add_event_source(
        aws_lambda_event_sources.SqsEventSource(
            services["textract_sqs"],
            batch_size=1
        )
    )

    human_complete_target = aws_events_targets.LambdaFunction(
        services["lambda"]["humancomplete"])
    human_review_event_pattern = aws_events.EventPattern(
        source=["aws.sagemaker"],
        detail_type=["SageMaker A2I HumanLoop Status Change"]
    )
    aws_events.Rule(self, "multipadepdfa2i_HumanReviewComplete",
                    event_pattern=human_review_event_pattern,
                    targets=[human_complete_target])
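# S3 suffix filters are case-sensitive, which is why the list above spells out
# case variants by hand. A hedged alternative that enumerates every casing
# programmatically (note the literal list covers only some of the 8 possible
# variants per three-letter extension):
import itertools

def case_variants(extension: str) -> set:
    """All case permutations of an extension, e.g. 'pdf' -> {'pdf', 'pdF', ...}."""
    pairs = [(ch.lower(), ch.upper()) for ch in extension]
    return {"".join(combo) for combo in itertools.product(*pairs)}

# for extension in sorted(set().union(*(case_variants(e) for e in ("pdf", "png", "jpg")))):
#     ...add_event_notification(...) as in the loop above...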
def __init__(self, app: core.App, id: str, **kwargs) -> None:
    super().__init__(app, id, **kwargs)

    # Policies
    s3_access_policy = iam.ManagedPolicy.from_managed_policy_arn(
        self, id="s3_access_policy",
        managed_policy_arn="arn:aws:iam::aws:policy/AmazonS3FullAccess")
    lambda_access_policy = iam.ManagedPolicy.from_managed_policy_arn(
        self, id="lambda_access_policy",
        managed_policy_arn="arn:aws:iam::aws:policy/AWSLambda_FullAccess")
    logs_policy = iam.ManagedPolicy.from_managed_policy_arn(
        self, id="logs_policy",
        managed_policy_arn="arn:aws:iam::aws:policy/CloudWatchLogsFullAccess")

    # Roles
    lambda_role = iam.Role(
        self, id="lambda_role",
        assumed_by=iam.ServicePrincipal(service="lambda.amazonaws.com"),
        managed_policies=[s3_access_policy, lambda_access_policy, logs_policy],
        role_name="midi-to-mp3-lambda-role")

    # SQS
    conversion_sqs = sqs.Queue(self, id="conversion_sqs",
                               queue_name="conversion_sqs",
                               visibility_timeout=core.Duration.hours(12),
                               retention_period=core.Duration.days(1))
    kickoff_sqs = sqs.Queue(self, id="kickoff_sqs",
                            queue_name="kickoff_sqs",
                            visibility_timeout=core.Duration.hours(12),
                            retention_period=core.Duration.days(1))

    # S3
    midi_file_dropoff_bucket = s3.Bucket(
        self, id="midi_files_dropoff",
        bucket_name=MIDI_FILE_DROPOFF_BUCKET,
        auto_delete_objects=True,
        removal_policy=core.RemovalPolicy.DESTROY)
    midi_file_dropoff_bucket.add_event_notification(
        event=s3.EventType.OBJECT_CREATED,
        dest=s3n.SqsDestination(kickoff_sqs))

    created_mp3_files_bucket = s3.Bucket(
        self, id="created_mp3_files",
        bucket_name=CREATED_MP3_FILES_BUCKET,
        auto_delete_objects=True,
        removal_policy=core.RemovalPolicy.DESTROY)

    component_midi_files_bucket = s3.Bucket(
        self, id="component_midi_files",
        bucket_name=COMPONENT_MIDI_FILES_BUCKET,
        auto_delete_objects=True,
        removal_policy=core.RemovalPolicy.DESTROY)
    component_midi_files_bucket.add_event_notification(
        event=s3.EventType.OBJECT_CREATED,
        dest=s3n.SqsDestination(conversion_sqs))

    # Lambdas
    lambda_code = lambda_.DockerImageCode.from_image_asset(
        directory='./midi_to_mp3_lambda/',
        file="Dockerfile",
        build_args={
            "AWS_ACCESS_KEY_ID": os.environ.get("AWS_ACCESS_KEY_ID"),
            "AWS_SECRET_ACCESS_KEY": os.environ.get("AWS_SECRET_ACCESS_KEY")
        })
    midi_to_mp3_lambda = lambda_.DockerImageFunction(
        self, id="midi_to_mp3_lambda",
        role=lambda_role,
        function_name="midi-to-mp3",
        memory_size=1024,
        timeout=core.Duration.minutes(5),
        code=lambda_code)

    midi_split_lambda = alg.GoFunction(
        self, id="midi_split_lambda",
        entry="./midi_split_lambda/midi_split_lambda.go",
        timeout=core.Duration.minutes(15),
        runtime=lambda_.Runtime.GO_1_X,
        role=lambda_role,
        function_name="midi-split-lambda",
        memory_size=512,
        bundling={"environment": {"GO111MODULE": "off"}})

    s3_cleanup_lambda = lambda_.Function(
        self, id="s3_cleanup_lambda",
        runtime=lambda_.Runtime.PYTHON_3_8,
        role=lambda_role,
        function_name="s3-cleanup-lambda",
        memory_size=256,
        timeout=core.Duration.minutes(5),
        environment={"NUM_WEEKS_TO_KEEP_FILES": "1"},
        handler="s3_cleanup_lambda.handler",
        code=lambda_.Code.from_asset(os.path.join(".", "s3_cleanup_lambda")))

    weekly_on_sunday_cron = events.Rule(
        self, "Rule",
        schedule=events.Schedule.cron(minute='0', hour='0', week_day="SUN",
                                      month='*', year='*'),
    )
    weekly_on_sunday_cron.add_target(targets.LambdaFunction(s3_cleanup_lambda))

    # Event Sources
    midi_to_mp3_lambda.add_event_source(
        eventsources.SqsEventSource(queue=conversion_sqs))
    midi_split_lambda.add_event_source(
        eventsources.SqsEventSource(queue=kickoff_sqs))
def __init__(self, scope: core.Construct, _id: str, **kwargs) -> None:
    super().__init__(scope, _id, **kwargs)

    # Set up SSM parameters for credentials, bucket_para, and ignore_list
    ssm_credential_para = ssm.StringParameter.from_secure_string_parameter_attributes(
        self, "ssm_parameter_credentials",
        parameter_name=ssm_parameter_credentials,
        version=1)
    ssm_bucket_para = ssm.StringParameter(
        self, "s3bucket_serverless",
        string_value=json.dumps(bucket_para, indent=4))
    ssm_parameter_ignore_list = ssm.StringParameter(
        self, "s3_migrate_ignore_list",
        string_value=ignore_list)

    # Set up DynamoDB
    ddb_file_list = ddb.Table(
        self, "s3migrate_serverless",
        partition_key=ddb.Attribute(name="Key", type=ddb.AttributeType.STRING),
        billing_mode=ddb.BillingMode.PAY_PER_REQUEST)
    ddb_file_list.add_global_secondary_index(
        partition_key=ddb.Attribute(name="desBucket", type=ddb.AttributeType.STRING),
        index_name="desBucket-index",
        projection_type=ddb.ProjectionType.INCLUDE,
        non_key_attributes=["desKey", "versionId"])

    # Set up SQS
    sqs_queue_DLQ = sqs.Queue(
        self, "s3migrate_serverless_Q_DLQ",
        visibility_timeout=core.Duration.minutes(15),
        retention_period=core.Duration.days(14))
    sqs_queue = sqs.Queue(
        self, "s3migrate_serverless_Q",
        visibility_timeout=core.Duration.minutes(15),
        retention_period=core.Duration.days(14),
        dead_letter_queue=sqs.DeadLetterQueue(
            max_receive_count=60,
            queue=sqs_queue_DLQ))

    # Set up an API for Lambda to get its IP address (for debugging network routing)
    checkip = api.RestApi(
        self, "lambda-checkip-api",
        cloud_watch_role=True,
        deploy=True,
        description="For Lambda to get its IP address",
        default_integration=api.MockIntegration(
            integration_responses=[
                api.IntegrationResponse(
                    status_code="200",
                    response_templates={
                        "application/json": "$context.identity.sourceIp"})
            ],
            request_templates={"application/json": '{"statusCode": 200}'}),
        endpoint_types=[api.EndpointType.REGIONAL])
    checkip.root.add_method(
        "GET",
        method_responses=[
            api.MethodResponse(
                status_code="200",
                response_models={"application/json": api.Model.EMPTY_MODEL})
        ])

    # Set up Lambda functions
    handler = lam.Function(
        self, "s3-migrate-worker",
        code=lam.Code.asset("./lambda"),
        handler="lambda_function_worker.lambda_handler",
        runtime=lam.Runtime.PYTHON_3_8,
        memory_size=1024,
        timeout=core.Duration.minutes(15),
        tracing=lam.Tracing.ACTIVE,
        environment={
            'table_queue_name': ddb_file_list.table_name,
            'Des_bucket_default': Des_bucket_default,
            'Des_prefix_default': Des_prefix_default,
            'StorageClass': StorageClass,
            'checkip_url': checkip.url,
            'ssm_parameter_credentials': ssm_parameter_credentials,
            'JobType': JobType,
            'MaxRetry': MaxRetry,
            'MaxThread': MaxThread,
            'MaxParallelFile': MaxParallelFile,
            'JobTimeout': JobTimeout,
            'UpdateVersionId': UpdateVersionId,
            'GetObjectWithVersionId': GetObjectWithVersionId
        })

    handler_jobsender = lam.Function(
        self, "s3-migrate-jobsender",
        code=lam.Code.asset("./lambda"),
        handler="lambda_function_jobsender.lambda_handler",
        runtime=lam.Runtime.PYTHON_3_8,
        memory_size=1024,
        timeout=core.Duration.minutes(15),
        tracing=lam.Tracing.ACTIVE,
        environment={
            'table_queue_name': ddb_file_list.table_name,
            'StorageClass': StorageClass,
            'checkip_url': checkip.url,
            'sqs_queue': sqs_queue.queue_name,
            'ssm_parameter_credentials': ssm_parameter_credentials,
            'ssm_parameter_ignore_list': ssm_parameter_ignore_list.parameter_name,
            'ssm_parameter_bucket': ssm_bucket_para.parameter_name,
            'JobType': JobType,
            'MaxRetry': MaxRetry,
            'JobsenderCompareVersionId': JobsenderCompareVersionId
        })

    # Allow Lambda to read/write DDB and SQS
    ddb_file_list.grant_read_write_data(handler)
    ddb_file_list.grant_read_write_data(handler_jobsender)
    sqs_queue.grant_send_messages(handler_jobsender)

    # SQS triggers Lambda worker
    handler.add_event_source(SqsEventSource(sqs_queue, batch_size=1))

    # Option 1: Create an S3 bucket; all new objects in this bucket will be
    # transmitted by the Lambda worker
    s3bucket = s3.Bucket(self, "s3_new_migrate")
    s3bucket.grant_read(handler)
    s3bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                    s3n.SqsDestination(sqs_queue))

    # Option 2: Allow existing S3 buckets to be read by the Lambda functions.
    # Lambda jobsender will scan and compare these buckets and trigger Lambda
    # workers to transmit
    bucket_name = ''
    for b in bucket_para:
        if bucket_name != b['src_bucket']:  # Skip if the same bucket is listed more than once
            bucket_name = b['src_bucket']
            s3exist_bucket = s3.Bucket.from_bucket_name(
                self, bucket_name,  # use the bucket name as the construct id
                bucket_name=bucket_name)
            if JobType == 'PUT':
                s3exist_bucket.grant_read(handler_jobsender)
                s3exist_bucket.grant_read(handler)
            else:  # 'GET' mode
                s3exist_bucket.grant_read_write(handler_jobsender)
                s3exist_bucket.grant_read_write(handler)

    # Allow Lambda to read SSM parameters
    ssm_bucket_para.grant_read(handler_jobsender)
    ssm_credential_para.grant_read(handler)
    ssm_credential_para.grant_read(handler_jobsender)
    ssm_parameter_ignore_list.grant_read(handler_jobsender)

    # Schedule a cron event to trigger Lambda jobsender every hour:
    event.Rule(self, 'cron_trigger_jobsender',
               schedule=event.Schedule.rate(core.Duration.hours(1)),
               targets=[target.LambdaFunction(handler_jobsender)])
    # TODO: Trigger the event immediately; add a custom resource lambda to invoke handler_jobsender

    # Create Lambda log filters to build network traffic metrics
    handler.log_group.add_metric_filter(
        "Completed-bytes",
        metric_name="Completed-bytes",
        metric_namespace="s3_migrate",
        metric_value="$bytes",
        filter_pattern=logs.FilterPattern.literal(
            '[info, date, sn, p="--->Complete", bytes, key]'))
    handler.log_group.add_metric_filter(
        "Uploading-bytes",
        metric_name="Uploading-bytes",
        metric_namespace="s3_migrate",
        metric_value="$bytes",
        filter_pattern=logs.FilterPattern.literal(
            '[info, date, sn, p="--->Uploading", bytes, key]'))
    handler.log_group.add_metric_filter(
        "Downloading-bytes",
        metric_name="Downloading-bytes",
        metric_namespace="s3_migrate",
        metric_value="$bytes",
        filter_pattern=logs.FilterPattern.literal(
            '[info, date, sn, p="--->Downloading", bytes, key]'))
    handler.log_group.add_metric_filter(
        "MaxMemoryUsed",
        metric_name="MaxMemoryUsed",
        metric_namespace="s3_migrate",
        metric_value="$memory",
        filter_pattern=logs.FilterPattern.literal(
            '[head="REPORT", a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, '
            'a13, a14, a15, a16, memory, MB="MB", rest]'))
    lambda_metric_Complete = cw.Metric(namespace="s3_migrate",
                                       metric_name="Completed-bytes",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))
    lambda_metric_Upload = cw.Metric(namespace="s3_migrate",
                                     metric_name="Uploading-bytes",
                                     statistic="Sum",
                                     period=core.Duration.minutes(1))
    lambda_metric_Download = cw.Metric(namespace="s3_migrate",
                                       metric_name="Downloading-bytes",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))
    lambda_metric_MaxMemoryUsed = cw.Metric(namespace="s3_migrate",
                                            metric_name="MaxMemoryUsed",
                                            statistic="Maximum",
                                            period=core.Duration.minutes(1))
    handler.log_group.add_metric_filter(
        "ERROR",
        metric_name="ERROR-Logs",
        metric_namespace="s3_migrate",
        metric_value="1",
        filter_pattern=logs.FilterPattern.literal('"ERROR"'))
metric_value="1", filter_pattern=logs.FilterPattern.literal('"WARNING"')) # Task timed out handler.log_group.add_metric_filter( "TIMEOUT", metric_name="TIMEOUT-Logs", metric_namespace="s3_migrate", metric_value="1", filter_pattern=logs.FilterPattern.literal('"Task timed out"')) log_metric_ERROR = cw.Metric(namespace="s3_migrate", metric_name="ERROR-Logs", statistic="Sum", period=core.Duration.minutes(1)) log_metric_WARNING = cw.Metric(namespace="s3_migrate", metric_name="WARNING-Logs", statistic="Sum", period=core.Duration.minutes(1)) log_metric_TIMEOUT = cw.Metric(namespace="s3_migrate", metric_name="TIMEOUT-Logs", statistic="Sum", period=core.Duration.minutes(1)) # Dashboard to monitor SQS and Lambda board = cw.Dashboard(self, "s3_migrate_serverless") board.add_widgets( cw.GraphWidget(title="Lambda-NETWORK", left=[ lambda_metric_Download, lambda_metric_Upload, lambda_metric_Complete ]), cw.GraphWidget(title="Lambda-concurrent", left=[ handler.metric( metric_name="ConcurrentExecutions", period=core.Duration.minutes(1)) ]), cw.GraphWidget( title="Lambda-invocations/errors/throttles", left=[ handler.metric_invocations( period=core.Duration.minutes(1)), handler.metric_errors(period=core.Duration.minutes(1)), handler.metric_throttles(period=core.Duration.minutes(1)) ]), cw.GraphWidget( title="Lambda-duration", left=[ handler.metric_duration(period=core.Duration.minutes(1)) ]), ) board.add_widgets( cw.GraphWidget(title="Lambda_MaxMemoryUsed(MB)", left=[lambda_metric_MaxMemoryUsed]), cw.GraphWidget(title="ERROR/WARNING Logs", left=[log_metric_ERROR], right=[log_metric_WARNING, log_metric_TIMEOUT]), cw.GraphWidget( title="SQS-Jobs", left=[ sqs_queue.metric_approximate_number_of_messages_visible( period=core.Duration.minutes(1)), sqs_queue. metric_approximate_number_of_messages_not_visible( period=core.Duration.minutes(1)) ]), cw.SingleValueWidget( title="Running/Waiting and Dead Jobs", metrics=[ sqs_queue. metric_approximate_number_of_messages_not_visible( period=core.Duration.minutes(1)), sqs_queue.metric_approximate_number_of_messages_visible( period=core.Duration.minutes(1)), sqs_queue_DLQ. metric_approximate_number_of_messages_not_visible( period=core.Duration.minutes(1)), sqs_queue_DLQ. metric_approximate_number_of_messages_visible( period=core.Duration.minutes(1)) ], height=6)) # Alarm for queue - DLQ alarm_DLQ = cw.Alarm( self, "SQS_DLQ", metric=sqs_queue_DLQ.metric_approximate_number_of_messages_visible( ), threshold=0, comparison_operator=cw.ComparisonOperator.GREATER_THAN_THRESHOLD, evaluation_periods=1, datapoints_to_alarm=1) alarm_topic = sns.Topic(self, "SQS queue-DLQ has dead letter") alarm_topic.add_subscription( subscription=sub.EmailSubscription(alarm_email)) alarm_DLQ.add_alarm_action(action.SnsAction(alarm_topic)) core.CfnOutput(self, "Dashboard", value="CloudWatch Dashboard name s3_migrate_serverless")
def __init__(self, scope: cdk.Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    # Image Bucket
    image_bucket = s3.Bucket(self, IMG_BUCKET_NAME,
                             removal_policy=cdk.RemovalPolicy.DESTROY)
    cdk.CfnOutput(self, "imageBucket", value=image_bucket.bucket_name)
    image_bucket.add_cors_rule(
        allowed_methods=[s3.HttpMethods.GET, s3.HttpMethods.PUT],
        allowed_origins=["*"],
        allowed_headers=["*"],
        max_age=3000,
    )

    # Thumbnail Bucket
    resized_image_bucket = s3.Bucket(
        self, RESIZED_IMG_BUCKET_NAME,
        removal_policy=cdk.RemovalPolicy.DESTROY)
    cdk.CfnOutput(self, "resizedBucket", value=resized_image_bucket.bucket_name)
    resized_image_bucket.add_cors_rule(
        allowed_methods=[s3.HttpMethods.GET, s3.HttpMethods.PUT],
        allowed_origins=["*"],
        allowed_headers=["*"],
        max_age=3000,
    )

    # S3 Static bucket for website code
    web_bucket = s3.Bucket(
        self, WEBSITE_BUCKET_NAME,
        website_index_document="index.html",
        website_error_document="index.html",
        removal_policy=cdk.RemovalPolicy.DESTROY,
        # uncomment this and delete the policy statement below to allow
        # public access to our static website
        # public_read_access=True
    )
    web_policy_statement = iam.PolicyStatement(
        actions=["s3:GetObject"],
        resources=[web_bucket.arn_for_objects("*")],
        principals=[iam.AnyPrincipal()],
        conditions={"IpAddress": {"aws:SourceIp": ["139.138.203.36"]}},
    )
    web_bucket.add_to_resource_policy(web_policy_statement)
    cdk.CfnOutput(self, "bucketURL", value=web_bucket.bucket_website_domain_name)

    # Deploy site contents to S3 Bucket
    s3_dep.BucketDeployment(
        self, "DeployWebsite",
        sources=[s3_dep.Source.asset("./public")],
        destination_bucket=web_bucket,
    )

    # DynamoDB to store image labels
    partition_key = dynamodb.Attribute(name="image",
                                       type=dynamodb.AttributeType.STRING)
    table = dynamodb.Table(
        self, "ImageLabels",
        partition_key=partition_key,
        removal_policy=cdk.RemovalPolicy.DESTROY,
    )
    cdk.CfnOutput(self, "ddbTable", value=table.table_name)

    # Lambda layer for Pillow library
    layer = lb.LayerVersion(
        self, "pil",
        code=lb.Code.from_asset("reklayer"),
        compatible_runtimes=[lb.Runtime.PYTHON_3_7],
        license="Apache-2.0",
        description="A layer to enable the PIL library in our Rekognition Lambda",
    )

    # Lambda function
    rek_fn = lb.Function(
        self, "rekognitionFunction",
        code=lb.Code.from_asset("rekognitionFunction"),
        runtime=lb.Runtime.PYTHON_3_7,
        handler="index.handler",
        timeout=cdk.Duration.seconds(30),
        memory_size=1024,
        layers=[layer],
        environment={
            "TABLE": table.table_name,
            "BUCKET": image_bucket.bucket_name,
            "THUMBBUCKET": resized_image_bucket.bucket_name,
        },
    )
    image_bucket.grant_read(rek_fn)
    resized_image_bucket.grant_write(rek_fn)
    table.grant_write_data(rek_fn)
    rek_fn.add_to_role_policy(
        iam.PolicyStatement(effect=iam.Effect.ALLOW,
                            actions=["rekognition:DetectLabels"],
                            resources=["*"]))

    # Lambda for Synchronous front end
    serviceFn = lb.Function(
        self, "serviceFunction",
        code=lb.Code.from_asset("servicelambda"),
        runtime=lb.Runtime.PYTHON_3_7,
        handler="index.handler",
        environment={
            "TABLE": table.table_name,
            "BUCKET": image_bucket.bucket_name,
            "RESIZEDBUCKET": resized_image_bucket.bucket_name,
        },
    )
    image_bucket.grant_write(serviceFn)
    resized_image_bucket.grant_write(serviceFn)
    table.grant_read_write_data(serviceFn)

    # Cognito User Pool Auth
    auto_verified_attrs = cognito.AutoVerifiedAttrs(email=True)
    sign_in_aliases = cognito.SignInAliases(email=True, username=True)
    user_pool = cognito.UserPool(
        self, "UserPool",
        self_sign_up_enabled=True,
        auto_verify=auto_verified_attrs,
        sign_in_aliases=sign_in_aliases,
    )
cognito.UserPoolClient(self, "UserPoolClient", user_pool=user_pool, generate_secret=False) identity_pool = cognito.CfnIdentityPool( self, "ImageRekognitionIdentityPool", allow_unauthenticated_identities=False, cognito_identity_providers=[{ "clientId": user_pool_client.user_pool_client_id, "providerName": user_pool.user_pool_provider_name, }], ) # API Gateway cors_options = apigw.CorsOptions(allow_origins=apigw.Cors.ALL_ORIGINS, allow_methods=apigw.Cors.ALL_METHODS) api = apigw.LambdaRestApi( self, "imageAPI", default_cors_preflight_options=cors_options, handler=serviceFn, proxy=False, ) auth = apigw.CfnAuthorizer( self, "ApiGatewayAuthorizer", name="customer-authorizer", identity_source="method.request.header.Authorization", provider_arns=[user_pool.user_pool_arn], rest_api_id=api.rest_api_id, # type=apigw.AuthorizationType.COGNITO, type="COGNITO_USER_POOLS", ) assumed_by = iam.FederatedPrincipal( "cognito-identity.amazon.com", conditions={ "StringEquals": { "cognito-identity.amazonaws.com:aud": identity_pool.ref }, "ForAnyValue:StringLike": { "cognito-identity.amazonaws.com:amr": "authenticated" }, }, assume_role_action="sts:AssumeRoleWithWebIdentity", ) authenticated_role = iam.Role( self, "ImageRekognitionAuthenticatedRole", assumed_by=assumed_by, ) # IAM policy granting users permission to get and put their pictures policy_statement = iam.PolicyStatement( actions=["s3:GetObject", "s3:PutObject"], effect=iam.Effect.ALLOW, resources=[ image_bucket.bucket_arn + "/private/${cognito-identity.amazonaws.com:sub}/*", image_bucket.bucket_arn + "/private/${cognito-identity.amazonaws.com:sub}/", resized_image_bucket.bucket_arn + "/private/${cognito-identity.amazonaws.com:sub}/*", resized_image_bucket.bucket_arn + "/private/${cognito-identity.amazonaws.com:sub}/", ], ) # IAM policy granting users permission to list their pictures list_policy_statement = iam.PolicyStatement( actions=["s3:ListBucket"], effect=iam.Effect.ALLOW, resources=[ image_bucket.bucket_arn, resized_image_bucket.bucket_arn ], conditions={ "StringLike": { "s3:prefix": ["private/${cognito-identity.amazonaws.com:sub}/*"] } }, ) authenticated_role.add_to_policy(policy_statement) authenticated_role.add_to_policy(list_policy_statement) # Attach role to our Identity Pool cognito.CfnIdentityPoolRoleAttachment( self, "IdentityPoolRoleAttachment", identity_pool_id=identity_pool.ref, roles={"authenticated": authenticated_role.role_arn}, ) # Get some outputs from cognito cdk.CfnOutput(self, "UserPoolId", value=user_pool.user_pool_id) cdk.CfnOutput(self, "AppClientId", value=user_pool_client.user_pool_client_id) cdk.CfnOutput(self, "IdentityPoolId", value=identity_pool.ref) # New Amazon API Gateway with AWS Lambda Integration success_response = apigw.IntegrationResponse( status_code="200", response_parameters={ "method.response.header.Access-Control-Allow-Origin": "'*'" }, ) error_response = apigw.IntegrationResponse( selection_pattern="(\n|.)+", status_code="500", response_parameters={ "method.response.header.Access-Control-Allow-Origin": "'*'" }, ) request_template = json.dumps({ "action": "$util.escapeJavaScript($input.params('action'))", "key": "$util.escapeJavaScript($input.params('key'))", }) lambda_integration = apigw.LambdaIntegration( serviceFn, proxy=False, request_parameters={ "integration.request.querystring.action": "method.request.querystring.action", "integration.request.querystring.key": "method.request.querystring.key", }, request_templates={"application/json": request_template}, 
        passthrough_behavior=apigw.PassthroughBehavior.WHEN_NO_TEMPLATES,
        integration_responses=[success_response, error_response],
    )

    imageAPI = api.root.add_resource("images")

    success_resp = apigw.MethodResponse(
        status_code="200",
        response_parameters={
            "method.response.header.Access-Control-Allow-Origin": True
        },
    )
    error_resp = apigw.MethodResponse(
        status_code="500",
        response_parameters={
            "method.response.header.Access-Control-Allow-Origin": True
        },
    )

    # GET /images
    get_method = imageAPI.add_method(
        "GET",
        lambda_integration,
        authorization_type=apigw.AuthorizationType.COGNITO,
        request_parameters={
            "method.request.querystring.action": True,
            "method.request.querystring.key": True,
        },
        method_responses=[success_resp, error_resp],
    )
    # DELETE /images
    delete_method = imageAPI.add_method(
        "DELETE",
        lambda_integration,
        authorization_type=apigw.AuthorizationType.COGNITO,
        request_parameters={
            "method.request.querystring.action": True,
            "method.request.querystring.key": True,
        },
        method_responses=[success_resp, error_resp],
    )

    # Override the authorizer id because it doesn't work when defining it as a
    # param in add_method
    get_method_resource = get_method.node.find_child("Resource")
    get_method_resource.add_property_override("AuthorizerId", auth.ref)
    delete_method_resource = delete_method.node.find_child("Resource")
    delete_method_resource.add_property_override("AuthorizerId", auth.ref)

    # Building SQS queue and DeadLetter Queue
    dl_queue = sqs.Queue(
        self, "ImageDLQueue",
        queue_name="ImageDLQueue",
    )
    dl_queue_opts = sqs.DeadLetterQueue(max_receive_count=2, queue=dl_queue)

    queue = sqs.Queue(
        self, "ImageQueue",
        queue_name="ImageQueue",
        visibility_timeout=cdk.Duration.seconds(30),
        receive_message_wait_time=cdk.Duration.seconds(20),
        dead_letter_queue=dl_queue_opts,
    )

    # S3 Bucket Create Notification to SQS
    # Whenever an image is uploaded, add it to the queue
    image_bucket.add_object_created_notification(
        s3n.SqsDestination(queue),
        s3.NotificationKeyFilter(prefix="private/"))
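# A hedged illustration of what serviceFn receives through the non-proxy
# integration above: the request template maps the two querystrings into the
# event body, so GET /images?action=getLabels&key=foo.jpg arrives roughly as
# {"action": "getLabels", "key": "foo.jpg"}. "getLabels" is a hypothetical
# action name; the real set is defined inside the service Lambda, not shown here.
def handler(event, context):  # sketch of the Lambda side
    action = event["action"]  # e.g. "getLabels" (hypothetical)
    key = event["key"]
    return {"action": action, "key": key}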
def __init__(self, scope: core.Construct, _id: str, **kwargs) -> None:
    super().__init__(scope, _id, **kwargs)

    ddb_file_list = ddb.Table(self, "ddb",
                              partition_key=ddb.Attribute(name="Key", type=ddb.AttributeType.STRING),
                              billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

    sqs_queue_DLQ = sqs.Queue(self, "sqs_DLQ",
                              visibility_timeout=core.Duration.minutes(15),
                              retention_period=core.Duration.days(14))
    sqs_queue = sqs.Queue(self, "sqs_queue",
                          visibility_timeout=core.Duration.minutes(15),
                          retention_period=core.Duration.days(14),
                          dead_letter_queue=sqs.DeadLetterQueue(
                              max_receive_count=100,
                              queue=sqs_queue_DLQ))

    handler = lam.Function(self, "lambdaFunction",
                           code=lam.Code.asset("./lambda"),
                           handler="lambda_function.lambda_handler",
                           runtime=lam.Runtime.PYTHON_3_8,
                           memory_size=1024,
                           timeout=core.Duration.minutes(15),
                           tracing=lam.Tracing.ACTIVE,
                           environment={
                               'table_queue_name': ddb_file_list.table_name,
                               'Des_bucket_default': Des_bucket_default,
                               'Des_prefix_default': Des_prefix_default,
                               'StorageClass': StorageClass,
                               'aws_access_key_id': aws_access_key_id,
                               'aws_secret_access_key': aws_secret_access_key,
                               'aws_access_key_region': aws_access_key_region
                           })

    ddb_file_list.grant_read_write_data(handler)
    handler.add_event_source(SqsEventSource(sqs_queue))

    s3bucket = s3.Bucket(self, "s3bucket")
    s3bucket.grant_read(handler)
    s3bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                    s3n.SqsDestination(sqs_queue))

    # You can import an existing bucket and grant access to the Lambda:
    # exist_s3bucket = s3.Bucket.from_bucket_name(self, "import_bucket",
    #                                             bucket_name="your_bucket_name")
    # exist_s3bucket.grant_read(handler)
    # But you have to add SQS as the imported bucket's event notification manually;
    # that isn't supported by CloudFormation. A workaround is to add
    # on_cloud_trail_event for the bucket, but that triggers CloudTrail first.
    # Because the bucket is imported, you must manually create the bucket event
    # that triggers SQS and set the SQS permission allowing this bucket to trigger it.

    core.CfnOutput(self, "DynamoDB_Table", value=ddb_file_list.table_name)
    core.CfnOutput(self, "SQS_Job_Queue", value=sqs_queue.queue_name)
    core.CfnOutput(self, "SQS_Job_Queue_DLQ", value=sqs_queue_DLQ.queue_name)
    core.CfnOutput(self, "Worker_Lambda_Function", value=handler.function_name)
    core.CfnOutput(self, "New_S3_Bucket", value=s3bucket.bucket_name)

    # Create Lambda log filters to build network traffic metrics
    handler.log_group.add_metric_filter("Complete-bytes",
                                        metric_name="Complete-bytes",
                                        metric_namespace="s3_migrate",
                                        metric_value="$bytes",
                                        filter_pattern=logs.FilterPattern.literal(
                                            '[info, date, sn, p="--->Complete", bytes, key]'))
    handler.log_group.add_metric_filter("Uploading-bytes",
                                        metric_name="Uploading-bytes",
                                        metric_namespace="s3_migrate",
                                        metric_value="$bytes",
                                        filter_pattern=logs.FilterPattern.literal(
                                            '[info, date, sn, p="--->Uploading", bytes, key]'))
    handler.log_group.add_metric_filter("Downloading-bytes",
                                        metric_name="Downloading-bytes",
                                        metric_namespace="s3_migrate",
                                        metric_value="$bytes",
                                        filter_pattern=logs.FilterPattern.literal(
                                            '[info, date, sn, p="--->Downloading", bytes, key]'))
    lambda_metric_Complete = cw.Metric(namespace="s3_migrate",
                                       metric_name="Complete-bytes",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))
    lambda_metric_Upload = cw.Metric(namespace="s3_migrate",
                                     metric_name="Uploading-bytes",
                                     statistic="Sum",
                                     period=core.Duration.minutes(1))
    lambda_metric_Download = cw.Metric(namespace="s3_migrate",
                                       metric_name="Downloading-bytes",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))
metric_value="1", filter_pattern=logs.FilterPattern.literal( '"ERROR"')) handler.log_group.add_metric_filter("WARNING", metric_name="WARNING-Logs", metric_namespace="s3_migrate", metric_value="1", filter_pattern=logs.FilterPattern.literal( '"WARNING"')) log_metric_ERROR = cw.Metric(namespace="s3_migrate", metric_name="ERROR-Logs", statistic="Sum", period=core.Duration.minutes(1)) log_metric_WARNING = cw.Metric(namespace="s3_migrate", metric_name="WARNING-Logs", statistic="Sum", period=core.Duration.minutes(1)) # Dashboard to monitor SQS and Lambda board = cw.Dashboard(self, "s3_migrate", dashboard_name="s3_migrate_serverless") board.add_widgets(cw.GraphWidget(title="Lambda-NETWORK", left=[lambda_metric_Download, lambda_metric_Upload, lambda_metric_Complete]), # TODO: here monitor all lambda concurrency not just the working one. Limitation from CDK # Lambda now supports monitor single lambda concurrency, will change this after CDK support cw.GraphWidget(title="Lambda-all-concurrent", left=[handler.metric_all_concurrent_executions(period=core.Duration.minutes(1))]), cw.GraphWidget(title="Lambda-invocations/errors/throttles", left=[handler.metric_invocations(period=core.Duration.minutes(1)), handler.metric_errors(period=core.Duration.minutes(1)), handler.metric_throttles(period=core.Duration.minutes(1))]), cw.GraphWidget(title="Lambda-duration", left=[handler.metric_duration(period=core.Duration.minutes(1))]), ) board.add_widgets(cw.GraphWidget(title="SQS-Jobs", left=[sqs_queue.metric_approximate_number_of_messages_visible( period=core.Duration.minutes(1) ), sqs_queue.metric_approximate_number_of_messages_not_visible( period=core.Duration.minutes(1) )]), cw.GraphWidget(title="SQS-DeadLetterQueue", left=[sqs_queue_DLQ.metric_approximate_number_of_messages_visible( period=core.Duration.minutes(1) ), sqs_queue_DLQ.metric_approximate_number_of_messages_not_visible( period=core.Duration.minutes(1) )]), cw.GraphWidget(title="ERROR/WARNING Logs", left=[log_metric_ERROR], right=[log_metric_WARNING]), cw.SingleValueWidget(title="Running/Waiting and Dead Jobs", metrics=[sqs_queue.metric_approximate_number_of_messages_not_visible( period=core.Duration.minutes(1) ), sqs_queue.metric_approximate_number_of_messages_visible( period=core.Duration.minutes(1) ), sqs_queue_DLQ.metric_approximate_number_of_messages_not_visible( period=core.Duration.minutes(1) ), sqs_queue_DLQ.metric_approximate_number_of_messages_visible( period=core.Duration.minutes(1) )], height=6) ) # Alarm for queue - DLQ alarm_DLQ = cw.Alarm(self, "SQS_DLQ", alarm_name="s3-migration-serverless-SQS Dead Letter Queue", metric=sqs_queue_DLQ.metric_approximate_number_of_messages_visible(), threshold=0, comparison_operator=cw.ComparisonOperator.GREATER_THAN_THRESHOLD, evaluation_periods=1, datapoints_to_alarm=1) alarm_topic = sns.Topic(self, "SQS queue-DLQ has dead letter") alarm_topic.add_subscription(subscription=sub.EmailSubscription(alarm_email)) alarm_DLQ.add_alarm_action(action.SnsAction(alarm_topic)) # Alarm for queue empty, i.e. 
    # Alarm for queue empty, i.e. no visible messages and no in-flight messages
    # metric_all_message = cw.MathExpression(
    #     expression="a + b",
    #     label="empty_queue_expression",
    #     using_metrics={
    #         "a": sqs_queue.metric_approximate_number_of_messages_visible(),
    #         "b": sqs_queue.metric_approximate_number_of_messages_not_visible()
    #     }
    # )
    # alarm_0 = cw.Alarm(self, "SQSempty",
    #                    alarm_name="SQS queue empty-Serverless",
    #                    metric=metric_all_message,
    #                    threshold=0,
    #                    comparison_operator=cw.ComparisonOperator.LESS_THAN_OR_EQUAL_TO_THRESHOLD,
    #                    evaluation_periods=3,
    #                    datapoints_to_alarm=3,
    #                    treat_missing_data=cw.TreatMissingData.IGNORE)
    # alarm_topic = sns.Topic(self, "SQS queue empty-Serverless")
    # alarm_topic.add_subscription(subscription=sub.EmailSubscription(alarm_email))
    # alarm_0.add_alarm_action(action.SnsAction(alarm_topic))
    # core.CfnOutput(self, "Alarm", value="CloudWatch SQS queue empty Alarm for Serverless: " + alarm_email)

    core.CfnOutput(self, "Dashboard", value="CloudWatch Dashboard name s3_migrate_serverless")
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # Tag all constructs with the project for easy billing drilldown,
    # filtering, and organization.
    core.Tags.of(self).add('project', 'MediaTranscription')

    # Media files bucket
    media_bucket = s3.Bucket(
        self, 'media-transcription-bucket',
        encryption=s3.BucketEncryption.S3_MANAGED,
    )

    # SQS queue for media files bucket event notifications
    media_bucket_event_queue = sqs.Queue(
        self, 'media-transcription-event-notification-queue',
        queue_name='media-transcription-event-notification-queue',
        visibility_timeout=core.Duration.seconds(60),
        dead_letter_queue=sqs.DeadLetterQueue(
            max_receive_count=3,
            queue=sqs.Queue(
                self, 'media-transcription-event-notifications-dlq',
                queue_name='media-transcription-event-notifications-dlq',
            )),
    )

    # S3 object created notifications sent to SQS queue
    media_bucket.add_event_notification(
        s3.EventType.OBJECT_CREATED,
        s3n.SqsDestination(media_bucket_event_queue),
        *[s3.NotificationKeyFilter(prefix='media-input/')],
    )

    # Lambda function to create/submit Transcribe jobs
    transcribe_job_init_fn = lambda_.Function(
        self, 'transcribe-job-init-fn',
        runtime=lambda_.Runtime.PYTHON_3_8,
        code=lambda_.Code.from_asset(
            '../lambdas/transcribe-job-init-fn',
            # The following is just dumb.
            # The Lambda runtime doesn't use the latest boto3 by default.
            # In order to use the latest boto3, we have to pip install
            # and bundle locally using Docker.
            # Q: Why do we need the latest boto3?
            # A: https://github.com/boto/boto3/issues/2630
            # I'll have to delete the ECR containers to avoid cost.
            # TODO: Revert back to normal in like a month I guess.
            bundling={
                'image': lambda_.Runtime.PYTHON_3_8.bundling_docker_image,
                'command': [
                    'bash', '-c',
                    'pip install -r requirements.txt -t /asset-output && cp -au . /asset-output'
                ]
            }),
        handler='fn.handler',
        reserved_concurrent_executions=1,  # Effectively single-threaded
    )

    # Triggered by SQS messages created for media file puts
    transcribe_job_init_fn.add_event_source(
        les.SqsEventSource(
            queue=media_bucket_event_queue,
            batch_size=5,
            enabled=True,
        ))

    # Grant access to start transcription jobs
    transcribe_job_init_fn.add_to_role_policy(
        statement=iam.PolicyStatement(
            actions=['transcribe:StartTranscriptionJob'],
            resources=['*'],
            effect=iam.Effect.ALLOW,
        ))

    # Grant the Lambda role read and write access to the input and output
    # portions of the S3 bucket.
    # Q: Why grant Lambda the permissions instead of the Transcribe service?
    # A: Two-fold:
    #    - i) https://amzn.to/321Nx5I
    #    - ii) Granting just to this Lambda means other Transcribe jobs
    #          across the account cannot use this bucket (least privilege).
    media_bucket.grant_read(
        identity=transcribe_job_init_fn.grant_principal,
        objects_key_pattern='media-input/*')
    # Cannot specify a prefix for writes, as Transcribe will not accept a job
    # unless it has write permission on the whole bucket.
    # Edit: The above statement was from when I had to use '*' for writes. But
    # now I granted access to that .write_access_check_file.temp file and it
    # seems to all work now?
    media_bucket.grant_write(
        identity=transcribe_job_init_fn.grant_principal,
        objects_key_pattern='transcribe-output-raw/*')
    # This is just as frustrating to you as it is to me.
    media_bucket.grant_write(
        identity=transcribe_job_init_fn.grant_principal,
        objects_key_pattern='.write_access_check_file.temp')

    # DynamoDB table for Jobs metadata
    jobs_metadata_table = ddb.Table(
        self, 'MediaTranscription-TranscriptionJobs',
        table_name='MediaTranscription-TranscriptionJobs',
        partition_key=ddb.Attribute(
            name='Bucket-Key-ETag',
            type=ddb.AttributeType.STRING,
        ),
        billing_mode=ddb.BillingMode.PAY_PER_REQUEST,
    )
    jobs_metadata_table.grant(transcribe_job_init_fn.grant_principal, *[
        'dynamodb:GetItem',
        'dynamodb:PutItem',
    ])

    # Create IAM Group with read/write permissions to the S3 bucket
    # TODO: Make this more federated and robust
    console_users_group = iam.Group(self, 'MediaTranscriptionConsoleUsers')
    console_users_group.attach_inline_policy(policy=iam.Policy(
        self, 'MediaTranscriptionConsoleUserS3Access',
        statements=[
            iam.PolicyStatement(
                effect=iam.Effect.ALLOW,
                actions=['s3:ListBucket'],
                resources=[media_bucket.bucket_arn],
            ),
            iam.PolicyStatement(
                effect=iam.Effect.ALLOW,
                actions=['s3:GetObject', 's3:PutObject'],
                resources=[media_bucket.arn_for_objects('media-input/*')],
            ),
            iam.PolicyStatement(
                effect=iam.Effect.ALLOW,
                actions=['s3:GetObject'],
                resources=[media_bucket.arn_for_objects('transcribe-output-raw/*')],
            ),
        ],
    ))
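# A hedged sketch of what transcribe-job-init-fn presumably does with the
# permissions granted above. The job name, language detection, and output
# prefix are assumptions (boto3's start_transcription_job accepts all of these
# parameters); writing with OutputBucketName is what makes Transcribe probe
# the .write_access_check_file.temp object mentioned in the comments.
import boto3

def start_job(bucket: str, key: str, job_name: str) -> None:
    transcribe = boto3.client("transcribe")
    transcribe.start_transcription_job(
        TranscriptionJobName=job_name,  # must be unique per job
        Media={"MediaFileUri": f"s3://{bucket}/{key}"},
        IdentifyLanguage=True,
        OutputBucketName=bucket,
        OutputKey=f"transcribe-output-raw/{job_name}.json")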
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # Create image bucket
    image_bucket = s3.Bucket(self, 'inbound_image_s3_bucket')

    # Create the image processing queue
    image_process_queue = sqs.Queue(
        self, "image_process_queue",
        visibility_timeout=core.Duration.seconds(300),
        retention_period=core.Duration.days(1)
    )

    # Create the image response queue
    response_queue = sqs.Queue(
        self, "results_queue",
        visibility_timeout=core.Duration.seconds(300),
        retention_period=core.Duration.days(1)
    )

    # Send put-object notifications to the SQS queue
    image_bucket.add_event_notification(event=s3.EventType.OBJECT_CREATED_PUT,
                                        dest=s3n.SqsDestination(image_process_queue))

    # Define the AWS Lambda that calls Amazon Rekognition DetectFaces
    detect_faces_lambda = _lambda.Function(
        self, 'detect_faces',
        runtime=_lambda.Runtime.PYTHON_3_7,
        handler='detect_faces.lambda_handler',
        code=_lambda.Code.asset('./lambda'),
        timeout=core.Duration.seconds(30),
        environment={'SQS_RESPONSE_QUEUE': response_queue.queue_name},
        reserved_concurrent_executions=50
    )

    # Set the SQS image_process_queue as the event source for detect_faces_lambda
    detect_faces_lambda.add_event_source(
        _lambda_events.SqsEventSource(image_process_queue, batch_size=1))

    # Allow the Lambda to send messages to the response queue
    response_queue.grant_send_messages(detect_faces_lambda)

    # Allow the Lambda to call Rekognition by adding an IAM policy statement
    detect_faces_lambda.add_to_role_policy(
        iam.PolicyStatement(actions=['rekognition:*'], resources=['*']))

    # Allow the Lambda to read from S3
    image_bucket.grant_read(detect_faces_lambda)

    # Define the DynamoDB table
    results_table = dynamodb.Table(
        self, 'detect_faces_results',
        table_name='detect_faces_results',
        partition_key=dynamodb.Attribute(name='id', type=dynamodb.AttributeType.STRING),
        read_capacity=200,
        write_capacity=200
    )

    # Define the AWS Lambda that writes results into the DynamoDB results_table
    write_results_lambda = _lambda.Function(
        self, 'write_results',
        runtime=_lambda.Runtime.PYTHON_3_7,
        handler='write_results.lambda_handler',
        code=_lambda.Code.asset('./lambda'),
        timeout=core.Duration.seconds(30),
        environment={'TABLE_NAME': results_table.table_name}
    )

    # Set the SQS response_queue as the event source for write_results_lambda
    write_results_lambda.add_event_source(
        _lambda_events.SqsEventSource(response_queue, batch_size=1))

    # Allow write_results_lambda to write to the DynamoDB results_table
    results_table.grant_write_data(write_results_lambda)

    # Allow write_results_lambda to read messages from the SQS response_queue
    response_queue.grant_consume_messages(write_results_lambda)

    # Output the Amazon S3 image bucket
    core.CfnOutput(self, 'cdk_output',
                   value=image_bucket.bucket_name,
                   description='Input Amazon S3 Image Bucket')
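# A hedged sketch of the detect_faces handler wired above (the real body lives
# in ./lambda/detect_faces.py, which is not shown): read each S3 record from the
# SQS event, call Rekognition DetectFaces on the object, and push the result to
# the response queue. The env var name matches the stack; the message shape
# ({"id": ..., "faces": ...}) is an assumption aligned with the table's 'id' key.
import json
import os
import boto3

rekognition = boto3.client("rekognition")
sqs_client = boto3.client("sqs")

def lambda_handler(event, context):
    for sqs_record in event["Records"]:
        body = json.loads(sqs_record["body"])
        for s3_record in body.get("Records", []):
            bucket = s3_record["s3"]["bucket"]["name"]
            key = s3_record["s3"]["object"]["key"]
            faces = rekognition.detect_faces(
                Image={"S3Object": {"Bucket": bucket, "Name": key}},
                Attributes=["ALL"])
            # grant_send_messages also covers sqs:GetQueueUrl
            queue_url = sqs_client.get_queue_url(
                QueueName=os.environ["SQS_RESPONSE_QUEUE"])["QueueUrl"]
            sqs_client.send_message(
                QueueUrl=queue_url,
                MessageBody=json.dumps({"id": key,
                                        "faces": faces["FaceDetails"]}))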
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # If left unchecked, this pattern could "fan out" on the transform and load
    # lambdas to the point that it consumes all resources on the account. This is
    # why we are limiting concurrency to 2 on all 3 lambdas. Feel free to raise this.
    lambda_throttle_size = 2

    ####
    # DynamoDB Table
    # This is where our transformed data ends up
    ####
    table = dynamo_db.Table(self, "TransformedData",
                            partition_key=dynamo_db.Attribute(
                                name="id",
                                type=dynamo_db.AttributeType.STRING))

    ####
    # S3 Landing Bucket
    # This is where the user uploads the file to be transformed
    ####
    bucket = s3.Bucket(self, "LandingBucket")

    ####
    # Queue that listens for S3 Bucket events
    ####
    queue = sqs.Queue(self, 'newObjectInLandingBucketEventQueue',
                      visibility_timeout=core.Duration.seconds(300))
    bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                  s3n.SqsDestination(queue))

    # EventBridge Permissions
    event_bridge_put_policy = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        resources=['*'],
        actions=['events:PutEvents'])

    ####
    # Fargate ECS Task Creation to pull data from S3
    #
    # Fargate is used here because, if you had a seriously large file, you
    # could stream the data to Fargate for as long as needed before putting
    # the data onto EventBridge, or raise the memory/storage to download the
    # whole file. Lambda has limitations on runtime and memory/storage.
    ####
    vpc = ec2.Vpc(self, "Vpc", max_azs=2)

    logging = ecs.AwsLogDriver(stream_prefix='TheEventBridgeETL',
                               log_retention=logs.RetentionDays.ONE_WEEK)

    cluster = ecs.Cluster(self, 'Ec2Cluster', vpc=vpc)

    task_definition = ecs.TaskDefinition(
        self, 'FargateTaskDefinition',
        memory_mib="512",
        cpu="256",
        compatibility=ecs.Compatibility.FARGATE)

    # We need to give our Fargate container permission to put events on our EventBridge
    task_definition.add_to_task_role_policy(event_bridge_put_policy)
    # Grant the Fargate container access to the object that was uploaded to S3
    bucket.grant_read(task_definition.task_role)

    container = task_definition.add_container(
        'AppContainer',
        image=ecs.ContainerImage.from_asset('container/s3DataExtractionTask'),
        logging=logging,
        environment={
            'S3_BUCKET_NAME': bucket.bucket_name,
            'S3_OBJECT_KEY': ''
        })

    ####
    # Lambdas
    #
    # These are used for 4 phases:
    #
    # Extract   - kicks off the ECS Fargate task to download data and splinter
    #             it into EventBridge events
    # Transform - takes the two comma-separated strings and produces a JSON object
    # Load      - inserts the data into DynamoDB
    # Observe   - a lambda that subscribes to all events and logs them centrally
    ####

    subnet_ids = []
    for subnet in vpc.private_subnets:
        subnet_ids.append(subnet.subnet_id)

    ####
    # Extract
    # defines an AWS Lambda resource to trigger our Fargate ECS task
    ####
    extract_lambda = _lambda.Function(
        self, "extractLambdaHandler",
        runtime=_lambda.Runtime.NODEJS_12_X,
        handler="s3SqsEventConsumer.handler",
        code=_lambda.Code.from_asset("lambdas/extract"),
        reserved_concurrent_executions=lambda_throttle_size,
        environment={
            "CLUSTER_NAME": cluster.cluster_name,
            "TASK_DEFINITION": task_definition.task_definition_arn,
            "SUBNETS": json.dumps(subnet_ids),
            "CONTAINER_NAME": container.container_name
        })
    queue.grant_consume_messages(extract_lambda)
    extract_lambda.add_event_source(_event.SqsEventSource(queue=queue))
    extract_lambda.add_to_role_policy(event_bridge_put_policy)

    run_task_policy_statement = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        resources=[task_definition.task_definition_arn],
        actions=['ecs:RunTask'])
    extract_lambda.add_to_role_policy(run_task_policy_statement)

    task_execution_role_policy_statement = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        resources=[
            task_definition.obtain_execution_role().role_arn,
            task_definition.task_role.role_arn
        ],
        actions=['iam:PassRole'])
    extract_lambda.add_to_role_policy(task_execution_role_policy_statement)

    ####
    # Transform
    # defines a lambda to transform the data that was extracted from s3
    ####
    transform_lambda = _lambda.Function(
        self, "TransformLambdaHandler",
        runtime=_lambda.Runtime.NODEJS_12_X,
        handler="transform.handler",
        code=_lambda.Code.from_asset("lambdas/transform"),
        reserved_concurrent_executions=lambda_throttle_size,
        timeout=core.Duration.seconds(3))
    transform_lambda.add_to_role_policy(event_bridge_put_policy)

    # Create EventBridge rule to route extraction events
    transform_rule = events.Rule(
        self, 'transformRule',
        description='Data extracted from S3, needs to be transformed',
        event_pattern=events.EventPattern(
            source=['cdkpatterns.the-eventbridge-etl'],
            detail_type=['s3RecordExtraction'],
            detail={"status": ["extracted"]}))
    transform_rule.add_target(targets.LambdaFunction(handler=transform_lambda))

    ####
    # Load
    # load the transformed data into dynamodb
    ####
    load_lambda = _lambda.Function(
        self, "LoadLambdaHandler",
        runtime=_lambda.Runtime.NODEJS_12_X,
        handler="load.handler",
        code=_lambda.Code.from_asset("lambdas/load"),
        reserved_concurrent_executions=lambda_throttle_size,
        timeout=core.Duration.seconds(3),
        environment={"TABLE_NAME": table.table_name})
    load_lambda.add_to_role_policy(event_bridge_put_policy)
    table.grant_read_write_data(load_lambda)

    load_rule = events.Rule(
        self, 'loadRule',
        description='Data transformed, needs to be loaded into dynamodb',
        event_pattern=events.EventPattern(
            source=['cdkpatterns.the-eventbridge-etl'],
            detail_type=['transform'],
            detail={"status": ["transformed"]}))
    load_rule.add_target(targets.LambdaFunction(handler=load_lambda))

    ####
    # Observe
    # Watch for all cdkpatterns.the-eventbridge-etl events and log them centrally
    ####
    observe_lambda = _lambda.Function(
        self, "ObserveLambdaHandler",
        runtime=_lambda.Runtime.NODEJS_12_X,
        handler="observe.handler",
        code=_lambda.Code.from_asset("lambdas/observe"),
        reserved_concurrent_executions=lambda_throttle_size,
        timeout=core.Duration.seconds(3))

    observe_rule = events.Rule(
        self, 'observeRule',
        description='all events are caught here and logged centrally',
        event_pattern=events.EventPattern(
            source=['cdkpatterns.the-eventbridge-etl']))
    observe_rule.add_target(targets.LambdaFunction(handler=observe_lambda))
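# A hedged sketch of the extract Lambda's core call (the actual handler is the
# Node.js s3SqsEventConsumer above; this is equivalent logic in Python). It
# shows why S3_OBJECT_KEY is left blank in the task definition: the value is
# supplied per run via containerOverrides, using the env vars the stack sets.
import json
import os
import boto3

ecs_client = boto3.client("ecs")

def run_extraction_task(object_key: str) -> None:
    ecs_client.run_task(
        cluster=os.environ["CLUSTER_NAME"],
        taskDefinition=os.environ["TASK_DEFINITION"],
        launchType="FARGATE",
        networkConfiguration={"awsvpcConfiguration": {
            "subnets": json.loads(os.environ["SUBNETS"]),
            "assignPublicIp": "DISABLED"}},
        overrides={"containerOverrides": [{
            "name": os.environ["CONTAINER_NAME"],
            "environment": [{"name": "S3_OBJECT_KEY",
                             "value": object_key}]}]})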