Example #1
    def __init__(self, scope: core.Construct, _id: str, bucket_para,
                 **kwargs) -> None:
        super().__init__(scope, _id, **kwargs)

        self.ddb_file_list = ddb.Table(
            self,
            "s3_migrate_ddb",
            partition_key=ddb.Attribute(name="Key",
                                        type=ddb.AttributeType.STRING),
            billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

        self.sqs_queue_DLQ = sqs.Queue(
            self,
            "s3_migrate_sqs_DLQ",
            visibility_timeout=core.Duration.hours(1),
            retention_period=core.Duration.days(14))
        self.sqs_queue = sqs.Queue(self,
                                   "s3_migrate_sqs_queue",
                                   visibility_timeout=core.Duration.hours(1),
                                   retention_period=core.Duration.days(14),
                                   dead_letter_queue=sqs.DeadLetterQueue(
                                       max_receive_count=24,
                                       queue=self.sqs_queue_DLQ))
        self.ssm_bucket_para = ssm.StringParameter(self,
                                                   "s3_migrate_bucket_para",
                                                   string_value=json.dumps(
                                                       bucket_para, indent=4))

        # You need to manually create ssm_credential_para in SSM Parameter Store before deploying this CDK stack.
        # It is imported here; the parameter version number must match exactly!
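        # A minimal sketch of creating that parameter up front, assuming the AWS CLI is available;
        # the JSON keys shown are illustrative, not something this stack defines:
        #   aws ssm put-parameter --name <ssm_parameter_credentials> --type SecureString \
        #       --value '{"aws_access_key_id": "...", "aws_secret_access_key": "...", "region": "..."}'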
        self.ssm_credential_para = ssm.StringParameter.from_secure_string_parameter_attributes(
            self,
            "ssm_parameter_credentials",
            parameter_name=ssm_parameter_credentials,
            version=1)

        # Create a new S3 bucket; new objects in this bucket will trigger SQS jobs.
        # This is not for existing S3 buckets: the jobsender scans those and creates the SQS jobs itself.
        self.s3bucket = s3.Bucket(self, "s3_migrate_bucket")
        self.s3bucket.add_event_notification(
            s3.EventType.OBJECT_CREATED, s3n.SqsDestination(self.sqs_queue))

        # Deploy code
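        # BucketDeployment uploads the contents of the local ./code directory into the
        # deploy bucket when the stack is deployed.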
        self.s3_deploy = s3.Bucket(self, "s3_migrate_deploybucket")
        s3d.BucketDeployment(self,
                             "deploy_code",
                             sources=[s3d.Source.asset("./code")],
                             destination_bucket=self.s3_deploy)

        core.CfnOutput(self,
                       'NewS3Bucket_MigrateObjects',
                       value=self.s3bucket.bucket_name)
        core.CfnOutput(self,
                       'NewS3Bucket_deploy_code',
                       value=self.s3_deploy.bucket_name)
Example #2
    def __init__(self, scope: core.Construct, _id: str, **kwargs) -> None:
        super().__init__(scope, _id, **kwargs)

        ddb_file_list = ddb.Table(self, "ddb",
                                  partition_key=ddb.Attribute(name="Key", type=ddb.AttributeType.STRING),
                                  billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

        sqs_queue_DLQ = sqs.Queue(self, "sqs_DLQ",
                                  visibility_timeout=core.Duration.minutes(15),
                                  retention_period=core.Duration.days(14)
                                  )
        sqs_queue = sqs.Queue(self, "sqs_queue",
                              visibility_timeout=core.Duration.minutes(15),
                              retention_period=core.Duration.days(14),
                              dead_letter_queue=sqs.DeadLetterQueue(
                                  max_receive_count=100,
                                  queue=sqs_queue_DLQ
                              )
                              )
        handler = lam.Function(self, "lambdaFunction",
                               code=lam.Code.asset("./lambda"),
                               handler="lambda_function.lambda_handler",
                               runtime=lam.Runtime.PYTHON_3_8,
                               memory_size=1024,
                               timeout=core.Duration.minutes(15),
                               environment={
                                   'table_name': ddb_file_list.table_name,
                                   'queue_name': sqs_queue.queue_name,
                                   'Des_bucket_default': Des_bucket_default,
                                   'Des_prefix_default': Des_prefix_default,
                                   'Des_region': Des_region,
                                   'StorageClass': StorageClass,
                                   'aws_access_key_id': aws_access_key_id,
                                   'aws_secret_access_key': aws_secret_access_key
                               })
        ddb_file_list.grant_read_write_data(handler)
        handler.add_event_source(SqsEventSource(sqs_queue))

        s3bucket = s3.Bucket(self, "s3bucket")
        s3bucket.grant_read(handler)
        s3bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                        s3n.SqsDestination(sqs_queue))

        # You can import an existing bucket and grant access to lambda
        # exist_s3bucket = s3.Bucket.from_bucket_name(self, "import_bucket",
        #                                             bucket_name="you_bucket_name")
        # exist_s3bucket.grant_read(handler)

        # But you have to add SQS as the imported bucket's event notification manually; CloudFormation does not support it.
        # A workaround is to add on_cloud_trail_event for the bucket, but that triggers CloudTrail first.
        # Because the bucket is imported, you must manually create the bucket event trigger to SQS and set the SQS permission allowing the bucket to trigger it.
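        # A rough sketch of that manual setup with boto3 (the bucket name is a placeholder and
        # this would normally run outside of CDK, not inside this stack):
        #   import boto3
        #   boto3.client('s3').put_bucket_notification_configuration(
        #       Bucket='you_bucket_name',
        #       NotificationConfiguration={'QueueConfigurations': [{
        #           'QueueArn': sqs_queue.queue_arn,
        #           'Events': ['s3:ObjectCreated:*']}]})
        #   # plus an SQS queue policy allowing the s3.amazonaws.com service principal to
        #   # SendMessage for notifications coming from that bucket's ARN.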

        core.CfnOutput(self, "DynamoDB_Table", value=ddb_file_list.table_name)
        core.CfnOutput(self, "SQS_Job_Queue", value=sqs_queue.queue_name)
        core.CfnOutput(self, "SQS_Job_Queue_DLQ", value=sqs_queue_DLQ.queue_name)
        core.CfnOutput(self, "Worker_Lambda_Function", value=handler.function_name)
        core.CfnOutput(self, "New_S3_Bucket", value=s3bucket.bucket_name)
Example #3
    def storage(self):
        bucket = _s3.Bucket(self,
                            "Bucket",
                            removal_policy=core.RemovalPolicy.DESTROY)
        notification_queue = _sqs.Queue(
            self,
            "NotificationQueue",
        )
        bucket.add_object_created_notification(
            dest=_s3_notifications.SqsDestination(notification_queue))
        return bucket, notification_queue
Example #4
    def __init__(self, scope: core.Construct, id: str, sqsCfn, s3_loc_up,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        self.PREFIX = id

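        # Import the existing queue by ARN; the region is hard-coded to us-east-1 and the
        # account id is resolved from the deploying account at deploy time.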
        self.sqs_queue = sqs.Queue.from_queue_arn(
            self, "QueuefromCfn",
            f"arn:aws:sqs:us-east-1:{core.Aws.ACCOUNT_ID}:{sqsCfn.queue_name}")

        s3_loc_up.add_object_created_notification(
            aws_s3_notifications.SqsDestination(self.sqs_queue),
            _s3.NotificationKeyFilter(
                prefix='stdized-data/comprehend_results/csv/', suffix='.csv'))
Example #5
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        #This will create the s3 bucket in AWS
        bucket = s3.Bucket(self, "ssl_s3_bucket_raw")

        #This will create the sqs in AWS
        queue = sqs.Queue(self, "ssl_sqs_event_queue")

        #Create S3 notification object which points to SQS.
        notification = aws_s3_notifications.SqsDestination(queue)
        filter1 = s3.NotificationKeyFilter(prefix="home/")

        #Attach notification event to S3 bucket.
        bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                      notification, filter1)
Example #6
    def __init__(self, scope: core.Construct, _id: str, bucket_para,
                 **kwargs) -> None:
        super().__init__(scope, _id, **kwargs)

        self.ddb_file_list = ddb.Table(
            self,
            "ddb",
            table_name=table_queue_name,
            partition_key=ddb.Attribute(name="Key",
                                        type=ddb.AttributeType.STRING),
            billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

        self.sqs_queue_DLQ = sqs.Queue(
            self,
            "sqs_DLQ",
            queue_name=table_queue_name + "-DLQ",
            visibility_timeout=core.Duration.hours(1),
            retention_period=core.Duration.days(14))
        self.sqs_queue = sqs.Queue(self,
                                   "sqs_queue",
                                   queue_name=table_queue_name,
                                   visibility_timeout=core.Duration.hours(1),
                                   retention_period=core.Duration.days(14),
                                   dead_letter_queue=sqs.DeadLetterQueue(
                                       max_receive_count=24,
                                       queue=self.sqs_queue_DLQ))
        self.ssm_bucket_para = ssm.StringParameter(
            self,
            "para-bucket",
            string_value=json.dumps(bucket_para),
            parameter_name=ssm_parameter_bucket)

        # You need to manually create ssm_credential_para first and import it here; the version number must match exactly!
        self.ssm_credential_para = ssm.StringParameter.from_secure_string_parameter_attributes(
            self,
            "ssm_parameter_credentials",
            parameter_name=ssm_parameter_credentials,
            version=2)

        # Create a new S3 bucket; new objects in it will trigger SQS to start the migration job.
        # Existing S3 buckets are not configured here; the jobsender scans them and creates the SQS jobs.
        self.s3bucket = s3.Bucket(self, "newbucket")
        self.s3bucket.add_event_notification(
            s3.EventType.OBJECT_CREATED, s3n.SqsDestination(self.sqs_queue))
Example #7
    def __init__(self, app: App, id: str) -> None:
        super().__init__(app, id)

        # SQS queue
        queue = sqs.Queue(self, 's3-to-sqs-test')

        bucket = s3.Bucket(self, "MyBucket")
        bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                      s3n.SqsDestination(queue))

        # Output information about the created resources
        CfnOutput(self,
                  'sqsQueueUrl',
                  value=queue.queue_url,
                  description='The URL of the SQS queue')
        CfnOutput(self,
                  'bucketName',
                  value=bucket.bucket_name,
                  description='The name of the bucket created')
Example #8
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        #Create Role
        S3SqsKmsSampleStack.__Role = IamService.create_role(self)

        #Get the KMS key policy document
        kms_policy_document = IamService.get_kms_policy_documents(self)

        kms_key = kms.Key(self,
                          id='ssl_s3_sqs_kms_key',
                          alias='sslS3SqsKmsKey',
                          description='This is kms key',
                          enabled=True,
                          enable_key_rotation=True,
                          policy=kms_policy_document)

        #This will create the s3 bucket in AWS
        bucket = s3.Bucket(self,
                           "ssl_s3_bucket_raw_kms",
                           bucket_name="ssl-s3-bucket-kms-raw",
                           encryption=s3.BucketEncryption.KMS,
                           encryption_key=kms_key)

        #This will create the sqs in AWS
        queue = sqs.Queue(self,
                          "ssl_sqs_event_queue",
                          queue_name="ssl-sqs-kms-event-queue",
                          encryption=sqs.QueueEncryption.KMS,
                          encryption_master_key=kms_key)

        #queue.node.add_dependency(kms_key)
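        # Ensure the queue and the KMS key are created before the bucket.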
        bucket.node.add_dependency(queue, kms_key)
        #Create S3 notification object which points to SQS.
        notification = aws_s3_notifications.SqsDestination(queue)
        filter1 = s3.NotificationKeyFilter(prefix="home/")

        #Attach notification event to S3 bucket.

        bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                      notification, filter1)
Example #9
    def create_events(self, services):
        # kickoff_notification = aws_s3_notifications.LambdaDestination(services["lambda"]["kickoff"])
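        # S3 suffix filters are case-sensitive, so one notification is registered for each
        # case variant of every extension in the list below.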
        extensions = [
            "pdf", "pDf", "pDF", "pdF", "PDF", "Pdf",
            "png", "pNg", "pNG", "pnG", "PNG", "Png",
            "jpg", "jPg", "jPG", "jpG", "JPG", "Jpg"
        ]
        for extension in extensions:
            services["main_s3_bucket"].add_event_notification(
                aws_s3.EventType.OBJECT_CREATED,  
                aws_s3_notifications.SqsDestination(services["sf_sqs"]),
                aws_s3.NotificationKeyFilter(prefix="uploads/", suffix=extension)
            )    
        
        services["lambda"]["kickoff"].add_event_source(
            aws_lambda_event_sources.SqsEventSource(
                services["sf_sqs"], 
                batch_size=1
            )
        )
        
        services["lambda"]["analyzepdf"].add_event_source(
            aws_lambda_event_sources.SqsEventSource(
                services["textract_sqs"], 
                batch_size=1
            )
        )

        human_complete_target = aws_events_targets.LambdaFunction(services["lambda"]["humancomplete"])

        human_review_event_pattern = aws_events.EventPattern(
            source=["aws.sagemaker"],
            detail_type=["SageMaker A2I HumanLoop Status Change"]
        )

        aws_events.Rule(self, 
            "multipadepdfa2i_HumanReviewComplete", 
            event_pattern=human_review_event_pattern,
            targets=[human_complete_target]
        )
Example #10
    def __init__(self, app: core.App, id: str, **kwargs) -> None:
        super().__init__(app, id, **kwargs)

        # Policies
        s3_access_policy = iam.ManagedPolicy.from_managed_policy_arn(
            self,
            id="s3_access_policy",
            managed_policy_arn="arn:aws:iam::aws:policy/AmazonS3FullAccess")

        lambda_access_policy = iam.ManagedPolicy.from_managed_policy_arn(
            self,
            id="lambda_access_policy",
            managed_policy_arn="arn:aws:iam::aws:policy/AWSLambda_FullAccess")

        logs_policy = iam.ManagedPolicy.from_managed_policy_arn(
            self,
            id="logs_policy",
            managed_policy_arn=
            "arn:aws:iam::aws:policy/CloudWatchLogsFullAccess")

        # Roles
        lambda_role = iam.Role(
            self,
            id="lambda_role",
            assumed_by=iam.ServicePrincipal(service="lambda.amazonaws.com"),
            managed_policies=[
                s3_access_policy, lambda_access_policy, logs_policy
            ],
            role_name=f"midi-to-mp3-lambda-role")

        # SQS
        conversion_sqs = sqs.Queue(self,
                                   id=f"conversion_sqs",
                                   queue_name=f"conversion_sqs",
                                   visibility_timeout=core.Duration.hours(12),
                                   retention_period=core.Duration.days(1))

        kickoff_sqs = sqs.Queue(self,
                                id=f"kickoff_sqs",
                                queue_name=f"kickoff_sqs",
                                visibility_timeout=core.Duration.hours(12),
                                retention_period=core.Duration.days(1))

        # S3

        midi_file_dropoff_bucket = s3.Bucket(
            self,
            id="midi_files_dropoff",
            bucket_name=MIDI_FILE_DROPOFF_BUCKET,
            auto_delete_objects=True,
            removal_policy=core.RemovalPolicy.DESTROY)

        midi_file_dropoff_bucket.add_event_notification(
            event=s3.EventType.OBJECT_CREATED,
            dest=s3n.SqsDestination(kickoff_sqs))

        created_mp3_files_bucket = s3.Bucket(
            self,
            id="created_mp3_files",
            bucket_name=CREATED_MP3_FILES_BUCKET,
            auto_delete_objects=True,
            removal_policy=core.RemovalPolicy.DESTROY)

        component_midi_files_bucket = s3.Bucket(
            self,
            id="component_midi_files",
            bucket_name=COMPONENT_MIDI_FILES_BUCKET,
            auto_delete_objects=True,
            removal_policy=core.RemovalPolicy.DESTROY)

        component_midi_files_bucket.add_event_notification(
            event=s3.EventType.OBJECT_CREATED,
            dest=s3n.SqsDestination(conversion_sqs))

        # Lambdas
        lambda_code = lambda_.DockerImageCode.from_image_asset(
            directory='./midi_to_mp3_lambda/',
            file="Dockerfile",
            build_args={
                "AWS_ACCESS_KEY_ID": os.environ.get("AWS_ACCESS_KEY_ID"),
                "AWS_SECRET_ACCESS_KEY":
                os.environ.get("AWS_SECRET_ACCESS_KEY")
            })

        midi_to_mp3_lambda = lambda_.DockerImageFunction(
            self,
            id="midi_to_mp3_lambda",
            role=lambda_role,
            function_name="midi-to-mp3",
            memory_size=1024,
            timeout=core.Duration.minutes(5),
            code=lambda_code)

        midi_split_lambda = alg.GoFunction(
            self,
            id="midi_split_lambda",
            entry="./midi_split_lambda/midi_split_lambda.go",
            timeout=core.Duration.minutes(15),
            runtime=lambda_.Runtime.GO_1_X,
            role=lambda_role,
            function_name=f"midi-split-lambda",
            memory_size=512,
            bundling={"environment": {
                "GO111MODULE": "off"
            }})

        s3_cleanup_lambda = lambda_.Function(
            self,
            id="s3_cleanup_lambda",
            runtime=lambda_.Runtime.PYTHON_3_8,
            role=lambda_role,
            function_name="s3-cleanup-lambda",
            memory_size=256,
            timeout=core.Duration.minutes(5),
            environment={"NUM_WEEKS_TO_KEEP_FILES": "1"},
            handler="s3_cleanup_lambda.handler",
            code=lambda_.Code.from_asset(os.path.join(".",
                                                      "s3_cleanup_lambda")))

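        # EventBridge rule: run the cleanup Lambda every Sunday at 00:00 UTC.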
        weekly_on_sunday_cron = events.Rule(
            self,
            "Rule",
            schedule=events.Schedule.cron(minute='0',
                                          hour='0',
                                          week_day="SUN",
                                          month='*',
                                          year='*'),
        )
        weekly_on_sunday_cron.add_target(
            targets.LambdaFunction(s3_cleanup_lambda))

        # Event Sources

        midi_to_mp3_lambda.add_event_source(
            eventsources.SqsEventSource(queue=conversion_sqs))

        midi_split_lambda.add_event_source(
            eventsources.SqsEventSource(queue=kickoff_sqs))
Example #11
    def __init__(self, scope: core.Construct, _id: str, **kwargs) -> None:
        super().__init__(scope, _id, **kwargs)

        # Set up SSM parameters for credentials, bucket_para and ignore_list
        ssm_credential_para = ssm.StringParameter.from_secure_string_parameter_attributes(
            self,
            "ssm_parameter_credentials",
            parameter_name=ssm_parameter_credentials,
            version=1)

        ssm_bucket_para = ssm.StringParameter(self,
                                              "s3bucket_serverless",
                                              string_value=json.dumps(
                                                  bucket_para, indent=4))

        ssm_parameter_ignore_list = ssm.StringParameter(
            self, "s3_migrate_ignore_list", string_value=ignore_list)

        # Setup DynamoDB
        ddb_file_list = ddb.Table(self,
                                  "s3migrate_serverless",
                                  partition_key=ddb.Attribute(
                                      name="Key",
                                      type=ddb.AttributeType.STRING),
                                  billing_mode=ddb.BillingMode.PAY_PER_REQUEST)
        ddb_file_list.add_global_secondary_index(
            partition_key=ddb.Attribute(name="desBucket",
                                        type=ddb.AttributeType.STRING),
            index_name="desBucket-index",
            projection_type=ddb.ProjectionType.INCLUDE,
            non_key_attributes=["desKey", "versionId"])

        # Setup SQS
        sqs_queue_DLQ = sqs.Queue(self,
                                  "s3migrate_serverless_Q_DLQ",
                                  visibility_timeout=core.Duration.minutes(15),
                                  retention_period=core.Duration.days(14))
        sqs_queue = sqs.Queue(self,
                              "s3migrate_serverless_Q",
                              visibility_timeout=core.Duration.minutes(15),
                              retention_period=core.Duration.days(14),
                              dead_letter_queue=sqs.DeadLetterQueue(
                                  max_receive_count=60, queue=sqs_queue_DLQ))

        # Set up an API for Lambda to get its IP address (for debugging network routing)
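        # The MockIntegration below returns the caller's source IP ($context.identity.sourceIp)
        # straight from API Gateway, without invoking any backend.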
        checkip = api.RestApi(
            self,
            "lambda-checkip-api",
            cloud_watch_role=True,
            deploy=True,
            description="For Lambda get IP address",
            default_integration=api.MockIntegration(
                integration_responses=[
                    api.IntegrationResponse(status_code="200",
                                            response_templates={
                                                "application/json":
                                                "$context.identity.sourceIp"
                                            })
                ],
                request_templates={"application/json": '{"statusCode": 200}'}),
            endpoint_types=[api.EndpointType.REGIONAL])
        checkip.root.add_method("GET",
                                method_responses=[
                                    api.MethodResponse(
                                        status_code="200",
                                        response_models={
                                            "application/json":
                                            api.Model.EMPTY_MODEL
                                        })
                                ])

        # Setup Lambda functions
        handler = lam.Function(self,
                               "s3-migrate-worker",
                               code=lam.Code.asset("./lambda"),
                               handler="lambda_function_worker.lambda_handler",
                               runtime=lam.Runtime.PYTHON_3_8,
                               memory_size=1024,
                               timeout=core.Duration.minutes(15),
                               tracing=lam.Tracing.ACTIVE,
                               environment={
                                   'table_queue_name':
                                   ddb_file_list.table_name,
                                   'Des_bucket_default': Des_bucket_default,
                                   'Des_prefix_default': Des_prefix_default,
                                   'StorageClass': StorageClass,
                                   'checkip_url': checkip.url,
                                   'ssm_parameter_credentials':
                                   ssm_parameter_credentials,
                                   'JobType': JobType,
                                   'MaxRetry': MaxRetry,
                                   'MaxThread': MaxThread,
                                   'MaxParallelFile': MaxParallelFile,
                                   'JobTimeout': JobTimeout,
                                   'UpdateVersionId': UpdateVersionId,
                                   'GetObjectWithVersionId':
                                   GetObjectWithVersionId
                               })

        handler_jobsender = lam.Function(
            self,
            "s3-migrate-jobsender",
            code=lam.Code.asset("./lambda"),
            handler="lambda_function_jobsender.lambda_handler",
            runtime=lam.Runtime.PYTHON_3_8,
            memory_size=1024,
            timeout=core.Duration.minutes(15),
            tracing=lam.Tracing.ACTIVE,
            environment={
                'table_queue_name': ddb_file_list.table_name,
                'StorageClass': StorageClass,
                'checkip_url': checkip.url,
                'sqs_queue': sqs_queue.queue_name,
                'ssm_parameter_credentials': ssm_parameter_credentials,
                'ssm_parameter_ignore_list':
                ssm_parameter_ignore_list.parameter_name,
                'ssm_parameter_bucket': ssm_bucket_para.parameter_name,
                'JobType': JobType,
                'MaxRetry': MaxRetry,
                'JobsenderCompareVersionId': JobsenderCompareVersionId
            })

        # Allow lambda read/write DDB, SQS
        ddb_file_list.grant_read_write_data(handler)
        ddb_file_list.grant_read_write_data(handler_jobsender)
        sqs_queue.grant_send_messages(handler_jobsender)
        # SQS trigger Lambda worker
        handler.add_event_source(SqsEventSource(sqs_queue, batch_size=1))

        # Option 1: Create an S3 bucket; all new objects in this bucket will be transmitted by the Lambda worker
        s3bucket = s3.Bucket(self, "s3_new_migrate")
        s3bucket.grant_read(handler)
        s3bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                        s3n.SqsDestination(sqs_queue))

        # Option 2: Allow existing S3 buckets to be read by the Lambda functions.
        # The Lambda jobsender will scan and compare these buckets and trigger Lambda workers to transmit the objects
        bucket_name = ''
        for b in bucket_para:
            if bucket_name != b['src_bucket']:  # skip duplicates if the same bucket is listed more than once
                bucket_name = b['src_bucket']
                s3exist_bucket = s3.Bucket.from_bucket_name(
                    self,
                    bucket_name,  # use the bucket name as the construct id
                    bucket_name=bucket_name)
                if JobType == 'PUT':
                    s3exist_bucket.grant_read(handler_jobsender)
                    s3exist_bucket.grant_read(handler)
                else:  # 'GET' mode
                    s3exist_bucket.grant_read_write(handler_jobsender)
                    s3exist_bucket.grant_read_write(handler)

        # Allow Lambda to read the SSM parameters
        ssm_bucket_para.grant_read(handler_jobsender)
        ssm_credential_para.grant_read(handler)
        ssm_credential_para.grant_read(handler_jobsender)
        ssm_parameter_ignore_list.grant_read(handler_jobsender)

        # Schedule a cron event to trigger the Lambda jobsender once per hour:
        event.Rule(self,
                   'cron_trigger_jobsender',
                   schedule=event.Schedule.rate(core.Duration.hours(1)),
                   targets=[target.LambdaFunction(handler_jobsender)])

        # TODO: Trigger the event immediately; add a custom resource Lambda to invoke handler_jobsender

        # Create Lambda log metric filters to build network traffic metrics
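        # These use CloudWatch Logs space-delimited filter patterns: each bracketed field
        # matches one token of the log line, and the matched $bytes token is emitted as the
        # metric value.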
        handler.log_group.add_metric_filter(
            "Completed-bytes",
            metric_name="Completed-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[info, date, sn, p="--->Complete", bytes, key]'))
        handler.log_group.add_metric_filter(
            "Uploading-bytes",
            metric_name="Uploading-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[info, date, sn, p="--->Uploading", bytes, key]'))
        handler.log_group.add_metric_filter(
            "Downloading-bytes",
            metric_name="Downloading-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[info, date, sn, p="--->Downloading", bytes, key]'))
        handler.log_group.add_metric_filter(
            "MaxMemoryUsed",
            metric_name="MaxMemoryUsed",
            metric_namespace="s3_migrate",
            metric_value="$memory",
            filter_pattern=logs.FilterPattern.literal(
                '[head="REPORT", a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, '
                'a13, a14, a15, a16, memory, MB="MB", rest]'))
        lambda_metric_Complete = cw.Metric(namespace="s3_migrate",
                                           metric_name="Completed-bytes",
                                           statistic="Sum",
                                           period=core.Duration.minutes(1))
        lambda_metric_Upload = cw.Metric(namespace="s3_migrate",
                                         metric_name="Uploading-bytes",
                                         statistic="Sum",
                                         period=core.Duration.minutes(1))
        lambda_metric_Download = cw.Metric(namespace="s3_migrate",
                                           metric_name="Downloading-bytes",
                                           statistic="Sum",
                                           period=core.Duration.minutes(1))
        lambda_metric_MaxMemoryUsed = cw.Metric(
            namespace="s3_migrate",
            metric_name="MaxMemoryUsed",
            statistic="Maximum",
            period=core.Duration.minutes(1))
        handler.log_group.add_metric_filter(
            "ERROR",
            metric_name="ERROR-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"ERROR"'))
        handler.log_group.add_metric_filter(
            "WARNING",
            metric_name="WARNING-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"WARNING"'))
        # Task timed out
        handler.log_group.add_metric_filter(
            "TIMEOUT",
            metric_name="TIMEOUT-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"Task timed out"'))
        log_metric_ERROR = cw.Metric(namespace="s3_migrate",
                                     metric_name="ERROR-Logs",
                                     statistic="Sum",
                                     period=core.Duration.minutes(1))
        log_metric_WARNING = cw.Metric(namespace="s3_migrate",
                                       metric_name="WARNING-Logs",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))
        log_metric_TIMEOUT = cw.Metric(namespace="s3_migrate",
                                       metric_name="TIMEOUT-Logs",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))

        # Dashboard to monitor SQS and Lambda
        board = cw.Dashboard(self, "s3_migrate_serverless")

        board.add_widgets(
            cw.GraphWidget(title="Lambda-NETWORK",
                           left=[
                               lambda_metric_Download, lambda_metric_Upload,
                               lambda_metric_Complete
                           ]),
            cw.GraphWidget(title="Lambda-concurrent",
                           left=[
                               handler.metric(
                                   metric_name="ConcurrentExecutions",
                                   period=core.Duration.minutes(1))
                           ]),
            cw.GraphWidget(
                title="Lambda-invocations/errors/throttles",
                left=[
                    handler.metric_invocations(
                        period=core.Duration.minutes(1)),
                    handler.metric_errors(period=core.Duration.minutes(1)),
                    handler.metric_throttles(period=core.Duration.minutes(1))
                ]),
            cw.GraphWidget(
                title="Lambda-duration",
                left=[
                    handler.metric_duration(period=core.Duration.minutes(1))
                ]),
        )

        board.add_widgets(
            cw.GraphWidget(title="Lambda_MaxMemoryUsed(MB)",
                           left=[lambda_metric_MaxMemoryUsed]),
            cw.GraphWidget(title="ERROR/WARNING Logs",
                           left=[log_metric_ERROR],
                           right=[log_metric_WARNING, log_metric_TIMEOUT]),
            cw.GraphWidget(
                title="SQS-Jobs",
                left=[
                    sqs_queue.metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1))
                ]),
            cw.SingleValueWidget(
                title="Running/Waiting and Dead Jobs",
                metrics=[
                    sqs_queue.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue.metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1))
                ],
                height=6))
        # Alarm for queue - DLQ
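        # Fires as soon as the DLQ has more than 0 visible messages, i.e. any dead letter.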
        alarm_DLQ = cw.Alarm(
            self,
            "SQS_DLQ",
            metric=sqs_queue_DLQ.metric_approximate_number_of_messages_visible(
            ),
            threshold=0,
            comparison_operator=cw.ComparisonOperator.GREATER_THAN_THRESHOLD,
            evaluation_periods=1,
            datapoints_to_alarm=1)
        alarm_topic = sns.Topic(self, "SQS queue-DLQ has dead letter")
        alarm_topic.add_subscription(
            subscription=sub.EmailSubscription(alarm_email))
        alarm_DLQ.add_alarm_action(action.SnsAction(alarm_topic))

        core.CfnOutput(self,
                       "Dashboard",
                       value="CloudWatch Dashboard name s3_migrate_serverless")
Example #12
    def __init__(self, scope: cdk.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)
        # Image Bucket
        image_bucket = s3.Bucket(self,
                                 IMG_BUCKET_NAME,
                                 removal_policy=cdk.RemovalPolicy.DESTROY)
        cdk.CfnOutput(self, "imageBucket", value=image_bucket.bucket_name)

        image_bucket.add_cors_rule(
            allowed_methods=[s3.HttpMethods.GET, s3.HttpMethods.PUT],
            allowed_origins=["*"],
            allowed_headers=["*"],
            max_age=3000,
        )

        # Thumbnail Bucket
        resized_image_bucket = s3.Bucket(
            self,
            RESIZED_IMG_BUCKET_NAME,
            removal_policy=cdk.RemovalPolicy.DESTROY)
        cdk.CfnOutput(self,
                      "resizedBucket",
                      value=resized_image_bucket.bucket_name)

        resized_image_bucket.add_cors_rule(
            allowed_methods=[s3.HttpMethods.GET, s3.HttpMethods.PUT],
            allowed_origins=["*"],
            allowed_headers=["*"],
            max_age=3000,
        )
        # S3 Static bucket for website code
        web_bucket = s3.Bucket(
            self,
            WEBSITE_BUCKET_NAME,
            website_index_document="index.html",
            website_error_document="index.html",
            removal_policy=cdk.RemovalPolicy.DESTROY,
            # uncomment this and delete the policy statement below to allow public access to our
            # static website
            # public_read_access=true
        )

        web_policy_statement = iam.PolicyStatement(
            actions=["s3:GetObject"],
            resources=[web_bucket.arn_for_objects("*")],
            principals=[iam.AnyPrincipal()],
            conditions={"IpAddress": {
                "aws:SourceIp": ["139.138.203.36"]
            }},
        )

        web_bucket.add_to_resource_policy(web_policy_statement)

        cdk.CfnOutput(self,
                      "bucketURL",
                      value=web_bucket.bucket_website_domain_name)

        # Deploy site contents to S3 Bucket
        s3_dep.BucketDeployment(
            self,
            "DeployWebsite",
            sources=[s3_dep.Source.asset("./public")],
            destination_bucket=web_bucket,
        )

        # DynamoDB to store image labels
        partition_key = dynamodb.Attribute(name="image",
                                           type=dynamodb.AttributeType.STRING)
        table = dynamodb.Table(
            self,
            "ImageLabels",
            partition_key=partition_key,
            removal_policy=cdk.RemovalPolicy.DESTROY,
        )
        cdk.CfnOutput(self, "ddbTable", value=table.table_name)

        # Lambda layer for Pillow library
        layer = lb.LayerVersion(
            self,
            "pil",
            code=lb.Code.from_asset("reklayer"),
            compatible_runtimes=[lb.Runtime.PYTHON_3_7],
            license="Apache-2.0",
            description=
            "A layer to enable the PIL library in our Rekognition Lambda",
        )

        # Lambda function
        rek_fn = lb.Function(
            self,
            "rekognitionFunction",
            code=lb.Code.from_asset("rekognitionFunction"),
            runtime=lb.Runtime.PYTHON_3_7,
            handler="index.handler",
            timeout=cdk.Duration.seconds(30),
            memory_size=1024,
            layers=[layer],
            environment={
                "TABLE": table.table_name,
                "BUCKET": image_bucket.bucket_name,
                "THUMBBUCKET": resized_image_bucket.bucket_name,
            },
        )

        image_bucket.grant_read(rek_fn)
        resized_image_bucket.grant_write(rek_fn)
        table.grant_write_data(rek_fn)

        rek_fn.add_to_role_policy(
            iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                actions=["rekognition:DetectLabels"],
                                resources=["*"]))

        # Lambda for Synchronous front end
        serviceFn = lb.Function(
            self,
            "serviceFunction",
            code=lb.Code.from_asset("servicelambda"),
            runtime=lb.Runtime.PYTHON_3_7,
            handler="index.handler",
            environment={
                "TABLE": table.table_name,
                "BUCKET": image_bucket.bucket_name,
                "RESIZEDBUCKET": resized_image_bucket.bucket_name,
            },
        )

        image_bucket.grant_write(serviceFn)
        resized_image_bucket.grant_write(serviceFn)
        table.grant_read_write_data(serviceFn)

        # Cognito User Pool Auth
        auto_verified_attrs = cognito.AutoVerifiedAttrs(email=True)
        sign_in_aliases = cognito.SignInAliases(email=True, username=True)
        user_pool = cognito.UserPool(
            self,
            "UserPool",
            self_sign_up_enabled=True,
            auto_verify=auto_verified_attrs,
            sign_in_aliases=sign_in_aliases,
        )

        user_pool_client = cognito.UserPoolClient(self,
                                                  "UserPoolClient",
                                                  user_pool=user_pool,
                                                  generate_secret=False)

        identity_pool = cognito.CfnIdentityPool(
            self,
            "ImageRekognitionIdentityPool",
            allow_unauthenticated_identities=False,
            cognito_identity_providers=[{
                "clientId":
                user_pool_client.user_pool_client_id,
                "providerName":
                user_pool.user_pool_provider_name,
            }],
        )

        # API Gateway
        cors_options = apigw.CorsOptions(allow_origins=apigw.Cors.ALL_ORIGINS,
                                         allow_methods=apigw.Cors.ALL_METHODS)
        api = apigw.LambdaRestApi(
            self,
            "imageAPI",
            default_cors_preflight_options=cors_options,
            handler=serviceFn,
            proxy=False,
        )

        auth = apigw.CfnAuthorizer(
            self,
            "ApiGatewayAuthorizer",
            name="customer-authorizer",
            identity_source="method.request.header.Authorization",
            provider_arns=[user_pool.user_pool_arn],
            rest_api_id=api.rest_api_id,
            # type=apigw.AuthorizationType.COGNITO,
            type="COGNITO_USER_POOLS",
        )

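        # Web-identity principal: only authenticated identities from this Cognito identity
        # pool may assume the role via sts:AssumeRoleWithWebIdentity.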
        assumed_by = iam.FederatedPrincipal(
            "cognito-identity.amazon.com",
            conditions={
                "StringEquals": {
                    "cognito-identity.amazonaws.com:aud": identity_pool.ref
                },
                "ForAnyValue:StringLike": {
                    "cognito-identity.amazonaws.com:amr": "authenticated"
                },
            },
            assume_role_action="sts:AssumeRoleWithWebIdentity",
        )
        authenticated_role = iam.Role(
            self,
            "ImageRekognitionAuthenticatedRole",
            assumed_by=assumed_by,
        )
        # IAM policy granting users permission to get and put their pictures
        policy_statement = iam.PolicyStatement(
            actions=["s3:GetObject", "s3:PutObject"],
            effect=iam.Effect.ALLOW,
            resources=[
                image_bucket.bucket_arn +
                "/private/${cognito-identity.amazonaws.com:sub}/*",
                image_bucket.bucket_arn +
                "/private/${cognito-identity.amazonaws.com:sub}/",
                resized_image_bucket.bucket_arn +
                "/private/${cognito-identity.amazonaws.com:sub}/*",
                resized_image_bucket.bucket_arn +
                "/private/${cognito-identity.amazonaws.com:sub}/",
            ],
        )

        # IAM policy granting users permission to list their pictures
        list_policy_statement = iam.PolicyStatement(
            actions=["s3:ListBucket"],
            effect=iam.Effect.ALLOW,
            resources=[
                image_bucket.bucket_arn, resized_image_bucket.bucket_arn
            ],
            conditions={
                "StringLike": {
                    "s3:prefix":
                    ["private/${cognito-identity.amazonaws.com:sub}/*"]
                }
            },
        )

        authenticated_role.add_to_policy(policy_statement)
        authenticated_role.add_to_policy(list_policy_statement)

        # Attach role to our Identity Pool
        cognito.CfnIdentityPoolRoleAttachment(
            self,
            "IdentityPoolRoleAttachment",
            identity_pool_id=identity_pool.ref,
            roles={"authenticated": authenticated_role.role_arn},
        )

        # Get some outputs from cognito
        cdk.CfnOutput(self, "UserPoolId", value=user_pool.user_pool_id)
        cdk.CfnOutput(self,
                      "AppClientId",
                      value=user_pool_client.user_pool_client_id)
        cdk.CfnOutput(self, "IdentityPoolId", value=identity_pool.ref)

        # New Amazon API Gateway with AWS Lambda Integration
        success_response = apigw.IntegrationResponse(
            status_code="200",
            response_parameters={
                "method.response.header.Access-Control-Allow-Origin": "'*'"
            },
        )
        error_response = apigw.IntegrationResponse(
            selection_pattern="(\n|.)+",
            status_code="500",
            response_parameters={
                "method.response.header.Access-Control-Allow-Origin": "'*'"
            },
        )

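        # Mapping template: forward the 'action' and 'key' query-string parameters to the
        # Lambda as a JSON body.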
        request_template = json.dumps({
            "action":
            "$util.escapeJavaScript($input.params('action'))",
            "key":
            "$util.escapeJavaScript($input.params('key'))",
        })

        lambda_integration = apigw.LambdaIntegration(
            serviceFn,
            proxy=False,
            request_parameters={
                "integration.request.querystring.action":
                "method.request.querystring.action",
                "integration.request.querystring.key":
                "method.request.querystring.key",
            },
            request_templates={"application/json": request_template},
            passthrough_behavior=apigw.PassthroughBehavior.WHEN_NO_TEMPLATES,
            integration_responses=[success_response, error_response],
        )

        imageAPI = api.root.add_resource("images")

        success_resp = apigw.MethodResponse(
            status_code="200",
            response_parameters={
                "method.response.header.Access-Control-Allow-Origin": True
            },
        )
        error_resp = apigw.MethodResponse(
            status_code="500",
            response_parameters={
                "method.response.header.Access-Control-Allow-Origin": True
            },
        )

        # GET /images
        get_method = imageAPI.add_method(
            "GET",
            lambda_integration,
            authorization_type=apigw.AuthorizationType.COGNITO,
            request_parameters={
                "method.request.querystring.action": True,
                "method.request.querystring.key": True,
            },
            method_responses=[success_resp, error_resp],
        )
        # DELETE /images
        delete_method = imageAPI.add_method(
            "DELETE",
            lambda_integration,
            authorization_type=apigw.AuthorizationType.COGNITO,
            request_parameters={
                "method.request.querystring.action": True,
                "method.request.querystring.key": True,
            },
            method_responses=[success_resp, error_resp],
        )

        # Override the authorizer id because it doesn't work when defining it as a param
        # in add_method
        get_method_resource = get_method.node.find_child("Resource")
        get_method_resource.add_property_override("AuthorizerId", auth.ref)
        delete_method_resource = delete_method.node.find_child("Resource")
        delete_method_resource.add_property_override("AuthorizerId", auth.ref)

        # Building SQS queue and DeadLetter Queue
        dl_queue = sqs.Queue(
            self,
            "ImageDLQueue",
            queue_name="ImageDLQueue",
        )

        dl_queue_opts = sqs.DeadLetterQueue(max_receive_count=2,
                                            queue=dl_queue)

        queue = sqs.Queue(
            self,
            "ImageQueue",
            queue_name="ImageQueue",
            visibility_timeout=cdk.Duration.seconds(30),
            receive_message_wait_time=cdk.Duration.seconds(20),
            dead_letter_queue=dl_queue_opts,
        )

        # S3 Bucket Create Notification to SQS
        # Whenever an image is uploaded add it to the queue

        image_bucket.add_object_created_notification(
            s3n.SqsDestination(queue),
            s3.NotificationKeyFilter(prefix="private/"))
Example #13
    def __init__(self, scope: core.Construct, _id: str, **kwargs) -> None:
        super().__init__(scope, _id, **kwargs)

        ddb_file_list = ddb.Table(self, "ddb",
                                  partition_key=ddb.Attribute(name="Key", type=ddb.AttributeType.STRING),
                                  billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

        sqs_queue_DLQ = sqs.Queue(self, "sqs_DLQ",
                                  visibility_timeout=core.Duration.minutes(15),
                                  retention_period=core.Duration.days(14)
                                  )
        sqs_queue = sqs.Queue(self, "sqs_queue",
                              visibility_timeout=core.Duration.minutes(15),
                              retention_period=core.Duration.days(14),
                              dead_letter_queue=sqs.DeadLetterQueue(
                                  max_receive_count=100,
                                  queue=sqs_queue_DLQ
                              )
                              )
        handler = lam.Function(self, "lambdaFunction",
                               code=lam.Code.asset("./lambda"),
                               handler="lambda_function.lambda_handler",
                               runtime=lam.Runtime.PYTHON_3_8,
                               memory_size=1024,
                               timeout=core.Duration.minutes(15),
                               tracing=lam.Tracing.ACTIVE,
                               environment={
                                   'table_queue_name': ddb_file_list.table_name,
                                   'Des_bucket_default': Des_bucket_default,
                                   'Des_prefix_default': Des_prefix_default,
                                   'StorageClass': StorageClass,
                                   'aws_access_key_id': aws_access_key_id,
                                   'aws_secret_access_key': aws_secret_access_key,
                                   'aws_access_key_region': aws_access_key_region
                               })

        ddb_file_list.grant_read_write_data(handler)
        handler.add_event_source(SqsEventSource(sqs_queue))

        s3bucket = s3.Bucket(self, "s3bucket")
        s3bucket.grant_read(handler)
        s3bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                        s3n.SqsDestination(sqs_queue))

        # You can import an existing bucket and grant access to lambda
        # exist_s3bucket = s3.Bucket.from_bucket_name(self, "import_bucket",
        #                                             bucket_name="you_bucket_name")
        # exist_s3bucket.grant_read(handler)

        # But you have to add SQS as the imported bucket's event notification manually; CloudFormation does not support it.
        # A workaround is to add on_cloud_trail_event for the bucket, but that triggers CloudTrail first.
        # Because the bucket is imported, you must manually create the bucket event trigger to SQS and set the SQS permission allowing the bucket to trigger it (see the boto3 sketch in Example #2).

        core.CfnOutput(self, "DynamoDB_Table", value=ddb_file_list.table_name)
        core.CfnOutput(self, "SQS_Job_Queue", value=sqs_queue.queue_name)
        core.CfnOutput(self, "SQS_Job_Queue_DLQ", value=sqs_queue_DLQ.queue_name)
        core.CfnOutput(self, "Worker_Lambda_Function", value=handler.function_name)
        core.CfnOutput(self, "New_S3_Bucket", value=s3bucket.bucket_name)

        # Create Lambda log metric filters to build network traffic metrics
        handler.log_group.add_metric_filter("Complete-bytes",
                                            metric_name="Complete-bytes",
                                            metric_namespace="s3_migrate",
                                            metric_value="$bytes",
                                            filter_pattern=logs.FilterPattern.literal(
                                                '[info, date, sn, p="--->Complete", bytes, key]'))
        handler.log_group.add_metric_filter("Uploading-bytes",
                                            metric_name="Uploading-bytes",
                                            metric_namespace="s3_migrate",
                                            metric_value="$bytes",
                                            filter_pattern=logs.FilterPattern.literal(
                                                '[info, date, sn, p="--->Uploading", bytes, key]'))
        handler.log_group.add_metric_filter("Downloading-bytes",
                                            metric_name="Downloading-bytes",
                                            metric_namespace="s3_migrate",
                                            metric_value="$bytes",
                                            filter_pattern=logs.FilterPattern.literal(
                                                '[info, date, sn, p="--->Downloading", bytes, key]'))
        lambda_metric_Complete = cw.Metric(namespace="s3_migrate",
                                           metric_name="Complete-bytes",
                                           statistic="Sum",
                                           period=core.Duration.minutes(1))
        lambda_metric_Upload = cw.Metric(namespace="s3_migrate",
                                         metric_name="Uploading-bytes",
                                         statistic="Sum",
                                         period=core.Duration.minutes(1))
        lambda_metric_Download = cw.Metric(namespace="s3_migrate",
                                           metric_name="Downloading-bytes",
                                           statistic="Sum",
                                           period=core.Duration.minutes(1))
        handler.log_group.add_metric_filter("ERROR",
                                            metric_name="ERROR-Logs",
                                            metric_namespace="s3_migrate",
                                            metric_value="1",
                                            filter_pattern=logs.FilterPattern.literal(
                                                '"ERROR"'))
        handler.log_group.add_metric_filter("WARNING",
                                            metric_name="WARNING-Logs",
                                            metric_namespace="s3_migrate",
                                            metric_value="1",
                                            filter_pattern=logs.FilterPattern.literal(
                                                '"WARNING"'))
        log_metric_ERROR = cw.Metric(namespace="s3_migrate",
                                     metric_name="ERROR-Logs",
                                     statistic="Sum",
                                     period=core.Duration.minutes(1))
        log_metric_WARNING = cw.Metric(namespace="s3_migrate",
                                       metric_name="WARNING-Logs",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))

        # Dashboard to monitor SQS and Lambda
        board = cw.Dashboard(self, "s3_migrate", dashboard_name="s3_migrate_serverless")

        board.add_widgets(cw.GraphWidget(title="Lambda-NETWORK",
                                         left=[lambda_metric_Download, lambda_metric_Upload, lambda_metric_Complete]),
                          # TODO: this monitors account-wide Lambda concurrency, not just this function (a CDK limitation).
                          # Lambda now supports per-function concurrency metrics; change this once the CDK supports it.
                          cw.GraphWidget(title="Lambda-all-concurrent",
                                         left=[handler.metric_all_concurrent_executions(period=core.Duration.minutes(1))]),

                          cw.GraphWidget(title="Lambda-invocations/errors/throttles",
                                         left=[handler.metric_invocations(period=core.Duration.minutes(1)),
                                               handler.metric_errors(period=core.Duration.minutes(1)),
                                               handler.metric_throttles(period=core.Duration.minutes(1))]),
                          cw.GraphWidget(title="Lambda-duration",
                                         left=[handler.metric_duration(period=core.Duration.minutes(1))]),
                          )

        board.add_widgets(cw.GraphWidget(title="SQS-Jobs",
                                         left=[sqs_queue.metric_approximate_number_of_messages_visible(
                                             period=core.Duration.minutes(1)
                                         ),
                                               sqs_queue.metric_approximate_number_of_messages_not_visible(
                                                   period=core.Duration.minutes(1)
                                               )]),
                          cw.GraphWidget(title="SQS-DeadLetterQueue",
                                         left=[sqs_queue_DLQ.metric_approximate_number_of_messages_visible(
                                             period=core.Duration.minutes(1)
                                         ),
                                               sqs_queue_DLQ.metric_approximate_number_of_messages_not_visible(
                                                   period=core.Duration.minutes(1)
                                               )]),
                          cw.GraphWidget(title="ERROR/WARNING Logs",
                                         left=[log_metric_ERROR],
                                         right=[log_metric_WARNING]),
                          cw.SingleValueWidget(title="Running/Waiting and Dead Jobs",
                                               metrics=[sqs_queue.metric_approximate_number_of_messages_not_visible(
                                                   period=core.Duration.minutes(1)
                                               ),
                                                        sqs_queue.metric_approximate_number_of_messages_visible(
                                                            period=core.Duration.minutes(1)
                                                        ),
                                                        sqs_queue_DLQ.metric_approximate_number_of_messages_not_visible(
                                                            period=core.Duration.minutes(1)
                                                        ),
                                                        sqs_queue_DLQ.metric_approximate_number_of_messages_visible(
                                                            period=core.Duration.minutes(1)
                                                        )],
                                               height=6)
                          )
        # Alarm for queue - DLQ
        alarm_DLQ = cw.Alarm(self, "SQS_DLQ",
                             alarm_name="s3-migration-serverless-SQS Dead Letter Queue",
                             metric=sqs_queue_DLQ.metric_approximate_number_of_messages_visible(),
                             threshold=0,
                             comparison_operator=cw.ComparisonOperator.GREATER_THAN_THRESHOLD,
                             evaluation_periods=1,
                             datapoints_to_alarm=1)
        alarm_topic = sns.Topic(self, "SQS queue-DLQ has dead letter")
        alarm_topic.add_subscription(subscription=sub.EmailSubscription(alarm_email))
        alarm_DLQ.add_alarm_action(action.SnsAction(alarm_topic))

        # Alarm for queue empty, i.e. no visible and no invisible (in-flight) messages
        # metric_all_message = cw.MathExpression(
        #     expression="a + b",
        #     label="empty_queue_expression",
        #     using_metrics={
        #         "a": sqs_queue.metric_approximate_number_of_messages_visible(),
        #         "b": sqs_queue.metric_approximate_number_of_messages_not_visible()
        #     }
        # )
        # alarm_0 = cw.Alarm(self, "SQSempty",
        #                    alarm_name="SQS queue empty-Serverless",
        #                    metric=metric_all_message,
        #                    threshold=0,
        #                    comparison_operator=cw.ComparisonOperator.LESS_THAN_OR_EQUAL_TO_THRESHOLD,
        #                    evaluation_periods=3,
        #                    datapoints_to_alarm=3,
        #                    treat_missing_data=cw.TreatMissingData.IGNORE
        #                    )
        # alarm_topic = sns.Topic(self, "SQS queue empty-Serverless")
        # alarm_topic.add_subscription(subscription=sub.EmailSubscription(alarm_email))
        # alarm_0.add_alarm_action(action.SnsAction(alarm_topic))

        # core.CfnOutput(self, "Alarm", value="CloudWatch SQS queue empty Alarm for Serverless: " + alarm_email)
        core.CfnOutput(self, "Dashboard", value="CloudWatch Dashboard name s3_migrate_serverless")
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Tag all constructs with the project for easy billing drilldown,
        # filtering, and organization.
        core.Tags.of(self).add('project', 'MediaTranscription')

        # Media files bucket
        media_bucket = s3.Bucket(
            self,
            'media-transcription-bucket',
            encryption=s3.BucketEncryption.S3_MANAGED,
        )

        # SQS queue for media files bucket event notifications
        media_bucket_event_queue = sqs.Queue(
            self,
            'media-transcription-event-notification-queue',
            queue_name='media-transcription-event-notification-queue',
            visibility_timeout=core.Duration.seconds(60),
            dead_letter_queue=sqs.DeadLetterQueue(
                max_receive_count=3,
                queue=sqs.Queue(
                    self,
                    'media-transcription-event-notifications-dlq',
                    queue_name='media-transcription-event-notifications-dlq',
                )),
        )

        # S3 object created notifications sent to SQS queue
        media_bucket.add_event_notification(
            s3.EventType.OBJECT_CREATED,
            s3n.SqsDestination(media_bucket_event_queue),
            *[s3.NotificationKeyFilter(prefix='media-input/')],
        )

        # Lambda function to create/submit Transcribe jobs
        transcribe_job_init_fn = lambda_.Function(
            self,
            'transcribe-job-init-fn',
            runtime=lambda_.Runtime.PYTHON_3_8,
            code=lambda_.Code.from_asset(
                '../lambdas/transcribe-job-init-fn',
                # Note: The Lambda runtime does not ship the latest boto3 by
                # default, so dependencies are pip-installed and bundled
                # locally with Docker to pick up a newer version.
                # Q: Why is the latest boto3 needed?
                # A: https://github.com/boto/boto3/issues/2630
                # The ECR images created for bundling should be deleted
                # afterwards to avoid cost.
                # TODO: Revert to the default packaging once the runtime ships
                # a recent enough boto3.
                bundling={
                    'image':
                    lambda_.Runtime.PYTHON_3_8.bundling_docker_image,
                    'command': [
                        'bash', '-c',
                        '\n        pip install -r requirements.txt -t /asset-output &&\n        cp -au . /asset-output\n        '
                    ]
                }),
            handler='fn.handler',
            reserved_concurrent_executions=1,  # Effectively single-threaded
        )
        # Triggered by SQS messages created for media file puts
        transcribe_job_init_fn.add_event_source(
            les.SqsEventSource(
                queue=media_bucket_event_queue,
                batch_size=5,
                enabled=True,
            ))
        # Grant access to start transcription jobs
        transcribe_job_init_fn.add_to_role_policy(
            statement=iam.PolicyStatement(
                actions=[
                    'transcribe:StartTranscriptionJob',
                ],
                resources=['*'],
                effect=iam.Effect.ALLOW,
            ))

        # Grant the Lambda role read and write access to the input and output
        # portions of the S3 bucket.
        # Q: Why grant the Lambda these permissions instead of the Transcribe service?
        # A: Two-fold:
        #   -  i) https://amzn.to/321Nx5I
        #   - ii) Granting just to this Lambda means other Transcribe jobs
        #         across the account cannot use this bucket (least privilege).
        media_bucket.grant_read(
            identity=transcribe_job_init_fn.grant_principal,
            objects_key_pattern='media-input/*')
        # A prefix could not originally be used for writes because Transcribe
        # would not accept a job unless it had write permission on the whole
        # bucket. Granting additional access to .write_access_check_file.temp
        # (below) appears to satisfy Transcribe's write check, so the output
        # prefix can now be scoped.
        media_bucket.grant_write(
            identity=transcribe_job_init_fn.grant_principal,
            objects_key_pattern='transcribe-output-raw/*')
        # Transcribe probes its write access with this temporary file, so it
        # needs an explicit grant as well.
        media_bucket.grant_write(
            identity=transcribe_job_init_fn.grant_principal,
            objects_key_pattern='.write_access_check_file.temp')

        # DynamoDB table for Jobs metadata
        jobs_metadata_table = ddb.Table(
            self,
            'MediaTranscription-TranscriptionJobs',
            table_name='MediaTranscription-TranscriptionJobs',
            partition_key=ddb.Attribute(
                name='Bucket-Key-ETag',
                type=ddb.AttributeType.STRING,
            ),
            billing_mode=ddb.BillingMode.PAY_PER_REQUEST,
        )
        jobs_metadata_table.grant(transcribe_job_init_fn.grant_principal, *[
            'dynamodb:GetItem',
            'dynamodb:PutItem',
        ])

        # Create IAM Group with read/write permissions to S3 bucket
        # TODO: Make this more federated and robust
        console_users_group = iam.Group(self, 'MediaTranscriptionConsoleUsers')
        console_users_group.attach_inline_policy(policy=iam.Policy(
            self,
            'MediaTranscriptionConsoleUserS3Access',
            statements=[
                iam.PolicyStatement(
                    effect=iam.Effect.ALLOW,
                    actions=[
                        's3:ListBucket',
                    ],
                    resources=[
                        media_bucket.bucket_arn,
                    ],
                ),
                iam.PolicyStatement(
                    effect=iam.Effect.ALLOW,
                    actions=[
                        's3:GetObject',
                        's3:PutObject',
                    ],
                    resources=[
                        media_bucket.arn_for_objects('media-input/*'),
                    ],
                ),
                iam.PolicyStatement(
                    effect=iam.Effect.ALLOW,
                    actions=[
                        's3:GetObject',
                    ],
                    resources=[
                        media_bucket.arn_for_objects(
                            'transcribe-output-raw/*'),
                    ],
                ),
            ],
        ))
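
The fn.handler code for transcribe-job-init-fn is not included in this listing. A minimal sketch of what it might do, assuming SQS-wrapped S3 "object created" events and the media-input/ and transcribe-output-raw/ prefixes granted above (the job naming scheme, language code and omission of DynamoDB bookkeeping are simplifications, not the author's implementation):

import json
import boto3

transcribe = boto3.client("transcribe")

def handler(event, context):
    # Each SQS record wraps an S3 ObjectCreated notification.
    for record in event["Records"]:
        s3_event = json.loads(record["body"])
        for s3_record in s3_event.get("Records", []):
            bucket = s3_record["s3"]["bucket"]["name"]
            key = s3_record["s3"]["object"]["key"]   # e.g. media-input/talk.mp3
            job_name = key.replace("/", "-").replace(".", "-")  # placeholder naming
            transcribe.start_transcription_job(
                TranscriptionJobName=job_name,
                LanguageCode="en-US",                # placeholder
                Media={"MediaFileUri": f"s3://{bucket}/{key}"},
                OutputBucketName=bucket,
                # OutputKey requires a recent boto3, hence the Docker bundling above.
                OutputKey=f"transcribe-output-raw/{job_name}.json",
            )
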
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Create image bucket
        image_bucket = s3.Bucket(self, 'inbound_image_s3_bucket')

        # Create the image processing queue
        image_process_queue = sqs.Queue(
            self, "image_process_queue",
            visibility_timeout=core.Duration.seconds(300),
            retention_period=core.Duration.days(1)
        )

        # Create the image response queue
        response_queue = sqs.Queue(
            self, "results_queue",
            visibility_timeout=core.Duration.seconds(300),
            retention_period=core.Duration.days(1)
        )

        # Set the put object notification to the SQS Queue
        image_bucket.add_event_notification(event=s3.EventType.OBJECT_CREATED_PUT,
                                            dest=s3n.SqsDestination(image_process_queue))

        # Define the AWS Lambda to call Amazon Rekognition DetectFaces
        detect_faces_lambda = _lambda.Function(self, 'detect_faces',
                                               runtime=_lambda.Runtime.PYTHON_3_7,
                                               handler='detect_faces.lambda_handler',
                                               code=_lambda.Code.asset('./lambda'),
                                               timeout=core.Duration.seconds(30),
                                               environment={'SQS_RESPONSE_QUEUE': response_queue.queue_name},
                                               reserved_concurrent_executions=50
                                               )

        # Set SQS image_process_queue Queue as event source for detect_faces_lambda
        detect_faces_lambda.add_event_source(_lambda_events.SqsEventSource(image_process_queue,
                                                                           batch_size=1))

        # Allow response queue messages from lambda
        response_queue.grant_send_messages(detect_faces_lambda)

        # Allow lambda to call Rekognition by adding a IAM Policy Statement
        detect_faces_lambda.add_to_role_policy(iam.PolicyStatement(actions=['rekognition:*'],
                                                                   resources=['*']))
        # Allow lambda to read from S3
        image_bucket.grant_read(detect_faces_lambda)

        # Define the DynamoDB Table
        results_table = dynamodb.Table(self, 'detect_faces_results',
                                       table_name='detect_faces_results',
                                       partition_key=dynamodb.Attribute(name='id', type=dynamodb.AttributeType.STRING),
                                       read_capacity=200,
                                       write_capacity=200
                                       )

        # Define the AWS Lambda to write results into the DynamoDB results_table
        write_results_lambda = _lambda.Function(self, 'write_results',
                                               runtime=_lambda.Runtime.PYTHON_3_7,
                                               handler='write_results.lambda_handler',
                                               code=_lambda.Code.asset('./lambda'),
                                               timeout=core.Duration.seconds(30),
                                               environment={'TABLE_NAME': results_table.table_name}
                                               )

        # Set SQS response_queue Queue as event source for write_results_lambda results_table
        write_results_lambda.add_event_source(_lambda_events.SqsEventSource(response_queue,
                                                                            batch_size=1))

        # Allow AWS Lambda write_results_lambda to Write to Dynamodb
        results_table.grant_write_data(write_results_lambda)

        # Allow AWS Lambda write_results_lambda to read messages from the SQS response_queue Queue
        response_queue.grant_consume_messages(write_results_lambda)

        # Output to Amazon S3 Image Bucket
        core.CfnOutput(self, 'cdk_output',
                       value=image_bucket.bucket_name,
                       description='Input Amazon S3 Image Bucket')
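
The ./lambda handlers referenced above are not part of this listing. A minimal, hypothetical sketch of detect_faces.lambda_handler, assuming SQS-wrapped S3 put events and the SQS_RESPONSE_QUEUE environment variable defined in the stack (the message shape forwarded to the response queue is a placeholder):

import json
import os
import boto3

rekognition = boto3.client("rekognition")
sqs = boto3.client("sqs")

def lambda_handler(event, context):
    # The stack passes the queue name, so resolve it to a URL first.
    queue_url = sqs.get_queue_url(QueueName=os.environ["SQS_RESPONSE_QUEUE"])["QueueUrl"]
    for record in event["Records"]:
        s3_event = json.loads(record["body"])
        for s3_record in s3_event.get("Records", []):
            bucket = s3_record["s3"]["bucket"]["name"]
            key = s3_record["s3"]["object"]["key"]
            faces = rekognition.detect_faces(
                Image={"S3Object": {"Bucket": bucket, "Name": key}},
                Attributes=["ALL"],
            )
            # Forward the result for write_results.lambda_handler to persist
            # into the detect_faces_results table (partition key "id").
            sqs.send_message(
                QueueUrl=queue_url,
                MessageBody=json.dumps({"id": key, "faces": faces["FaceDetails"]}),
            )
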
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # If left unchecked, this pattern could "fan out" on the transform and
        # load Lambdas to the point that it consumes all Lambda resources on the
        # account. This is why concurrency is limited to 2 on all 3 Lambdas.
        # Feel free to raise this limit.
        lambda_throttle_size = 2

        ####
        # DynamoDB Table
        # This is where our transformed data ends up
        ####
        table = dynamo_db.Table(self,
                                "TransformedData",
                                partition_key=dynamo_db.Attribute(
                                    name="id",
                                    type=dynamo_db.AttributeType.STRING))

        ####
        # S3 Landing Bucket
        # This is where the user uploads the file to be transformed
        ####
        bucket = s3.Bucket(self, "LandingBucket")

        ####
        # Queue that listens for S3 Bucket events
        ####
        queue = sqs.Queue(self,
                          'newObjectInLandingBucketEventQueue',
                          visibility_timeout=core.Duration.seconds(300))

        bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                      s3n.SqsDestination(queue))

        # EventBridge Permissions
        event_bridge_put_policy = iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            resources=['*'],
            actions=['events:PutEvents'])

        ####
        # Fargate ECS Task Creation to pull data from S3
        #
        # Fargate is used here because a very large file can be streamed to
        # the Fargate task for as long as needed before the data is put onto
        # EventBridge, or the task's memory/storage can be raised to download
        # the whole file. Lambda has hard limits on runtime and memory/storage.
        ####
        vpc = ec2.Vpc(self, "Vpc", max_azs=2)

        logging = ecs.AwsLogDriver(stream_prefix='TheEventBridgeETL',
                                   log_retention=logs.RetentionDays.ONE_WEEK)

        cluster = ecs.Cluster(self, 'Ec2Cluster', vpc=vpc)

        task_definition = ecs.TaskDefinition(
            self,
            'FargateTaskDefinition',
            memory_mib="512",
            cpu="256",
            compatibility=ecs.Compatibility.FARGATE)

        # We need to give our fargate container permission to put events on our EventBridge
        task_definition.add_to_task_role_policy(event_bridge_put_policy)
        # Grant fargate container access to the object that was uploaded to s3
        bucket.grant_read(task_definition.task_role)

        container = task_definition.add_container(
            'AppContainer',
            image=ecs.ContainerImage.from_asset(
                'container/s3DataExtractionTask'),
            logging=logging,
            environment={
                'S3_BUCKET_NAME': bucket.bucket_name,
                'S3_OBJECT_KEY': ''
            })

        ####
        # Lambdas
        #
        # These are used for 4 phases:
        #
        # Extract    - kicks off an ECS Fargate task to download the data and splinter it into EventBridge events
        # Transform  - takes the two comma-separated strings and produces a JSON object
        # Load       - inserts the data into DynamoDB
        # Observe    - a Lambda that subscribes to all events and logs them centrally
        ####
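        # (A hypothetical sketch of the events these phases publish to
        # EventBridge is shown after this example.)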

        subnet_ids = []
        for subnet in vpc.private_subnets:
            subnet_ids.append(subnet.subnet_id)

        ####
        # Extract
        # defines an AWS Lambda resource to trigger our fargate ecs task
        ####
        extract_lambda = _lambda.Function(
            self,
            "extractLambdaHandler",
            runtime=_lambda.Runtime.NODEJS_12_X,
            handler="s3SqsEventConsumer.handler",
            code=_lambda.Code.from_asset("lambdas/extract"),
            reserved_concurrent_executions=lambda_throttle_size,
            environment={
                "CLUSTER_NAME": cluster.cluster_name,
                "TASK_DEFINITION": task_definition.task_definition_arn,
                "SUBNETS": json.dumps(subnet_ids),
                "CONTAINER_NAME": container.container_name
            })
        queue.grant_consume_messages(extract_lambda)
        extract_lambda.add_event_source(_event.SqsEventSource(queue=queue))
        extract_lambda.add_to_role_policy(event_bridge_put_policy)

        run_task_policy_statement = iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            resources=[task_definition.task_definition_arn],
            actions=['ecs:RunTask'])
        extract_lambda.add_to_role_policy(run_task_policy_statement)

        task_execution_role_policy_statement = iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            resources=[
                task_definition.obtain_execution_role().role_arn,
                task_definition.task_role.role_arn
            ],
            actions=['iam:PassRole'])
        extract_lambda.add_to_role_policy(task_execution_role_policy_statement)

        ####
        # Transform
        # defines a lambda to transform the data that was extracted from s3
        ####

        transform_lambda = _lambda.Function(
            self,
            "TransformLambdaHandler",
            runtime=_lambda.Runtime.NODEJS_12_X,
            handler="transform.handler",
            code=_lambda.Code.from_asset("lambdas/transform"),
            reserved_concurrent_executions=lambda_throttle_size,
            timeout=core.Duration.seconds(3))
        transform_lambda.add_to_role_policy(event_bridge_put_policy)

        # Create EventBridge rule to route extraction events
        transform_rule = events.Rule(
            self,
            'transformRule',
            description='Data extracted from S3, needs to be transformed',
            event_pattern=events.EventPattern(
                source=['cdkpatterns.the-eventbridge-etl'],
                detail_type=['s3RecordExtraction'],
                detail={"status": ["extracted"]}))
        transform_rule.add_target(
            targets.LambdaFunction(handler=transform_lambda))

        ####
        # Load
        # load the transformed data in dynamodb
        ####

        load_lambda = _lambda.Function(
            self,
            "LoadLambdaHandler",
            runtime=_lambda.Runtime.NODEJS_12_X,
            handler="load.handler",
            code=_lambda.Code.from_asset("lambdas/load"),
            reserved_concurrent_executions=lambda_throttle_size,
            timeout=core.Duration.seconds(3),
            environment={"TABLE_NAME": table.table_name})
        load_lambda.add_to_role_policy(event_bridge_put_policy)
        table.grant_read_write_data(load_lambda)

        load_rule = events.Rule(
            self,
            'loadRule',
            description='Data transformed, needs to be loaded into DynamoDB',
            event_pattern=events.EventPattern(
                source=['cdkpatterns.the-eventbridge-etl'],
                detail_type=['transform'],
                detail={"status": ["transformed"]}))
        load_rule.add_target(targets.LambdaFunction(handler=load_lambda))

        ####
        # Observe
        # Watch for all cdkpatterns.the-eventbridge-etl events and log them centrally
        ####

        observe_lambda = _lambda.Function(
            self,
            "ObserveLambdaHandler",
            runtime=_lambda.Runtime.NODEJS_12_X,
            handler="observe.handler",
            code=_lambda.Code.from_asset("lambdas/observe"),
            reserved_concurrent_executions=lambda_throttle_size,
            timeout=core.Duration.seconds(3))

        observe_rule = events.Rule(
            self,
            'observeRule',
            description='all events are caught here and logged centrally',
            event_pattern=events.EventPattern(
                source=['cdkpatterns.the-eventbridge-etl']))

        observe_rule.add_target(targets.LambdaFunction(handler=observe_lambda))
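
The Fargate container and the Node.js extract/transform/load handlers are not part of this listing. For illustration only, a hypothetical Python equivalent of the event the extraction step would publish so that transformRule above matches it (Source, DetailType and the "status" field are taken from the EventPattern definitions; the other detail fields are placeholders):

import json
import boto3

events_client = boto3.client("events")

def publish_extraction_event(csv_line: str) -> None:
    # Publish one extracted record; transformRule matches on source,
    # detail-type and detail.status.
    events_client.put_events(
        Entries=[{
            "Source": "cdkpatterns.the-eventbridge-etl",
            "DetailType": "s3RecordExtraction",
            "Detail": json.dumps({
                "status": "extracted",   # matched by detail={"status": ["extracted"]}
                "data": csv_line,        # placeholder payload field
            }),
        }]
    )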