def __init__(self, scope: core.Construct, _id: str, **kwargs) -> None:
        super().__init__(scope, _id, **kwargs)

        # Set up SSM parameters for credentials, bucket_para, and ignore_list
        ssm_credential_para = ssm.StringParameter.from_secure_string_parameter_attributes(
            self,
            "ssm_parameter_credentials",
            parameter_name=ssm_parameter_credentials,
            version=1)

        ssm_bucket_para = ssm.StringParameter(self,
                                              "s3bucket_serverless",
                                              string_value=json.dumps(
                                                  bucket_para, indent=4))

        ssm_parameter_ignore_list = ssm.StringParameter(
            self, "s3_migrate_ignore_list", string_value=ignore_list)

        # Set up DynamoDB
        ddb_file_list = ddb.Table(self,
                                  "s3migrate_serverless",
                                  partition_key=ddb.Attribute(
                                      name="Key",
                                      type=ddb.AttributeType.STRING),
                                  billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

        # Set up SQS
        sqs_queue_DLQ = sqs.Queue(self,
                                  "s3migrate_serverless_Q_DLQ",
                                  visibility_timeout=core.Duration.minutes(15),
                                  retention_period=core.Duration.days(14))
        sqs_queue = sqs.Queue(self,
                              "s3migrate_serverless_Q",
                              visibility_timeout=core.Duration.minutes(15),
                              retention_period=core.Duration.days(14),
                              dead_letter_queue=sqs.DeadLetterQueue(
                                  max_receive_count=3, queue=sqs_queue_DLQ))
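
        # Redrive behaviour from the settings above: a message that fails processing
        # 3 times (max_receive_count) is moved to the DLQ, where it is retained for 14 days.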

        # Set up an API the Lambda can call to get its public IP address (for debugging network routing)
        checkip = api.RestApi(
            self,
            "lambda-checkip-api",
            cloud_watch_role=True,
            deploy=True,
            description="For Lambda get IP address",
            default_integration=api.MockIntegration(
                integration_responses=[
                    api.IntegrationResponse(status_code="200",
                                            response_templates={
                                                "application/json":
                                                "$context.identity.sourceIp"
                                            })
                ],
                request_templates={"application/json": '{"statusCode": 200}'}),
            endpoint_types=[api.EndpointType.REGIONAL])
        checkip.root.add_method("GET",
                                method_responses=[
                                    api.MethodResponse(
                                        status_code="200",
                                        response_models={
                                            "application/json":
                                            api.Model.EMPTY_MODEL
                                        })
                                ])
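
        # A minimal sketch (an assumption, not part of this stack) of how a Lambda function
        # could use the 'checkip_url' environment variable defined below to log its egress IP;
        # the mock integration above simply echoes the caller's source IP:
        #
        #   import os, urllib.request
        #   my_ip = urllib.request.urlopen(os.environ["checkip_url"]).read().decode()
        #   print(f"egress ip: {my_ip}")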

        # Set up Lambda functions
        handler = lam.Function(self,
                               "s3-migrate-worker",
                               code=lam.Code.asset("./lambda"),
                               handler="lambda_function_worker.lambda_handler",
                               runtime=lam.Runtime.PYTHON_3_8,
                               memory_size=1024,
                               timeout=core.Duration.minutes(15),
                               tracing=lam.Tracing.ACTIVE,
                               environment={
                                   'table_queue_name':
                                   ddb_file_list.table_name,
                                   'Des_bucket_default':
                                   Des_bucket_default,
                                   'Des_prefix_default':
                                   Des_prefix_default,
                                   'StorageClass':
                                   StorageClass,
                                   'checkip_url':
                                   checkip.url,
                                   'ssm_parameter_credentials':
                                   ssm_parameter_credentials
                               })
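
        # A minimal sketch (an assumption about the worker code, which lives in ./lambda) of
        # reading the SecureString credential parameter that is granted to the worker below:
        #
        #   import os, boto3
        #   ssm_client = boto3.client("ssm")
        #   credentials = ssm_client.get_parameter(
        #       Name=os.environ["ssm_parameter_credentials"],
        #       WithDecryption=True)["Parameter"]["Value"]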

        handler_jobsender = lam.Function(
            self,
            "s3-migrate-jobsender",
            code=lam.Code.asset("./lambda"),
            handler="lambda_function_jobsender.lambda_handler",
            runtime=lam.Runtime.PYTHON_3_8,
            memory_size=1024,
            timeout=core.Duration.minutes(15),
            tracing=lam.Tracing.ACTIVE,
            environment={
                'table_queue_name': ddb_file_list.table_name,
                'StorageClass': StorageClass,
                'checkip_url': checkip.url,
                'sqs_queue': sqs_queue.queue_name,
                'ssm_parameter_credentials': ssm_parameter_credentials,
                'ssm_parameter_ignore_list':
                ssm_parameter_ignore_list.parameter_name,
                'ssm_parameter_bucket': ssm_bucket_para.parameter_name
            })

        # Allow Lambda to read/write DynamoDB and send to SQS
        ddb_file_list.grant_read_write_data(handler)
        ddb_file_list.grant_read_write_data(handler_jobsender)
        sqs_queue.grant_send_messages(handler_jobsender)
        # SQS triggers the Lambda worker
        handler.add_event_source(SqsEventSource(sqs_queue, batch_size=1))

        # Option 1: Create an S3 bucket; all new objects in this bucket will be transmitted by the Lambda worker
        s3bucket = s3.Bucket(self, "s3_new_migrate")
        s3bucket.grant_read(handler)
        s3bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                        s3n.SqsDestination(sqs_queue))

        # Option 2: Allow existing S3 buckets to be read by the Lambda functions.
        # The jobsender Lambda scans and compares these buckets and triggers Lambda workers to transmit
        bucket_name = ''
        for b in bucket_para:
            if bucket_name != b['src_bucket']:  # skip if the same bucket is listed more than once
                bucket_name = b['src_bucket']
                s3exist_bucket = s3.Bucket.from_bucket_name(
                    self,
                    bucket_name,  # use the bucket name as the construct id
                    bucket_name=bucket_name)
                s3exist_bucket.grant_read(handler_jobsender)
                s3exist_bucket.grant_read(handler)

        # Allow Lambda to read the SSM parameters
        ssm_bucket_para.grant_read(handler_jobsender)
        ssm_credential_para.grant_read(handler)
        ssm_credential_para.grant_read(handler_jobsender)
        ssm_parameter_ignore_list.grant_read(handler_jobsender)

        # Schedule a cron event to trigger the jobsender Lambda every hour:
        event.Rule(self,
                   'cron_trigger_jobsender',
                   schedule=event.Schedule.rate(core.Duration.hours(1)),
                   targets=[target.LambdaFunction(handler_jobsender)])

        # Create Lambda log metric filters for network traffic metrics
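        # The filters below use space-delimited patterns, so they assume worker log lines of a
        # form like the following hypothetical example (the 5th field is captured as $bytes):
        #   [INFO] 2020-01-01T00:00:00 1 --->Complete 1048576 some/prefix/key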
        handler.log_group.add_metric_filter(
            "Complete-bytes",
            metric_name="Complete-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[info, date, sn, p="--->Complete", bytes, key]'))
        handler.log_group.add_metric_filter(
            "Uploading-bytes",
            metric_name="Uploading-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[info, date, sn, p="--->Uploading", bytes, key]'))
        handler.log_group.add_metric_filter(
            "Downloading-bytes",
            metric_name="Downloading-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[info, date, sn, p="--->Downloading", bytes, key]'))
        lambda_metric_Complete = cw.Metric(namespace="s3_migrate",
                                           metric_name="Complete-bytes",
                                           statistic="Sum",
                                           period=core.Duration.minutes(1))
        lambda_metric_Upload = cw.Metric(namespace="s3_migrate",
                                         metric_name="Uploading-bytes",
                                         statistic="Sum",
                                         period=core.Duration.minutes(1))
        lambda_metric_Download = cw.Metric(namespace="s3_migrate",
                                           metric_name="Downloading-bytes",
                                           statistic="Sum",
                                           period=core.Duration.minutes(1))
        handler.log_group.add_metric_filter(
            "ERROR",
            metric_name="ERROR-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"ERROR"'))
        handler.log_group.add_metric_filter(
            "WARNING",
            metric_name="WARNING-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"WARNING"'))
        log_metric_ERROR = cw.Metric(namespace="s3_migrate",
                                     metric_name="ERROR-Logs",
                                     statistic="Sum",
                                     period=core.Duration.minutes(1))
        log_metric_WARNING = cw.Metric(namespace="s3_migrate",
                                       metric_name="WARNING-Logs",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))

        # Dashboard to monitor SQS and Lambda
        board = cw.Dashboard(self, "s3_migrate_serverless")

        board.add_widgets(
            cw.GraphWidget(title="Lambda-NETWORK",
                           left=[
                               lambda_metric_Download, lambda_metric_Upload,
                               lambda_metric_Complete
                           ]),
            # TODO: this monitors concurrency across all Lambda functions, not just the worker, due to a CDK limitation.
            # Lambda now supports monitoring a single function's concurrency; switch to that once CDK supports it.
            cw.GraphWidget(title="Lambda-all-concurrent",
                           left=[
                               handler.metric_all_concurrent_executions(
                                   period=core.Duration.minutes(1))
                           ]),
            cw.GraphWidget(
                title="Lambda-invocations/errors/throttles",
                left=[
                    handler.metric_invocations(
                        period=core.Duration.minutes(1)),
                    handler.metric_errors(period=core.Duration.minutes(1)),
                    handler.metric_throttles(period=core.Duration.minutes(1))
                ]),
            cw.GraphWidget(
                title="Lambda-duration",
                left=[
                    handler.metric_duration(period=core.Duration.minutes(1))
                ]),
        )

        board.add_widgets(
            cw.GraphWidget(
                title="SQS-Jobs",
                left=[
                    sqs_queue.metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1))
                ]),
            cw.GraphWidget(
                title="SQS-DeadLetterQueue",
                left=[
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1))
                ]),
            cw.GraphWidget(title="ERROR/WARNING Logs",
                           left=[log_metric_ERROR],
                           right=[log_metric_WARNING]),
            cw.SingleValueWidget(
                title="Running/Waiting and Dead Jobs",
                metrics=[
                    sqs_queue.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue.metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1))
                ],
                height=6))
        # Alarm for queue - DLQ
        alarm_DLQ = cw.Alarm(
            self,
            "SQS_DLQ",
            metric=sqs_queue_DLQ.metric_approximate_number_of_messages_visible(
            ),
            threshold=0,
            comparison_operator=cw.ComparisonOperator.GREATER_THAN_THRESHOLD,
            evaluation_periods=1,
            datapoints_to_alarm=1)
        alarm_topic = sns.Topic(self, "SQS queue-DLQ has dead letter")
        alarm_topic.add_subscription(
            subscription=sub.EmailSubscription(alarm_email))
        alarm_DLQ.add_alarm_action(action.SnsAction(alarm_topic))

        core.CfnOutput(self,
                       "Dashboard",
                       value="CloudWatch Dashboard name s3_migrate_serverless")

# Example 2

    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
        self.platform_resources = ImportedResources(self, self.stack_name)

        ###

        ECS_APP_NAME = "octicketing-microservice"
        ECS_DEPLOYMENT_GROUP_NAME = "octicketingECSBlueGreen"
        ECS_DEPLOYMENT_CONFIG_NAME = "CodeDeployDefault.ECSLinear10PercentEvery1Minutes"
        ECS_DEPLOYMENT_CONFIG_ALL = "CodeDeployDefault.ECSAllAtOnce"
        ECS_TASKSET_TERMINATION_WAIT_TIME = 10
        ECS_TASK_FAMILY_NAME = "octicketing-service"
        ECS_APP_LOG_GROUP_NAME = "/ecs/" + ECS_TASK_FAMILY_NAME

        DUMMY_APP_NAME = "hello-world-microservice"
        DUMMY_TASK_FAMILY_NAME = "hello-world-service"
        DUMMY_APP_LOG_GROUP_NAME = "/ecs/dummy-" + ECS_TASK_FAMILY_NAME
        DUMMY_CONTAINER_IMAGE = self.account + ".dkr.ecr." + \
            self.region + ".amazonaws.com/hello-world:latest"
        # =============================================================================
        # ECR and CodeCommit repositories for the Blue/Green deployment
        # =============================================================================

        # ECR repository for the docker images
        self.octicketing_ecr_repo = aws_ecr.Repository(
            self,
            "OcticketingECRRepo",
            repository_name=ECS_APP_NAME,
            removal_policy=core.RemovalPolicy.DESTROY)

        self.octicketing_code_repo = aws_codecommit.Repository(
            self,
            ECS_APP_NAME + "-bg",
            repository_name=ECS_APP_NAME + "-bg",
            description=ECS_APP_NAME + "blue-green service repository")
        core.CfnOutput(self,
                       'BGRepoName',
                       value=self.octicketing_code_repo.repository_name,
                       export_name='OcticketingBGRepoName')
        core.CfnOutput(self,
                       'BGRepoARN',
                       value=self.octicketing_code_repo.repository_arn,
                       export_name='OcticketingBGRepoARN')

        # =============================================================================
        #   CODE BUILD and ECS TASK ROLES for the Blue/Green deployment
        # =============================================================================

        # IAM role for the Code Build project
        codeBuildServiceRole = aws_iam.Role(
            self,
            "codeBuildServiceRole",
            assumed_by=aws_iam.ServicePrincipal('codebuild.amazonaws.com'))
        inlinePolicyForCodeBuild = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            actions=[
                "ecr:GetAuthorizationToken", "ecr:BatchCheckLayerAvailability",
                "ecr:InitiateLayerUpload", "ecr:BatchGetImage",
                "ecr:GetDownloadUrlForLayer", "ecr:UploadLayerPart",
                "ecr:CompleteLayerUpload", "ecr:PutImage"
            ],
            resources=["*"])

        codeBuildServiceRole.add_to_policy(inlinePolicyForCodeBuild)

        # ECS task role
        ecsTaskRole = aws_iam.Role(
            self,
            "ecsTaskRoleForWorkshop",
            assumed_by=aws_iam.ServicePrincipal('ecs-tasks.amazonaws.com'))

        ecsTaskRole.add_managed_policy(
            aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                "service-role/AmazonECSTaskExecutionRolePolicy"))
        ecsTaskRole.add_managed_policy(
            aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                "SecretsManagerReadWrite"))

        # =============================================================================
        # CODE DEPLOY APPLICATION for the Blue/Green deployment
        # =============================================================================

        # Creating the code deploy application
        codeDeployApplication = codedeploy.EcsApplication(
            self, "OcticketingCodeDeploy")

        # Creating the code deploy service role
        codeDeployServiceRole = aws_iam.Role(
            self,
            "codeDeployServiceRole",
            assumed_by=aws_iam.ServicePrincipal('codedeploy.amazonaws.com'))
        codeDeployServiceRole.add_managed_policy(
            aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                "AWSCodeDeployRoleForECS"))

        # IAM role for custom lambda function
        customLambdaServiceRole = aws_iam.Role(
            self,
            "codeDeployCustomLambda",
            assumed_by=aws_iam.ServicePrincipal('lambda.amazonaws.com'))

        inlinePolicyForLambda = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            actions=[
                "iam:PassRole", "sts:AssumeRole", "codedeploy:List*",
                "codedeploy:Get*", "codedeploy:UpdateDeploymentGroup",
                "codedeploy:CreateDeploymentGroup",
                "codedeploy:DeleteDeploymentGroup"
            ],
            resources=["*"])

        customLambdaServiceRole.add_managed_policy(
            aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSLambdaBasicExecutionRole'))
        customLambdaServiceRole.add_to_policy(inlinePolicyForLambda)

        # Custom resource to create the deployment group
        createDeploymentGroupLambda = aws_lambda.Function(
            self,
            'createDeploymentGroupLambda',
            code=aws_lambda.Code.from_asset("custom_resources"),
            runtime=aws_lambda.Runtime.PYTHON_3_8,
            handler='create_deployment_group.handler',
            role=customLambdaServiceRole,
            description="Custom resource to create deployment group",
            memory_size=128,
            timeout=core.Duration.seconds(60))
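
        # The handler source in ./custom_resources is not shown here; a minimal sketch of the
        # call it presumably makes on a Create event (an assumption, not the actual file):
        #
        #   import boto3
        #   def handler(event, context):
        #       if event["RequestType"] == "Create":
        #           props = event["ResourceProperties"]
        #           boto3.client("codedeploy").create_deployment_group(
        #               applicationName=props["ApplicationName"],
        #               deploymentGroupName=props["DeploymentGroupName"],
        #               serviceRoleArn=props["ServiceRoleArn"],
        #               ...)  # plus the ECS service, target group and alarm settings
        #       # a real handler must also signal success or failure back to CloudFormation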

        # ================================================================================================
        # CloudWatch Alarms for 4XX errors
        blue4xxMetric = aws_cloudwatch.Metric(
            namespace='AWS/ApplicationELB',
            metric_name='HTTPCode_Target_4XX_Count',
            dimensions={
                "TargetGroup": self.platform_resources.blue_target_full_name,
                "LoadBalancer": self.platform_resources.alb_full_name
            },
            statistic="sum",
            period=core.Duration.minutes(1))

        self.blue_targetAlarm = aws_cloudwatch.Alarm(
            self,
            "blue4xxErrors",
            alarm_name="Blue_4xx_Alarm",
            alarm_description=
            "CloudWatch Alarm for the 4xx errors of Blue target group",
            metric=blue4xxMetric,
            threshold=1,
            evaluation_periods=1)

        green4xxMetric = aws_cloudwatch.Metric(
            namespace='AWS/ApplicationELB',
            metric_name='HTTPCode_Target_4XX_Count',
            dimensions={
                "TargetGroup": self.platform_resources.green_target_full_name,
                "LoadBalancer": self.platform_resources.alb_full_name
            },
            statistic="sum",
            period=core.Duration.minutes(1))
        self.green_targetAlarm = aws_cloudwatch.Alarm(
            self,
            "green4xxErrors",
            alarm_name="Green_4xx_Alarm",
            alarm_description=
            "CloudWatch Alarm for the 4xx errors of Green target group",
            metric=green4xxMetric,
            threshold=1,
            evaluation_periods=1)

        # ================================================================================================
        # DUMMY TASK DEFINITION for the initial service creation
        # This is required so the ECS service exists before the CodeDeploy deployment group is created
        # ================================================================================================
        sampleTaskDefinition = aws_ecs.FargateTaskDefinition(
            self,
            "sampleTaskDefn",
            family=DUMMY_TASK_FAMILY_NAME,
            cpu=256,
            memory_limit_mib=1024,
            task_role=ecsTaskRole,
            execution_role=ecsTaskRole)

        sampleContainerDefn = sampleTaskDefinition.add_container(
            "sampleAppContainer",
            image=aws_ecs.ContainerImage.from_registry(DUMMY_CONTAINER_IMAGE),
            logging=aws_ecs.AwsLogDriver(log_group=aws_logs.LogGroup(
                self,
                "sampleAppLogGroup",
                log_group_name=DUMMY_APP_LOG_GROUP_NAME,
                removal_policy=core.RemovalPolicy.DESTROY),
                                         stream_prefix=DUMMY_APP_NAME),
            docker_labels={"name": DUMMY_APP_NAME})

        port_mapping = aws_ecs.PortMapping(container_port=8080,
                                           protocol=aws_ecs.Protocol.TCP)

        sampleContainerDefn.add_port_mappings(port_mapping)

        # ================================================================================================
        # ECS task definition using ECR image
        # It will be used by CodeDeploy for the Blue/Green deployment
        # ================================================================================================
        OcticketingTaskDef = aws_ecs.FargateTaskDefinition(
            self,
            "appTaskDefn",
            family=ECS_TASK_FAMILY_NAME,
            cpu=256,
            memory_limit_mib=1024,
            task_role=ecsTaskRole,
            execution_role=ecsTaskRole)

        # =============================================================================
        octicketing_cont_def = OcticketingTaskDef.add_container(
            "OcticketingAppContainer",
            image=aws_ecs.ContainerImage.from_ecr_repository(
                self.octicketing_ecr_repo, "latest"),
            logging=aws_ecs.AwsLogDriver(log_group=aws_logs.LogGroup(
                self,
                "OcticketingAppLogGroup",
                log_group_name=ECS_APP_LOG_GROUP_NAME,
                removal_policy=core.RemovalPolicy.DESTROY),
                                         stream_prefix=ECS_APP_NAME),
            docker_labels={"name": ECS_APP_NAME})
        octicketing_cont_def.add_port_mappings(port_mapping)

        # =============================================================================
        # ECS SERVICE for the Blue/Green deployment
        # =============================================================================

        OcticketingAppService = aws_ecs.FargateService(
            self,
            "OcticketingAppService",
            cluster=self.platform_resources.ecs_cluster,
            task_definition=sampleTaskDefinition,
            health_check_grace_period=core.Duration.seconds(10),
            platform_version=aws_ecs.FargatePlatformVersion.VERSION1_4,
            desired_count=1,
            deployment_controller={
                "type": aws_ecs.DeploymentControllerType.CODE_DEPLOY
            },
            service_name=ECS_APP_NAME)

        OcticketingAppService.connections.allow_from(
            self.platform_resources.alb, aws_ec2.Port.tcp(80))
        OcticketingAppService.connections.allow_from(
            self.platform_resources.alb, aws_ec2.Port.tcp(8080))
        OcticketingAppService.attach_to_application_target_group(
            self.platform_resources.blue_target)

        # =============================================================================
        # CODE DEPLOY - Deployment Group CUSTOM RESOURCE for the Blue/Green deployment
        # =============================================================================

        core.CustomResource(
            self,
            'customEcsDeploymentGroup',
            service_token=createDeploymentGroupLambda.function_arn,
            properties={
                "ApplicationName": codeDeployApplication.application_name,
                "DeploymentGroupName": ECS_DEPLOYMENT_GROUP_NAME,
                "DeploymentConfigName": ECS_DEPLOYMENT_CONFIG_NAME,
                "ServiceRoleArn": codeDeployServiceRole.role_arn,
                "BlueTargetGroup": self.platform_resources.blue_target_name,
                "GreenTargetGroup": self.platform_resources.green_target_name,
                "ProdListenerArn":
                self.platform_resources.prod_listener.listener_arn,
                "TestListenerArn":
                self.platform_resources.test_listener.listener_arn,
                "EcsClusterName":
                self.platform_resources.ecs_cluster.cluster_name,
                "EcsServiceName": OcticketingAppService.service_name,
                "TerminationWaitTime": ECS_TASKSET_TERMINATION_WAIT_TIME,
                "BlueGroupAlarm": self.blue_targetAlarm.alarm_name,
                "GreenGroupAlarm": self.green_targetAlarm.alarm_name,
            })

        ecsDeploymentGroup = codedeploy.EcsDeploymentGroup.from_ecs_deployment_group_attributes(
            self,
            "ecsDeploymentGroup",
            application=codeDeployApplication,
            deployment_group_name=ECS_DEPLOYMENT_GROUP_NAME,
            deployment_config=codedeploy.EcsDeploymentConfig.
            from_ecs_deployment_config_name(self, "ecsDeploymentConfig",
                                            ECS_DEPLOYMENT_CONFIG_NAME))
        # =============================================================================
        # CODE BUILD PROJECT for the Blue/Green deployment
        # =============================================================================

        # Creating the code build project
        OcticketingAppcodebuild = aws_codebuild.Project(
            self,
            "OcticketingAppcodebuild",
            role=codeBuildServiceRole,
            environment=aws_codebuild.BuildEnvironment(
                build_image=aws_codebuild.LinuxBuildImage.STANDARD_4_0,
                compute_type=aws_codebuild.ComputeType.SMALL,
                privileged=True,
                environment_variables={
                    'REPOSITORY_URI': {
                        'value':
                        self.octicketing_ecr_repo.repository_uri,
                        'type':
                        aws_codebuild.BuildEnvironmentVariableType.PLAINTEXT
                    },
                    'TASK_EXECUTION_ARN': {
                        'value':
                        ecsTaskRole.role_arn,
                        'type':
                        aws_codebuild.BuildEnvironmentVariableType.PLAINTEXT
                    },
                    'TASK_FAMILY': {
                        'value':
                        ECS_TASK_FAMILY_NAME,
                        'type':
                        aws_codebuild.BuildEnvironmentVariableType.PLAINTEXT
                    }
                }),
            source=aws_codebuild.Source.code_commit(
                repository=self.octicketing_code_repo))

        # =============================================================================
        # CODE PIPELINE for Blue/Green ECS deployment
        # =============================================================================

        codePipelineServiceRole = aws_iam.Role(
            self,
            "codePipelineServiceRole",
            assumed_by=aws_iam.ServicePrincipal('codepipeline.amazonaws.com'))

        inlinePolicyForCodePipeline = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            actions=[
                "iam:PassRole", "sts:AssumeRole", "codecommit:Get*",
                "codecommit:List*", "codecommit:GitPull",
                "codecommit:UploadArchive", "codecommit:CancelUploadArchive",
                "codebuild:BatchGetBuilds", "codebuild:StartBuild",
                "codedeploy:CreateDeployment", "codedeploy:Get*",
                "codedeploy:RegisterApplicationRevision", "s3:Get*",
                "s3:List*", "s3:PutObject"
            ],
            resources=["*"])

        codePipelineServiceRole.add_to_policy(inlinePolicyForCodePipeline)

        sourceArtifact = codepipeline.Artifact('sourceArtifact')
        buildArtifact = codepipeline.Artifact('buildArtifact')

        # S3 bucket for storing the code pipeline artifacts
        OcticketingAppArtifactsBucket = s3.Bucket(
            self,
            "OcticketingAppArtifactsBucket",
            encryption=s3.BucketEncryption.S3_MANAGED,
            block_public_access=s3.BlockPublicAccess.BLOCK_ALL)

        # S3 bucket policy for the code pipeline artifacts
        denyUnEncryptedObjectUploads = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.DENY,
            actions=["s3:PutObject"],
            principals=[aws_iam.AnyPrincipal()],
            resources=[OcticketingAppArtifactsBucket.bucket_arn + "/*"],
            conditions={
                "StringNotEquals": {
                    "s3:x-amz-server-side-encryption": "aws:kms"
                }
            })

        denyInsecureConnections = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.DENY,
            actions=["s3:*"],
            principals=[aws_iam.AnyPrincipal()],
            resources=[OcticketingAppArtifactsBucket.bucket_arn + "/*"],
            conditions={"Bool": {
                "aws:SecureTransport": "false"
            }})

        OcticketingAppArtifactsBucket.add_to_resource_policy(
            denyUnEncryptedObjectUploads)
        OcticketingAppArtifactsBucket.add_to_resource_policy(
            denyInsecureConnections)

        # CodePipeline - the CloudWatch trigger event is created automatically by CDK
        codepipeline.Pipeline(
            self,
            "ecsBlueGreen",
            role=codePipelineServiceRole,
            artifact_bucket=OcticketingAppArtifactsBucket,
            stages=[
                codepipeline.StageProps(
                    stage_name='Source',
                    actions=[
                        aws_codepipeline_actions.CodeCommitSourceAction(
                            action_name='Source',
                            repository=self.octicketing_code_repo,
                            output=sourceArtifact,
                        )
                    ]),
                codepipeline.StageProps(
                    stage_name='Build',
                    actions=[
                        aws_codepipeline_actions.CodeBuildAction(
                            action_name='Build',
                            project=OcticketingAppcodebuild,
                            input=sourceArtifact,
                            outputs=[buildArtifact])
                    ]),
                codepipeline.StageProps(
                    stage_name='Deploy',
                    actions=[
                        aws_codepipeline_actions.CodeDeployEcsDeployAction(
                            action_name='Deploy',
                            deployment_group=ecsDeploymentGroup,
                            app_spec_template_input=buildArtifact,
                            task_definition_template_input=buildArtifact,
                        )
                    ])
            ])

        # =============================================================================
        # Export the outputs
        # =============================================================================
        core.CfnOutput(
            self,
            "ecsBlueGreenCodeRepo",
            description="Demo app code commit repository",
            export_name="ecsBlueGreenDemoAppRepo",
            value=self.octicketing_code_repo.repository_clone_url_http)

# Example 3

    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Read the Lambda function code
        try:
            with open(
                    "serverless_stacks/lambda_src/konstone_custom_metric_log_generator.py",
                    mode="r") as f:
                konstone_custom_metric_fn_code = f.read()
        except OSError:
            print("Unable to read Lambda function code")
            raise

        konstone_custom_metric_fn = _lambda.Function(
            self,
            "konstoneFunction",
            function_name="konstone_custom_metric_fn",
            runtime=_lambda.Runtime.PYTHON_3_7,
            handler="index.lambda_handler",
            code=_lambda.InlineCode(konstone_custom_metric_fn_code),
            timeout=core.Duration.seconds(3),
            reserved_concurrent_executions=1,
            environment={
                "LOG_LEVEL": "INFO",
                "PERCENTAGE_ERRORS": "75"
            })

        # Create a custom log group at /aws/lambda/<function-name>
        konstone_custom_metric_lg = _logs.LogGroup(
            self,
            "konstoneLoggroup",
            log_group_name=
            f"/aws/lambda/{konstone_custom_metric_fn.function_name}",
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=_logs.RetentionDays.ONE_DAY,
        )

        # Define the custom metric (namespace and metric name)
        third_party_error_metric = _cloudwatch.Metric(
            namespace=f"third-party-error-metric",
            metric_name="third_party_error_metric",
            label="Total No. of Third Party API Errors",
            period=core.Duration.minutes(1),
            statistic="Sum")

        # Create Custom Metric Log Filter
        third_party_error_metric_filter = _logs.MetricFilter(
            self,
            "thirdPartyApiErrorMetricFilter",
            filter_pattern=_logs.FilterPattern.boolean_value(
                "$.third_party_api_error", True),
            log_group=konstone_custom_metric_lg,
            metric_namespace=third_party_error_metric.namespace,
            metric_name=third_party_error_metric.metric_name,
            default_value=0,
            metric_value="1")

        # Create Third Party Error Alarm
        third_party_error_alarm = _cloudwatch.Alarm(
            self,
            "thirdPartyApiErrorAlarm",
            alarm_description=
            "Alert if 3rd party API has more than 2 errors in the last two minutes",
            alarm_name="third-party-api-alarm",
            metric=third_party_error_metric,
            comparison_operator=_cloudwatch.ComparisonOperator.
            GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            threshold=2,
            evaluation_periods=2,
            datapoints_to_alarm=1,
            period=core.Duration.minutes(1),
            treat_missing_data=_cloudwatch.TreatMissingData.NOT_BREACHING)
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # SNS topic for monitoring notifications
        snstopic_monitor01 = aws_sns.Topic(self,
                                           "MonitorSnsTopic",
                                           display_name="monitor webapp",
                                           topic_name="EC2Monitor")

        # add email subscription to the SNS topic
        snstopic_monitor01.add_subscription(
            aws_sns_subc.EmailSubscription("*****@*****.**"))

        ## vpc block ##
        prod_config = self.node.try_get_context('envs')['prod']

        custom_vpc = aws_ec2.Vpc(
            self,
            "CustomVpcID",
            cidr=prod_config['vpc_config']['vpc_cidr'],
            max_azs=2,
            nat_gateways=1,
            subnet_configuration=[
                aws_ec2.SubnetConfiguration(
                    name="PublicSubnet",
                    cidr_mask=prod_config['vpc_config']['cidr_mask'],
                    subnet_type=aws_ec2.SubnetType.PUBLIC),
                aws_ec2.SubnetConfiguration(
                    name="PrivateSubnet",
                    cidr_mask=prod_config['vpc_config']['cidr_mask'],
                    subnet_type=aws_ec2.SubnetType.PRIVATE),
                aws_ec2.SubnetConfiguration(
                    name="DbSubnet",
                    cidr_mask=prod_config['vpc_config']['cidr_mask'],
                    subnet_type=aws_ec2.SubnetType.ISOLATED)
            ])
        ## end vpc block ##

        ## ec2 block ##
        #import user-data scripts
        with open("userdata_scripts/setup.sh", mode="r") as file:
            user_data = file.read()

        #get latest ami from any region
        aws_linux_ami = aws_ec2.MachineImage.latest_amazon_linux(
            generation=aws_ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
            edition=aws_ec2.AmazonLinuxEdition.STANDARD,
            storage=aws_ec2.AmazonLinuxStorage.EBS,
            virtualization=aws_ec2.AmazonLinuxVirt.HVM)

        #ec2
        test_server = aws_ec2.Instance(
            self,
            "ec2id",
            instance_type=aws_ec2.InstanceType(
                instance_type_identifier="t2.micro"),
            instance_name="TestServer01",
            machine_image=aws_linux_ami,
            vpc=custom_vpc,
            vpc_subnets=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC),
            key_name="SAA-C01",
            user_data=aws_ec2.UserData.custom(user_data))

        #allow web traffic
        test_server.connections.allow_from_any_ipv4(
            aws_ec2.Port.tcp(80), description="allow web traffic")

        # add managed policies to the instance profile role
        test_server.role.add_managed_policy(
            aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSSMManagedInstanceCore"))
        test_server.role.add_managed_policy(
            aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonS3ReadOnlyAccess"))
        ## end ec2 block ##

        ## lambda block ##
        #import function code
        try:
            with open("serverless_stack/functions/function.py",
                      mode="r") as file:
                function_body = file.read()
        except OSError:
            print("Unable to read the function code file")
            raise

        #function
        function_01 = aws_lambda.Function(
            self,
            "lambdafunction01",
            function_name="LambdaTestCDK",
            runtime=aws_lambda.Runtime.PYTHON_3_6,
            handler="index.lambda_handler",
            code=aws_lambda.InlineCode(function_body),
            timeout=core.Duration.seconds(5),
            reserved_concurrent_executions=1,
            environment={
                'LOG_LEVEL': 'INFO',
                'AUTOMATION': 'SKON'
            })
        ## end lambda block ##

        ## monitor block ##
        # EC2 metric for CPU utilization
        ec2_metric_01 = aws_cw.Metric(
            namespace="AWS/EC2",
            metric_name="CPUUtilization",
            dimensions={"InstanceID": test_server.instance_id},
            period=core.Duration.minutes(5))

        # low-utilization alarm for the EC2 instance
        low_cpu_ec2 = aws_cw.Alarm(
            self,
            "lowcpualram",
            alarm_description="low cpu utilization",
            alarm_name="Low-CPU-Alarm",
            actions_enabled=True,
            metric=ec2_metric_01,
            threshold=10,
            comparison_operator=aws_cw.ComparisonOperator.
            LESS_THAN_OR_EQUAL_TO_THRESHOLD,
            evaluation_periods=1,
            datapoints_to_alarm=1,
            period=core.Duration.minutes(5),
            treat_missing_data=aws_cw.TreatMissingData.NOT_BREACHING)

        # SNS action for the EC2 alarm
        low_cpu_ec2.add_alarm_action(aws_cw_ats.SnsAction(snstopic_monitor01))

        # Lambda error alarm
        function_01_alarm = aws_cw.Alarm(self,
                                         "LambdaAlarm",
                                         metric=function_01.metric_errors(),
                                         threshold=2,
                                         evaluation_periods=1,
                                         datapoints_to_alarm=1,
                                         period=core.Duration.minutes(5))

        # SNS action for the Lambda alarm
        function_01_alarm.add_alarm_action(
            aws_cw_ats.SnsAction(snstopic_monitor01))
    def __init__(self, scope: core.Construct, construct_id: str,
                 stack_log_level: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # Add your stack resources below

        # Maximum number of times a message can be received from a queue before it is moved to its retry/dead-letter queue
        self.max_msg_receive_cnt = 5
        self.max_msg_receive_cnt_at_retry = 3

        # Define Dead Letter Queue
        self.reliable_q_dlq = _sqs.Queue(
            self,
            "DeadLetterQueue",
            delivery_delay=core.Duration.seconds(100),
            queue_name=f"reliable_q_dlq",
            retention_period=core.Duration.days(2),
            visibility_timeout=core.Duration.seconds(10),
            receive_message_wait_time=core.Duration.seconds(10))

        # Define Retry Queue for Reliable Q
        self.reliable_q_retry_1 = _sqs.Queue(
            self,
            "reliableQueueRetry1",
            delivery_delay=core.Duration.seconds(10),
            queue_name=f"reliable_q_retry_1",
            retention_period=core.Duration.days(2),
            visibility_timeout=core.Duration.seconds(10),
            receive_message_wait_time=core.Duration.seconds(10),
            dead_letter_queue=_sqs.DeadLetterQueue(
                max_receive_count=self.max_msg_receive_cnt_at_retry,
                queue=self.reliable_q_dlq))

        # Primary Source Queue
        self.reliable_q = _sqs.Queue(
            self,
            "reliableQueue",
            delivery_delay=core.Duration.seconds(5),
            queue_name=f"reliable_q",
            retention_period=core.Duration.days(2),
            visibility_timeout=core.Duration.seconds(10),
            receive_message_wait_time=core.Duration.seconds(10),
            dead_letter_queue=_sqs.DeadLetterQueue(
                max_receive_count=self.max_msg_receive_cnt,
                queue=self.reliable_q_retry_1))
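
        # Resulting message flow: reliable_q -> (after 5 failed receives, max_msg_receive_cnt)
        # reliable_q_retry_1 -> (after 3 further failed receives, max_msg_receive_cnt_at_retry)
        # reliable_q_dlq, where messages are retained for 2 days for inspection.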

        ########################################
        #######                          #######
        #######     SQS Data Producer    #######
        #######                          #######
        ########################################

        # Read Lambda Code
        try:
            with open(
                    "stacks/back_end/serverless_sqs_producer_stack/lambda_src/sqs_data_producer.py",
                    encoding="utf-8",
                    mode="r") as f:
                data_producer_fn_code = f.read()
        except OSError:
            print("Unable to read Lambda Function Code")
            raise

        data_producer_fn = _lambda.Function(
            self,
            "sqsDataProducerFn",
            function_name=f"data_producer_fn_{construct_id}",
            description="Produce data events and push to SQS",
            runtime=_lambda.Runtime.PYTHON_3_7,
            code=_lambda.InlineCode(data_producer_fn_code),
            handler="index.lambda_handler",
            timeout=core.Duration.seconds(5),
            reserved_concurrent_executions=1,
            environment={
                "LOG_LEVEL": f"{stack_log_level}",
                "APP_ENV": "Production",
                "RELIABLE_QUEUE_NAME": f"{self.reliable_q.queue_name}",
                "TRIGGER_RANDOM_FAILURES": "True"
            })

        # Grant our Lambda Producer privileges to write to SQS
        self.reliable_q.grant_send_messages(data_producer_fn)
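
        # A minimal sketch of the producer pattern the inline code above presumably follows
        # (an assumption for illustration, not the contents of sqs_data_producer.py):
        #
        #   import os, json, boto3
        #   sqs_client = boto3.client("sqs")
        #   q_url = sqs_client.get_queue_url(
        #       QueueName=os.environ["RELIABLE_QUEUE_NAME"])["QueueUrl"]
        #   sqs_client.send_message(QueueUrl=q_url, MessageBody=json.dumps({"event": "sample"}))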

        # Create Custom Loggroup for Producer
        data_producer_lg = _logs.LogGroup(
            self,
            "dataProducerLogGroup",
            log_group_name=f"/aws/lambda/{data_producer_fn.function_name}",
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=_logs.RetentionDays.ONE_DAY)

        # Restrict the producer Lambda so it can only be invoked from the stack owner's account
        data_producer_fn.add_permission(
            "restrictLambdaInvocationToFhInOwnAccount",
            principal=_iam.AccountRootPrincipal(),
            action="lambda:InvokeFunction",
            source_account=core.Aws.ACCOUNT_ID)

        # Monitoring for Queue
        reliable_q_alarm = _cw.Alarm(
            self,
            "reliableQueueAlarm",
            metric=self.reliable_q.metric(
                "ApproximateNumberOfMessagesVisible"),
            statistic="sum",
            threshold=10,
            period=core.Duration.minutes(5),
            evaluation_periods=1,
            comparison_operator=_cw.ComparisonOperator.GREATER_THAN_THRESHOLD)

        ###########################################
        ################# OUTPUTS #################
        ###########################################
        output_0 = core.CfnOutput(
            self,
            "AutomationFrom",
            value=f"{GlobalArgs.SOURCE_INFO}",
            description=
            "To know more about this automation stack, check out our github page."
        )

        output_1 = core.CfnOutput(
            self,
            "SqsDataProducer",
            value=
            f"https://console.aws.amazon.com/lambda/home?region={core.Aws.REGION}#/functions/{data_producer_fn.function_name}",
            description="Produce data events and push to SQS Queue.")

        output_2 = core.CfnOutput(
            self,
            "ReliableQueue",
            value=
            f"https://console.aws.amazon.com/sqs/v2/home?region={core.Aws.REGION}#/queues",
            description="Reliable Queue")
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        #import function code
        try:
            with open("serverless_stack/functions/metric_logs_generator.py",
                      mode="r") as file:
                function_body = file.read()
        except OSError:
            print("Unable to read the function code file")
            raise

        #function
        function_01 = aws_lambda.Function(
            self,
            "lambdafunction01",
            function_name="LambdaTestCustomMEtric",
            runtime=aws_lambda.Runtime.PYTHON_3_6,
            handler="index.lambda_handler",
            code=aws_lambda.InlineCode(function_body),
            timeout=core.Duration.seconds(5),
            reserved_concurrent_executions=1,
            environment={
                'LOG_LEVEL': 'INFO',
                'PERCENTAGE_ERRORS': '75'
            })

        # attach a CloudWatch log group to the function
        custom_metric_log_group01 = aws_logs.LogGroup(
            self,
            "cloudwatchlog01",
            log_group_name=f"/aws/lambda/{function_01.function_name}",
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=aws_logs.RetentionDays.ONE_DAY)

        # Define the custom metric (namespace and metric name)
        custom_metric_namespace01 = aws_cw.Metric(
            namespace=f"custom-error-metric",
            metric_name="custom-error-metric",
            label="Amount of Custom API errors",
            period=core.Duration.minutes(1),
            statistic="Sum")

        #Custom metric logs filter
        custom_metric_filter01 = aws_logs.MetricFilter(
            self,
            "customMetricFilter",
            filter_pattern=aws_logs.FilterPattern.boolean_value(
                "$.custom_api_error", True),
            log_group=custom_metric_log_group01,
            metric_namespace=custom_metric_namespace01.namespace,
            metric_name=custom_metric_namespace01.metric_name,
            default_value=0,
            metric_value="1")

        #create custom alarm
        custom_metric_alarm01 = aws_cw.Alarm(
            self,
            "customMetricAlarm",
            alarm_description="Custom API errors",
            alarm_name="Custom-API-alarm",
            metric=custom_metric_namespace01,
            comparison_operator=aws_cw.ComparisonOperator.
            GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            threshold=2,
            evaluation_periods=2,
            datapoints_to_alarm=1,
            period=core.Duration.minutes(1),
            treat_missing_data=aws_cw.TreatMissingData.NOT_BREACHING)
    def __init__(self, scope: core.Construct, _id: str, **kwargs) -> None:
        super().__init__(scope, _id, **kwargs)

        ddb_file_list = ddb.Table(self,
                                  "ddb",
                                  partition_key=ddb.Attribute(
                                      name="Key",
                                      type=ddb.AttributeType.STRING),
                                  billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

        sqs_queue_DLQ = sqs.Queue(self,
                                  "sqs_DLQ",
                                  visibility_timeout=core.Duration.minutes(15),
                                  retention_period=core.Duration.days(14))
        sqs_queue = sqs.Queue(self,
                              "sqs_queue",
                              visibility_timeout=core.Duration.minutes(15),
                              retention_period=core.Duration.days(14),
                              dead_letter_queue=sqs.DeadLetterQueue(
                                  max_receive_count=100, queue=sqs_queue_DLQ))

        checkip = api.RestApi(
            self,
            "lambda-checkip-api",
            cloud_watch_role=True,
            deploy=True,
            description="For Lambda get IP address",
            default_integration=api.MockIntegration(
                integration_responses=[
                    api.IntegrationResponse(status_code="200",
                                            response_templates={
                                                "application/json":
                                                "$context.identity.sourceIp"
                                            })
                ],
                request_templates={"application/json": '{"statusCode": 200}'}),
            endpoint_types=[api.EndpointType.REGIONAL])
        checkip.root.add_method("GET",
                                method_responses=[
                                    api.MethodResponse(
                                        status_code="200",
                                        response_models={
                                            "application/json":
                                            api.Model.EMPTY_MODEL
                                        })
                                ])

        handler = lam.Function(self,
                               "lambdaFunction",
                               code=lam.Code.asset("./lambda"),
                               handler="lambda_function.lambda_handler",
                               runtime=lam.Runtime.PYTHON_3_8,
                               memory_size=1024,
                               timeout=core.Duration.minutes(15),
                               tracing=lam.Tracing.ACTIVE,
                               environment={
                                   'table_queue_name':
                                   ddb_file_list.table_name,
                                   'Des_bucket_default': Des_bucket_default,
                                   'Des_prefix_default': Des_prefix_default,
                                   'StorageClass': StorageClass,
                                   'aws_access_key_id': aws_access_key_id,
                                   'aws_secret_access_key':
                                   aws_secret_access_key,
                                   'aws_access_key_region':
                                   aws_access_key_region,
                                   'checkip_url': checkip.url
                               })

        ddb_file_list.grant_read_write_data(handler)
        handler.add_event_source(SqsEventSource(sqs_queue))

        s3bucket = s3.Bucket(self, "s3bucket")
        s3bucket.grant_read(handler)
        s3bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                        s3n.SqsDestination(sqs_queue))

        # You can import an existing bucket and grant read access to the Lambda
        # exist_s3bucket = s3.Bucket.from_bucket_name(self, "import_bucket",
        #                                             bucket_name="you_bucket_name")
        # exist_s3bucket.grant_read(handler)

        # But you have to add the SQS event notification on an imported bucket manually; it is
        # not supported by CloudFormation. A workaround is to use on_cloud_trail_event for the
        # bucket, but that triggers via CloudTrail first.
        # Because the bucket is imported, you must manually create the bucket event notification
        # to SQS and set the SQS queue permission that allows the bucket to trigger it.
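        # A minimal sketch (assuming `iam` is aws_cdk.aws_iam) of the queue policy that lets
        # such an imported bucket send event notifications to the queue:
        # sqs_queue.add_to_resource_policy(iam.PolicyStatement(
        #     actions=["sqs:SendMessage"],
        #     principals=[iam.ServicePrincipal("s3.amazonaws.com")],
        #     resources=[sqs_queue.queue_arn],
        #     conditions={"ArnLike": {"aws:SourceArn": exist_s3bucket.bucket_arn}}))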

        core.CfnOutput(self, "DynamoDB_Table", value=ddb_file_list.table_name)
        core.CfnOutput(self, "SQS_Job_Queue", value=sqs_queue.queue_name)
        core.CfnOutput(self,
                       "SQS_Job_Queue_DLQ",
                       value=sqs_queue_DLQ.queue_name)
        core.CfnOutput(self,
                       "Worker_Lambda_Function",
                       value=handler.function_name)
        core.CfnOutput(self, "New_S3_Bucket", value=s3bucket.bucket_name)

        # Create Lambda log metric filters for network traffic metrics
        handler.log_group.add_metric_filter(
            "Complete-bytes",
            metric_name="Complete-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[info, date, sn, p="--->Complete", bytes, key]'))
        handler.log_group.add_metric_filter(
            "Uploading-bytes",
            metric_name="Uploading-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[info, date, sn, p="--->Uploading", bytes, key]'))
        handler.log_group.add_metric_filter(
            "Downloading-bytes",
            metric_name="Downloading-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[info, date, sn, p="--->Downloading", bytes, key]'))
        lambda_metric_Complete = cw.Metric(namespace="s3_migrate",
                                           metric_name="Complete-bytes",
                                           statistic="Sum",
                                           period=core.Duration.minutes(1))
        lambda_metric_Upload = cw.Metric(namespace="s3_migrate",
                                         metric_name="Uploading-bytes",
                                         statistic="Sum",
                                         period=core.Duration.minutes(1))
        lambda_metric_Download = cw.Metric(namespace="s3_migrate",
                                           metric_name="Downloading-bytes",
                                           statistic="Sum",
                                           period=core.Duration.minutes(1))
        handler.log_group.add_metric_filter(
            "ERROR",
            metric_name="ERROR-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"ERROR"'))
        handler.log_group.add_metric_filter(
            "WARNING",
            metric_name="WARNING-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"WARNING"'))
        log_metric_ERROR = cw.Metric(namespace="s3_migrate",
                                     metric_name="ERROR-Logs",
                                     statistic="Sum",
                                     period=core.Duration.minutes(1))
        log_metric_WARNING = cw.Metric(namespace="s3_migrate",
                                       metric_name="WARNING-Logs",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))

        # Dashboard to monitor SQS and Lambda
        board = cw.Dashboard(self,
                             "s3_migrate",
                             dashboard_name="s3_migrate_serverless")

        board.add_widgets(
            cw.GraphWidget(title="Lambda-NETWORK",
                           left=[
                               lambda_metric_Download, lambda_metric_Upload,
                               lambda_metric_Complete
                           ]),
            # TODO: this monitors concurrency across all Lambda functions, not just this worker. Limitation from CDK.
            # Lambda now supports monitoring a single function's concurrency; change this once CDK supports it.
            cw.GraphWidget(title="Lambda-all-concurrent",
                           left=[
                               handler.metric_all_concurrent_executions(
                                   period=core.Duration.minutes(1))
                           ]),
            cw.GraphWidget(
                title="Lambda-invocations/errors/throttles",
                left=[
                    handler.metric_invocations(
                        period=core.Duration.minutes(1)),
                    handler.metric_errors(period=core.Duration.minutes(1)),
                    handler.metric_throttles(period=core.Duration.minutes(1))
                ]),
            cw.GraphWidget(
                title="Lambda-duration",
                left=[
                    handler.metric_duration(period=core.Duration.minutes(1))
                ]),
        )
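        # If a per-function view is needed, a metric could be built manually instead of
        # metric_all_concurrent_executions (illustrative sketch, not wired into the
        # dashboard above):
        #   cw.Metric(namespace="AWS/Lambda",
        #             metric_name="ConcurrentExecutions",
        #             dimensions={"FunctionName": handler.function_name},
        #             statistic="Maximum",
        #             period=core.Duration.minutes(1))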

        board.add_widgets(
            cw.GraphWidget(
                title="SQS-Jobs",
                left=[
                    sqs_queue.metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1))
                ]),
            cw.GraphWidget(
                title="SQS-DeadLetterQueue",
                left=[
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1))
                ]),
            cw.GraphWidget(title="ERROR/WARNING Logs",
                           left=[log_metric_ERROR],
                           right=[log_metric_WARNING]),
            cw.SingleValueWidget(
                title="Running/Waiting and Dead Jobs",
                metrics=[
                    sqs_queue.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue.metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1))
                ],
                height=6))
        # Alarm for queue - DLQ
        alarm_DLQ = cw.Alarm(
            self,
            "SQS_DLQ",
            alarm_name="s3-migration-serverless-SQS Dead Letter Queue",
            metric=sqs_queue_DLQ.metric_approximate_number_of_messages_visible(
            ),
            threshold=0,
            comparison_operator=cw.ComparisonOperator.GREATER_THAN_THRESHOLD,
            evaluation_periods=1,
            datapoints_to_alarm=1)
        alarm_topic = sns.Topic(self, "SQS queue-DLQ has dead letter")
        alarm_topic.add_subscription(
            subscription=sub.EmailSubscription(alarm_email))
        alarm_DLQ.add_alarm_action(action.SnsAction(alarm_topic))

        # Alarm for queue empty, i.e. no visible messages and no in-flight (not visible) messages
        # metric_all_message = cw.MathExpression(
        #     expression="a + b",
        #     label="empty_queue_expression",
        #     using_metrics={
        #         "a": sqs_queue.metric_approximate_number_of_messages_visible(),
        #         "b": sqs_queue.metric_approximate_number_of_messages_not_visible()
        #     }
        # )
        # alarm_0 = cw.Alarm(self, "SQSempty",
        #                    alarm_name="SQS queue empty-Serverless",
        #                    metric=metric_all_message,
        #                    threshold=0,
        #                    comparison_operator=cw.ComparisonOperator.LESS_THAN_OR_EQUAL_TO_THRESHOLD,
        #                    evaluation_periods=3,
        #                    datapoints_to_alarm=3,
        #                    treat_missing_data=cw.TreatMissingData.IGNORE
        #                    )
        # alarm_topic = sns.Topic(self, "SQS queue empty-Serverless")
        # alarm_topic.add_subscription(subscription=sub.EmailSubscription(alarm_email))
        # alarm_0.add_alarm_action(action.SnsAction(alarm_topic))

        # core.CfnOutput(self, "Alarm", value="CloudWatch SQS queue empty Alarm for Serverless: " + alarm_email)
        core.CfnOutput(self,
                       "Dashboard",
                       value="CloudWatch Dashboard name s3_migrate_serverless")
        core.CfnOutput(self, "API-checkip", value=checkip.url)
Example #8
    def __init__(self, scope: core.Construct, id: str, group_name: str,
                 minute_duration: int, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
        # TODO: Setup alerting of failure to an SNS
        # TODO: Failure is not the same as a student not in a group
        # TODO: Streamline input data so that lambda's only get the info they really need
        # TODO: Comment
        # TODO: Need to separate unexpected errors from regular errors
        # Setting up monitoring

        schedule_stop = lambda_.Function(
            self,
            id="ScheduleStopLambda",
            runtime=lambda_.Runtime.PYTHON_3_7,
            code=lambda_.Code.from_inline(
                open("./resources/schedule-termination.py", 'r').read()),
            handler="index.handler",
            log_retention=logs.RetentionDays.ONE_DAY,
            environment=dict(GROUP_NAME=group_name),
            timeout=core.Duration.seconds(30))
        schedule_stop.add_to_role_policy(
            statement=iam.PolicyStatement(actions=[
                "ec2:Describe*", "iam:ListGroupsForUser", "iam:ListUsers"
            ],
                                          effect=iam.Effect.ALLOW,
                                          resources=["*"]))

        terminate_ec2 = lambda_.Function(
            self,
            id="TerminateEC2",
            runtime=lambda_.Runtime.PYTHON_3_7,
            code=lambda_.Code.from_inline(
                open("./resources/terminate-ec2.py", 'r').read()),
            handler="index.handler",
            log_retention=logs.RetentionDays.ONE_DAY,
            timeout=core.Duration.seconds(30))
        terminate_ec2.add_to_role_policy(
            statement=iam.PolicyStatement(actions=[
                "ec2:DescribeInstance*",
                "ec2:TerminateInstances",
            ],
                                          effect=iam.Effect.ALLOW,
                                          resources=["*"]))

        # The Lambda invoke task that decides whether we should schedule termination.
        schedule_stop_task = tasks.LambdaInvoke(
            self,
            id='schedule stop',
            lambda_function=schedule_stop,
            input_path="$.detail.userIdentity",
            result_path="$.Payload",
        )
        # TODO: Need to change this based on the configuration info above
        # Wait state to try and delete
        # wait_x = sfn.Wait(self, 'Wait x minutes', time=sfn.WaitTime.seconds_path("10"))
        wait_x = sfn.Wait(self,
                          id='Wait x minutes',
                          time=sfn.WaitTime.duration(
                              core.Duration.minutes(minute_duration)))

        job_failed = sfn.Fail(self,
                              id="Failed Job",
                              cause="Error in the input",
                              error="Error")
        job_finished = sfn.Succeed(self, id="Job Finished")
        choice = sfn.Choice(self, 'Can I delete')
        choice.when(sfn.Condition.boolean_equals('$.Payload.Payload', False),
                    job_finished)
        choice.otherwise(wait_x)
        terminate_ec2_task = tasks.LambdaInvoke(
            self,
            'terminate',
            lambda_function=terminate_ec2,
            input_path="$.detail.responseElements.instancesSet")
        wait_x.next(terminate_ec2_task).next(job_finished)

        state_definition = schedule_stop_task \
            .next(choice)
        terminate_machine = sfn.StateMachine(self,
                                             id="State Machine",
                                             definition=state_definition)
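        # Resulting flow, for reference:
        #   schedule_stop -> choice
        #     - if $.Payload.Payload == False -> Job Finished
        #     - otherwise -> Wait x minutes -> terminate -> Job Finished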
        cloudwatch.Alarm(self,
                         "EC2ScheduleAlarm",
                         metric=terminate_machine.metric_failed(),
                         threshold=1,
                         evaluation_periods=1)
        # TODO: Build a rule that monitors for EC2 creation
        # Any newly created EC2 instance will have to be destroyed. Should this
        # include other resources as well?
        create_event = events.Rule(
            self,
            id='detect-ec2-start',
            description="Detects if an EC2 is created",
            enabled=True,
            event_pattern=events.EventPattern(
                detail_type=["AWS API Call via CloudTrail"],
                source=["aws.ec2"],
                detail={
                    "eventName": ["RunInstances"],
                    "eventSource": ["ec2.amazonaws.com"]
                }),
            targets=[targets.SfnStateMachine(terminate_machine)])
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Get config value for alert email
        email = self.node.try_get_context("email")
        if email == 'changeme@localhost':
            exit(
                'ERROR: Change the email in cdk.json or pass it with -c email=changeme@localhost'
            )
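        # For reference, the matching cdk.json entry looks like (value is illustrative):
        #   { "context": { "email": "you@example.com" } }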

        # Create SNS for alarms to be sent to
        alarm_topic = sns.Topic(self,
                                "backup_alarm",
                                display_name="backup_alarm")

        # Subscribe my email so the alarms go to me
        alarm_topic.add_subscription(subscriptions.EmailSubscription(email))

        # Create VPC to run everything in. We make this public just because we don't
        # want to spend $30/mo on a NAT gateway.
        vpc = ec2.Vpc(
            self,
            "VPC",
            nat_gateways=0,
            subnet_configuration=[
                ec2.SubnetConfiguration(name="public",
                                        subnet_type=ec2.SubnetType.PUBLIC)
            ],
        )

        ecs_sg = ec2.SecurityGroup(self, "ecs_sg", vpc=vpc)
        efs_sg = ec2.SecurityGroup(self, "efs_sg", vpc=vpc)
        efs_sg.add_ingress_rule(
            peer=ecs_sg,
            connection=ec2.Port.tcp(2049),
            description="Allow backup runner access",
        )
        # Open this to the VPC
        efs_sg.add_ingress_rule(
            peer=ec2.Peer.ipv4("10.0.0.0/8"),
            connection=ec2.Port.tcp(2049),
            description="Allow backup runner access",
        )

        # Define the EFS
        fileSystem = efs.FileSystem(
            self,
            "MyEfsFileSystem",
            vpc=vpc,
            encrypted=True,
            lifecycle_policy=efs.LifecyclePolicy.AFTER_7_DAYS,
            performance_mode=efs.PerformanceMode.GENERAL_PURPOSE,
            throughput_mode=efs.ThroughputMode.BURSTING,
            security_group=efs_sg,
        )

        # Define the ECS task
        cluster = ecs.Cluster(self, "Cluster", vpc=vpc)
        taskDefinition = ecs.FargateTaskDefinition(
            self,
            "taskDefinition",
            volumes=[
                ecs.Volume(
                    name="efsvolume",
                    efs_volume_configuration=ecs.EfsVolumeConfiguration(
                        file_system_id=fileSystem.file_system_id,
                        root_directory="/",
                        transit_encryption="ENABLED",
                    ),
                )
            ],
            memory_limit_mib=8192,
            cpu=2048,
        )

        log_driver = ecs.AwsLogDriver(
            stream_prefix="backup_runner",
            log_retention=logs.RetentionDays.TWO_WEEKS,
        )

        taskDefinition.add_container(
            "backup-runner",
            image=ecs.ContainerImage.from_asset("./resources/backup_runner"),
            memory_limit_mib=8192,
            cpu=2048,
            logging=log_driver,
        )

        # The previous method of adding the container doesn't let us specify the mount point for the EFS,
        # so we have to do it here, referencing the container that was just added.
        taskDefinition.default_container.add_mount_points(
            ecs.MountPoint(container_path="/mnt/efs",
                           read_only=False,
                           source_volume="efsvolume"))

        # Create rule to trigger this be run every 24 hours
        events.Rule(
            self,
            "scheduled_run",
            rule_name="backup_runner",
            # Run at midnight UTC every night (cron expressions are evaluated in UTC)
            schedule=events.Schedule.expression("cron(0 0 * * ? *)"),
            description="Starts the backup runner task every night",
            targets=[
                targets.EcsTask(
                    cluster=cluster,
                    task_definition=taskDefinition,
                    subnet_selection=ec2.SubnetSelection(
                        subnet_type=ec2.SubnetType.PUBLIC),
                    platform_version=ecs.FargatePlatformVersion.
                    VERSION1_4,  # Required to use EFS
                    # Because "Latest" does not yet support EFS
                    security_groups=[ecs_sg],
                )
            ],
        )

        # Create notification topic for backups
        backup_topic = sns.Topic(self,
                                 "backup_topic",
                                 display_name="Backup status")

        # Create AWS Backup
        vault = backup.BackupVault(
            self,
            "Vault",
            access_policy=iam.PolicyDocument(statements=[
                iam.PolicyStatement(
                    effect=iam.Effect.DENY,
                    actions=[
                        "backup:DeleteBackupVault",
                        "backup:DeleteRecoveryPoint",
                        "backup:UpdateRecoveryPointLifecycle",
                        # "backup:PutBackupVaultAccessPolicy", # This results in "Failed putting policy for Backup vault backuprunnerVaultXXX as it will lock down from further policy changes"
                        "backup:DeleteBackupVaultAccessPolicy",
                        "backup:DeleteBackupVaultNotifications",
                        # "backup:PutBackupVaultNotifications", # This causes oher part of this app to fail.
                    ],
                    resources=["*"],
                    principals=[iam.AnyPrincipal()],
                )
            ]),
            notification_topic=alarm_topic,
            notification_events=[
                # Monitor for some failures or access to the backups
                backup.BackupVaultEvents.BACKUP_JOB_EXPIRED,
                backup.BackupVaultEvents.BACKUP_JOB_FAILED,
                backup.BackupVaultEvents.COPY_JOB_FAILED,
                backup.BackupVaultEvents.COPY_JOB_STARTED,
                backup.BackupVaultEvents.RESTORE_JOB_COMPLETED,
                backup.BackupVaultEvents.RESTORE_JOB_FAILED,
                backup.BackupVaultEvents.RESTORE_JOB_STARTED,
                backup.BackupVaultEvents.RESTORE_JOB_SUCCESSFUL,
            ],
        )

        plan = backup.BackupPlan.daily35_day_retention(self, "backup")
        plan.add_selection(
            "Selection",
            resources=[backup.BackupResource.from_efs_file_system(fileSystem)],
        )

        #
        # Create metric filter for errors in the CloudWatch Logs from the ECS
        #
        METRIC_NAME = "log_errors"
        METRIC_NAMESPACE = "backup_runner"

        metric = cloudwatch.Metric(namespace=METRIC_NAMESPACE,
                                   metric_name=METRIC_NAME)

        error_metric = logs.MetricFilter(
            self,
            "MetricFilterId",
            metric_name=METRIC_NAME,
            metric_namespace=METRIC_NAMESPACE,
            log_group=log_driver.log_group,
            filter_pattern=logs.FilterPattern.any_term("ERROR"),
            metric_value="1",
        )

        error_alarm = cloudwatch.Alarm(
            self,
            "AlarmId",
            metric=metric,
            evaluation_periods=1,
            actions_enabled=True,
            alarm_name="backuper_runner_alarm",
            alarm_description="Errors in backup runner",
            comparison_operator=cloudwatch.ComparisonOperator.
            GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            treat_missing_data=cloudwatch.TreatMissingData.NOT_BREACHING,
            period=core.Duration.hours(1),
            threshold=1,
            statistic="sum",
        )

        # Connect the alarm to the SNS
        error_alarm.add_alarm_action(cloudwatch_actions.SnsAction(alarm_topic))

        # The above doesn't give it privileges, so add them to the alarm topic resource policy.
        alarm_topic.add_to_resource_policy(
            iam.PolicyStatement(
                effect=iam.Effect.ALLOW,
                actions=["sns:Publish"],
                resources=[alarm_topic.topic_arn],
                principals=[iam.ServicePrincipal("cloudwatch.amazonaws.com")],
            ))
Example #10
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # read parameters from SSM
        vpcid = _ssm.StringParameter.value_from_lookup(self, "/cdk/ec2/vpc_id")

        instance_type = _ssm.StringParameter.value_from_lookup(
            self, "/cdk/ec2/instance_type")

        key_name = _ssm.StringParameter.value_from_lookup(
            self, "/cdk/ec2/key_name")

        allow_ssh_web_location = _ssm.StringParameter.value_from_lookup(
            self, "/cdk/ec2/sshLocation")

        # Get the existing VPC
        my_vpc = _ec2.Vpc.from_lookup(self, "VPC", vpc_id=vpcid)

        # Prepare security group configuration - create security group
        my_security_group = _ec2.SecurityGroup(
            self,
            "my_security_group",
            vpc=my_vpc,
            security_group_name="myfirstcdk_secgroup")

        # Add ingress rules for the security group above
        add_securitygroup_ingress_for_22 = my_security_group.add_ingress_rule(
            # BUG 1: on the first synth, value_from_lookup returns a placeholder
            # ("dummy-value-for-..."), which is not a valid IPv4 CIDR for Peer.ipv4
            peer=_ec2.Peer.ipv4(allow_ssh_web_location),
            connection=_ec2.Port.tcp(22))

        add_securitygroup_ingress_for_80 = my_security_group.add_ingress_rule(
            peer=_ec2.Peer.ipv4(allow_ssh_web_location),
            connection=_ec2.Port.tcp(80))
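        # The BUG noted above comes from the SSM lookup not being resolved on the
        # first synth. A common mitigation (illustrative only, and it would have to
        # run before the Peer.ipv4 calls above) is to substitute a valid placeholder
        # CIDR until the real value is cached in cdk.context.json, e.g.:
        #   if allow_ssh_web_location.startswith("dummy-value"):
        #       allow_ssh_web_location = "10.0.0.0/16"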

        # create an IAM role with the SSM managed policy
        managed_policies = [
            _iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSSMManagedInstanceCore")
        ]

        my_session_mgmt_role = _iam.Role(
            self,
            id="my_session_mgmt_role",
            assumed_by=_iam.ServicePrincipal(service="ec2.amazonaws.com"),
            description="SSM session management role",
            managed_policies=managed_policies,
            role_name="SessionManagerRole")

        # Create an EC2 instance with the above configuration
        ec2_instance = _ec2.Instance(
            self,
            "my_ec2_instance",
            instance_type=_ec2.InstanceType(
                instance_type_identifier=instance_type),
            machine_image=_ec2.MachineImage.latest_amazon_linux(),
            vpc=my_vpc,
            instance_name="MyInstance",
            key_name=key_name,
            security_group=my_security_group,
            role=my_session_mgmt_role,
            user_data=_ec2.UserData.custom(user_data))

        # Create a CloudWatch Alarm for EC2 instance CPU utilization
        metric = _cw.Metric(metric_name="CPUUtilization",
                            namespace="AWS/EC2",
                            dimensions={
                                "InstanceId": ec2_instance.instance_id,
                            },
                            statistic="Average")

        cpu_alarm = _cw.Alarm(
            self,
            "cpu_alarm",
            alarm_name="CPUUtilizationOver15",
            alarm_description="CPU Utilization Over 15 Percent",
            evaluation_periods=3,
            threshold=15,
            period=core.Duration.seconds(60),
            metric=metric,
            datapoints_to_alarm=2,
            comparison_operator=_cw.ComparisonOperator.
            GREATER_THAN_OR_EQUAL_TO_THRESHOLD)

        # CFN outputs
        ec2_instance_id = core.CfnOutput(
            self,
            "instance_id",
            value=ec2_instance.instance_id,
            description="InstanceId of the newly created EC2 instance")

        availability_zone = core.CfnOutput(
            self,
            "availability zone",
            value=ec2_instance.instance_availability_zone,
            description="Availability Zone of the newly created EC2")

        public_dns_name = core.CfnOutput(
            self,
            "public_dns_name",
            value=ec2_instance.instance_public_dns_name,
            description="Public DNSName of the newly created EC2 instance")

        public_ip = core.CfnOutput(
            self,
            "public_ip",
            value=ec2_instance.instance_public_ip,
            description="Public IP address of the newly created EC2")

        cloudwatch_alarm = core.CfnOutput(self,
                                          "Cloudwatch Alarm",
                                          value=cpu_alarm.alarm_arn,
                                          description="CPU alarm ARN")
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Create SNS Topic for Operations Team
        konstone_ops_team = _sns.Topic(
            self,
            "konstoneOpsTeam",
            display_name="KonStone 24x7 On Watsapp? Support",
            topic_name="konstoneOpsTeam")

        # Add Subscription to SNS Topic
        konstone_ops_team.add_subscription(
            _subs.EmailSubscription("*****@*****.**"))

        # Create a Multi-AZ VPC
        vpc = _ec2.Vpc(self,
                       "konstoneVpcId",
                       cidr="10.0.0.0/24",
                       max_azs=2,
                       nat_gateways=0,
                       subnet_configuration=[
                           _ec2.SubnetConfiguration(
                               name="public",
                               subnet_type=_ec2.SubnetType.PUBLIC)
                       ])

        # Read EC2 BootStrap Script
        try:
            with open("bootstrap_scripts/install_httpd.sh", mode="r") as file:
                user_data = file.read()
        except OSError:
            print('Unable to read UserData script')

        # Get the latest ami
        amzn_linux_ami = _ec2.MachineImage.latest_amazon_linux(
            generation=_ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
            edition=_ec2.AmazonLinuxEdition.STANDARD,
            storage=_ec2.AmazonLinuxStorage.EBS,
            virtualization=_ec2.AmazonLinuxVirt.HVM)

        # WebServer Instance
        web_server = _ec2.Instance(self,
                                   "WebServer004Id",
                                   instance_type=_ec2.InstanceType(
                                       instance_type_identifier="t2.micro"),
                                   instance_name="WebServer004",
                                   machine_image=amzn_linux_ami,
                                   vpc=vpc,
                                   vpc_subnets=_ec2.SubnetSelection(
                                       subnet_type=_ec2.SubnetType.PUBLIC),
                                   user_data=_ec2.UserData.custom(user_data))

        # Allow Web Traffic to WebServer
        web_server.connections.allow_from_any_ipv4(
            _ec2.Port.tcp(80), description="Allow Web Traffic")

        # Add permission to web server instance profile
        web_server.role.add_managed_policy(
            _iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSSMManagedInstanceCore"))

        # Read Lambda Code
        try:
            with open("serverless_stacks/lambda_src/konstone_processor.py",
                      mode="r") as f:
                konstone_fn_code = f.read()
        except OSError:
            print("Unable to read Lambda Function Code")

        # Simple Lambda Function to return event
        konstone_fn = _lambda.Function(
            self,
            "konstoneFunction",
            function_name="konstone_function",
            runtime=_lambda.Runtime.PYTHON_3_7,
            handler="index.lambda_handler",
            code=_lambda.InlineCode(konstone_fn_code),
            timeout=core.Duration.seconds(3),
            reserved_concurrent_executions=1,
            environment={
                "LOG_LEVEL": "INFO",
                "AUTOMATION": "SKON"
            })

        # EC2 Metric for Avg. CPU
        ec2_metric_for_avg_cpu = _cloudwatch.Metric(
            namespace="AWS/EC2",
            metric_name="CPUUtilization",
            dimensions={"InstanceId": web_server.instance_id},
            period=core.Duration.minutes(5))

        # Low CPU Alarm for Web Server
        low_cpu_alarm = _cloudwatch.Alarm(
            self,
            "lowCPUAlarm",
            alarm_description="Alert if CPU is less than 10%",
            alarm_name="low-cpu-alarm",
            actions_enabled=True,
            metric=ec2_metric_for_avg_cpu,
            threshold=10,
            comparison_operator=_cloudwatch.ComparisonOperator.
            LESS_THAN_OR_EQUAL_TO_THRESHOLD,
            evaluation_periods=1,
            datapoints_to_alarm=1,
            period=core.Duration.minutes(5),
            treat_missing_data=_cloudwatch.TreatMissingData.NOT_BREACHING)

        # Inform SNS on EC2 Alarm State
        low_cpu_alarm.add_alarm_action(
            _cloudwatch_actions.SnsAction(konstone_ops_team))

        # Create Lambda Alarm
        konstone_fn_error_alarm = _cloudwatch.Alarm(
            self,
            "konstoneFunctionErrorAlarm",
            metric=konstone_fn.metric_errors(),
            threshold=2,
            evaluation_periods=1,
            datapoints_to_alarm=1,
            period=core.Duration.minutes(5))

        # Inform SNS on Lambda Alarm State
        konstone_fn_error_alarm.add_alarm_action(
            _cloudwatch_actions.SnsAction(konstone_ops_team))
Example #12
    def __init__(self, scope: core.Construct, id: str,
                 config: ContainerPipelineConfiguration, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        #VPC
        vpc = ec2.Vpc(self, "TheVPC", cidr="10.0.0.0/16")

        #IAM roles
        service_task_def_exe_role = iam.Role(
            self,
            "ServiceTaskDefExecutionRole",
            assumed_by=iam.ServicePrincipal('ecs-tasks.amazonaws.com'))
        service_task_def_exe_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AmazonECSTaskExecutionRolePolicy'))

        service_task_def_role = iam.Role(
            self,
            'ServiceTaskDefTaskRole',
            assumed_by=iam.ServicePrincipal('ecs-tasks.amazonaws.com'))

        code_deploy_role = iam.Role(
            self,
            "CodeDeployRole",
            assumed_by=iam.ServicePrincipal('codedeploy.amazonaws.com'))
        code_deploy_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AWSCodeDeployRoleForECS'))

        # Fargate cluster
        cluster = ecs.Cluster(scope=self,
                              id="ecs-cluster",
                              cluster_name=config.ProjectName + "-" +
                              config.stage,
                              vpc=vpc)

        load_balancer = elbv2.ApplicationLoadBalancer(self,
                                                      "load_balancer",
                                                      vpc=vpc,
                                                      internet_facing=True)

        #Security Group
        service_sg = ec2.SecurityGroup(self, "service_sg", vpc=vpc)
        service_sg.connections.allow_from(load_balancer, ec2.Port.tcp(80))

        #ECR Repo
        image_repo = ecr.Repository.from_repository_name(
            self, "image_repo", repository_name=config.ProjectName)

        log_group = logs.LogGroup(self,
                                  "log_group",
                                  log_group_name=config.ProjectName + "-" +
                                  config.stage,
                                  removal_policy=core.RemovalPolicy.DESTROY,
                                  retention=None)

        #ECS Task Def
        fargate_task_definition = ecs.FargateTaskDefinition(
            scope=self,
            id="fargate_task_definition",
            cpu=256,
            memory_limit_mib=512,
            execution_role=service_task_def_exe_role,
            task_role=service_task_def_role,
            family=config.ProjectName + "-" + config.stage)

        container = fargate_task_definition.add_container(
            id="fargate_task_container",
            image=ecs.ContainerImage.from_ecr_repository(repository=image_repo,
                                                         tag='release'))

        container.add_port_mappings(
            ecs.PortMapping(container_port=80,
                            host_port=80,
                            protocol=ecs.Protocol.TCP))

        #ECS Fargate Service
        fargate_service = ecs.FargateService(
            scope=self,
            id="fargate_service",
            security_group=service_sg,
            cluster=cluster,
            desired_count=5,
            deployment_controller=ecs.DeploymentController(
                type=ecs.DeploymentControllerType.CODE_DEPLOY),
            task_definition=fargate_task_definition,
            service_name=config.ProjectName + "-" + config.stage)

        #Main Env
        listern_health_check_main = elbv2.HealthCheck(
            healthy_http_codes='200',
            interval=core.Duration.seconds(5),
            healthy_threshold_count=2,
            unhealthy_threshold_count=3,
            timeout=core.Duration.seconds(4))
        #Test Env
        listern_health_check_test = elbv2.HealthCheck(
            healthy_http_codes='200',
            interval=core.Duration.seconds(5),
            healthy_threshold_count=2,
            unhealthy_threshold_count=3,
            timeout=core.Duration.seconds(4))

        listener_main = load_balancer.add_listener(
            "load_balancer_listener_1",
            port=80,
        )

        listern_main_targets = listener_main.add_targets(
            "load_balancer_target_1",
            port=80,
            health_check=listern_health_check_main,
            targets=[fargate_service])

        listener_test = load_balancer.add_listener(
            "load_balancer_listener_2",
            port=8080,
        )

        listern_test_targets = listener_test.add_targets(
            "load_balancer_target_2",
            port=80,
            health_check=listern_health_check_test,
            targets=[fargate_service])

        #Alarms: monitor 500s on target group
        aws_cloudwatch.Alarm(
            self,
            "TargetGroup5xx",
            metric=listern_main_targets.metric_http_code_target(
                elbv2.HttpCodeTarget.TARGET_5XX_COUNT),
            threshold=1,
            evaluation_periods=1,
            period=core.Duration.minutes(1))

        aws_cloudwatch.Alarm(
            self,
            "TargetGroup25xx",
            metric=listern_test_targets.metric_http_code_target(
                elbv2.HttpCodeTarget.TARGET_5XX_COUNT),
            threshold=1,
            evaluation_periods=1,
            period=core.Duration.minutes(1))

        #Alarms: monitor unhealthy hosts on target group
        aws_cloudwatch.Alarm(
            self,
            "TargetGroupUnhealthyHosts",
            metric=listern_main_targets.metric('UnHealthyHostCount'),
            threshold=1,
            evaluation_periods=1,
            period=core.Duration.minutes(1))

        aws_cloudwatch.Alarm(
            self,
            "TargetGroup2UnhealthyHosts",
            metric=listern_test_targets.metric('UnHealthyHostCount'),
            threshold=1,
            evaluation_periods=1,
            period=core.Duration.minutes(1))

        core.CfnOutput(self,
                       "lburl",
                       value=load_balancer.load_balancer_dns_name,
                       export_name="LoadBalancerUrl")
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        yt_api_key = _ssm.StringParameter(
            self,
            'YouTubeApiKey',
            parameter_name=f"/{id}/YouTubeApiKey",
            string_value='PLACEHOLDER')

        yt_list_id = _ssm.StringParameter(
            self,
            'YouTubePlayListId',
            parameter_name=f"/{id}/YouTubePlayListId",
            string_value='PLf-O3X2-mxDmn0ikyO7OF8sPr2GDQeZXk')

        yt_next_page_token = _ssm.StringParameter(
            self,
            'NextPageToken',
            parameter_name=f"/{id}/NextPageToken",
            string_value='CAEQAQ')

        telegram_bot_token = _ssm.StringParameter(
            self,
            'TelegramBotToken',
            parameter_name=f"/{id}/TelegramBotToken",
            string_value='PLACEHOLDER')

        telegram_chat_id = _ssm.StringParameter(
            self,
            'TelegramChatId',
            parameter_name=f"/{id}/TelegramChatId",
            string_value='PLACEHOLDER')

        function = _lambda.Function(
            self,
            'FiqueEmCasaConfPublisher',
            code=_lambda.Code.asset('src/fique_em_casa_conf/'),
            handler='lambda_function.lambda_handler',
            runtime=_lambda.Runtime.PYTHON_3_7,
            timeout=core.Duration.seconds(30),
            retry_attempts=0,
            environment={
                'YT_API_KEY_SSM': yt_api_key.parameter_name,
                'YT_LIST_ID_SSM': yt_list_id.parameter_name,
                'YT_NEXT_PAGE_TOKEN_SSM': yt_next_page_token.parameter_name,
                'TELEGRAM_BOT_TOKEN_SSM': telegram_bot_token.parameter_name,
                'TELEGRAM_CHAT_ID_SSM': telegram_chat_id.parameter_name
            })
        yt_api_key.grant_read(function)
        yt_list_id.grant_read(function)
        yt_next_page_token.grant_read(function)
        yt_next_page_token.grant_write(function)
        telegram_bot_token.grant_read(function)
        telegram_chat_id.grant_read(function)

        _events.Rule(
            self,
            'FiqueEmCasaConfSchedule',
            description=
            "Sends one video from FiqueEmCasaConf to Telegram every day",
            enabled=True if 'Prod' in id else False,
            schedule=_events.Schedule.expression(
                expression='cron(0 15 * * ? *)'),
            targets=[_events_targets.LambdaFunction(function)])

        error_notifications = _sns.Topic(self, 'ErrorNotifications')
        fique_em_casa_conf_alarm = _cw.Alarm(
            self,
            'FiqueEmCasaConfErrors',
            metric=function.metric_errors(),
            threshold=0,
            evaluation_periods=1,
            comparison_operator=_cw.ComparisonOperator.GREATER_THAN_THRESHOLD)
        fique_em_casa_conf_alarm.add_alarm_action(
            _cw_actions.SnsAction(error_notifications))
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        CLUSTER_NAME = self.node.try_get_context("cluster_name")
        NOTIFY_EMAIL = self.node.try_get_context("notify_email")
        SLACK_WEBHOOK_URL = self.node.try_get_context("webhook_url")
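        # These are expected to be supplied via cdk.json or on the command line,
        # e.g. (values are illustrative):
        #   cdk deploy -c cluster_name=my-aurora-cluster \
        #              -c notify_email=ops@example.com \
        #              -c webhook_url=https://hooks.slack.com/services/...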

        if not CLUSTER_NAME or not NOTIFY_EMAIL or not SLACK_WEBHOOK_URL:
            logger.error(
                f"Required context variables for {id} were not provided!")
        else:
            # Get the log group of our postgres instance
            log_group = logs.LogGroup.from_log_group_name(
                self,
                "InAur01DetectionLogGroup",
                f"/aws/rds/cluster/{CLUSTER_NAME}/postgresql",
            )

            # Create new metric
            metric = cloudwatch.Metric(
                namespace="LogMetrics",
                metric_name="InAur01DetectionFailedDbLoginAttempts",
            )

            # Apply metric filter
            # Filter all metrics of failed login attempts in log
            logs.MetricFilter(
                self,
                "InAur01DetectionMetricFilter",
                log_group=log_group,
                metric_namespace=metric.namespace,
                metric_name=metric.metric_name,
                filter_pattern=logs.FilterPattern.all_terms(
                    "FATAL:  password authentication failed for user"),
                metric_value="1",
            )

            # Create new SNS topic
            topic = sns.Topic(self, "InAur01DetectionTopic")

            # Add email subscription
            topic.add_subscription(subs.EmailSubscription(NOTIFY_EMAIL))

            # Create new alarm for metric
            # Alarm will trigger if there is >= 10 failed login attempts
            # over a period of 30 seconds.
            alarm = cloudwatch.Alarm(
                self,
                "InAur01DetectionAlarm",
                metric=metric,
                threshold=10,
                evaluation_periods=1,
                period=core.Duration.seconds(30),
                datapoints_to_alarm=1,
                statistic="sum",
            )

            # Add SNS action to alarm
            alarm.add_alarm_action(cw_actions.SnsAction(topic))

            # Create unban lambda
            lambda_dir_path = os.path.join(os.getcwd(), "ir_cdk_stacks",
                                           "in_aur_01")
            unban_lambda = _lambda.Function(
                self,
                "InAur01ResponseUnbanFunction",
                runtime=_lambda.Runtime.PYTHON_3_8,
                handler="unban_lambda.lambda_handler",
                code=_lambda.Code.from_asset(lambda_dir_path),
            )
            # Assign EC2 permissions to lambda
            unban_lambda.add_to_role_policy(
                iam.PolicyStatement(
                    actions=["ec2:DeleteNetworkAclEntry"],
                    effect=iam.Effect.ALLOW,
                    resources=["*"],
                ))

            # Create stepfunction
            # Define a second state machine to unban the blacklisted IP after 1 hour
            wait_step = sfn.Wait(
                self,
                "InAur01ResponseStepWait",
                time=sfn.WaitTime.duration(core.Duration.hours(1)),
            )
            unban_step = sfn.Task(
                self,
                "InAur01ResponseStepUnban",
                task=tasks.RunLambdaTask(
                    unban_lambda,
                    integration_pattern=sfn.ServiceIntegrationPattern.
                    FIRE_AND_FORGET,
                ),
                parameters={"Payload.$": "$"},
            )
            statemachine = sfn.StateMachine(
                self,
                "InAur01ResponseUnbanStateMachine",
                definition=wait_step.next(unban_step),
                timeout=core.Duration.hours(1.5),
            )

            # Create lambda function
            lambda_func = _lambda.Function(
                self,
                "InAur01ResponseFunction",
                runtime=_lambda.Runtime.PYTHON_3_8,
                handler="response_lambda.lambda_handler",
                code=_lambda.Code.from_asset(lambda_dir_path),
                environment={
                    "webhook_url": SLACK_WEBHOOK_URL,
                    "unban_sm_arn": statemachine.state_machine_arn,
                    "cluster_name": CLUSTER_NAME,
                },
            )
            # AWS CDK has a bug where it does not add the correct permission
            # for the CloudWatch Logs subscription to invoke the lambda.
            # Hence, we need to manually add the permission to the lambda.
            lambda_func.add_permission(
                "InAur01ResponseFunctionInvokePermission",
                principal=iam.ServicePrincipal("logs.amazonaws.com"),
                action="lambda:InvokeFunction",
                source_arn=log_group.log_group_arn + ":*",
            )
            # Assign permissions to response lambda
            lambda_func.add_to_role_policy(
                iam.PolicyStatement(
                    actions=[
                        "states:StartExecution",
                    ],
                    effect=iam.Effect.ALLOW,
                    resources=[statemachine.state_machine_arn],
                ))
            # Assign RDS Read-only permissions to lambda
            lambda_func.add_to_role_policy(
                iam.PolicyStatement(
                    actions=["rds:Describe*"],
                    effect=iam.Effect.ALLOW,
                    resources=["*"],
                ))
            # Assign EC2 permissions to lambda
            lambda_func.add_to_role_policy(
                iam.PolicyStatement(
                    actions=[
                        "ec2:Describe*",
                        "ec2:CreateNetworkAclEntry",
                        "ec2:DeleteNetworkAclEntry",
                    ],
                    effect=iam.Effect.ALLOW,
                    resources=["*"],
                ))
            # Assign CloudWatch logs permissions to lambda
            lambda_func.add_to_role_policy(
                iam.PolicyStatement(
                    actions=[
                        "cloudwatch:Get*",
                        "cloudwatch:Describe*",
                        "logs:FilterLogEvents",
                        "logs:DescribeMetricFilters",
                    ],
                    effect=iam.Effect.ALLOW,
                    resources=["*"],
                ))

            sns_event_source = lambda_event_sources.SnsEventSource(topic)
            lambda_func.add_event_source(sns_event_source)
Example #15
    def create_all_queues(self) -> None:
        """
        Create all STACK queues, attach subscriptions and alarms
        """

        # General DLQs for lambdas (not API)
        self.create_queue(id="dead_letter_queue")
        general_dlq_alarm = cloudwatch.Alarm(
            self,
            "DLQAlarm",
            metric=self.queues_["dead_letter_queue"].metric(
                "ApproximateNumberOfMessagesVisible"),
            evaluation_periods=1,
            threshold=0.0,
            comparison_operator=ComparisonOperator.GREATER_THAN_THRESHOLD,
        )
        general_dlq_alarm.add_alarm_action(
            cw_actions.SnsAction(self.topics_["alarm_topic"]))

        # DLQ for API lambdas
        self.create_queue(id="api_dead_letter_queue")
        api_dlq_alarm = cloudwatch.Alarm(
            self,
            "APIDLQAlarm",
            metric=self.queues_["api_dead_letter_queue"].metric(
                "ApproximateNumberOfMessagesVisible"),
            evaluation_periods=1,
            threshold=0.0,
            comparison_operator=ComparisonOperator.GREATER_THAN_THRESHOLD,
        )
        api_dlq_alarm.add_alarm_action(
            cw_actions.SnsAction(self.topics_["alarm_topic"]))

        # The new_scenes_queue subscribes to CBERS 4/4A quicklook notifications. The
        # STAC items are generated from the original INPE metadata file as
        # soon as the quicklooks are created in the PDS bucket.
        # This code fragment creates the queue, the associated DLQ, and
        # subscribes to the CBERS 4/4A quicklook notification topics.
        self.create_queue(
            id="process_new_scenes_queue_dlq",
            retention_period=core.Duration.seconds(1209600),
        )
        process_new_scenes_queue_alarm = cloudwatch.Alarm(
            self,
            "ProcessNewScenesQueueAlarm",
            metric=self.queues_["process_new_scenes_queue_dlq"].metric(
                "ApproximateNumberOfMessagesVisible"),
            evaluation_periods=1,
            threshold=0.0,
            comparison_operator=ComparisonOperator.GREATER_THAN_THRESHOLD,
        )
        process_new_scenes_queue_alarm.add_alarm_action(
            cw_actions.SnsAction(self.topics_["alarm_topic"]))
        self.create_queue(
            id="new_scenes_queue",
            visibility_timeout=core.Duration.seconds(385),
            retention_period=core.Duration.seconds(1209600),
            dead_letter_queue=sqs.DeadLetterQueue(
                max_receive_count=1,
                queue=self.queues_["process_new_scenes_queue_dlq"]),
        )
        # Add subscriptions for each CB4 camera
        sns.Topic.from_topic_arn(
            self,
            id="CB4MUX",
            topic_arn="arn:aws:sns:us-east-1:599544552497:NewCB4MUXQuicklook",
        ).add_subscription(
            sns_subscriptions.SqsSubscription(
                self.queues_["new_scenes_queue"]))
        sns.Topic.from_topic_arn(
            self,
            id="CB4AWFI",
            topic_arn="arn:aws:sns:us-east-1:599544552497:NewCB4AWFIQuicklook",
        ).add_subscription(
            sns_subscriptions.SqsSubscription(
                self.queues_["new_scenes_queue"]))
        sns.Topic.from_topic_arn(
            self,
            id="CB4PAN10M",
            topic_arn=
            "arn:aws:sns:us-east-1:599544552497:NewCB4PAN10MQuicklook",
        ).add_subscription(
            sns_subscriptions.SqsSubscription(
                self.queues_["new_scenes_queue"]))
        sns.Topic.from_topic_arn(
            self,
            id="CBPAN5M",
            topic_arn="arn:aws:sns:us-east-1:599544552497:NewCB4PAN5MQuicklook",
        ).add_subscription(
            sns_subscriptions.SqsSubscription(
                self.queues_["new_scenes_queue"]))
        # Subscription for CB4A (all cameras)
        sns.Topic.from_topic_arn(
            self,
            id="CB4A",
            topic_arn="arn:aws:sns:us-east-1:599544552497:NewCB4AQuicklook",
        ).add_subscription(
            sns_subscriptions.SqsSubscription(
                self.queues_["new_scenes_queue"]))

        self.create_queue(
            id="catalog_prefix_update_queue",
            visibility_timeout=core.Duration.seconds(60),
            retention_period=core.Duration.seconds(1209600),
            dead_letter_queue=sqs.DeadLetterQueue(
                max_receive_count=3, queue=self.queues_["dead_letter_queue"]),
        )

        # Reconcile queue for INPE's XML metadata
        self.create_queue(
            id="consume_reconcile_queue_dlq",
            retention_period=core.Duration.seconds(1209600),
        )
        consume_reconcile_queue_alarm = cloudwatch.Alarm(
            self,
            "ConsumeReconcileQueueAlarm",
            metric=self.queues_["consume_reconcile_queue_dlq"].metric(
                "ApproximateNumberOfMessagesVisible"),
            evaluation_periods=1,
            threshold=0.0,
            comparison_operator=ComparisonOperator.GREATER_THAN_THRESHOLD,
        )
        consume_reconcile_queue_alarm.add_alarm_action(
            cw_actions.SnsAction(self.topics_["alarm_topic"]))
        self.create_queue(
            id="reconcile_queue",
            visibility_timeout=core.Duration.seconds(1000),
            retention_period=core.Duration.seconds(1209600),
            dead_letter_queue=sqs.DeadLetterQueue(
                max_receive_count=3,
                queue=self.queues_["consume_reconcile_queue_dlq"]),
        )

        # Reconcile queue for STAC items
        self.create_queue(
            id="consume_stac_reconcile_queue_dlq",
            retention_period=core.Duration.seconds(1209600),
        )
        consume_stac_reconcile_queue_alarm = cloudwatch.Alarm(
            self,
            "ConsumeStacReconcileQueueAlarm",
            metric=self.queues_["consume_stac_reconcile_queue_dlq"].metric(
                "ApproximateNumberOfMessagesVisible"),
            evaluation_periods=1,
            threshold=0.0,
            comparison_operator=ComparisonOperator.GREATER_THAN_THRESHOLD,
        )
        consume_stac_reconcile_queue_alarm.add_alarm_action(
            cw_actions.SnsAction(self.topics_["alarm_topic"]))
        self.create_queue(
            id="stac_reconcile_queue",
            visibility_timeout=core.Duration.seconds(1000),
            retention_period=core.Duration.seconds(1209600),
            dead_letter_queue=sqs.DeadLetterQueue(
                max_receive_count=3,
                queue=self.queues_["consume_stac_reconcile_queue_dlq"],
            ),
        )

        # Queue for STAC items to be inserted into Elasticsearch. Subscribes to the
        # topics with new STAC items.
        self.create_queue(
            id="insert_into_elasticsearch_queue",
            visibility_timeout=core.Duration.seconds(180),
            retention_period=core.Duration.seconds(1209600),
            dead_letter_queue=sqs.DeadLetterQueue(
                max_receive_count=3, queue=self.queues_["dead_letter_queue"]),
        )
        # Subscription for new item topics
        self.topics_["stac_item_topic"].add_subscription(
            sns_subscriptions.SqsSubscription(
                self.queues_["insert_into_elasticsearch_queue"]))
        # Subscription for reconciled item topics
        self.topics_["reconcile_stac_item_topic"].add_subscription(
            sns_subscriptions.SqsSubscription(
                self.queues_["insert_into_elasticsearch_queue"]))

        # Backup queue for STAC items inserted into Elasticsearch.
        # This receives the same items as "insert_into_elasticsearch_queue",
        # simply holding them for some time to allow recovery from ES
        # cluster failures (see #78).
        # This queue subscribes only to new item topics.
        self.create_queue(
            id="backup_insert_into_elasticsearch_queue",
            visibility_timeout=core.Duration.seconds(180),
            retention_period=core.Duration.days(
                settings.backup_queue_retention_days),
            dead_letter_queue=sqs.DeadLetterQueue(
                max_receive_count=3, queue=self.queues_["dead_letter_queue"]),
        )
        # Subscription for new item topics
        self.topics_["stac_item_topic"].add_subscription(
            sns_subscriptions.SqsSubscription(
                self.queues_["backup_insert_into_elasticsearch_queue"]))
    def __init__(
            self,
            scope: core.Construct,
            _id: str,
            *,
            vpc,
            bucket_para,  # key_name,
            ddb_file_list,
            sqs_queue,
            sqs_queue_DLQ,
            ssm_bucket_para,
            ssm_credential_para,
            s3bucket,
            s3_deploy,
            **kwargs) -> None:
        super().__init__(scope, _id, **kwargs)

        # Create environment variables for the userdata
        env_var = f'export table_queue_name={ddb_file_list.table_name}\n' \
                  f'export sqs_queue_name={sqs_queue.queue_name}\n' \
                  f'export ssm_parameter_bucket={ssm_bucket_para.parameter_name}\n'
        env_var_st = f'echo \"export table_queue_name={ddb_file_list.table_name}\" >> /etc/rc.local\n' \
                     f'echo \"export sqs_queue_name={sqs_queue.queue_name}\" >> /etc/rc.local\n' \
                     f'echo \"export ssm_parameter_bucket={ssm_bucket_para.parameter_name}\" >> /etc/rc.local\n'
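        # For illustration, with a table named "s3_migrate_file_list" (name is
        # hypothetical) the first pair of lines above renders as:
        #   export table_queue_name=s3_migrate_file_list
        #   echo "export table_queue_name=s3_migrate_file_list" >> /etc/rc.local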
        # Create log group and put group name into userdata
        s3_migrate_log = logs.LogGroup(self, "applog")
        cw_agent_config['logs']['logs_collected']['files']['collect_list'][0][
            'log_group_name'] = s3_migrate_log.log_group_name
        cw_agent_config['logs']['logs_collected']['files']['collect_list'][1][
            'log_group_name'] = s3_migrate_log.log_group_name
        cw_agent_config['metrics']['append_dimensions'][
            'AutoScalingGroupName'] = "\\${aws:AutoScalingGroupName}"
        cw_agent_config['metrics']['append_dimensions'][
            'InstanceId'] = "\\${aws:InstanceId}"
        cw_agent_config_str = json.dumps(cw_agent_config,
                                         indent=4).replace("\\\\", "\\")
        userdata_head = user_data_part1 + cw_agent_config_str + user_data_part2 + \
                        s3_deploy.bucket_name + " .\n" + env_var + env_var_st
        jobsender_userdata = userdata_head + user_data_jobsender_p
        worker_userdata = userdata_head + user_data_worker_p

        # Create jobsender ec2 node
        jobsender = autoscaling.AutoScalingGroup(
            self,
            "jobsender",
            instance_type=ec2.InstanceType(
                instance_type_identifier=jobsender_type),
            machine_image=linux_ami,
            # key_name=key_name,
            user_data=ec2.UserData.custom(jobsender_userdata),
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
            desired_capacity=1,
            min_capacity=0,
            max_capacity=1)
        # jobsender.connections.allow_from_any_ipv4(ec2.Port.tcp(22), "Internet access SSH")
        # Don't need SSH since we use Session Manager

        # Assign EC2 Policy to use SSM and CWAgent
        jobsender.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSSMManagedInstanceCore"))
        jobsender.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "CloudWatchAgentServerPolicy"))

        # Create Worker Autoscaling Group
        worker_asg = autoscaling.AutoScalingGroup(
            self,
            "worker-asg",
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
            instance_type=ec2.InstanceType(
                instance_type_identifier=worker_type),
            machine_image=linux_ami,
            # key_name=key_name,  # Optional if use SSM-SessionManager
            user_data=ec2.UserData.custom(worker_userdata),
            desired_capacity=2,
            min_capacity=2,
            max_capacity=10,
            spot_price="0.5",
            group_metrics=[autoscaling.GroupMetrics.all()])
        # worker_asg.connections.allow_from_any_ipv4(ec2.Port.tcp(22), "Internet access SSH")
        # Don't need SSH since we use Session Manager

        # Assign EC2 Policy to use SSM and CWAgent
        worker_asg.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSSMManagedInstanceCore"))
        worker_asg.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "CloudWatchAgentServerPolicy"))

        # Allow EC2 to access the new DynamoDB table
        ddb_file_list.grant_full_access(jobsender)
        ddb_file_list.grant_full_access(worker_asg)

        # Allow EC2 to access the new SQS queue and its DLQ
        sqs_queue.grant_consume_messages(jobsender)
        sqs_queue.grant_send_messages(jobsender)
        sqs_queue.grant_consume_messages(worker_asg)
        sqs_queue_DLQ.grant_consume_messages(jobsender)

        # Allow EC2 to access SSM Parameter Store to get bucket info and credentials
        ssm_bucket_para.grant_read(jobsender)
        ssm_credential_para.grant_read(jobsender)
        ssm_credential_para.grant_read(worker_asg)

        # Allow EC2 to access the source code on the s3_deploy bucket
        s3_deploy.grant_read(jobsender)
        s3_deploy.grant_read(worker_asg)

        # Allow EC2 to access the new S3 bucket
        s3bucket.grant_read(jobsender)
        s3bucket.grant_read(worker_asg)

        # Allow EC2 read-only access to existing S3 buckets for PUT mode (source buckets)
        bucket_name = ''
        for b in bucket_para:
            if bucket_name != b['src_bucket']:  # Skip duplicates if the same bucket is listed more than once
                bucket_name = b['src_bucket']
                s3exist_bucket = s3.Bucket.from_bucket_name(
                    self,
                    bucket_name,  # use the bucket name as the construct id
                    bucket_name=bucket_name)
                s3exist_bucket.grant_read(jobsender)
                s3exist_bucket.grant_read(worker_asg)
        # Allow EC2 read/write access to existing S3 buckets for GET mode (destination buckets)
        # bucket_name = ''
        # for b in bucket_para:
        #     if bucket_name != b['des_bucket']:  # Skip duplicates if the same bucket is listed more than once
        #         bucket_name = b['des_bucket']
        #         s3exist_bucket = s3.Bucket.from_bucket_name(self,
        #                                                     bucket_name,  # use the bucket name as the construct id
        #                                                     bucket_name=bucket_name)
        #         s3exist_bucket.grant_read_write(jobsender)
        #         s3exist_bucket.grant_read_write(worker_asg)

        # Dashboard to monitor SQS and EC2
        board = cw.Dashboard(self, "s3_migrate")

        ec2_metric_cpu_avg = cw.Metric(namespace="AWS/EC2",
                                       metric_name="CPUUtilization",
                                       dimensions={
                                           "AutoScalingGroupName":
                                           worker_asg.auto_scaling_group_name
                                       },
                                       period=core.Duration.minutes(1))

        ec2_metric_net_out = cw.MathExpression(
            expression=
            "SEARCH('{AWS/EC2, InstanceId} NetworkOut', 'Average', 60)",
            label="EC2-NetworkOut",
            using_metrics={})

        autoscaling_GroupDesiredCapacity = cw.Metric(
            namespace="AWS/AutoScaling",
            metric_name="GroupDesiredCapacity",
            dimensions={
                "AutoScalingGroupName": worker_asg.auto_scaling_group_name
            },
            period=core.Duration.minutes(1))
        autoscaling_GroupInServiceInstances = cw.Metric(
            namespace="AWS/AutoScaling",
            metric_name="GroupInServiceInstances",
            dimensions={
                "AutoScalingGroupName": worker_asg.auto_scaling_group_name
            },
            period=core.Duration.minutes(1))
        autoscaling_GroupMinSize = cw.Metric(
            namespace="AWS/AutoScaling",
            metric_name="GroupMinSize",
            dimensions={
                "AutoScalingGroupName": worker_asg.auto_scaling_group_name
            },
            period=core.Duration.minutes(1))
        autoscaling_GroupMaxSize = cw.Metric(
            namespace="AWS/AutoScaling",
            metric_name="GroupMaxSize",
            dimensions={
                "AutoScalingGroupName": worker_asg.auto_scaling_group_name
            },
            period=core.Duration.minutes(1))

        # CWAgent collected metric
        cwagent_mem_avg = cw.MathExpression(
            expression=
            "SEARCH('{CWAgent, AutoScalingGroupName, InstanceId} (AutoScalingGroupName="
            + worker_asg.auto_scaling_group_name +
            " AND MetricName=mem_used_percent)', 'Average', 60)",
            label="mem_avg",
            using_metrics={})
        cwagent_disk_avg = cw.MathExpression(
            expression=
            "SEARCH('{CWAgent, path, InstanceId, AutoScalingGroupName, device, fstype} "
            "(AutoScalingGroupName=" + worker_asg.auto_scaling_group_name +
            " AND MetricName=disk_used_percent AND path=\"/\")', 'Average', 60)",
            label="disk_avg",
            using_metrics={})
        cwagent_net_tcp = cw.MathExpression(
            expression=
            "SEARCH('{CWAgent, AutoScalingGroupName, InstanceId} (AutoScalingGroupName="
            + worker_asg.auto_scaling_group_name +
            " AND MetricName=tcp_established)', 'Average', 60)",
            label="tcp_conn",
            using_metrics={})

        # CWAgent collected application logs - filter metric
        s3_migrate_log.add_metric_filter(
            "Completed-bytes",
            metric_name="Completed-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[date, time, info, hs, p="--->Complete", bytes, key]'))
        s3_migrate_log.add_metric_filter(
            "Uploading-bytes",
            metric_name="Uploading-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[date, time, info, hs, p="--->Uploading", bytes, key]'))
        s3_migrate_log.add_metric_filter(
            "Downloading-bytes",
            metric_name="Downloading-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[date, time, info, hs, p="--->Downloading", bytes, key]'))
        traffic_metric_Complete = cw.Metric(namespace="s3_migrate",
                                            metric_name="Completed-bytes",
                                            statistic="Sum",
                                            period=core.Duration.minutes(1))
        traffic_metric_Upload = cw.Metric(namespace="s3_migrate",
                                          metric_name="Uploading-bytes",
                                          statistic="Sum",
                                          period=core.Duration.minutes(1))
        traffic_metric_Download = cw.Metric(namespace="s3_migrate",
                                            metric_name="Downloading-bytes",
                                            statistic="Sum",
                                            period=core.Duration.minutes(1))
        s3_migrate_log.add_metric_filter(
            "ERROR",
            metric_name="ERROR-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"ERROR"'))
        s3_migrate_log.add_metric_filter(
            "WARNING",
            metric_name="WARNING-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"WARNING"'))
        log_metric_ERROR = cw.Metric(namespace="s3_migrate",
                                     metric_name="ERROR-Logs",
                                     statistic="Sum",
                                     period=core.Duration.minutes(1))
        log_metric_WARNING = cw.Metric(namespace="s3_migrate",
                                       metric_name="WARNING-Logs",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))

        board.add_widgets(
            cw.GraphWidget(title="S3-MIGRATION-TOTAL-TRAFFIC",
                           left=[
                               traffic_metric_Complete, traffic_metric_Upload,
                               traffic_metric_Download
                           ],
                           left_y_axis=cw.YAxisProps(label="Bytes/min",
                                                     show_units=False)),
            cw.GraphWidget(title="ERROR/WARNING LOGS",
                           left=[log_metric_ERROR],
                           left_y_axis=cw.YAxisProps(label="Count",
                                                     show_units=False),
                           right=[log_metric_WARNING],
                           right_y_axis=cw.YAxisProps(label="Count",
                                                      show_units=False)),
            cw.GraphWidget(
                title="SQS-JOBS",
                left=[
                    sqs_queue.metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1))
                ]),
            cw.SingleValueWidget(
                title="RUNNING, WAITING & DEATH JOBS",
                metrics=[
                    sqs_queue.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue.metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1))
                ],
                height=6))

        board.add_widgets(
            cw.GraphWidget(title="EC2-AutoscalingGroup-TCP",
                           left=[cwagent_net_tcp],
                           left_y_axis=cw.YAxisProps(label="Count",
                                                     show_units=False)),
            cw.GraphWidget(title="EC2-AutoscalingGroup-CPU/MEMORY",
                           left=[ec2_metric_cpu_avg, cwagent_mem_avg],
                           left_y_axis=cw.YAxisProps(max=100,
                                                     min=0,
                                                     label="%",
                                                     show_units=False)),
            cw.GraphWidget(title="EC2-AutoscalingGroup-DISK",
                           left=[cwagent_disk_avg],
                           left_y_axis=cw.YAxisProps(max=100,
                                                     min=0,
                                                     label="%",
                                                     show_units=False)),
            cw.SingleValueWidget(title="EC2-AutoscalingGroup-CAPACITY",
                                 metrics=[
                                     autoscaling_GroupDesiredCapacity,
                                     autoscaling_GroupInServiceInstances,
                                     autoscaling_GroupMinSize,
                                     autoscaling_GroupMaxSize
                                 ],
                                 height=6))
        board.add_widgets(
            cw.GraphWidget(title="EC2-NetworkOut",
                           left=[ec2_metric_net_out],
                           left_y_axis=cw.YAxisProps(label="Bytes/min",
                                                     show_units=False)))

        # Scale up when visible messages > 100 in 5 mins
        worker_asg.scale_on_metric(
            "scaleup",
            metric=sqs_queue.metric_approximate_number_of_messages_visible(),
            scaling_steps=[
                autoscaling.ScalingInterval(change=1, lower=100, upper=500),
                autoscaling.ScalingInterval(change=2, lower=500),
                autoscaling.ScalingInterval(change=0, upper=100, lower=0)
            ],
            adjustment_type=autoscaling.AdjustmentType.CHANGE_IN_CAPACITY)
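        # Illustrative mapping of the scaling_steps above (a sketch of the intended behaviour, not of
        # how CloudFormation evaluates the backing alarms):
        #   visible messages   0-100  -> change 0 (hold)
        #   visible messages 100-500  -> change +1 instance
        #   visible messages    >500  -> change +2 instances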

        # Alarm when the queue is empty and more than one EC2 instance is running
        # If the queue is empty (no visible or invisible messages) and more than one EC2 instance is
        # in service, raise the alarm and scale the group down to one instance.
        # Adjust to your scenario: if the jobsender also transfers data, you could instead scale the
        # Autoscaling Group down to 0 when there are no jobs.
        metric_all_message = cw.MathExpression(
            expression="IF(((a+b) == 0) AND (c >1), 0, 1)",  # a+b且c>1则设置为0,告警
            label="empty_queue_expression",
            using_metrics={
                "a": sqs_queue.metric_approximate_number_of_messages_visible(),
                "b":
                sqs_queue.metric_approximate_number_of_messages_not_visible(),
                "c": autoscaling_GroupInServiceInstances
            })
        alarm_0 = cw.Alarm(
            self,
            "SQSempty",
            alarm_name=
            "s3-migration-cluster-SQS queue empty and ec2 more than 1 in Cluster",
            metric=metric_all_message,
            threshold=0,
            comparison_operator=cw.ComparisonOperator.
            LESS_THAN_OR_EQUAL_TO_THRESHOLD,
            evaluation_periods=3,
            datapoints_to_alarm=3,
            treat_missing_data=cw.TreatMissingData.NOT_BREACHING)
        alarm_topic_empty = sns.Topic(
            self, "SQS queue empty and ec2 more than 1 in Cluster")
        # This alarm also serves as a one-time notification that the batch transfer has completed,
        # rather than notifying repeatedly.
        alarm_topic_empty.add_subscription(
            subscription=sub.EmailSubscription(alarm_email))
        alarm_0.add_alarm_action(action.SnsAction(alarm_topic_empty))

        # If queue empty, set autoscale down to 1 EC2
        action_shutdown = autoscaling.StepScalingAction(
            self,
            "shutdown",
            auto_scaling_group=worker_asg,
            adjustment_type=autoscaling.AdjustmentType.EXACT_CAPACITY)
        action_shutdown.add_adjustment(adjustment=1, upper_bound=0)
        alarm_0.add_alarm_action(action.AutoScalingAction(action_shutdown))
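        # With EXACT_CAPACITY, adjustment=1 is expected to set the group to exactly one instance
        # whenever the alarm metric is at or below threshold + upper_bound, i.e. 0.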

        # While message in SQS-DLQ, alarm to sns
        alarm_DLQ = cw.Alarm(
            self,
            "SQS_DLQ",
            alarm_name=
            "s3-migration-cluster-SQS DLQ more than 1 message-Cluster",
            metric=sqs_queue_DLQ.metric_approximate_number_of_messages_visible(
            ),
            threshold=0,
            comparison_operator=cw.ComparisonOperator.GREATER_THAN_THRESHOLD,
            evaluation_periods=3,
            datapoints_to_alarm=3,
            treat_missing_data=cw.TreatMissingData.IGNORE)
        alarm_topic_DLQ = sns.Topic(self,
                                    "SQS DLQ more than 1 message-Cluster")
        alarm_topic_DLQ.add_subscription(
            subscription=sub.EmailSubscription(alarm_email))
        alarm_DLQ.add_alarm_action(action.SnsAction(alarm_topic_DLQ))

        # Output
        core.CfnOutput(self, "LogGroup", value=s3_migrate_log.log_group_name)
        core.CfnOutput(self,
                       "Dashboard",
                       value="CloudWatch Dashboard name s3_migrate_cluster")
        core.CfnOutput(self,
                       "Alarm",
                       value="CloudWatch SQS queue empty Alarm for cluster: " +
                       alarm_email)
Example #17
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        security_distribution_list_email = '*****@*****.**'

        # securityhub_instance = securityhub.CfnHub(self, 'SecurityHub')

        # Ensure AWS Config is enabled / Ensure CloudTrail is enabled in all Regions 2.1 - 2.8
        cloudtrail_bucket_accesslogs = s3.Bucket(
            self,
            "CloudTrailS3Accesslogs",
            block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
            encryption=s3.BucketEncryption.S3_MANAGED,
            removal_policy=core.RemovalPolicy.RETAIN)

        cloudtrail_bucket = s3.Bucket(
            self,
            "CloudTrailS3",
            block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
            encryption=s3.BucketEncryption.S3_MANAGED,
            removal_policy=core.RemovalPolicy.RETAIN,
            server_access_logs_bucket=cloudtrail_bucket_accesslogs,
        )

        cloudtrail_kms = kms.Key(self,
                                 "CloudTrailKey",
                                 enable_key_rotation=True)

        # CloudTrail - single account, not Organization
        trail = cloudtrail.Trail(
            self,
            "CloudTrail",
            enable_file_validation=True,
            is_multi_region_trail=True,
            include_global_service_events=True,
            send_to_cloud_watch_logs=True,
            cloud_watch_logs_retention=logs.RetentionDays.FOUR_MONTHS,
            bucket=cloudtrail_bucket,
            kms_key=cloudtrail_kms)

        cloudtrail_kms.grant(iam.ServicePrincipal('cloudtrail.amazonaws.com'),
                             'kms:DescribeKey')

        cloudtrail_kms.grant(
            iam.ServicePrincipal(
                'cloudtrail.amazonaws.com',
                conditions={
                    'StringLike': {
                        'kms:EncryptionContext:aws:cloudtrail:arn':
                        'arn:aws:cloudtrail:*:' + core.Stack.of(self).account +
                        ':trail/*'
                    }
                }), 'kms:GenerateDataKey*')

        cloudtrail_kms.add_to_resource_policy(
            iam.PolicyStatement(
                actions=["kms:Decrypt", "kms:ReEncryptFrom"],
                conditions={
                    'StringEquals': {
                        'kms:CallerAccount': core.Stack.of(self).account
                    },
                    'StringLike': {
                        'kms:EncryptionContext:aws:cloudtrail:arn':
                        'arn:aws:cloudtrail:*:' + core.Stack.of(self).account +
                        ':trail/*'
                    }
                },
                effect=iam.Effect.ALLOW,
                principals=[iam.AnyPrincipal()],
                resources=['*']))

        cloudtrail_kms.add_to_resource_policy(
            iam.PolicyStatement(actions=["kms:CreateAlias"],
                                conditions={
                                    'StringEquals': {
                                        'kms:CallerAccount':
                                        core.Stack.of(self).account,
                                        'kms:ViaService':
                                        'ec2.' + core.Stack.of(self).region +
                                        '.amazonaws.com'
                                    }
                                },
                                effect=iam.Effect.ALLOW,
                                principals=[iam.AnyPrincipal()],
                                resources=['*']))

        cloudtrail_kms.add_to_resource_policy(
            iam.PolicyStatement(
                actions=["kms:Decrypt", "kms:ReEncryptFrom"],
                conditions={
                    'StringEquals': {
                        'kms:CallerAccount': core.Stack.of(self).account
                    },
                    'StringLike': {
                        'kms:EncryptionContext:aws:cloudtrail:arn':
                        'arn:aws:cloudtrail:*:' + core.Stack.of(self).account +
                        ':trail/*'
                    }
                },
                effect=iam.Effect.ALLOW,
                principals=[iam.AnyPrincipal()],
                resources=['*']))

        config_role = iam.CfnServiceLinkedRole(
            self,
            id='ServiceLinkedRoleConfig',
            aws_service_name='config.amazonaws.com')

        global_config = config.CfnConfigurationRecorder(
            self,
            'ConfigRecorder',
            name='default',
            # role_arn=config_role.role_arn,
            role_arn="arn:aws:iam::" + core.Stack.of(self).account +
            ":role/aws-service-role/config.amazonaws.com/AWSServiceRoleForConfig",
            # role_arn=config_role.get_att(
            #     attribute_name='resource.arn').to_string(),
            recording_group=config.CfnConfigurationRecorder.RecordingGroupProperty(
                all_supported=True,
                include_global_resource_types=True))

        config_bucket = s3.Bucket(
            self,
            "ConfigS3",
            block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
            encryption=s3.BucketEncryption.S3_MANAGED,
            removal_policy=core.RemovalPolicy.RETAIN,
        )

        config_bucket.add_to_resource_policy(
            iam.PolicyStatement(
                actions=['s3:GetBucketAcl'],
                effect=iam.Effect.ALLOW,
                principals=[iam.ServicePrincipal('config.amazonaws.com')],
                resources=[config_bucket.bucket_arn]))

        config_bucket.add_to_resource_policy(
            iam.PolicyStatement(
                actions=['s3:PutObject'],
                effect=iam.Effect.ALLOW,
                principals=[iam.ServicePrincipal('config.amazonaws.com')],
                resources=[
                    config_bucket.arn_for_objects('AWSLogs/' +
                                                  core.Stack.of(self).account +
                                                  '/Config/*')
                ],
                conditions={
                    "StringEquals": {
                        's3:x-amz-acl': 'bucket-owner-full-control',
                    }
                }))

        config_delivery_stream = config.CfnDeliveryChannel(
            self,
            "ConfigDeliveryChannel",
            s3_bucket_name=config_bucket.bucket_name)

        # Config Aggregator in Organizations account
        # config_aggregator = config.CfnConfigurationAggregator(self, 'ConfigAggregator',
        #                                                       configuration_aggregator_name='ConfigAggregator',
        #                                                       organization_aggregation_source=config.CfnConfigurationAggregator.OrganizationAggregationSourceProperty(
        #                                                           role_arn=iam.Role(self, "AWSConfigRoleForOrganizations",
        #                                                                             assumed_by=iam.ServicePrincipal(
        #                                                                                 'config.amazonaws.com'),
        #                                                                             managed_policies=[iam.ManagedPolicy.from_aws_managed_policy_name(
        #                                                                                 'service-role/AWSConfigRoleForOrganizations')]
        #                                                                             ).role_arn,
        #                                                           all_aws_regions=True
        #                                                       )
        #                                                       )

        # 2.9 – Ensure VPC flow logging is enabled in all VPCs
        # vpc = ec2.Vpc.from_lookup(self, "VPC",
        #                           is_default=True,
        #                           )

        # S3 for VPC flow logs
        # vpc_flow_logs_bucket = s3.Bucket(self, "VPCFlowLogsBucket",
        #                                  block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
        #                                  encryption=s3.BucketEncryption.S3_MANAGED,
        #                                  removal_policy=core.RemovalPolicy.RETAIN
        #                                  )

        # Ensure a log metric filter and alarm exist for 3.1 – 3.14
        security_notifications_topic = sns.Topic(self,
                                                 'CIS_Topic',
                                                 display_name='CIS_Topic',
                                                 topic_name='CIS_Topic')

        sns.Subscription(self,
                         'CIS_Subscription',
                         topic=security_notifications_topic,
                         protocol=sns.SubscriptionProtocol.EMAIL,
                         endpoint=security_distribution_list_email)

        cloudwatch_actions_cis = cloudwatch_actions.SnsAction(
            security_notifications_topic)

        cis_metricfilter_alarms = {
            'CIS-3.1-UnauthorizedAPICalls':
            '($.errorCode="*UnauthorizedOperation") || ($.errorCode="AccessDenied*")',
            'CIS-3.2-ConsoleSigninWithoutMFA':
            '($.eventName="ConsoleLogin") && ($.additionalEventData.MFAUsed !="Yes")',
            'RootAccountUsageAlarm':
            '$.userIdentity.type="Root" && $.userIdentity.invokedBy NOT EXISTS && $.eventType !="AwsServiceEvent"',
            'CIS-3.4-IAMPolicyChanges':
            '($.eventName=DeleteGroupPolicy) || ($.eventName=DeleteRolePolicy) || ($.eventName=DeleteUserPolicy) || ($.eventName=PutGroupPolicy) || ($.eventName=PutRolePolicy) || ($.eventName=PutUserPolicy) || ($.eventName=CreatePolicy) || ($.eventName=DeletePolicy) || ($.eventName=CreatePolicyVersion) || ($.eventName=DeletePolicyVersion) || ($.eventName=AttachRolePolicy) || ($.eventName=DetachRolePolicy) || ($.eventName=AttachUserPolicy) || ($.eventName=DetachUserPolicy) || ($.eventName=AttachGroupPolicy) || ($.eventName=DetachGroupPolicy)',
            'CIS-3.5-CloudTrailChanges':
            '($.eventName=CreateTrail) || ($.eventName=UpdateTrail) || ($.eventName=DeleteTrail) || ($.eventName=StartLogging) || ($.eventName=StopLogging)',
            'CIS-3.6-ConsoleAuthenticationFailure':
            '($.eventName=ConsoleLogin) && ($.errorMessage="Failed authentication")',
            'CIS-3.7-DisableOrDeleteCMK':
            '($.eventSource=kms.amazonaws.com) && (($.eventName=DisableKey) || ($.eventName=ScheduleKeyDeletion))',
            'CIS-3.8-S3BucketPolicyChanges':
            '($.eventSource=s3.amazonaws.com) && (($.eventName=PutBucketAcl) || ($.eventName=PutBucketPolicy) || ($.eventName=PutBucketCors) || ($.eventName=PutBucketLifecycle) || ($.eventName=PutBucketReplication) || ($.eventName=DeleteBucketPolicy) || ($.eventName=DeleteBucketCors) || ($.eventName=DeleteBucketLifecycle) || ($.eventName=DeleteBucketReplication))',
            'CIS-3.9-AWSConfigChanges':
            '($.eventSource=config.amazonaws.com) && (($.eventName=StopConfigurationRecorder) || ($.eventName=DeleteDeliveryChannel) || ($.eventName=PutDeliveryChannel) || ($.eventName=PutConfigurationRecorder))',
            'CIS-3.10-SecurityGroupChanges':
            '($.eventName=AuthorizeSecurityGroupIngress) || ($.eventName=AuthorizeSecurityGroupEgress) || ($.eventName=RevokeSecurityGroupIngress) || ($.eventName=RevokeSecurityGroupEgress) || ($.eventName=CreateSecurityGroup) || ($.eventName=DeleteSecurityGroup)',
            'CIS-3.11-NetworkACLChanges':
            '($.eventName=CreateNetworkAcl) || ($.eventName=CreateNetworkAclEntry) || ($.eventName=DeleteNetworkAcl) || ($.eventName=DeleteNetworkAclEntry) || ($.eventName=ReplaceNetworkAclEntry) || ($.eventName=ReplaceNetworkAclAssociation)',
            'CIS-3.12-NetworkGatewayChanges':
            '($.eventName=CreateCustomerGateway) || ($.eventName=DeleteCustomerGateway) || ($.eventName=AttachInternetGateway) || ($.eventName=CreateInternetGateway) || ($.eventName=DeleteInternetGateway) || ($.eventName=DetachInternetGateway)',
            'CIS-3.13-RouteTableChanges':
            '($.eventName=CreateRoute) || ($.eventName=CreateRouteTable) || ($.eventName=ReplaceRoute) || ($.eventName=ReplaceRouteTableAssociation) || ($.eventName=DeleteRouteTable) || ($.eventName=DeleteRoute) || ($.eventName=DisassociateRouteTable)',
            'CIS-3.14-VPCChanges':
            '($.eventName=CreateVpc) || ($.eventName=DeleteVpc) || ($.eventName=ModifyVpcAttribute) || ($.eventName=AcceptVpcPeeringConnection) || ($.eventName=CreateVpcPeeringConnection) || ($.eventName=DeleteVpcPeeringConnection) || ($.eventName=RejectVpcPeeringConnection) || ($.eventName=AttachClassicLinkVpc) || ($.eventName=DetachClassicLinkVpc) || ($.eventName=DisableVpcClassicLink) || ($.eventName=EnableVpcClassicLink)',
        }
        for x, y in cis_metricfilter_alarms.items():
            str_x = str(x)
            str_y = str(y)
            logs.MetricFilter(
                self,
                "MetricFilter_" + str_x,
                log_group=trail.log_group,
                filter_pattern=logs.JsonPattern(json_pattern_string=str_y),
                metric_name=str_x,
                metric_namespace="LogMetrics",
                metric_value='1')
            cloudwatch.Alarm(
                self,
                "Alarm_" + str_x,
                alarm_name=str_x,
                alarm_description=str_x,
                statistic='Sum',
                period=core.Duration.minutes(5),
                comparison_operator=cloudwatch.ComparisonOperator.
                GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
                evaluation_periods=1,
                threshold=1,
                metric=cloudwatch.Metric(metric_name=str_x,
                                         namespace="LogMetrics"),
            ).add_alarm_action(cloudwatch_actions_cis)
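        # Each entry above yields one metric filter on the CloudTrail log group plus one alarm that
        # notifies the CIS_Topic subscribers whenever the pattern matches at least once within 5 minutes.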

        # IAM Password Policy custom resource CIS 1.5 - 1.11
        cfn_template = cfn_inc.CfnInclude(
            self,
            "includeTemplate",
            template_file="account-password-policy.yaml",
            parameters={
                "MaxPasswordAge": 90,
                "MinimumPasswordLength": 14,
                "PasswordReusePrevention": 24,
                "RequireLowercaseCharacters": True,
                "RequireNumbers": True,
                "RequireSymbols": True,
                "RequireUppercaseCharacters": True,
            })

        # CIS 1.20
        support_role = iam.Role(
            self,
            "SupportRole",
            assumed_by=iam.AccountPrincipal(
                account_id=core.Stack.of(self).account),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AWSSupportAccess')
            ],
            role_name='AWSSupportAccess')

        guardduty_detector = guardduty.CfnDetector(self,
                                                   'GuardDutyDetector',
                                                   enable=True)

        guardduty_event = events.Rule(
            self,
            'GuardDutyEvent',
            rule_name='guardduty-notification',
            description='GuardDuty Notification',
            event_pattern=events.EventPattern(
                source=['aws.guardduty'], detail_type=['GuardDuty Finding']),
            targets=[events_targets.SnsTopic(security_notifications_topic)])
Example #18
    def __init__(self, scope: core.Construct, id: str, **kwargs):
        super().__init__(scope, id, **kwargs)

        # The code that defines your stack goes here
        this_dir = path.dirname(__file__)

        handler = lmb.Function(self,
                               'Handler',
                               runtime=lmb.Runtime.PYTHON_3_7,
                               handler='handler.handler',
                               code=lmb.Code.from_asset(
                                   path.join(this_dir, 'lambda')))
        alias = lmb.Alias(self,
                          "HandlerAlias",
                          alias_name="Current",
                          version=handler.current_version)
        gw = apigw.LambdaRestApi(
            self,
            'Gateway',
            description='Endpoint for a simple Lambda-powered web service',
            handler=alias,
            endpoint_types=[EndpointType.REGIONAL])
        failure_alarm = cloudwatch.Alarm(
            self,
            "FailureAlarm",
            alarm_name=self.stack_name + '-' + '500Alarm',
            metric=cloudwatch.Metric(metric_name="5XXError",
                                     namespace="AWS/ApiGateway",
                                     dimensions={
                                         "ApiName": "Gateway",
                                     },
                                     statistic="Sum",
                                     period=core.Duration.minutes(1)),
            threshold=1,
            evaluation_periods=1)

        alarm500topic = sns.Topic(self,
                                  "Alarm500Topic",
                                  topic_name=self.stack_name + '-' +
                                  'Alarm500TopicSNS')
        alarm500topic.add_subscription(
            subscriptions.EmailSubscription("*****@*****.**"))
        failure_alarm.add_alarm_action(cw_actions.SnsAction(alarm500topic))
        codedeploy.LambdaDeploymentGroup(
            self,
            "DeploymentGroup",
            alias=alias,
            deployment_config=codedeploy.LambdaDeploymentConfig.
            CANARY_10_PERCENT_10_MINUTES,
            alarms=[failure_alarm])
        # Create a dynamodb table

        table_name = self.stack_name + '-' + 'HelloCdkTable'
        table = dynamodb.Table(self,
                               "TestTable",
                               table_name=table_name,
                               partition_key=Attribute(
                                   name="id",
                                   type=dynamodb.AttributeType.STRING))
        table_name_id = cr.PhysicalResourceId.of(table.table_name)
        on_create_action = AwsSdkCall(
            action='putItem',
            service='DynamoDB',
            physical_resource_id=table_name_id,
            parameters={
                'Item': {
                    'id': {
                        'S': 'HOLA_CREATE'
                    },
                    'date': {
                        'S': datetime.today().strftime('%Y-%m-%d')
                    },
                    'epoch': {
                        'N': str(int(time.time()))
                    }
                },
                'TableName': table_name
            })
        on_update_action = AwsSdkCall(
            action='putItem',
            service='DynamoDB',
            physical_resource_id=table_name_id,
            parameters={
                'Item': {
                    'id': {
                        'S': 'HOLA_UPDATE'
                    },
                    'date': {
                        'S': datetime.today().strftime('%Y-%m-%d')
                    },
                    'epoch': {
                        'N': str(int(time.time()))
                    }
                },
                'TableName': table_name
            })
        cr.AwsCustomResource(
            self,
            "TestTableCustomResource",
            on_create=on_create_action,
            on_update=on_update_action,
            policy=cr.AwsCustomResourcePolicy.from_sdk_calls(
                resources=cr.AwsCustomResourcePolicy.ANY_RESOURCE))
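        # Optional sketch (not part of the original example): an on_delete AwsSdkCall could remove the
        # seeded item when the stack is destroyed, e.g.
        # on_delete_action = AwsSdkCall(
        #     action='deleteItem',
        #     service='DynamoDB',
        #     parameters={
        #         'Key': {'id': {'S': 'HOLA_CREATE'}},
        #         'TableName': table_name
        #     })
        # and be passed as on_delete=on_delete_action above.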

        # OUTPUT
        self.url_output = core.CfnOutput(self, 'Url', value=gw.url)
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # -----------------------------------------------------------------------------------------------------------
        # The Simple Webservice Logic - This is what we will be monitoring
        #
        # API GW HTTP API, Lambda Fn and DynamoDB
        # https://github.com/cdk-patterns/serverless/tree/master/the-simple-webservice
        # -----------------------------------------------------------------------------------------------------------

        # DynamoDB Table
        table = dynamo_db.Table(
            self,
            "Hits",
            partition_key=dynamo_db.Attribute(
                name="path", type=dynamo_db.AttributeType.STRING),
            billing_mode=dynamo_db.BillingMode.PAY_PER_REQUEST)

        # defines an AWS  Lambda resource
        dynamo_lambda = _lambda.Function(
            self,
            "DynamoLambdaHandler",
            runtime=_lambda.Runtime.NODEJS_12_X,  # execution environment
            handler="lambda.handler",  # file is "lambda", function is "handler"
            code=_lambda.Code.from_asset(
                "lambda_fns"),  # Code loaded from the lambda dir
            environment={'HITS_TABLE_NAME': table.table_name})

        # grant the lambda role read/write permissions to our table
        table.grant_read_write_data(dynamo_lambda)

        # defines an API Gateway Http API resource backed by our "dynamoLambda" function.
        api = api_gw.HttpApi(
            self,
            'HttpAPI',
            default_integration=integrations.LambdaProxyIntegration(
                handler=dynamo_lambda))

        core.CfnOutput(self, 'HTTP API Url', value=api.url)

        # -----------------------------------------------------------------------------------------------------------
        # Monitoring Logic Starts Here
        #
        # This is everything we need to understand the state of our system:
        # - custom metrics
        # - cloudwatch alarms
        # - custom cloudwatch dashboard
        # -----------------------------------------------------------------------------------------------------------

        # SNS Topic so we can hook things into our alerts e.g. email
        error_topic = sns.Topic(self, 'theBigFanTopic')

        ###
        # Custom Metrics
        ###
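        # metric_for_api_gw used below is a helper method defined on this stack class outside this
        # snippet; a minimal sketch of its assumed shape (the dimension key and period are assumptions):
        #
        # def metric_for_api_gw(self, api_id: str, metric_name: str, label: str, stat: str = 'sum'):
        #     return cloud_watch.Metric(metric_name=metric_name,
        #                               namespace="AWS/ApiGateway",
        #                               dimensions={"ApiId": api_id},
        #                               label=label,
        #                               statistic=stat,
        #                               period=core.Duration.minutes(5))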

        api_gw_4xx_error_percentage = cloud_watch.MathExpression(
            expression="m1/m2*100",
            label="% API Gateway 4xx Errors",
            using_metrics={
                "m1":
                self.metric_for_api_gw(api.http_api_id, '4XXError',
                                       '4XX Errors', 'sum'),
                "m2":
                self.metric_for_api_gw(api.http_api_id, 'Count', '# Requests',
                                       'sum'),
            },
            period=core.Duration.minutes(5))

        # Gather the % of lambda invocations that error in past 5 mins
        lambda_error_perc = cloud_watch.MathExpression(
            expression="e / i * 100",
            label="% of invocations that errored, last 5 mins",
            using_metrics={
                "i":
                dynamo_lambda.metric(metric_name="Invocations",
                                     statistic="sum"),
                "e":
                dynamo_lambda.metric(metric_name="Errors", statistic="sum"),
            },
            period=core.Duration.minutes(5))

        # note: throttled requests are not counted in total num of invocations
        lambda_throttled_perc = cloud_watch.MathExpression(
            expression="t / (i + t) * 100",
            label="% of throttled requests, last 30 mins",
            using_metrics={
                "i":
                dynamo_lambda.metric(metric_name="Invocations",
                                     statistic="sum"),
                "t":
                dynamo_lambda.metric(metric_name="Throttles", statistic="sum"),
            },
            period=core.Duration.minutes(5))

        # UserErrors appear to be reported at the account level rather than per table, so I'm merging
        # these two metrics until I can get a definitive answer. UserErrors will likely always show
        # as 0 when scoped to a table, so this is still effectively a system errors count
        dynamo_db_total_errors = cloud_watch.MathExpression(
            expression="m1 + m2",
            label="DynamoDB Errors",
            using_metrics={
                "m1": table.metric_user_errors(),
                "m2": table.metric_system_errors_for_operations(),
            },
            period=core.Duration.minutes(5))

        # Rather than have 2 alerts, let's create one aggregate metric
        dynamo_db_throttles = cloud_watch.MathExpression(
            expression="m1 + m2",
            label="DynamoDB Throttles",
            using_metrics={
                "m1":
                table.metric(metric_name="ReadThrottleEvents",
                             statistic="sum"),
                "m2":
                table.metric(metric_name="WriteThrottleEvents",
                             statistic="sum"),
            },
            period=core.Duration.minutes(5))
        ###
        # Alarms
        ###

        # Api Gateway

        # 4xx are user errors so a large volume indicates a problem
        cloud_watch.Alarm(self,
                          id="API Gateway 4XX Errors > 1%",
                          metric=api_gw_4xx_error_percentage,
                          threshold=1,
                          evaluation_periods=6,
                          datapoints_to_alarm=1,
                          treat_missing_data=cloud_watch.TreatMissingData.NOT_BREACHING) \
            .add_alarm_action(actions.SnsAction(error_topic))

        # 5xx are internal server errors so we want 0 of these
        cloud_watch.Alarm(self,
                          id="API Gateway 5XX Errors > 0",
                          metric=self.metric_for_api_gw(api_id=api.http_api_id,
                                                        metric_name="5XXError",
                                                        label="5XX Errors",
                                                        stat="p99"),
                          threshold=0,
                          period=core.Duration.minutes(5),
                          evaluation_periods=6,
                          datapoints_to_alarm=1,
                          treat_missing_data=cloud_watch.TreatMissingData.NOT_BREACHING) \
            .add_alarm_action(actions.SnsAction(error_topic))

        cloud_watch.Alarm(self,
                          id="API p99 latency alarm >= 1s",
                          metric=self.metric_for_api_gw(api_id=api.http_api_id,
                                                        metric_name="Latency",
                                                        label="API GW Latency",
                                                        stat="p99"),
                          threshold=1000,
                          period=core.Duration.minutes(5),
                          evaluation_periods=6,
                          datapoints_to_alarm=1,
                          treat_missing_data=cloud_watch.TreatMissingData.NOT_BREACHING) \
            .add_alarm_action(actions.SnsAction(error_topic))

        # Lambda

        # 2% of Dynamo Lambda invocations erroring
        cloud_watch.Alarm(self,
                          id="Dynamo Lambda 2% Error",
                          metric=lambda_error_perc,
                          threshold=2,
                          evaluation_periods=6,
                          datapoints_to_alarm=1,
                          treat_missing_data=cloud_watch.TreatMissingData.NOT_BREACHING) \
            .add_alarm_action(actions.SnsAction(error_topic))

        # 1% of Lambda invocations taking longer than 1 second
        cloud_watch.Alarm(self,
                          id="Dynamo Lambda p99 Long Duration (>1s)",
                          metric=dynamo_lambda.metric_duration(),
                          period=core.Duration.minutes(5),
                          threshold=1000,
                          evaluation_periods=6,
                          datapoints_to_alarm=1,
                          statistic="p99",
                          treat_missing_data=cloud_watch.TreatMissingData.NOT_BREACHING) \
            .add_alarm_action(actions.SnsAction(error_topic))

        # 2% of our lambda invocations are throttled
        cloud_watch.Alarm(self,
                          id="Dynamo Lambda 2% Throttled",
                          metric=lambda_throttled_perc,
                          threshold=2,
                          evaluation_periods=6,
                          datapoints_to_alarm=1,
                          treat_missing_data=cloud_watch.TreatMissingData.NOT_BREACHING) \
            .add_alarm_action(actions.SnsAction(error_topic))

        # DynamoDB

        # DynamoDB interactions are throttled - indicates under-provisioned capacity
        cloud_watch.Alarm(self,
                          id="DynamoDB Table Reads/Writes Throttled",
                          metric=dynamo_db_throttles,
                          threshold=1,
                          evaluation_periods=6,
                          datapoints_to_alarm=1,
                          treat_missing_data=cloud_watch.TreatMissingData.NOT_BREACHING) \
            .add_alarm_action(actions.SnsAction(error_topic))

        # There should be 0 DynamoDB errors
        cloud_watch.Alarm(self,
                          id="DynamoDB Errors > 0",
                          metric=dynamo_db_total_errors,
                          threshold=0,
                          evaluation_periods=6,
                          datapoints_to_alarm=1,
                          treat_missing_data=cloud_watch.TreatMissingData.NOT_BREACHING) \
            .add_alarm_action(actions.SnsAction(error_topic))

        dashboard = cloud_watch.Dashboard(self, id="CloudWatchDashBoard")
        dashboard.add_widgets(
            cloud_watch.GraphWidget(title="Requests",
                                    width=8,
                                    left=[
                                        self.metric_for_api_gw(
                                            api_id=api.http_api_id,
                                            metric_name="Count",
                                            label="# Requests",
                                            stat="sum")
                                    ]),
            cloud_watch.GraphWidget(
                title="API GW Latency",
                width=8,
                stacked=True,
                left=[
                    self.metric_for_api_gw(api_id=api.http_api_id,
                                           metric_name="Latency",
                                           label="API Latency p50",
                                           stat="p50"),
                    self.metric_for_api_gw(api_id=api.http_api_id,
                                           metric_name="Latency",
                                           label="API Latency p90",
                                           stat="p90"),
                    self.metric_for_api_gw(api_id=api.http_api_id,
                                           metric_name="Latency",
                                           label="API Latency p99",
                                           stat="p99")
                ]),
            cloud_watch.GraphWidget(
                title="API GW Errors",
                width=8,
                stacked=True,
                left=[
                    self.metric_for_api_gw(api_id=api.http_api_id,
                                           metric_name="4XXError",
                                           label="4XX Errors",
                                           stat="sum"),
                    self.metric_for_api_gw(api_id=api.http_api_id,
                                           metric_name="5XXError",
                                           label="5XX Errors",
                                           stat="sum")
                ]),
            cloud_watch.GraphWidget(title="Dynamo Lambda Error %",
                                    width=8,
                                    left=[lambda_error_perc]),
            cloud_watch.GraphWidget(
                title="Dynamo Lambda Duration",
                width=8,
                stacked=True,
                left=[
                    dynamo_lambda.metric_duration(statistic="p50"),
                    dynamo_lambda.metric_duration(statistic="p90"),
                    dynamo_lambda.metric_duration(statistic="p99")
                ]),
            cloud_watch.GraphWidget(title="Dynamo Lambda Throttle %",
                                    width=8,
                                    left=[lambda_throttled_perc]),
            cloud_watch.GraphWidget(
                title="DynamoDB Latency",
                width=8,
                stacked=True,
                left=[
                    table.metric_successful_request_latency(
                        dimensions={
                            "TableName": table.table_name,
                            "Operation": "GetItem"
                        }),
                    table.metric_successful_request_latency(
                        dimensions={
                            "TableName": table.table_name,
                            "Operation": "UpdateItem"
                        }),
                    table.metric_successful_request_latency(
                        dimensions={
                            "TableName": table.table_name,
                            "Operation": "PutItem"
                        }),
                    table.metric_successful_request_latency(
                        dimensions={
                            "TableName": table.table_name,
                            "Operation": "DeleteItem"
                        }),
                    table.metric_successful_request_latency(
                        dimensions={
                            "TableName": table.table_name,
                            "Operation": "Query"
                        }),
                ]),
            cloud_watch.GraphWidget(
                title="DynamoDB Consumed Read/Write Units",
                width=8,
                stacked=False,
                left=[
                    table.metric(metric_name="ConsumedReadCapacityUnits"),
                    table.metric(metric_name="ConsumedWriteCapacityUnits")
                ]),
            cloud_watch.GraphWidget(
                title="DynamoDB Throttles",
                width=8,
                stacked=True,
                left=[
                    table.metric(metric_name="ReadThrottleEvents",
                                 statistic="sum"),
                    table.metric(metric_name="WriteThrottleEvents",
                                 statistic="sum")
                ]),
        )
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        env = kwargs['env']

        work_dir = pathlib.Path(__file__).parents[1]

        # The steps below allow reusing the ECS cluster already created by the shared stack

        # Get cluster name from ssm parameter
        cluster_name = ssm.StringParameter.from_string_parameter_name(
            self,
            "GetClusterName",
            string_parameter_name="/dev/compute/container/ecs-cluster-name"
        ).string_value

        vpc_az = ssm.StringListParameter.from_string_list_parameter_name(
            self,
            "GetVpcAz",
            string_list_parameter_name="/dev/network/vpc/vpc-az"
        ).string_list_value

        # using String parameters instead of a StringList because of a subnet parsing issue
        vpc_public_subnets_1 = ssm.StringParameter.from_string_parameter_name(
            self,
            "GetVpcPublicSubnets1",
            string_parameter_name="/dev/network/vpc/vpc-public-subnets-1"
        ).string_value

        vpc_public_subnets_2 = ssm.StringParameter.from_string_parameter_name(
            self,
            "GetVpcPublicSubnets2",
            string_parameter_name="/dev/network/vpc/vpc-public-subnets-2"
        ).string_value

        vpc_id = ssm.StringParameter.from_string_parameter_name(
            self, "GetVpcId",
            string_parameter_name="/dev/network/vpc/vpc-id").string_value

        ec2_vpc = ec2.Vpc.from_vpc_attributes(
            self,
            "GetVpc",
            availability_zones=vpc_az,
            vpc_id=vpc_id,
            public_subnet_ids=[vpc_public_subnets_1, vpc_public_subnets_2])

        # Get security group id from ssm parameter
        security_group_id = ssm.StringParameter.from_string_parameter_name(
            self,
            "GetSgId",
            string_parameter_name="/dev/network/vpc/security-group-id"
        ).string_value

        # Get security group from lookup
        ec2_sgp = ec2.SecurityGroup.from_security_group_id(
            self, "GetSgp", security_group_id=security_group_id)

        # myDateTimeFunction lambda function
        my_datetime_lambda = _lambda.Function(
            self,
            "my-datetime",
            runtime=_lambda.Runtime.NODEJS_12_X,
            handler="myDateTimeFunction.handler",
            code=_lambda.Code.asset("./lambda"),
            current_version_options=_lambda.VersionOptions(
                removal_policy=core.RemovalPolicy.RETAIN, retry_attempts=1))

        my_datetime_lambda.add_to_role_policy(
            iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                actions=["lambda:InvokeFunction"],
                                resources=["*"]))

        # beforeAllowTraffic lambda function
        pre_traffic_lambda = _lambda.Function(
            self,
            "pre-traffic",
            runtime=_lambda.Runtime.NODEJS_12_X,
            handler="beforeAllowTraffic.handler",
            code=_lambda.Code.asset("./lambda"),
            environment=dict(
                NewVersion=my_datetime_lambda.current_version.function_arn))

        pre_traffic_lambda.add_to_role_policy(
            iam.PolicyStatement(
                effect=iam.Effect.ALLOW,
                actions=["codedeploy:PutLifecycleEventHookExecutionStatus"],
                resources=["*"]))

        pre_traffic_lambda.add_to_role_policy(
            iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                actions=["lambda:InvokeFunction"],
                                resources=["*"]))

        # afterAllowTraffic lambda function
        post_traffic_lambda = _lambda.Function(
            self,
            "post-traffic",
            runtime=_lambda.Runtime.NODEJS_12_X,
            handler="afterAllowTraffic.handler",
            code=_lambda.Code.asset("./lambda"),
            environment=dict(
                NewVersion=my_datetime_lambda.current_version.function_arn))

        post_traffic_lambda.add_to_role_policy(
            iam.PolicyStatement(
                effect=iam.Effect.ALLOW,
                actions=["codedeploy:PutLifecycleEventHookExecutionStatus"],
                resources=["*"]))

        post_traffic_lambda.add_to_role_policy(
            iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                actions=["lambda:InvokeFunction"],
                                resources=["*"]))

        # create a cloudwatch event rule
        rule = events.Rule(
            self,
            "CanaryRule",
            schedule=events.Schedule.expression("rate(10 minutes)"),
            targets=[
                events_targets.LambdaFunction(
                    my_datetime_lambda.current_version)
            ],
        )

        # create a CloudWatch alarm based on the Lambda current version's invocations metric
        alarm = cloudwatch.Alarm(
            self,
            "CanaryAlarm",
            metric=my_datetime_lambda.current_version.metric_invocations(),
            threshold=0,
            evaluation_periods=2,
            datapoints_to_alarm=2,
            treat_missing_data=cloudwatch.TreatMissingData.IGNORE,
            period=core.Duration.minutes(5),
            alarm_name="CanaryAlarm")

        lambda_deployment_group = codedeploy.LambdaDeploymentGroup(
            self,
            "datetime-lambda-deployment",
            alias=my_datetime_lambda.current_version.add_alias("live"),
            deployment_config=codedeploy.LambdaDeploymentConfig.ALL_AT_ONCE,
            alarms=[alarm],
            auto_rollback=codedeploy.AutoRollbackConfig(
                deployment_in_alarm=True),
            pre_hook=pre_traffic_lambda,
            post_hook=post_traffic_lambda)
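
        # ALL_AT_ONCE shifts 100% of traffic immediately. For a true canary you
        # would typically pick one of the gradual LambdaDeploymentConfig presets
        # instead (e.g. a "canary 10 percent for 5 minutes" or "linear 10 percent
        # every minute" configuration -- the exact constant names depend on the
        # CDK version in use), so the pre/post hooks and the alarm above have a
        # chance to trigger a rollback before all traffic has moved.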

        # Pass the VPC, security group and cluster name to import the existing ECS cluster
        ecs_cluster = ecs.Cluster.from_cluster_attributes(
            self,
            "GetEcsCluster",
            cluster_name=cluster_name,
            vpc=ec2_vpc,
            security_groups=[ec2_sgp])

        # Fargate Service
        task_definition = ecs.FargateTaskDefinition(
            self,
            "TaskDef",
            memory_limit_mib=512,
            cpu=256,
        )

        container = task_definition.add_container(
            "web",
            image=ecs.ContainerImage.from_asset(
                os.path.join(work_dir, "container")),
            # Build a custom health check specific to your application
            # and add it here, e.g. ping check, database connectivity, etc.
            health_check=ecs.HealthCheck(command=["CMD-SHELL", "echo"]),
            # environment=dict(name="latest")
        )
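
        # A more application-specific health check is usually preferable to the
        # bare "echo" above. As a sketch only (the /health endpoint and timings
        # are assumptions, not part of this container):
        #
        #     health_check=ecs.HealthCheck(
        #         command=["CMD-SHELL",
        #                  "curl -f http://localhost:8000/health || exit 1"],
        #         interval=core.Duration.seconds(30),
        #         timeout=core.Duration.seconds(5),
        #         retries=3),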

        port_mapping = ecs.PortMapping(container_port=8000,
                                       protocol=ecs.Protocol.TCP)

        container.add_port_mappings(port_mapping)

        # Create Fargate Service
        # Current limitation: Blue/Green deployment is not yet supported here, see
        # https://github.com/aws/aws-cdk/issues/1559
        service = ecs.FargateService(
            self,
            "Service",
            cluster=ecs_cluster,
            task_definition=task_definition,
            assign_public_ip=True,
            deployment_controller=ecs.DeploymentController(
                type=ecs.DeploymentControllerType.ECS),
            desired_count=2,
            min_healthy_percent=50)

        # Create Application LoadBalancer
        lb = elbv2.ApplicationLoadBalancer(self,
                                           "LB",
                                           vpc=ec2_vpc,
                                           internet_facing=True)

        # Add listener to the LB
        listener = lb.add_listener("Listener", port=80, open=True)

        # Default to Lambda
        listener.add_targets(
            "Lambda", targets=[elb_targets.LambdaTarget(my_datetime_lambda)])

        # Additionally route to container
        listener.add_targets("Fargate",
                             port=8000,
                             path_pattern="/container",
                             priority=10,
                             targets=[service])

        # add an output with a well-known name to read it from the integ tests
        self.load_balancer_dns_name = lb.load_balancer_dns_name
Example #21
    def __init__(self, scope: core.Construct, id: str, vpc: aws_ec2.Vpc,
                 ecs_cluster: aws_ecs.Cluster,
                 alb: elbv2.ApplicationLoadBalancer,
                 albTestListener: elbv2.ApplicationListener,
                 albProdListener: elbv2.ApplicationListener,
                 FlaskBlueGroup: elbv2.ApplicationTargetGroup,
                 FlaskGreenGroup: elbv2.ApplicationTargetGroup,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        ECS_APP_NAME = "Flask-app"
        ECS_DEPLOYMENT_GROUP_NAME = "FlaskAppECSBlueGreen"
        ECS_DEPLOYMENT_CONFIG_NAME = "CodeDeployDefault.ECSAllAtOnce"
        ECS_TASKSET_TERMINATION_WAIT_TIME = 10
        ECS_TASK_FAMILY_NAME = "Flask-microservice"
        ECS_APP_NAME = "Flask-microservice"
        ECS_APP_LOG_GROUP_NAME = "/ecs/Flask-microservice"
        DUMMY_TASK_FAMILY_NAME = "sample-Nginx-microservice"
        DUMMY_APP_NAME = "sample-Nginx-microservice"
        DUMMY_APP_LOG_GROUP_NAME = "/ecs/sample-Nginx-microservice"
        DUMMY_CONTAINER_IMAGE = "smuralee/nginx"


        # =============================================================================
        # ECR and CodeCommit repositories for the Blue/Green deployment
        # =============================================================================

        # ECR repository for the docker images
        FlaskecrRepo = aws_ecr.Repository(self, "FlaskRepo",
            image_scan_on_push=True
        )

        FlaskCodeCommitrepo = aws_codecommit.Repository(self, "FlaskRepository",
            repository_name=ECS_APP_NAME,
            description="Oussama Flask application"
        )

        # =============================================================================
        # CODE BUILD and ECS TASK ROLES for the Blue/Green deployment
        # =============================================================================

        # IAM role for the Code Build project
        FlaskcodeBuildServiceRole = aws_iam.Role(self, "FlaskcodeBuildServiceRole",
            assumed_by=aws_iam.ServicePrincipal('codebuild.amazonaws.com')
        )

        inlinePolicyForCodeBuild = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            actions=[
                "ecr:GetAuthorizationToken",
                "ecr:BatchCheckLayerAvailability",
                "ecr:InitiateLayerUpload",
                "ecr:UploadLayerPart",
                "ecr:CompleteLayerUpload",
                "ecr:PutImage"
            ],
            resources=["*"]
        )

        FlaskcodeBuildServiceRole.add_to_policy(inlinePolicyForCodeBuild)

        # ECS task role
        FlaskecsTaskRole = aws_iam.Role(self, "FlaskecsTaskRole", 
            assumed_by=aws_iam.ServicePrincipal('ecs-tasks.amazonaws.com')
        )

        FlaskecsTaskRole.add_managed_policy(aws_iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AmazonECSTaskExecutionRolePolicy"))

        # =============================================================================
        # CODE BUILD PROJECT for the Blue/Green deployment
        # =============================================================================

        # Creating the code build project
        FlaskAppcodebuild = aws_codebuild.Project(self, "FlaskAppCodeBuild",
            role=FlaskcodeBuildServiceRole,
            environment=aws_codebuild.BuildEnvironment(
                build_image=aws_codebuild.LinuxBuildImage.STANDARD_4_0,
                compute_type=aws_codebuild.ComputeType.SMALL,
                privileged=True,
                environment_variables={
                    'REPOSITORY_URI':{
                        'value': FlaskecrRepo.repository_uri,
                        'type': aws_codebuild.BuildEnvironmentVariableType.PLAINTEXT
                    },
                    'TASK_EXECUTION_ARN':{
                        'value': FlaskecsTaskRole.role_arn,
                        'type': aws_codebuild.BuildEnvironmentVariableType.PLAINTEXT
                    },
                    'TASK_FAMILY': {
                        'value': ECS_TASK_FAMILY_NAME,
                        'type': aws_codebuild.BuildEnvironmentVariableType.PLAINTEXT
                    }
                }
            ),
            source=aws_codebuild.Source.code_commit(repository=FlaskCodeCommitrepo)
        )

        # =============================================================================
        # CODE DEPLOY APPLICATION for the Blue/Green deployment
        # =============================================================================

        # Creating the code deploy application
        FlaskcodeDeployApplication = codedeploy.EcsApplication(self, "FlaskAppCodeDeploy")

        # Creating the code deploy service role
        FlaskcodeDeployServiceRole = aws_iam.Role(self, "FlaskcodeDeployServiceRole",
            assumed_by=aws_iam.ServicePrincipal('codedeploy.amazonaws.com')
        )
        FlaskcodeDeployServiceRole.add_managed_policy(aws_iam.ManagedPolicy.from_aws_managed_policy_name("AWSCodeDeployRoleForECS"))


        # IAM role for custom lambda function
        FlaskcustomLambdaServiceRole = aws_iam.Role(self, "FlaskcodeDeployCustomLambda",
            assumed_by= aws_iam.ServicePrincipal('lambda.amazonaws.com')
        )

        inlinePolicyForLambda = aws_iam.PolicyStatement(
            effect= aws_iam.Effect.ALLOW,
            actions=[
                "iam:PassRole",
                "sts:AssumeRole",
                "codedeploy:List*",
                "codedeploy:Get*",
                "codedeploy:UpdateDeploymentGroup",
                "codedeploy:CreateDeploymentGroup",
                "codedeploy:DeleteDeploymentGroup"
            ],
            resources= ["*"]
        )

        FlaskcustomLambdaServiceRole.add_managed_policy(aws_iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSLambdaBasicExecutionRole'))
        FlaskcustomLambdaServiceRole.add_to_policy(inlinePolicyForLambda)

        # Custom resource to create the deployment group
        createFlaskDeploymentGroupLambda = aws_lambda.Function(self, 'createFlaskDeploymentGroupLambda',
            code = aws_lambda.Code.from_asset("custom_resources"),
            runtime= aws_lambda.Runtime.PYTHON_3_8,
            handler= 'create_deployment_group.handler',
            role= FlaskcustomLambdaServiceRole,
            description= "Custom resource to create deployment group",
            memory_size= 128,
            timeout= core.Duration.seconds(60)
        )
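
        # custom_resources/create_deployment_group.py itself is not part of this
        # listing. A minimal sketch of what such a handler typically does -- an
        # assumption about its contents, not the original file -- is to call the
        # CodeDeploy API directly, because CDK could not yet model ECS Blue/Green
        # deployment groups natively:
        #
        #     import boto3
        #
        #     def handler(event, context):
        #         props = event["ResourceProperties"]
        #         client = boto3.client("codedeploy")
        #         if event["RequestType"] == "Create":
        #             client.create_deployment_group(
        #                 applicationName=props["ApplicationName"],
        #                 deploymentGroupName=props["DeploymentGroupName"],
        #                 deploymentConfigName=props["DeploymentConfigName"],
        #                 serviceRoleArn=props["ServiceRoleArn"],
        #                 deploymentStyle={
        #                     "deploymentType": "BLUE_GREEN",
        #                     "deploymentOption": "WITH_TRAFFIC_CONTROL"},
        #                 ecsServices=[{
        #                     "serviceName": props["EcsServiceName"],
        #                     "clusterName": props["EcsClusterName"]}],
        #                 loadBalancerInfo={"targetGroupPairInfoList": [{
        #                     "targetGroups": [
        #                         {"name": props["BlueTargetGroup"]},
        #                         {"name": props["GreenTargetGroup"]}],
        #                     "prodTrafficRoute": {
        #                         "listenerArns": [props["ProdListenerArn"]]},
        #                     "testTrafficRoute": {
        #                         "listenerArns": [props["TestListenerArn"]]}}]})
        #         # A real handler must also cover Update/Delete requests and
        #         # signal success or failure back to CloudFormation.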

 
        # ================================================================================================
        # CloudWatch Alarms for 4XX errors
        # ================================================================================================
        Flaskblue4xxMetric = aws_cloudwatch.Metric(
            namespace= 'AWS/ApplicationELB',
            metric_name= 'HTTPCode_Target_4XX_Count',
            dimensions={
                "TargetGroup":FlaskBlueGroup.target_group_full_name,
                "LoadBalancer":alb.load_balancer_full_name
            },
            statistic="sum",
            period=core.Duration.minutes(1)
        )

        FlaskblueGroupAlarm = aws_cloudwatch.Alarm(self, "Flaskblue4xxErrors",
            alarm_name= "FlaskBlue_4xx_Alarm",
            alarm_description= "CloudWatch Alarm for the 4xx errors of Blue target group",
            metric= Flaskblue4xxMetric,
            threshold= 1,
            evaluation_periods= 1
        )

        Flaskgreen4xxMetric = aws_cloudwatch.Metric(
            namespace= 'AWS/ApplicationELB',
            metric_name= 'HTTPCode_Target_4XX_Count',
            dimensions= {
                "TargetGroup":FlaskGreenGroup.target_group_full_name,
                "LoadBalancer":alb.load_balancer_full_name
            },
            statistic= "sum",
            period= core.Duration.minutes(1)
        )
        FlaskgreenGroupAlarm = aws_cloudwatch.Alarm(self, "Flaskgreen4xxErrors",
            alarm_name= "FlaskGreen_4xx_Alarm",
            alarm_description= "CloudWatch Alarm for the 4xx errors of Green target group",
            metric= Flaskgreen4xxMetric,
            threshold= 1,
            evaluation_periods= 1
        )

        # ================================================================================================
        # ECS task definition using ECR image
        # Will be used by CodeDeploy for the Blue/Green deployment
        # ================================================================================================
        FlaskTaskDefinition = aws_ecs.FargateTaskDefinition(self, "FlaskappTaskDefn", 
            family= ECS_TASK_FAMILY_NAME,
            cpu= 256,
            memory_limit_mib= 1024,
            task_role= FlaskecsTaskRole,
            execution_role= FlaskecsTaskRole
        )

        FlaskcontainerDefinition = FlaskTaskDefinition.add_container("FlaskAppContainer",
            image= aws_ecs.ContainerImage.from_ecr_repository(FlaskecrRepo, "latest"),
            logging= aws_ecs.AwsLogDriver(
                log_group= aws_logs.LogGroup(self, "FlaskAppLogGroup",
                    log_group_name= ECS_APP_LOG_GROUP_NAME,
                    removal_policy= core.RemovalPolicy.DESTROY
                ),
                stream_prefix=ECS_APP_NAME
            ),
            docker_labels= {
                "name": ECS_APP_NAME
            }
        )

        port_mapping = aws_ecs.PortMapping(
            container_port=80,
            protocol=aws_ecs.Protocol.TCP
        )

        FlaskcontainerDefinition.add_port_mappings(port_mapping)

        # =============================================================================
        # ECS SERVICE for the Blue/Green deployment
        # =============================================================================
        FlaskAppService = aws_ecs.FargateService(self, "FlaskAppService",
            cluster=ecs_cluster,
            task_definition= FlaskTaskDefinition,
            health_check_grace_period= core.Duration.seconds(10),
            desired_count= 3,
            deployment_controller= {
                "type": aws_ecs.DeploymentControllerType.CODE_DEPLOY
            },
            service_name= ECS_APP_NAME
        )

        FlaskAppService.connections.allow_from(alb, aws_ec2.Port.tcp(80))
        FlaskAppService.connections.allow_from(alb, aws_ec2.Port.tcp(8080))
        FlaskAppService.attach_to_application_target_group(FlaskBlueGroup)

        # =============================================================================
        # CODE DEPLOY - Deployment Group CUSTOM RESOURCE for the Application deployment
        # =============================================================================


        core.CustomResource(self, 'FlaskcustomEcsDeploymentGroup',
            service_token= createFlaskDeploymentGroupLambda.function_arn,
            properties= {
                "ApplicationName": FlaskcodeDeployApplication.application_name,
                "DeploymentGroupName": ECS_DEPLOYMENT_GROUP_NAME,
                "DeploymentConfigName": ECS_DEPLOYMENT_CONFIG_NAME,
                "ServiceRoleArn": FlaskcodeDeployServiceRole.role_arn,
                "BlueTargetGroup": FlaskBlueGroup.target_group_name,
                "GreenTargetGroup": FlaskGreenGroup.target_group_name,
                "ProdListenerArn": albProdListener.listener_arn,
                "TestListenerArn": albTestListener.listener_arn,
                "EcsClusterName": ecs_cluster.cluster_name,
                "EcsServiceName": FlaskAppService.service_name,
                "TerminationWaitTime": ECS_TASKSET_TERMINATION_WAIT_TIME,
                "BlueGroupAlarm": FlaskblueGroupAlarm.alarm_name,
                "GreenGroupAlarm": FlaskgreenGroupAlarm.alarm_name,
            }
        )

        FlaskecsDeploymentGroup = codedeploy.EcsDeploymentGroup.from_ecs_deployment_group_attributes(self, "FlaskecsDeploymentGroup",
            application= FlaskcodeDeployApplication,
            deployment_group_name= ECS_DEPLOYMENT_GROUP_NAME,
            deployment_config= codedeploy.EcsDeploymentConfig.from_ecs_deployment_config_name(self, "FlaskecsDeploymentConfig", ECS_DEPLOYMENT_CONFIG_NAME)
        )
        # =============================================================================
        # CODE PIPELINE for ECS deployment
        # =============================================================================

        FlaskcodePipelineServiceRole = aws_iam.Role(self, "FlaskcodePipelineServiceRole", 
            assumed_by=aws_iam.ServicePrincipal('codepipeline.amazonaws.com')
        )

        inlinePolicyForCodePipeline = aws_iam.PolicyStatement(
            effect= aws_iam.Effect.ALLOW,
            actions= [
                "iam:PassRole",
                "sts:AssumeRole",
                "codecommit:Get*",
                "codecommit:List*",
                "codecommit:GitPull",
                "codecommit:UploadArchive",
                "codecommit:CancelUploadArchive",
                "codebuild:BatchGetBuilds",
                "codebuild:StartBuild",
                "codedeploy:CreateDeployment",
                "codedeploy:Get*",
                "codedeploy:RegisterApplicationRevision",
                "s3:Get*",
                "s3:List*",
                "s3:PutObject"
            ],
            resources= ["*"]
        )

        FlaskcodePipelineServiceRole.add_to_policy(inlinePolicyForCodePipeline)

        sourceArtifact = codepipeline.Artifact('sourceArtifact')
        buildArtifact = codepipeline.Artifact('buildArtifact')

        # S3 bucket for storing the code pipeline artifacts
        FlaskAppArtifactsBucket = s3.Bucket(self, "FlaskAppArtifactsBucket",
            encryption= s3.BucketEncryption.S3_MANAGED,
            block_public_access= s3.BlockPublicAccess.BLOCK_ALL
        )

        # S3 bucket policy for the code pipeline artifacts
        FlaskBucketdenyUnEncryptedObjectUploads = aws_iam.PolicyStatement(
            effect= aws_iam.Effect.DENY,
            actions= ["s3:PutObject"],
            principals= [aws_iam.AnyPrincipal()],
            resources= [FlaskAppArtifactsBucket.bucket_arn+"/*"],
            conditions={
                "StringNotEquals":{
                    "s3:x-amz-server-side-encryption": "aws:kms"
                }
            }
        )

        FlaskBucketdenyInsecureConnections = aws_iam.PolicyStatement(
            effect= aws_iam.Effect.DENY,
            actions= ["s3:*"],
            principals= [aws_iam.AnyPrincipal()],
            resources= [FlaskAppArtifactsBucket.bucket_arn+"/*"],
            conditions= {
                "Bool":{
                    "aws:SecureTransport": "false"
                }
            }
        )

        FlaskAppArtifactsBucket.add_to_resource_policy(FlaskBucketdenyUnEncryptedObjectUploads)
        FlaskAppArtifactsBucket.add_to_resource_policy(FlaskBucketdenyInsecureConnections)

        # Code Pipeline - CloudWatch trigger event is created by CDK
        codepipeline.Pipeline(self, "FlaskECSPipeline", 
            role= FlaskcodePipelineServiceRole,
            artifact_bucket= FlaskAppArtifactsBucket,
            stages=[
                codepipeline.StageProps(
                    stage_name='Source',
                    actions= [
                        aws_codepipeline_actions.CodeCommitSourceAction(
                            action_name= 'Source',
                            repository= FlaskCodeCommitrepo,
                            output= sourceArtifact,
                        )
                    ]
                ),
                codepipeline.StageProps(
                    stage_name= 'Build',
                    actions= [
                        aws_codepipeline_actions.CodeBuildAction(
                            action_name= 'Build',
                            project= FlaskAppcodebuild,
                            input= sourceArtifact,
                            outputs= [buildArtifact]
                        )
                    ]
                ),
                codepipeline.StageProps(
                    stage_name= 'Deploy',
                    actions= [
                        aws_codepipeline_actions.CodeDeployEcsDeployAction(
                            action_name= 'Deploy',
                            deployment_group= FlaskecsDeploymentGroup,
                            app_spec_template_input= buildArtifact,
                            task_definition_template_input= buildArtifact,
                        )
                    ]
                )
            ]
        )
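
        # The Deploy stage expects the build artifact to contain two files at its
        # root: taskdef.json (the task definition template) and appspec.yaml (the
        # CodeDeploy application specification) -- those are the default file
        # names CodeDeployEcsDeployAction looks for. As a sketch of the appspec
        # the buildspec would have to emit (container name and port are taken
        # from the task definition above and are otherwise assumptions):
        #
        #     version: 0.0
        #     Resources:
        #       - TargetService:
        #           Type: AWS::ECS::Service
        #           Properties:
        #             TaskDefinition: <TASK_DEFINITION>
        #             LoadBalancerInfo:
        #               ContainerName: "FlaskAppContainer"
        #               ContainerPort: 80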

        # =============================================================================
        # Export the outputs
        # =============================================================================
        core.CfnOutput(self, "FlaskECSCodeRepo", 
            description= "Flask code commit repository",
            export_name= "FlaskAppRepo",
            value= FlaskCodeCommitrepo.repository_clone_url_http
        )

        core.CfnOutput(self, "FlaskLBDns", 
            description= "Load balancer DNS",
            export_name= "FlaskLBDns",
            value= alb.load_balancer_dns_name
        )
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        queue = sqs.Queue(self,
                          "StartProwlerScan",
                          receive_message_wait_time=core.Duration.seconds(20),
                          visibility_timeout=core.Duration.seconds(7200))
        push_all_active_accounts_onto_queue_lambda_function = lambda_.Function(
            self,
            "PushAllActiveAccountsOntoQueue",
            runtime=lambda_.Runtime.PYTHON_3_8,
            code=lambda_.Code.asset("lambda/pushAllActiveActivesOntoQueue"),
            handler="lambda_function.lambda_handler",
            environment={"SQS_QUEUE_URL": queue.queue_url})
        event_lambda_target = events_targets.LambdaFunction(
            handler=push_all_active_accounts_onto_queue_lambda_function)
        queue.grant_send_messages(
            push_all_active_accounts_onto_queue_lambda_function)
        schedule = events.Schedule.rate(core.Duration.days(1))
        events.Rule(self,
                    "DailyTrigger",
                    schedule=schedule,
                    targets=[event_lambda_target])

        vpc = ec2.Vpc(self, "Vpc")
        cluster = ecs.Cluster(self, "Cluster", vpc=vpc)
        logging = ecs.AwsLogDriver(stream_prefix="ProwlerTask",
                                   log_retention=logs.RetentionDays.ONE_DAY)
        results_bucket = s3.Bucket(self, "ResultsBucket")
        dockerfile_directory = path.join(path.dirname(path.realpath(__file__)),
                                         "docker")
        image = ecr_assets.DockerImageAsset(self,
                                            "ProwlerImageBuild",
                                            directory=dockerfile_directory)
        prowler_task = ecs.FargateTaskDefinition(self,
                                                 "ProwlerTaskDefinition",
                                                 cpu=256,
                                                 memory_limit_mib=512)
        prowler_task.add_container(
            "Prowler_image",
            image=ecs.ContainerImage.from_docker_image_asset(image),
            logging=logging,
            environment={
                "RESULTS_BUCKET": results_bucket.bucket_name,
                "SQS_QUEUE_URL": queue.queue_url
            })
        task_role = prowler_task.task_role
        task_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name("ReadOnlyAccess"))
        queue.grant(task_role, "sqs:DeleteMessage")
        results_bucket.grant_put(task_role)
        task_role.attach_inline_policy(
            iam.Policy(self,
                       "AssumeRolePermissions",
                       statements=[
                           iam.PolicyStatement(actions=["sts:AssumeRole"],
                                               effect=iam.Effect.ALLOW,
                                               resources=["*"])
                       ]))
        run_fargate_task_lambda_function = lambda_.Function(
            self,
            "RunFargateTask",
            runtime=lambda_.Runtime.PYTHON_3_8,
            code=lambda_.Code.asset("lambda/runFargateTask"),
            handler="lambda_function.lambda_handler",
            environment={
                "CLUSTER_ARN":
                cluster.cluster_arn,
                "SUBNET_IDS":
                json.dumps(
                    [subnet.subnet_id for subnet in vpc.private_subnets]),
                "QUEUE_URL":
                queue.queue_url,
                "TASK_DEFINITION_ARN":
                prowler_task.task_definition_arn
            })
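
        # lambda/runFargateTask is likewise not part of this listing. Its job, as a
        # hedged sketch based on the environment variables wired up above (not the
        # original source), is to start a Fargate task whenever the alarm queue
        # signals pending work:
        #
        #     import json
        #     import os
        #     import boto3
        #
        #     def lambda_handler(event, context):
        #         ecs = boto3.client("ecs")
        #         ecs.run_task(
        #             cluster=os.environ["CLUSTER_ARN"],
        #             taskDefinition=os.environ["TASK_DEFINITION_ARN"],
        #             launchType="FARGATE",
        #             networkConfiguration={"awsvpcConfiguration": {
        #                 "subnets": json.loads(os.environ["SUBNET_IDS"]),
        #                 "assignPublicIp": "DISABLED"}})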
        queue.grant(run_fargate_task_lambda_function, "sqs:GetQueueAttributes")
        sqs_alarm_topic = sns.Topic(self, "SqsAlarmTopic")
        sqs_alarm_topic.grant_publish(run_fargate_task_lambda_function)
        sqs_alarm_queue = sqs.Queue(
            self,
            "SqsAlarmQueue",
            retention_period=core.Duration.days(14),
            visibility_timeout=core.Duration.minutes(3))
        sqs_alarm_topic.add_subscription(
            sns_subscriptions.SqsSubscription(sqs_alarm_queue))
        run_fargate_task_lambda_function.add_event_source(
            lambda_event_sources.SqsEventSource(sqs_alarm_queue))
        run_fargate_task_lambda_function.add_to_role_policy(
            iam.PolicyStatement(actions=["ecs:RunTask"],
                                effect=iam.Effect.ALLOW,
                                resources=[prowler_task.task_definition_arn]))
        run_fargate_task_lambda_function.add_to_role_policy(
            iam.PolicyStatement(actions=["iam:PassRole"],
                                effect=iam.Effect.ALLOW,
                                resources=[
                                    prowler_task.execution_role.role_arn,
                                    prowler_task.task_role.role_arn
                                ]))
        sqs_ok_topic = sns.Topic(self, "SqsOkTopic")
        clear_alarm_queue = lambda_.Function(
            self,
            "ClearAlarmQueue",
            runtime=lambda_.Runtime.PYTHON_3_8,
            code=lambda_.Code.asset("lambda/clearAlarmQueue"),
            handler="lambda_function.lambda_handler",
            environment={"QUEUE_URL": sqs_alarm_queue.queue_url})
        clear_alarm_queue.add_event_source(
            lambda_event_sources.SnsEventSource(sqs_ok_topic))
        sqs_alarm_queue.grant(clear_alarm_queue, "sqs:DeleteMessage")

        alarm = cloudwatch.Alarm(
            self,
            "FargateTaskTrigger",
            metric=queue.metric_approximate_number_of_messages_visible(
                period=core.Duration.seconds(60), statistic="max"),
            evaluation_periods=1,
            threshold=1,
            alarm_description="Run a fargate task when there "
            "are messages in the queue",
            treat_missing_data=cloudwatch.TreatMissingData.IGNORE)
        alarm.add_alarm_action(cloudwatch_actions.SnsAction(sqs_alarm_topic))
        alarm.add_ok_action(cloudwatch_actions.SnsAction(sqs_ok_topic))