Example #1
    def add_monitoring(self, monitoring):

        resource_metrics = [
            (self.new_recording_resource, "4XXError"),
            (self.new_recording_resource, "5XXError"),
            (self.ingest_resource, "5XXError"),
        ]
        for resource, metric_name in resource_metrics:
            construct_id = (
                f"{metric_name}-{resource.path.replace('/', '_')}-alarm"
            )
            alarm = cloudwatch.Alarm(
                self,
                construct_id,
                metric=cloudwatch.Metric(
                    metric_name=metric_name,
                    namespace="AWS/ApiGateway",
                    dimensions={
                        "ApiName": self.rest_api_name,
                        "Stage": names.API_STAGE,
                        "Method": "POST",
                        "Resource": resource.path,
                    },
                    period=core.Duration.minutes(1),
                ),
                statistic="sum",
                threshold=1,
                evaluation_periods=1,
                comparison_operator=cloudwatch.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            )
            monitoring.add_alarm_action(alarm)

        webhook_latency_alarm = cloudwatch.Alarm(
            self,
            "WebhookLatencyAlarm",
            metric=cloudwatch.Metric(
                metric_name="Latency",
                namespace="AWS/ApiGateway",
                dimensions={
                    "ApiName": self.rest_api_name,
                    "Stage": names.API_STAGE,
                    "Method": "POST",
                    "Resource": self.new_recording_resource.path,
                },
                period=core.Duration.minutes(1),
            ),
            statistic="avg",
            threshold=10000,
            evaluation_periods=3,
            comparison_operator=cloudwatch.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
        )
        monitoring.add_alarm_action(webhook_latency_alarm)
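        # Note: `monitoring` is assumed to be a helper construct exposing
        # add_alarm_action(alarm) that routes each alarm to a notification
        # target (e.g. an SNS topic); its interface is inferred from the
        # calls above.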
Example #2
    def createOps(self):
        alarmTopic = sns.Topic(self, 'TipBotAlarmTopic',
            display_name='TipBotAlarmTopic',
            fifo=False,
        )
        alarmTopic.add_subscription(snss.EmailSubscription(self.getEmail(), json=True))

        cw.CompositeAlarm(self, 'TipBotCompositeAlarm',
            alarm_rule=cw.AlarmRule.any_of(
                cw.Alarm(self, "LNDAlarm",
                    metric=cw.Metric(
                        metric_name='LndUp',
                        namespace='LNTipBot',
                        period=cdk.Duration.minutes(1),
                        statistic='sum',
                        unit=cw.Unit.NONE,
                    ),
                    threshold=1,
                    actions_enabled=False,
                    alarm_description='Alarm for when the LND service has gone down',
                    alarm_name='LND Alarm',
                    comparison_operator=cw.ComparisonOperator.LESS_THAN_THRESHOLD,
                    datapoints_to_alarm=5,
                    evaluation_periods=5,
                    treat_missing_data=cw.TreatMissingData.BREACHING
                ),
                cw.Alarm(self, "BTCAlarm",
                    metric=cw.Metric(
                        metric_name='BtcUp',
                        namespace='LNTipBot',
                        period=cdk.Duration.minutes(1),
                        statistic='sum',
                        unit=cw.Unit.NONE,
                    ),
                    threshold=1,
                    actions_enabled=False,
                    alarm_description='Alarm for when the BTC service has gone down',
                    alarm_name='BTC Alarm',
                    comparison_operator=cw.ComparisonOperator.LESS_THAN_THRESHOLD,
                    datapoints_to_alarm=5,
                    evaluation_periods=5,
                    treat_missing_data=cw.TreatMissingData.BREACHING
                )
            ),
            actions_enabled=True,
            alarm_description='TipBot Composite Alarm',
            composite_alarm_name='TipBot Composite Alarm',
        ).add_alarm_action(cwa.SnsAction(alarmTopic))
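        # Design note: both child alarms are created with actions_enabled=False,
        # so only the composite alarm notifies the SNS topic; an outage that
        # takes down LND and BTC together produces one notification, not two.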
Example #3
    def _add_conditional_storage_widgets(
        self,
        conditional_metrics,
        volumes_list,
        namespace,
        dimension_vol_name,
        vol_attribute_name,
    ):
        """Add widgets for conditional metrics for EBS, Raid and EFS."""
        widgets_list = []
        for metric_condition_params in conditional_metrics:
            metric_list = []
            for volume in volumes_list:
                vol_type = getattr(volume.config, vol_attribute_name)
                if vol_type in metric_condition_params.supported_vol_types:
                    cloudwatch_metric = cloudwatch.Metric(
                        namespace=namespace,
                        metric_name=metric_condition_params.metrics,
                        dimensions={dimension_vol_name: volume.id},
                    )
                    metric_list.append(cloudwatch_metric)

            # Add the metrics only if there are supported volumes for them
            if metric_list:
                graph_widget = self._generate_graph_widget(
                    metric_condition_params.title, metric_list)
                widgets_list.append(graph_widget)
        return widgets_list
Example #4
    def create_cw_alarm_with_action(
        self,
        metric_name,
        threshold,
        comparison_operator,
        period,
        evaluation_periods,
        statistic,
        sns_topic_list=None,
    ) -> None:
        # Create a CloudWatch alarm for the provided metric
        self._cw_alarm = cloudwatch.Alarm(
            self,
            self._domain_name + f"-{metric_name}Alarm",
            metric=cloudwatch.Metric(
                metric_name=metric_name,
                namespace="AWS/ES",
                dimensions={
                    "DomainName": self._domain_name,
                    "ClientId": self._account
                },
            ),
            threshold=threshold,
            comparison_operator=comparison_operator,
            period=core.Duration.minutes(period),
            evaluation_periods=evaluation_periods,
            statistic=statistic,
            treat_missing_data=cloudwatch.TreatMissingData.MISSING,
        )

        # If an SNS topic list is provided, add an alarm action for each topic
        if sns_topic_list:
            self._cw_alarm.add_alarm_action(
                *[cloudwatch_actions.SnsAction(topic) for topic in sns_topic_list])
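
        # A hedged usage sketch (not from the original source); the metric and
        # the alerts_topic below are hypothetical:
        #
        #   self.create_cw_alarm_with_action(
        #       metric_name="ClusterStatus.red",
        #       threshold=1,
        #       comparison_operator=cloudwatch.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
        #       period=5,                  # minutes; wrapped by Duration.minutes()
        #       evaluation_periods=1,
        #       statistic="max",
        #       sns_topic_list=[alerts_topic],  # hypothetical sns.Topic
        #   )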
Example #5
    @staticmethod
    def __create_asg_scaling_policy(asg):
        cpu_utilization = cloudwatch.Metric(
            namespace="AWS/EC2",
            metric_name="CPUUtilization",
            dimensions={"AutoScalingGroupName": asg.auto_scaling_group_name},
            period=core.Duration.minutes(15))

        asg.scale_on_metric(
            "ImagizerClusterCpuTarget",
            metric=cpu_utilization,
            adjustment_type=autoscaling.AdjustmentType.CHANGE_IN_CAPACITY,
            estimated_instance_warmup=core.Duration.seconds(400),
            scaling_steps=[
                autoscaling.ScalingInterval(
                    change=variables.ASG_CAPACITY_INCREASE,
                    lower=variables.ASG_CPU_HIGH_THRESHOLD),
                autoscaling.ScalingInterval(
                    change=-variables.ASG_CAPACITY_DECREASE,
                    upper=variables.ASG_CPU_LOW_THRESHOLD)
            ])

        asg.scale_on_request_count(
            "ImagizerClusterRpsTarget",
            target_requests_per_second=variables.ASG_RPS_THRESHOLD,
            disable_scale_in=True)
Example #6
 def _generate_ec2_metrics_list(self, metrics):
     metric_list = []
     for metric in metrics:
         cloudwatch_metric = cloudwatch.Metric(
             namespace="AWS/EC2",
             metric_name=metric,
             dimensions={"InstanceId": self.head_node_instance.ref})
         metric_list.append(cloudwatch_metric)
     return metric_list
Example #7
 def get_yelp_cleaner_graph():
     return YelpOrchestratorStack.graph_widget(
         "YelpCleanerDeletions",
         *[
             aws_cloudwatch.Metric(
                 namespace="YelpOrchestrator",
                 metric_name=metric_name,
                 statistic="Sum",
                 period=core.Duration.minutes(5),
             ) for metric_name in ("UrlTableRecordsDeleted",
                                   "YelpTableRecordsDeleted")
         ],
     )
Example #8
 def get_s3_graphs(bucket):
     return (YelpOrchestratorStack.graph_widget(
         "ObjectCount",
         aws_cloudwatch.Metric(
             namespace="AWS/S3",
             metric_name="NumberOfObjects",
             dimensions={
                 "StorageType": "AllStorageTypes",
                 "BucketName": bucket.bucket_name,
             },
             statistic="Sum",
             period=core.Duration.minutes(5),
         ),
     ), )
Example #9
 def build_metric(metric_name: str,
                  name_space: str,
                  dimensions,
                  unit: cloud_watch.Unit,
                  label: str,
                  stat: str = 'avg',
                  period: int = 900):
     return cloud_watch.Metric(metric_name=metric_name,
                               namespace=name_space,
                               dimensions=dimensions,
                               unit=unit,
                               label=label,
                               statistic=stat,
                               period=core.Duration.seconds(period))
Example #10
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
        role_arn = 'arn:aws:iam::315207712355:role/lbrole'
        role = iam.Role.from_role_arn(self, id='role_id', role_arn=role_arn)
        # The code that defines your stack goes here
        this_dir = path.dirname(__file__)

        handler = lmb.Function(self,
                               'Handler',
                               runtime=lmb.Runtime.PYTHON_3_7,
                               role=role,
                               handler='handler.handler',
                               code=lmb.Code.from_asset(
                                   path.join(this_dir, 'lambda')))

        alias = lmb.Alias(self,
                          'HandlerAlias',
                          alias_name='Current',
                          version=handler.current_version)

        gw = apigw.LambdaRestApi(
            self,
            'Gateway',
            description='Endpoint for a simple Lambda-powered web service',
            handler=alias)

        failure_alarm = cloudwatch.Alarm(self,
                                         'FailureAlarm',
                                         metric=cloudwatch.Metric(
                                             metric_name='5XXError',
                                             namespace='AWS/ApiGateway',
                                             dimensions={
                                                 'ApiName': 'Gateway',
                                             },
                                             statistic='Sum',
                                             period=core.Duration.minutes(1)),
                                         threshold=1,
                                         evaluation_periods=1)

        codedeploy.LambdaDeploymentGroup(
            self,
            'DeploymentGroup',
            alias=alias,
            deployment_config=codedeploy.LambdaDeploymentConfig.
            CANARY_10_PERCENT_10_MINUTES,
            alarms=[failure_alarm])

        self.url_output = core.CfnOutput(self, 'Url', value=gw.url)
Example #11
 def _add_storage_widgets(self, metrics, storages_list, namespace,
                          dimension_name):
     widgets_list = []
     for metrics_param in metrics:
         metric_list = []
         for metric in metrics_param.metrics:
             for storage in storages_list:
                 cloudwatch_metric = cloudwatch.Metric(
                     namespace=namespace,
                     metric_name=metric,
                     dimensions={dimension_name: storage.id},
                 )
                 metric_list.append(cloudwatch_metric)
         graph_widget = self._generate_graph_widget(metrics_param.title,
                                                    metric_list)
         widgets_list.append(graph_widget)
     return widgets_list
Example #12
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        this_dir = path.dirname(__file__)

        handler = lmb.Function(self, 'Handler',
            runtime=lmb.Runtime.PYTHON_3_7,
            handler='handler.handler',
            code=lmb.Code.from_asset(path.join(this_dir, 'lambda'))
        )
        # For canary deployments
        alias = lmb.Alias(self, 'HandlerAlias',
            alias_name='Current',
            version=handler.current_version
        )

        gw = apigw.LambdaRestApi(self, 'Gateway',
            description='Endpoint for app',
            handler=alias
        )

        failure_alarm = cloudwatch.Alarm(self, 'FailureAlarm',
            metric=cloudwatch.Metric(
                metric_name='5XXError',
                namespace='AWS/ApiGateway',
                dimensions={
                    'ApiName': 'Gateway'
                },
                statistic='Sum',
                period=core.Duration.minutes(1)
            ),
            threshold=1,
            evaluation_periods=1
        )

        codedeploy.LambdaDeploymentGroup(self, 'Deploy',
            alias=alias,
            deployment_config=codedeploy.LambdaDeploymentConfig.CANARY_10_PERCENT_10_MINUTES,
            alarms=[failure_alarm]
        )

        self.url_output = core.CfnOutput(self, 'Url',
            value=gw.url
        )
Example #13
 def get_alarm(self, params):
     description = params['description'].format(params['name'],
                                                self.account)
     metric = cloudwatch.Metric(
         metric_name=params['metric_name'],
         namespace=params['namespace'],
         dimensions={params['dimension']: params['name']})
     alarm = cloudwatch.Alarm(
         self,
         "{}Alarm".format(params['name']),
         alarm_description=description,
         alarm_name=description,
         comparison_operator=cloudwatch.ComparisonOperator(
             'GREATER_THAN_OR_EQUAL_TO_THRESHOLD'),
         metric=metric,
         evaluation_periods=params['evaluation_periods'],
         period=core.Duration.seconds(params['period']),
         statistic=params['statistic'],
         threshold=params['threshold'],
         treat_missing_data=cloudwatch.TreatMissingData('MISSING'))
     return alarm
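
     # A hedged sketch of the params mapping get_alarm expects; the values are
     # hypothetical and only the keys are inferred from the lookups above:
     #
     #   alarm = self.get_alarm({
     #       'name': 'MyQueue',           # dimension value, also used in the id
     #       'description': 'Depth alarm for {} in account {}',
     #       'metric_name': 'ApproximateNumberOfMessagesVisible',
     #       'namespace': 'AWS/SQS',
     #       'dimension': 'QueueName',
     #       'evaluation_periods': 3,
     #       'period': 300,               # seconds; wrapped by Duration.seconds()
     #       'statistic': 'Sum',
     #       'threshold': 100,
     #   })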
Example #14
    def __init__(self, scope: core.Construct, id: str,
                 squid_asgs: list) -> None:
        super().__init__(scope, id)

        # SNS Topic for alarm
        self.squid_alarm_topic = sns.Topic(
            self,
            "squid-asg-alarm-topic",
            display_name='Squid ASG Alarm topic')

        # Metric used to trigger an alarm when the squid process shows no CPU usage
        for count, asg in enumerate(squid_asgs, start=1):
            squid_metric = cloudwatch.Metric(
                metric_name="procstat_cpu_usage",
                namespace='CWAgent',
                dimensions=dict(
                    AutoScalingGroupName=asg.auto_scaling_group_name,
                    pidfile="/var/run/squid.pid",
                    process_name="squid"))

            # CloudWatch alarms to alert on Squid ASG issues
            squid_alarm = cloudwatch.Alarm(
                self,
                f"squid-alarm-{count}",
                alarm_description=f"Heart beat for Squid instance {count}",
                alarm_name=f"squid-alarm_{asg.auto_scaling_group_name}",
                comparison_operator=cloudwatch.ComparisonOperator.
                LESS_THAN_THRESHOLD,
                metric=squid_metric,
                period=core.Duration.seconds(10),
                evaluation_periods=1,
                threshold=0.0,
                statistic='Average',
                treat_missing_data=cloudwatch.TreatMissingData.BREACHING)
            squid_alarm.add_alarm_action(
                cw_actions.SnsAction(self.squid_alarm_topic))
            squid_alarm.add_ok_action(
                cw_actions.SnsAction(self.squid_alarm_topic))
Example #15
 def get_dashboard(self, params):
     with open(params['dashboard_file']) as json_file:
         params['dashboard_widgets'] = json.load(json_file)
     graph_widgets = []
     for widget in params['dashboard_widgets']:
         metric = [
             cloudwatch.Metric(
                 namespace=widget['properties']['metrics'][0][0],
                 metric_name=widget['properties']['metrics'][0][1],
                 dimensions={
                     widget['properties']['metrics'][0][2]: params['name']
                 })
         ]
         graph_widget = cloudwatch.GraphWidget(height=widget['height'],
                                               width=widget['width'],
                                               left=metric)
         graph_widget.position(widget['x'], widget['y'])
         graph_widgets.append(graph_widget)
     dashboard = cloudwatch.Dashboard(self,
                                      "{}Dashboard".format(params['name']),
                                      dashboard_name=params['name'],
                                      widgets=[graph_widgets])
     return dashboard
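
     # A hedged sketch of the dashboard JSON this method consumes; field names
     # are inferred from the lookups above, and the metric triple is a
     # hypothetical [namespace, metric_name, dimension_name] entry:
     #
     #   [
     #       {
     #           "height": 6, "width": 12, "x": 0, "y": 0,
     #           "properties": {
     #               "metrics": [["AWS/SQS", "NumberOfMessagesSent", "QueueName"]]
     #           }
     #       }
     #   ]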
Example #16
    def __init__(
            self,
            scope: core.Construct,
            _id: str,
            vpc,
            bucket_para,
            # key_name,
            ddb_file_list,
            sqs_queue,
            sqs_queue_DLQ,
            ssm_bucket_para,
            ssm_credential_para,
            # s3bucket,
            **kwargs) -> None:
        super().__init__(scope, _id, **kwargs)

        # Create jobsender ec2 node
        jobsender = ec2.Instance(
            self,
            "jobsender",
            instance_name="s3_migrate_cluster_jobsender",
            instance_type=ec2.InstanceType(
                instance_type_identifier=jobsender_type),
            machine_image=linux_ami,
            # key_name=key_name,
            user_data=ec2.UserData.custom(user_data_jobsender),
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC))

        # jobsender.connections.allow_from_any_ipv4(ec2.Port.tcp(22), "Internet access SSH")
        # Don't need SSH since we use Session Manager

        # Assign EC2 Policy to use SSM and CWAgent
        jobsender.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSSMManagedInstanceCore"))
        jobsender.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "CloudWatchAgentServerPolicy"))

        # jobsender.role.add_managed_policy(
        #     iam.ManagedPolicy.from_aws_managed_policy_name("AmazonS3FullAccess"))
        # Don't give EC2 full S3 access; it would violate the security rules

        # Create Auto Scaling Group of worker EC2 hosts (desired 1, scaling up to 10)
        worker_asg = autoscaling.AutoScalingGroup(
            self,
            "worker-asg",
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
            instance_type=ec2.InstanceType(
                instance_type_identifier=worker_type),
            machine_image=linux_ami,
            # key_name=key_name,  # Optional if use SSM-SessionManager
            user_data=ec2.UserData.custom(user_data_worker),
            desired_capacity=1,
            min_capacity=1,
            max_capacity=10,
            spot_price="0.5")

        # TODO: The CDK Auto Scaling Group high-level API has no MetricsCollection yet.
        # Enable "Group Metrics Collection" in the EC2 console (Auto Scaling Group ->
        # Monitoring tab) for metrics such as GroupDesiredCapacity,
        # GroupInServiceInstances and GroupPendingInstances.
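        # A hedged workaround (not in the original source): the L1 escape hatch
        # below should enable group metrics in the template itself, assuming the
        # ASG's default child is its CfnAutoScalingGroup.
        # cfn_asg = worker_asg.node.default_child
        # cfn_asg.add_property_override(
        #     "MetricsCollection", [{"Granularity": "1Minute"}])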

        # worker_asg.connections.allow_from_any_ipv4(ec2.Port.tcp(22), "Internet access SSH")
        # Don't need SSH since we use Session Manager

        # Assign EC2 Policy to use SSM and CWAgent
        worker_asg.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSSMManagedInstanceCore"))
        worker_asg.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "CloudWatchAgentServerPolicy"))

        # Allow EC2 access new DynamoDB Table
        ddb_file_list.grant_full_access(jobsender)
        ddb_file_list.grant_full_access(worker_asg)

        # Allow EC2 access new sqs and its DLQ
        sqs_queue.grant_consume_messages(jobsender)
        sqs_queue.grant_send_messages(jobsender)
        sqs_queue.grant_consume_messages(worker_asg)
        sqs_queue_DLQ.grant_consume_messages(jobsender)

        # Allow EC2 to read bucket info and credentials from SSM Parameter Store
        ssm_bucket_para.grant_read(jobsender)
        ssm_credential_para.grant_read(jobsender)
        ssm_credential_para.grant_read(worker_asg)

        # Allow EC2 access new s3 bucket
        # s3bucket.grant_read(jobsender)
        # s3bucket.grant_read(worker_asg)

        # Allow EC2 access to existing S3 buckets
        bucket_name = ''
        for b in bucket_para:
            if bucket_name != b['src_bucket']:  # skip if the same bucket is listed more than once
                bucket_name = b['src_bucket']
                s3exist_bucket = s3.Bucket.from_bucket_name(
                    self,
                    bucket_name,  # use the bucket name as the construct id
                    bucket_name=bucket_name)
                s3exist_bucket.grant_read(jobsender)
                s3exist_bucket.grant_read(worker_asg)

        # Dashboard to monitor SQS and EC2
        board = cw.Dashboard(self,
                             "s3_migrate",
                             dashboard_name="s3_migrate_cluster")

        ec2_metric_net = cw.Metric(
            namespace="AWS/EC2",
            metric_name="NetworkOut",
            # dimensions={"AutoScalingGroupName": worker_asg.auto_scaling_group_name},
            period=core.Duration.minutes(1),
            statistic="Sum")
        ec2_metric_cpu_max = cw.Metric(
            namespace="AWS/EC2",
            metric_name="CPUUtilization",
            # dimensions={"AutoScalingGroupName": worker_asg.auto_scaling_group_name},
            period=core.Duration.minutes(1),
            statistic="Maximum")
        ec2_metric_cpu_avg = cw.Metric(
            namespace="AWS/EC2",
            metric_name="CPUUtilization",
            # dimensions={"AutoScalingGroupName": worker_asg.auto_scaling_group_name},
            period=core.Duration.minutes(1))

        autoscaling_GroupDesiredCapacity = cw.Metric(
            namespace="AWS/AutoScaling",
            metric_name="GroupDesiredCapacity",
            dimensions={
                "AutoScalingGroupName": worker_asg.auto_scaling_group_name
            },
            period=core.Duration.minutes(1))
        autoscaling_GroupInServiceInstances = cw.Metric(
            namespace="AWS/AutoScaling",
            metric_name="GroupInServiceInstances",
            dimensions={
                "AutoScalingGroupName": worker_asg.auto_scaling_group_name
            },
            period=core.Duration.minutes(1))
        autoscaling_GroupMinSize = cw.Metric(
            namespace="AWS/AutoScaling",
            metric_name="GroupMinSize",
            dimensions={
                "AutoScalingGroupName": worker_asg.auto_scaling_group_name
            },
            period=core.Duration.minutes(1))
        autoscaling_GroupMaxSize = cw.Metric(
            namespace="AWS/AutoScaling",
            metric_name="GroupMaxSize",
            dimensions={
                "AutoScalingGroupName": worker_asg.auto_scaling_group_name
            },
            period=core.Duration.minutes(1))
        # CWAgent collected metric
        cwagent_mem_avg = cw.Metric(namespace="CWAgent",
                                    metric_name="mem_used_percent",
                                    dimensions={
                                        "AutoScalingGroupName":
                                        worker_asg.auto_scaling_group_name
                                    },
                                    statistic="Average",
                                    period=core.Duration.minutes(1))
        cwagent_mem_max = cw.Metric(namespace="CWAgent",
                                    metric_name="mem_used_percent",
                                    dimensions={
                                        "AutoScalingGroupName":
                                        worker_asg.auto_scaling_group_name
                                    },
                                    statistic="Maximum",
                                    period=core.Duration.minutes(1))

        # CWAgent collected application logs - filter metric
        s3_migrate_log = logs.LogGroup(self,
                                       "applog",
                                       log_group_name="s3_migration_log")
        s3_migrate_log.add_metric_filter(
            "ERROR",
            metric_name="ERROR-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"ERROR"'))
        s3_migrate_log.add_metric_filter(
            "WARNING",
            metric_name="WARNING-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"WARNING"'))
        log_metric_ERROR = cw.Metric(namespace="s3_migrate",
                                     metric_name="ERROR-Logs",
                                     statistic="Sum",
                                     period=core.Duration.minutes(1))
        log_metric_WARNING = cw.Metric(namespace="s3_migrate",
                                       metric_name="WARNING-Logs",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))

        board.add_widgets(
            cw.GraphWidget(title="EC2-ALL-NETWORK", left=[ec2_metric_net]),
            cw.GraphWidget(title="EC2-ALL-CPU",
                           left=[ec2_metric_cpu_avg, ec2_metric_cpu_max]),
            cw.GraphWidget(title="EC2-AutoscalingGroup-MEMORY",
                           left=[cwagent_mem_max, cwagent_mem_avg]),
            cw.SingleValueWidget(title="EC2-AutoscalingGroup-Capacity",
                                 metrics=[
                                     autoscaling_GroupDesiredCapacity,
                                     autoscaling_GroupInServiceInstances,
                                     autoscaling_GroupMinSize,
                                     autoscaling_GroupMaxSize
                                 ],
                                 height=6),
        )

        board.add_widgets(
            cw.GraphWidget(
                title="SQS-Jobs",
                left=[
                    sqs_queue.metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1))
                ]),
            cw.GraphWidget(
                title="SQS-DeadLetterQueue",
                left=[
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1))
                ]),
            cw.GraphWidget(title="ERROR/WARNING Logs",
                           left=[log_metric_ERROR],
                           right=[log_metric_WARNING],
                           height=6),
            cw.SingleValueWidget(
                title="Running/Waiting and Death Jobs",
                metrics=[
                    sqs_queue.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue.metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1))
                ],
                height=6))

        # Scale up when the number of visible messages exceeds 100
        worker_asg.scale_on_metric(
            "scaleup",
            metric=sqs_queue.metric_approximate_number_of_messages_visible(),
            scaling_steps=[
                autoscaling.ScalingInterval(change=1, lower=100, upper=500),
                autoscaling.ScalingInterval(change=2, lower=500),
                autoscaling.ScalingInterval(change=0, upper=100, lower=0)
            ],
            adjustment_type=autoscaling.AdjustmentType.CHANGE_IN_CAPACITY)

        # Alarm when the queue is empty and more than one EC2 instance is running
        # If there are no visible or in-flight messages and more than one EC2
        # instance is in service, alarm and scale the group down to one instance.
        # Adjust for your scenario: if the jobsender also transfers data, you can
        # scale the Auto Scaling Group to 0 here when there are no jobs.
        metric_all_message = cw.MathExpression(
            expression="IF(((a+b) == 0) AND (c >1), 0, 1)",  # a+b且c>1则设置为0,告警
            label="empty_queue_expression",
            using_metrics={
                "a": sqs_queue.metric_approximate_number_of_messages_visible(),
                "b":
                sqs_queue.metric_approximate_number_of_messages_not_visible(),
                "c": autoscaling_GroupInServiceInstances
            })
        alarm_0 = cw.Alarm(
            self,
            "SQSempty",
            alarm_name=
            "s3-migration-cluster-SQS queue empty and ec2 more than 1 in Cluster",
            metric=metric_all_message,
            threshold=0,
            comparison_operator=cw.ComparisonOperator.
            LESS_THAN_OR_EQUAL_TO_THRESHOLD,
            evaluation_periods=3,
            datapoints_to_alarm=3,
            treat_missing_data=cw.TreatMissingData.NOT_BREACHING)
        alarm_topic_empty = sns.Topic(
            self, "SQS queue empty and ec2 more than 1 in Cluster")
        # This alarm doubles as a one-time notification that the batch transfer has finished, instead of notifying repeatedly.
        alarm_topic_empty.add_subscription(
            subscription=sub.EmailSubscription(alarm_email))
        alarm_0.add_alarm_action(action.SnsAction(alarm_topic_empty))

        # If queue empty, set autoscale down to 1 EC2
        action_shutdown = autoscaling.StepScalingAction(
            self,
            "shutdown",
            auto_scaling_group=worker_asg,
            adjustment_type=autoscaling.AdjustmentType.EXACT_CAPACITY)
        action_shutdown.add_adjustment(adjustment=1, upper_bound=0)
        alarm_0.add_alarm_action(action.AutoScalingAction(action_shutdown))

        # While message in SQS-DLQ, alarm to sns
        alarm_DLQ = cw.Alarm(
            self,
            "SQS_DLQ",
            alarm_name=
            "s3-migration-cluster-SQS DLQ more than 1 message-Cluster",
            metric=sqs_queue_DLQ.metric_approximate_number_of_messages_visible(
            ),
            threshold=0,
            comparison_operator=cw.ComparisonOperator.GREATER_THAN_THRESHOLD,
            evaluation_periods=3,
            datapoints_to_alarm=3,
            treat_missing_data=cw.TreatMissingData.IGNORE)
        alarm_topic_DLQ = sns.Topic(self,
                                    "SQS DLQ more than 1 message-Cluster")
        alarm_topic_DLQ.add_subscription(
            subscription=sub.EmailSubscription(alarm_email))
        alarm_DLQ.add_alarm_action(action.SnsAction(alarm_topic_DLQ))

        # Output
        core.CfnOutput(self, "JobSenderEC2", value=jobsender.instance_id)
        core.CfnOutput(self,
                       "WorkerEC2AutoscalingGroup",
                       value=worker_asg.auto_scaling_group_name)
        core.CfnOutput(self,
                       "Dashboard",
                       value="CloudWatch Dashboard name s3_migrate_cluster")
        core.CfnOutput(self,
                       "Alarm",
                       value="CloudWatch SQS queue empty Alarm for cluster: " +
                       alarm_email)
Example #17
    def __init__(
            self,
            scope: core.Construct,
            _id: str,
            vpc,
            bucket_para,
            # key_name,
            ddb_file_list,
            sqs_queue,
            sqs_queue_DLQ,
            ssm_bucket_para,
            ssm_credential_para,
            s3bucket,
            s3_deploy,
            **kwargs) -> None:
        super().__init__(scope, _id, **kwargs)

        # Inject environment variables into the userdata
        env_var = f'export table_queue_name={ddb_file_list.table_name}\n' \
                  f'export sqs_queue_name={sqs_queue.queue_name}\n' \
                  f'export ssm_parameter_bucket={ssm_bucket_para.parameter_name}\n'
        env_var_st = f'echo \"export table_queue_name={ddb_file_list.table_name}\" >> /etc/rc.local\n' \
                     f'echo \"export sqs_queue_name={sqs_queue.queue_name}\" >> /etc/rc.local\n' \
                     f'echo \"export ssm_parameter_bucket={ssm_bucket_para.parameter_name}\" >> /etc/rc.local\n'
        # Create log group and put group name into userdata
        s3_migrate_log = logs.LogGroup(self, "applog")
        cw_agent_config['logs']['logs_collected']['files']['collect_list'][0][
            'log_group_name'] = s3_migrate_log.log_group_name
        cw_agent_config['logs']['logs_collected']['files']['collect_list'][1][
            'log_group_name'] = s3_migrate_log.log_group_name
        cw_agent_config['metrics']['append_dimensions'][
            'AutoScalingGroupName'] = "\\${aws:AutoScalingGroupName}"
        cw_agent_config['metrics']['append_dimensions'][
            'InstanceId'] = "\\${aws:InstanceId}"
        cw_agent_config_str = json.dumps(cw_agent_config,
                                         indent=4).replace("\\\\", "\\")
        userdata_head = user_data_part1 + cw_agent_config_str + user_data_part2 + \
                        s3_deploy.bucket_name + " .\n" + env_var + env_var_st
        jobsender_userdata = userdata_head + user_data_jobsender_p
        worker_userdata = userdata_head + user_data_worker_p

        # Create the jobsender node as a size-1 Auto Scaling Group
        jobsender = autoscaling.AutoScalingGroup(
            self,
            "jobsender",
            instance_type=ec2.InstanceType(
                instance_type_identifier=jobsender_type),
            machine_image=linux_ami,
            # key_name=key_name,
            user_data=ec2.UserData.custom(jobsender_userdata),
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
            desired_capacity=1,
            min_capacity=0,
            max_capacity=1)

        # jobsender.connections.allow_from_any_ipv4(ec2.Port.tcp(22), "Internet access SSH")
        # Don't need SSH since we use Session Manager

        # Assign EC2 Policy to use SSM and CWAgent
        jobsender.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSSMManagedInstanceCore"))
        jobsender.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "CloudWatchAgentServerPolicy"))

        # jobsender.role.add_managed_policy(
        #     iam.ManagedPolicy.from_aws_managed_policy_name("AmazonS3FullAccess"))
        # Don't give EC2 full S3 access; it would violate the security rules

        # Create Auto Scaling Group of worker EC2 hosts (desired 2, scaling up to 10)
        worker_asg = autoscaling.AutoScalingGroup(
            self,
            "worker-asg",
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
            instance_type=ec2.InstanceType(
                instance_type_identifier=worker_type),
            machine_image=linux_ami,
            # key_name=key_name,  # Optional if use SSM-SessionManager
            user_data=ec2.UserData.custom(worker_userdata),
            desired_capacity=2,
            min_capacity=2,
            max_capacity=10,
            spot_price="0.5")

        # TODO: The CDK Auto Scaling Group high-level API has no MetricsCollection yet.
        # Enable "Group Metrics Collection" in the EC2 console (Auto Scaling Group ->
        # Monitoring tab) for metrics such as GroupDesiredCapacity,
        # GroupInServiceInstances and GroupPendingInstances.
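        # As in the earlier cluster example, a hedged workaround is to override
        # "MetricsCollection" on worker_asg.node.default_child via
        # add_property_override instead of enabling it in the console.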

        # worker_asg.connections.allow_from_any_ipv4(ec2.Port.tcp(22), "Internet access SSH")
        # Don't need SSH since we use Session Manager

        # Assign EC2 Policy to use SSM and CWAgent
        worker_asg.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSSMManagedInstanceCore"))
        worker_asg.role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "CloudWatchAgentServerPolicy"))

        # Allow EC2 access new DynamoDB Table
        ddb_file_list.grant_full_access(jobsender)
        ddb_file_list.grant_full_access(worker_asg)

        # Allow EC2 access new sqs and its DLQ
        sqs_queue.grant_consume_messages(jobsender)
        sqs_queue.grant_send_messages(jobsender)
        sqs_queue.grant_consume_messages(worker_asg)
        sqs_queue_DLQ.grant_consume_messages(jobsender)

        # Allow EC2 to read bucket info and credentials from SSM Parameter Store
        ssm_bucket_para.grant_read(jobsender)
        ssm_credential_para.grant_read(jobsender)
        ssm_credential_para.grant_read(worker_asg)

        # Allow EC2 access source code on s3_deploy bucket
        s3_deploy.grant_read(jobsender)
        s3_deploy.grant_read(worker_asg)

        # Allow EC2 access new s3 bucket
        s3bucket.grant_read(jobsender)
        s3bucket.grant_read(worker_asg)

        # Allow EC2 read-only access to existing source buckets (PUT mode)
        bucket_name = ''
        for b in bucket_para:
            if bucket_name != b['src_bucket']:  # skip if the same bucket is listed more than once
                bucket_name = b['src_bucket']
                s3exist_bucket = s3.Bucket.from_bucket_name(
                    self,
                    bucket_name,  # use the bucket name as the construct id
                    bucket_name=bucket_name)
                s3exist_bucket.grant_read(jobsender)
                s3exist_bucket.grant_read(worker_asg)
        # Allow EC2 read/write access to existing destination buckets (GET mode)
        # bucket_name = ''
        # for b in bucket_para:
        #     if bucket_name != b['des_bucket']:  # skip if the same bucket is listed more than once
        #         bucket_name = b['des_bucket']
        #         s3exist_bucket = s3.Bucket.from_bucket_name(self,
        #                                                     bucket_name,  # use the bucket name as the construct id
        #                                                     bucket_name=bucket_name)
        #         s3exist_bucket.grant_read_write(jobsender)
        #         s3exist_bucket.grant_read_write(worker_asg)

        # Dashboard to monitor SQS and EC2
        board = cw.Dashboard(self, "s3_migrate")

        ec2_metric_cpu_avg = cw.Metric(namespace="AWS/EC2",
                                       metric_name="CPUUtilization",
                                       dimensions={
                                           "AutoScalingGroupName":
                                           worker_asg.auto_scaling_group_name
                                       },
                                       period=core.Duration.minutes(1))

        ec2_metric_net_out = cw.MathExpression(
            expression=
            "SEARCH('{AWS/EC2, InstanceId} NetworkOut', 'Average', 60)",
            label="EC2-NetworkOut",
            using_metrics={})

        autoscaling_GroupDesiredCapacity = cw.Metric(
            namespace="AWS/AutoScaling",
            metric_name="GroupDesiredCapacity",
            dimensions={
                "AutoScalingGroupName": worker_asg.auto_scaling_group_name
            },
            period=core.Duration.minutes(1))
        autoscaling_GroupInServiceInstances = cw.Metric(
            namespace="AWS/AutoScaling",
            metric_name="GroupInServiceInstances",
            dimensions={
                "AutoScalingGroupName": worker_asg.auto_scaling_group_name
            },
            period=core.Duration.minutes(1))
        autoscaling_GroupMinSize = cw.Metric(
            namespace="AWS/AutoScaling",
            metric_name="GroupMinSize",
            dimensions={
                "AutoScalingGroupName": worker_asg.auto_scaling_group_name
            },
            period=core.Duration.minutes(1))
        autoscaling_GroupMaxSize = cw.Metric(
            namespace="AWS/AutoScaling",
            metric_name="GroupMaxSize",
            dimensions={
                "AutoScalingGroupName": worker_asg.auto_scaling_group_name
            },
            period=core.Duration.minutes(1))

        # CWAgent collected metric
        cwagent_mem_avg = cw.MathExpression(
            expression=
            "SEARCH('{CWAgent, AutoScalingGroupName, InstanceId} (AutoScalingGroupName="
            + worker_asg.auto_scaling_group_name +
            " AND MetricName=mem_used_percent)', 'Average', 60)",
            label="mem_avg",
            using_metrics={})
        cwagent_disk_avg = cw.MathExpression(
            expression=
            "SEARCH('{CWAgent, path, InstanceId, AutoScalingGroupName, device, fstype} "
            "(AutoScalingGroupName=" + worker_asg.auto_scaling_group_name +
            " AND MetricName=disk_used_percent AND path=\"/\")', 'Average', 60)",
            label="disk_avg",
            using_metrics={})
        cwagent_net_tcp = cw.MathExpression(
            expression=
            "SEARCH('{CWAgent, AutoScalingGroupName, InstanceId} (AutoScalingGroupName="
            + worker_asg.auto_scaling_group_name +
            " AND MetricName=tcp_established)', 'Average', 60)",
            label="tcp_conn",
            using_metrics={})

        # CWAgent collected application logs - filter metric
        s3_migrate_log.add_metric_filter(
            "Completed-bytes",
            metric_name="Completed-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[date, time, info, hs, p="--->Complete", bytes, key]'))
        s3_migrate_log.add_metric_filter(
            "Uploading-bytes",
            metric_name="Uploading-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[date, time, info, hs, p="--->Uploading", bytes, key]'))
        s3_migrate_log.add_metric_filter(
            "Downloading-bytes",
            metric_name="Downloading-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[date, time, info, hs, p="--->Downloading", bytes, key]'))
        traffic_metric_Complete = cw.Metric(namespace="s3_migrate",
                                            metric_name="Completed-bytes",
                                            statistic="Sum",
                                            period=core.Duration.minutes(1))
        traffic_metric_Upload = cw.Metric(namespace="s3_migrate",
                                          metric_name="Uploading-bytes",
                                          statistic="Sum",
                                          period=core.Duration.minutes(1))
        traffic_metric_Download = cw.Metric(namespace="s3_migrate",
                                            metric_name="Downloading-bytes",
                                            statistic="Sum",
                                            period=core.Duration.minutes(1))
        s3_migrate_log.add_metric_filter(
            "ERROR",
            metric_name="ERROR-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"ERROR"'))
        s3_migrate_log.add_metric_filter(
            "WARNING",
            metric_name="WARNING-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"WARNING"'))
        log_metric_ERROR = cw.Metric(namespace="s3_migrate",
                                     metric_name="ERROR-Logs",
                                     statistic="Sum",
                                     period=core.Duration.minutes(1))
        log_metric_WARNING = cw.Metric(namespace="s3_migrate",
                                       metric_name="WARNING-Logs",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))

        board.add_widgets(
            cw.GraphWidget(title="S3-MIGRATION-TOTAL-TRAFFIC",
                           left=[
                               traffic_metric_Complete, traffic_metric_Upload,
                               traffic_metric_Download
                           ],
                           left_y_axis=cw.YAxisProps(label="Bytes/min",
                                                     show_units=False)),
            cw.GraphWidget(title="ERROR/WARNING LOGS",
                           left=[log_metric_ERROR],
                           left_y_axis=cw.YAxisProps(label="Count",
                                                     show_units=False),
                           right=[log_metric_WARNING],
                           right_y_axis=cw.YAxisProps(label="Count",
                                                      show_units=False)),
            cw.GraphWidget(
                title="SQS-JOBS",
                left=[
                    sqs_queue.metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1))
                ]),
            cw.SingleValueWidget(
                title="RUNNING, WAITING & DEATH JOBS",
                metrics=[
                    sqs_queue.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue.metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1))
                ],
                height=6))

        board.add_widgets(
            cw.GraphWidget(title="EC2-AutoscalingGroup-TCP",
                           left=[cwagent_net_tcp],
                           left_y_axis=cw.YAxisProps(label="Count",
                                                     show_units=False)),
            cw.GraphWidget(title="EC2-AutoscalingGroup-CPU/MEMORY",
                           left=[ec2_metric_cpu_avg, cwagent_mem_avg],
                           left_y_axis=cw.YAxisProps(max=100,
                                                     min=0,
                                                     label="%",
                                                     show_units=False)),
            cw.GraphWidget(title="EC2-AutoscalingGroup-DISK",
                           left=[cwagent_disk_avg],
                           left_y_axis=cw.YAxisProps(max=100,
                                                     min=0,
                                                     label="%",
                                                     show_units=False)),
            cw.SingleValueWidget(title="EC2-AutoscalingGroup-CAPACITY",
                                 metrics=[
                                     autoscaling_GroupDesiredCapacity,
                                     autoscaling_GroupInServiceInstances,
                                     autoscaling_GroupMinSize,
                                     autoscaling_GroupMaxSize
                                 ],
                                 height=6))
        board.add_widgets(
            cw.GraphWidget(title="EC2-NetworkOut",
                           left=[ec2_metric_net_out],
                           left_y_axis=cw.YAxisProps(label="Bytes/min",
                                                     show_units=False)))

        # Scale up when the number of visible messages exceeds 100
        worker_asg.scale_on_metric(
            "scaleup",
            metric=sqs_queue.metric_approximate_number_of_messages_visible(),
            scaling_steps=[
                autoscaling.ScalingInterval(change=1, lower=100, upper=500),
                autoscaling.ScalingInterval(change=2, lower=500),
                autoscaling.ScalingInterval(change=0, upper=100, lower=0)
            ],
            adjustment_type=autoscaling.AdjustmentType.CHANGE_IN_CAPACITY)

        # Alarm when the queue is empty and more than one EC2 instance is running
        # If there are no visible or in-flight messages and more than one EC2
        # instance is in service, alarm and scale the group down to one instance.
        # Adjust for your scenario: if the jobsender also transfers data, you can
        # scale the Auto Scaling Group to 0 here when there are no jobs.
        metric_all_message = cw.MathExpression(
            expression="IF(((a+b) == 0) AND (c >1), 0, 1)",  # a+b且c>1则设置为0,告警
            label="empty_queue_expression",
            using_metrics={
                "a": sqs_queue.metric_approximate_number_of_messages_visible(),
                "b":
                sqs_queue.metric_approximate_number_of_messages_not_visible(),
                "c": autoscaling_GroupInServiceInstances
            })
        alarm_0 = cw.Alarm(
            self,
            "SQSempty",
            alarm_name=
            "s3-migration-cluster-SQS queue empty and ec2 more than 1 in Cluster",
            metric=metric_all_message,
            threshold=0,
            comparison_operator=cw.ComparisonOperator.
            LESS_THAN_OR_EQUAL_TO_THRESHOLD,
            evaluation_periods=3,
            datapoints_to_alarm=3,
            treat_missing_data=cw.TreatMissingData.NOT_BREACHING)
        alarm_topic_empty = sns.Topic(
            self, "SQS queue empty and ec2 more than 1 in Cluster")
        # This alarm doubles as a one-time notification that the batch transfer has finished, instead of notifying repeatedly.
        alarm_topic_empty.add_subscription(
            subscription=sub.EmailSubscription(alarm_email))
        alarm_0.add_alarm_action(action.SnsAction(alarm_topic_empty))

        # If queue empty, set autoscale down to 1 EC2
        action_shutdown = autoscaling.StepScalingAction(
            self,
            "shutdown",
            auto_scaling_group=worker_asg,
            adjustment_type=autoscaling.AdjustmentType.EXACT_CAPACITY)
        action_shutdown.add_adjustment(adjustment=1, upper_bound=0)
        alarm_0.add_alarm_action(action.AutoScalingAction(action_shutdown))

        # While message in SQS-DLQ, alarm to sns
        alarm_DLQ = cw.Alarm(
            self,
            "SQS_DLQ",
            alarm_name=
            "s3-migration-cluster-SQS DLQ more than 1 message-Cluster",
            metric=sqs_queue_DLQ.metric_approximate_number_of_messages_visible(
            ),
            threshold=0,
            comparison_operator=cw.ComparisonOperator.GREATER_THAN_THRESHOLD,
            evaluation_periods=3,
            datapoints_to_alarm=3,
            treat_missing_data=cw.TreatMissingData.IGNORE)
        alarm_topic_DLQ = sns.Topic(self,
                                    "SQS DLQ more than 1 message-Cluster")
        alarm_topic_DLQ.add_subscription(
            subscription=sub.EmailSubscription(alarm_email))
        alarm_DLQ.add_alarm_action(action.SnsAction(alarm_topic_DLQ))

        # Output
        core.CfnOutput(self, "LogGroup", value=s3_migrate_log.log_group_name)
        core.CfnOutput(self,
                       "Dashboard",
                       value="CloudWatch Dashboard name s3_migrate_cluster")
        core.CfnOutput(self,
                       "Alarm",
                       value="CloudWatch SQS queue empty Alarm for cluster: " +
                       alarm_email)
Example #18
    def __init__(self, scope: core.Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        high_cpu_topic = sns.Topic(self, 'high-cpu-topic',
                                   display_name='myHighCpuAlarm')
        # US phone numbers must use the format 12225558888
        phone_param = ssm.StringParameter.from_string_parameter_name(self, 'phone-param',
                                                                     'notification-phone')
        high_cpu_topic_sub = sns.Subscription(self, 'high-cpu-topic-sub',
                                              topic=high_cpu_topic,
                                              protocol=sns.SubscriptionProtocol.SMS,
                                              endpoint=phone_param.string_value)

        default_vpc = ec2.Vpc.from_lookup(self, 'default-vpc', is_default=True)
        monitored_instance = ec2.Instance(self, 'monitored-instance',
                                          instance_name='devassoc-monitored',
                                          instance_type=type.R3_XLARGE,
                                          machine_image=ec2.MachineImage.generic_linux(
                                              ami_map=ami_map
                                          ),
                                          vpc=default_vpc)

        high_cpu_metric = cw.Metric(namespace='AWS/EC2',
                                    metric_name='CPUUtilization',
                                    dimensions={
                                        'InstanceId': monitored_instance.instance_id
                                    },
                                    statistic='Average',
                                    unit=cw.Unit.PERCENT,
                                    period=core.Duration.seconds(300))
        high_cpu_alarm = high_cpu_metric.create_alarm(self, 'high-cpu-alarm',
                                                      alarm_name='cpu-mon',
                                                      alarm_description='Alarm when CPU exceeds 70%',
                                                      comparison_operator=cw.ComparisonOperator.GREATER_THAN_THRESHOLD,
                                                      evaluation_periods=2,
                                                      period=core.Duration.seconds(300),
                                                      threshold=70,
                                                      actions_enabled=True)
        high_cpu_action = cwa.SnsAction(high_cpu_topic)
        high_cpu_alarm.add_alarm_action(high_cpu_action)

        ec2.CfnEIP(self, 'devassoc-elastic-ip')

        # not really a service role, but there are problems with that, per
        # https://github.com/aws/aws-cdk/issues/3492
        config_service_role = iam.Role(self, 'devassoc-config-service-role',
                                       assumed_by=iam.ServicePrincipal('config.amazonaws.com'),
                                       managed_policies=[
                                           iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSConfigRole')
                                       ])
        config_recorder = config.CfnConfigurationRecorder(self, 'devassoc-recorder',
                                                          name='ConfigRecorder',
                                                          role_arn=config_service_role.role_arn,
                                                          recording_group=config.CfnConfigurationRecorder.RecordingGroupProperty(
                                                              all_supported=True)
                                                          )
        config_bucket = s3.Bucket(self, 'config-bucket',
                                  bucket_name='devassoc-config',
                                  removal_policy=core.RemovalPolicy.DESTROY,
                                  auto_delete_objects=True)
        config_bucket.add_to_resource_policy(iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                                                 principals=[iam.ServicePrincipal('config.amazonaws.com')],
                                                                 resources=[config_bucket.bucket_arn],
                                                                 actions=['s3:GetBucketAcl']))
        config_bucket.add_to_resource_policy(iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                                                 principals=[iam.ServicePrincipal('config.amazonaws.com')],
                                                                 resources=[config_bucket.arn_for_objects(
                                                                     f"AWSLogs/{core.Stack.of(self).account}/Config/*")],
                                                                 actions=['s3:PutObject'],
                                                                 conditions={'StringEquals': {
                                                                     's3:x-amz-acl': 'bucket-owner-full-control'}}))
        eip_rule = config.ManagedRule(self, 'devassoc-managed-rule',
                                      identifier=config.ManagedRuleIdentifiers.EIP_ATTACHED,
                                      config_rule_name='devassoc-eip-rule')
        eip_rule.node.add_dependency(config_recorder)
        eip_compliance_topic = sns.Topic(self, 'eip-compliance-topic',
                                         display_name='EIP Compliance Topic')
        eip_compliance_topic_sub = sns.Subscription(self, 'eip-compliance-topic-sub',
                                                    topic=eip_compliance_topic,
                                                    protocol=sns.SubscriptionProtocol.SMS,
                                                    endpoint=phone_param.string_value)
        eip_rule.on_compliance_change('eip-compliance-change',
                                      target=targets.SnsTopic(eip_compliance_topic))
        config.CfnDeliveryChannel(self, 'devassoc-config-delivery',
                                  s3_bucket_name=config_bucket.bucket_name,
                                  sns_topic_arn=eip_compliance_topic.topic_arn)
Example No. 19
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        security_distribution_list_email = '*****@*****.**'

        # securityhub_instance = securityhub.CfnHub(self, 'SecurityHub')

        # Ensure AWS Config is enabled / Ensure CloudTrail is enabled in all Regions 2.1 - 2.8
        cloudtrail_bucket_accesslogs = s3.Bucket(
            self,
            "CloudTrailS3Accesslogs",
            block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
            encryption=s3.BucketEncryption.S3_MANAGED,
            removal_policy=core.RemovalPolicy.RETAIN)

        cloudtrail_bucket = s3.Bucket(
            self,
            "CloudTrailS3",
            block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
            encryption=s3.BucketEncryption.S3_MANAGED,
            removal_policy=core.RemovalPolicy.RETAIN,
            server_access_logs_bucket=cloudtrail_bucket_accesslogs,
        )

        cloudtrail_kms = kms.Key(self,
                                 "CloudTrailKey",
                                 enable_key_rotation=True)

        # CloudTrail - single account, not Organization
        trail = cloudtrail.Trail(
            self,
            "CloudTrail",
            enable_file_validation=True,
            is_multi_region_trail=True,
            include_global_service_events=True,
            send_to_cloud_watch_logs=True,
            cloud_watch_logs_retention=logs.RetentionDays.FOUR_MONTHS,
            bucket=cloudtrail_bucket,
            kms_key=cloudtrail_kms)

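        # CloudTrail needs to describe the key and generate data keys; the
        # encryption-context condition scopes GenerateDataKey* to trails in
        # this account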
        cloudtrail_kms.grant(iam.ServicePrincipal('cloudtrail.amazonaws.com'),
                             'kms:DescribeKey')

        cloudtrail_kms.grant(
            iam.ServicePrincipal(
                'cloudtrail.amazonaws.com',
                conditions={
                    'StringLike': {
                        'kms:EncryptionContext:aws:cloudtrail:arn':
                        'arn:aws:cloudtrail:*:' + core.Stack.of(self).account +
                        ':trail/*'
                    }
                }), 'kms:GenerateDataKey*')

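        # Let principals in this account decrypt CloudTrail log files
        # encrypted under this key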
        cloudtrail_kms.add_to_resource_policy(
            iam.PolicyStatement(
                actions=["kms:Decrypt", "kms:ReEncryptFrom"],
                conditions={
                    'StringEquals': {
                        'kms:CallerAccount': core.Stack.of(self).account
                    },
                    'StringLike': {
                        'kms:EncryptionContext:aws:cloudtrail:arn':
                        'arn:aws:cloudtrail:*:' + core.Stack.of(self).account +
                        ':trail/*'
                    }
                },
                effect=iam.Effect.ALLOW,
                principals=[iam.AnyPrincipal()],
                resources=['*']))

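        # Alias creation is restricted to calls made via EC2 in this region
        # (kms:ViaService), apparently mirroring the sample CloudTrail key policy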
        cloudtrail_kms.add_to_resource_policy(
            iam.PolicyStatement(actions=["kms:CreateAlias"],
                                conditions={
                                    'StringEquals': {
                                        'kms:CallerAccount':
                                        core.Stack.of(self).account,
                                        'kms:ViaService':
                                        'ec2.' + core.Stack.of(self).region +
                                        '.amazonaws.com'
                                    }
                                },
                                effect=iam.Effect.ALLOW,
                                principals=[iam.AnyPrincipal()],
                                resources=['*']))

        config_role = iam.CfnServiceLinkedRole(
            self,
            id='ServiceLinkedRoleConfig',
            aws_service_name='config.amazonaws.com')

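        # The recorder uses the Config service-linked role; its ARN is
        # deterministic, so it is assembled by hand below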
        global_config = config.CfnConfigurationRecorder(
            self,
            'ConfigRecorder',
            name='default',
            # role_arn=config_role.role_arn,
            role_arn="arn:aws:iam::" + core.Stack.of(self).account +
            ":role/aws-service-role/config.amazonaws.com/AWSServiceRoleForConfig",
            # role_arn=config_role.get_att(
            #     attribute_name='resource.arn').to_string(),
            recording_group=config.CfnConfigurationRecorder.RecordingGroupProperty(
                all_supported=True,
                include_global_resource_types=True))

        config_bucket = s3.Bucket(
            self,
            "ConfigS3",
            block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
            encryption=s3.BucketEncryption.S3_MANAGED,
            removal_policy=core.RemovalPolicy.RETAIN,
        )

        config_bucket.add_to_resource_policy(
            iam.PolicyStatement(
                actions=['s3:GetBucketAcl'],
                effect=iam.Effect.ALLOW,
                principals=[iam.ServicePrincipal('config.amazonaws.com')],
                resources=[config_bucket.bucket_arn]))

        config_bucket.add_to_resource_policy(
            iam.PolicyStatement(
                actions=['s3:PutObject'],
                effect=iam.Effect.ALLOW,
                principals=[iam.ServicePrincipal('config.amazonaws.com')],
                resources=[
                    config_bucket.arn_for_objects('AWSLogs/' +
                                                  core.Stack.of(self).account +
                                                  '/Config/*')
                ],
                conditions={
                    "StringEquals": {
                        's3:x-amz-acl': 'bucket-owner-full-control',
                    }
                }))

        config_delivery_stream = config.CfnDeliveryChannel(
            self,
            "ConfigDeliveryChannel",
            s3_bucket_name=config_bucket.bucket_name)

        # Config Aggregator in Organizations account
        # config_aggregator = config.CfnConfigurationAggregator(self, 'ConfigAggregator',
        #                                                       configuration_aggregator_name='ConfigAggregator',
        #                                                       organization_aggregation_source=config.CfnConfigurationAggregator.OrganizationAggregationSourceProperty(
        #                                                           role_arn=iam.Role(self, "AWSConfigRoleForOrganizations",
        #                                                                             assumed_by=iam.ServicePrincipal(
        #                                                                                 'config.amazonaws.com'),
        #                                                                             managed_policies=[iam.ManagedPolicy.from_aws_managed_policy_name(
        #                                                                                 'service-role/AWSConfigRoleForOrganizations')]
        #                                                                             ).role_arn,
        #                                                           all_aws_regions=True
        #                                                       )
        #                                                       )

        # 2.9 – Ensure VPC flow logging is enabled in all VPCs
        # vpc = ec2.Vpc.from_lookup(self, "VPC",
        #                           is_default=True,
        #                           )

        # S3 for VPC flow logs
        # vpc_flow_logs_bucket = s3.Bucket(self, "VPCFlowLogsBucket",
        #                                  block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
        #                                  encryption=s3.BucketEncryption.S3_MANAGED,
        #                                  removal_policy=core.RemovalPolicy.RETAIN
        #                                  )

        # Ensure a log metric filter and alarm exist for 3.1 – 3.14
        security_notifications_topic = sns.Topic(self,
                                                 'CIS_Topic',
                                                 display_name='CIS_Topic',
                                                 topic_name='CIS_Topic')

        sns.Subscription(self,
                         'CIS_Subscription',
                         topic=security_notifications_topic,
                         protocol=sns.SubscriptionProtocol.EMAIL,
                         endpoint=security_distribution_list_email)

        cloudwatch_actions_cis = cloudwatch_actions.SnsAction(
            security_notifications_topic)

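        # CIS 3.x log metric filters and alarms: keys are alarm names, values
        # are the CloudTrail filter patterns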
        cis_metricfilter_alarms = {
            'CIS-3.1-UnauthorizedAPICalls':
            '($.errorCode="*UnauthorizedOperation") || ($.errorCode="AccessDenied*")',
            'CIS-3.2-ConsoleSigninWithoutMFA':
            '($.eventName="ConsoleLogin") && ($.additionalEventData.MFAUsed !="Yes")',
            'RootAccountUsageAlarm':
            '$.userIdentity.type="Root" && $.userIdentity.invokedBy NOT EXISTS && $.eventType !="AwsServiceEvent"',
            'CIS-3.4-IAMPolicyChanges':
            '($.eventName=DeleteGroupPolicy) || ($.eventName=DeleteRolePolicy) || ($.eventName=DeleteUserPolicy) || ($.eventName=PutGroupPolicy) || ($.eventName=PutRolePolicy) || ($.eventName=PutUserPolicy) || ($.eventName=CreatePolicy) || ($.eventName=DeletePolicy) || ($.eventName=CreatePolicyVersion) || ($.eventName=DeletePolicyVersion) || ($.eventName=AttachRolePolicy) || ($.eventName=DetachRolePolicy) || ($.eventName=AttachUserPolicy) || ($.eventName=DetachUserPolicy) || ($.eventName=AttachGroupPolicy) || ($.eventName=DetachGroupPolicy)',
            'CIS-3.5-CloudTrailChanges':
            '($.eventName=CreateTrail) || ($.eventName=UpdateTrail) || ($.eventName=DeleteTrail) || ($.eventName=StartLogging) || ($.eventName=StopLogging)',
            'CIS-3.6-ConsoleAuthenticationFailure':
            '($.eventName=ConsoleLogin) && ($.errorMessage="Failed authentication")',
            'CIS-3.7-DisableOrDeleteCMK':
            '($.eventSource=kms.amazonaws.com) && (($.eventName=DisableKey) || ($.eventName=ScheduleKeyDeletion))',
            'CIS-3.8-S3BucketPolicyChanges':
            '($.eventSource=s3.amazonaws.com) && (($.eventName=PutBucketAcl) || ($.eventName=PutBucketPolicy) || ($.eventName=PutBucketCors) || ($.eventName=PutBucketLifecycle) || ($.eventName=PutBucketReplication) || ($.eventName=DeleteBucketPolicy) || ($.eventName=DeleteBucketCors) || ($.eventName=DeleteBucketLifecycle) || ($.eventName=DeleteBucketReplication))',
            'CIS-3.9-AWSConfigChanges':
            '($.eventSource=config.amazonaws.com) && (($.eventName=StopConfigurationRecorder) || ($.eventName=DeleteDeliveryChannel) || ($.eventName=PutDeliveryChannel) || ($.eventName=PutConfigurationRecorder))',
            'CIS-3.10-SecurityGroupChanges':
            '($.eventName=AuthorizeSecurityGroupIngress) || ($.eventName=AuthorizeSecurityGroupEgress) || ($.eventName=RevokeSecurityGroupIngress) || ($.eventName=RevokeSecurityGroupEgress) || ($.eventName=CreateSecurityGroup) || ($.eventName=DeleteSecurityGroup)',
            'CIS-3.11-NetworkACLChanges':
            '($.eventName=CreateNetworkAcl) || ($.eventName=CreateNetworkAclEntry) || ($.eventName=DeleteNetworkAcl) || ($.eventName=DeleteNetworkAclEntry) || ($.eventName=ReplaceNetworkAclEntry) || ($.eventName=ReplaceNetworkAclAssociation)',
            'CIS-3.12-NetworkGatewayChanges':
            '($.eventName=CreateCustomerGateway) || ($.eventName=DeleteCustomerGateway) || ($.eventName=AttachInternetGateway) || ($.eventName=CreateInternetGateway) || ($.eventName=DeleteInternetGateway) || ($.eventName=DetachInternetGateway)',
            'CIS-3.13-RouteTableChanges':
            '($.eventName=CreateRoute) || ($.eventName=CreateRouteTable) || ($.eventName=ReplaceRoute) || ($.eventName=ReplaceRouteTableAssociation) || ($.eventName=DeleteRouteTable) || ($.eventName=DeleteRoute) || ($.eventName=DisassociateRouteTable)',
            'CIS-3.14-VPCChanges':
            '($.eventName=CreateVpc) || ($.eventName=DeleteVpc) || ($.eventName=ModifyVpcAttribute) || ($.eventName=AcceptVpcPeeringConnection) || ($.eventName=CreateVpcPeeringConnection) || ($.eventName=DeleteVpcPeeringConnection) || ($.eventName=RejectVpcPeeringConnection) || ($.eventName=AttachClassicLinkVpc) || ($.eventName=DetachClassicLinkVpc) || ($.eventName=DisableVpcClassicLink) || ($.eventName=EnableVpcClassicLink)',
        }
        for alarm_name, filter_pattern in cis_metricfilter_alarms.items():
            logs.MetricFilter(
                self,
                "MetricFilter_" + alarm_name,
                log_group=trail.log_group,
                # CloudWatch Logs JSON patterns must be wrapped in braces
                filter_pattern=logs.FilterPattern.literal(
                    '{ ' + filter_pattern + ' }'),
                metric_name=alarm_name,
                metric_namespace="LogMetrics",
                metric_value='1')
            cloudwatch.Alarm(
                self,
                "Alarm_" + alarm_name,
                alarm_name=alarm_name,
                alarm_description=alarm_name,
                statistic='Sum',
                period=core.Duration.minutes(5),
                comparison_operator=cloudwatch.ComparisonOperator.
                GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
                evaluation_periods=1,
                threshold=1,
                metric=cloudwatch.Metric(metric_name=alarm_name,
                                         namespace="LogMetrics"),
            ).add_alarm_action(cloudwatch_actions_cis)

        # IAM Password Policy custom resource CIS 1.5 - 1.11
        cfn_template = cfn_inc.CfnInclude(
            self,
            "includeTemplate",
            template_file="account-password-policy.yaml",
            parameters={
                "MaxPasswordAge": 90,
                "MinimumPasswordLength": 14,
                "PasswordReusePrevention": 24,
                "RequireLowercaseCharacters": True,
                "RequireNumbers": True,
                "RequireSymbols": True,
                "RequireUppercaseCharacters": True,
            })

        # CIS 1.20
        support_role = iam.Role(
            self,
            "SupportRole",
            assumed_by=iam.AccountPrincipal(
                account_id=core.Stack.of(self).account),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AWSSupportAccess')
            ],
            role_name='AWSSupportAccess')

        guardduty_detector = guardduty.CfnDetector(self,
                                                   'GuardDutyDetector',
                                                   enable=True)

        guardduty_event = events.Rule(
            self,
            'GuardDutyEvent',
            rule_name='guardduty-notification',
            description='GuardDuty Notification',
            event_pattern=events.EventPattern(
                source=['aws.guardduty'], detail_type=['GuardDuty Finding']),
            targets=[events_targets.SnsTopic(security_notifications_topic)])
Example No. 20
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        bucket_name = 'devassoc-monitored'
        bucket = s3.Bucket(self,
                           'bucket-monitored',
                           bucket_name=bucket_name,
                           removal_policy=core.RemovalPolicy.DESTROY,
                           auto_delete_objects=True)

        core.CfnOutput(self, 'monitored-bucket', value=bucket.bucket_name)

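        # BucketSizeBytes is only reported once per day, hence the one-day period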
        size_metric = cw.Metric(namespace='AWS/S3',
                                metric_name='BucketSizeBytes',
                                dimensions={
                                    'BucketName': bucket.bucket_name,
                                    'StorageType': 'StandardStorage'
                                },
                                period=core.Duration.days(1))
        size_alarm = size_metric.create_alarm(
            self,
            'bucket-alarm',
            alarm_name='S3 Storage Alarm',
            comparison_operator=cw.ComparisonOperator.
            GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            evaluation_periods=1,
            period=core.Duration.days(1),
            threshold=1000,
            actions_enabled=True)
        size_topic = sns.Topic(self,
                               'size-topic',
                               display_name='My S3 Alarm List')
        email_param = ssm.StringParameter.from_string_parameter_name(
            self, 'email-param', 'notification-email')
        size_topic_sub = sns.Subscription(
            self,
            'size-topic-sub',
            topic=size_topic,
            protocol=sns.SubscriptionProtocol.EMAIL,
            endpoint=email_param.string_value)
        size_action = cwa.SnsAction(size_topic)
        size_alarm.add_alarm_action(size_action)

        bucket_name = 'devassoc-s3-logs'
        log_bucket = s3.Bucket(self,
                               'bucket-s3-logs',
                               bucket_name=bucket_name,
                               removal_policy=core.RemovalPolicy.DESTROY,
                               auto_delete_objects=True)
        s3_trail = ct.Trail(self,
                            'bucket-trail',
                            bucket=log_bucket,
                            trail_name='s3_logs')
        s3_trail.add_s3_event_selector([ct.S3EventSelector(bucket=bucket)])
        s3_trail.log_all_s3_data_events()

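        # Dashboard with a current-value widget and a time-series graph of the
        # same bucket-size metric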
        single_value_widget = cw.SingleValueWidget(metrics=[size_metric])
        graph_widget = cw.GraphWidget(left=[size_metric])
        cw.Dashboard(self,
                     'cloudwatch-dashboard',
                     dashboard_name='S3Dashboard',
                     widgets=[[single_value_widget, graph_widget]])
Example No. 21
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # Import the function code
        try:
            with open("serverless_stack/functions/metric_logs_generator.py",
                      mode="r") as file:
                function_body = file.read()
        except OSError:
            print('Unable to read function code file')

        # Lambda function
        function_01 = aws_lambda.Function(
            self,
            "lambdafunction01",
            function_name="LambdaTestCustomMetric",
            runtime=aws_lambda.Runtime.PYTHON_3_6,
            handler="index.lambda_handler",
            code=aws_lambda.InlineCode(function_body),
            timeout=core.Duration.seconds(5),
            reserved_concurrent_executions=1,
            environment={
                'LOG_LEVEL': 'INFO',
                'PERCENTAGE_ERRORS': '75'
            })

        # Attached CloudWatch log group
        custom_metric_log_group01 = aws_logs.LogGroup(
            self,
            "cloudwatchlog01",
            log_group_name=f"/aws/lambda/{function_01.function_name}",
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=aws_logs.RetentionDays.ONE_DAY)

        # Custom metric namespace
        custom_metric_namespace01 = aws_cw.Metric(
            namespace="custom-error-metric",
            metric_name="custom-error-metric",
            label="Amount of Custom API errors",
            period=core.Duration.minutes(1),
            statistic="Sum")

        # Custom metric log filter
        custom_metric_filter01 = aws_logs.MetricFilter(
            self,
            "customMetricFilter",
            filter_pattern=aws_logs.FilterPattern.boolean_value(
                "$.custom_api_error", True),
            log_group=custom_metric_log_group01,
            metric_namespace=custom_metric_namespace01.namespace,
            metric_name=custom_metric_namespace01.metric_name,
            default_value=0,
            metric_value="1")

        # Create custom alarm
        custom_metric_alarm01 = aws_cw.Alarm(
            self,
            "customMetricAlarm",
            alarm_description="Custom API errors",
            alarm_name="Custom-API-alarm",
            metric=custom_metric_namespace01,
            comparison_operator=aws_cw.ComparisonOperator.
            GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            threshold=2,
            evaluation_periods=2,
            datapoints_to_alarm=1,
            period=core.Duration.minutes(1),
            treat_missing_data=aws_cw.TreatMissingData.NOT_BREACHING)
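        # The alarm above has no action attached; wiring it to SNS would be a
        # one-liner (hypothetical topic and actions-module alias, left
        # commented out):
        # custom_metric_alarm01.add_alarm_action(
        #     aws_cw_actions.SnsAction(alarm_topic))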

        # CloudWatch dashboard
        custom_dashboard01 = aws_cw.Dashboard(
            self, id="CustomDashBoard", dashboard_name="CDK-custom-DashBoard")

        # Lambda metrics to dashboard
        custom_dashboard01.add_widgets(
            aws_cw.Row(
                aws_cw.GraphWidget(title="Lambda-invoke",
                                   left=[
                                       function_01.metric_invocations(
                                           statistic="Sum",
                                           period=core.Duration.minutes(1))
                                   ]),
                aws_cw.GraphWidget(title="Lambda-errors",
                                   left=[
                                       function_01.metric_errors(
                                           statistic="Sum",
                                           period=core.Duration.minutes(1))
                                   ])))

        # Custom API errors to dashboard
        custom_dashboard01.add_widgets(
            aws_cw.Row(
                aws_cw.SingleValueWidget(title="Custom-API-errors",
                                         metrics=[custom_metric_namespace01])))
Example No. 22
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # Load configs from "./configurations/config.json"
        configs = {}
        with open("./configurations/config.json") as json_file:
            configs = json.load(json_file)

        # Default lambdas for testing
        mem_list = configs['MemorySizeList']
        cold_start_lambdas = {}
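        # One function per runtime and memory size; the construct id embeds
        # the memory size to keep ids unique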
        for mem in mem_list:
            python38_lambda = lambda_.Function(
                self,
                id="coldstart_python38_" + str(mem) + "_",
                runtime=lambda_.Runtime.PYTHON_3_8,
                handler="lambda_function.lambda_handler",
                memory_size=mem,
                tracing=lambda_.Tracing.ACTIVE,
                code=lambda_.Code.asset("./cold_start_lambdas/python38"))
            cold_start_lambdas['PYTHON38_' + str(mem)] = python38_lambda

        for mem in mem_list:
            nodejs12x_lambda = lambda_.Function(
                self,
                id="coldstart_nodejs12x" + str(mem) + "_",
                runtime=lambda_.Runtime.NODEJS_12_X,
                handler="index.handler",
                memory_size=mem,
                tracing=lambda_.Tracing.ACTIVE,
                code=lambda_.Code.asset("./cold_start_lambdas/nodejs12x"))
            cold_start_lambdas['NODEJS12X_' + str(mem)] = nodejs12x_lambda

        for mem in mem_list:
            go1x_lambda = lambda_.Function(
                self,
                id="coldstart_go1x" + str(mem) + "_",
                runtime=lambda_.Runtime.GO_1_X,
                handler="hello",
                memory_size=mem,
                tracing=lambda_.Tracing.ACTIVE,
                code=lambda_.Code.asset("./cold_start_lambdas/go1x"))
            cold_start_lambdas['GO1X_' + str(mem)] = go1x_lambda

        for mem in mem_list:
            netcore31_lambda = lambda_.Function(
                self,
                id="coldstart_netcore31" + str(mem) + "_",
                runtime=lambda_.Runtime.DOTNET_CORE_3_1,
                handler="LambdaTest::LambdaTest.LambdaHandler::handleRequest",
                tracing=lambda_.Tracing.ACTIVE,
                code=lambda_.Code.asset("./cold_start_lambdas/netcore31"),
                memory_size=mem,
            )
            cold_start_lambdas['NETCORE31_' + str(mem)] = netcore31_lambda

        for mem in mem_list:
            java11corretto_lambda = lambda_.Function(
                self,
                id="coldstart_java11corretto" + str(mem) + "_",
                runtime=lambda_.Runtime.JAVA_11,
                handler="example.Hello::handleRequest",
                memory_size=mem,
                tracing=lambda_.Tracing.ACTIVE,
                code=lambda_.Code.asset("./cold_start_lambdas/java11corretto"))
            cold_start_lambdas['JAVA11_' + str(mem)] = java11corretto_lambda

        for mem in mem_list:
            ruby27_lambda = lambda_.Function(
                self,
                id="coldstart_ruby27" + str(mem) + "_",
                runtime=lambda_.Runtime.RUBY_2_7,
                handler="lambda_function.lambda_handler",
                memory_size=mem,
                tracing=lambda_.Tracing.ACTIVE,
                code=lambda_.Code.asset("./cold_start_lambdas/ruby27"))
            cold_start_lambdas['RUBY27_' + str(mem)] = ruby27_lambda

        # Caller
        cold_start_caller = lambda_.Function(
            self,
            id="cold_start_caller",
            runtime=lambda_.Runtime.PYTHON_3_8,
            handler="ColdStartCaller.lambda_handler",
            code=lambda_.Code.asset("./cold_start_lambdas/cold_start_caller"),
            timeout=core.Duration.seconds(180))
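        # Presumably the caller walks X-Ray traces and reads each target's
        # configuration, hence the read-only X-Ray policy and
        # lambda:GetFunctionConfiguration below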
        cold_start_caller.role.add_managed_policy(
            iam_.ManagedPolicy.from_aws_managed_policy_name(
                "AWSXrayReadOnlyAccess"))
        cold_start_caller.role.add_to_policy(
            iam_.PolicyStatement(effect=iam_.Effect.ALLOW,
                                 actions=['lambda:GetFunctionConfiguration'],
                                 resources=["*"]))
        for lambda_name in cold_start_lambdas:
            cold_start_caller.add_environment(
                lambda_name, cold_start_lambdas[lambda_name].function_arn)
            cold_start_lambdas[lambda_name].grant_invoke(cold_start_caller)

        # DynamoDB
        cold_start_table = dynamodb_.Table(
            self,
            id="cold_start_benchmark_table",
            partition_key=dynamodb_.Attribute(
                name="PK", type=dynamodb_.AttributeType.STRING),
            sort_key=dynamodb_.Attribute(name="SK",
                                         type=dynamodb_.AttributeType.NUMBER),
            time_to_live_attribute="TTL")
        cold_start_table.grant_write_data(cold_start_caller)
        cold_start_caller.add_environment('TABLE_NAME',
                                          cold_start_table.table_name)

        # S3
        life_cycle_rule = s3_.LifecycleRule(transitions=[
            s3_.Transition(storage_class=s3_.StorageClass.INFREQUENT_ACCESS,
                           transition_after=core.Duration.days(30))
        ])
        cold_start_backup_s3 = s3_.Bucket(self,
                                          "cold_start_benchmark_backup",
                                          lifecycle_rules=[life_cycle_rule])
        cold_start_backup_s3.grant_write(cold_start_caller)
        cold_start_caller.add_environment('BACKUP_BUCKET_NAME',
                                          cold_start_backup_s3.bucket_name)

        # CW event
        cron_job = events_.Rule(
            self,
            "cold_start_caller_cron_job",
            description="Run cold start caller twice every 1 hour",
            schedule=events_.Schedule.cron(minute="0,1"),
            targets=[targets_.LambdaFunction(cold_start_caller)])

        # alarm when caller failed, send email for notification
        errorAlarm = cloudwatch_.Alarm(
            self,
            "cold_start_caller_error_alarm",
            metric=cloudwatch_.Metric(
                metric_name="Errors",
                namespace="AWS/Lambda",
                period=core.Duration.minutes(5),
                statistic="Maximum",
                dimensions={"FunctionName": cold_start_caller.function_name}),
            evaluation_periods=1,
            datapoints_to_alarm=1,
            threshold=1,
            actions_enabled=True,
            alarm_description="Alarm when cold start caller failed",
            alarm_name="cold_start_caller_errer_alarm",
            comparison_operator=cloudwatch_.ComparisonOperator.
            GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            treat_missing_data=cloudwatch_.TreatMissingData.MISSING)
        cold_start_caller_error_alarm_topic = sns_.Topic(
            self,
            "cold_start_caller_error_alarm_topic",
            display_name="ColdStartCallerErrorAlarmTopic",
            topic_name="ColdStartCallerErrorAlarmTopic")
        cold_start_caller_error_alarm_topic.add_subscription(
            sns_subs_.EmailSubscription(
                configs['AlarmNotificationEmailAddress']))
        errorAlarm.add_alarm_action(
            cloudwatch_actions_.SnsAction(cold_start_caller_error_alarm_topic))

        # Summarizer
        cold_start_summarizer = lambda_.Function(
            self,
            id="cold_start_summarizer",
            runtime=lambda_.Runtime.PYTHON_3_8,
            handler="ColdStartSummarizer.lambda_handler",
            code=lambda_.Code.asset(
                "./cold_start_lambdas/cold_start_summarizer"),
            timeout=core.Duration.seconds(10))
        cold_start_table.grant_read_write_data(cold_start_summarizer)
        cold_start_summarizer.add_environment('TABLE_NAME',
                                              cold_start_table.table_name)

        # setup CW event for summarizer
        cron_job_summarizer = events_.Rule(
            self,
            "cold_start_summarizer_cron_job",
            description="Run cold start summarizer once every day",
            schedule=events_.Schedule.cron(minute='30', hour='0'),
            targets=[targets_.LambdaFunction(cold_start_summarizer)])

        # error alarm for summarizer
        errorAlarm_summarizer = cloudwatch_.Alarm(
            self,
            "cold_start_summarizer_error_alarm",
            metric=cloudwatch_.Metric(metric_name='Errors',
                                      namespace='AWS/Lambda',
                                      period=core.Duration.minutes(5),
                                      statistic='Maximum',
                                      dimensions={
                                          'FunctionName':
                                          cold_start_summarizer.function_name
                                      }),
            evaluation_periods=1,
            datapoints_to_alarm=1,
            threshold=1,
            actions_enabled=True,
            alarm_description="Alarm when cold start summarizer failed",
            alarm_name="cold_start_summarizer_errer_alarm",
            comparison_operator=cloudwatch_.ComparisonOperator.
            GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            treat_missing_data=cloudwatch_.TreatMissingData.MISSING)
        cold_start_summarizer_error_alarm_topic = sns_.Topic(
            self,
            "cold_start_summarizer_error_alarm_topic",
            display_name="ColdStartSummarizerErrorAlarmTopic",
            topic_name="ColdStartSummarizerErrorAlarmTopic")
        cold_start_summarizer_error_alarm_topic.add_subscription(
            sns_subs_.EmailSubscription(
                configs['AlarmNotificationEmailAddress']))
        errorAlarm_summarizer.add_alarm_action(
            cloudwatch_actions_.SnsAction(
                cold_start_summarizer_error_alarm_topic))

        # GraphQL API
        graphql_api = appsync_.GraphqlApi(
            self,
            "cold_start_benchmark_graphql_api",
            name="cold_start_benchmark_graphql_api",
            authorization_config=appsync_.AuthorizationConfig(
                default_authorization=appsync_.AuthorizationMode(
                    authorization_type=appsync_.AuthorizationType.API_KEY,
                    api_key_config=appsync_.ApiKeyConfig(
                        description="cold_start_benchmark_graphql_api_key",
                        expires=core.Expiration.after(core.Duration.days(365)),
                        name="cold_start_benchmark_graphql_api_key"))),
            schema=appsync_.Schema.from_asset(
                './cold_start_benchmark/graphql_schema/schema.graphql'),
            xray_enabled=True)
        dynamodb_data_source = graphql_api.add_dynamo_db_data_source(
            id="cold_start_dynamodb_data_source", table=cold_start_table)
        dynamodb_data_source.create_resolver(
            field_name="listColdStartSummariesAfterTimestamp",
            type_name="Query",
            request_mapping_template=appsync_.MappingTemplate.from_file(
                './cold_start_benchmark/graphql_schema/request_mapping_template'
            ),
            response_mapping_template=appsync_.MappingTemplate.from_file(
                './cold_start_benchmark/graphql_schema/response_mapping_template'
            ))

        front_end_amplify_app = amplify_.App(
            self,
            "cold-start-front-end",
            app_name="cold_start_front_end",
            source_code_provider=amplify_.GitHubSourceCodeProvider(
                owner="ZzzGin",
                repository="cold-start-frontend-website",
                oauth_token=core.SecretValue.secrets_manager(
                    "zzzgin/github/token", json_field="zzzgin-github-token")))
        master_branch = front_end_amplify_app.add_branch("master")
        domain = front_end_amplify_app.add_domain('zzzgin.com')
        domain.map_sub_domain(master_branch, 'coldstart')
Example No. 23
    def __init__(self, app: core.App, id: str) -> None:
        super().__init__(app, id)

        with open("config.json") as f:
            self.config = json.load(f)
        assert (
            "SECRET_KEY"
            in self.config), "Need random SECRET_KEY specified in config.json"
        assert (
            "CERTIFICATE_ARN"
            in self.config), "Need CERTIFICATE_ARN specified in config.json"

        self.lambda_dir = "assets/lambda"
        os.makedirs(os.path.join(self.lambda_dir, "templates", "generated"),
                    exist_ok=True)

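        # Fetch release metadata from GitHub at synth time to render the
        # changelog page below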
        r = requests.get(
            "https://api.github.com/repos/sumpfork/dominiontabs/releases")
        changelog = r.json()
        changelog = [{
            "url": ch["html_url"],
            "date": dt.datetime.strptime(ch["published_at"][:10],
                                         "%Y-%m-%d").date(),
            "name": ch["name"],
            "tag": ch["tag_name"],
            "description": ch["body"],
        } for ch in changelog]

        env = Environment(loader=FileSystemLoader("templates"),
                          autoescape=select_autoescape(["html"]))
        t = env.get_template("changelog.html.j2")
        generated_template_path = os.path.join(self.lambda_dir, "templates",
                                               "generated")
        shutil.rmtree(generated_template_path)
        os.mkdir(generated_template_path)

        with open(
                os.path.join(generated_template_path, "changelog.html"),
                "w",
        ) as f:
            f.write(t.render(changelog=changelog))

        static_website_bucket = s3.Bucket(
            self,
            "Dominion Divider Generator Site",
        )

        cf_static_dist = cloudfront.Distribution(
            self,
            "StaticCloudfrontDist",
            default_behavior=cloudfront.BehaviorOptions(
                origin=cloudfront_origins.S3Origin(static_website_bucket)),
        )

        s3_deployment.BucketDeployment(
            self,
            "Static Files Deployment",
            sources=[s3_deployment.Source.asset("./static")],
            destination_bucket=static_website_bucket,
            destination_key_prefix="static",
        )

        flask_app = lambda_python.PythonFunction(
            self,
            "DominionDividersFlaskApp",
            entry=self.lambda_dir,
            index="lambda-handlers.py",
            handler="apig_wsgi_handler",
            environment={
                "STATIC_WEB_URL": f"https://{cf_static_dist.domain_name}",
                "FLASK_SECRET_KEY": self.config["SECRET_KEY"],
                "GA_CONFIG": self.config.get("GA_CONFIG", ""),
            },
            timeout=core.Duration.seconds(60),
            memory_size=512,
            runtime=lambda_.Runtime.PYTHON_3_8,
        )
        api = apig.LambdaRestApi(
            self,
            "bgtools-api",
            handler=flask_app,
            binary_media_types=["*/*"],
            minimum_compression_size=100_000,
            deploy_options={
                "method_options": {
                    "/*/*":
                    apig.MethodDeploymentOptions(throttling_rate_limit=10,
                                                 throttling_burst_limit=20)
                }
            },
        )
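        # Front the regional API URL with CloudFront; Fn.split/Fn.select pull
        # the domain (index 2) and stage (index 3) out of api.url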
        cloudfront.Distribution(
            self,
            "BGToolsCloudfrontDist",
            default_behavior=cloudfront.BehaviorOptions(
                origin=cloudfront_origins.HttpOrigin(
                    core.Fn.select(2, core.Fn.split("/", api.url)),
                    origin_path=core.Fn.join(
                        "",
                        ["/",
                         core.Fn.select(3, core.Fn.split("/", api.url))]),
                ),
                origin_request_policy=cloudfront.OriginRequestPolicy(
                    self,
                    "OriginRequestPolicy",
                    cookie_behavior=cloudfront.OriginRequestCookieBehavior.all(
                    ),
                ),
                allowed_methods=cloudfront.AllowedMethods.ALLOW_ALL,
            ),
            domain_names=["domdiv.bgtools.net"],
            certificate=acm.Certificate.from_certificate_arn(
                self,
                "cert",
                self.config["CERTIFICATE_ARN"],
            ),
        )

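        # One dashboard: request counts (5XX/4XX/total) and latencies, both in
        # 30-minute periods over the trailing day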
        dashboard = aws_cloudwatch.Dashboard(
            self,
            f"bgtools-dashboard",
            dashboard_name=f"bgtools-prod",
            start="-P1D",
            period_override=aws_cloudwatch.PeriodOverride.INHERIT,
        )
        dashboard.add_widgets(
            aws_cloudwatch.GraphWidget(
                title="API Gateway Counts",
                width=6,
                height=6,
                left=[
                    aws_cloudwatch.Metric(
                        namespace="AWS/ApiGateway",
                        metric_name="5XXError",
                        dimensions={
                            "ApiName": "bgtools-api",
                            "Stage": api.deployment_stage.stage_name,
                        },
                        period=core.Duration.minutes(amount=30),
                        statistic="Sum",
                        color="#d62728",
                    ),
                    aws_cloudwatch.Metric(
                        namespace="AWS/ApiGateway",
                        metric_name="4XXError",
                        dimensions={
                            "ApiName": "bgtools-api",
                            "Stage": api.deployment_stage.stage_name,
                        },
                        period=core.Duration.minutes(amount=30),
                        statistic="Sum",
                        color="#8c564b",
                    ),
                    aws_cloudwatch.Metric(
                        namespace="AWS/ApiGateway",
                        metric_name="Count",
                        dimensions={
                            "ApiName": "bgtools-api",
                            "Stage": api.deployment_stage.stage_name,
                        },
                        period=core.Duration.minutes(amount=30),
                        statistic="Sum",
                        color="#2ca02c",
                    ),
                ],
            ),
            aws_cloudwatch.GraphWidget(
                title="API Gateway Latencies",
                width=6,
                height=6,
                left=[
                    aws_cloudwatch.Metric(
                        namespace="AWS/ApiGateway",
                        metric_name="Latency",
                        dimensions={
                            "ApiName": "bgtools-api",
                            "Stage": api.deployment_stage.stage_name,
                        },
                        period=core.Duration.minutes(amount=30),
                        statistic="Average",
                    ),
                    aws_cloudwatch.Metric(
                        namespace="AWS/ApiGateway",
                        metric_name="IntegrationLatency",
                        dimensions={
                            "ApiName": "bgtools-api",
                            "Stage": api.deployment_stage.stage_name,
                        },
                        period=core.Duration.minutes(amount=30),
                        statistic="Average",
                    ),
                ],
            ),
        )
Example No. 24
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Create SNS Topic for Operations Team
        konstone_ops_team = _sns.Topic(self,
                                       "konstoneOpsTeam",
                                       display_name="KonStone 24x7 On Watsapp? Support",
                                       topic_name="konstoneOpsTeam"
                                       )

        # Add Subscription to SNS Topic
        konstone_ops_team.add_subscription(
            _subs.EmailSubscription("*****@*****.**")
        )

        # Create a Multi-AZ VPC
        vpc = _ec2.Vpc(
            self,
            "konstoneVpcId",
            cidr="10.111.0.0/16",
            max_azs=2,
            nat_gateways=0,
            subnet_configuration=[
                _ec2.SubnetConfiguration(
                    name="public", subnet_type=_ec2.SubnetType.PUBLIC
                )
            ]
        )

        # Read EC2 BootStrap Script
        try:
            with open("bootstrap_scripts/install_httpd.sh", mode="r") as file:
                user_data = file.read()
        except OSError:
            print('Unable to read UserData script')

        # Get the latest ami
        amzn_linux_ami = _ec2.MachineImage.latest_amazon_linux(
            generation=_ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
            edition=_ec2.AmazonLinuxEdition.STANDARD,
            storage=_ec2.AmazonLinuxStorage.EBS,
            virtualization=_ec2.AmazonLinuxVirt.HVM
        )

        # WebServer Instance
        web_server = _ec2.Instance(self,
                                   "WebServer004Id",
                                   instance_type=_ec2.InstanceType(
                                       instance_type_identifier="t2.micro"),
                                   instance_name="WebServer004",
                                   machine_image=amzn_linux_ami,
                                   vpc=vpc,
                                   vpc_subnets=_ec2.SubnetSelection(
                                       subnet_type=_ec2.SubnetType.PUBLIC
                                   ),
                                   user_data=_ec2.UserData.custom(user_data)
                                   )

        # Allow Web Traffic to WebServer
        web_server.connections.allow_from_any_ipv4(
            _ec2.Port.tcp(80), description="Allow Web Traffic"
        )

        # Add permission to web server instance profile
        web_server.role.add_managed_policy(
            _iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSSMManagedInstanceCore")
        )

        # Read Lambda Code
        try:
            with open("serverless_stacks/lambda_src/konstone_processor.py", mode="r") as f:
                konstone_fn_code = f.read()
        except OSError:
            print("Unable to read Lambda Function Code")

        # Simple Lambda Function to return event
        konstone_fn = _lambda.Function(self,
                                       "konstoneFunction",
                                       function_name="konstone_function",
                                       runtime=_lambda.Runtime.PYTHON_3_7,
                                       handler="index.lambda_handler",
                                       code=_lambda.InlineCode(
                                           konstone_fn_code),
                                       timeout=core.Duration.seconds(3),
                                       reserved_concurrent_executions=1,
                                       environment={
                                           "LOG_LEVEL": "INFO",
                                           "AUTOMATION": "SKON"
                                       }
                                       )

        # EC2 Metric for Avg. CPU
        ec2_metric_for_avg_cpu = _cloudwatch.Metric(
            namespace="AWS/EC2",
            metric_name="CPUUtilization",
            dimensions={
                "InstanceId": web_server.instance_id
            },
            period=core.Duration.minutes(5)
        )

        # Low CPU Alarm for Web Server
        low_cpu_alarm = _cloudwatch.Alarm(
            self,
            "lowCPUAlarm",
            alarm_description="Alert if CPU is less than 10%",
            alarm_name="low-cpu-alarm",
            actions_enabled=True,
            metric=ec2_metric_for_avg_cpu,
            threshold=10,
            comparison_operator=_cloudwatch.ComparisonOperator.LESS_THAN_OR_EQUAL_TO_THRESHOLD,
            evaluation_periods=1,
            datapoints_to_alarm=1,
            period=core.Duration.minutes(5),
            treat_missing_data=_cloudwatch.TreatMissingData.NOT_BREACHING
        )

        # Inform SNS on EC2 Alarm State
        low_cpu_alarm.add_alarm_action(
            _cloudwatch_actions.SnsAction(
                konstone_ops_team
            )
        )

        # Create Lambda Alarm
        konstone_fn_error_alarm = _cloudwatch.Alarm(
            self,
            "konstoneFunctionErrorAlarm",
            metric=konstone_fn.metric_errors(),
            threshold=2,
            evaluation_periods=1,
            datapoints_to_alarm=1,
            period=core.Duration.minutes(5)
        )

        # Inform SNS on Lambda Alarm State
        konstone_fn_error_alarm.add_alarm_action(
            _cloudwatch_actions.SnsAction(
                konstone_ops_team
            )
        )
Example No. 25
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Let's create a couple of instances to test
        vpc = _ec2.Vpc(self,
                       "abacVPC",
                       cidr="10.13.0.0/21",
                       max_azs=2,
                       nat_gateways=0,
                       subnet_configuration=[
                           _ec2.SubnetConfiguration(
                               name="pubSubnet",
                               cidr_mask=24,
                               subnet_type=_ec2.SubnetType.PUBLIC)
                       ])
        core.Tag.add(vpc,
                     key="ServiceProvider",
                     value="KonStone",
                     include_resource_types=[])

        weak_sg = _ec2.SecurityGroup(
            self,
            "web_sec_grp",
            vpc=vpc,
            description="Allow internet access from the world",
            allow_all_outbound=True)
        # vpc_cidr_block
        # weak_sg.add_ingress_rule(_ec2.Peer.any_ipv4(),
        weak_sg.add_ingress_rule(_ec2.Peer.ipv4(vpc.vpc_cidr_block),
                                 _ec2.Port.tcp(22),
                                 "Allow SSH access from the VPC Only.")

        # We are using the latest AMAZON LINUX AMI
        # Benefit of having SSM Agent pre-installed
        ami_id = _ec2.AmazonLinuxImage(generation=_ec2.AmazonLinuxGeneration.
                                       AMAZON_LINUX_2).get_image(self).image_id

        # https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_iam/Role.html
        instance_profile_role = _iam.Role(
            self,
            'ec2ssmroleid',
            assumed_by=_iam.ServicePrincipal('ec2.amazonaws.com'),
            role_name="instance_profile_role")

        instance_profile_role.add_managed_policy(
            _iam.ManagedPolicy.from_aws_managed_policy_name(
                'AmazonSSMManagedInstanceCore'))

        instance_profile_role_additional_perms = _iam.PolicyStatement(
            effect=_iam.Effect.ALLOW,
            resources=[
                "arn:aws:logs:*:*:*",
            ],
            actions=["logs:Create*", "logs:PutLogEvents"])
        instance_profile_role_additional_perms.sid = "AllowCloudWatchLogsWrite"
        instance_profile_role.add_to_policy(
            instance_profile_role_additional_perms)

        inst_profile_01 = _iam.CfnInstanceProfile(
            self,
            "instProfile01Id",
            roles=[instance_profile_role.role_name],
        )

        # Let us bootstrap the server with the required agents
        try:
            with open("./bootstrap_scripts/install_agents.sh",
                      mode='rb') as file:
                bootstrap_data = file.read()
        except OSError:
            print('Failed to get UserData script')

        install_agents = _ec2.UserData.for_linux()
        install_agents.add_commands(str(bootstrap_data, 'utf-8'))

        # The EC2 Instance to monitor for failed SSH Logins
        ssh_monitored_inst_01 = _ec2.CfnInstance(
            self,
            "sshMonitoredInstance01",
            image_id=ami_id,
            instance_type="t2.micro",
            monitoring=False,
            tags=[{
                "key": "ServiceProvider",
                "value": "KonStone"
            }],
            iam_instance_profile=inst_profile_01.ref,
            network_interfaces=[{
                "deviceIndex": "0",
                "associatePublicIpAddress": True,
                "subnetId": vpc.public_subnets[0].subnet_id,
                "groupSet": [weak_sg.security_group_id]
            }],  # https://github.com/aws/aws-cdk/issues/3419
            user_data=core.Fn.base64(install_agents.render()),
        )
        """
        linux_ami = _ec2.GenericLinuxImage({ "cn-northwest-1": "ami-0f62e91915e16cfc2","eu-west-1": "ami-12345678"})
        ssh_monitored_inst_01_02 = _ec2.Instance(self,
            "monitoredInstance02",
            instance_type=_ec2.InstanceType(instance_type_identifier="t2.micro"),
            instance_name="monitoredInstance02",
            machine_image=linux_ami,
            vpc=vpc,
            security_group=[weak_sg.security_group_id],
            # vpc_subnets=_ec2.SubnetSelection(subnet_type=_ec2.SubnetType.PUBLIC)
            vpc_subnets=vpc.public_subnets[0].subnet_id,
            # user_data=_ec2.UserData.custom(t_user_data)
            )
        """

        # The log group name to store logs
        info_sec_ops_log_group = _logs.LogGroup(
            self,
            "infoSecOpsLogGroupId",
            log_group_name=(f"/Mystique/InfoSec/Automation/"
                            f"{ssh_monitored_inst_01.ref}"),
            retention=_logs.RetentionDays.ONE_WEEK)

        # Defines an AWS Lambda resource

        with open("lambda_src/quarantine_ec2_instance.py",
                  encoding="utf8") as fp:
            quarantine_ec2_instance_fn_handler_code = fp.read()

        quarantine_ec2_instance_fn = _lambda.Function(
            self,
            id='quarantineEc2InstanceFnId',
            function_name="quarantine_ec2_instance",
            runtime=_lambda.Runtime.PYTHON_3_7,
            code=_lambda.InlineCode(quarantine_ec2_instance_fn_handler_code),
            handler='index.lambda_handler',
            timeout=core.Duration.seconds(5))
        quarantine_ec2_instance_fn_perms = _iam.PolicyStatement(
            effect=_iam.Effect.ALLOW,
            resources=[
                "*",
            ],
            actions=[
                "ec2:RevokeSecurityGroupIngress",
                "ec2:DescribeSecurityGroupReferences",
                "ec2:RevokeSecurityGroupEgress",
                "ec2:ApplySecurityGroupsToClientVpnTargetNetwork",
                "ec2:DescribeSecurityGroups", "ec2:CreateSecurityGroup",
                "ec2:DescribeInstances", "ec2:CreateTags", "ec2:StopInstances",
                "ec2:CreateVolume", "ec2:CreateSnapshots",
                "ec2:CreateSnapshot", "ec2:DescribeSnapshots",
                "ec2:ModifyInstanceAttribute"
            ])
        quarantine_ec2_instance_fn_perms.sid = "AllowLambdaToQuarantineEC2"
        quarantine_ec2_instance_fn.add_to_role_policy(
            quarantine_ec2_instance_fn_perms)

        info_sec_ops_topic = _sns.Topic(self,
                                        "infoSecOpsTopicId",
                                        display_name="InfoSecTopic",
                                        topic_name="InfoSecOpsTopic")

        # Ref: https://docs.aws.amazon.com/cdk/api/latest/docs/aws-stepfunctions-readme.html
        ###############################################################################
        ################# STEP FUNCTIONS EXPERIMENTAL CODE - UNSTABLE #################
        ###############################################################################

        quarantine_ec2_instance_task = _sfn.Task(
            self,
            "Quarantine EC2 Instance",
            task=_tasks.InvokeFunction(quarantine_ec2_instance_fn),
            result_path="$")

        notify_secops_task = _sfn.Task(
            self,
            "Notify InfoSecOps",
            task=_tasks.PublishToTopic(
                info_sec_ops_topic,
                integration_pattern=_sfn.ServiceIntegrationPattern.
                FIRE_AND_FORGET,
                message=_sfn.TaskInput.from_data_at("$.message"),
                subject="SSH Error Response Notification"))

        ssh_error_response_failure = _sfn.Fail(
            self,
            "SSH Error Response Actions Failed",
            cause="All Response Actions were NOT completed",
            error="Check Logs")

        ssh_error_response_success = _sfn.Succeed(
            self,
            "SSH Error Response Actions Succeeded",
            comment="All Response Action Completed Successfully",
        )

        ssh_error_response_sfn_definition = quarantine_ec2_instance_task\
            .next(notify_secops_task\
                .next(_sfn.Choice(self, "SSH Errors Response Complete?")\
                    .when(_sfn.Condition.number_equals("$.SdkHttpMetadata.HttpStatusCode", 200), ssh_error_response_success)\
                    .otherwise(ssh_error_response_failure)
                    )
            )

        ssh_error_response_statemachine = _sfn.StateMachine(
            self,
            "stateMachineId",
            definition=ssh_error_response_sfn_definition,
            timeout=core.Duration.minutes(5))

        ###############################################################################
        ################# STEP FUNCTIONS EXPERIMENTAL CODE - UNSTABLE #################
        ###############################################################################

        # LAMBDA TO TRIGGER STATE MACHINE - since state machines cannot be invoked by SNS directly
        with open("lambda_src/trigger_state_machine.py",
                  encoding="utf8") as fp:
            trigger_state_machine_fn_handler_code = fp.read()

        trigger_state_machine_fn = _lambda.Function(
            self,
            id='sshErrorResponseFnId',
            function_name="trigger_ssh_error_response_state_machine_fn",
            runtime=_lambda.Runtime.PYTHON_3_7,
            code=_lambda.InlineCode(trigger_state_machine_fn_handler_code),
            # code=_lambda.Code.asset("lambda_src/is_policy_permissive.py"),
            # code=_lambda.Code.asset('lambda_src'),
            # code=_lambda.InlineCode(code_body),
            handler='index.lambda_handler',
            timeout=core.Duration.seconds(5),
            environment={
                "STATE_MACHINE_ARN":
                f"{ssh_error_response_statemachine.state_machine_arn}",
            })

        trigger_state_machine_fn_perms = _iam.PolicyStatement(
            effect=_iam.Effect.ALLOW,
            resources=[
                f"{ssh_error_response_statemachine.state_machine_arn}",
            ],
            actions=["states:StartExecution"])
        trigger_state_machine_fn_perms.sid = "AllowStartExecutionOfStateMachine"
        trigger_state_machine_fn.add_to_role_policy(
            trigger_state_machine_fn_perms)
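        # For reference, a minimal sketch of what lambda_src/trigger_state_machine.py
        # might look like; the actual handler is read from disk above, so this inactive
        # block is an assumption for illustration only.
        """
        import json
        import os
        import boto3

        def lambda_handler(event, context):
            sfn = boto3.client('stepfunctions')
            # Forward the triggering log/SNS event as the execution input
            resp = sfn.start_execution(
                stateMachineArn=os.environ['STATE_MACHINE_ARN'],
                input=json.dumps(event))
            return {"executionArn": resp["executionArn"]}
        """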
        """
        version = trigger_state_machine_fn.add_version(name=datetime.now().isoformat())
        trigger_state_machine_fn_alias = _lambda.Alias(self, 
            'lmdaAliasId',
            alias_name='MystiqueTestAlias',
            version=version
            )
        """

        # Let's add permission to SNS to trigger our lambda function
        trigger_lambda_perms = _iam.PolicyStatement(
            effect=_iam.Effect.ALLOW,
            resources=[
                trigger_state_machine_fn.function_arn,
            ],
            actions=[
                "lambda:InvokeFunction",
            ])
        trigger_lambda_perms.sid = "TriggerLambdaFunction"
        # info_sec_ops_topic.add_to_resource_policy( trigger_lambda_perms )

        # Subscribe InfoSecOps Email to topic
        info_sec_ops_topic.add_subscription(
            _subs.EmailSubscription(global_args.INFO_SEC_OPS_EMAIL))
        # info_sec_ops_topic.add_subscription(_subs.LambdaSubscription(trigger_state_machine_fn))

        trigger_state_machine_fn_alarm = trigger_state_machine_fn.metric_all_errors(
        ).create_alarm(
            self,
            "fn-error-alarm",
            threshold=5,
            alarm_name="trigger_state_machine_fn_error_alarm",
            evaluation_periods=5,
            period=core.Duration.minutes(1),
        )

        subscribe_trigger_state_machine_fn_to_logs = _logs.SubscriptionFilter(
            self,
            "sshErrorLogSubscriptionId",
            log_group=info_sec_ops_log_group,
            destination=_logs_destination.LambdaDestination(
                trigger_state_machine_fn),
            filter_pattern=_logs.FilterPattern.space_delimited(
                "Mon", "day", "timestamp", "ip", "id", "status",
                "...").where_string("status", "=", "Invalid"),
        )

        # https://pypi.org/project/aws-cdk.aws-logs/
        # We are creating three filters:
        # tooManySshDisconnects, invalidSshUser and invalidSshKey.
        # When a user tries to SSH with an invalid username, a line like the following is logged to the SSH log file:
        # Apr 20 02:39:35 ip-172-31-63-56 sshd[17136]: Received disconnect from xxx.xxx.xxx.xxx: 11:  [preauth]
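        # With FilterPattern.space_delimited, each argument names one whitespace-separated
        # column of the line above, so "Apr" binds to Mon, "20" to day, "02:39:35" to
        # timestamp, and so on; where_string() then filters on one named column.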
        too_many_ssh_disconnects_metric = _cloudwatch.Metric(
            namespace=f"{global_args.OWNER}",
            metric_name="tooManySshDisconnects")
        too_many_ssh_disconnects_filter = _logs.MetricFilter(
            self,
            "tooManySshDisconnectsFilterId",
            log_group=info_sec_ops_log_group,
            metric_namespace=too_many_ssh_disconnects_metric.namespace,
            metric_name=too_many_ssh_disconnects_metric.metric_name,
            filter_pattern=_logs.FilterPattern.space_delimited(
                "Mon", "day", "timestamp", "ip", "id", "msg1", "msg2",
                "...").where_string("msg2", "=", "disconnect"),
            metric_value="1")

        invalid_ssh_user_metric = _cloudwatch.Metric(
            namespace=f"{global_args.OWNER}",
            metric_name="invalidSshUser",
        )
        invalid_ssh_user_filter = _logs.MetricFilter(
            self,
            "invalidSshUserFilterId",
            log_group=info_sec_ops_log_group,
            metric_namespace=invalid_ssh_user_metric.namespace,
            metric_name=invalid_ssh_user_metric.metric_name,
            filter_pattern=_logs.FilterPattern.space_delimited(
                "Mon", "day", "timestamp", "ip", "id", "status",
                "...").where_string("status", "=", "Invalid"),
            metric_value="1")

        invalid_ssh_key_metric = _cloudwatch.Metric(
            namespace=f"{global_args.OWNER}", metric_name="invalidSshKey")

        invalid_ssh_key_filter = _logs.MetricFilter(
            self,
            "invalidSshKeyFilterId",
            log_group=info_sec_ops_log_group,
            metric_namespace=invalid_ssh_key_metric.namespace,
            metric_name=invalid_ssh_key_metric.metric_name,
            filter_pattern=_logs.FilterPattern.space_delimited(
                "Mon", "day", "timestamp", "ip", "id", "msg1", "msg2",
                "...").where_string("msg1", "=", "Connection").where_string(
                    "msg2", "=", "closed"),
            metric_value="1")

        # Now let us create the alarms.
        # An alarm fires when the metric is >= 5 (threshold) in at least 1 (datapoints_to_alarm)
        # of the last 3 one-minute periods (evaluation_periods=3, period=60 seconds).
        too_many_ssh_disconnects_alarm = _cloudwatch.Alarm(
            self,
            "tooManySshDisconnectsAlarmId",
            alarm_name="too_many_ssh_disconnects_alarm",
            alarm_description=
            "The number of disconnect requests is greater than or equal to 5, even 1 time in 3 minutes",
            metric=too_many_ssh_disconnects_metric,
            actions_enabled=True,
            period=core.Duration.minutes(1),
            threshold=5,
            evaluation_periods=3,
            datapoints_to_alarm=1,
            statistic="sum",
            comparison_operator=_cloudwatch.ComparisonOperator.
            GREATER_THAN_OR_EQUAL_TO_THRESHOLD)
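        # The other two SSH alarms below notify InfoSecOps via SNS; presumably this
        # alarm should do the same, so wire it to the same topic for consistency.
        too_many_ssh_disconnects_alarm.add_alarm_action(
            _cloudwatch_actions.SnsAction(info_sec_ops_topic))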

        invalid_ssh_user_alarm = _cloudwatch.Alarm(
            self,
            "invalidSshUserAlarmId",
            alarm_name="too_many_invalid_ssh_users_alarm",
            alarm_description=
            "The number of invalid ssh users connecting is greater than 5, even 1 time in 3 minutes",
            metric=invalid_ssh_user_metric,
            actions_enabled=True,
            period=core.Duration.minutes(1),
            threshold=5,
            evaluation_periods=3,
            datapoints_to_alarm=1,
            statistic="sum",
            comparison_operator=_cloudwatch.ComparisonOperator.
            GREATER_THAN_THRESHOLD)
        invalid_ssh_user_alarm.add_alarm_action(
            _cloudwatch_actions.SnsAction(info_sec_ops_topic))

        invalid_ssh_key_alarm = _cloudwatch.Alarm(
            self,
            "invalidSshKeyAlarmId",
            alarm_name="too_many_invalid_ssh_key_alarm",
            alarm_description=
            "The number of invalid ssh keys connecting is greater than or equal to 5, even 1 time in 3 minutes",
            metric=invalid_ssh_key_metric,
            actions_enabled=True,
            period=core.Duration.minutes(1),
            threshold=5,
            evaluation_periods=3,
            datapoints_to_alarm=1,
            statistic="sum",
            comparison_operator=_cloudwatch.ComparisonOperator.
            GREATER_THAN_OR_EQUAL_TO_THRESHOLD)
        invalid_ssh_key_alarm.add_alarm_action(
            _cloudwatch_actions.SnsAction(info_sec_ops_topic))

        ###########################################
        ################# OUTPUTS #################
        ###########################################

        output0 = core.CfnOutput(
            self,
            "SecuirtyAutomationFrom",
            value=f"{global_args.SOURCE_INFO}",
            description=
            "To know more about this automation stack, check out our github page."
        )

        output1_1 = core.Fn.get_att(
            logical_name_of_resource="sshMonitoredInstance01",
            attribute_name="PublicIp")
        output1 = core.CfnOutput(self,
                                 "MonitoredInstance",
                                 value=output1_1.to_string(),
                                 description="Web Server Public IP to attack")

        output2 = core.CfnOutput(
            self,
            "SSHAlarms",
            value=
            (f"https://console.aws.amazon.com/cloudwatch/home?region="
             f"{core.Aws.REGION}"
             f"#alarmsV2:?search=ssh&alarmStateFilter=ALL&alarmTypeFilter=ALL"
             ),
            description="Check out the cloudwatch Alarms")

        output3 = core.CfnOutput(
            self,
            "SubscribeToNotificationTopic",
            value=(f"https://console.aws.amazon.com/sns/v3/home?"
                   f"{core.Aws.REGION}"
                   f"#/topic/"
                   f"{info_sec_ops_topic.topic_arn}"),
            description=
            "Add your email to subscription and confirm subscription")

        output_test_1 = core.CfnOutput(
            self,
            "ToGenInvalidKeyErrors",
            value=
            (f"for i in {{1..30}}; do ssh -i $RANDOM ec2-user@{output1_1.to_string()}; sleep 2; done &"
             ),
            description=
            "Generates random key names and connects to server 30 times over 60 seconds"
        )

        output_test_2 = core.CfnOutput(
            self,
            "ToGenInvalidUserErrors",
            value=
            (f"for i in {{1..30}}; do ssh ec2-user$RANDOM@{output1_1.to_string()}; sleep 2; done &"
             ),
            description=
            "Generates random user names and connects to server 30 times over 60 seconds"
        )
        """
Ejemplo n.º 26
0
    def __init__(self, scope: core.Construct, id: str,
                 stream_producer_lg,
                 stream_pipe,
                 py_stream_record_processor_fn,
                 node_stream_record_processor_fn,
                 **kwargs
                 ) -> None:
        super().__init__(scope, id, **kwargs)

        ##### MONITORING ######

        ##################################################
        ##########        STREAM  METRICS        #########
        ##################################################

        # Shows you the ingestion rate into the shard.
        stream_in_bytes_metric = _cloudwatch.Metric(
            namespace="AWS/Kinesis",
            metric_name="IncomingBytes",
            dimensions={
                "StreamName": f"{stream_pipe.stream_name}"
            },
            label="IncomingBytes",
            period=core.Duration.minutes(30),
            statistic="Sum"
        )
        stream_in_records_metric = _cloudwatch.Metric(
            namespace="AWS/Kinesis",
            metric_name="IncomingRecords",
            dimensions={
                "StreamName": f"{stream_pipe.stream_name}"
            },
            label="IncomingRecords",
            period=core.Duration.minutes(30),
            statistic="Sum"
        )
        stream_w_throttle_metric = _cloudwatch.Metric(
            namespace="AWS/Kinesis",
            metric_name="WriteProvisionedThroughputExceeded",
            dimensions={
                "StreamName": f"{stream_pipe.stream_name}"
            },
            label="WriteProvisionedThroughputExceeded",
            period=core.Duration.minutes(30),
            statistic="Sum"
        )
        stream_r_throttle_metric = _cloudwatch.Metric(
            namespace="AWS/Kinesis",
            metric_name="ReadProvisionedThroughputExceeded",
            dimensions={
                "StreamName": f"{stream_pipe.stream_name}"
            },
            label="ReadProvisionedThroughputExceeded",
            period=core.Duration.minutes(30),
            statistic="Sum"
        )
        stream_put_success_metric = _cloudwatch.Metric(
            namespace="AWS/Kinesis",
            metric_name="PutRecords.Success",
            dimensions={
                "StreamName": f"{stream_pipe.stream_name}"
            },
            label="PutRecords.LatSuccessency",
            period=core.Duration.minutes(30),
            statistic="Sum"
        )
        stream_put_latency_metric = _cloudwatch.Metric(
            namespace="AWS/Kinesis",
            metric_name="PutRecords.Latency",
            dimensions={
                "StreamName": f"{stream_pipe.stream_name}"
            },
            label="PutRecords.Latency",
            period=core.Duration.minutes(30),
            statistic="Sum"
        )
        stream_get_latency_metric = _cloudwatch.Metric(
            namespace="AWS/Kinesis",
            metric_name="GetRecords.Latency",
            dimensions={
                "StreamName": f"{stream_pipe.stream_name}"
            },
            label="GetRecords.Latency",
            period=core.Duration.minutes(30),
            statistic="Sum"
        )

        ##################################################
        ##########    STREAM PRODUCER METRICS    #########
        ##################################################
        # JSON Metric Filter - https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/FilterAndPatternSyntax.html

        records_produced_metric = _cloudwatch.Metric(
            namespace=f"{global_args.OWNER}-stream-data-processor",
            metric_name="recordsProducedCount",
            label="Total No. Of Records Produced",
            period=core.Duration.minutes(30),
            statistic="Sum"
        )

        records_produced_metric_filter = _logs.MetricFilter(self, "recordsProducedCountFilter",
                                                            filter_pattern=_logs.FilterPattern.exists(
                                                                "$.records_produced"),
                                                            log_group=stream_producer_lg,
                                                            metric_namespace=records_produced_metric.namespace,
                                                            metric_name=records_produced_metric.metric_name,
                                                            default_value=0,
                                                            metric_value="$.records_produced",
                                                            )
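        # The JSON metric filter above matches structured log lines; assuming the
        # producer emits one JSON document per put, a matching line would be:
        #   {"records_produced": 42}
        # Log events without the $.records_produced key emit default_value=0 instead.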

        ##################################################
        ##########    STREAM CONSUMER METRICS    #########
        ##################################################

        py_records_processed_metric = _cloudwatch.Metric(
            namespace=f"{global_args.OWNER}-stream-data-processor",
            # dimensions={
            #     "RecordsProcessed": "py_processor"
            # },
            metric_name="pyRecordsProcessedCount",
            label="Total No. Of Records Processed",
            period=core.Duration.minutes(30),
            statistic="Sum"
        )

        py_stream_record_processor = _logs.MetricFilter(self, "processedRecordCountFilter01",
                                                        filter_pattern=_logs.FilterPattern.exists(
                                                            "$.records_processed"),
                                                        log_group=py_stream_record_processor_fn.log_group,
                                                        metric_namespace=py_records_processed_metric.namespace,
                                                        metric_name=py_records_processed_metric.metric_name,
                                                        default_value=0,
                                                        metric_value="$.records_processed",
                                                        )
        node_records_processed_metric = _cloudwatch.Metric(
            namespace=f"{global_args.OWNER}-stream-data-processor",
            metric_name="nodeRecordsProcessedCount",
            label="Total No. Of Records Processed",
            period=core.Duration.minutes(30),
            statistic="Sum"
        )
        node_stream_record_processor = _logs.MetricFilter(self, "processedRecordCountFilter02",
                                                          filter_pattern=_logs.FilterPattern.exists(
                                                              "$.records_processed"),
                                                          log_group=node_stream_record_processor_fn.log_group,
                                                          metric_namespace=node_records_processed_metric.namespace,
                                                          metric_name=node_records_processed_metric.metric_name,
                                                          default_value=0,
                                                          metric_value="$.records_processed",
                                                          )

        # Create CloudWatch Dashboard for Streams
        stream_processor_dashboard = _cloudwatch.Dashboard(self,
                                                           id="streamProcessorDashboard",
                                                           dashboard_name="Stream-Processor"
                                                           )

        stream_processor_dashboard.add_widgets(
            _cloudwatch.SingleValueWidget(
                title="TotalRecordsProduced",
                metrics=[records_produced_metric]
            ),
            _cloudwatch.SingleValueWidget(
                title="RecordsProcessed-by-Python-Consumer",
                metrics=[py_records_processed_metric]
            ),
            _cloudwatch.SingleValueWidget(
                title="RecordsProcessed-by-Node-Consumer",
                metrics=[node_records_processed_metric]
            )
        )

        # Stream Incoming bytes Graph
        stream_processor_dashboard.add_widgets(
            _cloudwatch.Row(
                _cloudwatch.GraphWidget(
                    title="Shard Ingestion Metrics",
                    left=[stream_in_bytes_metric],
                    right=[stream_in_records_metric]
                ),
                _cloudwatch.GraphWidget(
                    title="Shard Throttle Metrics",
                    left=[stream_w_throttle_metric],
                    right=[stream_r_throttle_metric]
                )
            )
        )

        stream_processor_dashboard.add_widgets(
            _cloudwatch.Row(
                _cloudwatch.GraphWidget(
                    title="Stream Put Latency",
                    left=[stream_put_latency_metric]
                ),
                _cloudwatch.GraphWidget(
                    title="Stream Get Latency",
                    left=[stream_get_latency_metric]
                ),
                _cloudwatch.GraphWidget(
                    title="Stream Put Success",
                    left=[stream_put_success_metric]
                )
            )
        )

        ###########################################
        ################# OUTPUTS #################
        ###########################################

        output_0 = core.CfnOutput(self,
                                  "SecuirtyAutomationFrom",
                                  value=f"{global_args.SOURCE_INFO}",
                                  description="To know more about this automation stack, check out our github page."
                                  )
Example No. 27
    def __init__(self, scope: core.Construct, _id: str, **kwargs) -> None:
        super().__init__(scope, _id, **kwargs)

        # Setup SSM parameter of credentials, bucket_para, ignore_list
        ssm_credential_para = ssm.StringParameter.from_secure_string_parameter_attributes(
            self,
            "ssm_parameter_credentials",
            parameter_name=ssm_parameter_credentials,
            version=1)

        ssm_bucket_para = ssm.StringParameter(self,
                                              "s3bucket_serverless",
                                              string_value=json.dumps(
                                                  bucket_para, indent=4))

        ssm_parameter_ignore_list = ssm.StringParameter(
            self, "s3_migrate_ignore_list", string_value=ignore_list)

        # Setup DynamoDB
        ddb_file_list = ddb.Table(self,
                                  "s3migrate_serverless",
                                  partition_key=ddb.Attribute(
                                      name="Key",
                                      type=ddb.AttributeType.STRING),
                                  billing_mode=ddb.BillingMode.PAY_PER_REQUEST)
        ddb_file_list.add_global_secondary_index(
            partition_key=ddb.Attribute(name="desBucket",
                                        type=ddb.AttributeType.STRING),
            index_name="desBucket-index",
            projection_type=ddb.ProjectionType.INCLUDE,
            non_key_attributes=["desKey", "versionId"])

        # Setup SQS: a message received more than max_receive_count times without
        # being deleted is moved to the DLQ
        sqs_queue_DLQ = sqs.Queue(self,
                                  "s3migrate_serverless_Q_DLQ",
                                  visibility_timeout=core.Duration.minutes(15),
                                  retention_period=core.Duration.days(14))
        sqs_queue = sqs.Queue(self,
                              "s3migrate_serverless_Q",
                              visibility_timeout=core.Duration.minutes(15),
                              retention_period=core.Duration.days(14),
                              dead_letter_queue=sqs.DeadLetterQueue(
                                  max_receive_count=60, queue=sqs_queue_DLQ))

        # Setup API for Lambda to get its public IP address (for debugging network routing)
        checkip = api.RestApi(
            self,
            "lambda-checkip-api",
            cloud_watch_role=True,
            deploy=True,
            description="For Lambda get IP address",
            default_integration=api.MockIntegration(
                integration_responses=[
                    api.IntegrationResponse(status_code="200",
                                            response_templates={
                                                "application/json":
                                                "$context.identity.sourceIp"
                                            })
                ],
                request_templates={"application/json": '{"statusCode": 200}'}),
            endpoint_types=[api.EndpointType.REGIONAL])
        checkip.root.add_method("GET",
                                method_responses=[
                                    api.MethodResponse(
                                        status_code="200",
                                        response_models={
                                            "application/json":
                                            api.Model.EMPTY_MODEL
                                        })
                                ])
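        # Inside the worker, checkip_url can be fetched to learn the function's egress
        # IP (handy for verifying NAT/VPC routing). A sketch using only the standard
        # library (assumed usage, not taken from the worker source in ./lambda):
        """
        import os
        import urllib.request
        checkip_url = os.environ['checkip_url']
        my_ip = urllib.request.urlopen(checkip_url).read().decode('utf-8')
        print(f"Lambda egress IP: {my_ip}")
        """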

        # Setup Lambda functions
        handler = lam.Function(self,
                               "s3-migrate-worker",
                               code=lam.Code.asset("./lambda"),
                               handler="lambda_function_worker.lambda_handler",
                               runtime=lam.Runtime.PYTHON_3_8,
                               memory_size=1024,
                               timeout=core.Duration.minutes(15),
                               tracing=lam.Tracing.ACTIVE,
                               environment={
                                   'table_queue_name':
                                   ddb_file_list.table_name,
                                   'Des_bucket_default': Des_bucket_default,
                                   'Des_prefix_default': Des_prefix_default,
                                   'StorageClass': StorageClass,
                                   'checkip_url': checkip.url,
                                   'ssm_parameter_credentials':
                                   ssm_parameter_credentials,
                                   'JobType': JobType,
                                   'MaxRetry': MaxRetry,
                                   'MaxThread': MaxThread,
                                   'MaxParallelFile': MaxParallelFile,
                                   'JobTimeout': JobTimeout,
                                   'UpdateVersionId': UpdateVersionId,
                                   'GetObjectWithVersionId':
                                   GetObjectWithVersionId
                               })

        handler_jobsender = lam.Function(
            self,
            "s3-migrate-jobsender",
            code=lam.Code.asset("./lambda"),
            handler="lambda_function_jobsender.lambda_handler",
            runtime=lam.Runtime.PYTHON_3_8,
            memory_size=1024,
            timeout=core.Duration.minutes(15),
            tracing=lam.Tracing.ACTIVE,
            environment={
                'table_queue_name': ddb_file_list.table_name,
                'StorageClass': StorageClass,
                'checkip_url': checkip.url,
                'sqs_queue': sqs_queue.queue_name,
                'ssm_parameter_credentials': ssm_parameter_credentials,
                'ssm_parameter_ignore_list':
                ssm_parameter_ignore_list.parameter_name,
                'ssm_parameter_bucket': ssm_bucket_para.parameter_name,
                'JobType': JobType,
                'MaxRetry': MaxRetry,
                'JobsenderCompareVersionId': JobsenderCompareVersionId
            })

        # Allow lambda read/write DDB, SQS
        ddb_file_list.grant_read_write_data(handler)
        ddb_file_list.grant_read_write_data(handler_jobsender)
        sqs_queue.grant_send_messages(handler_jobsender)
        # SQS trigger Lambda worker
        handler.add_event_source(SqsEventSource(sqs_queue, batch_size=1))

        # Option 1: Create an S3 bucket; all new objects in it will be transmitted by the Lambda Worker
        s3bucket = s3.Bucket(self, "s3_new_migrate")
        s3bucket.grant_read(handler)
        s3bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                        s3n.SqsDestination(sqs_queue))

        # Option 2: Allow existing S3 buckets to be read by the Lambda functions.
        # Lambda Jobsender will scan and compare these buckets and trigger Lambda Workers to transmit the missing objects
        bucket_name = ''
        for b in bucket_para:
            if bucket_name != b['src_bucket']:  # skip if the same bucket is listed more than once
                bucket_name = b['src_bucket']
                s3exist_bucket = s3.Bucket.from_bucket_name(
                    self,
                    bucket_name,  # use the bucket name as the construct id
                    bucket_name=bucket_name)
                if JobType == 'PUT':
                    s3exist_bucket.grant_read(handler_jobsender)
                    s3exist_bucket.grant_read(handler)
                else:  # 'GET' mode
                    s3exist_bucket.grant_read_write(handler_jobsender)
                    s3exist_bucket.grant_read_write(handler)

        # Allow Lambda read ssm parameters
        ssm_bucket_para.grant_read(handler_jobsender)
        ssm_credential_para.grant_read(handler)
        ssm_credential_para.grant_read(handler_jobsender)
        ssm_parameter_ignore_list.grant_read(handler_jobsender)

        # Schedule cron event to trigger Lambda Jobsender per hour:
        event.Rule(self,
                   'cron_trigger_jobsender',
                   schedule=event.Schedule.rate(core.Duration.hours(1)),
                   targets=[target.LambdaFunction(handler_jobsender)])

        # TODO: Trigger the event immediately; add a custom resource lambda to invoke handler_jobsender

        # Create Lambda logs filter to create network traffic metric
        handler.log_group.add_metric_filter(
            "Completed-bytes",
            metric_name="Completed-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[info, date, sn, p="--->Complete", bytes, key]'))
        handler.log_group.add_metric_filter(
            "Uploading-bytes",
            metric_name="Uploading-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[info, date, sn, p="--->Uploading", bytes, key]'))
        handler.log_group.add_metric_filter(
            "Downloading-bytes",
            metric_name="Downloading-bytes",
            metric_namespace="s3_migrate",
            metric_value="$bytes",
            filter_pattern=logs.FilterPattern.literal(
                '[info, date, sn, p="--->Downloading", bytes, key]'))
        handler.log_group.add_metric_filter(
            "MaxMemoryUsed",
            metric_name="MaxMemoryUsed",
            metric_namespace="s3_migrate",
            metric_value="$memory",
            filter_pattern=logs.FilterPattern.literal(
                '[head="REPORT", a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, '
                'a13, a14, a15, a16, memory, MB="MB", rest]'))
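        # These space-delimited patterns expect worker log lines shaped like the
        # following (illustrative samples; the exact format lives in ./lambda and
        # is an assumption here):
        #   INFO 2021-01-01T00:00:01 0001 --->Complete 1048576 my/object/key
        #   INFO 2021-01-01T00:00:02 0002 --->Uploading 524288 my/object/key
        # The REPORT pattern parses Lambda's own end-of-invocation report line and
        # captures the "Max Memory Used" column as $memory.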
        lambda_metric_Complete = cw.Metric(namespace="s3_migrate",
                                           metric_name="Completed-bytes",
                                           statistic="Sum",
                                           period=core.Duration.minutes(1))
        lambda_metric_Upload = cw.Metric(namespace="s3_migrate",
                                         metric_name="Uploading-bytes",
                                         statistic="Sum",
                                         period=core.Duration.minutes(1))
        lambda_metric_Download = cw.Metric(namespace="s3_migrate",
                                           metric_name="Downloading-bytes",
                                           statistic="Sum",
                                           period=core.Duration.minutes(1))
        lambda_metric_MaxMemoryUsed = cw.Metric(
            namespace="s3_migrate",
            metric_name="MaxMemoryUsed",
            statistic="Maximum",
            period=core.Duration.minutes(1))
        handler.log_group.add_metric_filter(
            "ERROR",
            metric_name="ERROR-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"ERROR"'))
        handler.log_group.add_metric_filter(
            "WARNING",
            metric_name="WARNING-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"WARNING"'))
        # Task timed out
        handler.log_group.add_metric_filter(
            "TIMEOUT",
            metric_name="TIMEOUT-Logs",
            metric_namespace="s3_migrate",
            metric_value="1",
            filter_pattern=logs.FilterPattern.literal('"Task timed out"'))
        log_metric_ERROR = cw.Metric(namespace="s3_migrate",
                                     metric_name="ERROR-Logs",
                                     statistic="Sum",
                                     period=core.Duration.minutes(1))
        log_metric_WARNING = cw.Metric(namespace="s3_migrate",
                                       metric_name="WARNING-Logs",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))
        log_metric_TIMEOUT = cw.Metric(namespace="s3_migrate",
                                       metric_name="TIMEOUT-Logs",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))

        # Dashboard to monitor SQS and Lambda
        board = cw.Dashboard(self, "s3_migrate_serverless")

        board.add_widgets(
            cw.GraphWidget(title="Lambda-NETWORK",
                           left=[
                               lambda_metric_Download, lambda_metric_Upload,
                               lambda_metric_Complete
                           ]),
            cw.GraphWidget(title="Lambda-concurrent",
                           left=[
                               handler.metric(
                                   metric_name="ConcurrentExecutions",
                                   period=core.Duration.minutes(1))
                           ]),
            cw.GraphWidget(
                title="Lambda-invocations/errors/throttles",
                left=[
                    handler.metric_invocations(
                        period=core.Duration.minutes(1)),
                    handler.metric_errors(period=core.Duration.minutes(1)),
                    handler.metric_throttles(period=core.Duration.minutes(1))
                ]),
            cw.GraphWidget(
                title="Lambda-duration",
                left=[
                    handler.metric_duration(period=core.Duration.minutes(1))
                ]),
        )

        board.add_widgets(
            cw.GraphWidget(title="Lambda_MaxMemoryUsed(MB)",
                           left=[lambda_metric_MaxMemoryUsed]),
            cw.GraphWidget(title="ERROR/WARNING Logs",
                           left=[log_metric_ERROR],
                           right=[log_metric_WARNING, log_metric_TIMEOUT]),
            cw.GraphWidget(
                title="SQS-Jobs",
                left=[
                    sqs_queue.metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1))
                ]),
            cw.SingleValueWidget(
                title="Running/Waiting and Dead Jobs",
                metrics=[
                    sqs_queue.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue.metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_not_visible(
                        period=core.Duration.minutes(1)),
                    sqs_queue_DLQ.
                    metric_approximate_number_of_messages_visible(
                        period=core.Duration.minutes(1))
                ],
                height=6))
        # Alarm for queue - DLQ
        alarm_DLQ = cw.Alarm(
            self,
            "SQS_DLQ",
            metric=sqs_queue_DLQ.metric_approximate_number_of_messages_visible(
            ),
            threshold=0,
            comparison_operator=cw.ComparisonOperator.GREATER_THAN_THRESHOLD,
            evaluation_periods=1,
            datapoints_to_alarm=1)
        alarm_topic = sns.Topic(self, "SQS queue-DLQ has dead letter")
        alarm_topic.add_subscription(
            subscription=sub.EmailSubscription(alarm_email))
        alarm_DLQ.add_alarm_action(action.SnsAction(alarm_topic))

        core.CfnOutput(self,
                       "Dashboard",
                       value="CloudWatch Dashboard name s3_migrate_serverless")
Example No. 28
    def __init__(self, scope: core.Construct, _id: str, **kwargs) -> None:
        super().__init__(scope, _id, **kwargs)

        ddb_file_list = ddb.Table(self, "ddb",
                                  partition_key=ddb.Attribute(name="Key", type=ddb.AttributeType.STRING),
                                  billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

        sqs_queue_DLQ = sqs.Queue(self, "sqs_DLQ",
                                  visibility_timeout=core.Duration.minutes(15),
                                  retention_period=core.Duration.days(14)
                                  )
        sqs_queue = sqs.Queue(self, "sqs_queue",
                              visibility_timeout=core.Duration.minutes(15),
                              retention_period=core.Duration.days(14),
                              dead_letter_queue=sqs.DeadLetterQueue(
                                  max_receive_count=100,
                                  queue=sqs_queue_DLQ
                              )
                              )
        handler = lam.Function(self, "lambdaFunction",
                               code=lam.Code.asset("./lambda"),
                               handler="lambda_function.lambda_handler",
                               runtime=lam.Runtime.PYTHON_3_8,
                               memory_size=1024,
                               timeout=core.Duration.minutes(15),
                               tracing=lam.Tracing.ACTIVE,
                               environment={
                                   'table_queue_name': ddb_file_list.table_name,
                                   'Des_bucket_default': Des_bucket_default,
                                   'Des_prefix_default': Des_prefix_default,
                                   'StorageClass': StorageClass,
                                   'aws_access_key_id': aws_access_key_id,
                                   'aws_secret_access_key': aws_secret_access_key,
                                   'aws_access_key_region': aws_access_key_region
                               })

        ddb_file_list.grant_read_write_data(handler)
        handler.add_event_source(SqsEventSource(sqs_queue))

        s3bucket = s3.Bucket(self, "s3bucket")
        s3bucket.grant_read(handler)
        s3bucket.add_event_notification(s3.EventType.OBJECT_CREATED,
                                        s3n.SqsDestination(sqs_queue))

        # You can import an existing bucket and grant access to lambda
        # exist_s3bucket = s3.Bucket.from_bucket_name(self, "import_bucket",
        #                                             bucket_name="your_bucket_name")
        # exist_s3bucket.grant_read(handler)

        # But you have to add SQS as the imported bucket's event notification manually;
        # CloudFormation does not support this for imported buckets.
        # A workaround is to add on_cloud_trail_event for the bucket, but that requires CloudTrail first.
        # Because the bucket is imported, you must manually create the bucket event trigger to SQS
        # and set the SQS permission that allows this bucket to send to the queue.
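        # A minimal sketch of that manual wiring (out-of-band, e.g. a one-off boto3
        # script; the bucket and queue names here are assumptions for illustration):
        """
        import boto3
        s3 = boto3.client('s3')
        s3.put_bucket_notification_configuration(
            Bucket='your_bucket_name',
            NotificationConfiguration={
                'QueueConfigurations': [{
                    'QueueArn': 'arn:aws:sqs:region:account:queue-name',
                    'Events': ['s3:ObjectCreated:*'],
                }]
            })
        # The queue policy must also allow s3.amazonaws.com to send messages
        # on behalf of this bucket's ARN, or the call above fails validation.
        """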

        core.CfnOutput(self, "DynamoDB_Table", value=ddb_file_list.table_name)
        core.CfnOutput(self, "SQS_Job_Queue", value=sqs_queue.queue_name)
        core.CfnOutput(self, "SQS_Job_Queue_DLQ", value=sqs_queue_DLQ.queue_name)
        core.CfnOutput(self, "Worker_Lambda_Function", value=handler.function_name)
        core.CfnOutput(self, "New_S3_Bucket", value=s3bucket.bucket_name)

        # Create Lambda logs filter to create network traffic metric
        handler.log_group.add_metric_filter("Complete-bytes",
                                            metric_name="Complete-bytes",
                                            metric_namespace="s3_migrate",
                                            metric_value="$bytes",
                                            filter_pattern=logs.FilterPattern.literal(
                                                '[info, date, sn, p="--->Complete", bytes, key]'))
        handler.log_group.add_metric_filter("Uploading-bytes",
                                            metric_name="Uploading-bytes",
                                            metric_namespace="s3_migrate",
                                            metric_value="$bytes",
                                            filter_pattern=logs.FilterPattern.literal(
                                                '[info, date, sn, p="--->Uploading", bytes, key]'))
        handler.log_group.add_metric_filter("Downloading-bytes",
                                            metric_name="Downloading-bytes",
                                            metric_namespace="s3_migrate",
                                            metric_value="$bytes",
                                            filter_pattern=logs.FilterPattern.literal(
                                                '[info, date, sn, p="--->Downloading", bytes, key]'))
        lambda_metric_Complete = cw.Metric(namespace="s3_migrate",
                                           metric_name="Complete-bytes",
                                           statistic="Sum",
                                           period=core.Duration.minutes(1))
        lambda_metric_Upload = cw.Metric(namespace="s3_migrate",
                                         metric_name="Uploading-bytes",
                                         statistic="Sum",
                                         period=core.Duration.minutes(1))
        lambda_metric_Download = cw.Metric(namespace="s3_migrate",
                                           metric_name="Downloading-bytes",
                                           statistic="Sum",
                                           period=core.Duration.minutes(1))
        handler.log_group.add_metric_filter("ERROR",
                                            metric_name="ERROR-Logs",
                                            metric_namespace="s3_migrate",
                                            metric_value="1",
                                            filter_pattern=logs.FilterPattern.literal(
                                                '"ERROR"'))
        handler.log_group.add_metric_filter("WARNING",
                                            metric_name="WARNING-Logs",
                                            metric_namespace="s3_migrate",
                                            metric_value="1",
                                            filter_pattern=logs.FilterPattern.literal(
                                                '"WARNING"'))
        log_metric_ERROR = cw.Metric(namespace="s3_migrate",
                                     metric_name="ERROR-Logs",
                                     statistic="Sum",
                                     period=core.Duration.minutes(1))
        log_metric_WARNING = cw.Metric(namespace="s3_migrate",
                                       metric_name="WARNING-Logs",
                                       statistic="Sum",
                                       period=core.Duration.minutes(1))

        # Dashboard to monitor SQS and Lambda
        board = cw.Dashboard(self, "s3_migrate", dashboard_name="s3_migrate_serverless")

        board.add_widgets(cw.GraphWidget(title="Lambda-NETWORK",
                                         left=[lambda_metric_Download, lambda_metric_Upload, lambda_metric_Complete]),
                          # TODO: this monitors concurrency across all Lambdas, not just this one; a CDK limitation.
                          # Lambda now supports per-function concurrency metrics; switch once CDK supports it.
                          cw.GraphWidget(title="Lambda-all-concurrent",
                                         left=[handler.metric_all_concurrent_executions(period=core.Duration.minutes(1))]),

                          cw.GraphWidget(title="Lambda-invocations/errors/throttles",
                                         left=[handler.metric_invocations(period=core.Duration.minutes(1)),
                                               handler.metric_errors(period=core.Duration.minutes(1)),
                                               handler.metric_throttles(period=core.Duration.minutes(1))]),
                          cw.GraphWidget(title="Lambda-duration",
                                         left=[handler.metric_duration(period=core.Duration.minutes(1))]),
                          )

        board.add_widgets(cw.GraphWidget(title="SQS-Jobs",
                                         left=[sqs_queue.metric_approximate_number_of_messages_visible(
                                             period=core.Duration.minutes(1)
                                         ),
                                               sqs_queue.metric_approximate_number_of_messages_not_visible(
                                                   period=core.Duration.minutes(1)
                                               )]),
                          cw.GraphWidget(title="SQS-DeadLetterQueue",
                                         left=[sqs_queue_DLQ.metric_approximate_number_of_messages_visible(
                                             period=core.Duration.minutes(1)
                                         ),
                                               sqs_queue_DLQ.metric_approximate_number_of_messages_not_visible(
                                                   period=core.Duration.minutes(1)
                                               )]),
                          cw.GraphWidget(title="ERROR/WARNING Logs",
                                         left=[log_metric_ERROR],
                                         right=[log_metric_WARNING]),
                          cw.SingleValueWidget(title="Running/Waiting and Dead Jobs",
                                               metrics=[sqs_queue.metric_approximate_number_of_messages_not_visible(
                                                   period=core.Duration.minutes(1)
                                               ),
                                                        sqs_queue.metric_approximate_number_of_messages_visible(
                                                            period=core.Duration.minutes(1)
                                                        ),
                                                        sqs_queue_DLQ.metric_approximate_number_of_messages_not_visible(
                                                            period=core.Duration.minutes(1)
                                                        ),
                                                        sqs_queue_DLQ.metric_approximate_number_of_messages_visible(
                                                            period=core.Duration.minutes(1)
                                                        )],
                                               height=6)
                          )
        # Alarm for queue - DLQ
        alarm_DLQ = cw.Alarm(self, "SQS_DLQ",
                             alarm_name="s3-migration-serverless-SQS Dead Letter Queue",
                             metric=sqs_queue_DLQ.metric_approximate_number_of_messages_visible(),
                             threshold=0,
                             comparison_operator=cw.ComparisonOperator.GREATER_THAN_THRESHOLD,
                             evaluation_periods=1,
                             datapoints_to_alarm=1)
        alarm_topic = sns.Topic(self, "SQS queue-DLQ has dead letter")
        alarm_topic.add_subscription(subscription=sub.EmailSubscription(alarm_email))
        alarm_DLQ.add_alarm_action(action.SnsAction(alarm_topic))

        # Alarm for queue empty, i.e. no visible messages and no in-flight (invisible) messages
        # metric_all_message = cw.MathExpression(
        #     expression="a + b",
        #     label="empty_queue_expression",
        #     using_metrics={
        #         "a": sqs_queue.metric_approximate_number_of_messages_visible(),
        #         "b": sqs_queue.metric_approximate_number_of_messages_not_visible()
        #     }
        # )
        # alarm_0 = cw.Alarm(self, "SQSempty",
        #                    alarm_name="SQS queue empty-Serverless",
        #                    metric=metric_all_message,
        #                    threshold=0,
        #                    comparison_operator=cw.ComparisonOperator.LESS_THAN_OR_EQUAL_TO_THRESHOLD,
        #                    evaluation_periods=3,
        #                    datapoints_to_alarm=3,
        #                    treat_missing_data=cw.TreatMissingData.IGNORE
        #                    )
        # alarm_topic = sns.Topic(self, "SQS queue empty-Serverless")
        # alarm_topic.add_subscription(subscription=sub.EmailSubscription(alarm_email))
        # alarm_0.add_alarm_action(action.SnsAction(alarm_topic))

        # core.CfnOutput(self, "Alarm", value="CloudWatch SQS queue empty Alarm for Serverless: " + alarm_email)
        core.CfnOutput(self, "Dashboard", value="CloudWatch Dashboard name s3_migrate_serverless")
Example No. 29

    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Read Lambda Code
        try:
            with open("serverless_stacks/lambda_src/konstone_custom_metric_log_generator.py", mode="r") as f:
                konstone_custom_metric_fn_code = f.read()
        except OSError:
            print("Unable to read Lambda Function Code")
            raise  # fail fast: the function below depends on this code

        konstone_custom_metric_fn = _lambda.Function(
            self,
            "konstoneFunction",
            function_name="konstone_custom_metric_fn",
            runtime=_lambda.Runtime.PYTHON_3_7,
            handler="index.lambda_handler",
            code=_lambda.InlineCode(konstone_custom_metric_fn_code),
            timeout=core.Duration.seconds(3),
            reserved_concurrent_executions=1,
            environment={
                "LOG_LEVEL": "INFO",
                "PERCENTAGE_ERRORS": "75"
            }
        )

        # Create Custom Loggroup
        # /aws/lambda/function-name
        konstone_custom_metric_lg = _logs.LogGroup(
            self,
            "konstoneLoggroup",
            log_group_name=f"/aws/lambda/{konstone_custom_metric_fn.function_name}",
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=_logs.RetentionDays.ONE_DAY,
        )

        # Create Custom Metric Namespace
        third_party_error_metric = _cloudwatch.Metric(
            namespace=f"third-party-error-metric",
            metric_name="third_party_error_metric",
            label="Total No. of Third Party API Errors",
            period=core.Duration.minutes(1),
            statistic="Sum"
        )

        # Create Custom Metric Log Filter
        third_party_error_metric_filter = _logs.MetricFilter(
            self,
            "thirdPartyApiErrorMetricFilter",
            filter_pattern=_logs.FilterPattern.boolean_value(
                "$.third_party_api_error", True),
            log_group=konstone_custom_metric_lg,
            metric_namespace=third_party_error_metric.namespace,
            metric_name=third_party_error_metric.metric_name,
            default_value=0,
            metric_value="1"
        )
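        # boolean_value("$.third_party_api_error", True) matches JSON log lines of the
        # shape the function above is assumed to emit, e.g.:
        #   {"third_party_api_error": true, "status_code": 502}
        # Non-matching events emit default_value=0, keeping the metric continuous.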

        # Create Third Party Error Alarm
        third_party_error_alarm = _cloudwatch.Alarm(
            self,
            "thirdPartyApiErrorAlarm",
            alarm_description="Alert if 3rd party API has more than 2 errors in the last two minutes",
            alarm_name="third-party-api-alarm",
            metric=third_party_error_metric,
            comparison_operator=_cloudwatch.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            threshold=2,
            evaluation_periods=2,
            datapoints_to_alarm=1,
            period=core.Duration.minutes(1),
            treat_missing_data=_cloudwatch.TreatMissingData.NOT_BREACHING
        )

        # Create CloudWatch Dashboard
        konstone_dashboard = _cloudwatch.Dashboard(
            self,
            id="konstoneDashboard",
            dashboard_name="Konstone-App-Live-Dashboard"
        )

        # Add Lambda Function Metrics to Dashboard
        konstone_dashboard.add_widgets(
            _cloudwatch.Row(
                _cloudwatch.GraphWidget(
                    title="Backend-Invocations",
                    left=[
                        konstone_custom_metric_fn.metric_invocations(
                            statistic="Sum",
                            period=core.Duration.minutes(1)
                        )
                    ]
                ),
                _cloudwatch.GraphWidget(
                    title="Backend-Errors",
                    left=[
                        konstone_custom_metric_fn.metric_errors(
                            statistic="Sum",
                            period=core.Duration.minutes(1)
                        )
                    ]
                )
            )
        )

        # Add 3rd Party API Error to Dashboard
        konstone_dashboard.add_widgets(
            _cloudwatch.Row(
                _cloudwatch.SingleValueWidget(
                    title="Third Party API Errors",
                    metrics=[third_party_error_metric]
                )
            )
        )
Example No. 30
    def __init__(self,
                 scope: core.Construct,
                 id: str,
                 vpc: aws_ec2.Vpc,
                 ecs_cluster: aws_ecs.Cluster,
                 alb: elbv2.ApplicationLoadBalancer,
                 albTestListener: elbv2.ApplicationListener,
                 albProdListener: elbv2.ApplicationListener,
                 blueGroup: elbv2.ApplicationTargetGroup,
                 greenGroup: elbv2.ApplicationTargetGroup,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        ECS_APP_NAME = "Nginx-app",
        ECS_DEPLOYMENT_GROUP_NAME = "NginxAppECSBlueGreen"
        ECS_DEPLOYMENT_CONFIG_NAME = "CodeDeployDefault.ECSLinear10PercentEvery1Minutes"
        ECS_TASKSET_TERMINATION_WAIT_TIME = 10
        ECS_TASK_FAMILY_NAME = "Nginx-microservice"
        ECS_APP_NAME = "Nginx-microservice"
        ECS_APP_LOG_GROUP_NAME = "/ecs/Nginx-microservice"
        DUMMY_TASK_FAMILY_NAME = "sample-Nginx-microservice"
        DUMMY_APP_NAME = "sample-Nginx-microservice"
        DUMMY_APP_LOG_GROUP_NAME = "/ecs/sample-Nginx-microservice"
        DUMMY_CONTAINER_IMAGE = "smuralee/nginx"

        # =============================================================================
        # ECR and CodeCommit repositories for the Blue/Green deployment
        # =============================================================================

        # ECR repository for the docker images
        NginxecrRepo = aws_ecr.Repository(self,
                                          "NginxRepo",
                                          image_scan_on_push=True)

        NginxCodeCommitrepo = aws_codecommit.Repository(
            self,
            "NginxRepository",
            repository_name=ECS_APP_NAME,
            description="Oussama application hosted on NGINX")

        # =============================================================================
        #   CODE BUILD and ECS TASK ROLES for the Blue/Green deployment
        # =============================================================================

        # IAM role for the Code Build project
        codeBuildServiceRole = aws_iam.Role(
            self,
            "codeBuildServiceRole",
            assumed_by=aws_iam.ServicePrincipal('codebuild.amazonaws.com'))

        inlinePolicyForCodeBuild = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            actions=[
                "ecr:GetAuthorizationToken", "ecr:BatchCheckLayerAvailability",
                "ecr:InitiateLayerUpload", "ecr:UploadLayerPart",
                "ecr:CompleteLayerUpload", "ecr:PutImage"
            ],
            resources=["*"])

        codeBuildServiceRole.add_to_policy(inlinePolicyForCodeBuild)

        # ECS task role
        ecsTaskRole = aws_iam.Role(
            self,
            "ecsTaskRoleForWorkshop",
            assumed_by=aws_iam.ServicePrincipal('ecs-tasks.amazonaws.com'))

        ecsTaskRole.add_managed_policy(
            aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                "service-role/AmazonECSTaskExecutionRolePolicy"))

        # =============================================================================
        # CODE DEPLOY APPLICATION for the Blue/Green deployment
        # =============================================================================

        # Creating the code deploy application
        codeDeployApplication = codedeploy.EcsApplication(
            self, "NginxAppCodeDeploy")

        # Creating the code deploy service role
        codeDeployServiceRole = aws_iam.Role(
            self,
            "codeDeployServiceRole",
            assumed_by=aws_iam.ServicePrincipal('codedeploy.amazonaws.com'))
        codeDeployServiceRole.add_managed_policy(
            aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                "AWSCodeDeployRoleForECS"))

        # IAM role for custom lambda function
        customLambdaServiceRole = aws_iam.Role(
            self,
            "codeDeployCustomLambda",
            assumed_by=aws_iam.ServicePrincipal('lambda.amazonaws.com'))

        inlinePolicyForLambda = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            actions=[
                "iam:PassRole", "sts:AssumeRole", "codedeploy:List*",
                "codedeploy:Get*", "codedeploy:UpdateDeploymentGroup",
                "codedeploy:CreateDeploymentGroup",
                "codedeploy:DeleteDeploymentGroup"
            ],
            resources=["*"])

        customLambdaServiceRole.add_managed_policy(
            aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSLambdaBasicExecutionRole'))
        customLambdaServiceRole.add_to_policy(inlinePolicyForLambda)

        # Custom resource to create the deployment group
        createDeploymentGroupLambda = aws_lambda.Function(
            self,
            'createDeploymentGroupLambda',
            code=aws_lambda.Code.from_asset("custom_resources"),
            runtime=aws_lambda.Runtime.PYTHON_3_8,
            handler='create_deployment_group.handler',
            role=customLambdaServiceRole,
            description="Custom resource to create deployment group",
            memory_size=128,
            timeout=core.Duration.seconds(60))
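
A custom resource backed directly by a Lambda function must report success or failure to CloudFormation, otherwise stack operations hang until they time out. The handler asset is not shown in this example, so the skeleton below is only a hypothetical sketch of what custom_resources/create_deployment_group.py might contain, using the standard cfnresponse helper (vendored alongside the handler, since it is only auto-injected for inline functions) and dispatching on the request type:

import boto3
import cfnresponse  # CloudFormation response-protocol helper, vendored with the asset

client = boto3.client('codedeploy')

def handler(event, context):
    try:
        props = event['ResourceProperties']
        if event['RequestType'] == 'Create':
            # Hypothetical helper; a sketch of it follows the CustomResource below.
            create_deployment_group(props)
        elif event['RequestType'] == 'Delete':
            client.delete_deployment_group(
                applicationName=props['ApplicationName'],
                deploymentGroupName=props['DeploymentGroupName'])
        cfnresponse.send(event, context, cfnresponse.SUCCESS, {})
    except Exception:
        cfnresponse.send(event, context, cfnresponse.FAILED, {})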

        # ================================================================================================
        # CloudWatch Alarms for 4XX errors
        # ================================================================================================
        blue4xxMetric = aws_cloudwatch.Metric(
            namespace='AWS/ApplicationELB',
            metric_name='HTTPCode_Target_4XX_Count',
            dimensions={
                "TargetGroup": blueGroup.target_group_full_name,
                "LoadBalancer": alb.load_balancer_full_name
            },
            statistic="sum",
            period=core.Duration.minutes(1))

        blueGroupAlarm = aws_cloudwatch.Alarm(
            self,
            "blue4xxErrors",
            alarm_name="Blue_4xx_Alarm",
            alarm_description="CloudWatch Alarm for the 4xx errors of the Blue target group",
            metric=blue4xxMetric,
            threshold=1,
            evaluation_periods=1)

        green4xxMetric = aws_cloudwatch.Metric(
            namespace='AWS/ApplicationELB',
            metric_name='HTTPCode_Target_4XX_Count',
            dimensions={
                "TargetGroup": greenGroup.target_group_full_name,
                "LoadBalancer": alb.load_balancer_full_name
            },
            statistic="sum",
            period=core.Duration.minutes(1))
        greenGroupAlarm = aws_cloudwatch.Alarm(
            self,
            "green4xxErrors",
            alarm_name="Green_4xx_Alarm",
            alarm_description="CloudWatch Alarm for the 4xx errors of the Green target group",
            metric=green4xxMetric,
            threshold=1,
            evaluation_periods=1)

        # ================================================================================================
        # DUMMY TASK DEFINITION for the initial service creation
        # This is required for the service being made available to create the CodeDeploy Deployment Group
        # ================================================================================================
        sampleTaskDefinition = aws_ecs.FargateTaskDefinition(
            self,
            "sampleTaskDefn",
            family=DUMMY_TASK_FAMILY_NAME,
            cpu=256,
            memory_limit_mib=1024,
            task_role=ecsTaskRole,
            execution_role=ecsTaskRole)

        sampleContainerDefn = sampleTaskDefinition.add_container(
            "sampleAppContainer",
            image=aws_ecs.ContainerImage.from_registry(DUMMY_CONTAINER_IMAGE),
            logging=aws_ecs.AwsLogDriver(
                log_group=aws_logs.LogGroup(
                    self,
                    "sampleAppLogGroup",
                    log_group_name=DUMMY_APP_LOG_GROUP_NAME,
                    removal_policy=core.RemovalPolicy.DESTROY),
                stream_prefix=DUMMY_APP_NAME),
            docker_labels={"name": DUMMY_APP_NAME})

        port_mapping = aws_ecs.PortMapping(container_port=80,
                                           protocol=aws_ecs.Protocol.TCP)

        sampleContainerDefn.add_port_mappings(port_mapping)

        # ================================================================================================
        # ECS task definition using ECR image
        # Will be used by the CODE DEPLOY for Blue/Green deployment
        # ================================================================================================
        NginxTaskDefinition = aws_ecs.FargateTaskDefinition(
            self,
            "appTaskDefn",
            family=ECS_TASK_FAMILY_NAME,
            cpu=256,
            memory_limit_mib=1024,
            task_role=ecsTaskRole,
            execution_role=ecsTaskRole)

        NginxcontainerDefinition = NginxTaskDefinition.add_container(
            "NginxAppContainer",
            image=aws_ecs.ContainerImage.from_ecr_repository(
                NginxecrRepo, "latest"),
            logging=aws_ecs.AwsLogDriver(
                log_group=aws_logs.LogGroup(
                    self,
                    "NginxAppLogGroup",
                    log_group_name=ECS_APP_LOG_GROUP_NAME,
                    removal_policy=core.RemovalPolicy.DESTROY),
                stream_prefix=ECS_APP_NAME),
            docker_labels={"name": ECS_APP_NAME})
        NginxcontainerDefinition.add_port_mappings(port_mapping)

        # =============================================================================
        # ECS SERVICE for the Blue/Green deployment
        # =============================================================================
        NginxAppService = aws_ecs.FargateService(
            self,
            "NginxAppService",
            cluster=ecs_cluster,
            task_definition=NginxTaskDefinition,
            health_check_grace_period=core.Duration.seconds(10),
            desired_count=3,
            deployment_controller={
                "type": aws_ecs.DeploymentControllerType.CODE_DEPLOY
            },
            service_name=ECS_APP_NAME)

        NginxAppService.connections.allow_from(alb, aws_ec2.Port.tcp(80))
        NginxAppService.connections.allow_from(alb, aws_ec2.Port.tcp(8080))
        NginxAppService.attach_to_application_target_group(blueGroup)

        # =============================================================================
        # CODE DEPLOY - Deployment Group CUSTOM RESOURCE for the Blue/Green deployment
        # =============================================================================

        core.CustomResource(
            self,
            'customEcsDeploymentGroup',
            service_token=createDeploymentGroupLambda.function_arn,
            properties={
                "ApplicationName": codeDeployApplication.application_name,
                "DeploymentGroupName": ECS_DEPLOYMENT_GROUP_NAME,
                "DeploymentConfigName": ECS_DEPLOYMENT_CONFIG_NAME,
                "ServiceRoleArn": codeDeployServiceRole.role_arn,
                "BlueTargetGroup": blueGroup.target_group_name,
                "GreenTargetGroup": greenGroup.target_group_name,
                "ProdListenerArn": albProdListener.listener_arn,
                "TestListenerArn": albTestListener.listener_arn,
                "EcsClusterName": ecs_cluster.cluster_name,
                "EcsServiceName": NginxAppService.service_name,
                "TerminationWaitTime": ECS_TASKSET_TERMINATION_WAIT_TIME,
                "BlueGroupAlarm": blueGroupAlarm.alarm_name,
                "GreenGroupAlarm": greenGroupAlarm.alarm_name,
            })
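
Every key in the properties mapping above reaches the handler as event['ResourceProperties'], with all values stringified by CloudFormation (hence the int() around the wait time). Continuing the hypothetical handler sketch from earlier, this is how those properties might map onto boto3's create_deployment_group call; the parameter names are the real CodeDeploy API fields, while the rollback and timeout choices are illustrative assumptions:

def create_deployment_group(props):
    boto3.client('codedeploy').create_deployment_group(
        applicationName=props['ApplicationName'],
        deploymentGroupName=props['DeploymentGroupName'],
        deploymentConfigName=props['DeploymentConfigName'],
        serviceRoleArn=props['ServiceRoleArn'],
        deploymentStyle={
            'deploymentType': 'BLUE_GREEN',
            'deploymentOption': 'WITH_TRAFFIC_CONTROL'},
        blueGreenDeploymentConfiguration={
            'terminateBlueInstancesOnDeploymentSuccess': {
                'action': 'TERMINATE',
                'terminationWaitTimeInMinutes': int(props['TerminationWaitTime'])},
            'deploymentReadyOption': {'actionOnTimeout': 'CONTINUE_DEPLOYMENT'}},
        loadBalancerInfo={
            'targetGroupPairInfoList': [{
                'targetGroups': [{'name': props['BlueTargetGroup']},
                                 {'name': props['GreenTargetGroup']}],
                'prodTrafficRoute': {'listenerArns': [props['ProdListenerArn']]},
                'testTrafficRoute': {'listenerArns': [props['TestListenerArn']]}}]},
        ecsServices=[{'serviceName': props['EcsServiceName'],
                      'clusterName': props['EcsClusterName']}],
        # The two 4xx alarms created above can stop and roll back a deployment.
        alarmConfiguration={
            'enabled': True,
            'alarms': [{'name': props['BlueGroupAlarm']},
                       {'name': props['GreenGroupAlarm']}]},
        autoRollbackConfiguration={
            'enabled': True,
            'events': ['DEPLOYMENT_FAILURE', 'DEPLOYMENT_STOP_ON_ALARM']})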

        ecsDeploymentGroup = codedeploy.EcsDeploymentGroup.from_ecs_deployment_group_attributes(
            self,
            "ecsDeploymentGroup",
            application=codeDeployApplication,
            deployment_group_name=ECS_DEPLOYMENT_GROUP_NAME,
            deployment_config=codedeploy.EcsDeploymentConfig.from_ecs_deployment_config_name(
                self, "ecsDeploymentConfig", ECS_DEPLOYMENT_CONFIG_NAME))

        # =============================================================================
        # CODE BUILD PROJECT for the Blue/ Green deployment
        # =============================================================================

        # Creating the code build project
        NginxAppcodebuild = aws_codebuild.Project(
            self,
            "NginxAppCodeBuild",
            role=codeBuildServiceRole,
            environment=aws_codebuild.BuildEnvironment(
                build_image=aws_codebuild.LinuxBuildImage.STANDARD_4_0,
                compute_type=aws_codebuild.ComputeType.SMALL,
                privileged=True,
                environment_variables={
                    'REPOSITORY_URI': {
                        'value': NginxecrRepo.repository_uri,
                        'type': aws_codebuild.BuildEnvironmentVariableType.PLAINTEXT
                    },
                    'TASK_EXECUTION_ARN': {
                        'value': ecsTaskRole.role_arn,
                        'type': aws_codebuild.BuildEnvironmentVariableType.PLAINTEXT
                    },
                    'TASK_FAMILY': {
                        'value': ECS_TASK_FAMILY_NAME,
                        'type': aws_codebuild.BuildEnvironmentVariableType.PLAINTEXT
                    }
                }),
            source=aws_codebuild.Source.code_commit(
                repository=NginxCodeCommitrepo))

        # =============================================================================
        # CODE PIPELINE for Blue/Green ECS deployment
        # =============================================================================

        codePipelineServiceRole = aws_iam.Role(
            self,
            "codePipelineServiceRole",
            assumed_by=aws_iam.ServicePrincipal('codepipeline.amazonaws.com'))

        inlinePolicyForCodePipeline = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            actions=[
                "iam:PassRole", "sts:AssumeRole", "codecommit:Get*",
                "codecommit:List*", "codecommit:GitPull",
                "codecommit:UploadArchive", "codecommit:CancelUploadArchive",
                "codebuild:BatchGetBuilds", "codebuild:StartBuild",
                "codedeploy:CreateDeployment", "codedeploy:Get*",
                "codedeploy:RegisterApplicationRevision", "s3:Get*",
                "s3:List*", "s3:PutObject"
            ],
            resources=["*"])

        codePipelineServiceRole.add_to_policy(inlinePolicyForCodePipeline)

        sourceArtifact = codepipeline.Artifact('sourceArtifact')
        buildArtifact = codepipeline.Artifact('buildArtifact')

        # S3 bucket for storing the code pipeline artifacts
        NginxAppArtifactsBucket = s3.Bucket(
            self,
            "NginxAppArtifactsBucket",
            encryption=s3.BucketEncryption.S3_MANAGED,
            block_public_access=s3.BlockPublicAccess.BLOCK_ALL)

        # S3 bucket policy for the code pipeline artifacts
        denyUnEncryptedObjectUploads = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.DENY,
            actions=["s3:PutObject"],
            principals=[aws_iam.AnyPrincipal()],
            resources=[NginxAppArtifactsBucket.bucket_arn + "/*"],
            conditions={
                "StringNotEquals": {
                    "s3:x-amz-server-side-encryption": "aws:kms"
                }
            })

        denyInsecureConnections = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.DENY,
            actions=["s3:*"],
            principals=[aws_iam.AnyPrincipal()],
            resources=[NginxAppArtifactsBucket.bucket_arn + "/*"],
            conditions={"Bool": {
                "aws:SecureTransport": "false"
            }})

        NginxAppArtifactsBucket.add_to_resource_policy(
            denyUnEncryptedObjectUploads)
        NginxAppArtifactsBucket.add_to_resource_policy(denyInsecureConnections)

        # Code Pipeline - CloudWatch trigger event is created by CDK
        codepipeline.Pipeline(
            self,
            "ecsBlueGreen",
            role=codePipelineServiceRole,
            artifact_bucket=NginxAppArtifactsBucket,
            stages=[
                codepipeline.StageProps(
                    stage_name='Source',
                    actions=[
                        aws_codepipeline_actions.CodeCommitSourceAction(
                            action_name='Source',
                            repository=NginxCodeCommitrepo,
                            output=sourceArtifact,
                        )
                    ]),
                codepipeline.StageProps(
                    stage_name='Build',
                    actions=[
                        aws_codepipeline_actions.CodeBuildAction(
                            action_name='Build',
                            project=NginxAppcodebuild,
                            input=sourceArtifact,
                            outputs=[buildArtifact])
                    ]),
                codepipeline.StageProps(
                    stage_name='Deploy',
                    actions=[
                        aws_codepipeline_actions.CodeDeployEcsDeployAction(
                            action_name='Deploy',
                            deployment_group=ecsDeploymentGroup,
                            app_spec_template_input=buildArtifact,
                            task_definition_template_input=buildArtifact,
                        )
                    ])
            ])

        # =============================================================================
        # Export the outputs
        # =============================================================================
        core.CfnOutput(self,
                       "ecsBlueGreenCodeRepo",
                       description="Demo app code commit repository",
                       export_name="ecsBlueGreenDemoAppRepo",
                       value=NginxCodeCommitrepo.repository_clone_url_http)

        core.CfnOutput(self,
                       "ecsBlueGreenLBDns",
                       description="Load balancer DNS",
                       export_name="ecsBlueGreenLBDns",
                       value=alb.load_balancer_dns_name)