def attach_alarm(self, target_group):
    """Register an unhealthy-host CloudWatch alarm for ``target_group``.

    The alarm is only added to ``self.template`` when an alarm topic is
    configured (``self._alarm_topic`` is not ``None``); otherwise the call
    is a no-op.

    :type target_group: TargetGroup
    """
    if self._alarm_topic is None:
        return

    group_title = target_group.title
    alarm_dimensions = [
        MetricDimension(
            Name="TargetGroup",
            Value=GetAtt(target_group, "TargetGroupFullName"),
        ),
        MetricDimension(
            Name="LoadBalancer",
            Value=GetAtt("ELB", "LoadBalancerFullName"),
        ),
    ]
    description = "Unhealthy hosts in target group: %s/%s" % (
        self.env_name,
        group_title,
    )
    unhealthy_alarm = Alarm(
        group_title + "UnhealthyHostAlarm",
        AlarmName=Sub("${AWS::StackName}-UnhealthyHosts-" + group_title),
        AlarmDescription=description,
        MetricName="UnHealthyHostCount",
        Namespace=self.alarm_namespace,
        Statistic="Minimum",
        Period=120,
        EvaluationPeriods=2,
        Threshold='0',
        AlarmActions=[self._alarm_topic],
        ComparisonOperator="GreaterThanThreshold",
        Dimensions=alarm_dimensions,
    )
    self.template.add_resource(unhealthy_alarm)
def _add_alb_alarms(self, service_name, alb):
    """Add the CloudWatch alarms that watch an application load balancer.

    Three alarms are added to ``self.template``, in this order: unhealthy
    host count, rejected connection count and load-balancer-originated 5xx
    count. All of them use the ``AWS/ApplicationELB`` namespace and the
    ``Sum`` statistic over a single 60 second period, treat missing data as
    not breaching, and reference ``self.notification_sns_arn`` for both the
    ALARM and the OK actions.

    :param service_name: suffix appended to every alarm's logical ID
    :param alb: load balancer resource; its ``LoadBalancerFullName``
        attribute is used as the ``LoadBalancer`` dimension
    """
    # (logical ID prefix, metric name, threshold, description).
    # Descriptions are built with implicit string concatenation: a backslash
    # continuation inside a string literal embeds the source indentation in
    # the text that ends up in the alarm description.
    alarm_specs = (
        (
            'ElbUnhealthyHostAlarm',
            'UnHealthyHostCount',
            '1',
            'Triggers if any host is marked unhealthy',
        ),
        (
            'ElbRejectedConnectionsAlarm',
            'RejectedConnectionCount',
            '1',
            'Triggers if load balancer has rejected connections because '
            'the load balancer had reached its maximum number of '
            'connections.',
        ),
        (
            'ElbHTTPCodeELB5xxAlarm',
            'HTTPCode_ELB_5XX_Count',
            '3',
            'Triggers if 5xx response originated from load balancer',
        ),
    )
    # NOTE(review): UnHealthyHostCount is, as far as the ALB metric docs go,
    # reported per target group; with only a LoadBalancer dimension this
    # alarm may never receive data -- confirm against the CloudWatch docs.
    for title_prefix, metric_name, threshold, description in alarm_specs:
        self.template.add_resource(
            Alarm(
                title_prefix + service_name,
                EvaluationPeriods=1,
                Dimensions=[
                    MetricDimension(
                        Name='LoadBalancer',
                        Value=GetAtt(alb, 'LoadBalancerFullName'),
                    )
                ],
                AlarmActions=[Ref(self.notification_sns_arn)],
                OKActions=[Ref(self.notification_sns_arn)],
                AlarmDescription=description,
                Namespace='AWS/ApplicationELB',
                Period=60,
                ComparisonOperator='GreaterThanOrEqualToThreshold',
                Statistic='Sum',
                Threshold=threshold,
                MetricName=metric_name,
                TreatMissingData='notBreaching',
            )
        )
def add_resources(self):
    """Add the runner scaling policies and the CPU alarms that drive them.

    Resources are added to ``self.template`` in this order and kept on the
    instance: ``scaleout_policy``, ``runner_cpu_alarm_high``,
    ``scalein_policy`` and ``runner_cpu_alarm_low``. Each alarm watches the
    average ``CPUUtilization`` of the runner auto scaling group over one
    60 second period and triggers the matching policy.
    """
    template = self.template

    # Scale out: add one instance when CPU rises above the threshold.
    scaleout_policy = ScalingPolicy(
        "RunnerScaleoutPolicy",
        AdjustmentType="ChangeInCapacity",
        AutoScalingGroupName=Ref(self.runner_autoscaling_group),
        Cooldown=Ref(self.runner_scaleout_cooldown),
        ScalingAdjustment="1",
    )
    self.scaleout_policy = template.add_resource(scaleout_policy)

    cpu_high_dimension = MetricDimension(
        Name="AutoScalingGroupName",
        Value=Ref(self.runner_autoscaling_group),
    )
    cpu_high_alarm = Alarm(
        "RunnerCPUAlarmHigh",
        EvaluationPeriods="1",
        Statistic="Average",
        Threshold=Ref(self.runner_scaleout_threshold),
        AlarmDescription="Alarm if CPU utilization too high",
        Period="60",
        AlarmActions=[Ref(self.scaleout_policy)],
        Namespace="AWS/EC2",
        Dimensions=[cpu_high_dimension],
        ComparisonOperator="GreaterThanThreshold",
        MetricName="CPUUtilization",
    )
    self.runner_cpu_alarm_high = template.add_resource(cpu_high_alarm)

    # Scale in: remove one instance when CPU drops below the threshold.
    scalein_policy = ScalingPolicy(
        "RunnerScaleinPolicy",
        AdjustmentType="ChangeInCapacity",
        AutoScalingGroupName=Ref(self.runner_autoscaling_group),
        Cooldown=Ref(self.runner_scalein_cooldown),
        ScalingAdjustment="-1",
    )
    self.scalein_policy = template.add_resource(scalein_policy)

    cpu_low_dimension = MetricDimension(
        Name="AutoScalingGroupName",
        Value=Ref(self.runner_autoscaling_group),
    )
    cpu_low_alarm = Alarm(
        "RunnerCPUAlarmLow",
        EvaluationPeriods="1",
        Statistic="Average",
        Threshold=Ref(self.runner_scalein_threshold),
        AlarmDescription="Alarm if CPU utilization too low",
        Period="60",
        AlarmActions=[Ref(self.scalein_policy)],
        Namespace="AWS/EC2",
        Dimensions=[cpu_low_dimension],
        ComparisonOperator="LessThanThreshold",
        MetricName="CPUUtilization",
    )
    self.runner_cpu_alarm_low = template.add_resource(cpu_low_alarm)
def create_autoscaling_policies(self):
    """Add CPU-driven scale-up and scale-down policies with their alarms.

    The auto scaling group is referenced by logical ID (``<self.name>ASG``).
    Four resources are added to ``self.template`` in order:
    ``ScaleUpPolicy``, ``CPUAlarmHigh``, ``ScaleDownPolicy`` and
    ``CPUAlarmLow``.
    """
    template = self.template
    asg_logical_id = "%sASG" % self.name

    # Scale up by one instance when average CPU stays above 50%.
    scale_up_policy = ScalingPolicy(
        "ScaleUpPolicy",
        AdjustmentType="ChangeInCapacity",
        AutoScalingGroupName=Ref(asg_logical_id),
        Cooldown="5",
        ScalingAdjustment="1",
    )
    template.add_resource(scale_up_policy)

    high_cpu_alarm = Alarm(
        "CPUAlarmHigh",
        EvaluationPeriods="10",
        Statistic="Average",
        Threshold="50",
        AlarmDescription="Alarm if CPU too high or metric disappears indicating instance is down",
        Period="60",
        AlarmActions=[Ref(scale_up_policy)],
        Namespace="AWS/EC2",
        Dimensions=[
            MetricDimension(
                Name="AutoScalingGroupName",
                Value=Ref(asg_logical_id),
            )
        ],
        ComparisonOperator="GreaterThanThreshold",
        MetricName="CPUUtilization",
    )
    template.add_resource(high_cpu_alarm)

    # Scale down by one instance when average CPU stays below 10%.
    scale_down_policy = ScalingPolicy(
        "ScaleDownPolicy",
        AdjustmentType="ChangeInCapacity",
        AutoScalingGroupName=Ref(asg_logical_id),
        Cooldown="5",
        ScalingAdjustment="-1",
    )
    template.add_resource(scale_down_policy)

    low_cpu_alarm = Alarm(
        "CPUAlarmLow",
        EvaluationPeriods="10",
        Statistic="Average",
        Threshold="10",
        AlarmDescription="Alarm if CPU is not high anymore, scale down instances",
        Period="60",
        AlarmActions=[Ref(scale_down_policy)],
        Namespace="AWS/EC2",
        Dimensions=[
            MetricDimension(
                Name="AutoScalingGroupName",
                Value=Ref(asg_logical_id),
            )
        ],
        ComparisonOperator="LessThanThreshold",
        MetricName="CPUUtilization",
    )
    template.add_resource(low_cpu_alarm)
def create_sns_notification_alarm(stack,
                                  name,
                                  description,
                                  metric_name,
                                  metric_namespace,
                                  sns_topic_arn,
                                  comparison_operator='GreaterThanThreshold',
                                  threshold='0',
                                  evaluation_periods='1',
                                  period_secs='60',
                                  statistic='Minimum',
                                  dimensions=None,
                                  treatMissingData='missing'):
    """Add an alarm on a log metric that notifies an SNS topic.

    The alarm's logical ID is ``<name>Alarm`` with hyphens removed, its
    ``AlarmName`` is ``<name>Alarm`` unchanged, and the watched metric is
    ``<metric_name>Metric`` with hyphens removed. ``dimensions`` is an
    optional mapping of dimension name to value. The resource is added via
    ``stack.stack.add_resource`` and the result of that call is returned.
    """
    dimension_objects = []
    if dimensions:
        for dimension_name, dimension_value in dimensions.items():
            dimension_objects.append(
                MetricDimension(Name=dimension_name, Value=dimension_value)
            )

    logical_id = '{0}Alarm'.format(name.replace('-', ''))
    watched_metric = '{0}Metric'.format(metric_name.replace('-', ''))

    alarm = Alarm(
        logical_id,
        AlarmName='{0}Alarm'.format(name),
        AlarmDescription=description,
        AlarmActions=[sns_topic_arn],
        ComparisonOperator=comparison_operator,
        Dimensions=dimension_objects,
        EvaluationPeriods=evaluation_periods,
        MetricName=watched_metric,
        Namespace=metric_namespace,
        Period=period_secs,
        Statistic=statistic,
        Threshold=threshold,
        TreatMissingData=treatMissingData,
    )
    return stack.stack.add_resource(alarm)
# set_services_alarms(settings)
#
# Walks settings.families and skips every family without predefined_alarms.
# For the remaining families the visible steps are:
#   * each predefined definition has its "Primary" alarm looked up in
#     definition["Alarms"] and updated through
#     update_definition_from_settings() using definition["Settings"];
#   * definition-level "Topics" are copied onto each alarm definition, and
#     update_alarm_threshold() is applied to alarm definitions that carry
#     Properties with a MetricName;
#   * Alarm objects are built from (alarm_name, alarm_definition,
#     family.logical_name, settings), collected in family_alarms and handed
#     to create_alarms() together with family.template;
#   * alarms whose cfn_resource is a CWAlarm get their "Dimensions" replaced
#     by ClusterName (Ref(CLUSTER_NAME)) and ServiceName (GetAtt of
#     family.ecs_service.ecs_service "Name"); alarms whose cfn_resource type
#     is a CompositeAlarm subclass are passed to handle_alarm_topics().
# The function has no return statement.
# NOTE(review): the indentation of this block is not visible here, so the
# exact nesting of the loops and conditionals above should be confirmed
# against the formatted source before relying on this summary.
def set_services_alarms(settings): """ Function to create and assign alarms to services :param ecs_composex.common.settings.ComposeXSettings settings: """ for family in settings.families.values(): if not family.predefined_alarms: continue family_alarms = [] for name, definition in family.predefined_alarms.items(): primary_name = definition["Primary"] primary = definition["Alarms"][primary_name] update_definition_from_settings(primary, definition["Settings"]) for alarm_name, alarm_definition in definition["Alarms"].items(): if keyisset("Topics", definition): alarm_definition["Topics"] = definition["Topics"] if keyisset("Properties", alarm_definition) and keyisset( "MetricName", alarm_definition["Properties"]): update_alarm_threshold(alarm_definition["Properties"], definition["Settings"]) the_alarm = Alarm(alarm_name, alarm_definition, family.logical_name, settings) family_alarms.append(the_alarm) create_alarms(family.template, family_alarms) for alarm in family_alarms: dimensions = [ MetricDimension(**{ "Name": "ClusterName", "Value": Ref(CLUSTER_NAME) }), MetricDimension( **{ "Name": "ServiceName", "Value": GetAtt(family.ecs_service.ecs_service, "Name"), }), ] if isinstance(alarm.cfn_resource, CWAlarm): setattr(alarm.cfn_resource, "Dimensions", dimensions) if issubclass(type(alarm.cfn_resource), CompositeAlarm): handle_alarm_topics(alarm, family.stack, settings)
def create_simple_scaling_policy(self, scaling_policy_config):
    """
    Create a simple scaling policy and the CloudWatch alarm that triggers it.

    The policy is based upon ec2 metrics. Example configurations:
      heavy-load  cpu > 45  for 1 period of 300 seconds, add two instances, 45 second cooldown
      light-load  cpu <= 15 for 6 periods of 300 seconds, remove one instance, 120 second cooldown
      medium-load cpu >= 25 for 1 period of 300 seconds, add one instance, 45 second cooldown

    Fields read from the config object:
      [name] [description] [metric_name] [comparison_operator] [threshold]
      [evaluation_periods] [period] [scaling_adjustment] [cooldown]

    The policy is appended to ``self.scaling_polices`` and the alarm to
    ``self.cw_alarms``; both are also added to ``self.template``.

    https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-cw-alarm.html
    https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-as-policy.html
    :param scaling_policy_config: simple scaling policy config object
    """
    config = scaling_policy_config
    base_title = self.trop_asg.title + get_cf_friendly_name(config.name)

    policy = ScalingPolicy(
        title=base_title + 'Sp',
        AdjustmentType='ChangeInCapacity',
        AutoScalingGroupName=Ref(self.trop_asg),
        Cooldown=config.cooldown,
        ScalingAdjustment=config.scaling_adjustment,
    )
    scaling_policy = self.template.add_resource(policy)
    self.scaling_polices.append(scaling_policy)

    # The alarm both triggers the policy and notifies the SNS topic; the
    # topic is notified again when the alarm returns to OK.
    asg_dimension = MetricDimension(
        Name='AutoScalingGroupName',
        Value=Ref(self.trop_asg),
    )
    alarm = Alarm(
        title=base_title + 'Cwa',
        AlarmActions=[Ref(scaling_policy), self.network_config.sns_topic],
        AlarmDescription=config.description,
        AlarmName=base_title,
        ComparisonOperator=config.comparison_operator,
        Dimensions=[asg_dimension],
        EvaluationPeriods=config.evaluation_periods,
        MetricName=config.metric_name,
        Namespace='AWS/EC2',
        Period=config.period,
        Statistic='Average',
        Threshold=config.threshold,
        OKActions=[self.network_config.sns_topic],
    )
    self.cw_alarms.append(self.template.add_resource(alarm))
def _add_service_alarms(self, svc):
    """Add the per-service ECS alarms for ``svc`` to ``self.template``.

    Three alarms are added, in this order: high CPU, high memory and
    "no running tasks". All of them live in the ``AWS/ECS`` namespace, use
    the ``ClusterName``/``ServiceName`` dimensions, evaluate a single
    period and reference ``self.notification_sns_arn`` for both the ALARM
    and the OK actions.

    :param svc: ECS service resource; ``svc.name`` is appended to each
        alarm's logical ID and its ``Name`` attribute is used as the
        ``ServiceName`` dimension
    """

    def build_alarm(title_prefix, description, **metric_properties):
        # Builds new dimension and action objects on every call so that no
        # troposphere object is shared between alarms.
        return Alarm(
            title_prefix + str(svc.name),
            EvaluationPeriods=1,
            Dimensions=[
                MetricDimension(Name='ClusterName',
                                Value=self.cluster_name),
                MetricDimension(Name='ServiceName',
                                Value=GetAtt(svc, 'Name')),
            ],
            AlarmActions=[Ref(self.notification_sns_arn)],
            OKActions=[Ref(self.notification_sns_arn)],
            AlarmDescription=description,
            Namespace='AWS/ECS',
            **metric_properties
        )

    # Descriptions use implicit string concatenation; a backslash
    # continuation inside the literal would embed the source indentation
    # in the description text.
    ecs_high_cpu_alarm = build_alarm(
        'EcsHighCPUAlarm',
        'Alarm if CPU too high or metric disappears '
        'indicating instance is down',
        Period=300,
        ComparisonOperator='GreaterThanThreshold',
        Statistic='Average',
        Threshold='80',
        MetricName='CPUUtilization',
    )
    self.template.add_resource(ecs_high_cpu_alarm)

    ecs_high_memory_alarm = build_alarm(
        'EcsHighMemoryAlarm',
        'Alarm if memory too high or metric '
        'disappears indicating instance is down',
        Period=300,
        ComparisonOperator='GreaterThanThreshold',
        Statistic='Average',
        Threshold='80',
        MetricName='MemoryUtilization',
    )
    self.template.add_resource(ecs_high_memory_alarm)

    # How to add service task count alarm
    # http://docs.aws.amazon.com/AmazonECS/latest/developerguide/cloudwatch-metrics.html#cw_running_task_count
    # The sample count of CPUUtilization is compared against 1, and missing
    # data is treated as breaching.
    ecs_no_running_tasks_alarm = build_alarm(
        'EcsNoRunningTasksAlarm',
        'Alarm if the task count goes to zero, denoting '
        'service is down',
        Period=60,
        ComparisonOperator='LessThanThreshold',
        Statistic='SampleCount',
        Threshold='1',
        MetricName='CPUUtilization',
        TreatMissingData='breaching',
    )
    self.template.add_resource(ecs_no_running_tasks_alarm)
def _add_error_alarm(self, template, function_name: str, context: str, topic):
    """Add an alarm on the Lambda ``Errors`` metric of ``function_name``.

    The alarm goes into ALARM when the 60 second sum of errors is greater
    than zero. Its action is the ARN returned by
    ``self._alert_topic_arn(context)`` and the resource depends on ``topic``.

    :param template: template the alarm resource is added to
    :param function_name: Lambda function name; also prefixes the logical ID
    :param context: passed to ``self._alert_topic_arn`` to resolve the action
    :param topic: resource the alarm declares a ``DependsOn`` on
    """
    alert_arn = self._alert_topic_arn(context)
    function_dimension = MetricDimension(
        Name='FunctionName',
        Value=function_name,
    )
    error_alarm = Alarm(
        '{}ErrorAlarm'.format(function_name),
        AlarmActions=[alert_arn],
        ComparisonOperator='GreaterThanThreshold',
        EvaluationPeriods=1,
        MetricName='Errors',
        Namespace='AWS/Lambda',
        Dimensions=[function_dimension],
        Period=60,
        Statistic='Sum',
        Threshold=0,
        DependsOn=[topic],
    )
    template.add_resource(error_alarm)
def add_alarm_for_resource(
    resource, target, scaling_out_policy, scaling_in_policy, resource_parameter
):
    """
    Function to add the Alarm for SQS resource to the service template

    The alarm is constructed with ``template=target[0].template``. It
    triggers ``scaling_out_policy`` on ALARM and ``scaling_in_policy`` on
    both OK and INSUFFICIENT_DATA. The threshold is the lower bound of the
    scale-out policy's first step adjustment.

    :param ecs_composex.common.compose_resources.XResource resource:
    :param tuple target: only the first element is used
    :param scaling_out_policy:
    :param scaling_in_policy:
    :param resource_parameter: value of the ``QueueName`` dimension
    :return:
    """
    service_family = target[0]
    first_step = scaling_out_policy.StepScalingPolicyConfiguration.StepAdjustments[
        0
    ]
    queue_dimension = MetricDimension(Name="QueueName", Value=resource_parameter)
    scale_in_ref_on_no_data = Ref(scaling_in_policy)
    scale_in_ref_on_ok = Ref(scaling_in_policy)

    Alarm(
        f"SqsScalingAlarm{resource.logical_name}To{service_family.logical_name}",
        template=service_family.template,
        ActionsEnabled=True,
        AlarmActions=[Ref(scaling_out_policy)],
        AlarmDescription=f"MessagesProcessingWatchFor{resource.logical_name}To{service_family.logical_name}",
        ComparisonOperator="GreaterThanOrEqualToThreshold",
        DatapointsToAlarm=1,
        Dimensions=[queue_dimension],
        EvaluationPeriods=1,
        InsufficientDataActions=[scale_in_ref_on_no_data],
        MetricName="ApproximateNumberOfMessagesVisible",
        Namespace="AWS/SQS",
        OKActions=[scale_in_ref_on_ok],
        Period="60",
        Statistic="Sum",
        TreatMissingData="notBreaching",
        Threshold=float(first_step.MetricIntervalLowerBound),
    )
def add_cloudwatch_alarm(
        self,
        title,
        scale_policy,
        alarm_desc,
        comparison,
        eval_period,
        metric_name,
        period,
        threshold
):  # pylint: disable=too-many-arguments
    '''
    Add a CloudWatch alarm that triggers ``scale_policy``.

    The alarm watches the ``Average`` of ``metric_name`` in the ``AWS/ECS``
    namespace, with the ``AutoScalingGroupName`` dimension set to a
    reference to ``constants.INST_ASG``. Returns ``self.cfn_template``.
    '''
    # NOTE(review): AutoScalingGroupName is usually a dimension of AWS/EC2
    # metrics rather than AWS/ECS ones -- confirm this pairing is intended.
    asg_dimension = MetricDimension(
        Name='AutoScalingGroupName',
        Value=Ref(constants.INST_ASG),
    )
    alarm = Alarm(
        title=title,
        ActionsEnabled=True,
        AlarmActions=[Ref(scale_policy)],
        AlarmDescription=alarm_desc,
        ComparisonOperator=comparison,
        Dimensions=[asg_dimension],
        EvaluationPeriods=eval_period,
        MetricName=metric_name,
        Namespace='AWS/ECS',
        Period=period,
        Statistic='Average',
        Threshold=threshold,
    )
    self.cfn_template.add_resource(alarm)
    return self.cfn_template
# generate_cloudformation_template()
#
# Builds and returns a troposphere Template that "Configures Auto Scaling
# Group for the app". Inputs are read from the command line:
#   sys.argv[1]  the string 'True' enables the load balancer section
#   sys.argv[2]  Python literal (ast.literal_eval): scaling policy dicts
#   sys.argv[3]  Python literal: alarm dicts
#   sys.argv[4]  Python literal: load balancer listener dicts
#
# Keys read from each scaling policy dict: name, adjustment_type, cooldown,
# policy_type, scaling_adjustment; optionally estimated_instance_warmup and
# metric_aggregation_type (only applied when policy_type is not
# "SimpleScaling"), min_adjustment_magnitude (only applied when
# adjustment_type is "PercentChangeInCapacity") and step_adjustments.
# Keys read from each alarm dict: name, scaling_policy_name, description,
# comparison, evaluation_periods, metric, namespace, period, statistics,
# threshold (converted with str()) and unit. scaling_policy_name must match
# the name of one of the scaling policies created above it.
# Keys read from each listener dict: load_balancer_port, instance_port,
# protocol.
#
# Resources visible in the body: an elb.LoadBalancer plus a Route 53 CNAME
# RecordSetType, an AutoScalingGroup with creation/update policies, one
# ScalingPolicy per policy dict and one Alarm per alarm dict (dimensioned on
# the auto scaling group). HealthCheckType is set to 'EC2' and switched to
# 'ELB' when the load balancer is enabled. Outputs: StackName, DomainName and
# LoadBalancer (load balancer case), AutoScalingGroup, LaunchConfiguration.
# NOTE(review): indentation is not visible in this view; the load balancer
# parameters, listeners, LoadBalancer and DNS record are presumably all
# inside the first `if enable_elb:` branch -- confirm against the formatted
# source.
def generate_cloudformation_template(): enable_elb = sys.argv[1] input_scaling_policies = ast.literal_eval(sys.argv[2]) input_alarms = ast.literal_eval(sys.argv[3]) enable_elb = enable_elb == 'True' elb_listeners = ast.literal_eval(sys.argv[4]) template = Template() template.add_description("""\ Configures Auto Scaling Group for the app""") project_name = template.add_parameter( Parameter( "Name", Type="String", Description="Instances will be tagged with this name", )) scalecapacity = template.add_parameter( Parameter( "ScaleCapacity", Default="1", Type="String", Description="Number of api servers to run", )) minsize = template.add_parameter( Parameter( "MinScale", Type="String", Description="Minimum number of servers to keep in the ASG", )) maxsize = template.add_parameter( Parameter( "MaxScale", Type="String", Description="Maximum number of servers to keep in the ASG", )) signalcount = template.add_parameter( Parameter( "SignalCount", Default="1", Type="String", Description= "No. of signals CF must receive before it sets the status as CREATE_COMPLETE", )) signaltimeout = template.add_parameter( Parameter( "SignalTimeout", Default="PT5M", Type="String", Description= "Time that CF waits for the number of signals that was specified in Count ", )) minsuccessfulinstancespercent = template.add_parameter( Parameter( "MinSuccessfulInstancesPercent", Default="100", Type="String", Description= "% instances in a rolling update that must signal success for CF to succeed", )) environment = template.add_parameter( Parameter( "Environment", Type="String", Description="The environment being deployed into", )) subnet = template.add_parameter( Parameter( "Subnets", Type="CommaDelimitedList", )) launchconfigurationname = template.add_parameter( Parameter( "LaunchConfigurationName", Type="String", )) health_check_grace_period = template.add_parameter( Parameter( "HealthCheckGracePeriod", Type="String", Default="300", )) if enable_elb: elb_subnets = template.add_parameter( Parameter( 
"LoadBalancerSubnets", Type="CommaDelimitedList", )) elb_bucket_name = template.add_parameter( Parameter("LoadBalancerBucketName", Type="String", Description="S3 Bucket for the ELB access logs")) template.add_condition("ElbLoggingCondition", Not(Equals(Ref(elb_bucket_name), ""))) elb_schema = template.add_parameter( Parameter( "LoadBalancerSchema", Type="String", )) health_check_interval = template.add_parameter( Parameter( "LoadBalancerHealthCheckInterval", Type="String", )) health_check_timeout = template.add_parameter( Parameter( "LoadBalancerHealthCheckTimeout", Type="String", )) healthy_threshold = template.add_parameter( Parameter( "LoadBalancerHealthyThreshold", Type="String", )) unhealthy_threshold = template.add_parameter( Parameter( "LoadBalancerUnHealthyThreshold", Type="String", )) enable_connection_draining = template.add_parameter( Parameter( "LoadBalancerEnableConnectionDraining", Type="String", Default="True", )) connection_draining_timeout = template.add_parameter( Parameter( "LoadBalancerConnectionDrainingTimeout", Type="String", Default="30", )) loadbalancersecuritygroup = template.add_parameter( Parameter( "LoadBalancerSecurityGroup", Type="CommaDelimitedList", Description="Security group for api app load balancer.", )) hostedzone = template.add_parameter( Parameter( "HostedZoneName", Description= "The DNS name of an existing Amazon Route 53 hosted zone", Type="String", )) dns_record = template.add_parameter( Parameter( "DNSRecord", Type="String", )) dns_ttl = template.add_parameter( Parameter( "DNSTTL", Default="300", Type="String", )) new_weight = template.add_parameter( Parameter( "NewDnsWeight", Type="String", Default="100", )) health_check_protocol = template.add_parameter( Parameter( "LoadBalancerHealthCheckProtocol", Type="String", )) template.add_condition("ElbTCPProtocolCondition", Equals(Ref(health_check_protocol), "TCP")) health_check_port = template.add_parameter( Parameter( "LoadBalancerHealthCheckPort", Type="String", )) 
health_check_path = template.add_parameter( Parameter( "LoadBalancerHealthCheckPath", Type="String", )) load_balancer_listeners = [] for listener in elb_listeners: load_balancer_listeners.append( elb.Listener( LoadBalancerPort=listener['load_balancer_port'], InstancePort=listener['instance_port'], Protocol=listener['protocol'], InstanceProtocol=Ref(health_check_protocol), )) loadbalancer = template.add_resource( elb.LoadBalancer( "LoadBalancer", AccessLoggingPolicy=If( "ElbLoggingCondition", elb.AccessLoggingPolicy(EmitInterval=60, Enabled=True, S3BucketName=Ref(elb_bucket_name), S3BucketPrefix="ELBLogs"), Ref("AWS::NoValue")), ConnectionDrainingPolicy=elb.ConnectionDrainingPolicy( Enabled=Ref(enable_connection_draining), Timeout=Ref(connection_draining_timeout), ), Subnets=Ref(elb_subnets), HealthCheck=elb.HealthCheck( Target=Join("", [ Ref(health_check_protocol), ":", Ref(health_check_port), If("ElbTCPProtocolCondition", Ref("AWS::NoValue"), Ref(health_check_path)) ]), HealthyThreshold=Ref(healthy_threshold), UnhealthyThreshold=Ref(unhealthy_threshold), Interval=Ref(health_check_interval), Timeout=Ref(health_check_timeout), ), Listeners=load_balancer_listeners, CrossZone=True, SecurityGroups=Ref(loadbalancersecuritygroup), Scheme=Ref(elb_schema))) route53record = template.add_resource( RecordSetType( "DNS", HostedZoneName=Join("", [Ref(hostedzone), "."]), Name=Join("", [Ref(dns_record), ".", Ref(hostedzone), "."]), ResourceRecords=[GetAtt(loadbalancer, "DNSName")], SetIdentifier=Ref(project_name), TTL=Ref(dns_ttl), Type="CNAME", Weight=Ref(new_weight), )) autoscalinggroup = template.add_resource( AutoScalingGroup( "AutoscalingGroup", Tags=[ Tag("Name", Ref(project_name), True), Tag("Environment", Ref(environment), True) ], LaunchConfigurationName=Ref(launchconfigurationname), MinSize=Ref(minsize), MaxSize=Ref(maxsize), DesiredCapacity=Ref(scalecapacity), VPCZoneIdentifier=Ref(subnet), HealthCheckGracePeriod=Ref(health_check_grace_period), 
CreationPolicy=CreationPolicy( ResourceSignal=ResourceSignal(Count=Ref(signalcount), Timeout=Ref(signaltimeout)), AutoScalingCreationPolicy=AutoScalingCreationPolicy( MinSuccessfulInstancesPercent=Ref( minsuccessfulinstancespercent))), UpdatePolicy=UpdatePolicy( AutoScalingRollingUpdate=AutoScalingRollingUpdate( MaxBatchSize='1', MinInstancesInService='1', MinSuccessfulInstancesPercent=Ref( minsuccessfulinstancespercent), PauseTime=Ref(signaltimeout), WaitOnResourceSignals=True)))) autoscalinggroup.HealthCheckType = 'EC2' if enable_elb: autoscalinggroup.LoadBalancerNames = [Ref(loadbalancer)] autoscalinggroup.HealthCheckType = 'ELB' created_scaling_policies = dict() for scaling_policy in input_scaling_policies: policy_properties = { 'AdjustmentType': scaling_policy['adjustment_type'], 'AutoScalingGroupName': Ref(autoscalinggroup), 'Cooldown': scaling_policy['cooldown'], 'PolicyType': scaling_policy['policy_type'], 'ScalingAdjustment': scaling_policy['scaling_adjustment'], } if scaling_policy['policy_type'] != "SimpleScaling" \ and 'estimated_instance_warmup' in scaling_policy: policy_properties['EstimatedInstanceWarmup'] = \ scaling_policy['estimated_instance_warmup'] if scaling_policy['policy_type'] != "SimpleScaling" \ and 'metric_aggregation_type' in scaling_policy: policy_properties['MetricAggregationType'] = scaling_policy[ 'metric_aggregation_type'] if scaling_policy['adjustment_type'] == "PercentChangeInCapacity" \ and 'min_adjustment_magnitude' in scaling_policy: policy_properties['MinAdjustmentMagnitude'] = scaling_policy[ 'min_adjustment_magnitude'] if 'step_adjustments' in scaling_policy: policy_properties['StepAdjustments'] = scaling_policy[ 'step_adjustments'] created_scaling_policies[ scaling_policy['name']] = template.add_resource( ScalingPolicy(scaling_policy['name'], **policy_properties)) for alarm in input_alarms: template.add_resource( Alarm( alarm['name'], ActionsEnabled=True, AlarmActions=[ 
Ref(created_scaling_policies[alarm['scaling_policy_name']]) ], AlarmDescription=alarm['description'], ComparisonOperator=alarm['comparison'], Dimensions=[ MetricDimension(Name="AutoScalingGroupName", Value=Ref(autoscalinggroup)), ], EvaluationPeriods=alarm['evaluation_periods'], InsufficientDataActions=[], MetricName=alarm['metric'], Namespace=alarm['namespace'], OKActions=[], Period=alarm['period'], Statistic=alarm['statistics'], Threshold=str(alarm['threshold']), Unit=alarm['unit'], )) template.add_output( Output("StackName", Value=Ref(project_name), Description="Stack Name")) if enable_elb: template.add_output( Output("DomainName", Value=Ref(route53record), Description="DNS to access the service")) template.add_output( Output("LoadBalancer", Value=GetAtt(loadbalancer, "DNSName"), Description="ELB dns")) template.add_output( Output("AutoScalingGroup", Value=Ref(autoscalinggroup), Description="Auto Scaling Group")) template.add_output( Output("LaunchConfiguration", Value=Ref(launchconfigurationname), Description="LaunchConfiguration for this deploy")) return template
)) AlarmScaleOutPolicy = t.add_resource(Alarm( "AlarmScaleOutPolicy", AlarmDescription=Join("", ["Scale out if average traffic > ", Ref(ScaleOutAverageThresholdParam), " KB/s for ", Ref(ScaleOutConsecutivePeriodsParam), " periods of ", Ref(CloudWatchScalingWindowParam), " seconds"]), MetricName="TotalKbytesPerSecond", Namespace=Join("", [Ref("AWS::StackName"), "-", Ref(NATNamespaceParam)]), Statistic="Average", Period=Ref(CloudWatchScalingWindowParam), EvaluationPeriods=Ref(ScaleOutConsecutivePeriodsParam), Threshold=Ref(ScaleOutAverageThresholdParam), Dimensions=[ MetricDimension( "StackMetricDimension", Name="StackName", Value=Ref("AWS::StackName") ) ], ComparisonOperator="GreaterThanThreshold", AlarmActions=[ Ref(ScaleOutPolicy) ] )) AlarmScaleInPolicy = t.add_resource(Alarm( "AlarmScaleInPolicy", AlarmDescription=Join("", ["Scale in if average traffic < ", Ref(ScaleInAverageThresholdParam), " KB/s for ", Ref(ScaleInConsecutivePeriodsParam), " periods of ", Ref(CloudWatchScalingWindowParam), " seconds"]), MetricName="TotalKbytesPerSecond", Namespace=Join("", [Ref("AWS::StackName"), "-", Ref(NATNamespaceParam)]),
# Simple scaling policy: add one instance to the auto scaling group each time
# the request-count alarm below goes into ALARM.
ScalePolicy = t.add_resource(
    ScalingPolicy(
        "HTTPRequestScalingPolicy",
        AdjustmentType="ChangeInCapacity",
        AutoScalingGroupName=Ref(AutoscalingGroup),
        Cooldown="1",
        ScalingAdjustment="1",
    )
)

# Alarm on the load balancer's request count (sum over 300 seconds > 2000).
# RequestCount with a LoadBalancerName dimension is a Classic Load Balancer
# metric published under the AWS/ELB namespace; under AWS/SQS no such metric
# exists, so the alarm would never receive data and never trigger scaling.
HTTPRequestAlarm = t.add_resource(
    Alarm(
        "HTTPRequestAlarm",
        AlarmDescription="Alarm if HTTP Requests go above a 2000",
        Namespace="AWS/ELB",
        MetricName="RequestCount",
        Dimensions=[
            MetricDimension(
                Name="LoadBalancerName",
                Value=Ref(LoadBalancerResource),
            ),
        ],
        Statistic="Sum",
        Period="300",
        EvaluationPeriods="1",
        Threshold="2000",
        ComparisonOperator="GreaterThanThreshold",
        AlarmActions=[Ref(ScalePolicy)],
    )
)

# Emit the rendered CloudFormation template on stdout.
print(t.to_json())
Cooldown="120", ScalingAdjustment="1") template.add_resource(scale_down_policy) template.add_resource(scale_up_policy) # cloudwatch alarms on cpu cloudwatch_cpu_high_alarm = Alarm("CPUHighAlarm", EvaluationPeriods="2", Statistic="Average", Threshold="70", Period="60", AlarmDescription="Alarm if CPU > 70%", Namespace="AWS/EC2", Dimensions=[ MetricDimension( Name="AutoScaleGroup", Value=Ref(auto_scale_group)) ], AlarmActions=[Ref(scale_up_policy)], ComparisonOperator="LessThanThreshold", MetricName="CPUUtilization") cloudwatch_cpu_low_alarm = Alarm("CPULowAlarm", EvaluationPeriods="5", Statistic="Average", Threshold="30", Period="60", AlarmDescription="Alarm if CPU < 30%", Namespace="AWS/EC2", Dimensions=[ MetricDimension( Name="AutoScaleGroup",
def _add_cluster_alarms(self, cluster):
    """Add the cluster-level CloudWatch alarms to ``self.template``.

    Five alarms are added, in this order:

    1. ``Ec2HostsHighCPUAlarm`` - average EC2 CPU of the auto scaling group
       above 60% for one 60 second period.
    2. ``ClusterHighCPUAlarm`` - average ECS cluster CPU above 60% for one
       300 second period.
    3. ``ClusterHighMemoryAlarm`` - average ECS cluster memory above 60%
       for one 300 second period.
    4. ``ClusterHighMemoryReservationAlarm`` - memory reservation above 75%
       for one period; triggers ``self.cluster_scaling_policy``. Stored on
       ``self.cluster_high_memory_reservation_autoscale_alarm``.
    5. ``ClusterHighMemoryReservationUserNotifcationAlarm`` - memory
       reservation above 75% for three periods; notifies on ALARM and OK.
       Stored on
       ``self.cluster_high_memory_reservation_user_notification_alarm``.

    Alarms 1-3 and 5 reference ``self.notification_sns_arn``.

    :param cluster: ECS cluster resource referenced by the ``ClusterName``
        dimension
    """
    # Descriptions use implicit string concatenation; a backslash
    # continuation inside the literal would embed the source indentation
    # in the description text.
    ec2_hosts_high_cpu_alarm = Alarm(
        'Ec2HostsHighCPUAlarm',
        EvaluationPeriods=1,
        Dimensions=[
            MetricDimension(Name='AutoScalingGroupName',
                            Value=Ref(self.auto_scaling_group))
        ],
        AlarmActions=[Ref(self.notification_sns_arn)],
        AlarmDescription='Alarm if CPU too high or metric disappears '
                         'indicating instance is down',
        Namespace='AWS/EC2',
        Period=60,
        ComparisonOperator='GreaterThanThreshold',
        Statistic='Average',
        Threshold='60',
        MetricName='CPUUtilization',
    )
    self.template.add_resource(ec2_hosts_high_cpu_alarm)

    cluster_high_cpu_alarm = Alarm(
        'ClusterHighCPUAlarm',
        EvaluationPeriods=1,
        Dimensions=[
            MetricDimension(Name='ClusterName', Value=Ref(cluster))
        ],
        AlarmActions=[Ref(self.notification_sns_arn)],
        AlarmDescription='Alarm if CPU is too high for cluster.',
        Namespace='AWS/ECS',
        Period=300,
        ComparisonOperator='GreaterThanThreshold',
        Statistic='Average',
        Threshold='60',
        MetricName='CPUUtilization',
    )
    self.template.add_resource(cluster_high_cpu_alarm)

    cluster_high_memory_alarm = Alarm(
        'ClusterHighMemoryAlarm',
        EvaluationPeriods=1,
        Dimensions=[
            MetricDimension(Name='ClusterName', Value=Ref(cluster))
        ],
        AlarmActions=[Ref(self.notification_sns_arn)],
        AlarmDescription='Alarm if memory is too high for cluster.',
        Namespace='AWS/ECS',
        Period=300,
        ComparisonOperator='GreaterThanThreshold',
        Statistic='Average',
        Threshold='60',
        MetricName='MemoryUtilization',
    )
    self.template.add_resource(cluster_high_memory_alarm)

    # Drives autoscaling: fires the cluster scaling policy after a single
    # breaching period.
    self.cluster_high_memory_reservation_autoscale_alarm = Alarm(
        'ClusterHighMemoryReservationAlarm',
        EvaluationPeriods=1,
        Dimensions=[
            MetricDimension(Name='ClusterName', Value=Ref(cluster))
        ],
        AlarmActions=[Ref(self.cluster_scaling_policy)],
        AlarmDescription='Alarm if memory reservation is over 75% '
                         'for cluster.',
        Namespace='AWS/ECS',
        Period=300,
        ComparisonOperator='GreaterThanThreshold',
        Statistic='Average',
        Threshold='75',
        MetricName='MemoryReservation',
    )
    self.template.add_resource(
        self.cluster_high_memory_reservation_autoscale_alarm)

    # Notifies users: same metric and threshold, but only after three
    # consecutive 300 second periods. The logical ID (including its
    # spelling) is kept as-is so existing stacks keep the same resource.
    self.cluster_high_memory_reservation_user_notification_alarm = Alarm(
        'ClusterHighMemoryReservationUserNotifcationAlarm',
        EvaluationPeriods=3,
        Dimensions=[
            MetricDimension(Name='ClusterName', Value=Ref(cluster))
        ],
        AlarmActions=[Ref(self.notification_sns_arn)],
        OKActions=[Ref(self.notification_sns_arn)],
        AlarmDescription='Alarm if memory reservation is over 75% '
                         'for cluster for 15 minutes.',
        Namespace='AWS/ECS',
        Period=300,
        ComparisonOperator='GreaterThanThreshold',
        Statistic='Average',
        Threshold='75',
        MetricName='MemoryReservation',
    )
    self.template.add_resource(
        self.cluster_high_memory_reservation_user_notification_alarm)
credit_threshold = t.add_parameter(Parameter('CreditThreshold', Type='String')) credit_evaluations = t.add_parameter( Parameter('CreditEvaluations', Type='String')) high_cpu_alarm = t.add_resource( Alarm( "ReDSAlarmHigh", AlarmDescription="CPU High Alarm", Namespace="AWS/RDS", MetricName="CPUUtilization", Statistic="Average", Period=60, Dimensions=[ MetricDimension(Name="DBInstanceIdentifier", Value=Ref(rds_instance)) ], EvaluationPeriods=Ref(up_evaluations), Threshold=Ref(up_threshold), ComparisonOperator="GreaterThanOrEqualToThreshold", AlarmActions=[], InsufficientDataActions=[], OKActions=[], )) low_cpu_alarm = t.add_resource( Alarm( "ReDSAlarmLow", AlarmDescription="CPU Low Alarm", Namespace="AWS/RDS", MetricName="CPUUtilization",
"alarmPrefix": "ScaleDownPolicyFor", "operator": "LessThanOrEqualToThreshold", "adjustment": "-1" } } for utilization in {ScalingMetric}: for state, value in states.items(): t.add_resource( Alarm("{}UtilizationToo{}".format(utilization, state), AlarmDescription="Alarm if {} utilization too {}".format( utilization, state), Namespace="AWS/ECS", MetricName="{}Utilization".format(utilization), Dimensions=[ MetricDimension(Name="ServiceName", Value=GetAtt(ecsservice, "Name")), MetricDimension( Name="ClusterName", Value=ImportValue( Join("-", [ Select(0, Split("-", Ref("AWS::StackName"))), "cluster-id" ]))), ], Statistic="Average", Period="60", EvaluationPeriods="1", Threshold=value['threshold'], ComparisonOperator=value['operator'], AlarmActions=[ Ref("{}{}".format(value['alarmPrefix'], utilization))
def init_template(self):
    """Populate ``self.template`` with the ECS cluster stack resources.

    Adds, in order: the template description, the ECS cluster, the instance
    role, instance profile and instance policy, the launch configuration
    (with cfn-init metadata that registers the instance with the cluster
    and runs cfn-hup), the auto scaling group, a scale-up policy with one
    alarm per entry of ``self.AUTOSCALE_UP_ALARMS`` and a scale-down policy
    with one alarm per entry of ``self.AUTOSCALE_DOWN_ALARMS``.

    Each alarm entry maps an alarm name to a dict providing
    ``scaling_metric``, ``statistic``, ``period`` and the matching
    ``scale_up_threshold`` / ``scale_down_threshold``.
    """
    self.template.add_description(self.TEMPLATE_DESCRIPTION)

    ecs_cluster = self.template.add_resource(Cluster(self.CLUSTER_NAME))

    ecs_instance_role = self.template.add_resource(
        Role('sitInstanceRole',
             Path='/',
             AssumeRolePolicyDocument={
                 "Statement": [{
                     "Effect": "Allow",
                     "Principal": {
                         "Service": ["ec2.amazonaws.com"]
                     },
                     "Action": ["sts:AssumeRole"]
                 }]
             }))

    ecs_instance_profile = self.template.add_resource(
        InstanceProfile('sitInstanceProfile',
                        Path='/',
                        Roles=[Ref(ecs_instance_role)]))

    # The policy resource is not referenced again, so its return value is
    # not kept.
    self.template.add_resource(
        PolicyType('sitInstancePolicy',
                   PolicyName='ecs-policy',
                   Roles=[Ref(ecs_instance_role)],
                   PolicyDocument={
                       "Statement": [{
                           "Effect": "Allow",
                           "Action": [
                               "ecs:CreateCluster",
                               "ecs:RegisterContainerInstance",
                               "ecs:DeregisterContainerInstance",
                               "ecs:DiscoverPollEndpoint",
                               "ecs:Submit*",
                               "ecs:Poll",
                               "ecs:StartTelemetrySession",
                               "ecr:GetAuthorizationToken",
                               "ecr:BatchCheckLayerAvailability",
                               "ecr:GetDownloadUrlForLayer",
                               "ecr:BatchGetImage",
                               "logs:CreateLogStream",
                               "logs:PutLogEvents"
                           ],
                           "Resource": "*"
                       }],
                   }))

    # cfn-init command that writes the cluster name and the task cleanup
    # wait duration into the ECS agent configuration.
    commands = {
        '01_add_instance_to_cluster': {
            'command': Join('', [
                '#!/bin/bash\n',
                'echo ECS_CLUSTER=',
                Ref(ecs_cluster),
                '$"\n"ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION=',
                self.ECS_TASK_CLEANUP_WAIT,
                ' >> /etc/ecs/ecs.config'
            ])
        }
    }

    # cfn-hup configuration plus the hook that re-runs cfn-init when the
    # launch configuration metadata is updated.
    files = {
        "/etc/cfn/cfn-hup.conf": {
            "content": Join("", [
                "[main]\n",
                "stack=", Ref("AWS::StackId"), "\n",
                "region=", Ref("AWS::Region"), "\n"
            ]),
            "mode": "000400",
            "owner": "root",
            "group": "root"
        },
        "/etc/cfn/hooks.d/cfn-auto-reloader.conf": {
            "content": Join("", [
                "[cfn-auto-reloader-hook]\n",
                "triggers=post.update\n",
                "path=Resources.{0}.Metadata.AWS::CloudFormation::Init\n".
                format(self.LAUNCH_CONFIGURATION_NAME),
                "action=/opt/aws/bin/cfn-init -v ",
                " --stack ", Ref("AWS::StackName"),
                " --resource {0}".format(
                    self.LAUNCH_CONFIGURATION_NAME),
                " --region ", Ref("AWS::Region"), "\n",
                "runas=root\n"
            ])
        }
    }

    services = {
        "sysvinit": {
            "cfn-hup": {
                "enabled": "true",
                "ensureRunning": "true",
                "files": [
                    "/etc/cfn/cfn-hup.conf",
                    "/etc/cfn/hooks.d/cfn-auto-reloader.conf"
                ]
            }
        }
    }

    launch_configuration = self.template.add_resource(
        LaunchConfiguration(
            self.LAUNCH_CONFIGURATION_NAME,
            ImageId=self.AMI_ID,
            IamInstanceProfile=Ref(ecs_instance_profile),
            InstanceType=self.INSTANCE_TYPE,
            UserData=self.user_data.get_base64_data(),
            AssociatePublicIpAddress=False,
            SecurityGroups=self.SECURITY_GROUPS,
            KeyName=self.KEY_NAME,
            Metadata=autoscaling.Metadata(
                cloudformation.Init({
                    "config": cloudformation.InitConfig(
                        commands=commands,
                        files=files,
                        services=services)
                })),
            BlockDeviceMappings=[
                autoscaling.BlockDeviceMapping(
                    DeviceName=self.EBS_DEVICE_NAME,
                    Ebs=autoscaling.EBSBlockDevice(
                        DeleteOnTermination=True,
                        VolumeSize=self.EBS_VOLUME_SIZE,
                        VolumeType='gp2'))
            ]))

    auto_scaling_group = self.template.add_resource(
        AutoScalingGroup(self.AUTOSCALING_GROUP_NAME,
                         MaxSize=self.MAX_SIZE,
                         MinSize=self.MIN_SIZE,
                         Cooldown=60,
                         LaunchConfigurationName=Ref(launch_configuration),
                         VPCZoneIdentifier=[self.SUBNET]))

    # Scale UP policy
    scaling_up_policy = self.template.add_resource(
        ScalingPolicy('{0}ScaleUpPolicy'.format(
            self.AUTOSCALING_GROUP_NAME),
                      AdjustmentType='ChangeInCapacity',
                      AutoScalingGroupName=Ref(auto_scaling_group),
                      Cooldown=60,
                      ScalingAdjustment='1'))

    # dict.items() works on both Python 2 and Python 3; iteritems() only
    # exists on Python 2 dictionaries.
    for alarm_name, alarm in self.AUTOSCALE_UP_ALARMS.items():
        # CloudWatch alarm that triggers the scale-up policy
        self.template.add_resource(
            Alarm('{0}ScaleUp{1}'.format(self.AUTOSCALING_GROUP_NAME,
                                         alarm_name),
                  ActionsEnabled=True,
                  Namespace='AWS/ECS',
                  MetricName=alarm['scaling_metric'],
                  ComparisonOperator='GreaterThanOrEqualToThreshold',
                  Threshold=alarm['scale_up_threshold'],
                  EvaluationPeriods=1,
                  Statistic=alarm['statistic'],
                  Period=alarm['period'],
                  AlarmActions=[Ref(scaling_up_policy)],
                  Dimensions=[
                      MetricDimension(Name='ClusterName',
                                      Value=Ref(ecs_cluster))
                  ]))

    # Scale DOWN policy
    scaling_down_policy = self.template.add_resource(
        ScalingPolicy('{0}ScaleDownPolicy'.format(
            self.AUTOSCALING_GROUP_NAME),
                      AdjustmentType='ChangeInCapacity',
                      AutoScalingGroupName=Ref(auto_scaling_group),
                      Cooldown=60,
                      ScalingAdjustment='-1'))

    for alarm_name, alarm in self.AUTOSCALE_DOWN_ALARMS.items():
        # CloudWatch alarm that triggers the scale-down policy
        self.template.add_resource(
            Alarm('{0}ScaleDown{1}'.format(self.AUTOSCALING_GROUP_NAME,
                                           alarm_name),
                  ActionsEnabled=True,
                  Namespace='AWS/ECS',
                  MetricName=alarm['scaling_metric'],
                  ComparisonOperator='LessThanOrEqualToThreshold',
                  Threshold=alarm['scale_down_threshold'],
                  EvaluationPeriods=1,
                  Statistic=alarm['statistic'],
                  Period=alarm['period'],
                  AlarmActions=[Ref(scaling_down_policy)],
                  Dimensions=[
                      MetricDimension(Name='ClusterName',
                                      Value=Ref(ecs_cluster))
                  ]))
def create_instance(self, subnet1, subnet2, load_balancer, autoscaling_sg,
                    webapp_zip,
                    instance_profile="arn:aws:iam::205198152101:instance-profile/webapps3",
                    image_id="ami-b73b63a0",
                    key_name="thivancf",
                    instance_type="t2.micro",
                    webapp_bucket="thivan-sample-data"):
    """Add the web-app launch configuration, auto scaling group, scale-up
    policy and CPU alarm to ``self.t``.

    Four resources are added, in this order: ``LaunchConfiguration``,
    ``AutoscalingGroup``, ``ScalingPolicy`` and ``CPUAlarm``.

    :param subnet1: first subnet; wrapped in ``Ref`` for VPCZoneIdentifier.
    :param subnet2: second subnet; wrapped in ``Ref`` for VPCZoneIdentifier.
    :param load_balancer: load balancer; wrapped in ``Ref`` for
        LoadBalancerNames.
    :param autoscaling_sg: security group; wrapped in ``Ref`` for the launch
        configuration's SecurityGroups.
    :param webapp_zip: referenced via ``Ref`` as the object copied from the
        S3 bucket and then unzipped (presumably a template parameter holding
        the zip file name -- confirm against the caller).
    :param instance_profile: IAM instance profile for the instances.
    :param image_id: AMI id for the instances.
    :param key_name: EC2 key pair name.
    :param instance_type: EC2 instance type.
    :param webapp_bucket: name of the S3 bucket holding the web-app zip.
        Must be a plain string; it is formatted into the boot script.
    :return: whatever ``self.t.add_resource`` returned for the launch
        configuration.

    The five trailing parameters default to the values that used to be
    hard-coded, so existing callers get the same resources as before.
    """
    # Boot script, joined without separators by Fn::Join below.
    user_data_parts = [
        "#!/bin/bash\n",
        "sudo yum install httpd mod_wsgi -y\n",
        "sudo pip install flask\n",
        "sudo chkconfig httpd on\n",
        "sudo service httpd start\n",
        "sudo service httpd restart\n",
        "sudo aws s3 cp s3://{0}/".format(webapp_bucket),
        Ref(webapp_zip),
        " .\n",
        "sudo unzip ",
        Ref(webapp_zip),
        "\n",
        # NOTE(review): both mv commands target /var/www/html/; presumably
        # only one of them finds the unzipped "app" directory, depending on
        # the working directory the script runs in -- confirm.
        "sudo mv /home/ec2-user/app /var/www/html/\n",
        "sudo mv app /var/www/html/\n",
        "sudo mv /var/www/html/app/server_config/wsgi.conf /etc/httpd/conf.d/\n",
        "sudo groupadd group1\n",
        "sudo useradd user1 -g group1\n",
        "sudo usermod -a -G group1 apache\n",
        "sudo chown -vR :group1 /var/www/\n",
        "sudo chmod -vR g+w /var/www/\n",
        "sudo service httpd restart\n",
        # The --resource value must match the logical id given to the
        # AutoScalingGroup below.
        "cfn-signal -e 0",
        " --resource AutoscalingGroup",
        " --stack ",
        Ref("AWS::StackName"),
        " --region ",
        Ref("AWS::Region"),
        "\n",
    ]

    launch_config = self.t.add_resource(
        LaunchConfiguration(
            "LaunchConfiguration",
            UserData=Base64(Join('', user_data_parts)),
            IamInstanceProfile=instance_profile,
            ImageId=image_id,
            KeyName=key_name,
            SecurityGroups=[Ref(autoscaling_sg)],
            InstanceType=instance_type,
        ))

    asg = self.t.add_resource(
        AutoScalingGroup(
            "AutoscalingGroup",
            DesiredCapacity=2,
            LaunchConfigurationName=Ref(launch_config),
            MinSize=1,
            MaxSize=4,
            VPCZoneIdentifier=[Ref(subnet1), Ref(subnet2)],
            LoadBalancerNames=[Ref(load_balancer)],
            HealthCheckGracePeriod=300,
            HealthCheckType="EC2",
            # Both update strategies are configured. Per the CloudFormation
            # UpdatePolicy documentation, WillReplace=True gives the
            # replacing update precedence over the rolling update.
            UpdatePolicy=UpdatePolicy(
                AutoScalingReplacingUpdate=AutoScalingReplacingUpdate(
                    WillReplace=True,
                ),
                AutoScalingRollingUpdate=AutoScalingRollingUpdate(
                    PauseTime='PT5M',
                    MinInstancesInService="1",
                    MaxBatchSize='1',
                ))))

    # Scale-up only: adds one instance each time the CPU alarm fires.
    scaling_policy = self.t.add_resource(
        ScalingPolicy(
            "ScalingPolicy",
            AdjustmentType="ChangeInCapacity",
            AutoScalingGroupName=Ref(asg),
            Cooldown="120",
            ScalingAdjustment="1"))

    # NOTE: only AlarmActions is set, so the "metric disappears" case named
    # in the description has no action attached by this alarm.
    self.t.add_resource(
        Alarm(
            "CPUAlarm",
            EvaluationPeriods="1",
            Statistic="Maximum",
            Threshold="50",
            AlarmDescription=(
                "Alarm if CPU too high or metric disappears "
                "indicating instance is down"),
            Period="60",
            AlarmActions=[Ref(scaling_policy)],
            Namespace="AWS/EC2",
            Dimensions=[
                MetricDimension(Name="AutoScalingGroupName", Value=Ref(asg)),
            ],
            ComparisonOperator="GreaterThanThreshold",
            MetricName="CPUUtilization"))

    return launch_config
def emit_configuration():
    """Add the babysitter's Chef-deregistration resources to ``template``.

    Four resources are added to the module-level ``template``, in order:

    1. an SQS queue named ``chef-deregistration_<CLOUDNAME>_<CLOUDENV>``;
    2. an SNS topic (same name as the queue) with the queue subscribed;
    3. a CloudWatch alarm on the queue's visible-message count;
    4. a queue policy allowing that topic to send messages to the queue.

    The queue is always created; there is no switch for reusing an
    existing queue. The topic is also stored as ``cfn.alert_topic``.
    """
    # Build an SQS queue for the babysitter
    queue_name = '_'.join(['chef-deregistration', CLOUDNAME, CLOUDENV])
    queue = template.add_resource(
        Queue(
            cfn.sanitize_id(queue_name),
            VisibilityTimeout=60,
            MessageRetentionPeriod=1209600,  # 14 days, in seconds
            MaximumMessageSize=16384,  # 16 KiB, in bytes
            QueueName=queue_name,
        ))

    alert_topic = template.add_resource(
        Topic(
            cfn.sanitize_id("BabysitterAlarmTopic{0}".format(CLOUDENV)),
            DisplayName='Babysitter Alarm',
            TopicName=queue_name,
            Subscription=[
                Subscription(Endpoint=GetAtt(queue, "Arn"), Protocol='sqs'),
            ],
            DependsOn=queue.title,
        ))

    # NOTE: the alarm is created without AlarmActions or
    # InsufficientDataActions, so it does not notify alert_topic; it is
    # only ordered after the topic through DependsOn.
    template.add_resource(
        Alarm(
            "BabysitterQueueDepthAlarm",
            AlarmDescription='Alarm if the queue depth grows beyond 200 messages',
            Namespace='AWS/SQS',
            MetricName='ApproximateNumberOfMessagesVisible',
            Dimensions=[
                MetricDimension(Name='QueueName',
                                Value=GetAtt(queue, "QueueName"))
            ],
            Statistic='Sum',
            Period='300',
            EvaluationPeriods='1',
            Threshold='200',
            ComparisonOperator='GreaterThanThreshold',
            DependsOn=alert_topic.title,
        ))

    # Any principal may send, but only when the message's source ARN is the
    # alert topic (the ArnEquals condition below).
    queue_policy = {
        "Version": "2012-10-17",
        "Id": "BabysitterSNSPublicationPolicy",
        "Statement": [{
            "Sid": "AllowSNSPublishing",
            "Effect": "Allow",
            "Principal": {
                "AWS": "*"
            },
            "Action": ["sqs:SendMessage"],
            "Resource": GetAtt(queue, "Arn"),
            "Condition": {
                "ArnEquals": {
                    "aws:SourceArn": Ref(alert_topic)
                }
            }
        }]
    }

    # Publish all events from SNS to the Queue
    template.add_resource(
        QueuePolicy(
            "BabysitterPublishSNStoSQSPolicy",
            Queues=[Ref(queue)],
            PolicyDocument=queue_policy,
            DependsOn=[queue.title, alert_topic.title],
        ))

    cfn.alert_topic = alert_topic
Actions=[Action(Type='forward', TargetGroupArn=Ref(target_group))], Priority=1000)) target_group_dimension = GetAtt(target_group, 'TargetGroupFullName') load_balancer_dimension = GetAtt(load_balancer, 'LoadBalancerFullName') template.add_resource( Alarm( 'LowHealthyHostsAlarm', ActionsEnabled=True, AlarmActions=[Ref(sns_topic_arn)], OKActions=[Ref(sns_topic_arn)], InsufficientDataActions=[Ref(sns_topic_arn)], AlarmDescription='Alarm for checking if healthy hosts falls below one', ComparisonOperator='LessThanThreshold', Dimensions=[ MetricDimension(Name='TargetGroup', Value=target_group_dimension), MetricDimension(Name='LoadBalancer', Value=load_balancer_dimension) ], EvaluationPeriods=1, MetricName='HealthyHostCount', Namespace='AWS/ApplicationELB', Period=60, Statistic='Average', Threshold='1', Unit='Count')) template.add_resource( Alarm('HighAverageLatencyAlarm', ActionsEnabled=True, AlarmActions=[Ref(sns_topic_arn)], OKActions=[Ref(sns_topic_arn)],
"alarmPrefix": "ScaleDownPolicyFor", "operator": "LessThanThreshold", "adjustment": "-1" } } for reservation in {"CPU", "Memory"}: for state, value in states.iteritems(): t.add_resource( Alarm("{}ReservationToo{}".format(reservation, state), AlarmDescription="Alarm if {} reservation too {}".format( reservation, state), Namespace="AWS/ECS", MetricName="{}Reservation".format(reservation), Dimensions=[ MetricDimension(Name="ClusterName", Value=Ref("ECSCluster")), ], Statistic="Average", Period="60", EvaluationPeriods="1", Threshold=value['threshold'], ComparisonOperator=value['operator'], AlarmActions=[ Ref("{}{}".format(value['alarmPrefix'], reservation)) ])) t.add_resource( ScalingPolicy( "{}{}".format(value['alarmPrefix'], reservation), ScalingAdjustment=value['adjustment'], AutoScalingGroupName=Ref("ECSAutoScalingGroup"), AdjustmentType="ChangeInCapacity",
Timeout=60)) alarm_topic = t.add_resource( Topic('LambdaErrorTopic', Subscription=[ Subscription(Protocol="email", Endpoint=Ref(param_alarm_email)) ])) t.add_resource( Alarm("LambdaBaseErrorsAlarm", ComparisonOperator='GreaterThanThreshold', EvaluationPeriods=1, MetricName='Errors', Namespace='AWS/Lambda', Dimensions=[ MetricDimension(Name='FunctionName', Value=Ref(base_function)) ], Period=300, Statistic='Maximum', Threshold='0', AlarmActions=[Ref(alarm_topic)])) t.add_resource( Alarm("LambdaReleaseErrorsAlarm", ComparisonOperator='GreaterThanThreshold', EvaluationPeriods=1, MetricName='Errors', Namespace='AWS/Lambda', Dimensions=[ MetricDimension(Name='FunctionName', Value=Ref(release_function)) ],
ec2.PrivateIpAddressSpecification( "PrivateIpAddress", Primary="true", PrivateIpAddress=f.ec2.ip) ], GroupSet=[Ref(instanceSecurityGroup)]) ], )) f.ec2.instance = instance alarmMaster = template.add_resource( Alarm("AlarmRecovery" + f.ec2.name, AlarmDescription="Recovery " + f.ec2.name, Namespace="AWS/EC2", MetricName="StatusCheckFailed_System", Dimensions=[ MetricDimension(Name="InstanceId", Value=Ref(instance)), ], Statistic="Maximum", Period="60", EvaluationPeriods="5", Threshold="0", ComparisonOperator="GreaterThanThreshold", AlarmActions=[ Sub('arn:aws:automate:${AWS::Region}:ec2:recover') ])) ################################ Security autoscaling ################################### subnetsList = [] for f in vpc.subnets: subnetsList.append(Ref(f.instance))
t.add_output( Output("URL", Description="Helloworld URL", Value=Join("", ["http://", GetAtt("LoadBalancer", "DNSName"), ":3000"]))) t.add_resource( Alarm( "ELBHTTP5xxs", AlarmDescription="Alarm if HTTP 5xxs too high", Namespace="AWS/ELB", MetricName="HTTPCode_Backend_5XX", Dimensions=[ MetricDimension(Name="LoadBalancerName", Value=Ref("LoadBalancer")), ], Statistic="Average", Period="60", EvaluationPeriods="3", Threshold="30", ComparisonOperator="GreaterThanOrEqualToThreshold", AlarmActions=["arn:aws:sns:us-east-1:511912822958:alert-sms"], OKActions=["arn:aws:sns:us-east-1:511912822958:alert-sms"], InsufficientDataActions=[], )) t.add_resource( Alarm( "ELBHLatency", AlarmDescription="Alarm if Latency too high",
Trail("CloudTrail", IncludeGlobalServiceEvents=True, IsLogging=True, IsMultiRegionTrail=True, S3BucketName=Ref(bucket), SnsTopicName=Ref(cloudtrail_topic), DependsOn="BucketPolicy")) t.add_resource( Alarm( "LambdaErrorsAlarm", ComparisonOperator='GreaterThanThreshold', EvaluationPeriods=1, MetricName='Errors', Namespace='AWS/Lambda', Dimensions=[MetricDimension(Name='FunctionName', Value=Ref(function))], Period=300, Statistic='Maximum', Threshold='0', AlarmActions=[Ref(notificationTopic)])) t.add_resource( Alarm( "LambdaThrottlesAlarm", ComparisonOperator='GreaterThanThreshold', EvaluationPeriods=1, MetricName='Throttles', Namespace='AWS/Lambda', Dimensions=[MetricDimension(Name='FunctionName', Value=Ref(function))], Period=300, Statistic='Maximum',
alarmtopic = t.add_resource( Topic( "AlarmTopic", Subscription=[ Subscription(Endpoint=Ref(alarmemail), Protocol="email"), ], )) queuedepthalarm = t.add_resource( Alarm( "QueueDepthAlarm", AlarmDescription="Alarm if queue depth grows beyond 10 messages", Namespace="AWS/SQS", MetricName="ApproximateNumberOfMessagesVisible", Dimensions=[ MetricDimension(Name="QueueName", Value=GetAtt(myqueue, "QueueName")), ], Statistic="Sum", Period="300", EvaluationPeriods="1", Threshold="10", ComparisonOperator="GreaterThanThreshold", AlarmActions=[ Ref(alarmtopic), ], InsufficientDataActions=[ Ref(alarmtopic), ], )) t.add_output([
t.add_resource( ScalingPolicy( "ScaleUpPolicy", ScalingAdjustment="1", AutoScalingGroupName=Ref("AutoscalingGroup"), AdjustmentType="ChangeInCapacity", )) t.add_resource( Alarm( "CPUTooLow", AlarmDescription="Alarm if CPU too low", Namespace="AWS/EC2", MetricName="CPUUtilization", Dimensions=[ MetricDimension(Name="AutoScalingGroupName", Value=Ref("AutoscalingGroup")), ], Statistic="Average", Period="60", EvaluationPeriods="1", Threshold="30", ComparisonOperator="LessThanThreshold", AlarmActions=[Ref("ScaleDownPolicy")], )) t.add_resource( Alarm( "CPUTooHigh", AlarmDescription="Alarm if CPU too high", Namespace="AWS/EC2", MetricName="CPUUtilization",
], )) t.add_resource( Alarm( "ChaosLambdaErrorAlarm", AlarmName="chaosLambda/LambdaError", AlarmDescription="Enters ALARM state because we have received a lamdba " "error. See 'Errors' section on the following link: " "http://docs.aws.amazon.com/lambda/latest/dg/" "monitoring-functions-metrics.html for more " "information.", Namespace="AWS/Lambda", MetricName="Errors", Dimensions=[ MetricDimension(Name="FunctionName", Value=Ref(lambda_function_name)), ], Statistic="Sum", Period="60", EvaluationPeriods="1", Threshold="1", Unit="Count", ComparisonOperator="GreaterThanOrEqualToThreshold", AlarmActions=[ Ref(alarm_topic), ], )) t.add_resource( Alarm( "ChaosLambdaDurationAlarm",