def generate_rules(rules_name):
    global emr, scaling_policy

    rules = [
        emr.ScalingRule(
            Name=rules_name,
            Description="%s rules" % rules_name,
            Action=emr.ScalingAction(
                Market="ON_DEMAND",
                SimpleScalingPolicyConfiguration=scaling_policy),
            Trigger=emr.ScalingTrigger(
                CloudWatchAlarmDefinition=emr.CloudWatchAlarmDefinition(
                    ComparisonOperator="GREATER_THAN",
                    EvaluationPeriods="120",
                    MetricName="TestMetric",
                    Namespace="AWS/ElasticMapReduce",
                    Period="300",
                    Statistic="AVERAGE",
                    Threshold="50",
                    Unit="PERCENT",
                    Dimensions=[
                        emr.MetricDimension('my.custom.master.property',
                                            'my.custom.master.value')
                    ])))
    ]
    return rules
Exemple #2
0
    def test_allow_string_cluster(self):
        cluster_security_configuration = emr.SecurityConfiguration(
            'emrsecurityconfiguration',
            Name="EMRSecurityConfiguration",
            SecurityConfiguration=security_configuration)

        spot = "2"
        withSpotPrice = "WithSpotPrice"
        cluster = emr.Cluster(
            'Cluster',
            # AdditionalInfo="Additional Info",
            Applications=[
                emr.Application(Name="Hadoop"),
                emr.Application(Name="Hive"),
                emr.Application(Name="Mahout"),
                emr.Application(Name="Pig"),
                emr.Application(Name="Spark")
            ],
            BootstrapActions=[
                emr.BootstrapActionConfig(
                    Name='Dummy bootstrap action',
                    ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
                        Path='file:/usr/share/aws/emr/scripts/install-hue',
                        Args=["dummy", "parameter"]))
            ],
            Configurations=[
                emr.Configuration(Classification="core-site",
                                  ConfigurationProperties={
                                      'hadoop.security.groups.cache.secs':
                                      '250'
                                  })
            ],
            Instances=emr.JobFlowInstancesConfig(
                Ec2KeyName="KeyName",
                Ec2SubnetId="SubnetId",
                MasterInstanceGroup=emr.InstanceGroupConfigProperty(
                    InstanceCount="1",
                    InstanceType=M4_LARGE,
                    AutoScalingPolicy=emr.AutoScalingPolicy(
                        Constraints=emr.ScalingConstraints(MinCapacity="1",
                                                           MaxCapacity="3"),
                        Rules=self.generate_rules("MasterAutoScalingPolicy")),
                ),
                CoreInstanceGroup=emr.InstanceGroupConfigProperty(
                    Name="Core Instance",
                    BidPrice=If(withSpotPrice, Ref(spot), Ref("AWS::NoValue")),
                    Market=If(withSpotPrice, "SPOT", "ON_DEMAND"),
                    InstanceCount="1",
                    InstanceType=M4_LARGE,
                    AutoScalingPolicy=emr.AutoScalingPolicy(
                        Constraints=emr.ScalingConstraints(MinCapacity="1",
                                                           MaxCapacity="3"),
                        Rules=self.generate_rules("CoreAutoScalingPolicy"),
                    )),
            ),
            JobFlowRole="EMRJobFlowRole",
            LogUri="s3://cluster-logs",
            Name="EMR Cluster",
            ReleaseLabel="emr-5.5.0",
            SecurityConfiguration=Ref(cluster_security_configuration),
            ServiceRole="EMRServiceRole",
            AutoScalingRole="EMR_AutoScaling_DefaultRole",
            VisibleToAllUsers="true",
            Tags=Tags(Name="EMR Sample Cluster"))

        cluster.to_dict()

        autoscale_policy = emr.AutoScalingPolicy(
            Constraints=emr.ScalingConstraints(MinCapacity=0, MaxCapacity=5),
            Rules=[
                emr.ScalingRule(
                    Name='ScaleUpContainerPending',
                    Description='Scale up on over-provisioned '
                    'containers',
                    Action=emr.ScalingAction(
                        SimpleScalingPolicyConfiguration=emr.
                        SimpleScalingPolicyConfiguration(
                            AdjustmentType=emr.CHANGE_IN_CAPACITY,
                            CoolDown=300,
                            ScalingAdjustment=1)),
                    Trigger=emr.ScalingTrigger(
                        CloudWatchAlarmDefinition=emr.
                        CloudWatchAlarmDefinition(
                            ComparisonOperator='GREATER_THAN',
                            MetricName='ContainerPendingRatio',
                            Period=300,
                            Threshold=0.75,
                            Dimensions=[
                                emr.MetricDimension(Key='JobFlowId',
                                                    Value='${emr.clusterId}')
                            ]))),
                emr.ScalingRule(
                    Name='ScaleUpMemory',
                    Description='Scale up on low memory',
                    Action=emr.ScalingAction(
                        SimpleScalingPolicyConfiguration=emr.
                        SimpleScalingPolicyConfiguration(
                            AdjustmentType='CHANGE_IN_CAPACITY',
                            CoolDown=300,
                            ScalingAdjustment=1)),
                    Trigger=emr.ScalingTrigger(
                        CloudWatchAlarmDefinition=emr.
                        CloudWatchAlarmDefinition(
                            ComparisonOperator='LESS_THAN',
                            MetricName='YARNMemoryAvailablePercentage',
                            Period=300,
                            Threshold=15,
                            Dimensions=[
                                emr.MetricDimension(Key='JobFlowId',
                                                    Value='${emr.clusterId}')
                            ]))),
                emr.ScalingRule(
                    Name='ScaleDownMemory',
                    Description='Scale down on high memory',
                    Action=emr.ScalingAction(
                        SimpleScalingPolicyConfiguration=emr.
                        SimpleScalingPolicyConfiguration(
                            AdjustmentType=emr.CHANGE_IN_CAPACITY,
                            CoolDown=300,
                            ScalingAdjustment=-1)),
                    Trigger=emr.ScalingTrigger(
                        CloudWatchAlarmDefinition=emr.
                        CloudWatchAlarmDefinition(
                            ComparisonOperator='GREATER_THAN',
                            MetricName='YARNMemoryAvailablePercentage',
                            Period=300,
                            Threshold=75,
                            Dimensions=[
                                emr.MetricDimension(Key='JobFlowId',
                                                    Value='${emr.clusterId}')
                            ])))
            ])

        emr.InstanceGroupConfig('TaskInstanceGroup',
                                AutoScalingPolicy=autoscale_policy,
                                InstanceCount=0,
                                InstanceType=M4_LARGE,
                                InstanceRole='TASK',
                                Market='ON_DEMAND',
                                Name='Task Instance',
                                JobFlowId=Ref(cluster))