Example #1
0
    def test_allow_string_cluster(self):
        """Build an EMR cluster from string-valued properties and render it.

        Instance counts, scaling capacities and ``VisibleToAllUsers`` are
        supplied as strings.  The method makes no assertions: it only
        requires that constructing the cluster and calling ``to_dict()``
        complete without raising.
        """
        spot_ref_target = "2"
        spot_condition = "WithSpotPrice"

        applications = [
            emr.Application(Name=app_name)
            for app_name in ("Hadoop", "Hive", "Mahout", "Pig", "Spark")
        ]

        install_hue = emr.ScriptBootstrapActionConfig(
            Path="file:/usr/share/aws/emr/scripts/install-hue",
            Args=["dummy", "parameter"],
        )
        bootstrap_actions = [
            emr.BootstrapActionConfig(
                Name="Dummy bootstrap action",
                ScriptBootstrapAction=install_hue,
            )
        ]

        core_site = emr.Configuration(
            Classification="core-site",
            ConfigurationProperties={
                "hadoop.security.groups.cache.secs": "250",
            },
        )

        # The master group is assembled before the core group so that
        # generate_rules() is invoked for "Master..." first, then "Core...".
        master_group = emr.InstanceGroupConfigProperty(
            InstanceCount="1",
            InstanceType=M4_LARGE,
            AutoScalingPolicy=emr.AutoScalingPolicy(
                Constraints=emr.ScalingConstraints(
                    MinCapacity="1", MaxCapacity="3"
                ),
                Rules=self.generate_rules("MasterAutoScalingPolicy"),
            ),
        )
        core_group = emr.InstanceGroupConfigProperty(
            Name="Core Instance",
            # Bid price and market are both switched on the same condition.
            BidPrice=If(
                spot_condition, Ref(spot_ref_target), Ref("AWS::NoValue")
            ),
            Market=If(spot_condition, "SPOT", "ON_DEMAND"),
            InstanceCount="1",
            InstanceType=M4_LARGE,
            AutoScalingPolicy=emr.AutoScalingPolicy(
                Constraints=emr.ScalingConstraints(
                    MinCapacity="1", MaxCapacity="3"
                ),
                Rules=self.generate_rules("CoreAutoScalingPolicy"),
            ),
        )
        instances = emr.JobFlowInstancesConfig(
            Ec2KeyName="KeyName",
            Ec2SubnetId="SubnetId",
            MasterInstanceGroup=master_group,
            CoreInstanceGroup=core_group,
        )

        cluster = emr.Cluster(
            "Cluster",
            # AdditionalInfo="Additional Info",
            Applications=applications,
            BootstrapActions=bootstrap_actions,
            Configurations=[core_site],
            Instances=instances,
            JobFlowRole="EMRJobFlowRole",
            LogUri="s3://cluster-logs",
            Name="EMR Cluster",
            ReleaseLabel="emr-5.5.0",
            ServiceRole="EMRServiceRole",
            AutoScalingRole="EMR_AutoScaling_DefaultRole",
            VisibleToAllUsers="true",
            Tags=Tags(Name="EMR Sample Cluster"),
        )

        # Rendering is the actual check: it must not raise.
        cluster.to_dict()
Example #2
0
        ManagedPolicyArns=[
            'arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role'
        ]))

# Add an IAM instance profile to the template; its only role is a Ref to
# emr_job_flow_role.
emr_instance_profile = template.add_resource(
    iam.InstanceProfile(
        "EMRInstanceProfile",
        Roles=[Ref(emr_job_flow_role)],
    )
)

# EMR Cluster Resource

cluster = template.add_resource(
    emr.Cluster("EMRSampleCluster",
                Name="EMR Sample Cluster",
                ReleaseLabel='emr-4.4.0',
                BootstrapActions=[
                    emr.BootstrapActionConfig(
                        Name='Dummy bootstrap action',
                        ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
                            Path='/bin/sh', Args=['echo', 'Hello World']))
                ],
                Configurations=[
                    emr.Configuration(Classification="core-site",
                                      ConfigurationProperties={
                                          'hadoop.security.groups.cache.secs':
                                          '250'
                                      }),
                    emr.Configuration(
                        Classification="mapred-site",
                        ConfigurationProperties={
                            'mapred.tasktracker.map.tasks.maximum': '2',
                            'mapreduce.map.sort.spill.percent': '90',
                            'mapreduce.tasktracker.reduce.tasks.maximum': '5'
                        }),
    emr.SecurityConfiguration(
        'EMRSecurityConfiguration',
        Name="EMRSampleClusterSecurityConfiguration",
        SecurityConfiguration=security_configuration,
    ))

cluster = template.add_resource(
    emr.Cluster(
        "EMRSampleCluster",
        Name="EMR Sample Cluster",
        ReleaseLabel='emr-4.4.0',
        SecurityConfiguration=Ref(security_config),
        BootstrapActions=[
            emr.BootstrapActionConfig(
                Name='Dummy bootstrap action',
                ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
                    Path='file:/usr/share/aws/emr/scripts/install-hue',
                    Args=["dummy", "parameter"]))
        ],
        Configurations=[
            emr.Configuration(Classification="core-site",
                              ConfigurationProperties={
                                  'hadoop.security.groups.cache.secs': '250'
                              }),
            emr.Configuration(Classification="mapred-site",
                              ConfigurationProperties={
                                  'mapred.tasktracker.map.tasks.maximum': '2',
                                  'mapreduce.map.sort.spill.percent': '90',
                                  'mapreduce.tasktracker.reduce.tasks.maximum':
                                  '5'
                              }),
Example #4
0
    def test_allow_string_cluster(self):
        """Build an EMR cluster plus a task instance group and render it.

        The cluster references a ``SecurityConfiguration`` resource and is
        given string-valued counts/capacities.  After ``to_dict()`` is
        called on the cluster, a standalone auto-scaling policy with three
        rules is built and attached to a TASK ``InstanceGroupConfig`` that
        refers back to the cluster.  The method makes no assertions; it
        only requires that every construction step and ``to_dict()``
        complete without raising.
        """
        security_resource = emr.SecurityConfiguration(
            "emrsecurityconfiguration",
            Name="EMRSecurityConfiguration",
            SecurityConfiguration=security_configuration,
        )

        spot_ref_target = "2"
        spot_condition = "WithSpotPrice"

        applications = [
            emr.Application(Name=app_name)
            for app_name in ("Hadoop", "Hive", "Mahout", "Pig", "Spark")
        ]

        install_hue = emr.ScriptBootstrapActionConfig(
            Path="file:/usr/share/aws/emr/scripts/install-hue",
            Args=["dummy", "parameter"],
        )
        bootstrap_actions = [
            emr.BootstrapActionConfig(
                Name="Dummy bootstrap action",
                ScriptBootstrapAction=install_hue,
            )
        ]

        core_site = emr.Configuration(
            Classification="core-site",
            ConfigurationProperties={
                "hadoop.security.groups.cache.secs": "250",
            },
        )

        # The master group is assembled before the core group so that
        # generate_rules() is invoked for "Master..." first, then "Core...".
        master_group = emr.InstanceGroupConfigProperty(
            InstanceCount="1",
            InstanceType=M4_LARGE,
            AutoScalingPolicy=emr.AutoScalingPolicy(
                Constraints=emr.ScalingConstraints(
                    MinCapacity="1", MaxCapacity="3"
                ),
                Rules=self.generate_rules("MasterAutoScalingPolicy"),
            ),
        )
        core_group = emr.InstanceGroupConfigProperty(
            Name="Core Instance",
            # Bid price and market are both switched on the same condition.
            BidPrice=If(
                spot_condition, Ref(spot_ref_target), Ref("AWS::NoValue")
            ),
            Market=If(spot_condition, "SPOT", "ON_DEMAND"),
            InstanceCount="1",
            InstanceType=M4_LARGE,
            AutoScalingPolicy=emr.AutoScalingPolicy(
                Constraints=emr.ScalingConstraints(
                    MinCapacity="1", MaxCapacity="3"
                ),
                Rules=self.generate_rules("CoreAutoScalingPolicy"),
            ),
        )
        instances = emr.JobFlowInstancesConfig(
            Ec2KeyName="KeyName",
            Ec2SubnetId="SubnetId",
            MasterInstanceGroup=master_group,
            CoreInstanceGroup=core_group,
        )

        cluster = emr.Cluster(
            "Cluster",
            # AdditionalInfo="Additional Info",
            Applications=applications,
            BootstrapActions=bootstrap_actions,
            Configurations=[core_site],
            Instances=instances,
            JobFlowRole="EMRJobFlowRole",
            LogUri="s3://cluster-logs",
            Name="EMR Cluster",
            ReleaseLabel="emr-5.5.0",
            SecurityConfiguration=Ref(security_resource),
            ServiceRole="EMRServiceRole",
            AutoScalingRole="EMR_AutoScaling_DefaultRole",
            VisibleToAllUsers="true",
            Tags=Tags(Name="EMR Sample Cluster"),
        )

        # Rendering the cluster must not raise.
        cluster.to_dict()

        def scaling_rule(name, description, adjustment_type, step,
                         comparison, metric, threshold):
            """Return one ScalingRule with a 300 cooldown and a 300 period.

            The alarm always carries a single JobFlowId dimension; only the
            arguments vary between rules.
            """
            simple_policy = emr.SimpleScalingPolicyConfiguration(
                AdjustmentType=adjustment_type,
                CoolDown=300,
                ScalingAdjustment=step,
            )
            action = emr.ScalingAction(
                SimpleScalingPolicyConfiguration=simple_policy)
            alarm = emr.CloudWatchAlarmDefinition(
                ComparisonOperator=comparison,
                MetricName=metric,
                Period=300,
                Threshold=threshold,
                Dimensions=[
                    emr.MetricDimension(Key="JobFlowId",
                                        Value="${emr.clusterId}")
                ],
            )
            trigger = emr.ScalingTrigger(CloudWatchAlarmDefinition=alarm)
            return emr.ScalingRule(
                Name=name,
                Description=description,
                Action=action,
                Trigger=trigger,
            )

        task_constraints = emr.ScalingConstraints(MinCapacity=0,
                                                  MaxCapacity=5)
        # The second rule passes the adjustment type as a plain string while
        # the other two use the module constant; both forms are kept.
        task_rules = [
            scaling_rule(
                "ScaleUpContainerPending",
                "Scale up on over-provisioned containers",
                emr.CHANGE_IN_CAPACITY, 1,
                "GREATER_THAN", "ContainerPendingRatio", 0.75,
            ),
            scaling_rule(
                "ScaleUpMemory",
                "Scale up on low memory",
                "CHANGE_IN_CAPACITY", 1,
                "LESS_THAN", "YARNMemoryAvailablePercentage", 15,
            ),
            scaling_rule(
                "ScaleDownMemory",
                "Scale down on high memory",
                emr.CHANGE_IN_CAPACITY, -1,
                "GREATER_THAN", "YARNMemoryAvailablePercentage", 75,
            ),
        ]
        autoscale_policy = emr.AutoScalingPolicy(
            Constraints=task_constraints,
            Rules=task_rules,
        )

        # The resulting resource is intentionally discarded; constructing it
        # is the check.
        emr.InstanceGroupConfig(
            "TaskInstanceGroup",
            AutoScalingPolicy=autoscale_policy,
            InstanceCount=0,
            InstanceType=M4_LARGE,
            InstanceRole="TASK",
            Market="ON_DEMAND",
            Name="Task Instance",
            JobFlowId=Ref(cluster),
        )
     CoreInstanceGroup=emr.InstanceGroupConfigProperty(
         Name='Core Instance',
         InstanceCount=Ref(instances),
         InstanceType='m4.xlarge',
         Market='SPOT',
         BidPrice='0.1'),
     AdditionalMasterSecurityGroups=[
         networking_resources['EMRMasterSecurityGroup']
     ],
     # AdditionalSlaveSecurityGroups=[Ref(emr_additional_slave_sg_param)]
 ),
 LogUri='s3://nicor-dev/logs/emr/jupyter',
 BootstrapActions=[
     emr.BootstrapActionConfig(
         Name='Install and set up Jupyter',
         ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
             Path=
             's3://nicor-dev/deployments/emr/bootstrap_actions/bootstrap_jupyter.sh',
             Args=['testemr', 's3://nicor-dev/jupyter-notebooks/']))
 ],
 Configurations=[
     emr.Configuration(
         Classification="spark-env",
         Configurations=[
             emr.Configuration(
                 Classification="export",
                 ConfigurationProperties={
                     "PYSPARK_PYTHON":
                     os.path.join('/home/hadoop/miniconda',
                                  'bin/python'),
                     "PYTHONPATH":
                     os.path.join('/home/hadoop/miniconda',
Example #6
0
     CoreInstanceGroup=emr.InstanceGroupConfigProperty(
         Name='Core Instance',
         InstanceCount=Ref(instances),
         InstanceType='m4.xlarge',
         Market='SPOT',
         BidPrice='0.1'),
     AdditionalMasterSecurityGroups=[
         networking_resources['EMRMasterSecurityGroup']
     ],
     # AdditionalSlaveSecurityGroups=[Ref(emr_additional_slave_sg_param)]
 ),
 LogUri='s3://nicor-dev/logs/emr/generic',
 BootstrapActions=[
     emr.BootstrapActionConfig(
         Name='Move Home',
         ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
             Path=
             's3://nicor-dev/deployments/emr/bootstrap_actions/move_home.sh'
         )),
     emr.BootstrapActionConfig(
         Name='Install Conda',
         ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
             Path=
             's3://nicor-dev/deployments/emr/bootstrap_actions/bootstrap_conda.sh'
         )),
     emr.BootstrapActionConfig(
         Name='Deploy Steps',
         ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
             Path=
             's3://nicor-dev/deployments/emr/bootstrap_actions/deploy.sh',
             Args=[
                 's3://nicor-dev/deployments/emr/steps/',
Example #7
0
        "EMRSecurityConfiguration",
        Name="EMRSampleClusterSecurityConfiguration",
        SecurityConfiguration=security_configuration,
    ))

cluster = template.add_resource(
    emr.Cluster(
        "EMRSampleCluster",
        Name="EMR Sample Cluster",
        ReleaseLabel="emr-4.4.0",
        SecurityConfiguration=Ref(security_config),
        BootstrapActions=[
            emr.BootstrapActionConfig(
                Name="Dummy bootstrap action",
                ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
                    Path="file:/usr/share/aws/emr/scripts/install-hue",
                    Args=["dummy", "parameter"],
                ),
            )
        ],
        Configurations=[
            emr.Configuration(
                Classification="core-site",
                ConfigurationProperties={
                    "hadoop.security.groups.cache.secs": "250"
                },
            ),
            emr.Configuration(
                Classification="mapred-site",
                ConfigurationProperties={
                    "mapred.tasktracker.map.tasks.maximum": "2",