Ejemplo n.º 1
0
    def test_Configuration(self):
        """Check what ``emr.Configuration`` accepts for ``Configurations``.

        A list holding only ``emr.Configuration`` objects must build and
        serialize. A list that also contains a bare string, or a bare
        string passed in place of the list, must raise ``TypeError``.
        """

        def export_config():
            # Build a fresh nested "export" classification on every call so
            # each scenario below gets its own object.
            return emr.Configuration(
                Classification="export",
                ConfigurationProperties={
                    "HADOOP_DATANODE_HEAPSIZE": "2048",
                    "HADOOP_NAMENODE_OPTS": "opts",
                },
            )

        # Well-formed nesting: construction and serialization must succeed.
        well_formed = emr.Configuration(
            Classification="hadoop-env",
            Configurations=[export_config()],
        )
        well_formed.to_dict()

        # A raw string mixed in with valid entries is rejected.
        with self.assertRaises(TypeError):
            mixed_entries = emr.Configuration(
                Classification="hadoop-env",
                Configurations=["illegalvalue", export_config()],
            )
            mixed_entries.to_dict()

        # A raw string supplied instead of a list is rejected as well.
        with self.assertRaises(TypeError):
            not_a_list = emr.Configuration(
                Classification="hadoop-env",
                Configurations="invalid",
            )
            not_a_list.to_dict()
Ejemplo n.º 2
0
    def test_allow_string_cluster(self):
        """Build an EMR cluster with string-valued counts and serialize it.

        Instance counts and scaling capacities are passed as strings. The
        test passes when construction and ``to_dict()`` complete without
        raising.
        """
        spot_ref_name = "2"
        spot_condition = "WithSpotPrice"

        applications = [
            emr.Application(Name=app_name)
            for app_name in ("Hadoop", "Hive", "Mahout", "Pig", "Spark")
        ]

        hue_script = emr.ScriptBootstrapActionConfig(
            Path='file:/usr/share/aws/emr/scripts/install-hue',
            Args=["dummy", "parameter"],
        )
        bootstrap_actions = [
            emr.BootstrapActionConfig(
                Name='Dummy bootstrap action',
                ScriptBootstrapAction=hue_script,
            ),
        ]

        core_site = emr.Configuration(
            Classification="core-site",
            ConfigurationProperties={
                'hadoop.security.groups.cache.secs': '250',
            },
        )

        # Master group: one instance, scaling between 1 and 3.
        master_scaling = emr.AutoScalingPolicy(
            Constraints=emr.ScalingConstraints(MinCapacity="1",
                                               MaxCapacity="3"),
            Rules=self.generate_rules("MasterAutoScalingPolicy"),
        )
        master_group = emr.InstanceGroupConfigProperty(
            InstanceCount="1",
            InstanceType=M4_LARGE,
            AutoScalingPolicy=master_scaling,
        )

        # Core group: bid price and market are selected by the condition.
        core_bid_price = If(spot_condition,
                            Ref(spot_ref_name),
                            Ref("AWS::NoValue"))
        core_market = If(spot_condition, "SPOT", "ON_DEMAND")
        core_scaling = emr.AutoScalingPolicy(
            Constraints=emr.ScalingConstraints(MinCapacity="1",
                                               MaxCapacity="3"),
            Rules=self.generate_rules("CoreAutoScalingPolicy"),
        )
        core_group = emr.InstanceGroupConfigProperty(
            Name="Core Instance",
            BidPrice=core_bid_price,
            Market=core_market,
            InstanceCount="1",
            InstanceType=M4_LARGE,
            AutoScalingPolicy=core_scaling,
        )

        instances = emr.JobFlowInstancesConfig(
            Ec2KeyName="KeyName",
            Ec2SubnetId="SubnetId",
            MasterInstanceGroup=master_group,
            CoreInstanceGroup=core_group,
        )

        cluster = emr.Cluster(
            'Cluster',
            # AdditionalInfo="Additional Info",
            Applications=applications,
            BootstrapActions=bootstrap_actions,
            Configurations=[core_site],
            Instances=instances,
            JobFlowRole="EMRJobFlowRole",
            LogUri="s3://cluster-logs",
            Name="EMR Cluster",
            ReleaseLabel="emr-5.5.0",
            ServiceRole="EMRServiceRole",
            AutoScalingRole="EMR_AutoScaling_DefaultRole",
            VisibleToAllUsers="true",
            Tags=Tags(Name="EMR Sample Cluster"),
        )

        cluster.to_dict()
Ejemplo n.º 3
0
# EMR Cluster Resource

cluster = template.add_resource(
    emr.Cluster("EMRSampleCluster",
                Name="EMR Sample Cluster",
                ReleaseLabel='emr-4.4.0',
                BootstrapActions=[
                    emr.BootstrapActionConfig(
                        Name='Dummy bootstrap action',
                        ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
                            Path='/bin/sh', Args=['echo', 'Hello World']))
                ],
                Configurations=[
                    emr.Configuration(Classification="core-site",
                                      ConfigurationProperties={
                                          'hadoop.security.groups.cache.secs':
                                          '250'
                                      }),
                    emr.Configuration(
                        Classification="mapred-site",
                        ConfigurationProperties={
                            'mapred.tasktracker.map.tasks.maximum': '2',
                            'mapreduce.map.sort.spill.percent': '90',
                            'mapreduce.tasktracker.reduce.tasks.maximum': '5'
                        }),
                    emr.Configuration(Classification="hadoop-env",
                                      Configurations=[
                                          emr.Configuration(
                                              Classification="export",
                                              ConfigurationProperties={
                                                  "HADOOP_DATANODE_HEAPSIZE":
Ejemplo n.º 4
0
    def test_allow_string_cluster(self):
        """Build an EMR cluster plus a task instance group that references it.

        The cluster is given string-valued counts and capacities and is
        serialized with ``to_dict()``. Afterwards an autoscaling policy with
        three scaling rules and an ``InstanceGroupConfig`` pointing at the
        cluster are constructed. The test passes when nothing raises.
        """
        # `security_configuration` is defined outside this method.
        cluster_security_configuration = emr.SecurityConfiguration(
            'emrsecurityconfiguration',
            Name="EMRSecurityConfiguration",
            SecurityConfiguration=security_configuration,
        )

        spot_ref_name = "2"
        spot_condition = "WithSpotPrice"

        applications = [
            emr.Application(Name=app_name)
            for app_name in ("Hadoop", "Hive", "Mahout", "Pig", "Spark")
        ]

        hue_script = emr.ScriptBootstrapActionConfig(
            Path='file:/usr/share/aws/emr/scripts/install-hue',
            Args=["dummy", "parameter"],
        )
        bootstrap_actions = [
            emr.BootstrapActionConfig(
                Name='Dummy bootstrap action',
                ScriptBootstrapAction=hue_script,
            ),
        ]

        core_site = emr.Configuration(
            Classification="core-site",
            ConfigurationProperties={
                'hadoop.security.groups.cache.secs': '250',
            },
        )

        # Master group: one instance, scaling between 1 and 3.
        master_scaling = emr.AutoScalingPolicy(
            Constraints=emr.ScalingConstraints(MinCapacity="1",
                                               MaxCapacity="3"),
            Rules=self.generate_rules("MasterAutoScalingPolicy"),
        )
        master_group = emr.InstanceGroupConfigProperty(
            InstanceCount="1",
            InstanceType=M4_LARGE,
            AutoScalingPolicy=master_scaling,
        )

        # Core group: bid price and market are selected by the condition.
        core_bid_price = If(spot_condition,
                            Ref(spot_ref_name),
                            Ref("AWS::NoValue"))
        core_market = If(spot_condition, "SPOT", "ON_DEMAND")
        core_scaling = emr.AutoScalingPolicy(
            Constraints=emr.ScalingConstraints(MinCapacity="1",
                                               MaxCapacity="3"),
            Rules=self.generate_rules("CoreAutoScalingPolicy"),
        )
        core_group = emr.InstanceGroupConfigProperty(
            Name="Core Instance",
            BidPrice=core_bid_price,
            Market=core_market,
            InstanceCount="1",
            InstanceType=M4_LARGE,
            AutoScalingPolicy=core_scaling,
        )

        instances = emr.JobFlowInstancesConfig(
            Ec2KeyName="KeyName",
            Ec2SubnetId="SubnetId",
            MasterInstanceGroup=master_group,
            CoreInstanceGroup=core_group,
        )

        cluster = emr.Cluster(
            'Cluster',
            # AdditionalInfo="Additional Info",
            Applications=applications,
            BootstrapActions=bootstrap_actions,
            Configurations=[core_site],
            Instances=instances,
            JobFlowRole="EMRJobFlowRole",
            LogUri="s3://cluster-logs",
            Name="EMR Cluster",
            ReleaseLabel="emr-5.5.0",
            SecurityConfiguration=Ref(cluster_security_configuration),
            ServiceRole="EMRServiceRole",
            AutoScalingRole="EMR_AutoScaling_DefaultRole",
            VisibleToAllUsers="true",
            Tags=Tags(Name="EMR Sample Cluster"),
        )

        cluster.to_dict()

        def scaling_rule(name, description, adjustment_type, adjustment,
                         comparison, metric, threshold):
            # All three rules share the 300 cool-down/period values and the
            # JobFlowId dimension; only the arguments above differ.
            scaling_config = emr.SimpleScalingPolicyConfiguration(
                AdjustmentType=adjustment_type,
                CoolDown=300,
                ScalingAdjustment=adjustment,
            )
            alarm = emr.CloudWatchAlarmDefinition(
                ComparisonOperator=comparison,
                MetricName=metric,
                Period=300,
                Threshold=threshold,
                Dimensions=[
                    emr.MetricDimension(Key='JobFlowId',
                                        Value='${emr.clusterId}'),
                ],
            )
            return emr.ScalingRule(
                Name=name,
                Description=description,
                Action=emr.ScalingAction(
                    SimpleScalingPolicyConfiguration=scaling_config),
                Trigger=emr.ScalingTrigger(
                    CloudWatchAlarmDefinition=alarm),
            )

        scaling_rules = [
            scaling_rule('ScaleUpContainerPending',
                         'Scale up on over-provisioned containers',
                         emr.CHANGE_IN_CAPACITY, 1,
                         'GREATER_THAN', 'ContainerPendingRatio', 0.75),
            # This rule passes the adjustment type as a plain string rather
            # than the module constant used by the other two.
            scaling_rule('ScaleUpMemory',
                         'Scale up on low memory',
                         'CHANGE_IN_CAPACITY', 1,
                         'LESS_THAN', 'YARNMemoryAvailablePercentage', 15),
            scaling_rule('ScaleDownMemory',
                         'Scale down on high memory',
                         emr.CHANGE_IN_CAPACITY, -1,
                         'GREATER_THAN', 'YARNMemoryAvailablePercentage', 75),
        ]

        autoscale_policy = emr.AutoScalingPolicy(
            Constraints=emr.ScalingConstraints(MinCapacity=0, MaxCapacity=5),
            Rules=scaling_rules,
        )

        # Constructed only; the result is not kept or serialized.
        emr.InstanceGroupConfig(
            'TaskInstanceGroup',
            AutoScalingPolicy=autoscale_policy,
            InstanceCount=0,
            InstanceType=M4_LARGE,
            InstanceRole='TASK',
            Market='ON_DEMAND',
            Name='Task Instance',
            JobFlowId=Ref(cluster),
        )
Ejemplo n.º 5
0
             Path=
             's3://nicor-dev/deployments/emr/bootstrap_actions/bootstrap_jupyter.sh',
             Args=['testemr', 's3://nicor-dev/jupyter-notebooks/']))
 ],
 Configurations=[
     emr.Configuration(
         Classification="spark-env",
         Configurations=[
             emr.Configuration(
                 Classification="export",
                 ConfigurationProperties={
                     "PYSPARK_PYTHON":
                     os.path.join('/home/hadoop/miniconda',
                                  'bin/python'),
                     "PYTHONPATH":
                     os.path.join('/home/hadoop/miniconda',
                                  'bin/python') +
                     ":/usr/lib/spark/python/:$PYTHONPATH",
                     "PYSPARK_DRIVER_PYTHON":
                     os.path.join('/home/hadoop/miniconda',
                                  'bin/python'),
                     "SPARK_HOME":
                     "/usr/lib/spark",
                     "PYTHONHASHSEED":
                     "123"
                 })
         ]),
 ],
 Applications=[
     emr.Application(Name=app) for app in cfg['applications']
 ],
 VisibleToAllUsers='true',
Ejemplo n.º 6
0
 Name="EMR Sample Cluster",
 ReleaseLabel="emr-4.4.0",
 SecurityConfiguration=Ref(security_config),
 BootstrapActions=[
     emr.BootstrapActionConfig(
         Name="Dummy bootstrap action",
         ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
             Path="file:/usr/share/aws/emr/scripts/install-hue",
             Args=["dummy", "parameter"],
         ),
     )
 ],
 Configurations=[
     emr.Configuration(
         Classification="core-site",
         ConfigurationProperties={
             "hadoop.security.groups.cache.secs": "250"
         },
     ),
     emr.Configuration(
         Classification="mapred-site",
         ConfigurationProperties={
             "mapred.tasktracker.map.tasks.maximum": "2",
             "mapreduce.map.sort.spill.percent": "90",
             "mapreduce.tasktracker.reduce.tasks.maximum": "5",
         },
     ),
     emr.Configuration(
         Classification="hadoop-env",
         Configurations=[
             emr.Configuration(
                 Classification="export",