def test_Configuration(self):
    # A nested Configuration (hadoop-env wrapping an export classification)
    # serializes cleanly. Assumes the test module imports troposphere.emr
    # as emr.
    emr.Configuration(
        Classification="hadoop-env",
        Configurations=[
            emr.Configuration(
                Classification="export",
                ConfigurationProperties={
                    "HADOOP_DATANODE_HEAPSIZE": "2048",
                    "HADOOP_NAMENODE_OPTS": "opts",
                },
            ),
        ],
    ).to_dict()

    # A bare string inside the Configurations list is rejected.
    with self.assertRaises(TypeError):
        emr.Configuration(
            Classification="hadoop-env",
            Configurations=[
                "illegalvalue",
                emr.Configuration(
                    Classification="export",
                    ConfigurationProperties={
                        "HADOOP_DATANODE_HEAPSIZE": "2048",
                        "HADOOP_NAMENODE_OPTS": "opts",
                    },
                ),
            ],
        ).to_dict()

    # Configurations must be a list, not a plain string.
    with self.assertRaises(TypeError):
        emr.Configuration(
            Classification="hadoop-env",
            Configurations="invalid",
        ).to_dict()
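# A minimal sketch (not part of the test) of the mapping the first, valid
# Configuration above serializes to: troposphere property objects render
# to a plain dict of whatever was set.
expected = {
    "Classification": "hadoop-env",
    "Configurations": [
        {
            "Classification": "export",
            "ConfigurationProperties": {
                "HADOOP_DATANODE_HEAPSIZE": "2048",
                "HADOOP_NAMENODE_OPTS": "opts",
            },
        },
    ],
}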
def test_allow_string_cluster(self):
    # Exercises string coercion: numeric-looking properties (InstanceCount,
    # Min/MaxCapacity) are passed as strings, and intrinsic functions
    # (If/Ref) stand in for scalars. self.generate_rules is defined
    # elsewhere in the test class; a hypothetical sketch follows this test.
    spot = "2"
    withSpotPrice = "WithSpotPrice"
    cluster = emr.Cluster(
        'Cluster',
        # AdditionalInfo="Additional Info",
        Applications=[
            emr.Application(Name="Hadoop"),
            emr.Application(Name="Hive"),
            emr.Application(Name="Mahout"),
            emr.Application(Name="Pig"),
            emr.Application(Name="Spark"),
        ],
        BootstrapActions=[
            emr.BootstrapActionConfig(
                Name='Dummy bootstrap action',
                ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
                    Path='file:/usr/share/aws/emr/scripts/install-hue',
                    Args=["dummy", "parameter"]))
        ],
        Configurations=[
            emr.Configuration(
                Classification="core-site",
                ConfigurationProperties={
                    'hadoop.security.groups.cache.secs': '250'
                })
        ],
        Instances=emr.JobFlowInstancesConfig(
            Ec2KeyName="KeyName",
            Ec2SubnetId="SubnetId",
            MasterInstanceGroup=emr.InstanceGroupConfigProperty(
                InstanceCount="1",
                InstanceType=M4_LARGE,
                AutoScalingPolicy=emr.AutoScalingPolicy(
                    Constraints=emr.ScalingConstraints(
                        MinCapacity="1", MaxCapacity="3"),
                    Rules=self.generate_rules("MasterAutoScalingPolicy")),
            ),
            CoreInstanceGroup=emr.InstanceGroupConfigProperty(
                Name="Core Instance",
                BidPrice=If(withSpotPrice, Ref(spot), Ref("AWS::NoValue")),
                Market=If(withSpotPrice, "SPOT", "ON_DEMAND"),
                InstanceCount="1",
                InstanceType=M4_LARGE,
                AutoScalingPolicy=emr.AutoScalingPolicy(
                    Constraints=emr.ScalingConstraints(
                        MinCapacity="1", MaxCapacity="3"),
                    Rules=self.generate_rules("CoreAutoScalingPolicy"),
                )),
        ),
        JobFlowRole="EMRJobFlowRole",
        LogUri="s3://cluster-logs",
        Name="EMR Cluster",
        ReleaseLabel="emr-5.5.0",
        ServiceRole="EMRServiceRole",
        AutoScalingRole="EMR_AutoScaling_DefaultRole",
        VisibleToAllUsers="true",
        Tags=Tags(Name="EMR Sample Cluster"))
    cluster.to_dict()
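# Hypothetical sketch (not from the test file) of the generate_rules helper
# used above; the only contract the test relies on is that it returns a
# list of emr.ScalingRule objects.
def generate_rules(self, prefix):
    return [
        emr.ScalingRule(
            Name=prefix + 'ScaleUpMemory',
            Description='Scale up on low memory',
            Action=emr.ScalingAction(
                SimpleScalingPolicyConfiguration=emr.SimpleScalingPolicyConfiguration(
                    AdjustmentType=emr.CHANGE_IN_CAPACITY,
                    CoolDown=300,
                    ScalingAdjustment=1)),
            Trigger=emr.ScalingTrigger(
                CloudWatchAlarmDefinition=emr.CloudWatchAlarmDefinition(
                    ComparisonOperator='LESS_THAN',
                    MetricName='YARNMemoryAvailablePercentage',
                    Period=300,
                    Threshold=15))),
    ]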
# EMR Cluster Resource
cluster = template.add_resource(
    emr.Cluster(
        "EMRSampleCluster",
        Name="EMR Sample Cluster",
        ReleaseLabel='emr-4.4.0',
        BootstrapActions=[
            emr.BootstrapActionConfig(
                Name='Dummy bootstrap action',
                ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
                    Path='/bin/sh',
                    Args=['echo', 'Hello World']))
        ],
        Configurations=[
            emr.Configuration(
                Classification="core-site",
                ConfigurationProperties={
                    'hadoop.security.groups.cache.secs': '250'
                }),
            emr.Configuration(
                Classification="mapred-site",
                ConfigurationProperties={
                    'mapred.tasktracker.map.tasks.maximum': '2',
                    'mapreduce.map.sort.spill.percent': '90',
                    'mapreduce.tasktracker.reduce.tasks.maximum': '5'
                }),
            emr.Configuration(
                Classification="hadoop-env",
                Configurations=[
                    emr.Configuration(
                        Classification="export",
                        ConfigurationProperties={
                            "HADOOP_DATANODE_HEAPSIZE": "2048",
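# Sketch (an assumption, not shown above): the full example goes on to set
# the remaining required cluster properties (Instances, JobFlowRole,
# ServiceRole, ...) and typically ends by rendering the template:
print(template.to_json())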
def test_allow_string_cluster(self):
    # security_configuration (the policy document) is defined elsewhere in
    # the test module.
    cluster_security_configuration = emr.SecurityConfiguration(
        'emrsecurityconfiguration',
        Name="EMRSecurityConfiguration",
        SecurityConfiguration=security_configuration)

    spot = "2"
    withSpotPrice = "WithSpotPrice"
    cluster = emr.Cluster(
        'Cluster',
        # AdditionalInfo="Additional Info",
        Applications=[
            emr.Application(Name="Hadoop"),
            emr.Application(Name="Hive"),
            emr.Application(Name="Mahout"),
            emr.Application(Name="Pig"),
            emr.Application(Name="Spark"),
        ],
        BootstrapActions=[
            emr.BootstrapActionConfig(
                Name='Dummy bootstrap action',
                ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
                    Path='file:/usr/share/aws/emr/scripts/install-hue',
                    Args=["dummy", "parameter"]))
        ],
        Configurations=[
            emr.Configuration(
                Classification="core-site",
                ConfigurationProperties={
                    'hadoop.security.groups.cache.secs': '250'
                })
        ],
        Instances=emr.JobFlowInstancesConfig(
            Ec2KeyName="KeyName",
            Ec2SubnetId="SubnetId",
            MasterInstanceGroup=emr.InstanceGroupConfigProperty(
                InstanceCount="1",
                InstanceType=M4_LARGE,
                AutoScalingPolicy=emr.AutoScalingPolicy(
                    Constraints=emr.ScalingConstraints(
                        MinCapacity="1", MaxCapacity="3"),
                    Rules=self.generate_rules("MasterAutoScalingPolicy")),
            ),
            CoreInstanceGroup=emr.InstanceGroupConfigProperty(
                Name="Core Instance",
                BidPrice=If(withSpotPrice, Ref(spot), Ref("AWS::NoValue")),
                Market=If(withSpotPrice, "SPOT", "ON_DEMAND"),
                InstanceCount="1",
                InstanceType=M4_LARGE,
                AutoScalingPolicy=emr.AutoScalingPolicy(
                    Constraints=emr.ScalingConstraints(
                        MinCapacity="1", MaxCapacity="3"),
                    Rules=self.generate_rules("CoreAutoScalingPolicy"),
                )),
        ),
        JobFlowRole="EMRJobFlowRole",
        LogUri="s3://cluster-logs",
        Name="EMR Cluster",
        ReleaseLabel="emr-5.5.0",
        SecurityConfiguration=Ref(cluster_security_configuration),
        ServiceRole="EMRServiceRole",
        AutoScalingRole="EMR_AutoScaling_DefaultRole",
        VisibleToAllUsers="true",
        Tags=Tags(Name="EMR Sample Cluster"))
    cluster.to_dict()

    autoscale_policy = emr.AutoScalingPolicy(
        Constraints=emr.ScalingConstraints(MinCapacity=0, MaxCapacity=5),
        Rules=[
            emr.ScalingRule(
                Name='ScaleUpContainerPending',
                Description='Scale up on over-provisioned containers',
                Action=emr.ScalingAction(
                    SimpleScalingPolicyConfiguration=emr.SimpleScalingPolicyConfiguration(
                        AdjustmentType=emr.CHANGE_IN_CAPACITY,
                        CoolDown=300,
                        ScalingAdjustment=1)),
                Trigger=emr.ScalingTrigger(
                    CloudWatchAlarmDefinition=emr.CloudWatchAlarmDefinition(
                        ComparisonOperator='GREATER_THAN',
                        MetricName='ContainerPendingRatio',
                        Period=300,
                        Threshold=0.75,
                        Dimensions=[
                            emr.MetricDimension(
                                Key='JobFlowId', Value='${emr.clusterId}')
                        ]))),
            emr.ScalingRule(
                Name='ScaleUpMemory',
                Description='Scale up on low memory',
                Action=emr.ScalingAction(
                    SimpleScalingPolicyConfiguration=emr.SimpleScalingPolicyConfiguration(
                        AdjustmentType='CHANGE_IN_CAPACITY',
                        CoolDown=300,
                        ScalingAdjustment=1)),
                Trigger=emr.ScalingTrigger(
                    CloudWatchAlarmDefinition=emr.CloudWatchAlarmDefinition(
                        ComparisonOperator='LESS_THAN',
                        MetricName='YARNMemoryAvailablePercentage',
                        Period=300,
                        Threshold=15,
                        Dimensions=[
                            emr.MetricDimension(
                                Key='JobFlowId', Value='${emr.clusterId}')
                        ]))),
            emr.ScalingRule(
                Name='ScaleDownMemory',
                Description='Scale down on high memory',
                Action=emr.ScalingAction(
                    SimpleScalingPolicyConfiguration=emr.SimpleScalingPolicyConfiguration(
                        AdjustmentType=emr.CHANGE_IN_CAPACITY,
                        CoolDown=300,
                        ScalingAdjustment=-1)),
                Trigger=emr.ScalingTrigger(
                    CloudWatchAlarmDefinition=emr.CloudWatchAlarmDefinition(
                        ComparisonOperator='GREATER_THAN',
                        MetricName='YARNMemoryAvailablePercentage',
                        Period=300,
                        Threshold=75,
                        Dimensions=[
                            emr.MetricDimension(
                                Key='JobFlowId', Value='${emr.clusterId}')
                        ]))),
        ])

    emr.InstanceGroupConfig(
        'TaskInstanceGroup',
        AutoScalingPolicy=autoscale_policy,
        InstanceCount=0,
        InstanceType=M4_LARGE,
        InstanceRole='TASK',
        Market='ON_DEMAND',
        Name='Task Instance',
        JobFlowId=Ref(cluster))
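# Aside (not part of the test): the rules above mix the module constant
# emr.CHANGE_IN_CAPACITY with the bare string 'CHANGE_IN_CAPACITY'; the
# constant is just that string, so both spellings serialize identically.
assert emr.CHANGE_IN_CAPACITY == 'CHANGE_IN_CAPACITY'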
                    Path='s3://nicor-dev/deployments/emr/bootstrap_actions/bootstrap_jupyter.sh',
                    Args=['testemr', 's3://nicor-dev/jupyter-notebooks/']))
        ],
        Configurations=[
            emr.Configuration(
                Classification="spark-env",
                Configurations=[
                    emr.Configuration(
                        Classification="export",
                        ConfigurationProperties={
                            "PYSPARK_PYTHON": os.path.join(
                                '/home/hadoop/miniconda', 'bin/python'),
                            "PYTHONPATH": os.path.join(
                                '/home/hadoop/miniconda', 'bin/python') +
                                ":/usr/lib/spark/python/:$PYTHONPATH",
                            "PYSPARK_DRIVER_PYTHON": os.path.join(
                                '/home/hadoop/miniconda', 'bin/python'),
                            "SPARK_HOME": "/usr/lib/spark",
                            "PYTHONHASHSEED": "123"
                        })
                ]),
        ],
        Applications=[
            emr.Application(Name=app) for app in cfg['applications']
        ],
        VisibleToAllUsers='true',
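# cfg is loaded elsewhere in this script (e.g. from a config file); a
# hypothetical minimal shape, assuming only the key used above:
cfg = {
    'applications': ['Hadoop', 'Spark'],  # names handed to emr.Application
}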
Name="EMR Sample Cluster", ReleaseLabel="emr-4.4.0", SecurityConfiguration=Ref(security_config), BootstrapActions=[ emr.BootstrapActionConfig( Name="Dummy bootstrap action", ScriptBootstrapAction=emr.ScriptBootstrapActionConfig( Path="file:/usr/share/aws/emr/scripts/install-hue", Args=["dummy", "parameter"], ), ) ], Configurations=[ emr.Configuration( Classification="core-site", ConfigurationProperties={ "hadoop.security.groups.cache.secs": "250" }, ), emr.Configuration( Classification="mapred-site", ConfigurationProperties={ "mapred.tasktracker.map.tasks.maximum": "2", "mapreduce.map.sort.spill.percent": "90", "mapreduce.tasktracker.reduce.tasks.maximum": "5", }, ), emr.Configuration( Classification="hadoop-env", Configurations=[ emr.Configuration( Classification="export",