def test_allow_string_cluster(self):
    """String-typed property values (counts, capacities, booleans) must be
    accepted by the EMR Cluster resource and serialize without error."""
    spot_bid_param = "2"
    spot_condition = "WithSpotPrice"

    def make_autoscaling_policy(rule_group):
        # Build a fresh policy per instance group; capacities are deliberately
        # strings — that is what this test exercises.
        return emr.AutoScalingPolicy(
            Constraints=emr.ScalingConstraints(MinCapacity="1",
                                               MaxCapacity="3"),
            Rules=self.generate_rules(rule_group))

    master_group = emr.InstanceGroupConfigProperty(
        InstanceCount="1",
        InstanceType=M4_LARGE,
        AutoScalingPolicy=make_autoscaling_policy("MasterAutoScalingPolicy"))

    core_group = emr.InstanceGroupConfigProperty(
        Name="Core Instance",
        # Bid price only applies when the spot-price condition is true.
        BidPrice=If(spot_condition, Ref(spot_bid_param), Ref("AWS::NoValue")),
        Market=If(spot_condition, "SPOT", "ON_DEMAND"),
        InstanceCount="1",
        InstanceType=M4_LARGE,
        AutoScalingPolicy=make_autoscaling_policy("CoreAutoScalingPolicy"))

    cluster = emr.Cluster(
        'Cluster',
        # AdditionalInfo left unset (was commented out upstream).
        Applications=[
            emr.Application(Name=app)
            for app in ("Hadoop", "Hive", "Mahout", "Pig", "Spark")
        ],
        BootstrapActions=[
            emr.BootstrapActionConfig(
                Name='Dummy bootstrap action',
                ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
                    Path='file:/usr/share/aws/emr/scripts/install-hue',
                    Args=["dummy", "parameter"]))
        ],
        Configurations=[
            emr.Configuration(
                Classification="core-site",
                ConfigurationProperties={
                    'hadoop.security.groups.cache.secs': '250'
                })
        ],
        Instances=emr.JobFlowInstancesConfig(
            Ec2KeyName="KeyName",
            Ec2SubnetId="SubnetId",
            MasterInstanceGroup=master_group,
            CoreInstanceGroup=core_group),
        JobFlowRole="EMRJobFlowRole",
        LogUri="s3://cluster-logs",
        Name="EMR Cluster",
        ReleaseLabel="emr-5.5.0",
        ServiceRole="EMRServiceRole",
        AutoScalingRole="EMR_AutoScaling_DefaultRole",
        VisibleToAllUsers="true",
        Tags=Tags(Name="EMR Sample Cluster"))

    # Serialization performs property validation; the dict is discarded.
    cluster.to_dict()
ManagedPolicyArns=[ 'arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role' ])) emr_instance_profile = template.add_resource( iam.InstanceProfile("EMRInstanceProfile", Roles=[Ref(emr_job_flow_role)])) # EMR Cluster Resource cluster = template.add_resource( emr.Cluster("EMRSampleCluster", Name="EMR Sample Cluster", ReleaseLabel='emr-4.4.0', BootstrapActions=[ emr.BootstrapActionConfig( Name='Dummy bootstrap action', ScriptBootstrapAction=emr.ScriptBootstrapActionConfig( Path='/bin/sh', Args=['echo', 'Hello World'])) ], Configurations=[ emr.Configuration(Classification="core-site", ConfigurationProperties={ 'hadoop.security.groups.cache.secs': '250' }), emr.Configuration( Classification="mapred-site", ConfigurationProperties={ 'mapred.tasktracker.map.tasks.maximum': '2', 'mapreduce.map.sort.spill.percent': '90', 'mapreduce.tasktracker.reduce.tasks.maximum': '5' }),
emr.SecurityConfiguration( 'EMRSecurityConfiguration', Name="EMRSampleClusterSecurityConfiguration", SecurityConfiguration=security_configuration, )) cluster = template.add_resource( emr.Cluster( "EMRSampleCluster", Name="EMR Sample Cluster", ReleaseLabel='emr-4.4.0', SecurityConfiguration=Ref(security_config), BootstrapActions=[ emr.BootstrapActionConfig( Name='Dummy bootstrap action', ScriptBootstrapAction=emr.ScriptBootstrapActionConfig( Path='file:/usr/share/aws/emr/scripts/install-hue', Args=["dummy", "parameter"])) ], Configurations=[ emr.Configuration(Classification="core-site", ConfigurationProperties={ 'hadoop.security.groups.cache.secs': '250' }), emr.Configuration(Classification="mapred-site", ConfigurationProperties={ 'mapred.tasktracker.map.tasks.maximum': '2', 'mapreduce.map.sort.spill.percent': '90', 'mapreduce.tasktracker.reduce.tasks.maximum': '5' }),
def test_allow_string_cluster(self):
    """Like the basic string-cluster test, but the cluster references a
    SecurityConfiguration, and a standalone autoscaling task group is built."""
    cluster_security_configuration = emr.SecurityConfiguration(
        'emrsecurityconfiguration',
        Name="EMRSecurityConfiguration",
        SecurityConfiguration=security_configuration)

    spot_bid_param = "2"
    spot_condition = "WithSpotPrice"

    def make_autoscaling_policy(rule_group):
        # Fresh policy per instance group; string capacities are intentional.
        return emr.AutoScalingPolicy(
            Constraints=emr.ScalingConstraints(MinCapacity="1",
                                               MaxCapacity="3"),
            Rules=self.generate_rules(rule_group))

    master_group = emr.InstanceGroupConfigProperty(
        InstanceCount="1",
        InstanceType=M4_LARGE,
        AutoScalingPolicy=make_autoscaling_policy("MasterAutoScalingPolicy"))

    core_group = emr.InstanceGroupConfigProperty(
        Name="Core Instance",
        # Bid price only applies when the spot-price condition is true.
        BidPrice=If(spot_condition, Ref(spot_bid_param), Ref("AWS::NoValue")),
        Market=If(spot_condition, "SPOT", "ON_DEMAND"),
        InstanceCount="1",
        InstanceType=M4_LARGE,
        AutoScalingPolicy=make_autoscaling_policy("CoreAutoScalingPolicy"))

    cluster = emr.Cluster(
        'Cluster',
        # AdditionalInfo left unset (was commented out upstream).
        Applications=[
            emr.Application(Name=app)
            for app in ("Hadoop", "Hive", "Mahout", "Pig", "Spark")
        ],
        BootstrapActions=[
            emr.BootstrapActionConfig(
                Name='Dummy bootstrap action',
                ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
                    Path='file:/usr/share/aws/emr/scripts/install-hue',
                    Args=["dummy", "parameter"]))
        ],
        Configurations=[
            emr.Configuration(
                Classification="core-site",
                ConfigurationProperties={
                    'hadoop.security.groups.cache.secs': '250'
                })
        ],
        Instances=emr.JobFlowInstancesConfig(
            Ec2KeyName="KeyName",
            Ec2SubnetId="SubnetId",
            MasterInstanceGroup=master_group,
            CoreInstanceGroup=core_group),
        JobFlowRole="EMRJobFlowRole",
        LogUri="s3://cluster-logs",
        Name="EMR Cluster",
        ReleaseLabel="emr-5.5.0",
        SecurityConfiguration=Ref(cluster_security_configuration),
        ServiceRole="EMRServiceRole",
        AutoScalingRole="EMR_AutoScaling_DefaultRole",
        VisibleToAllUsers="true",
        Tags=Tags(Name="EMR Sample Cluster"))

    # Serialization performs property validation; the dict is discarded.
    cluster.to_dict()

    def make_scaling_rule(name, description, adjustment_type, adjustment,
                          operator, metric, threshold):
        # One CloudWatch-driven scaling rule. Every rule in this test shares
        # the same 300s cooldown/period and the JobFlowId dimension; a fresh
        # MetricDimension is built per call, matching the original layout.
        return emr.ScalingRule(
            Name=name,
            Description=description,
            Action=emr.ScalingAction(
                SimpleScalingPolicyConfiguration=emr.
                SimpleScalingPolicyConfiguration(
                    AdjustmentType=adjustment_type,
                    CoolDown=300,
                    ScalingAdjustment=adjustment)),
            Trigger=emr.ScalingTrigger(
                CloudWatchAlarmDefinition=emr.CloudWatchAlarmDefinition(
                    ComparisonOperator=operator,
                    MetricName=metric,
                    Period=300,
                    Threshold=threshold,
                    Dimensions=[
                        emr.MetricDimension(Key='JobFlowId',
                                            Value='${emr.clusterId}')
                    ])))

    # NOTE: the mixed emr.CHANGE_IN_CAPACITY / 'CHANGE_IN_CAPACITY' spellings
    # below are reproduced from the original on purpose.
    autoscale_policy = emr.AutoScalingPolicy(
        Constraints=emr.ScalingConstraints(MinCapacity=0, MaxCapacity=5),
        Rules=[
            make_scaling_rule('ScaleUpContainerPending',
                              'Scale up on over-provisioned containers',
                              emr.CHANGE_IN_CAPACITY, 1,
                              'GREATER_THAN', 'ContainerPendingRatio', 0.75),
            make_scaling_rule('ScaleUpMemory',
                              'Scale up on low memory',
                              'CHANGE_IN_CAPACITY', 1,
                              'LESS_THAN', 'YARNMemoryAvailablePercentage',
                              15),
            make_scaling_rule('ScaleDownMemory',
                              'Scale down on high memory',
                              emr.CHANGE_IN_CAPACITY, -1,
                              'GREATER_THAN', 'YARNMemoryAvailablePercentage',
                              75),
        ])

    # Construction alone exercises validation of the task instance group.
    emr.InstanceGroupConfig(
        'TaskInstanceGroup',
        AutoScalingPolicy=autoscale_policy,
        InstanceCount=0,
        InstanceType=M4_LARGE,
        InstanceRole='TASK',
        Market='ON_DEMAND',
        Name='Task Instance',
        JobFlowId=Ref(cluster))
CoreInstanceGroup=emr.InstanceGroupConfigProperty( Name='Core Instance', InstanceCount=Ref(instances), InstanceType='m4.xlarge', Market='SPOT', BidPrice='0.1'), AdditionalMasterSecurityGroups=[ networking_resources['EMRMasterSecurityGroup'] ], # AdditionalSlaveSecurityGroups=[Ref(emr_additional_slave_sg_param)] ), LogUri='s3://nicor-dev/logs/emr/jupyter', BootstrapActions=[ emr.BootstrapActionConfig( Name='Install and set up Jupyter', ScriptBootstrapAction=emr.ScriptBootstrapActionConfig( Path= 's3://nicor-dev/deployments/emr/bootstrap_actions/bootstrap_jupyter.sh', Args=['testemr', 's3://nicor-dev/jupyter-notebooks/'])) ], Configurations=[ emr.Configuration( Classification="spark-env", Configurations=[ emr.Configuration( Classification="export", ConfigurationProperties={ "PYSPARK_PYTHON": os.path.join('/home/hadoop/miniconda', 'bin/python'), "PYTHONPATH": os.path.join('/home/hadoop/miniconda',
CoreInstanceGroup=emr.InstanceGroupConfigProperty( Name='Core Instance', InstanceCount=Ref(instances), InstanceType='m4.xlarge', Market='SPOT', BidPrice='0.1'), AdditionalMasterSecurityGroups=[ networking_resources['EMRMasterSecurityGroup'] ], # AdditionalSlaveSecurityGroups=[Ref(emr_additional_slave_sg_param)] ), LogUri='s3://nicor-dev/logs/emr/generic', BootstrapActions=[ emr.BootstrapActionConfig( Name='Move Home', ScriptBootstrapAction=emr.ScriptBootstrapActionConfig( Path= 's3://nicor-dev/deployments/emr/bootstrap_actions/move_home.sh' )), emr.BootstrapActionConfig( Name='Install Conda', ScriptBootstrapAction=emr.ScriptBootstrapActionConfig( Path= 's3://nicor-dev/deployments/emr/bootstrap_actions/bootstrap_conda.sh' )), emr.BootstrapActionConfig( Name='Deploy Steps', ScriptBootstrapAction=emr.ScriptBootstrapActionConfig( Path= 's3://nicor-dev/deployments/emr/bootstrap_actions/deploy.sh', Args=[ 's3://nicor-dev/deployments/emr/steps/',
"EMRSecurityConfiguration", Name="EMRSampleClusterSecurityConfiguration", SecurityConfiguration=security_configuration, )) cluster = template.add_resource( emr.Cluster( "EMRSampleCluster", Name="EMR Sample Cluster", ReleaseLabel="emr-4.4.0", SecurityConfiguration=Ref(security_config), BootstrapActions=[ emr.BootstrapActionConfig( Name="Dummy bootstrap action", ScriptBootstrapAction=emr.ScriptBootstrapActionConfig( Path="file:/usr/share/aws/emr/scripts/install-hue", Args=["dummy", "parameter"], ), ) ], Configurations=[ emr.Configuration( Classification="core-site", ConfigurationProperties={ "hadoop.security.groups.cache.secs": "250" }, ), emr.Configuration( Classification="mapred-site", ConfigurationProperties={ "mapred.tasktracker.map.tasks.maximum": "2",