def test_add_step_to_cluster_with_state_change(self):
    """Add a step to a mocked EMR cluster and verify that
    wait_for_step_completion() returns once a background thread flips the
    mocked step's state from RUNNING to COMPLETED."""
    mock_emr_obj = moto.mock_emr()
    with mock_emr_obj:
        # env_setup() builds the mocked cluster and returns the client
        # under test plus the cluster id.
        emr_cluster_client, emr_cluster_id = self.env_setup(
            self.emr_cluster_name,
            self.aws_region,
            self.aws_credentials,
            self.timeout_seconds,
            self.retry_seconds,
            self.long_timeout_seconds)

        step_name = "Test_Step"
        command_str = "/usr/bin/spark-submit --class spark.job.main.class"
        emr_step_id = emr_cluster_client.add_step(step_name, command_str)
        logging.info(str(emr_step_id))

        # The newly added step must be the only one listed.
        cluster_steps = emr_cluster_client.get_list_of_steps()
        assert 1 == len(cluster_steps)
        assert cluster_steps[0] == emr_step_id

        emr_step_status, _ = emr_cluster_client.get_step_status(emr_step_id)
        assert emr_step_status == "STARTING"

        # "STARTING" is not a valid EMR Step state, so we will change it to "RUNNING"
        # by reaching directly into moto's in-memory backend.
        emr_backend = mock_emr_obj.backends[self.aws_region]
        fake_cluster = emr_backend.clusters[emr_cluster_id]
        fake_cluster.steps[0].state = "RUNNING"

        def complete_step():
            # Wait for some time to let EMRClusterClient poll a few times.
            fake_cluster.steps[0].state = "COMPLETED"

        # ConcurrentExecutor runs complete_step after a 0.2s delay while the
        # main thread blocks in wait_for_step_completion().
        with ConcurrentExecutor(complete_step, 0.2):
            emr_cluster_client.wait_for_step_completion(
                emr_step_id, self.long_timeout_seconds)
def test_get_step_output_path(self):
    """The stdout log path reported for a completed step must match the
    expected S3 location for the mocked cluster."""
    emr_mock = moto.mock_emr()
    with emr_mock:
        client, cluster_id = self.env_setup(
            self.emr_cluster_name,
            self.aws_region,
            self.aws_credentials,
            self.timeout_seconds,
            self.retry_seconds,
            self.long_timeout_seconds)

        step_id = client.add_step(
            "Test_Step",
            "/usr/bin/spark-submit --class spark.job.main.class")

        # Flip the mocked step straight to COMPLETED so the wait returns.
        emr_mock.backends[self.aws_region] \
            .clusters[cluster_id].steps[0].state = "COMPLETED"
        client.wait_for_step_completion(step_id, self.long_timeout_seconds)

        expected = "s3://mybucket/log/{}/steps/{}/stdout.gz".format(
            client.emr_cluster_id, step_id)
        assert client.get_step_output_path(step_id) == expected
def emr():
    """EMR mock service"""
    # TODO: implement fixture after moto is ready
    # https://github.com/spulec/moto/pull/456
    boto3.set_stream_logger()
    emr_mock = mock_emr()
    emr_mock.start()
    emr_client = boto3.client('emr')
    # Spin up two identically configured mocked clusters.
    clusters = [
        emr_client.run_job_flow(
            Name='cluster{}'.format(idx),
            Instances={
                'MasterInstanceType': 'c3.xlarge',
                'SlaveInstanceType': 'c3.xlarge',
                'InstanceCount': 3,
                'Placement': {'AvailabilityZone': 'ap-northeast-1a'},
                'KeepJobFlowAliveWhenNoSteps': True,
            },
            VisibleToAllUsers=True,
        )
        for idx in range(2)
    ]
    yield {'clusters': clusters}
    emr_mock.stop()
def emr_cluster(request):
    """Yield (client, cluster, step_ids) for a mocked EMR cluster that has
    two CONTINUE-on-failure jar steps queued."""
    with mock_emr():
        client = boto3.client('emr', region_name='us-east-1')
        boot_config = {
            'Name': 'example_cluster',
            'LogUri': 's3://somes3bucket',
            'Instances': {
                'MasterInstanceType': 'c4.large',
                'SlaveInstanceType': 'c4.large',
                'InstanceCount': 3
            },
            'Tags': [{'Key': 'user_id', 'Value': '123-456'}],
        }
        cluster = client.run_job_flow(**boot_config)
        step_ids = client.add_job_flow_steps(
            JobFlowId=cluster['JobFlowId'],
            Steps=[
                {
                    'Name': "example",
                    'ActionOnFailure': 'CONTINUE',
                    'HadoopJarStep': {'Jar': 's3://runner.jar'},
                },
                {
                    'Name': "example 2",
                    'ActionOnFailure': 'CONTINUE',
                    'HadoopJarStep': {'Jar': 's3://runner.jar'},
                },
            ])
        yield (client, cluster, step_ids)
def emr():
    """EMR mock service.

    Starts a moto EMR mock, pins AWS_DEFAULT_REGION to us-east-1, creates
    two mocked clusters and yields them; teardown stops the mock and
    restores the environment.
    """
    # FIXME: moto can only support us-east-1 when EMR module is used
    # https://github.com/spulec/moto/pull/456
    # https://github.com/spulec/moto/pull/375
    mock = mock_emr()
    mock.start()
    # Remember any pre-existing region so teardown can restore it instead
    # of clobbering it with an empty string, which would break boto3
    # clients created by later tests.
    previous_region = os.environ.get('AWS_DEFAULT_REGION')
    os.environ['AWS_DEFAULT_REGION'] = 'us-east-1'
    client = boto3.client('emr')
    clusters = []
    for i in range(2):
        cluster = client.run_job_flow(
            Name='cluster-{:02d}'.format(i),
            Instances={
                'MasterInstanceType': 'c3.xlarge',
                'SlaveInstanceType': 'c3.xlarge',
                'InstanceCount': 3,
                # NOTE(review): 'us-east-1' is a region name, not an AZ
                # (an AZ would be e.g. 'us-east-1a'); moto tolerates it,
                # but confirm this is intentional.
                'Placement': {'AvailabilityZone': 'us-east-1'},
                'KeepJobFlowAliveWhenNoSteps': True,
            },
            VisibleToAllUsers=True,
        )
        clusters.append(cluster)
    yield {'clusters': clusters}
    mock.stop()
    # Restore (or remove) the region variable rather than leaving ''.
    if previous_region is None:
        os.environ.pop('AWS_DEFAULT_REGION', None)
    else:
        os.environ['AWS_DEFAULT_REGION'] = previous_region
def emr():
    """EMR mock service.

    Starts a moto EMR mock, pins AWS_DEFAULT_REGION to us-east-1, creates
    two mocked clusters and yields them; teardown stops the mock and
    restores the environment.
    """
    # FIXME: moto can only support us-east-1 when EMR module is used
    # https://github.com/spulec/moto/pull/456
    # https://github.com/spulec/moto/pull/375
    mock = mock_emr()
    mock.start()
    # Remember any pre-existing region so teardown can restore it instead
    # of clobbering it with an empty string, which would break boto3
    # clients created by later tests.
    previous_region = os.environ.get('AWS_DEFAULT_REGION')
    os.environ['AWS_DEFAULT_REGION'] = 'us-east-1'
    client = boto3.client('emr')
    clusters = []
    for i in range(2):
        cluster = client.run_job_flow(
            Name='cluster-{:02d}'.format(i),
            Instances={
                'MasterInstanceType': 'c3.xlarge',
                'SlaveInstanceType': 'c3.xlarge',
                'InstanceCount': 3,
                # NOTE(review): 'us-east-1' is a region name, not an AZ
                # (an AZ would be e.g. 'us-east-1a'); moto tolerates it,
                # but confirm this is intentional.
                'Placement': {'AvailabilityZone': 'us-east-1'},
                'KeepJobFlowAliveWhenNoSteps': True,
            },
            VisibleToAllUsers=True,
        )
        clusters.append(cluster)
    yield {'clusters': clusters}
    mock.stop()
    # Restore (or remove) the region variable rather than leaving ''.
    if previous_region is None:
        os.environ.pop('AWS_DEFAULT_REGION', None)
    else:
        os.environ['AWS_DEFAULT_REGION'] = previous_region
def emr():
    """EMR mock service"""
    # TODO: implement fixture after moto is ready
    # https://github.com/spulec/moto/pull/456
    boto3.set_stream_logger()
    emr_mock = mock_emr()
    emr_mock.start()
    client = boto3.client('emr')
    # Both mocked clusters share the same instance layout.
    instance_config = {
        'MasterInstanceType': 'c3.xlarge',
        'SlaveInstanceType': 'c3.xlarge',
        'InstanceCount': 3,
        'Placement': {'AvailabilityZone': 'ap-northeast-1a'},
        'KeepJobFlowAliveWhenNoSteps': True,
    }
    clusters = []
    for n in range(2):
        clusters.append(
            client.run_job_flow(
                Name='cluster{}'.format(n),
                Instances=instance_config,
                VisibleToAllUsers=True,
            ))
    yield {'clusters': clusters}
    emr_mock.stop()
def xxtest_start(self):
    """Disabled (xx-prefixed) test: starting a job flow through EmrManager
    should yield a non-None job flow id."""
    # need a fix https://github.com/spulec/moto/issues/1708
    with moto.mock_s3(), moto.mock_emr():
        config = spark_emr.util.load_config(CONFIG)
        bootstrap_uri = spark_emr.util.S3Path(config["bootstrap_uri"])
        self._create_bucket(bootstrap_uri.bucket)
        manager = spark_emr.manager.EmrManager(config)
        flow_id = manager.start(
            "TEST-EMR", "spark_emr_dummy.py 10", DUMMY_FOLDER, {}, False, False)
        self.assertIsNotNone(flow_id)
def test_add_step_to_cluster_fail_without_output(self):
    """A step that fails while its stderr.gz log never appears in S3 must
    make wait_for_step_completion() raise M3DAWSAPIException complaining
    about the missing log file."""
    mock_emr_obj = moto.mock_emr()
    with mock_emr_obj:
        emr_cluster_client, emr_cluster_id = self.env_setup(
            self.emr_cluster_name,
            self.aws_region,
            self.aws_credentials,
            self.timeout_seconds,
            self.retry_seconds,
            self.long_timeout_seconds)

        # Bucket that would hold the step logs; intentionally left empty.
        s3_resource = Boto3Util.create_s3_resource()
        s3_resource.create_bucket(Bucket="mybucket")

        step_name = "Test_Step"
        command_str = "/usr/bin/spark-submit --class spark.job.main.class"
        emr_step_id = emr_cluster_client.add_step(step_name, command_str)

        cluster_steps = emr_cluster_client.get_list_of_steps()
        assert 1 == len(cluster_steps)
        assert cluster_steps[0] == emr_step_id

        emr_step_status, _ = emr_cluster_client.get_step_status(emr_step_id)
        assert emr_step_status == "STARTING"

        # "STARTING" is not a valid EMR Step state, so we will change it to "RUNNING"
        emr_backend = mock_emr_obj.backends[self.aws_region]
        fake_cluster = emr_backend.clusters[emr_cluster_id]
        fake_step = fake_cluster.steps[0]
        fake_step.state = "RUNNING"

        def fail_step():
            # Runs in a background thread: mark the mocked step FAILED.
            fake_step.state = "FAILED"

        # Make sure that we do not wait for 300 seconds for gz file to be available.
        EMRClusterClient.AWSConstants.S3_FILE_AVAILABILITY_TIMEOUT_SECONDS = self.timeout_seconds

        # Required for correct log path generation in MockedMethod.
        MockedMethod.emr_cluster_id = emr_cluster_id

        stderr_gz_path = MockedMethod.log_file_template.format(
            emr_cluster_id=emr_cluster_id,
            emr_step_id=emr_step_id)
        err_msg = "File {} failed to be available after {} seconds.".\
            format(stderr_gz_path, self.timeout_seconds)

        with pytest.raises(M3DAWSAPIException, match=err_msg):
            # Wait for some time to let EMRClusterClient poll a few times.
            with ConcurrentExecutor(fail_step, 0.4):
                with patch(
                        "m3d.hadoop.emr.emr_cluster_client.EMRClusterClient.get_step_status",
                        side_effect=MockedMethod.get_step_status_mocked):
                    emr_cluster_client.wait_for_step_completion(
                        emr_step_id, self.long_timeout_seconds)
def test_wait_for_bootstrapping_cluster(self):
    """wait_for_cluster_startup() must raise once the startup timeout
    elapses while the mocked cluster never leaves BOOTSTRAPPING."""
    emr_mock = moto.mock_emr()
    with emr_mock:
        client, cluster_id = self.env_setup(
            self.emr_cluster_name,
            self.aws_region,
            self.aws_credentials,
            self.timeout_seconds,
            self.retry_seconds,
            self.long_timeout_seconds)

        # Force the mocked cluster into a never-ready state so the wait
        # can only end by timing out.
        emr_mock.backends[self.aws_region] \
            .clusters[cluster_id].state = "BOOTSTRAPPING"

        expected = "Cluster {} failed to start after {} seconds.".format(
            cluster_id, self.timeout_seconds)
        with pytest.raises(M3DAWSAPIException, match=expected):
            client.wait_for_cluster_startup()
def setup_method(self, method):
    """Start EMR and S3 mocks and create the test cluster and buckets that
    every test in this class relies on."""
    super(EMRSystemUnitTestBase, self).setup_method(method)

    # EMR mock plus one cluster dedicated to this test case.
    self.mock_emr = moto.mock_emr()
    self.mock_emr.start()
    self.emr_cluster_name = "test clustester for unit and integration tests"
    job_flow_config = dict(
        Instances={
            'InstanceCount': 3,
            'KeepJobFlowAliveWhenNoSteps': True,
            'MasterInstanceType': 'c3.medium',
            'Placement': {'AvailabilityZone': 'test_zone'},
            'SlaveInstanceType': 'c3.xlarge',
        },
        JobFlowRole='EMR_EC2_DefaultRole',
        LogUri='s3://{}/log/'.format(self.default_log_bucket),
        Name=self.emr_cluster_name,
        ServiceRole='EMR_DefaultRole',
        VisibleToAllUsers=True)
    emr_client = Boto3Util.create_emr_client(self.default_aws_region)
    self.emr_cluster_id = emr_client.run_job_flow(**job_flow_config)['JobFlowId']
    logging.debug("Test case specific EMR cluster id is {}".format(
        self.emr_cluster_id))

    # S3 mock plus every bucket the tests read from or write to.
    self.mock_s3 = moto.mock_s3()
    self.mock_s3.start()
    self.s3_resource = Boto3Util.create_s3_resource()
    for bucket_name in (self.default_dev_landing_bucket,
                        self.default_dev_lake_bucket,
                        self.default_dev_mart_cal_bucket,
                        self.default_dev_application_bucket,
                        self.default_log_bucket):
        self.s3_resource.create_bucket(Bucket=bucket_name)
def test_class(self):
    """Yield an EMRCleanup instance configured for ap-southeast-2 inside an
    active EMR mock."""
    with moto.mock_emr():
        settings = {
            "general": {"dry_run": False},
            "services": {"emr": {"clusters": {"clean": True, "ttl": 7}}},
        }
        cleanup = emr_cleanup.EMRCleanup(
            logging, {}, settings, {"AWS": {}}, "ap-southeast-2")
        yield cleanup
def test_list(self):
    """Listing clusters under a fresh mock must return an empty list."""
    with moto.mock_s3(), moto.mock_emr():
        config = spark_emr.util.load_config(CONFIG)
        listed = spark_emr.manager.list("spark_emr", config["region"])
        self.assertEqual(listed, [])
def test_cli_list(self):
    """The CLI 'list' command must print the expected column header."""
    with moto.mock_emr():
        main(["list", "--config", CONFIG])
        captured_out, _ = self.capsys.readouterr()
        self.assertTrue("id, name, reason, state, created" in captured_out)
def emr_client(request):
    """Yield a boto3 EMR client backed by the moto EMR mock (us-east-1)."""
    with mock_emr():
        client = boto3.client('emr', region_name='us-east-1')
        yield client
def emr_client(aws_credentials):
    """Yield a boto3 EMR client backed by the moto EMR mock (eu-west-1)."""
    with mock_emr():
        client = boto3.client("emr", region_name="eu-west-1")
        yield client
def test_add_step_to_cluster_fail_with_output(self):
    """A step that fails while its stderr.gz log IS written to S3 must make
    wait_for_step_completion() raise M3DAWSAPIException mentioning the log
    path, and the log content must be readable afterwards."""
    mock_emr_obj = moto.mock_emr()
    with mock_emr_obj:
        emr_cluster_client, emr_cluster_id = self.env_setup(
            self.emr_cluster_name,
            self.aws_region,
            self.aws_credentials,
            self.timeout_seconds,
            self.retry_seconds,
            self.long_timeout_seconds)

        # Bucket that will receive the step's stderr.gz log.
        s3_resource = Boto3Util.create_s3_resource()
        s3_resource.create_bucket(Bucket="mybucket")

        step_name = "Test_Step"
        command_str = "/usr/bin/spark-submit --class spark.job.main.class"
        emr_step_id = emr_cluster_client.add_step(step_name, command_str)

        cluster_steps = emr_cluster_client.get_list_of_steps()
        assert 1 == len(cluster_steps)
        assert cluster_steps[0] == emr_step_id

        emr_step_status, _ = emr_cluster_client.get_step_status(emr_step_id)
        assert emr_step_status == "STARTING"

        # "STARTING" is not a valid EMR Step state, so we will change it to "RUNNING"
        emr_backend = mock_emr_obj.backends[self.aws_region]
        fake_cluster = emr_backend.clusters[emr_cluster_id]
        fake_step = fake_cluster.steps[0]
        fake_step.state = "RUNNING"

        # Make sure that we do not wait for 300 seconds for gz file to be available.
        EMRClusterClient.AWSConstants.S3_FILE_AVAILABILITY_TIMEOUT_SECONDS = self.timeout_seconds

        # Required for correct log path generation in MockedMethod.
        MockedMethod.emr_cluster_id = emr_cluster_id

        stderr_gz_path = MockedMethod.log_file_template.format(
            emr_cluster_id=emr_cluster_id,
            emr_step_id=emr_step_id)
        expected_content = "Lots of content here!!!"

        def fail_step_and_write_output():
            # Runs in a background thread: fail the step, then upload the
            # gzipped log shortly afterwards so the client finds it.
            fake_step.state = "FAILED"
            time.sleep(0.3)
            compressed_content = TestEMRClusterClient._compress_string(
                expected_content)
            bucket, key = emr_cluster_client.s3_util.get_bucket_and_key(
                stderr_gz_path)
            s3_resource.Bucket(bucket).put_object(
                Key=key, Body=compressed_content)

        with pytest.raises(M3DAWSAPIException) as exc:
            # Wait for some time to let EMRClusterClient poll a few times.
            with ConcurrentExecutor(fail_step_and_write_output, 0.3):
                with patch(
                        "m3d.hadoop.emr.emr_cluster_client.EMRClusterClient.get_step_status",
                        side_effect=MockedMethod.get_step_status_mocked):
                    emr_cluster_client.wait_for_step_completion(
                        emr_step_id, self.long_timeout_seconds)

        # The raised exception must identify both the step and the log path.
        err_msg = "EMR Step with cluster_id='{}' and step_id='{}' failed to complete".\
            format(emr_cluster_id, emr_step_id)
        assert err_msg in str(exc.value)
        assert stderr_gz_path in str(exc.value)

        # The uploaded log must round-trip through the gzip reader intact.
        resulting_content = emr_cluster_client.s3_util.read_gzip_file_content(
            stderr_gz_path)
        assert expected_content == resulting_content
import pytest
import boto
from moto import mock_emr, mock_ec2, mock_ec2_deprecated
from aws_lambda_context import LambdaContext
from src.scripts.emr_create import lambda_handler as emr_create_cluster
import time
from moto.ec2.models import VPCBackend, VPC

# Module-level mocks: started once at import time so everything in this
# module runs against mocked EC2/EMR; they are never stopped here.
ec2 = mock_ec2()
ec2.start()
emr = mock_emr()
emr.start()

# Canned request payload for the EMR-create lambda under test; the name is
# timestamped so repeated runs get distinct cluster names.
cluster_create_request = {
    "api_request_id": "test_emr_create",
    "sub_type": "nonkerb",
    "role": "testing",
    "account": "example_account",
    "name": f"scheduled-testing-{int(time.time())}",
    "core_instance_count": "1",
    "task_instance_count": "3",
    "task_ebs_vol_size": "180",
    "custom_ami_id": "ami-075ac68c1cf8ba1c8",
    "bootstrap_actions": []
}


@pytest.fixture
def lambda_context():
    # NOTE(review): as visible here the fixture never returns or yields
    # `context` -- this looks truncated; confirm the full file ends the
    # fixture with `return context`.
    context = LambdaContext()
    context.aws_request_id = 'test_aws_request_id'
def moto_emr():
    """Yield inside an active moto EMR mock context."""
    emr_mock = moto.mock_emr()
    with emr_mock:
        yield True
def emr_client(aws_credentials):
    """Yield a boto3 EMR client backed by the moto EMR mock (us-east-1)."""
    with mock_emr():
        mocked_client = boto3.client('emr', region_name='us-east-1')
        yield mocked_client