Example #1
0
    def test_add_step_to_cluster_with_state_change(self):
        """Add one step, flip its mocked state, and wait for it to complete.

        The step is checked to be the only one on the cluster and to start
        out as "STARTING"; the moto backend is then edited directly to move
        it to "RUNNING" and, via ``ConcurrentExecutor``, to "COMPLETED"
        while ``wait_for_step_completion`` is in progress.
        """
        emr_mock = moto.mock_emr()
        with emr_mock:
            emr_cluster_client, emr_cluster_id = self.env_setup(
                self.emr_cluster_name,
                self.aws_region,
                self.aws_credentials,
                self.timeout_seconds,
                self.retry_seconds,
                self.long_timeout_seconds,
            )

            emr_step_id = emr_cluster_client.add_step(
                "Test_Step",
                "/usr/bin/spark-submit --class spark.job.main.class",
            )
            logging.info(str(emr_step_id))

            # The freshly added step must be the only step on the cluster.
            step_ids = emr_cluster_client.get_list_of_steps()
            assert 1 == len(step_ids)
            assert step_ids[0] == emr_step_id

            initial_status, _ = emr_cluster_client.get_step_status(emr_step_id)
            assert initial_status == "STARTING"

            # "STARTING" is not a valid EMR Step state, so switch the mocked
            # step to "RUNNING" by editing the moto backend directly.
            region_backend = emr_mock.backends[self.aws_region]
            mocked_cluster = region_backend.clusters[emr_cluster_id]
            mocked_cluster.steps[0].state = "RUNNING"

            def complete_step():
                mocked_cluster.steps[0].state = "COMPLETED"

            # ConcurrentExecutor presumably invokes complete_step after ~0.2s
            # so that the client polls a few times first -- confirm against
            # its definition.
            with ConcurrentExecutor(complete_step, 0.2):
                emr_cluster_client.wait_for_step_completion(
                    emr_step_id, self.long_timeout_seconds)
Example #2
0
    def test_get_step_output_path(self):
        """Check the stdout log path reported for a completed step.

        The mocked step is forced to "COMPLETED" before waiting, and the
        path returned by ``get_step_output_path`` is compared with the
        expected ``stdout.gz`` location under ``s3://mybucket/log/``.
        """
        emr_mock = moto.mock_emr()
        with emr_mock:
            emr_cluster_client, emr_cluster_id = self.env_setup(
                self.emr_cluster_name,
                self.aws_region,
                self.aws_credentials,
                self.timeout_seconds,
                self.retry_seconds,
                self.long_timeout_seconds,
            )

            emr_step_id = emr_cluster_client.add_step(
                "Test_Step",
                "/usr/bin/spark-submit --class spark.job.main.class",
            )

            # Mark the mocked step as COMPLETED straight in the moto backend.
            region_backend = emr_mock.backends[self.aws_region]
            mocked_cluster = region_backend.clusters[emr_cluster_id]
            mocked_cluster.steps[0].state = "COMPLETED"

            emr_cluster_client.wait_for_step_completion(
                emr_step_id, self.long_timeout_seconds)

            actual_path = emr_cluster_client.get_step_output_path(emr_step_id)

            expected_path = "s3://mybucket/log/{}/steps/{}/stdout.gz".format(
                emr_cluster_client.emr_cluster_id, emr_step_id)

            assert actual_path == expected_path
Example #3
0
def emr():
    """EMR mock service.

    Starts moto's EMR mock, creates two job flows named ``cluster0`` and
    ``cluster1``, and yields ``{'clusters': [...]}`` holding the
    ``run_job_flow`` responses. The mock is stopped when the generator
    finishes, including when cluster creation fails.
    """
    # TODO: implement fixture after moto is ready
    # https://github.com/spulec/moto/pull/456
    boto3.set_stream_logger()
    mock = mock_emr()
    mock.start()
    # try/finally: a failure while creating the clusters must not leave the
    # mock active for whatever runs next.
    try:
        client = boto3.client('emr')
        clusters = [
            client.run_job_flow(
                Name='cluster{}'.format(i),
                Instances={
                    'MasterInstanceType': 'c3.xlarge',
                    'SlaveInstanceType': 'c3.xlarge',
                    'InstanceCount': 3,
                    'Placement': {'AvailabilityZone': 'ap-northeast-1a'},
                    'KeepJobFlowAliveWhenNoSteps': True,
                },
                VisibleToAllUsers=True,
            )
            for i in range(2)
        ]
        yield {'clusters': clusters}
    finally:
        mock.stop()
Example #4
0
def emr_cluster(request):
    """Yield ``(emr_client, cluster, step_ids)`` for a mocked EMR cluster.

    Inside moto's EMR mock, a cluster named ``example_cluster`` is started
    in ``us-east-1`` and two steps ("example" and "example 2") are added.
    ``cluster`` and ``step_ids`` are the raw ``run_job_flow`` and
    ``add_job_flow_steps`` responses. ``request`` is not used by the body.
    """
    with mock_emr():
        emr_client = boto3.client('emr', region_name='us-east-1')

        cluster = emr_client.run_job_flow(
            Name='example_cluster',
            LogUri='s3://somes3bucket',
            Instances={
                'MasterInstanceType': 'c4.large',
                'SlaveInstanceType': 'c4.large',
                'InstanceCount': 3,
            },
            Tags=[{'Key': 'user_id', 'Value': '123-456'}],
        )

        # Both steps share everything but their name.
        steps = [
            {
                'Name': step_name,
                'ActionOnFailure': 'CONTINUE',
                'HadoopJarStep': {'Jar': 's3://runner.jar'},
            }
            for step_name in ("example", "example 2")
        ]
        step_ids = emr_client.add_job_flow_steps(
            JobFlowId=cluster['JobFlowId'],
            Steps=steps,
        )

        yield (emr_client, cluster, step_ids)
Example #5
0
def emr():
    """EMR mock service.

    Starts moto's EMR mock, points ``AWS_DEFAULT_REGION`` at ``us-east-1``,
    creates two job flows named ``cluster-00`` and ``cluster-01``, and
    yields ``{'clusters': [...]}`` holding the ``run_job_flow`` responses.

    On exit the mock is stopped and ``AWS_DEFAULT_REGION`` is put back to
    the value it had before (or removed if it was unset), also when cluster
    creation fails.
    """
    # FIXME: moto can only support us-east-1 when EMR module is used
    # https://github.com/spulec/moto/pull/456
    # https://github.com/spulec/moto/pull/375
    mock = mock_emr()
    mock.start()
    # Remember the caller's region so teardown restores it instead of
    # leaving an empty string behind.
    previous_region = os.environ.get('AWS_DEFAULT_REGION')
    os.environ['AWS_DEFAULT_REGION'] = 'us-east-1'
    try:
        client = boto3.client('emr')
        clusters = []
        for i in range(2):
            cluster = client.run_job_flow(
                Name='cluster-{:02d}'.format(i),
                Instances={
                    'MasterInstanceType': 'c3.xlarge',
                    'SlaveInstanceType': 'c3.xlarge',
                    'InstanceCount': 3,
                    'Placement': {
                        'AvailabilityZone': 'us-east-1'
                    },
                    'KeepJobFlowAliveWhenNoSteps': True,
                },
                VisibleToAllUsers=True,
            )
            clusters.append(cluster)
        yield {'clusters': clusters}
    finally:
        mock.stop()
        if previous_region is None:
            os.environ.pop('AWS_DEFAULT_REGION', None)
        else:
            os.environ['AWS_DEFAULT_REGION'] = previous_region
Example #6
0
def emr():
    """EMR mock service.

    Starts moto's EMR mock with ``AWS_DEFAULT_REGION`` set to ``us-east-1``
    and yields ``{'clusters': [...]}`` containing the ``run_job_flow``
    responses for two job flows (``cluster-00`` and ``cluster-01``).

    Teardown stops the mock and restores the previous ``AWS_DEFAULT_REGION``
    (removing the variable if it was not set before); this also happens when
    setup fails part-way through.
    """
    # FIXME: moto can only support us-east-1 when EMR module is used
    # https://github.com/spulec/moto/pull/456
    # https://github.com/spulec/moto/pull/375
    mock = mock_emr()
    mock.start()
    # Saved so the environment is restored rather than clobbered with ''.
    saved_region = os.environ.get('AWS_DEFAULT_REGION')
    os.environ['AWS_DEFAULT_REGION'] = 'us-east-1'
    try:
        client = boto3.client('emr')
        clusters = [
            client.run_job_flow(
                Name='cluster-{:02d}'.format(i),
                Instances={
                    'MasterInstanceType': 'c3.xlarge',
                    'SlaveInstanceType': 'c3.xlarge',
                    'InstanceCount': 3,
                    'Placement': {'AvailabilityZone': 'us-east-1'},
                    'KeepJobFlowAliveWhenNoSteps': True,
                },
                VisibleToAllUsers=True,
            )
            for i in range(2)
        ]
        yield {'clusters': clusters}
    finally:
        mock.stop()
        if saved_region is None:
            os.environ.pop('AWS_DEFAULT_REGION', None)
        else:
            os.environ['AWS_DEFAULT_REGION'] = saved_region
Example #7
0
def emr():
    """EMR mock service.

    Starts moto's EMR mock, creates two job flows (``cluster0`` and
    ``cluster1``) and yields ``{'clusters': [...]}`` with the
    ``run_job_flow`` responses. The mock is stopped on exit, even if
    creating the clusters raises.
    """
    # TODO: implement fixture after moto is ready
    # https://github.com/spulec/moto/pull/456
    boto3.set_stream_logger()
    mock = mock_emr()
    mock.start()

    # Everything after start() is guarded so the mock cannot leak into
    # later code when setup fails.
    try:
        client = boto3.client('emr')
        clusters = []
        for i in range(2):
            cluster = client.run_job_flow(
                Name='cluster{}'.format(i),
                Instances={
                    'MasterInstanceType': 'c3.xlarge',
                    'SlaveInstanceType': 'c3.xlarge',
                    'InstanceCount': 3,
                    'Placement': {
                        'AvailabilityZone': 'ap-northeast-1a'
                    },
                    'KeepJobFlowAliveWhenNoSteps': True,
                },
                VisibleToAllUsers=True,
            )
            clusters.append(cluster)
        yield {'clusters': clusters}
    finally:
        mock.stop()
Example #8
0
 def xxtest_start(self):
     """Start a job flow through EmrManager under mocked S3 and EMR.

     Loads CONFIG, creates the bucket named in its ``bootstrap_uri``, starts
     "TEST-EMR" and asserts that a job flow id comes back. The ``xx`` name
     prefix presumably keeps test runners from collecting it until the
     linked moto issue is fixed.
     """
     # need a fix https://github.com/spulec/moto/issues/1708
     with moto.mock_s3(), moto.mock_emr():
         config = spark_emr.util.load_config(CONFIG)
         bootstrap_bucket = spark_emr.util.S3Path(
             config["bootstrap_uri"]).bucket
         self._create_bucket(bootstrap_bucket)
         manager = spark_emr.manager.EmrManager(config)
         job_flow_id = manager.start(
             "TEST-EMR", "spark_emr_dummy.py 10", DUMMY_FOLDER, {},
             False, False)
         self.assertIsNotNone(job_flow_id)
Example #9
0
    def test_add_step_to_cluster_fail_without_output(self):
        """A failed step whose log file never shows up must raise.

        The step is switched to "FAILED" while ``wait_for_step_completion``
        is running; the test expects an ``M3DAWSAPIException`` whose message
        says the log file failed to become available within the timeout.
        """
        mock_emr_obj = moto.mock_emr()
        with mock_emr_obj:
            emr_cluster_client, emr_cluster_id = self.env_setup(
                self.emr_cluster_name, self.aws_region, self.aws_credentials,
                self.timeout_seconds, self.retry_seconds,
                self.long_timeout_seconds)

            # The bucket is created but no log object is ever written to it.
            s3_resource = Boto3Util.create_s3_resource()
            s3_resource.create_bucket(Bucket="mybucket")

            step_name = "Test_Step"
            command_str = "/usr/bin/spark-submit --class spark.job.main.class"

            emr_step_id = emr_cluster_client.add_step(step_name, command_str)

            # The new step must be the only step on the cluster.
            cluster_steps = emr_cluster_client.get_list_of_steps()
            assert 1 == len(cluster_steps)
            assert cluster_steps[0] == emr_step_id

            emr_step_status, _ = emr_cluster_client.get_step_status(
                emr_step_id)
            assert emr_step_status == "STARTING"

            # "STARTING" is not a valid EMR Step state, so we will change it to "RUNNING"
            emr_backend = mock_emr_obj.backends[self.aws_region]
            fake_cluster = emr_backend.clusters[emr_cluster_id]
            fake_step = fake_cluster.steps[0]
            fake_step.state = "RUNNING"

            # Callback handed to ConcurrentExecutor below.
            def fail_step():
                fake_step.state = "FAILED"

            # Make sure that we do not wait for 300 seconds for gz file to be available.
            # NOTE(review): this overwrites a class-level attribute and is not
            # restored within this test.
            EMRClusterClient.AWSConstants.S3_FILE_AVAILABILITY_TIMEOUT_SECONDS = self.timeout_seconds

            # Required for correct log path generation in MockedMethod.
            MockedMethod.emr_cluster_id = emr_cluster_id

            stderr_gz_path = MockedMethod.log_file_template.format(
                emr_cluster_id=emr_cluster_id, emr_step_id=emr_step_id)

            err_msg = "File {} failed to be available after {} seconds.".\
                format(stderr_gz_path, self.timeout_seconds)

            # NOTE(review): pytest treats `match` as a regular expression, so
            # special characters in err_msg are not matched literally --
            # consider re.escape.
            with pytest.raises(M3DAWSAPIException, match=err_msg):
                # Wait for some time to let EMRClusterClient poll a few times.
                with ConcurrentExecutor(fail_step, 0.4):
                    # get_step_status is replaced by MockedMethod's version
                    # for the duration of the wait.
                    with patch(
                            "m3d.hadoop.emr.emr_cluster_client.EMRClusterClient.get_step_status",
                            side_effect=MockedMethod.get_step_status_mocked):
                        emr_cluster_client.wait_for_step_completion(
                            emr_step_id, self.long_timeout_seconds)
Example #10
0
    def test_wait_for_bootstrapping_cluster(self):
        """Waiting on a cluster held in BOOTSTRAPPING must raise.

        The mocked cluster's state is forced to "BOOTSTRAPPING" and
        ``wait_for_cluster_startup`` is expected to raise
        ``M3DAWSAPIException`` with the "failed to start" message.
        """
        emr_mock = moto.mock_emr()
        with emr_mock:
            emr_cluster_client, emr_cluster_id = self.env_setup(
                self.emr_cluster_name,
                self.aws_region,
                self.aws_credentials,
                self.timeout_seconds,
                self.retry_seconds,
                self.long_timeout_seconds,
            )

            # Force the cluster state to BOOTSTRAPPING so that the wait
            # times out.
            region_backend = emr_mock.backends[self.aws_region]
            mocked_cluster = region_backend.clusters[emr_cluster_id]
            mocked_cluster.state = "BOOTSTRAPPING"

            expected_error = "Cluster {} failed to start after {} seconds.".format(
                emr_cluster_id, self.timeout_seconds)

            with pytest.raises(M3DAWSAPIException, match=expected_error):
                emr_cluster_client.wait_for_cluster_startup()
Example #11
0
    def setup_method(self, method):
        """Start the EMR and S3 mocks and create the per-test fixtures.

        After the parent setup, this starts moto's EMR mock, runs one job
        flow and stores its id in ``self.emr_cluster_id``, then starts the
        S3 mock and creates the landing, lake, mart-cal, application and log
        buckets. Neither mock is stopped here.
        """
        super(EMRSystemUnitTestBase, self).setup_method(method)

        # Setup EMR mock
        self.mock_emr = moto.mock_emr()
        self.mock_emr.start()

        self.emr_cluster_name = "test clustester for unit and integration tests"

        job_flow_kwargs = {
            'Instances': {
                'InstanceCount': 3,
                'KeepJobFlowAliveWhenNoSteps': True,
                'MasterInstanceType': 'c3.medium',
                'Placement': {'AvailabilityZone': 'test_zone'},
                'SlaveInstanceType': 'c3.xlarge',
            },
            'JobFlowRole': 'EMR_EC2_DefaultRole',
            'LogUri': 's3://{}/log/'.format(self.default_log_bucket),
            'Name': self.emr_cluster_name,
            'ServiceRole': 'EMR_DefaultRole',
            'VisibleToAllUsers': True,
        }

        emr_client = Boto3Util.create_emr_client(self.default_aws_region)
        job_flow = emr_client.run_job_flow(**job_flow_kwargs)
        self.emr_cluster_id = job_flow['JobFlowId']
        logging.debug("Test case specific EMR cluster id is {}".format(
            self.emr_cluster_id))

        # Setup S3 mock
        self.mock_s3 = moto.mock_s3()
        self.mock_s3.start()

        self.s3_resource = Boto3Util.create_s3_resource()
        # Buckets are created in the same order as before.
        for bucket_name in (
                self.default_dev_landing_bucket,
                self.default_dev_lake_bucket,
                self.default_dev_mart_cal_bucket,
                self.default_dev_application_bucket,
                self.default_log_bucket,
        ):
            self.s3_resource.create_bucket(Bucket=bucket_name)
Example #12
0
    def test_class(self):
        """Yield an ``EMRCleanup`` instance built under moto's EMR mock.

        The instance is constructed with the ``logging`` module as logger,
        an empty whitelist, settings that enable cluster cleaning with a ttl
        of 7 and dry-run disabled, an execution log of ``{"AWS": {}}``, and
        the region "ap-southeast-2".
        """
        cleanup_settings = {
            "general": {"dry_run": False},
            "services": {
                "emr": {
                    "clusters": {"clean": True, "ttl": 7},
                },
            },
        }

        with moto.mock_emr():
            yield emr_cleanup.EMRCleanup(
                logging,
                {},
                cleanup_settings,
                {"AWS": {}},
                "ap-southeast-2",
            )
Example #13
0
 def test_list(self):
     """Listing "spark_emr" clusters under fresh S3/EMR mocks returns []."""
     with moto.mock_s3(), moto.mock_emr():
         region = spark_emr.util.load_config(CONFIG)["region"]
         listed = spark_emr.manager.list("spark_emr", region)
         self.assertEqual(listed, [])
Example #14
0
 def test_cli_list(self):
     """The ``list`` CLI command prints the column header line.

     Runs ``main`` with the ``list`` sub-command under moto's EMR mock and
     checks the captured stdout for "id, name, reason, state, created".
     """
     with moto.mock_emr():
         main(["list", "--config", CONFIG])
         # Only stdout is inspected; the captured stderr is discarded.
         out, _ = self.capsys.readouterr()
         # assertIn reports both strings on failure, unlike assertTrue.
         self.assertIn("id, name, reason, state, created", out)
Example #15
0
def emr_client(request):
    """Yield a boto3 EMR client for ``us-east-1`` inside moto's EMR mock.

    ``request`` is accepted but not used by the body.
    """
    with mock_emr():
        client = boto3.client('emr', region_name='us-east-1')
        yield client
Example #16
0
def emr_client(aws_credentials):
    """Yield a boto3 EMR client for ``eu-west-1`` inside moto's EMR mock.

    ``aws_credentials`` is not used by the body; presumably it is a fixture
    requested for its side effects -- confirm against its definition.
    """
    with mock_emr():
        mocked_client = boto3.client("emr", region_name="eu-west-1")
        yield mocked_client
Example #17
0
    def test_add_step_to_cluster_fail_with_output(self):
        """A failed step whose log file appears must raise with its path.

        The step is switched to "FAILED" and a gzipped log object is written
        to S3 while ``wait_for_step_completion`` is running. The test expects
        an ``M3DAWSAPIException`` whose text contains both the "failed to
        complete" message and the log path, and then checks that the log
        content reads back unchanged.
        """
        mock_emr_obj = moto.mock_emr()
        with mock_emr_obj:
            emr_cluster_client, emr_cluster_id = self.env_setup(
                self.emr_cluster_name, self.aws_region, self.aws_credentials,
                self.timeout_seconds, self.retry_seconds,
                self.long_timeout_seconds)

            s3_resource = Boto3Util.create_s3_resource()
            s3_resource.create_bucket(Bucket="mybucket")

            step_name = "Test_Step"
            command_str = "/usr/bin/spark-submit --class spark.job.main.class"

            emr_step_id = emr_cluster_client.add_step(step_name, command_str)

            # The new step must be the only step on the cluster.
            cluster_steps = emr_cluster_client.get_list_of_steps()
            assert 1 == len(cluster_steps)
            assert cluster_steps[0] == emr_step_id

            emr_step_status, _ = emr_cluster_client.get_step_status(
                emr_step_id)
            assert emr_step_status == "STARTING"

            # "STARTING" is not a valid EMR Step state, so we will change it to "RUNNING"
            emr_backend = mock_emr_obj.backends[self.aws_region]
            fake_cluster = emr_backend.clusters[emr_cluster_id]
            fake_step = fake_cluster.steps[0]
            fake_step.state = "RUNNING"

            # Make sure that we do not wait for 300 seconds for gz file to be available.
            # NOTE(review): this overwrites a class-level attribute and is not
            # restored within this test.
            EMRClusterClient.AWSConstants.S3_FILE_AVAILABILITY_TIMEOUT_SECONDS = self.timeout_seconds

            # Required for correct log path generation in MockedMethod.
            MockedMethod.emr_cluster_id = emr_cluster_id

            stderr_gz_path = MockedMethod.log_file_template.format(
                emr_cluster_id=emr_cluster_id, emr_step_id=emr_step_id)

            expected_content = "Lots of content here!!!"

            # Callback handed to ConcurrentExecutor below: fail the step
            # first, then upload the log 0.3s later.
            def fail_step_and_write_output():
                fake_step.state = "FAILED"

                # Delay between marking the step failed and the log object
                # becoming available in S3.
                time.sleep(0.3)

                compressed_content = TestEMRClusterClient._compress_string(
                    expected_content)

                bucket, key = emr_cluster_client.s3_util.get_bucket_and_key(
                    stderr_gz_path)
                s3_resource.Bucket(bucket).put_object(Key=key,
                                                      Body=compressed_content)

            with pytest.raises(M3DAWSAPIException) as exc:
                # Wait for some time to let EMRClusterClient poll a few times.
                with ConcurrentExecutor(fail_step_and_write_output, 0.3):
                    # get_step_status is replaced by MockedMethod's version
                    # for the duration of the wait.
                    with patch(
                            "m3d.hadoop.emr.emr_cluster_client.EMRClusterClient.get_step_status",
                            side_effect=MockedMethod.get_step_status_mocked):
                        emr_cluster_client.wait_for_step_completion(
                            emr_step_id, self.long_timeout_seconds)

            err_msg = "EMR Step with cluster_id='{}' and step_id='{}' failed to complete".\
                format(emr_cluster_id, emr_step_id)

            # Substring checks rather than a regex match on the message.
            assert err_msg in str(exc.value)
            assert stderr_gz_path in str(exc.value)

            # The uploaded log must decompress back to the original text.
            resulting_content = emr_cluster_client.s3_util.read_gzip_file_content(
                stderr_gz_path)
            assert expected_content == resulting_content
Example #18
0
import pytest
import boto
from moto import mock_emr, mock_ec2, mock_ec2_deprecated
from aws_lambda_context import LambdaContext
from src.scripts.emr_create import lambda_handler as emr_create_cluster
import time
from moto.ec2.models import VPCBackend, VPC

# Start moto's EC2 mock as soon as this module is imported; nothing in the
# visible code stops it again.
ec2 = mock_ec2()
ec2.start()

# Start moto's EMR mock at import time as well.
emr = mock_emr()
emr.start()

# Request payload for the cluster-creation tests (presumably passed to
# emr_create_cluster as the Lambda event -- confirm against the test bodies).
cluster_create_request = {
        "api_request_id": "test_emr_create",
        "sub_type": "nonkerb",
        "role": "testing",
        "account": "example_account",
        # The name embeds the import-time Unix timestamp in whole seconds.
        "name": f"scheduled-testing-{int(time.time())}",
        "core_instance_count": "1",
        "task_instance_count": "3",
        "task_ebs_vol_size": "180",
        "custom_ami_id": "ami-075ac68c1cf8ba1c8",
        "bootstrap_actions": []
}

@pytest.fixture
def lambda_context():
        """Return a ``LambdaContext`` with a fixed ``aws_request_id``."""
        context = LambdaContext()
        context.aws_request_id = 'test_aws_request_id'
        # Without this return, tests requesting the fixture receive None.
        return context
Example #19
0
def moto_emr():
    """Keep moto's EMR mock active while the consumer runs; yields ``True``."""
    emr_mock = moto.mock_emr()
    with emr_mock:
        yield True
Example #20
0
def emr_client(aws_credentials):
    """Yield a boto3 EMR client for ``us-east-1`` inside moto's EMR mock.

    ``aws_credentials`` is not used by the body; presumably it is a fixture
    requested for its side effects -- confirm against its definition.
    """
    with mock_emr():
        mocked_client = boto3.client('emr', region_name='us-east-1')
        yield mocked_client