Example #1
def test_emr_add_tags_and_describe_cluster(emr_cluster_config):
    context = create_test_pipeline_execution_context()
    emr = EmrJobRunner(region=REGION)

    cluster_id = emr.run_job_flow(context, emr_cluster_config)

    emr.add_tags(context, {'foobar': 'v1', 'baz': '123'}, cluster_id)

    tags = emr.describe_cluster(cluster_id)['Cluster']['Tags']

    assert {'Key': 'baz', 'Value': '123'} in tags
    assert {'Key': 'foobar', 'Value': 'v1'} in tags
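
Most of the examples on this page assume an `emr_cluster_config` pytest fixture that supplies the keyword arguments `EmrJobRunner.run_job_flow` forwards to boto3. The fixture itself is not shown anywhere on this page; the following is a minimal, hypothetical sketch of what it might look like, using standard boto3 `run_job_flow` parameters with placeholder values (note that Examples #3 and #4 below expect a `LogUri` key).

import pytest

@pytest.fixture
def emr_cluster_config():
    # Hypothetical fixture (the real one is not shown in these examples): it is assumed to
    # return the keyword arguments that EmrJobRunner.run_job_flow passes through to boto3's
    # EMR run_job_flow call. All values are placeholders.
    return {
        'Name': 'test-emr-cluster',
        'LogUri': 's3://emr-cluster-logs/elasticmapreduce/',  # read by log_location_for_cluster
        'ReleaseLabel': 'emr-5.29.0',
        'Instances': {
            'MasterInstanceType': 'm5.xlarge',
            'SlaveInstanceType': 'm5.xlarge',
            'InstanceCount': 3,
            'KeepJobFlowAliveWhenNoSteps': True,
        },
        'JobFlowRole': 'EMR_EC2_DefaultRole',
        'ServiceRole': 'EMR_DefaultRole',
        'VisibleToAllUsers': True,
    }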
Example #2
def test_emr_add_tags_and_describe_cluster(emr_cluster_config):
    context = create_test_pipeline_execution_context()
    emr = EmrJobRunner(region=REGION)

    cluster_id = emr.run_job_flow(context.log, emr_cluster_config)

    emr.add_tags(context.log, {"foobar": "v1", "baz": "123"}, cluster_id)

    tags = emr.describe_cluster(cluster_id)["Cluster"]["Tags"]

    assert {"Key": "baz", "Value": "123"} in tags
    assert {"Key": "foobar", "Value": "v1"} in tags
Example #3
def test_emr_log_location_for_cluster(emr_cluster_config, mock_s3_bucket):
    context = create_test_pipeline_execution_context()
    emr = EmrJobRunner(region=REGION)
    cluster_id = emr.run_job_flow(context.log, emr_cluster_config)
    assert emr.log_location_for_cluster(cluster_id) == (mock_s3_bucket.name,
                                                        "elasticmapreduce/")

    # Should raise when the log URI is missing
    emr_cluster_config = copy.deepcopy(emr_cluster_config)
    del emr_cluster_config["LogUri"]
    cluster_id = emr.run_job_flow(context.log, emr_cluster_config)
    with pytest.raises(EmrError) as exc_info:
        emr.log_location_for_cluster(cluster_id)

    assert "Log URI not specified, cannot retrieve step execution logs" in str(
        exc_info.value)
Example #4
def test_emr_log_location_for_cluster(emr_cluster_config):
    context = create_test_pipeline_execution_context()
    emr = EmrJobRunner(region=REGION)
    cluster_id = emr.run_job_flow(context, emr_cluster_config)
    assert emr.log_location_for_cluster(cluster_id) == ('emr-cluster-logs',
                                                        'elasticmapreduce/')

    # Should raise when the log URI is missing
    emr_cluster_config = copy.deepcopy(emr_cluster_config)
    del emr_cluster_config['LogUri']
    cluster_id = emr.run_job_flow(context, emr_cluster_config)
    with pytest.raises(EmrError) as exc_info:
        emr.log_location_for_cluster(cluster_id)

    assert 'Log URI not specified, cannot retrieve step execution logs' in str(
        exc_info.value)
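
Examples #3 and #4 both expect `log_location_for_cluster` to return a `(bucket, key_prefix)` tuple derived from the cluster's `LogUri`. The dagster_aws implementation is not shown on this page; the following is a minimal sketch of that parsing, using a hypothetical helper built on `urllib.parse`.

from urllib.parse import urlparse

def split_log_uri(log_uri):
    # Illustrative helper, not the dagster_aws implementation: split an S3 LogUri such as
    # 's3://emr-cluster-logs/elasticmapreduce/' into the (bucket, prefix) pair the tests expect.
    parsed = urlparse(log_uri)
    return parsed.netloc, parsed.path.lstrip('/')

assert split_log_uri('s3://emr-cluster-logs/elasticmapreduce/') == ('emr-cluster-logs',
                                                                    'elasticmapreduce/')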
Example #5
    def _get_emr_step_def(self, run_id, step_key, solid_name):
        """From the local Dagster instance, construct EMR steps that will kick off execution on a
        remote EMR cluster.
        """
        from dagster_spark.utils import flatten_dict, format_for_cli

        action_on_failure = self.action_on_failure

        # Execute Solid via spark-submit
        conf = dict(flatten_dict(self.spark_config))
        conf["spark.app.name"] = conf.get("spark.app.name", solid_name)

        check.invariant(
            conf.get("spark.master", "yarn") == "yarn",
            desc="spark.master is configured as %s; cannot set Spark master on EMR to anything "
            'other than "yarn"' % conf.get("spark.master"),
        )

        command = (
            [
                EMR_SPARK_HOME + "bin/spark-submit",
                "--master",
                "yarn",
                "--deploy-mode",
                conf.get("spark.submit.deployMode", "client"),
            ]
            + format_for_cli(list(flatten_dict(conf)))
            + [
                "--py-files",
                self._artifact_s3_uri(run_id, step_key, CODE_ZIP_NAME),
                self._artifact_s3_uri(run_id, step_key, self._main_file_name()),
                self.staging_bucket,
                self._artifact_s3_key(run_id, step_key, PICKLED_STEP_RUN_REF_FILE_NAME),
            ]
        )

        return EmrJobRunner.construct_step_dict_for_command(
            "Execute Solid %s" % solid_name, command, action_on_failure=action_on_failure
        )
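
`EmrJobRunner.construct_step_dict_for_command` itself is not shown on this page, but the mocked `describe_step` response in Example #10 (`command-runner.jar`, `Args`, `ActionOnFailure`) suggests it builds the step structure that boto3's `add_job_flow_steps` accepts. A rough, non-authoritative sketch of that shape, with placeholder values:

# Illustrative only: the approximate shape of a step dict as passed to boto3's
# add_job_flow_steps(..., Steps=[...]). Field values are placeholders inferred from the
# spark-submit command assembled above and the mocked response in Example #10.
example_step_def = {
    "Name": "Execute Solid my_solid",
    "ActionOnFailure": "CONTINUE",
    "HadoopJarStep": {
        "Jar": "command-runner.jar",  # EMR's generic "run this command" step
        "Args": [
            "/usr/lib/spark/bin/spark-submit",
            "--master", "yarn",
            "--deploy-mode", "client",
            # ...followed by the flattened spark conf flags, --py-files, the main file,
            # the staging bucket, and the pickled step run ref key, as built above.
        ],
    },
}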
Example #6
    def _get_emr_step_def(self, run_id, step_key, solid_name):
        '''From the local Dagster instance, construct EMR steps that will kick off execution on a
        remote EMR cluster.
        '''
        action_on_failure = self.action_on_failure

        # Execute Solid via spark-submit
        conf = dict(flatten_dict(self.spark_config))
        conf['spark.app.name'] = conf.get('spark.app.name', solid_name)

        check.invariant(
            conf.get('spark.master', 'yarn') == 'yarn',
            desc='spark.master is configured as %s; cannot set Spark master on EMR to anything '
            'other than "yarn"' % conf.get('spark.master'),
        )

        command = ([
            EMR_SPARK_HOME + 'bin/spark-submit',
            '--master',
            'yarn',
            '--deploy-mode',
            conf.get('spark.submit.deployMode', 'client'),
        ] + format_for_cli(list(flatten_dict(conf))) + [
            '--py-files',
            self._artifact_s3_uri(run_id, step_key, CODE_ZIP_NAME),
            self._artifact_s3_uri(run_id, step_key, self._main_file_name()),
            self.staging_bucket,
            self._artifact_s3_key(run_id, step_key,
                                  PICKLED_STEP_RUN_REF_FILE_NAME),
        ])

        return EmrJobRunner.construct_step_dict_for_command(
            'Execute Solid %s' % solid_name,
            command,
            action_on_failure=action_on_failure)
Example #7
def test_emr_create_cluster(emr_cluster_config):
    context = create_test_pipeline_execution_context()
    cluster = EmrJobRunner(region=REGION)
    cluster_id = cluster.run_job_flow(context, emr_cluster_config)
    assert cluster_id.startswith('j-')
Example #8
class EmrPySparkStepLauncher(StepLauncher):
    def __init__(
        self,
        region_name,
        staging_bucket,
        staging_prefix,
        wait_for_logs,
        action_on_failure,
        cluster_id,
        spark_config,
        local_pipeline_package_path,
        deploy_local_pipeline_package,
        s3_pipeline_package_path=None,
    ):
        self.region_name = check.str_param(region_name, 'region_name')
        self.staging_bucket = check.str_param(staging_bucket, 'staging_bucket')
        self.staging_prefix = check.str_param(staging_prefix, 'staging_prefix')
        self.wait_for_logs = check.bool_param(wait_for_logs, 'wait_for_logs')
        self.action_on_failure = check.str_param(action_on_failure,
                                                 'action_on_failure')
        self.cluster_id = check.str_param(cluster_id, 'cluster_id')
        self.spark_config = spark_config

        check.invariant(
            not deploy_local_pipeline_package or not s3_pipeline_package_path,
            'If deploy_local_pipeline_package is set to True, s3_pipeline_package_path should not '
            'also be set.',
        )

        self.local_pipeline_package_path = check.str_param(
            local_pipeline_package_path, 'local_pipeline_package_path')
        self.deploy_local_pipeline_package = check.bool_param(
            deploy_local_pipeline_package, 'deploy_local_pipeline_package')
        self.s3_pipeline_package_path = check.opt_str_param(
            s3_pipeline_package_path, 's3_pipeline_package_path')

        self.emr_job_runner = EmrJobRunner(region=self.region_name)

    def _post_artifacts(self, log, step_run_ref, run_id, step_key):
        '''
        Synchronize the step run ref and pyspark code to an S3 staging bucket for use on EMR.

        For the zip file, consider the following toy example:

            # Folder: my_pyspark_project/
            # a.py
            def foo():
                print(1)

            # b.py
            def bar():
                print(2)

            # main.py
            from a import foo
            from b import bar

            foo()
            bar()

        This will zip up `my_pyspark_project/` as `my_pyspark_project.zip`. Then, when running
        `spark-submit --py-files my_pyspark_project.zip emr_step_main.py` on EMR this will
        print 1, 2.
        '''

        with seven.TemporaryDirectory() as temp_dir:
            s3 = boto3.client('s3', region_name=self.region_name)

            # Upload step run ref
            def _upload_file_to_s3(local_path, s3_filename):
                key = self._artifact_s3_key(run_id, step_key, s3_filename)
                s3_uri = self._artifact_s3_uri(run_id, step_key, s3_filename)
                log.debug('Uploading file {local_path} to {s3_uri}'.format(
                    local_path=local_path, s3_uri=s3_uri))
                s3.upload_file(Filename=local_path,
                               Bucket=self.staging_bucket,
                               Key=key)

            # Upload main file.
            # The remote Dagster installation should also have the file, but locating it there
            # could be a pain.
            main_local_path = self._main_file_local_path()
            _upload_file_to_s3(main_local_path, self._main_file_name())

            if self.deploy_local_pipeline_package:
                # Zip and upload package containing pipeline
                zip_local_path = os.path.join(temp_dir, CODE_ZIP_NAME)
                build_pyspark_zip(zip_local_path,
                                  self.local_pipeline_package_path)
                _upload_file_to_s3(zip_local_path, CODE_ZIP_NAME)

            # Create step run ref pickle file
            step_run_ref_local_path = os.path.join(
                temp_dir, PICKLED_STEP_RUN_REF_FILE_NAME)
            with open(step_run_ref_local_path, 'wb') as step_pickle_file:
                pickle.dump(step_run_ref, step_pickle_file)

            _upload_file_to_s3(step_run_ref_local_path,
                               PICKLED_STEP_RUN_REF_FILE_NAME)

    def launch_step(self, step_context, prior_attempts_count):
        step_run_ref = step_context_to_step_run_ref(
            step_context, prior_attempts_count,
            self.local_pipeline_package_path)

        run_id = step_context.pipeline_run.run_id
        log = step_context.log

        step_key = step_run_ref.step_key
        self._post_artifacts(log, step_run_ref, run_id, step_key)

        emr_step_def = self._get_emr_step_def(run_id, step_key,
                                              step_context.solid.name)
        emr_step_id = self.emr_job_runner.add_job_flow_steps(
            log, self.cluster_id, [emr_step_def])[0]

        s3 = boto3.resource('s3', region_name=self.region_name)
        for event in self.wait_for_completion(log, s3, run_id, step_key,
                                              emr_step_id):
            log_step_event(step_context, event)
            yield event

        if self.wait_for_logs:
            self._log_logs_from_s3(log, emr_step_id)

    def wait_for_completion(self,
                            log,
                            s3,
                            run_id,
                            step_key,
                            emr_step_id,
                            check_interval=15):
        ''' We want to wait for the EMR steps to complete, and while that's happening, we want to
        yield any events that have been written to S3 for us by the remote process.
        After the EMR steps complete, we want a final chance to fetch events before finishing
        the step.
        '''
        done = False
        all_events = []
        while not done:
            time.sleep(check_interval)  # AWS rate-limits us if we poll it too often
            done = self.emr_job_runner.is_emr_step_complete(
                log, self.cluster_id, emr_step_id)

            all_events_new = self.read_events(s3, run_id, step_key)
            if len(all_events_new) > len(all_events):
                for i in range(len(all_events), len(all_events_new)):
                    yield all_events_new[i]
                all_events = all_events_new

    def read_events(self, s3, run_id, step_key):
        events_s3_obj = s3.Object(  # pylint: disable=no-member
            self.staging_bucket,
            self._artifact_s3_key(run_id, step_key, PICKLED_EVENTS_FILE_NAME))

        try:
            events_data = events_s3_obj.get()['Body'].read()
            return pickle.loads(events_data)
        except ClientError as ex:
            # The file might not be there yet, which is fine
            if ex.response['Error']['Code'] == 'NoSuchKey':
                return []
            else:
                raise ex

    def _log_logs_from_s3(self, log, emr_step_id):
        '''Retrieves the logs from the remote PySpark process that EMR posted to S3 and logs
        them to the given log.'''
        stdout_log, stderr_log = self.emr_job_runner.retrieve_logs_for_step_id(
            log, self.cluster_id, emr_step_id)
        # Since stderr is YARN / Hadoop Log4J output, parse and reformat those log lines for
        # Dagster's logging system.
        records = parse_hadoop_log4j_records(stderr_log)
        for record in records:
            log._log(  # pylint: disable=protected-access
                record.level, record.logger + ': ' + record.message, {})
        log.info(stdout_log)

    def _get_emr_step_def(self, run_id, step_key, solid_name):
        '''From the local Dagster instance, construct EMR steps that will kick off execution on a
        remote EMR cluster.
        '''
        action_on_failure = self.action_on_failure

        # Execute Solid via spark-submit
        conf = dict(flatten_dict(self.spark_config))
        conf['spark.app.name'] = conf.get('spark.app.name', solid_name)

        check.invariant(
            conf.get('spark.master', 'yarn') == 'yarn',
            desc='spark.master is configured as %s; cannot set Spark master on EMR to anything '
            'other than "yarn"' % conf.get('spark.master'),
        )

        command = ([
            EMR_SPARK_HOME + 'bin/spark-submit',
            '--master',
            'yarn',
            '--deploy-mode',
            conf.get('spark.submit.deployMode', 'client'),
        ] + format_for_cli(list(flatten_dict(conf))) + [
            '--py-files',
            self._artifact_s3_uri(run_id, step_key, CODE_ZIP_NAME),
            self._artifact_s3_uri(run_id, step_key, self._main_file_name()),
            self.staging_bucket,
            self._artifact_s3_key(run_id, step_key,
                                  PICKLED_STEP_RUN_REF_FILE_NAME),
        ])

        return EmrJobRunner.construct_step_dict_for_command(
            'Execute Solid %s' % solid_name,
            command,
            action_on_failure=action_on_failure)

    def _main_file_name(self):
        return os.path.basename(self._main_file_local_path())

    def _main_file_local_path(self):
        return emr_step_main.__file__

    def _artifact_s3_uri(self, run_id, step_key, filename):
        key = self._artifact_s3_key(run_id, step_key, filename)
        return 's3://{bucket}/{key}'.format(bucket=self.staging_bucket,
                                            key=key)

    def _artifact_s3_key(self, run_id, step_key, filename):
        return '/'.join([
            self.staging_prefix, run_id, step_key,
            os.path.basename(filename)
        ])
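
In normal use the launcher is wired up through dagster_aws's resource configuration rather than constructed by hand, but as a rough usage sketch it can also be instantiated directly with the parameters from the `__init__` above. All values below are placeholders.

# Placeholder values throughout; the parameters mirror EmrPySparkStepLauncher.__init__
# as defined in this example (the older pipeline-based signature).
launcher = EmrPySparkStepLauncher(
    region_name='us-west-1',
    staging_bucket='my-staging-bucket',
    staging_prefix='emr_staging',
    wait_for_logs=False,
    action_on_failure='CANCEL_AND_WAIT',
    cluster_id='j-XXXXXXXXXXXXX',
    spark_config={'spark.executor.memory': '2g'},
    local_pipeline_package_path='/path/to/my_pipeline_package/',
    deploy_local_pipeline_package=True,
)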
Example #9
class EmrPySparkStepLauncher(StepLauncher):
    def __init__(
        self,
        region_name,
        staging_bucket,
        staging_prefix,
        wait_for_logs,
        action_on_failure,
        cluster_id,
        spark_config,
        local_job_package_path,
        deploy_local_job_package,
        s3_job_package_path=None,
    ):
        self.region_name = check.str_param(region_name, "region_name")
        self.staging_bucket = check.str_param(staging_bucket, "staging_bucket")
        self.staging_prefix = check.str_param(staging_prefix, "staging_prefix")
        self.wait_for_logs = check.bool_param(wait_for_logs, "wait_for_logs")
        self.action_on_failure = check.str_param(action_on_failure,
                                                 "action_on_failure")
        self.cluster_id = check.str_param(cluster_id, "cluster_id")
        self.spark_config = spark_config

        check.invariant(
            not deploy_local_job_package or not s3_job_package_path,
            "If deploy_local_job_package is set to True, s3_job_package_path should not "
            "also be set.",
        )

        self.local_job_package_path = check.str_param(
            local_job_package_path, "local_job_package_path")
        self.deploy_local_job_package = check.bool_param(
            deploy_local_job_package, "deploy_local_job_package")
        self.s3_job_package_path = check.opt_str_param(s3_job_package_path,
                                                       "s3_job_package_path")

        self.emr_job_runner = EmrJobRunner(region=self.region_name)

    def _post_artifacts(self, log, step_run_ref, run_id, step_key):
        """
        Synchronize the step run ref and pyspark code to an S3 staging bucket for use on EMR.

        For the zip file, consider the following toy example:

            # Folder: my_pyspark_project/
            # a.py
            def foo():
                print(1)

            # b.py
            def bar():
                print(2)

            # main.py
            from a import foo
            from b import bar

            foo()
            bar()

        This will zip up `my_pyspark_project/` as `my_pyspark_project.zip`. Then, when running
        `spark-submit --py-files my_pyspark_project.zip emr_step_main.py` on EMR this will
        print 1, 2.
        """
        from dagster_pyspark.utils import build_pyspark_zip

        with tempfile.TemporaryDirectory() as temp_dir:
            s3 = boto3.client("s3", region_name=self.region_name)

            # Upload step run ref
            def _upload_file_to_s3(local_path, s3_filename):
                key = self._artifact_s3_key(run_id, step_key, s3_filename)
                s3_uri = self._artifact_s3_uri(run_id, step_key, s3_filename)
                log.debug("Uploading file {local_path} to {s3_uri}".format(
                    local_path=local_path, s3_uri=s3_uri))
                s3.upload_file(Filename=local_path,
                               Bucket=self.staging_bucket,
                               Key=key)

            # Upload main file.
            # The remote Dagster installation should also have the file, but locating it there
            # could be a pain.
            main_local_path = self._main_file_local_path()
            _upload_file_to_s3(main_local_path, self._main_file_name())

            if self.deploy_local_job_package:
                # Zip and upload package containing job
                zip_local_path = os.path.join(temp_dir, CODE_ZIP_NAME)

                build_pyspark_zip(zip_local_path, self.local_job_package_path)
                _upload_file_to_s3(zip_local_path, CODE_ZIP_NAME)

            # Create step run ref pickle file
            step_run_ref_local_path = os.path.join(
                temp_dir, PICKLED_STEP_RUN_REF_FILE_NAME)
            with open(step_run_ref_local_path, "wb") as step_pickle_file:
                pickle.dump(step_run_ref, step_pickle_file)

            _upload_file_to_s3(step_run_ref_local_path,
                               PICKLED_STEP_RUN_REF_FILE_NAME)

    def launch_step(self, step_context, prior_attempts_count):
        step_run_ref = step_context_to_step_run_ref(
            step_context, prior_attempts_count, self.local_job_package_path)

        run_id = step_context.pipeline_run.run_id
        log = step_context.log

        step_key = step_run_ref.step_key
        self._post_artifacts(log, step_run_ref, run_id, step_key)

        emr_step_def = self._get_emr_step_def(run_id, step_key,
                                              step_context.solid.name)
        emr_step_id = self.emr_job_runner.add_job_flow_steps(
            log, self.cluster_id, [emr_step_def])[0]

        yield from self.wait_for_completion_and_log(log, run_id, step_key,
                                                    emr_step_id, step_context)

    def wait_for_completion_and_log(self, log, run_id, step_key, emr_step_id,
                                    step_context):
        s3 = boto3.resource("s3", region_name=self.region_name)
        try:
            for event in self.wait_for_completion(log, s3, run_id, step_key,
                                                  emr_step_id):
                log_step_event(step_context, event)
                yield event
        except EmrError as emr_error:
            if self.wait_for_logs:
                self._log_logs_from_s3(log, emr_step_id)
            raise emr_error

        if self.wait_for_logs:
            self._log_logs_from_s3(log, emr_step_id)

    def wait_for_completion(self,
                            log,
                            s3,
                            run_id,
                            step_key,
                            emr_step_id,
                            check_interval=15):
        """We want to wait for the EMR steps to complete, and while that's happening, we want to
        yield any events that have been written to S3 for us by the remote process.
        After the EMR steps complete, we want a final chance to fetch events before finishing
        the step.
        """
        done = False
        all_events = []
        # If this is being called within a `capture_interrupts` context, allow interrupts
        # while waiting for the pyspark execution to complete, so that we can terminate slow or
        # hanging steps
        while not done:
            with raise_execution_interrupts():
                time.sleep(check_interval)  # AWS rate-limits us if we poll it too often
                done = self.emr_job_runner.is_emr_step_complete(
                    log, self.cluster_id, emr_step_id)

                all_events_new = self.read_events(s3, run_id, step_key)

            if len(all_events_new) > len(all_events):
                for i in range(len(all_events), len(all_events_new)):
                    yield all_events_new[i]
                all_events = all_events_new

    def read_events(self, s3, run_id, step_key):
        events_s3_obj = s3.Object(  # pylint: disable=no-member
            self.staging_bucket,
            self._artifact_s3_key(run_id, step_key, PICKLED_EVENTS_FILE_NAME))

        try:
            events_data = events_s3_obj.get()["Body"].read()
            return pickle.loads(events_data)
        except ClientError as ex:
            # The file might not be there yet, which is fine
            if ex.response["Error"]["Code"] == "NoSuchKey":
                return []
            else:
                raise ex

    def _log_logs_from_s3(self, log, emr_step_id):
        """Retrieves the logs from the remote PySpark process that EMR posted to S3 and logs
        them to the given log."""
        stdout_log, stderr_log = self.emr_job_runner.retrieve_logs_for_step_id(
            log, self.cluster_id, emr_step_id)
        # Since stderr is YARN / Hadoop Log4J output, parse and reformat those log lines for
        # Dagster's logging system.
        records = parse_hadoop_log4j_records(stderr_log)
        for record in records:
            if record.level:
                log.log(
                    level=record.level,
                    msg="".join([
                        "Spark Driver stderr: ", record.logger, ": ",
                        record.message
                    ]),
                )
            else:
                log.debug(f"Spark Driver stderr: {record.message}")

        sys.stdout.write("---------- Spark Driver stdout: ----------\n" +
                         stdout_log + "\n" +
                         "---------- End of Spark Driver stdout ----------\n")

    def _get_emr_step_def(self, run_id, step_key, solid_name):
        """From the local Dagster instance, construct EMR steps that will kick off execution on a
        remote EMR cluster.
        """
        from dagster_spark.utils import flatten_dict, format_for_cli

        action_on_failure = self.action_on_failure

        # Execute Solid via spark-submit
        conf = dict(flatten_dict(self.spark_config))
        conf["spark.app.name"] = conf.get("spark.app.name", solid_name)

        check.invariant(
            conf.get("spark.master", "yarn") == "yarn",
            desc="spark.master is configured as %s; cannot set Spark master on EMR to anything "
            'other than "yarn"' % conf.get("spark.master"),
        )

        command = ([
            EMR_SPARK_HOME + "bin/spark-submit",
            "--master",
            "yarn",
            "--deploy-mode",
            conf.get("spark.submit.deployMode", "client"),
        ] + format_for_cli(list(flatten_dict(conf))) + [
            "--py-files",
            self._artifact_s3_uri(run_id, step_key, CODE_ZIP_NAME),
            self._artifact_s3_uri(run_id, step_key, self._main_file_name()),
            self.staging_bucket,
            self._artifact_s3_key(run_id, step_key,
                                  PICKLED_STEP_RUN_REF_FILE_NAME),
        ])

        return EmrJobRunner.construct_step_dict_for_command(
            "Execute Solid/Op %s" % solid_name,
            command,
            action_on_failure=action_on_failure)

    def _main_file_name(self):
        return os.path.basename(self._main_file_local_path())

    def _main_file_local_path(self):
        return emr_step_main.__file__

    def _artifact_s3_uri(self, run_id, step_key, filename):
        key = self._artifact_s3_key(run_id, step_key, filename)
        return "s3://{bucket}/{key}".format(bucket=self.staging_bucket,
                                            key=key)

    def _artifact_s3_key(self, run_id, step_key, filename):
        return "/".join([
            self.staging_prefix, run_id, step_key,
            os.path.basename(filename)
        ])
Example #10
def test_emr_wait_for_step(emr_cluster_config):
    context = create_test_pipeline_execution_context()
    emr = EmrJobRunner(region=REGION, check_cluster_every=1)

    cluster_id = emr.run_job_flow(context, emr_cluster_config)

    step_name = 'test_step'
    step_cmd = ['ls', '/']
    step_ids = emr.add_job_flow_steps(
        context, cluster_id,
        [emr.construct_step_dict_for_command(step_name, step_cmd)])

    def get_step_dict(step_id, step_state):
        return {
            'Step': {
                'Id': step_id,
                'Name': step_name,
                'Config': {
                    'Jar': 'command-runner.jar',
                    'Properties': {},
                    'Args': step_cmd
                },
                'ActionOnFailure': 'CONTINUE',
                'Status': {
                    'State': step_state,
                    'StateChangeReason': {
                        'Message': 'everything is hosed'
                    },
                    'Timeline': {
                        'StartDateTime': _boto3_now()
                    },
                },
            },
        }

    calls = {'num_calls': 0, 'final_state': 'COMPLETED'}

    def new_describe_step(_, cluster_id, step_id):
        calls['num_calls'] += 1

        if calls['num_calls'] == 1:
            return get_step_dict(step_id, 'PENDING')
        elif calls['num_calls'] == 2:
            return get_step_dict(step_id, 'RUNNING')
        else:
            return get_step_dict(step_id, calls['final_state'])

    with mock.patch.object(EmrJobRunner,
                           'describe_step',
                           new=new_describe_step):
        emr.wait_for_steps_to_complete(context, cluster_id, step_ids)

    calls['num_calls'] = 0
    calls['final_state'] = 'FAILED'
    with pytest.raises(EmrError) as exc_info:
        with mock.patch.object(EmrJobRunner,
                               'describe_step',
                               new=new_describe_step):
            emr.wait_for_steps_to_complete(context, cluster_id, step_ids)
    assert 'step failed' in str(exc_info.value)