    def test_execute_uses_the_emr_config_to_create_a_cluster_and_returns_job_id(self):
        with patch('boto3.client', self.boto3_client_mock):

            operator = EmrCreateJobFlowOperator(
                task_id='test_task',
                aws_conn_id='aws_default',
                emr_conn_id='emr_default'
            )

            self.assertEqual(operator.execute(None), 'j-8989898989')
    def execute(self, context):
        if self.environment not in ("dev", "prod"):
            logging.error(f"Can't recognise deployment environment '{self.environment}'. "
                          "Review the environment variable 'DEPLOYMENT_ENVIRONMENT'.")
            raise ValueError(f"Unsupported deployment environment '{self.environment}' "
                             "(read from os.environ['DEPLOYMENT_ENVIRONMENT'])")

        # check if development/local environment
        if self.environment == 'dev':
            logging.info("EMR cluster running from development environment")

            # Derive the username from the caller's ARN, e.g.
            # 'arn:aws:iam::123456789012:user/Alice' -> 'alice'
            client = boto3.client('sts')
            caller_arn = client.get_caller_identity()['Arn']
            username = caller_arn.split(":", 5)[5].split("/", 1)[1].lower()

            # Create a zipped archive of the local airflow repository.
            # Note: make_archive appends the '.zip' suffix to base_name itself.
            airflow_repo_path = '/home/vagrant/uk_dm_airflow'
            zip_local_path = '/tmp/latest'
            shutil.make_archive(base_name=zip_local_path,
                                format='zip',
                                root_dir=airflow_repo_path)
            logging.info(f"Zipped file location: {zip_local_path}.zip")

            # Upload zipped airflow repository to user's s3 bucket
            hook = S3_hook.S3Hook(aws_conn_id=self.aws_conn_id)
            hook.load_file(f"{zip_local_path}.zip", f'{username}/spark_local/latest.zip',
                           bucket_name='grp-ds-users',
                           replace=True)

            logging.info(f"Airflow repo uploaded to user bucket. User: '******'")

            # Upload local bootstrap file to user s3 buckets
            bootstrap_path = self.bootstrap_path
            hook.load_file(bootstrap_path, f'{username}/spark_local/bootstrap.sh',
                           bucket_name='grp-ds-users',
                           replace=True)

            self.override_emr_template(username)
            return super().execute(context)

        # Production path: pass the environment and the package-install flag
        # to the bootstrap script, log the final EMR template, then create
        # the cluster and return its job flow id.
        self.job_flow_overrides['BootstrapActions'][0]['ScriptBootstrapAction']['Args'] = [
            self.environment, self.install_packages_on_emr]
        logging.info(self.job_flow_overrides)
        return super().execute(context)
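
    def override_emr_template(self, username):
        # Called from the dev branch of execute() above; the real definition
        # is not part of this excerpt. A minimal sketch, assuming it only
        # needs to repoint the bootstrap script at the file execute() just
        # uploaded (bucket and key mirror the hook.load_file call above).
        bootstrap = self.job_flow_overrides['BootstrapActions'][0]['ScriptBootstrapAction']
        bootstrap['Path'] = f's3://grp-ds-users/{username}/spark_local/bootstrap.sh'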
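
# The tests below reference DEFAULT_DATE and RUN_JOB_FLOW_SUCCESS_RETURN,
# which live at module level. A minimal sketch of those fixtures, assuming
# the mocked run_job_flow response only needs 'JobFlowId' for the
# assertions to pass (the date value is illustrative):
from datetime import datetime, timedelta

DEFAULT_DATE = datetime(2017, 1, 1)
RUN_JOB_FLOW_SUCCESS_RETURN = {
    'ResponseMetadata': {'HTTPStatusCode': 200},
    'JobFlowId': 'j-8989898989'
}
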
class TestEmrCreateJobFlowOperator(unittest.TestCase):
    # When
    _config = {
        'Name': 'test_job_flow',
        'ReleaseLabel': '5.11.0',
        'Steps': [{
            'Name': 'test_step',
            'ActionOnFailure': 'CONTINUE',
            'HadoopJarStep': {
                'Jar': 'command-runner.jar',
                'Args': [
                    '/usr/lib/spark/bin/run-example',
                    '{{ macros.ds_add(ds, -1) }}',
                    '{{ ds }}'
                ]
            }
        }]
    }

    def setUp(self):
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}

        # Mock out the emr_client (moto has incorrect response)
        self.emr_client_mock = MagicMock()
        self.operator = EmrCreateJobFlowOperator(
            task_id='test_task',
            aws_conn_id='aws_default',
            emr_conn_id='emr_default',
            job_flow_overrides=self._config,
            region_name='ap-southeast-2',
            dag=DAG('test_dag_id', default_args=args))

    def test_init(self):
        self.assertEqual(self.operator.aws_conn_id, 'aws_default')
        self.assertEqual(self.operator.emr_conn_id, 'emr_default')
        self.assertEqual(self.operator.region_name, 'ap-southeast-2')

    def test_render_template(self):
        ti = TaskInstance(self.operator, DEFAULT_DATE)
        ti.render_templates()

        expected_args = {
            'Name': 'test_job_flow',
            'ReleaseLabel': '5.11.0',
            'Steps': [{
                'Name': 'test_step',
                'ActionOnFailure': 'CONTINUE',
                'HadoopJarStep': {
                    'Jar': 'command-runner.jar',
                    'Args': [
                        '/usr/lib/spark/bin/run-example',
                        (DEFAULT_DATE - timedelta(days=1)).strftime("%Y-%m-%d"),
                        DEFAULT_DATE.strftime("%Y-%m-%d"),
                    ]
                }
            }]
        }

        self.assertDictEqual(self.operator.job_flow_overrides, expected_args)

    def test_execute_returns_job_id(self):
        self.emr_client_mock.run_job_flow.return_value = RUN_JOB_FLOW_SUCCESS_RETURN

        # Mock out the emr_client creator
        emr_session_mock = MagicMock()
        emr_session_mock.client.return_value = self.emr_client_mock
        self.boto3_session_mock = MagicMock(return_value=emr_session_mock)

        with patch('boto3.session.Session', self.boto3_session_mock):
            self.assertEqual(self.operator.execute(None), 'j-8989898989')