def test_execute_uses_the_emr_config_to_create_a_cluster_and_returns_job_id(self):
    """Run the operator with ``boto3.client`` patched and check the returned id."""
    operator_kwargs = {
        "task_id": "test_task",
        "aws_conn_id": "aws_default",
        "emr_conn_id": "emr_default",
    }

    # Construction and execution both happen while the patch is active, so
    # boto3.client is replaced by self.boto3_client_mock for the whole run.
    with patch("boto3.client", self.boto3_client_mock):
        emr_task = EmrCreateJobFlowOperator(**operator_kwargs)
        returned_job_id = emr_task.execute(None)
        self.assertEqual(returned_job_id, "j-8989898989")
def execute(self, context):
    """Prepare the EMR job flow for the deployment environment and launch it.

    In the ``'prod'`` environment the arguments of the first bootstrap action
    in ``self.job_flow_overrides`` are replaced by the environment name and
    ``self.install_packages_on_emr``.

    In the ``'dev'`` environment the local airflow repository is zipped and
    uploaded, together with the bootstrap script, below
    ``<username>/spark_local/`` in the ``grp-ds-users`` bucket, after which
    ``self.override_emr_template`` is called with that user name.

    In both cases the actual work is delegated to
    ``EmrCreateJobFlowOperator.execute``.

    :param context: handed unchanged to ``EmrCreateJobFlowOperator.execute``.
    :return: the value returned by ``EmrCreateJobFlowOperator.execute``.
    :raises ValueError: when ``self.environment`` is neither ``'dev'`` nor
        ``'prod'``.
    """
    # Guard clause: refuse to run for anything but the two known environments.
    if self.environment not in ("dev", "prod"):
        unknown_environment_message = (
            f"Can't recognise deployment environment '{self.environment}'. \n"
            "Review the environment variable 'DEPLOYMENT_ENVIRONMENT'"
        )
        logging.error(unknown_environment_message)
        raise ValueError(f"self.environment = os.environ['DEPLOYMENT_ENVIRONMENT'] --> {self.environment}")

    if self.environment != 'dev':
        # Non-dev run: only the bootstrap arguments of the template change.
        bootstrap_args = [f'{self.environment}', self.install_packages_on_emr]
        self.job_flow_overrides['BootstrapActions'][0]['ScriptBootstrapAction']['Args'] = bootstrap_args
        logging.info(self.job_flow_overrides)
        return EmrCreateJobFlowOperator.execute(self, context)

    logging.info("EMR cluster running from development environment")

    # The user name is whatever follows the first "/" in the resource part
    # (sixth ":"-separated field) of the caller's ARN, lower-cased.
    sts_client = boto3.client('sts')
    caller_arn = sts_client.get_caller_identity()['Arn']
    arn_resource = caller_arn.split(":", 5)[5]
    username = arn_resource.split("/", 1)[1].lower()

    # Zip the local airflow repository; make_archive appends ".zip" itself.
    airflow_repo_path = '/home/vagrant/uk_dm_airflow'
    zip_local_path = '/tmp/latest'
    shutil.make_archive(base_name='/tmp/latest', format='zip', root_dir=airflow_repo_path)
    logging.info(f"Zipped file location: {zip_local_path}")

    # Both uploads go to the same bucket and overwrite any existing object.
    upload_options = {'bucket_name': 'grp-ds-users', 'replace': True}
    user_bucket_hook = S3_hook.S3Hook(aws_conn_id=self.aws_conn_id)

    user_bucket_hook.load_file(
        f"{zip_local_path}.zip",
        f'{username}/spark_local/latest.zip',
        **upload_options
    )
    # The message carries a literal mask, not the actual user name.
    logging.info(f"Airflow repo uploaded to user bucket. User: '******'")

    user_bucket_hook.load_file(
        self.bootstrap_path,
        f'{username}/spark_local/bootstrap.sh',
        **upload_options
    )

    self.override_emr_template(username)
    return EmrCreateJobFlowOperator.execute(self, context)
def test_execute_uses_the_emr_config_to_create_a_cluster_and_returns_job_id(self):
    """Check that ``execute`` hands back the job flow id under a patched ``boto3.client``."""
    expected_job_flow_id = 'j-8989898989'
    boto3_client_patch = patch('boto3.client', self.boto3_client_mock)

    # The patch is entered before the operator exists and stays active until
    # after the assertion, so self.boto3_client_mock stands in throughout.
    with boto3_client_patch:
        create_job_flow = EmrCreateJobFlowOperator(
            task_id='test_task',
            aws_conn_id='aws_default',
            emr_conn_id='emr_default',
        )
        actual_job_flow_id = create_job_flow.execute(None)
        self.assertEqual(actual_job_flow_id, expected_job_flow_id)
def test_execute_uses_the_emr_config_to_create_a_cluster_and_returns_job_id(self):
    """Verify the id returned by ``execute`` while ``boto3.client`` is patched."""
    connection_ids = {'aws_conn_id': 'aws_default', 'emr_conn_id': 'emr_default'}

    with patch('boto3.client', self.boto3_client_mock):
        # Built inside the patch so the operator only ever sees the mock.
        task = EmrCreateJobFlowOperator(task_id='test_task', **connection_ids)
        job_flow_id = task.execute(None)
        self.assertEqual(job_flow_id, 'j-8989898989')
class TestEmrCreateJobFlowOperator(unittest.TestCase):
    """Tests for ``EmrCreateJobFlowOperator`` run against a mocked EMR client."""

    # Job flow definition passed to the operator as ``job_flow_overrides``.
    # The last two step arguments are templates; test_render_template checks
    # the values they render to.
    _config = {
        'Name': 'test_job_flow',
        'ReleaseLabel': '5.11.0',
        'Steps': [
            {
                'Name': 'test_step',
                'ActionOnFailure': 'CONTINUE',
                'HadoopJarStep': {
                    'Jar': 'command-runner.jar',
                    'Args': [
                        '/usr/lib/spark/bin/run-example',
                        '{{ macros.ds_add(ds, -1) }}',
                        '{{ ds }}',
                    ],
                },
            },
        ],
    }

    def setUp(self):
        """Create the EMR client mock and the operator under test."""
        # Mock out the emr_client (moto has incorrect response)
        self.emr_client_mock = MagicMock()

        dag_defaults = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        test_dag = DAG('test_dag_id', default_args=dag_defaults)
        self.operator = EmrCreateJobFlowOperator(
            task_id='test_task',
            aws_conn_id='aws_default',
            emr_conn_id='emr_default',
            job_flow_overrides=self._config,
            region_name='ap-southeast-2',
            dag=test_dag,
        )

    def test_init(self):
        """The constructor arguments are stored on the operator unchanged."""
        expected_attributes = (
            ('aws_conn_id', 'aws_default'),
            ('emr_conn_id', 'emr_default'),
            ('region_name', 'ap-southeast-2'),
        )
        for attribute_name, expected_value in expected_attributes:
            self.assertEqual(getattr(self.operator, attribute_name), expected_value)

    def test_render_template(self):
        """Rendering fills the templated step arguments with dates."""
        task_instance = TaskInstance(self.operator, DEFAULT_DATE)
        task_instance.render_templates()

        # Expected values: the day before DEFAULT_DATE, then DEFAULT_DATE.
        previous_day = (DEFAULT_DATE - timedelta(days=1)).strftime("%Y-%m-%d")
        execution_day = DEFAULT_DATE.strftime("%Y-%m-%d")
        expected_step = {
            'Name': 'test_step',
            'ActionOnFailure': 'CONTINUE',
            'HadoopJarStep': {
                'Jar': 'command-runner.jar',
                'Args': [
                    '/usr/lib/spark/bin/run-example',
                    previous_day,
                    execution_day,
                ],
            },
        }
        expected_args = {
            'Name': 'test_job_flow',
            'ReleaseLabel': '5.11.0',
            'Steps': [expected_step],
        }

        self.assertDictEqual(self.operator.job_flow_overrides, expected_args)

    def test_execute_returns_job_id(self):
        """``execute`` returns the expected id when the boto3 session is mocked."""
        self.emr_client_mock.run_job_flow.return_value = RUN_JOB_FLOW_SUCCESS_RETURN

        # Mock out the emr_client creator: calling the patched Session gives a
        # mock whose client() returns self.emr_client_mock.
        session_stub = MagicMock()
        session_stub.client.return_value = self.emr_client_mock
        self.boto3_session_mock = MagicMock(return_value=session_stub)

        with patch('boto3.session.Session', self.boto3_session_mock):
            job_flow_id = self.operator.execute(None)
            self.assertEqual(job_flow_id, 'j-8989898989')
class TestEmrCreateJobFlowOperator(unittest.TestCase):
    """Exercise ``EmrCreateJobFlowOperator`` with the EMR client mocked out."""

    # Overrides given to the operator in setUp. The two trailing step
    # arguments are templates whose rendered values test_render_template
    # asserts on.
    _config = {
        'Name': 'test_job_flow',
        'ReleaseLabel': '5.11.0',
        'Steps': [
            {
                'Name': 'test_step',
                'ActionOnFailure': 'CONTINUE',
                'HadoopJarStep': {
                    'Jar': 'command-runner.jar',
                    'Args': [
                        '/usr/lib/spark/bin/run-example',
                        '{{ macros.ds_add(ds, -1) }}',
                        '{{ ds }}',
                    ],
                },
            },
        ],
    }

    def setUp(self):
        """Load the test configuration, then build the mock and the operator."""
        configuration.load_test_config()

        # Mock out the emr_client (moto has incorrect response)
        self.emr_client_mock = MagicMock()

        owning_dag = DAG(
            'test_dag_id',
            default_args={'owner': 'airflow', 'start_date': DEFAULT_DATE},
        )
        self.operator = EmrCreateJobFlowOperator(
            task_id='test_task',
            aws_conn_id='aws_default',
            emr_conn_id='emr_default',
            job_flow_overrides=self._config,
            dag=owning_dag,
        )

    def test_init(self):
        """Both connection ids given to the constructor are kept on the operator."""
        operator_under_test = self.operator
        self.assertEqual(operator_under_test.aws_conn_id, 'aws_default')
        self.assertEqual(operator_under_test.emr_conn_id, 'emr_default')

    def test_render_template(self):
        """After rendering, the templated step arguments hold formatted dates."""
        rendering_instance = TaskInstance(self.operator, DEFAULT_DATE)
        rendering_instance.render_templates()

        # First templated argument: one day before DEFAULT_DATE; second:
        # DEFAULT_DATE itself, both formatted as year-month-day.
        day_before = DEFAULT_DATE - timedelta(days=1)
        rendered_step_args = [
            '/usr/lib/spark/bin/run-example',
            day_before.strftime("%Y-%m-%d"),
            DEFAULT_DATE.strftime("%Y-%m-%d"),
        ]
        expected_args = {
            'Name': 'test_job_flow',
            'ReleaseLabel': '5.11.0',
            'Steps': [
                {
                    'Name': 'test_step',
                    'ActionOnFailure': 'CONTINUE',
                    'HadoopJarStep': {
                        'Jar': 'command-runner.jar',
                        'Args': rendered_step_args,
                    },
                },
            ],
        }

        self.assertDictEqual(self.operator.job_flow_overrides, expected_args)

    def test_execute_returns_job_id(self):
        """With ``boto3.session.Session`` patched, ``execute`` returns the job id."""
        self.emr_client_mock.run_job_flow.return_value = RUN_JOB_FLOW_SUCCESS_RETURN

        # Mock out the emr_client creator: the patched Session, when called,
        # returns a mock whose client() yields self.emr_client_mock.
        fake_session = MagicMock()
        fake_session.client.return_value = self.emr_client_mock
        self.boto3_session_mock = MagicMock(return_value=fake_session)

        with patch('boto3.session.Session', self.boto3_session_mock):
            returned_id = self.operator.execute(None)
            self.assertEqual(returned_id, 'j-8989898989')