Example #1
import unittest
from unittest import mock

from airflow import configuration
from airflow.providers.amazon.aws.hooks.glue import AwsGlueJobHook
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.providers.amazon.aws.operators.glue import AwsGlueJobOperator


class TestAwsGlueJobOperator(unittest.TestCase):
    # The patch decorator is active only while setUp runs; each test method
    # re-patches the hook pieces it actually exercises.
    @mock.patch('airflow.providers.amazon.aws.hooks.glue.AwsGlueJobHook')
    def setUp(self, glue_hook_mock):
        configuration.load_test_config()

        self.glue_hook_mock = glue_hook_mock
        some_script = "s3:/glue-examples/glue-scripts/sample_aws_glue_job.py"
        self.glue = AwsGlueJobOperator(
            task_id='test_glue_operator',
            job_name='my_test_job',
            script_location=some_script,
            aws_conn_id='aws_default',
            region_name='us-west-2',
            s3_bucket='some_bucket',
            iam_role_name='my_test_role',
        )

    @mock.patch.object(AwsGlueJobHook, 'get_job_state')
    @mock.patch.object(AwsGlueJobHook, 'initialize_job')
    @mock.patch.object(AwsGlueJobHook, "get_conn")
    @mock.patch.object(S3Hook, "load_file")
    def test_execute_without_failure(self, mock_load_file, mock_get_conn,
                                     mock_initialize_job, mock_get_job_state):
        # Simulate a run that starts, then reaches a terminal SUCCEEDED state.
        mock_initialize_job.return_value = {
            'JobRunState': 'RUNNING',
            'JobRunId': '11111'
        }
        mock_get_job_state.return_value = 'SUCCEEDED'
        self.glue.execute(None)

        # The operator passes its (empty) script_args dict through to initialize_job.
        mock_initialize_job.assert_called_once_with({})
        self.assertEqual(self.glue.job_name, 'my_test_job')
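
For contrast, a failure-path test can be sketched along the same lines. This is a hedged sketch, not part of the original example: it assumes the operator surfaces a FAILED terminal state as an AirflowException while waiting for completion, so the exact exception type is an assumption, and it additionally needs from airflow.exceptions import AirflowException.

    @mock.patch.object(AwsGlueJobHook, 'get_job_state')
    @mock.patch.object(AwsGlueJobHook, 'initialize_job')
    @mock.patch.object(AwsGlueJobHook, "get_conn")
    @mock.patch.object(S3Hook, "load_file")
    def test_execute_with_failure(self, mock_load_file, mock_get_conn,
                                  mock_initialize_job, mock_get_job_state):
        # Assumed behaviour: a FAILED terminal state is raised as AirflowException.
        # Requires: from airflow.exceptions import AirflowException
        mock_initialize_job.return_value = {
            'JobRunState': 'RUNNING',
            'JobRunId': '11111'
        }
        mock_get_job_state.return_value = 'FAILED'

        with self.assertRaises(AirflowException):
            self.glue.execute(None)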
Example #2
    @mock.patch('airflow.providers.amazon.aws.hooks.glue.AwsGlueJobHook')
    def setUp(self, glue_hook_mock):
        configuration.load_test_config()

        self.glue_hook_mock = glue_hook_mock
        some_script = "s3://glue-examples/glue-scripts/sample_aws_glue_job.py"
        self.glue = AwsGlueJobOperator(task_id='test_glue_operator',
                                       job_name='my_test_job',
                                       script_location=some_script,
                                       aws_conn_id='aws_default',
                                       region_name='us-west-2',
                                       s3_bucket='some_bucket',
                                       iam_role_name='my_test_role')
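
The constructor also accepts a script_args dictionary that is handed to the Glue job run; Example #1's assertion mock_initialize_job.assert_called_once_with({}) checks exactly this pass-through of the default empty dict. A minimal, hypothetical variation follows; the --day argument name and its value are illustrative only, not from the original example.

        self.glue_with_args = AwsGlueJobOperator(task_id='test_glue_operator_args',
                                                 job_name='my_test_job',
                                                 script_location=some_script,
                                                 script_args={'--day': '2021-01-01'},
                                                 aws_conn_id='aws_default',
                                                 region_name='us-west-2',
                                                 s3_bucket='some_bucket',
                                                 iam_role_name='my_test_role')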
Example #3
from datetime import datetime, timedelta

from airflow import DAG
from airflow.providers.amazon.aws.operators.glue import AwsGlueJobOperator

default_args = {
    "owner": "airflow-user",
    # A static start_date is generally safer: datetime.today() moves the
    # DAG's start date every time the file is parsed.
    "start_date": datetime.today(),
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "email": "<your-email-address>",
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}

with DAG(
    dag_id="start-glue-job",
    schedule_interval="@daily",
    default_args=default_args,
    catchup=False,
) as dag:
    glue_job = AwsGlueJobOperator(
        task_id='my_glue_job',
        job_name='<your-Glue-job-name>',
        num_of_dpus=5,
        region_name='<region>',
    )
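
A Glue task is usually not the end of a pipeline. As a hedged sketch building on Example #3 (the BashOperator notification task and its command are placeholders, not part of the original example), a downstream dependency can be wired with the >> operator inside the same with DAG(...) block:

from airflow.operators.bash import BashOperator

with DAG(
    dag_id="start-glue-job-chained",
    schedule_interval="@daily",
    default_args=default_args,
    catchup=False,
) as dag:
    glue_job = AwsGlueJobOperator(
        task_id='my_glue_job',
        job_name='<your-Glue-job-name>',
        num_of_dpus=5,
        region_name='<region>',
    )
    # Placeholder downstream task; it runs only after the Glue job succeeds.
    notify = BashOperator(task_id='notify', bash_command='echo "Glue job finished"')
    glue_job >> notify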