예제 #1
0
    def execute(self, context):
        """
        Executes AWS Glue Job from Airflow

        If ``script_location`` is set and does not start with
        ``self.s3_protocol`` it is treated as a local file: the file is
        uploaded to ``self.s3_bucket`` under ``self.s3_artifcats_prefix`` and
        the Glue job is pointed at the uploaded S3 object instead of the
        local path.

        :param context: Airflow task context (not used by this method).
        :return: the id of the current glue job.
        """
        script_location = self.script_location
        if script_location and not script_location.startswith(
                self.s3_protocol):
            s3_hook = S3Hook(aws_conn_id=self.aws_conn_id)
            script_key = (self.s3_artifcats_prefix
                          + os.path.basename(script_location))
            # S3Hook.load_file expects (filename, key, bucket_name); pass the
            # bucket by keyword so key and bucket cannot be swapped.
            s3_hook.load_file(script_location, script_key,
                              bucket_name=self.s3_bucket)
            # The Glue job has to reference the uploaded object, not the
            # path on the worker's local file system.
            script_location = "s3://{}/{}".format(self.s3_bucket, script_key)
        glue_job = AwsGlueJobHook(
            job_name=self.job_name,
            desc=self.job_desc,
            concurrent_run_limit=self.concurrent_run_limit,
            script_location=script_location,
            retry_limit=self.retry_limit,
            num_of_dpus=self.num_of_dpus,
            aws_conn_id=self.aws_conn_id,
            region_name=self.region_name,
            s3_bucket=self.s3_bucket,
            iam_role_name=self.iam_role_name)
        self.log.info("Initializing AWS Glue Job: %s", self.job_name)
        glue_job_run = glue_job.initialize_job(self.script_args)
        # Replace the start response with the result of job_completion so the
        # state logged below comes from that call.
        glue_job_run = glue_job.job_completion(self.job_name,
                                               glue_job_run['JobRunId'])
        self.log.info("AWS Glue Job: %s status: %s. Run Id: %s", self.job_name,
                      glue_job_run['JobRunState'], glue_job_run['JobRunId'])
        return glue_job_run['JobRunId']
예제 #2
0
    def test_initialize_job(self, mock_get_conn, mock_get_or_create_glue_job,
                            mock_get_job_state):
        # Starts a job run through the hook and checks that the state the
        # hook reports is the value supplied by ``mock_get_job_state``.
        # The mock arguments are presumably injected by patch decorators
        # located above this method -- confirm against the full file.
        # NOTE(review): the script path literal has a single slash after
        # "s3:"; it is kept exactly as written.
        script_path = "s3:/glue-examples/glue-scripts/sample_aws_glue_job.py"
        bucket_name = "my-includes"
        script_arguments = {
            "--s3_input_data_path":
                "s3://glue-datasets/examples/medicare/SampleData.csv",
        }

        mock_get_or_create_glue_job.Name = mock.Mock(Name='aws_test_glue_job')
        mock_get_conn.return_value.start_job_run()

        expected_state = mock_get_job_state.return_value
        hook_kwargs = dict(
            job_name='aws_test_glue_job',
            desc='This is test case job from Airflow',
            iam_role_name='my_test_role',
            script_location=script_path,
            s3_bucket=bucket_name,
            region_name=self.some_aws_region,
        )
        hook = AwsGlueJobHook(**hook_kwargs)

        job_run = hook.initialize_job(script_arguments)
        run_name = job_run['JobName']
        run_id = job_run['JobRunId']
        actual_state = hook.get_job_state(run_name, run_id)

        self.assertEqual(actual_state, expected_state,
                         msg='Mocks but be equal')