def poke(self, context: 'Context'):
    """
    Check the state of the Glue job run once and report whether it is done.

    :param context: the Airflow task context (not read by this method).
    :return: ``True`` when the job state is in ``self.success_states``,
        ``False`` when the state is in neither ``self.success_states`` nor
        ``self.errored_states`` (so the sensor pokes again later).
    :raises AirflowException: when the job state is in ``self.errored_states``.
    """
    hook = GlueJobHook(aws_conn_id=self.aws_conn_id)
    self.log.info('Poking for job run status :for Glue Job %s and ID %s', self.job_name, self.run_id)
    job_state = hook.get_job_state(job_name=self.job_name, run_id=self.run_id)
    job_failed = False

    try:
        if job_state in self.success_states:
            self.log.info('Exiting Job %s Run State: %s', self.run_id, job_state)
            return True
        elif job_state in self.errored_states:
            job_failed = True
            # Build a real string: the previous comma-separated expression
            # produced a tuple, so both the log line and the exception
            # carried a tuple repr instead of a readable message.
            job_error_message = f'Exiting Job {self.run_id} Run State: {job_state}'
            self.log.info(job_error_message)
            raise AirflowException(job_error_message)
        else:
            return False
    finally:
        # Runs on every exit path (return or raise) so that, in verbose
        # mode, the hook is asked for job logs after each poke; the token it
        # returns is stored and passed back in on the next call.
        if self.verbose:
            self.next_log_token = hook.print_job_logs(
                job_name=self.job_name,
                run_id=self.run_id,
                job_failed=job_failed,
                next_token=self.next_log_token,
            )
def poke(self, context: 'Context'):
    """
    Query the Glue job run state once and translate it into a sensor result.

    :param context: the Airflow task context (not read by this method).
    :return: ``True`` if the state is one of ``self.success_states``;
        ``False`` if it is in neither ``self.success_states`` nor
        ``self.errored_states``.
    :raises AirflowException: if the state is one of ``self.errored_states``.
    """
    glue_hook = GlueJobHook(aws_conn_id=self.aws_conn_id)
    self.log.info("Poking for job run status :for Glue Job %s and ID %s", self.job_name, self.run_id)
    current_state = glue_hook.get_job_state(job_name=self.job_name, run_id=self.run_id)

    # Success is checked first, exactly as before, so a state listed in both
    # collections still counts as success.
    if current_state in self.success_states:
        self.log.info("Exiting Job %s Run State: %s", self.run_id, current_state)
        return True

    # Neither succeeded nor errored: tell the sensor to keep poking.
    if current_state not in self.errored_states:
        return False

    raise AirflowException("Exiting Job " + self.run_id + " Run State: " + current_state)
def execute(self, context: 'Context'):
    """
    Start the AWS Glue job from Airflow and optionally wait for it to finish.

    If ``self.script_location`` is set and does not start with
    ``self.s3_protocol``, the file is first uploaded through ``S3Hook`` to
    ``self.s3_bucket`` under ``self.s3_artifacts_prefix``.

    :param context: the Airflow task context (not read by this method).
    :return: the ``JobRunId`` value of the started job run.
    """
    needs_upload = self.script_location is not None and not self.script_location.startswith(
        self.s3_protocol
    )
    if needs_upload:
        # Script location does not carry the S3 prefix: upload it and point
        # the job at the uploaded copy.
        uploader = S3Hook(aws_conn_id=self.aws_conn_id)
        file_name = os.path.basename(self.script_location)
        uploader.load_file(
            self.script_location,
            self.s3_artifacts_prefix + file_name,
            bucket_name=self.s3_bucket,
        )
        s3_script_location = f"s3://{self.s3_bucket}/{self.s3_artifacts_prefix}{file_name}"
    else:
        # Either no script at all (None) or already an S3 location.
        s3_script_location = self.script_location

    job_hook = GlueJobHook(
        job_name=self.job_name,
        desc=self.job_desc,
        concurrent_run_limit=self.concurrent_run_limit,
        script_location=s3_script_location,
        retry_limit=self.retry_limit,
        num_of_dpus=self.num_of_dpus,
        aws_conn_id=self.aws_conn_id,
        region_name=self.region_name,
        s3_bucket=self.s3_bucket,
        iam_role_name=self.iam_role_name,
        create_job_kwargs=self.create_job_kwargs,
    )
    self.log.info(
        "Initializing AWS Glue Job: %s. Wait for completion: %s",
        self.job_name,
        self.wait_for_completion,
    )
    run_details = job_hook.initialize_job(self.script_args, self.run_job_kwargs)

    if not self.wait_for_completion:
        # Fire-and-forget: report the run id and return immediately.
        self.log.info("AWS Glue Job: %s. Run Id: %s", self.job_name, run_details['JobRunId'])
        return run_details['JobRunId']

    run_details = job_hook.job_completion(self.job_name, run_details['JobRunId'], self.verbose)
    self.log.info(
        "AWS Glue Job: %s status: %s. Run Id: %s",
        self.job_name,
        run_details['JobRunState'],
        run_details['JobRunId'],
    )
    return run_details['JobRunId']