コード例 #1
0
    def poke(self, context: 'Context'):
        """
        Check the state of the Glue job run once.

        Queries the run state through ``GlueJobHook.get_job_state`` and
        compares it against ``self.success_states`` and
        ``self.errored_states``. When ``self.verbose`` is truthy, the hook's
        ``print_job_logs`` is called on every exit path (return or raise) and
        its result is stored in ``self.next_log_token`` for the next poke.

        :param context: the Airflow task context (not used by this method).
        :return: ``True`` if the run state is in ``self.success_states``,
            ``False`` if it is in neither state collection.
        :raises AirflowException: if the run state is in
            ``self.errored_states``.
        """
        hook = GlueJobHook(aws_conn_id=self.aws_conn_id)
        self.log.info('Poking for job run status :for Glue Job %s and ID %s',
                      self.job_name, self.run_id)
        job_state = hook.get_job_state(job_name=self.job_name,
                                       run_id=self.run_id)
        job_failed = False

        try:
            if job_state in self.success_states:
                self.log.info('Exiting Job %s Run State: %s', self.run_id,
                              job_state)
                return True
            if job_state in self.errored_states:
                job_failed = True
                # Log with lazy %-args and build a real string for the
                # exception; a bare "fmt, a, b" expression would create a
                # tuple and produce an unreadable message.
                self.log.info('Exiting Job %s Run State: %s', self.run_id,
                              job_state)
                raise AirflowException(
                    f'Exiting Job {self.run_id} Run State: {job_state}')
            return False
        finally:
            # Runs whether we return or raise, so logs are also emitted for
            # a failed run before the exception propagates.
            if self.verbose:
                self.next_log_token = hook.print_job_logs(
                    job_name=self.job_name,
                    run_id=self.run_id,
                    job_failed=job_failed,
                    next_token=self.next_log_token,
                )
コード例 #2
0
ファイル: glue.py プロジェクト: subkanthi/airflow
 def poke(self, context: 'Context'):
     """
     Check the state of the Glue job run once.

     :param context: the Airflow task context (not used by this method).
     :return: ``True`` if the run state is in ``self.success_states``,
         ``False`` if it is in neither ``self.success_states`` nor
         ``self.errored_states``.
     :raises AirflowException: if the run state is in ``self.errored_states``.
     """
     glue_hook = GlueJobHook(aws_conn_id=self.aws_conn_id)
     self.log.info("Poking for job run status :for Glue Job %s and ID %s", self.job_name, self.run_id)
     current_state = glue_hook.get_job_state(job_name=self.job_name, run_id=self.run_id)

     # Success is checked first, then failure; anything else means "keep poking".
     if current_state in self.success_states:
         self.log.info("Exiting Job %s Run State: %s", self.run_id, current_state)
         return True

     if current_state not in self.errored_states:
         return False

     raise AirflowException("Exiting Job " + self.run_id + " Run State: " + current_state)
コード例 #3
0
    def execute(self, context: 'Context'):
        """
        Run the configured AWS Glue job.

        If ``self.script_location`` is set and does not start with
        ``self.s3_protocol``, the script is first uploaded through ``S3Hook``
        under ``self.s3_artifacts_prefix`` in ``self.s3_bucket``. The job is
        then started via ``GlueJobHook.initialize_job``; when
        ``self.wait_for_completion`` is truthy, ``job_completion`` is called
        and its result is used for logging and the return value.

        :param context: the Airflow task context (not used by this method).
        :return: the ``JobRunId`` of the Glue job run.
        """
        script_path = self.script_location
        if script_path is None or script_path.startswith(self.s3_protocol):
            # Nothing to upload: either no script, or it is already on S3.
            s3_script_location = script_path
        else:
            # Local script: push it to the artifacts prefix and point the job at it.
            script_name = os.path.basename(script_path)
            artifact_key = self.s3_artifacts_prefix + script_name
            S3Hook(aws_conn_id=self.aws_conn_id).load_file(
                script_path,
                artifact_key,
                bucket_name=self.s3_bucket,
            )
            s3_script_location = f"s3://{self.s3_bucket}/{artifact_key}"

        hook_options = dict(
            job_name=self.job_name,
            desc=self.job_desc,
            concurrent_run_limit=self.concurrent_run_limit,
            script_location=s3_script_location,
            retry_limit=self.retry_limit,
            num_of_dpus=self.num_of_dpus,
            aws_conn_id=self.aws_conn_id,
            region_name=self.region_name,
            s3_bucket=self.s3_bucket,
            iam_role_name=self.iam_role_name,
            create_job_kwargs=self.create_job_kwargs,
        )
        glue_job = GlueJobHook(**hook_options)

        self.log.info(
            "Initializing AWS Glue Job: %s. Wait for completion: %s",
            self.job_name,
            self.wait_for_completion,
        )
        run_info = glue_job.initialize_job(self.script_args, self.run_job_kwargs)

        if not self.wait_for_completion:
            self.log.info("AWS Glue Job: %s. Run Id: %s", self.job_name, run_info['JobRunId'])
        else:
            # From here on the completion result replaces the initial run info.
            run_info = glue_job.job_completion(self.job_name, run_info['JobRunId'], self.verbose)
            self.log.info(
                "AWS Glue Job: %s status: %s. Run Id: %s",
                self.job_name,
                run_info['JobRunState'],
                run_info['JobRunId'],
            )

        return run_info['JobRunId']