def check_job(self, job):
    """Act on the final status of a completed job.

    The run configuration (``self.context.cm.run_config``) decides what
    happens next:

    * Status is not in ``retry_on_status``: the job is counted as
      successful, unless its status is ``'ABORTED'``, in which case it is
      counted as failed and ``signal.JOB_ABORTED`` is sent.
    * Status is in ``retry_on_status`` and the job has retries left: the
      job is handed to ``retry_job``, moved via ``context.move_failed``
      and the run state is written out.
    * Status is in ``retry_on_status`` and retries are exhausted: the job
      is counted as failed and ``signal.JOB_FAILED`` is sent.

    :param job: the job to inspect; its ``status``, ``retries``, ``id``
                and ``iteration`` attributes are read.
    :raises ExecutionError: if retries are exhausted and the run
                            configuration has ``bail_on_job_failure`` set.
    """
    run_config = self.context.cm.run_config

    # Status does not call for a retry: just update the tallies.
    if job.status not in run_config.retry_on_status:
        self.logger.info('Job completed with status {}'.format(job.status))
        if job.status != 'ABORTED':
            self.context.successful_jobs += 1
            return
        self.context.failed_jobs += 1
        self.send(signal.JOB_ABORTED)
        return

    # Retry-able status with attempts still available.
    if job.retries < run_config.max_retries:
        template = 'Job {} iteration {} completed with status {}. retrying...'
        self.logger.error(template.format(job.id, job.iteration, job.status))
        self.retry_job(job)
        self.context.move_failed(job)
        self.context.write_state()
        return

    # Retry-able status, but no attempts left: record the failure.
    template = ('Job {} iteration {} completed with status {}. '
                'Max retries exceeded.')
    self.logger.error(template.format(job.id, job.iteration, job.status))
    self.context.failed_jobs += 1
    self.send(signal.JOB_FAILED)
    if run_config.bail_on_job_failure:
        raise ExecutionError('Job {} failed, bailing.'.format(job.id))
def verify_target_responsive(self, context):
    """Probe the target and attempt recovery if it does not respond.

    Returns normally only when ``self.target.check_responsive`` reports
    the target as responsive. Otherwise ``self.is_responsive`` is cleared
    and one of the exceptions below is raised; after a hard reset the
    flag is set back to ``True`` before raising.

    :param context: execution context; ``context.reboot_policy.can_reboot``
                    is read, and the context is passed on to
                    ``self.reboot``.
    :raises TargetNotRespondingError: if the target is unresponsive and
        either the reboot policy disallows rebooting or the target does
        not have ``'hard_reset'``.
    :raises ExecutionError: if the target was unresponsive and a hard
        reset was performed.
    """
    # Read the policy up front, before the target is probed.
    reboot_allowed = context.reboot_policy.can_reboot

    if self.target.check_responsive(explode=False):
        return

    self.is_responsive = False

    if not reboot_allowed:
        raise TargetNotRespondingError('Target unresponsive and is not allowed to reboot.')

    if not self.target.has('hard_reset'):
        raise TargetNotRespondingError('Target unresponsive and hard reset not supported; bailing.')

    self.logger.info('Target unresponsive; performing hard reset')
    self.reboot(context, hard=True)
    self.is_responsive = True
    # An error is still raised after the reset, even though the target
    # is marked responsive again.
    raise ExecutionError('Target became unresponsive but was recovered.')