def k8s_events_handle_experiment_job_statuses(self: 'workers.app.task', payload: Dict) -> None:
    """Apply a status event reported for an experiment job.

    The job is looked up by the ``job_uuid`` label found in
    ``payload['details']['labels']``. The handler exits silently when the
    job, or the experiment it belongs to, cannot be found.

    Args:
        self: the bound task instance; ``self.request.retries`` and
            ``self.retry`` are used to reschedule the handling.
        payload: the event description. ``details`` (with ``labels`` and
            ``node_name``), ``status`` and ``message`` are read as required
            keys; ``restart_count``, ``created_at`` and ``traceback`` are
            optional.
    """
    event_details = payload['details']
    job_uuid = event_details['labels']['job_uuid']
    restart_count = payload.get('restart_count', 0)
    new_status = payload['status']
    logger.debug('handling events status for job_uuid: %s, status: %s', job_uuid, new_status)

    # Resolve the job this event refers to; nothing to do if it is gone.
    try:
        experiment_job = ExperimentJob.objects.get(uuid=job_uuid)
    except ExperimentJob.DoesNotExist:
        logger.debug('Job uuid`%s` does not exist', job_uuid)
        return

    # Same for the owning experiment.
    try:
        experiment = experiment_job.experiment
    except Experiment.DoesNotExist:
        logger.debug('Experiment for job `%s` does not exist anymore', job_uuid)
        return

    # The job has no status recorded yet: give it a short moment (at most
    # two extra attempts) before handling this event.
    # NOTE(review): relies on self.retry raising to abort this run -- confirm.
    status_not_recorded_yet = experiment_job.last_status is None
    if status_not_recorded_yet and self.request.retries < 2:
        self.retry(countdown=1)

    # The experiment's own restart limit wins; fall back to the configured one.
    allowed_restarts = experiment.max_restarts
    if not allowed_restarts:
        allowed_restarts = conf.get(MAX_RESTARTS_EXPERIMENTS)

    # A failure that is still within the restart budget is not recorded.
    if JobLifeCycle.failed(new_status) and restart_count < allowed_restarts:
        return

    # Set the new status
    try:
        RedisStatuses.set_status(job_uuid, new_status)
        set_node_scheduling(experiment_job, event_details['node_name'])
        experiment_job.set_status(
            status=new_status,
            message=payload['message'],
            created_at=payload.get('created_at'),
            traceback=payload.get('traceback'),
            details=event_details,
        )
        logger.debug('status %s is set for job %s %s', new_status, job_uuid, experiment_job.id)
    except IntegrityError:
        # Due to concurrency this could happen, we just retry it
        logger.info('Retry job status %s handling %s', new_status, job_uuid)
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
def k8s_events_handle_build_job_statuses(self: 'workers.app.task', payload: Dict) -> None:
    """Apply a status event reported for a build job.

    The build job is looked up by the ``job_uuid`` label found in
    ``payload['details']['labels']``. The handler exits without recording
    anything when the build job, or the project it belongs to, no longer
    exists.

    Args:
        self: the bound task instance; ``self.retry`` is used to reschedule
            the handling when the status write hits an ``IntegrityError``.
        payload: the event description. ``details`` (with ``labels`` and
            ``node_name``), ``status`` and ``message`` are read as required
            keys; ``restart_count`` and ``traceback`` are optional. Within
            ``labels``, ``app``, ``job_uuid`` and ``job_name`` are required
            and ``project_name`` is optional.
    """
    details = payload['details']
    labels = details['labels']
    app = labels['app']
    job_uuid = labels['job_uuid']
    job_name = labels['job_name']
    restart_count = payload.get('restart_count', 0)
    project_name = labels.get('project_name')
    logger.debug('handling events status for build jon %s %s', job_name, app)

    try:
        build_job = BuildJob.objects.get(uuid=job_uuid)
    except BuildJob.DoesNotExist:
        logger.info('Build job `%s` does not exist', job_name)
        return

    # Accessing the relation is only a liveness check for the project.
    try:
        build_job.project
    except Project.DoesNotExist:
        logger.debug('`%s` does not exist anymore', project_name)
        # Do not record a status for a job whose project is gone.
        return

    # The job's own restart limit wins; fall back to the configured one.
    max_restarts = build_job.max_restarts or conf.get(MAX_RESTARTS_BUILD_JOBS)

    # A failure that is still within the restart budget is not recorded.
    if JobLifeCycle.failed(payload['status']) and restart_count < max_restarts:
        return

    # Set the new status
    try:
        RedisStatuses.set_status(job_uuid, payload['status'])
        set_node_scheduling(build_job, details['node_name'])
        build_job.set_status(status=payload['status'],
                             message=payload['message'],
                             traceback=payload.get('traceback'),
                             details=details)
    except IntegrityError:
        # Due to concurrency this could happen, we just retry it
        logger.info('Retry job status %s handling %s', payload['status'], job_uuid)
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)