예제 #1
0
def k8s_events_handle_experiment_job_statuses(self: 'workers.app.task',
                                              payload: Dict) -> None:
    """Record a status event on the experiment job it refers to.

    Args:
        self: the bound task instance; this function uses ``self.request``
            and ``self.retry``.
        payload: the event. Keys read here: ``details`` (with
            ``labels.job_uuid`` and ``node_name``), ``status``, ``message``
            and, optionally, ``restart_count``, ``created_at``, ``traceback``.

    The event is dropped without any write when the job or its experiment
    cannot be found, or when the status is a failure and the reported
    restart count is still below the allowed maximum.
    """
    event_details = payload['details']
    job_uuid = event_details['labels']['job_uuid']
    restarts = payload.get('restart_count', 0)
    status = payload['status']
    logger.debug('handling events status for job_uuid: %s, status: %s',
                 job_uuid, status)

    # Resolve the job, then its experiment; either may already be gone.
    try:
        job = ExperimentJob.objects.get(uuid=job_uuid)
    except ExperimentJob.DoesNotExist:
        logger.debug('Job uuid`%s` does not exist', job_uuid)
        return

    try:
        experiment = job.experiment
    except Experiment.DoesNotExist:
        logger.debug('Experiment for job `%s` does not exist anymore',
                     job_uuid)
        return

    # A job with no recorded status yet gets up to two short retries.
    # NOTE(review): presumably self.retry raises and ends this run - confirm.
    if job.last_status is None:
        if self.request.retries < 2:
            self.retry(countdown=1)

    # Per-experiment limit, falling back to the configured default when the
    # experiment value is falsy.
    restart_limit = experiment.max_restarts
    if not restart_limit:
        restart_limit = conf.get(MAX_RESTARTS_EXPERIMENTS)

    # Ignore failures while the job is still below its restart limit.
    if JobLifeCycle.failed(status) and restarts < restart_limit:
        return

    # Persist the new status.
    try:
        RedisStatuses.set_status(job_uuid, status)
        set_node_scheduling(job, event_details['node_name'])
        job.set_status(status=status,
                       message=payload['message'],
                       created_at=payload.get('created_at'),
                       traceback=payload.get('traceback'),
                       details=event_details)
        logger.debug('status %s is set for job %s %s', status,
                     job_uuid, job.id)
    except IntegrityError:
        # Concurrent writers can collide here; schedule another attempt.
        logger.info('Retry job status %s handling %s', status,
                    job_uuid)
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
예제 #2
0
def k8s_events_handle_build_job_statuses(self: 'workers.app.task',
                                         payload: Dict) -> None:
    """Record a status event on the build job it refers to.

    Args:
        self: the bound task instance; this function uses ``self.retry``.
        payload: the event. Keys read here: ``details`` (with labels
            ``app``, ``job_uuid``, ``job_name``, optional ``project_name``,
            and ``node_name``), ``status``, ``message`` and, optionally,
            ``restart_count`` and ``traceback``.

    The event is dropped without any write when the build job or its
    project cannot be found, or when the status is a failure and the
    reported restart count is still below the allowed maximum.
    """
    details = payload['details']
    app = details['labels']['app']
    job_uuid = details['labels']['job_uuid']
    job_name = details['labels']['job_name']
    restart_count = payload.get('restart_count', 0)
    project_name = details['labels'].get('project_name')
    logger.debug('handling events status for build jon %s %s', job_name, app)

    try:
        build_job = BuildJob.objects.get(uuid=job_uuid)
    except BuildJob.DoesNotExist:
        logger.info('Build job `%s` does not exist', job_name)
        return

    # Touch the relation only to check that the project still exists.
    try:
        build_job.project
    except Project.DoesNotExist:
        logger.debug('`%s` does not exist anymore', project_name)
        # Stop here, as the experiment handler does for a missing
        # experiment, instead of writing status for an orphaned job.
        return

    # Per-job limit, falling back to the configured default when the job
    # value is falsy.
    max_restarts = build_job.max_restarts or conf.get(MAX_RESTARTS_BUILD_JOBS)
    status = payload['status']
    # Ignore failures while the job is still below its restart limit.
    if JobLifeCycle.failed(status) and restart_count < max_restarts:
        return

    # Set the new status
    try:
        RedisStatuses.set_status(job_uuid, status)
        set_node_scheduling(build_job, details['node_name'])
        build_job.set_status(status=status,
                             message=payload['message'],
                             traceback=payload.get('traceback'),
                             details=details)
    except IntegrityError:
        # Due to concurrency this could happen, we just retry it
        logger.info('Retry job status %s handling %s', status, job_uuid)
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)