def new_experiment_job_status(sender, **kwargs):
    """Handle an experiment-job status record delivered through ``kwargs``.

    Reads ``kwargs['instance']`` (the status record) and the optional
    ``kwargs['created']`` flag, which is treated as ``False`` when absent.

    * If ``created`` is truthy, the record is stored on its job as
      ``job_status`` and the job is saved.
    * If the job reports ``is_done`` -- whether or not ``created`` is set --
      the job is removed from ``RedisJobContainers`` so its containers are
      no longer monitored.
    * If ``created`` is truthy and the job's experiment is not done yet,
      ``check_experiment_status`` is queued with the experiment's uuid hex.

    NOTE(review): the ``(sender, **kwargs)`` signature looks like a Django
    ``post_save`` receiver -- confirm where this handler is registered.

    Args:
        sender: Not used by this handler.
        **kwargs: Must contain ``instance``; may contain ``created``.

    Returns:
        None.
    """
    status_record = kwargs['instance']
    is_new_record = kwargs.get('created', False)
    owning_job = status_record.job

    if is_new_record:
        # Keep the job pointing at its latest status record.
        owning_job.job_status = status_record
        owning_job.save()

    if owning_job.is_done:
        # Function-level import kept from the original; presumably avoids an
        # import cycle -- TODO confirm.
        from libs.redis_db import RedisJobContainers
        RedisJobContainers.remove_job(owning_job.uuid.hex)

    # Only a newly created record can trigger an experiment status check.
    if not is_new_record:
        return

    parent_experiment = status_record.job.experiment
    if not parent_experiment.is_done:
        check_experiment_status.delay(
            experiment_uuid=parent_experiment.uuid.hex)
def new_experiment_job_status(sender, **kwargs):
    """Handle an experiment-job status record delivered through ``kwargs``.

    Reads ``kwargs['instance']`` (the status record) and the optional
    ``kwargs['created']`` flag, which is treated as ``False`` when absent.

    * If ``created`` is truthy, the record is stored on its job as
      ``status`` and the job is saved.
    * If the job reports ``is_done`` -- whether or not ``created`` is set --
      the job is removed from ``RedisJobContainers`` so its containers are
      no longer monitored.
    * If ``created`` is truthy and the job's experiment is not done yet,
      the ``SchedulerCeleryTasks.EXPERIMENTS_CHECK_STATUS`` task is sent
      through ``celery_app`` with the experiment id and ``countdown=1``.

    NOTE(review): the ``(sender, **kwargs)`` signature looks like a Django
    ``post_save`` receiver -- confirm where this handler is registered.

    Args:
        sender: Not used by this handler.
        **kwargs: Must contain ``instance``; may contain ``created``.

    Returns:
        None.
    """
    status_record = kwargs['instance']
    was_created = kwargs.get('created', False)
    owning_job = status_record.job

    if was_created:
        # Keep the job pointing at its latest status record.
        owning_job.status = status_record
        owning_job.save()

    if owning_job.is_done:
        # Function-level import kept from the original; presumably avoids an
        # import cycle -- TODO confirm.
        from libs.redis_db import RedisJobContainers
        RedisJobContainers.remove_job(owning_job.uuid.hex)

    # Only a newly created record can trigger an experiment status check.
    if not was_created:
        return

    parent_experiment = status_record.job.experiment
    if parent_experiment.is_done:
        return

    task_kwargs = {'experiment_id': parent_experiment.id}
    celery_app.send_task(
        SchedulerCeleryTasks.EXPERIMENTS_CHECK_STATUS,
        kwargs=task_kwargs,
        countdown=1,
    )
def update_job_containers(event, status, job_container_name):
    """Sync Redis container monitoring with the container statuses in ``event``.

    Steps, in order:

    1. If ``JobLifeCycle.is_done(status)``, the job uuid read from
       ``event['metadata']['labels']['job_uuid']`` is logged and removed
       from ``RedisJobContainers``.
    2. If ``event['status']['container_statuses']`` is ``None``, stop.
    3. For each container status whose ``name`` equals
       ``job_container_name`` and whose ``container_id`` is non-empty after
       stripping a leading ``docker://``: start monitoring the container
       when its ``state['running']`` is not ``None``, otherwise remove the
       container from monitoring.

    NOTE(review): ``event`` is only ever subscripted here; it is presumably a
    dict built from a Kubernetes pod event -- confirm against the caller.

    Args:
        event: Nested mapping with ``metadata.labels.job_uuid`` and
            ``status.container_statuses`` entries.
        status: Job status value passed to ``JobLifeCycle.is_done``.
        job_container_name: Only container statuses with this name are
            processed.

    Returns:
        None.
    """
    if JobLifeCycle.is_done(status):
        finished_job_uuid = event['metadata']['labels']['job_uuid']
        logger.info('Stop monitoring job_uuid: %s', finished_job_uuid)
        RedisJobContainers.remove_job(finished_job_uuid)

    reported_statuses = event['status']['container_statuses']
    if reported_statuses is None:
        return

    scheme_prefix = 'docker://'

    for entry in reported_statuses:
        if entry['name'] != job_container_name:
            continue

        raw_id = entry['container_id']
        if not raw_id:
            continue

        # Drop the runtime scheme so only the bare container id is stored.
        if raw_id.startswith(scheme_prefix):
            bare_id = raw_id[len(scheme_prefix):]
        else:
            bare_id = raw_id
        if not bare_id:
            # The id consisted of the prefix only.
            continue

        # Looked up before inspecting the state, matching the original order.
        owner_uuid = event['metadata']['labels']['job_uuid']

        if entry['state']['running'] is None:
            RedisJobContainers.remove_container(container_id=bare_id)
            continue

        logger.info('Monitoring (container_id, job_uuid): (%s, %s)',
                    bare_id, owner_uuid)
        RedisJobContainers.monitor(container_id=bare_id, job_uuid=owner_uuid)
def experiment_job_status_post_save(sender, **kwargs):
    """Propagate an experiment-job status record to its job and experiment.

    Reads ``kwargs['instance']`` (the status record) and then:

    * stores the record on its job as ``status``;
    * calls ``set_job_started_at`` and ``set_job_finished_at`` with the job
      and the record's ``status`` value, then saves the job;
    * removes the job from ``RedisJobContainers`` when the job reports
      ``is_done``, so its containers are no longer monitored;
    * unless the job's experiment is already done, sends the
      ``SchedulerCeleryTasks.EXPERIMENTS_CHECK_STATUS`` task through
      ``celery_app`` with the experiment id and ``countdown=1``.

    NOTE(review): the name and ``(sender, **kwargs)`` signature suggest a
    Django ``post_save`` receiver -- confirm where this handler is registered.

    Args:
        sender: Not used by this handler.
        **kwargs: Must contain ``instance``.

    Returns:
        None.
    """
    status_record = kwargs['instance']
    owning_job = status_record.job

    # Point the job at its latest status record and refresh its timestamps
    # before persisting.
    owning_job.status = status_record
    set_job_started_at(instance=owning_job, status=status_record.status)
    set_job_finished_at(instance=owning_job, status=status_record.status)
    owning_job.save()

    if owning_job.is_done:
        # Function-level import kept from the original; presumably avoids an
        # import cycle -- TODO confirm.
        from libs.redis_db import RedisJobContainers
        RedisJobContainers.remove_job(owning_job.uuid.hex)

    parent_experiment = status_record.job.experiment
    if not parent_experiment.is_done:
        celery_app.send_task(
            SchedulerCeleryTasks.EXPERIMENTS_CHECK_STATUS,
            kwargs={'experiment_id': parent_experiment.id},
            countdown=1,
        )
def new_experiment_job_status(sender, **kwargs):
    """Handle an experiment-job status record delivered through ``kwargs``.

    ``kwargs['instance']`` is the status record; ``kwargs['created']`` is
    optional and treated as ``False`` when absent.

    Behaviour:
        * ``created`` truthy: the record becomes the job's ``status`` and
          the job is saved.
        * job ``is_done`` (independent of ``created``): the job is removed
          from ``RedisJobContainers`` so its containers are no longer
          monitored.
        * ``created`` truthy and the job's experiment not done: the
          ``SchedulerCeleryTasks.EXPERIMENTS_CHECK_STATUS`` task is sent
          through ``celery_app`` with the experiment id and ``countdown=1``.

    NOTE(review): the ``(sender, **kwargs)`` signature looks like a Django
    ``post_save`` receiver -- confirm where this handler is registered.

    Args:
        sender: Not used by this handler.
        **kwargs: Must contain ``instance``; may contain ``created``.

    Returns:
        None.
    """
    record = kwargs['instance']
    freshly_created = kwargs.get('created', False)
    job = record.job

    if freshly_created:
        # Keep the job pointing at its latest status record.
        job.status = record
        job.save()

    if job.is_done:
        # Function-level import kept from the original; presumably avoids an
        # import cycle -- TODO confirm.
        from libs.redis_db import RedisJobContainers
        RedisJobContainers.remove_job(job.uuid.hex)

    # Updates to an existing record never trigger an experiment check.
    if freshly_created:
        experiment = record.job.experiment
        if not experiment.is_done:
            celery_app.send_task(
                SchedulerCeleryTasks.EXPERIMENTS_CHECK_STATUS,
                kwargs={'experiment_id': experiment.id},
                countdown=1)