Ejemplo n.º 1
0
    def test_master_success_influences_other_experiment_workers_status(self):
        """Check that a succeeded master marks workers and experiment as succeeded.

        The experiment is created while ``start_experiment.delay`` and
        ``Experiment.set_status`` are patched. One master job and two worker
        jobs are then attached, a ``SUCCEEDED`` status is recorded for the
        master only, and the test asserts that both workers and the
        experiment report ``SUCCEEDED`` once reloaded from the database.
        """
        # Build the experiment with the start task and status setter patched.
        with patch('experiments.tasks.start_experiment.delay'), \
                patch.object(Experiment, 'set_status'):
            experiment = ExperimentFactory()

        assert ExperimentLifeCycle.is_done(experiment.last_status) is False

        # Attach one master job and two worker jobs; none is done yet.
        master = ExperimentJobFactory(experiment=experiment,
                                      role=TaskType.MASTER)
        assert JobLifeCycle.is_done(master.last_status) is False

        workers = []
        for _ in range(2):
            workers.append(
                ExperimentJobFactory(experiment=experiment,
                                     role=TaskType.WORKER))
        for worker in workers:
            worker.refresh_from_db()
            assert JobLifeCycle.is_done(worker.last_status) is False

        # Record a succeeded status for the master job only.
        ExperimentJobStatusFactory(job=master, status=JobLifeCycle.SUCCEEDED)

        # Every worker is expected to report success after a reload.
        for worker in workers:
            worker.refresh_from_db()
            assert worker.last_status == JobLifeCycle.SUCCEEDED

        # The experiment itself is expected to report success as well.
        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.SUCCEEDED
Ejemplo n.º 2
0
def update_job_containers(event, status, job_container_name):
    """Update the container monitoring records for a job from an event.

    Args:
        event: Nested mapping; this function reads
            ``event['metadata']['labels']['job_uuid']`` and
            ``event['status']['container_statuses']``.
        status: Job status, checked with ``JobLifeCycle.is_done``.
        job_container_name: Only container statuses whose ``name`` equals
            this value are processed.

    When ``status`` is done, the job is removed from ``RedisJobContainers``.
    Then, for each matching container that has a non-empty id, the container
    is registered for monitoring if its ``running`` state is set, and removed
    otherwise.
    """
    if JobLifeCycle.is_done(status):
        # The job is finished, so stop monitoring it.
        finished_job_uuid = event['metadata']['labels']['job_uuid']
        logger.info('Stop monitoring job_uuid: {}'.format(finished_job_uuid))
        RedisJobContainers.remove_job(finished_job_uuid)

    container_statuses = event['status']['container_statuses']
    if container_statuses is None:
        return

    docker_prefix = 'docker://'
    for container_status in container_statuses:
        if container_status['name'] != job_container_name:
            continue

        # Normalise the id by dropping the docker scheme prefix when present.
        container_id = container_status['container_id']
        if container_id and container_id.startswith(docker_prefix):
            container_id = container_id[len(docker_prefix):]
        if not container_id:
            continue

        job_uuid = event['metadata']['labels']['job_uuid']
        if container_status['state']['running'] is None:
            # No running state: drop the container from the records.
            RedisJobContainers.remove_container(container_id=container_id)
            continue

        logger.info(
            'Monitoring (container_id, job_uuid): ({}, {})'.format(
                container_id, job_uuid))
        RedisJobContainers.monitor(container_id=container_id,
                                   job_uuid=job_uuid)
Ejemplo n.º 3
0
 def calculated_status(self):
     """Derive a status from the master job, then the jobs, then the stored one.

     Returns the master job's last status when ``JobLifeCycle.is_done``
     accepts it. Otherwise returns the value of
     ``ExperimentLifeCycle.jobs_status`` for ``self.last_job_statuses``, and
     falls back to ``self.last_status`` when that value is ``None``.
     """
     master_job = self.jobs.filter(role=TaskType.MASTER)[0]
     master_status = master_job.last_status

     if JobLifeCycle.is_done(master_status):
         status = master_status
     else:
         status = None

     if status is None:
         status = ExperimentLifeCycle.jobs_status(self.last_job_statuses)

     # Only read the stored status when nothing could be derived.
     return self.last_status if status is None else status
Ejemplo n.º 4
0
 def _set_status(self,
                 status_model,
                 logger,
                 status,
                 message=None,
                 details=None):
     """Record a new status for this job unless it is done or unchanged.

     Args:
         status_model: Model whose ``objects.create`` stores the status row.
         logger: Logger used to report a status received after completion.
         status: The status to record.
         message: Optional message stored with the status.
         details: Optional details stored with the status.

     Returns:
         ``True`` when a new status row was created, ``False`` otherwise.
     """
     previous_status = self.last_status

     # A finished job must not receive further status updates.
     if JobLifeCycle.is_done(previous_status):
         text = ('Received a new status `{}` for job `{}`. '
                 'But the job is already done with status `{}`').format(
                     status, self.unique_name, previous_status)
         logger.info(text)
         return False

     # Nothing to store when the status did not change.
     if status == previous_status:
         return False

     status_model.objects.create(job=self,
                                 status=status,
                                 message=message,
                                 details=details)
     return True
Ejemplo n.º 5
0
 def is_done(self):
     """Return ``JobLifeCycle.is_done`` evaluated on ``self.last_status``."""
     current_status = self.last_status
     return JobLifeCycle.is_done(current_status)
Ejemplo n.º 6
0
 def is_running(self):
     """Return ``JobLifeCycle.is_running`` evaluated on ``self.last_status``."""
     current_status = self.last_status
     return JobLifeCycle.is_running(current_status)