Ejemplo n.º 1
0
    def test_experiments_sync_jobs_statuses(self):
        """Check that syncing jobs statuses updates the experiments statuses.

        Three experiments are used: one that already has a failed status, one
        without any job, and one that has a running job but no status of its
        own yet.
        """
        # Create the experiments with the build task and `set_status` mocked.
        with patch('scheduler.tasks.experiments.experiments_build.apply_async'), \
                patch.object(Experiment, 'set_status'):
            done_xp = ExperimentFactory()
            no_jobs_xp = ExperimentFactory()
            xp_with_jobs = ExperimentFactory()

        # Give the first experiment a failed status; stopping is mocked.
        # NOTE(review): statuses are created with JobLifeCycle values but
        # asserted against ExperimentLifeCycle ones below - presumably the
        # underlying values match; confirm.
        with patch('scheduler.experiment_scheduler.stop_experiment'):
            ExperimentStatusFactory(experiment=done_xp, status=JobLifeCycle.FAILED)

        # Attach a running job to the last experiment while `set_status` is
        # mocked, so the experiment status itself is left untouched.
        with patch.object(Experiment, 'set_status'):
            running_job = ExperimentJobFactory(experiment=xp_with_jobs)
            ExperimentJobStatusFactory(job=running_job, status=JobLifeCycle.RUNNING)

        xp_with_jobs.refresh_from_db()
        assert xp_with_jobs.last_status is None

        # First sync with the check-status task mocked: exactly one call is expected.
        check_status_task = 'scheduler.tasks.experiments.experiments_check_status.apply_async'
        with patch(check_status_task) as check_status_mock:
            experiments_sync_jobs_statuses()

        assert check_status_mock.call_count == 1

        # Add a created status (build task mocked), then sync without mocking
        # the check-status task.
        with patch('scheduler.tasks.experiments.experiments_build.apply_async') as build_mock:
            ExperimentStatusFactory(experiment=xp_with_jobs, status=JobLifeCycle.CREATED)
        assert build_mock.call_count == 1
        experiments_sync_jobs_statuses()

        for xp in (done_xp, no_jobs_xp, xp_with_jobs):
            xp.refresh_from_db()
        assert done_xp.last_status == ExperimentLifeCycle.FAILED
        assert no_jobs_xp.last_status is None
        assert xp_with_jobs.last_status == ExperimentLifeCycle.RUNNING
Ejemplo n.º 2
0
    def test_update_job_containers(self):
        """Check that containers are only monitored once a matching job exists."""
        event = status_experiment_job_event_with_conditions['object']

        def update_from_event():
            # Same call is issued before and after the job is created.
            update_job_containers(event=event,
                                  status=JobLifeCycle.BUILDING,
                                  job_container_name=settings.CONTAINER_NAME_JOB)

        update_from_event()
        # No job exists with the event's job_uuid yet, so nothing is monitored
        assert len(RedisJobContainers.get_containers()) == 0

        # Create a job whose uuid matches the one in the event labels
        expected_uuid = event['metadata']['labels']['job_uuid']
        ExperimentJobFactory(uuid=expected_uuid)
        job = ExperimentJob.objects.get(uuid=expected_uuid)
        update_from_event()

        # The container is expected to be monitored now
        assert len(RedisJobContainers.get_containers()) == 1
        container_id = '539e6a6f4209997094802b0657f90576fe129b7f81697120172836073d9bbd75'
        assert RedisJobContainers.get_containers() == [container_id]
        stored_job_uuid, stored_experiment_uuid = RedisJobContainers.get_job(container_id)
        assert job.uuid.hex == stored_job_uuid
        assert job.experiment.uuid.hex == stored_experiment_uuid
Ejemplo n.º 3
0
    def test_experiment_group_deletion_triggers_stopping_for_running_experiment(self,
                                                                                create_build_job):
        """Check that deleting a group stops its scheduled experiment and removes all of them.

        `create_build_job` is a mock supplied to the test; it is configured
        to return a build job that already has a succeeded status.
        """
        succeeded_build = BuildJobFactory()
        BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=succeeded_build)
        create_build_job.return_value = succeeded_build, True, True

        with patch('hpsearch.tasks.grid.hp_grid_search_start.apply_async') as search_start_mock:
            experiment_group = ExperimentGroupFactory()

        assert search_start_mock.call_count == 1

        # Add an experiment to the group, mark it as scheduled and give it a job
        experiment = ExperimentFactory(project=experiment_group.project,
                                       experiment_group=experiment_group)
        experiment.set_status(ExperimentLifeCycle.SCHEDULED)
        ExperimentJobFactory(experiment=experiment)

        group_experiments = Experiment.objects.filter(experiment_group=experiment_group)
        assert group_experiments.count() == 3

        with patch('scheduler.tasks.experiments.experiments_stop.apply_async') as stop_mock:
            experiment_group.delete()

        # Only one experiment was stopped
        assert stop_mock.call_count == 1

        assert Experiment.objects.filter(experiment_group=experiment_group).count() == 0
Ejemplo n.º 4
0
    def test_resume(self):
        """Check resuming experiments: clone creation, config/declarations and deletion."""
        experiment = ExperimentFactory()
        initial_count = Experiment.objects.count()
        ExperimentStatus.objects.create(experiment=experiment,
                                        status=ExperimentLifeCycle.STOPPED)
        assert experiment.last_status == ExperimentLifeCycle.STOPPED

        original_config = experiment.config
        original_declarations = experiment.declarations

        def latest_resumed_clone():
            # Most recent clone of the original experiment created by a resume.
            return experiment.clones.filter(
                cloning_strategy=CloningStrategy.RESUME).last()

        # Resume with same config
        experiment.resume()
        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.STOPPED
        resumed = latest_resumed_clone()
        assert resumed.config == original_config
        assert resumed.declarations == original_declarations
        assert Experiment.objects.count() == initial_count + 1
        assert experiment.clones.count() == 1

        # Resume with different declarations
        new_declarations = {'lr': 0.1, 'dropout': 0.5}
        new_experiment = experiment.resume(declarations=new_declarations)
        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.STOPPED
        resumed = latest_resumed_clone()
        assert resumed.config == original_config
        assert resumed.declarations != original_declarations
        assert resumed.declarations == new_declarations
        assert Experiment.objects.count() == initial_count + 2
        assert experiment.clones.count() == 2

        # Resuming a resumed experiment
        new_experiment.resume()
        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.STOPPED
        resumed_again = latest_resumed_clone()
        assert resumed_again.original_experiment.pk != resumed.pk
        assert resumed_again.original_experiment.pk == resumed.original_experiment.pk
        # NOTE(review): the next three assertions re-check the previous clone
        # (`resumed`), not `resumed_again` - possibly a copy-paste slip; confirm
        # the intended target before changing.
        assert resumed.config == original_config
        assert resumed.declarations != original_declarations
        assert resumed.declarations == new_declarations
        assert Experiment.objects.count() == initial_count + 3
        assert experiment.clones.count() == 3

        # Deleting a resumed experiment does not delete other experiments
        resumed_again.set_status(ExperimentLifeCycle.SCHEDULED)
        ExperimentJobFactory(experiment=resumed_again)
        with patch('scheduler.experiment_scheduler.stop_experiment') as stop_on_clone_delete:
            resumed_again.delete()
        assert experiment.clones.count() == 2
        assert stop_on_clone_delete.call_count == 1

        # Deleting original experiment deletes all
        with patch('scheduler.experiment_scheduler.stop_experiment') as stop_on_original_delete:
            experiment.delete()
        assert Experiment.objects.count() == 0
        assert stop_on_original_delete.call_count == 0  # No running experiment
 def get_job_object(self, job_state):
     """Create and return an experiment job using the job uuid from the state labels.

     The uuid is read from `job_state.details.labels.job_uuid` and passed
     to the factory in its hex form.
     """
     labels = job_state.details.labels
     return ExperimentJobFactory(uuid=labels.job_uuid.hex)
 def setUp(self):
     """Run the parent setup, then create the experiment job used by the tests."""
     super().setUp()
     created_job = ExperimentJobFactory()
     self.experiment_job = created_job
Ejemplo n.º 7
0
 def setUp(self):
     """Run the parent setup, then create two jobs and expose their experiments.

     The first job is kept as `job1`; `obj1` and `obj2` are the experiments
     of the first and second job respectively.
     """
     super().setUp()
     first_job = ExperimentJobFactory()
     self.job1 = first_job
     self.obj1 = first_job.experiment
     # Only the experiment of the second job is stored on the instance.
     second_job = ExperimentJobFactory()
     self.obj2 = second_job.experiment