def setUp(self):
    """Create two independent jobs and keep the first one's experiment + query."""
    super().setUp()
    # First job and the experiment that owns it.
    self.job1 = ExperimentJobFactory()
    self.obj1 = self.job1.experiment
    # A second, unrelated job (only the job itself is kept here).
    self.job2 = ExperimentJobFactory()
    # Re-read obj1 so any DB-side changes made during factory creation are visible.
    self.obj1.refresh_from_db()
    # Pre-built details queryset row for obj1, used by the assertions.
    self.obj1_query = queries.experiments_details.get(id=self.obj1.id)
def test_master_success_influences_other_experiment_workers_status(self):
    """When the master job succeeds, workers and the experiment succeed too."""
    with patch('scheduler.tasks.experiments.experiments_build.apply_async') as _, \
            patch.object(Experiment, 'set_status') as _:  # noqa
        xp = ExperimentFactory()
        assert ExperimentLifeCycle.is_done(xp.last_status) is False

        # Attach one master and two workers to the experiment.
        master_job = ExperimentJobFactory(experiment=xp, role=TaskType.MASTER)
        assert JobLifeCycle.is_done(master_job.last_status) is False
        worker_jobs = []
        for _i in range(2):
            worker_jobs.append(
                ExperimentJobFactory(experiment=xp, role=TaskType.WORKER))
        for worker_job in worker_jobs:
            worker_job.refresh_from_db()
            assert JobLifeCycle.is_done(worker_job.last_status) is False

        # Flip the master to succeeded.
        ExperimentJobStatusFactory(job=master_job, status=JobLifeCycle.SUCCEEDED)

        # Every worker should now carry a success status ...
        for worker_job in worker_jobs:
            worker_job.refresh_from_db()
            assert worker_job.last_status == JobLifeCycle.SUCCEEDED

        # ... and so should the experiment itself.
        xp.refresh_from_db()
        assert xp.last_status == ExperimentLifeCycle.SUCCEEDED
def test_default_job_role(self):
    """With PS, master, and worker jobs present, the default job role is PS."""
    with patch('scheduler.tasks.experiments.experiments_build.apply_async') as _:  # noqa
        xp = ExperimentFactory(
            content=exec_experiment_resources_parsed_content.raw_data)
        # Create one job per role, PS first (same order as before).
        for role in (TaskType.PS, TaskType.MASTER, TaskType.WORKER):
            ExperimentJobFactory(experiment=xp, role=role)
        assert xp.default_job_role == TaskType.PS
def test_sync_experiments_and_jobs_statuses(self):
    """The sync task reconciles experiment statuses from their jobs' statuses."""
    with patch('scheduler.tasks.experiments.experiments_build.apply_async') as _:  # noqa
        with patch.object(Experiment, 'set_status') as _:  # noqa
            experiments = [ExperimentFactory() for _ in range(3)]

    done_xp, no_jobs_xp, xp_with_jobs = experiments

    # Set done status
    with patch('scheduler.experiment_scheduler.stop_experiment') as _:  # noqa
        ExperimentStatusFactory(experiment=done_xp, status=JobLifeCycle.FAILED)

    # Create jobs for xp_with_jobs and update status, and do not update the xp status
    with patch.object(Experiment, 'set_status') as _:  # noqa
        job = ExperimentJobFactory(experiment=xp_with_jobs)
        ExperimentJobStatusFactory(job=job, status=JobLifeCycle.RUNNING)

    xp_with_jobs.refresh_from_db()
    assert xp_with_jobs.last_status is None

    # Mock sync experiments and jobs constants
    # Only one experiment (the one with a live job) should get a check scheduled.
    with patch('scheduler.tasks.experiments.'
               'experiments_check_status.apply_async') as check_status_mock:
        sync_experiments_and_jobs_statuses()
        assert check_status_mock.call_count == 1

    # Call sync experiments and jobs constants
    sync_experiments_and_jobs_statuses()

    done_xp.refresh_from_db()
    no_jobs_xp.refresh_from_db()
    xp_with_jobs.refresh_from_db()
    # done_xp keeps its terminal status; no_jobs_xp is untouched; the one with
    # a running job ends up RUNNING (presumably the check task runs eagerly in
    # the test settings — TODO confirm).
    assert done_xp.last_status == ExperimentLifeCycle.FAILED
    assert no_jobs_xp.last_status is None
    assert xp_with_jobs.last_status == ExperimentLifeCycle.RUNNING
def test_experiment_group_deletion_triggers_stopping_for_running_experiment(
        self):
    """Deleting a group stops its single running experiment and removes all."""
    with patch('hpsearch.tasks.grid.hp_grid_search_start.apply_async') as start_mock:
        group = ExperimentGroupFactory()
    assert start_mock.call_count == 1

    # Add one extra experiment and make it look running: scheduled status + a job.
    xp = ExperimentFactory(project=group.project, experiment_group=group)
    xp.set_status(ExperimentLifeCycle.SCHEDULED)
    ExperimentJobFactory(experiment=xp)
    assert Experiment.objects.filter(experiment_group=group).count() == 3

    with patch('scheduler.tasks.experiments.experiments_stop.apply_async') as stop_mock:
        group.delete()
    # Only one experiment was stopped
    assert stop_mock.call_count == 1
    assert Experiment.objects.filter(experiment_group=group).count() == 0
def test_experiment_group_deletion_triggers_stopping_for_running_experiment(
        self, create_build_job):
    """Deleting a group stops its running experiment (build job pre-succeeded)."""
    # Pretend the build already succeeded so group creation proceeds further.
    build = BuildJobFactory()
    BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
    create_build_job.return_value = build, True, True

    with patch('hpsearch.tasks.grid.hp_grid_search_start.apply_async') as start_mock:
        group = ExperimentGroupFactory()
    assert start_mock.call_count == 2

    # One extra running experiment: scheduled status plus a job.
    xp = ExperimentFactory(project=group.project, experiment_group=group)
    xp.set_status(ExperimentLifeCycle.SCHEDULED)
    ExperimentJobFactory(experiment=xp)
    assert Experiment.objects.filter(experiment_group=group).count() == 3

    with patch('scheduler.tasks.experiments.experiments_stop.apply_async') as stop_mock:
        group.delete()
    # Only one experiment was stopped
    assert stop_mock.call_count == 1
    assert Experiment.objects.filter(experiment_group=group).count() == 0
def test_delete_triggers_stopping_of_experiments(self):
    """Project delete stops running experiments and cascades all deletions."""
    assert self.queryset.count() == 1
    assert ExperimentGroup.objects.count() == 2

    # Add a running experiment: scheduled status with one job attached.
    xp = ExperimentFactory(project=self.object)
    xp.set_status(ExperimentLifeCycle.SCHEDULED)
    ExperimentJobFactory(experiment=xp)
    assert Experiment.objects.count() == 3

    with patch('scheduler.tasks.experiments.experiments_stop.apply_async') as stop_mock:
        resp = self.auth_client.delete(self.url)
    assert stop_mock.call_count == 1
    assert resp.status_code == status.HTTP_204_NO_CONTENT
    assert self.queryset.count() == 0
    assert ExperimentGroup.objects.count() == 0
    assert Experiment.objects.count() == 0

    # Delete does not work for other project public and private
    for forbidden_url in (self.url_other, self.url_private):
        resp = self.auth_client.delete(forbidden_url)
        assert resp.status_code in (status.HTTP_401_UNAUTHORIZED,
                                    status.HTTP_403_FORBIDDEN)
def setUp(self):
    """Create the job under test and initialize every event backend."""
    super().setUp()
    self.experiment_job = ExperimentJobFactory()
    # Same order as before: auditor, then tracker, then activitylogs,
    # validating each backend before setting it up.
    for backend in (auditor, tracker, activitylogs):
        backend.validate()
        backend.setup()
def setUp(self):
    """Create the job under test and declare which event types are covered."""
    super().setUp()
    # Job whose events the tests exercise.
    self.experiment_job = ExperimentJobFactory()
    # Event types this test case is expected to cover.
    self.tested_events = {
        experiment_job_events.EXPERIMENT_JOB_VIEWED,
        experiment_job_events.EXPERIMENT_JOB_RESOURCES_VIEWED,
        experiment_job_events.EXPERIMENT_JOB_LOGS_VIEWED,
        experiment_job_events.EXPERIMENT_JOB_STATUSES_VIEWED,
        experiment_job_events.EXPERIMENT_JOB_NEW_STATUS,
    }
def test_status_update_results_in_new_updated_at_datetime_experiment_job(self):
    """Creating a new job status bumps the job's `updated_at` timestamp.

    Covers both paths: direct `ExperimentJobStatus` creation and
    `ExperimentJob.set_status`.
    """
    experiment_job = ExperimentJobFactory()
    updated_at = experiment_job.updated_at

    # Create new status.
    # Fix: job statuses use JobLifeCycle — the rest of this file
    # (e.g. ExperimentJobStatus assertions and factories) uses JobLifeCycle
    # for jobs, while ExperimentLifeCycle belongs to experiments.
    ExperimentJobStatus.objects.create(job=experiment_job,
                                       status=JobLifeCycle.BUILDING)
    experiment_job.refresh_from_db()
    assert updated_at < experiment_job.updated_at
    updated_at = experiment_job.updated_at

    # Create status using set_status
    experiment_job.set_status(JobLifeCycle.FAILED)
    experiment_job.refresh_from_db()
    assert updated_at < experiment_job.updated_at
def test_delete_experiment_triggers_experiment_stop_mocks(self, delete_path):
    """Deleting a running experiment removes its paths and stops it once."""
    xp = ExperimentFactory()
    xp.set_status(ExperimentLifeCycle.SCHEDULED)
    # Attach a job so the experiment counts as running.
    ExperimentJobFactory(experiment=xp)
    assert delete_path.call_count == 2  # outputs + logs

    with patch('scheduler.experiment_scheduler.stop_experiment') as stop_mock:
        xp.delete()
    assert delete_path.call_count == 2 + 2  # outputs + logs
    assert stop_mock.call_count == 1
def test_handle_events_job_statues_for_existing_job_with_known_conditions(self):
    """A status event with known conditions creates statuses for an existing job."""
    assert ExperimentJobStatus.objects.count() == 0
    state = get_job_state(
        event_type=status_raw_event_with_conditions['type'],
        event=status_raw_event_with_conditions['object'],
        job_container_name=settings.JOB_CONTAINER_NAME,
        experiment_type_label=settings.TYPE_LABELS_EXPERIMENT)

    # Create the job the event refers to, keyed by the uuid label it carries.
    job = ExperimentJobFactory(uuid=state.details.labels.job_uuid.hex)
    handle_events_job_statues(state.to_dict())

    assert ExperimentJobStatus.objects.count() == 2
    recorded = set(ExperimentJobStatus.objects
                   .filter(job=job)
                   .values_list('status', flat=True))
    assert recorded == {JobLifeCycle.CREATED, JobLifeCycle.FAILED}
def setUp(self):
    """Build the object under test, its detail URL, and two related jobs."""
    super().setUp()
    project = ProjectFactory(user=self.auth_client.user)
    self.object = self.factory_class(project=project)
    # Detail URL for the experiment addressed by owner/project/sequence.
    self.url = '/{}/{}/{}/experiments/{}/'.format(API_V1,
                                                  project.user.username,
                                                  project.name,
                                                  self.object.sequence)
    self.queryset = self.model_class.objects.all()
    # Create related fields
    for _ in range(2):
        ExperimentJobFactory(experiment=self.object)
def setUp(self):
    """Create a job-status object and its detail URL with side effects mocked."""
    super().setUp()
    # Patch task dispatch and job status updates so the factories only
    # create database rows, with no scheduler side effects.
    with patch('experiments.tasks.start_experiment.delay') as _:
        with patch.object(ExperimentJob, 'set_status') as _:
            project = ProjectFactory(user=self.auth_client.user)
            experiment = ExperimentFactory(project=project)
            self.experiment_job = ExperimentJobFactory(
                experiment=experiment)
            # Status object under test and the URL addressing it.
            self.object = self.factory_class(job=self.experiment_job)
            self.url = '/{}/{}/{}/experiments/{}/jobs/{}/statuses/{}'.format(
                API_V1,
                project.user.username,
                project.name,
                experiment.sequence,
                self.experiment_job.sequence,
                self.object.uuid.hex)
            self.queryset = self.model_class.objects.filter(
                job=self.experiment_job)
def test_delete_triggers_stopping_of_experiments(self):
    """Delete stops the running experiment and removes every experiment row."""
    assert self.queryset.count() == 1
    assert ExperimentGroup.objects.count() == 2

    # Add a running experiment: scheduled status with one job attached.
    xp = ExperimentFactory(project=self.object)
    xp.set_status(ExperimentLifeCycle.SCHEDULED)
    ExperimentJobFactory(experiment=xp)
    assert Experiment.objects.count() == 3

    with patch('scheduler.tasks.experiments.experiments_stop.apply_async') as stop_mock:
        resp = self.auth_client.delete(self.url)
    assert stop_mock.called
    assert resp.status_code == status.HTTP_204_NO_CONTENT
    assert self.queryset.count() == 0
    assert Experiment.all.count() == 0
def setUp(self):
    """Create several job statuses and the list URL, with side effects mocked."""
    super().setUp()
    # Patch task dispatch and job status updates so the factories only
    # create database rows, with no scheduler side effects.
    with patch('experiments.tasks.start_experiment.delay') as _:
        with patch.object(ExperimentJob, 'set_status') as _:
            project = ProjectFactory(user=self.auth_client.user)
            experiment = ExperimentFactory(project=project)
            self.experiment_job = ExperimentJobFactory(experiment=experiment)
            # List URL for the job's statuses.
            self.url = '/{}/{}/{}/experiments/{}/jobs/{}/statuses/'.format(
                API_V1,
                project.user.username,
                project.name,
                experiment.sequence,
                self.experiment_job.sequence)
            # One status object per lifecycle choice, up to num_objects.
            self.objects = [self.factory_class(job=self.experiment_job,
                                               status=JobLifeCycle.CHOICES[i][0])
                            for i in range(self.num_objects)]
            self.queryset = self.model_class.objects.filter(job=self.experiment_job)
def test_delete(self):
    """Group delete removes experiments, deletes their paths, and stops running ones."""
    assert self.model_class.objects.count() == 1
    assert Experiment.objects.count() == 2

    # One extra running experiment inside the group (scheduled + a job).
    xp = ExperimentFactory(project=self.object.project, experiment_group=self.object)
    xp.set_status(ExperimentLifeCycle.SCHEDULED)
    ExperimentJobFactory(experiment=xp)

    with patch('scheduler.tasks.experiments.experiments_stop.apply_async') as stop_mock:
        with patch('libs.paths.experiments.delete_path') as delete_path_mock:
            resp = self.auth_client.delete(self.url)
    assert delete_path_mock.call_count == 6  # Outputs and Logs * 3
    assert stop_mock.call_count == 1
    assert resp.status_code == status.HTTP_204_NO_CONTENT
    assert self.model_class.objects.count() == 0
    assert Experiment.objects.count() == 0
def test_delete_archives_and_schedules_deletion(self):
    """DELETE archives the group and schedules the real deletion as a task."""
    assert self.model_class.objects.count() == 1
    xp = ExperimentFactory(project=self.object.project, experiment_group=self.object)
    assert Experiment.objects.count() == 3

    # Make the extra experiment look running: scheduled status plus a job.
    xp.set_status(ExperimentLifeCycle.SCHEDULED)
    ExperimentJobFactory(experiment=xp)

    with patch('scheduler.tasks.experiment_groups.'
               'experiments_group_schedule_deletion.apply_async') as schedule_mock:
        resp = self.auth_client.delete(self.url)
    assert schedule_mock.call_count == 1
    assert resp.status_code == status.HTTP_204_NO_CONTENT
    # Archived: hidden from the default managers, still present in `all`.
    assert self.model_class.objects.count() == 0
    assert self.model_class.all.count() == 1
    assert Experiment.all.count() == 3
    assert Experiment.objects.count() == 0
def test_archive_schedule_deletion(self):
    """Archiving a group schedules deletion while keeping all rows queryable."""
    assert self.model_class.objects.count() == 1
    xp = ExperimentFactory(project=self.object.project, experiment_group=self.object)
    assert Experiment.objects.count() == 3

    # Make the extra experiment look running: scheduled status plus a job.
    xp.set_status(ExperimentLifeCycle.SCHEDULED)
    ExperimentJobFactory(experiment=xp)

    with patch('scheduler.tasks.experiment_groups.'
               'experiments_group_schedule_deletion.apply_async') as schedule_mock:
        resp = self.auth_client.post(self.url + 'archive/')
    assert resp.status_code == status.HTTP_200_OK
    assert schedule_mock.call_count == 1
    # Unlike delete, archive leaves the rows visible to the default managers.
    assert self.model_class.objects.count() == 1
    assert self.model_class.all.count() == 1
    assert Experiment.all.count() == 3
    assert Experiment.objects.count() == 3
def test_update_job_containers(self):
    """Container monitoring starts only once a job with the event's uuid exists."""
    event_object = status_experiment_job_event_with_conditions['object']
    update_job_containers(event=event_object,
                          status=JobLifeCycle.BUILDING,
                          job_container_name=settings.CONTAINER_NAME_EXPERIMENT_JOB)
    # Assert it's still 0 because no job was created with that job_uuid
    assert len(RedisJobContainers.get_containers()) == 0  # pylint:disable=len-as-condition

    # Create a job carrying the uuid from the event's labels.
    labels = event_object['metadata']['labels']
    ExperimentJobFactory(uuid=labels['job_uuid'])
    job = ExperimentJob.objects.get(uuid=labels['job_uuid'])
    update_job_containers(event=event_object,
                          status=JobLifeCycle.BUILDING,
                          job_container_name=settings.CONTAINER_NAME_EXPERIMENT_JOB)

    # Monitoring started for exactly the container named in the event.
    assert len(RedisJobContainers.get_containers()) == 1
    container_id = '539e6a6f4209997094802b0657f90576fe129b7f81697120172836073d9bbd75'
    assert RedisJobContainers.get_containers() == [container_id]
    job_uuid, experiment_uuid = RedisJobContainers.get_job(container_id)
    assert job.uuid.hex == job_uuid
    assert job.experiment.uuid.hex == experiment_uuid
def test_archive_triggers_stopping_of_experiment_groups(self):
    """Archiving a project stops running experiments and archives everything."""
    assert self.queryset.count() == 1
    assert ExperimentGroup.objects.count() == 2
    group = ExperimentGroup.objects.first()

    # Add a running experiment (scheduled + one job) to the first group.
    xp = ExperimentFactory(project=group.project, experiment_group=group)
    xp.set_status(ExperimentLifeCycle.SCHEDULED)
    ExperimentJobFactory(experiment=xp)
    assert Experiment.objects.count() == 3

    with patch('scheduler.tasks.experiments.experiments_stop.apply_async') as stop_mock:
        resp = self.auth_client.post(self.url + 'archive/')
    assert stop_mock.call_count == 1
    assert resp.status_code == status.HTTP_200_OK
    # Archived rows disappear from the default managers but stay in `all`.
    assert self.queryset.count() == 0
    assert ExperimentGroup.objects.count() == 0
    assert ExperimentGroup.all.count() == 2
    assert Experiment.objects.count() == 0
    assert Experiment.all.count() == 3
def test_resume(self):
    """Resume creates clone experiments; delete semantics follow the original.

    Covers: resume with the same config, resume with new declarations,
    resuming a resumed clone (re-attaches to the original), deleting a clone
    (does not cascade), and deleting the original (cascades to all clones).
    """
    experiment = ExperimentFactory()
    count_experiment = Experiment.objects.count()
    ExperimentStatus.objects.create(experiment=experiment,
                                    status=ExperimentLifeCycle.STOPPED)
    assert experiment.last_status == ExperimentLifeCycle.STOPPED
    config = experiment.config
    declarations = experiment.declarations

    # Resume with same config
    experiment.resume()
    experiment.refresh_from_db()
    assert experiment.last_status == ExperimentLifeCycle.STOPPED
    last_resumed_experiment = experiment.clones.filter(
        cloning_strategy=CloningStrategy.RESUME).last()
    assert last_resumed_experiment.config == config
    assert last_resumed_experiment.declarations == declarations
    assert Experiment.objects.count() == count_experiment + 1
    assert experiment.clones.count() == 1

    # Resume with different config
    new_declarations = {'lr': 0.1, 'dropout': 0.5}
    new_experiment = experiment.resume(declarations=new_declarations)
    experiment.refresh_from_db()
    assert experiment.last_status == ExperimentLifeCycle.STOPPED
    last_resumed_experiment = experiment.clones.filter(
        cloning_strategy=CloningStrategy.RESUME).last()
    assert last_resumed_experiment.config == config
    assert last_resumed_experiment.declarations != declarations
    assert last_resumed_experiment.declarations == new_declarations
    assert Experiment.objects.count() == count_experiment + 2
    assert experiment.clones.count() == 2

    # Resuming a resumed experiment
    new_experiment.resume()
    experiment.refresh_from_db()
    assert experiment.last_status == ExperimentLifeCycle.STOPPED
    last_resumed_experiment_new = experiment.clones.filter(
        cloning_strategy=CloningStrategy.RESUME).last()
    # The new clone points at a different immediate parent but shares the
    # same original experiment.
    assert last_resumed_experiment_new.original_experiment.pk != last_resumed_experiment.pk
    assert (last_resumed_experiment_new.original_experiment.pk ==
            last_resumed_experiment.original_experiment.pk)
    assert last_resumed_experiment.config == config
    assert last_resumed_experiment.declarations != declarations
    assert last_resumed_experiment.declarations == new_declarations
    assert Experiment.objects.count() == count_experiment + 3
    assert experiment.clones.count() == 3

    # Deleting a resumed experiment does not delete other experiments
    last_resumed_experiment_new.set_status(ExperimentLifeCycle.SCHEDULED)
    ExperimentJobFactory(experiment=last_resumed_experiment_new)
    with patch(
            'scheduler.experiment_scheduler.stop_experiment') as mock_stop:
        last_resumed_experiment_new.delete()
    assert experiment.clones.count() == 2
    assert mock_stop.call_count == 1

    # Deleting original experiment deletes all
    with patch(
            'scheduler.experiment_scheduler.stop_experiment') as mock_stop:
        experiment.delete()
    assert Experiment.objects.count() == 0
    assert mock_stop.call_count == 0  # No running experiment
def setUp(self):
    """Create two jobs and keep both jobs and both owning experiments.

    Previously the second job was bound to ``self.obj2`` and immediately
    overwritten by its experiment, losing the job reference and breaking the
    ``job1``/``obj1`` naming used by the sibling setUp. ``self.obj2`` keeps
    its old meaning (the experiment), so existing tests are unaffected.
    """
    super().setUp()
    # First job and its owning experiment.
    self.job1 = ExperimentJobFactory()
    self.obj1 = self.job1.experiment
    # Second job and its owning experiment, mirroring job1/obj1.
    self.job2 = ExperimentJobFactory()
    self.obj2 = self.job2.experiment
def setUp(self):
    """Create the single job shared by the tests in this case."""
    super().setUp()
    self.experiment_job = ExperimentJobFactory()
def get_job_object(self, job_state):
    """Return an ExperimentJob built from the uuid label on *job_state*."""
    return ExperimentJobFactory(uuid=job_state.details.labels.job_uuid.hex)