def test_scheduler_create_build_job_of_already_done_job(self):
    """Check the case when the job is already done and we need to create a new job."""
    config = {'image': 'busybox:tag'}
    build_job = BuildJobFactory(
        project=self.project,
        user=self.project.user,
        code_reference=self.code_reference,
        config=BuildSpecification.create_specification(config))
    build_job.set_status(JobLifeCycle.STOPPED)
    assert BuildJob.objects.count() == 1

    with patch('scheduler.dockerizer_scheduler.start_dockerizer') as start_mock, \
            patch('scheduler.dockerizer_scheduler.check_image') as check_mock:
        start_mock.return_value = True
        check_mock.return_value = False
        build_job, image_exists, build_status = dockerizer_scheduler.create_build_job(
            user=self.project.user,
            project=self.project,
            config=config,
            code_reference=self.code_reference)
        # Image not found -> a dockerizer run was scheduled for a new job.
        assert start_mock.call_count == 1
        assert check_mock.call_count == 1
        assert image_exists is False
        assert build_status is True
        # A fresh build job was created next to the stopped one.
        assert BuildJob.objects.count() == 2
def test_set_dockerfile(self):
    """The set-dockerfile task persists the dockerfile content on the build job."""
    job = BuildJobFactory()
    assert job.dockerfile is None
    build_jobs_set_dockerfile(build_job_uuid=job.uuid.hex, dockerfile='dockerfile')
    job.refresh_from_db()
    assert job.dockerfile == 'dockerfile'
def test_archive(self):
    """Archiving a project hides its related objects; unarchiving restores them."""
    project = ProjectFactory()
    ExperimentGroupFactory(project=project)
    ExperimentFactory(project=project)
    JobFactory(project=project)
    BuildJobFactory(project=project)
    NotebookJobFactory(project=project)
    TensorboardJobFactory(project=project)

    def assert_counts(visible):
        # Default related managers respect the archived flag...
        assert project.experiments.count() == visible
        assert project.experiment_groups.count() == visible
        assert project.jobs.count() == visible
        assert project.build_jobs.count() == visible
        assert project.notebook_jobs.count() == visible
        assert project.tensorboard_jobs.count() == visible
        # ...while the `all_*` managers always see every object.
        assert project.all_experiments.count() == 1
        assert project.all_experiment_groups.count() == 1
        assert project.all_notebook_jobs.count() == 1
        assert project.all_tensorboard_jobs.count() == 1

    assert project.deleted is False
    assert_counts(visible=1)

    project.archive()
    assert project.deleted is True
    assert_counts(visible=0)

    project.unarchive()
    assert project.deleted is False
    assert_counts(visible=1)
def test_delete_archives_and_schedules_deletion(self,
                                                xp_group_scheduler_mock,
                                                xp_scheduler_mock,
                                                job_scheduler_mock,
                                                build_scheduler_mock,
                                                notebook_scheduler_mock,
                                                tensorboard_scheduler_mock):
    """Deleting a project stops its running entities and removes every related object."""
    # Two of each job type; one of each kind is then flipped to RUNNING so the
    # delete path has something to stop for every scheduler mock.
    for _ in range(2):
        JobFactory(project=self.object)
        BuildJobFactory(project=self.object)
        TensorboardJobFactory(project=self.object)
        NotebookJobFactory(project=self.object)
    self.object.experiment_groups.first().set_status(ExperimentGroupLifeCycle.RUNNING)
    self.object.experiments.first().set_status(ExperimentLifeCycle.RUNNING)
    self.object.jobs.first().set_status(JobLifeCycle.RUNNING)
    self.object.build_jobs.first().set_status(JobLifeCycle.RUNNING)
    self.object.notebook_jobs.first().set_status(JobLifeCycle.RUNNING)
    self.object.tensorboard_jobs.first().set_status(JobLifeCycle.RUNNING)
    # Baseline counts before the delete call (the counts of 2 for groups and
    # experiments presumably come from objects created in setUp — not visible here).
    assert self.queryset.count() == 1
    assert ExperimentGroup.objects.count() == 2
    assert Experiment.objects.count() == 2
    assert Job.objects.count() == 2
    assert BuildJob.objects.count() == 2
    assert NotebookJob.objects.count() == 2
    assert TensorboardJob.objects.count() == 2
    resp = self.auth_client.delete(self.url)
    # Each scheduler's stop entry point was invoked for the running entities.
    assert xp_group_scheduler_mock.call_count == 2
    assert xp_scheduler_mock.call_count == 1
    assert job_scheduler_mock.called
    assert build_scheduler_mock.called
    assert notebook_scheduler_mock.called
    assert tensorboard_scheduler_mock.called
    assert resp.status_code == status.HTTP_204_NO_CONTENT
    # Everything is gone, including from the `all` (archived-inclusive) managers.
    assert self.queryset.count() == 0
    assert Project.all.filter(user=self.object.user).count() == 0
    assert ExperimentGroup.all.count() == 0
    assert Experiment.all.count() == 0
    assert Job.all.count() == 0
    assert BuildJob.all.count() == 0
    assert TensorboardJob.all.count() == 0
    assert NotebookJob.all.count() == 0
def test_non_independent_experiment_creation_doesnt_trigger_start(
        self, create_build_job):
    """Experiments created inside a group must not queue their own start task."""
    build = BuildJobFactory()
    BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
    create_build_job.return_value = build, True, True
    with patch('hpsearch.tasks.hp_create.apply_async') as create_mock:
        experiment_group = ExperimentGroupFactory()
    assert create_mock.call_count == 1
    with patch('scheduler.tasks.experiments.experiments_start.apply_async') as start_mock, \
            patch.object(Experiment, 'set_status') as set_status_mock:
        ExperimentFactory(experiment_group=experiment_group)
    # A status is recorded, but no start task is queued for a grouped experiment.
    assert start_mock.call_count == 0
    assert set_status_mock.call_count == 1
def setUp(self):
    """Create a build job and write a fake log file for it."""
    super().setUp()
    project = ProjectFactory(user=self.auth_client.user)
    job = BuildJobFactory(project=project)
    self.url = '/{}/{}/{}/builds/{}/logs'.format(API_V1,
                                                 project.user.username,
                                                 project.name,
                                                 job.id)
    log_path = get_job_logs_path(job.unique_name)
    create_job_logs_path(job_name=job.unique_name)
    fake = Faker()
    self.logs = [fake.sentence() for _ in range(self.num_log_lines)]
    # One sentence per line, trailing newline included — same bytes as a
    # line-by-line write loop.
    with open(log_path, 'w') as log_file:
        log_file.write('\n'.join(self.logs) + '\n')
def test_create_experiment_with_resources_spec(self, spawner_mock):
    """An experiment with a resources spec reaches STARTING and creates 3 jobs with resources."""
    spec = compiler.compile(kind=kinds.EXPERIMENT, values=exec_experiment_resources_content)
    # Configure the patched spawner: a successful start and one uuid per task type.
    mock_instance = spawner_mock.return_value
    mock_instance.start_experiment.return_value = start_experiment_value
    mock_instance.job_uuids = {
        'master': ['fa6203c189a855dd977019854a7ffcc3'],
        'worker': ['3a9c9b0bd56b5e9fbdbd1a3d43d57960'],
        'ps': ['59e3601232b85a3d8be2511f23a62945']
    }
    mock_instance.spec = spec
    with patch('scheduler.dockerizer_scheduler.create_build_job') as mock_start:
        # Pretend the image already exists and the build succeeded.
        mock_start.return_value = BuildJobFactory(), True, True
        experiment = ExperimentFactory(content=spec.raw_data)
    assert experiment.is_independent is True
    assert ExperimentStatus.objects.filter(experiment=experiment).count() == 3
    assert list(
        ExperimentStatus.objects.filter(experiment=experiment).values_list(
            'status', flat=True)) == [ExperimentLifeCycle.CREATED,
                                      ExperimentLifeCycle.SCHEDULED,
                                      ExperimentLifeCycle.STARTING]
    experiment.refresh_from_db()
    assert experiment.last_status == ExperimentLifeCycle.STARTING
    # Assert 3 jobs were created with resources
    assert ExperimentJob.objects.filter(experiment=experiment).count() == 3
    assert JobResources.objects.count() == 3
    jobs_statuses = ExperimentJob.objects.values_list('statuses__status', flat=True)
    assert set(jobs_statuses) == {JobLifeCycle.CREATED, }
    jobs = ExperimentJob.objects.filter(experiment=experiment)
    assert experiment.calculated_status == ExperimentLifeCycle.STARTING
    for job in jobs:
        # Assert the jobs status is created
        assert job.last_status == JobLifeCycle.CREATED
def test_experiment_group_should_stop_early(self, create_build_job):
    """Early stopping tracks the latest metric values of the group's experiments."""
    build = BuildJobFactory()
    BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
    create_build_job.return_value = build, True, True
    with patch('hpsearch.tasks.random.hp_random_search_start.apply_async') as mock_fct:
        experiment_group = ExperimentGroupFactory(
            content=experiment_group_spec_content_early_stopping)
    assert mock_fct.call_count == 1
    assert experiment_group.should_stop_early() is False
    assert experiment_group.pending_experiments.count() == 2
    assert experiment_group.iteration_config.num_suggestions == 2
    # Make a metric for one of the experiments
    experiment1, experiment2 = list(experiment_group.experiments.all())
    metric1 = ExperimentMetric.objects.create(experiment=experiment1,
                                              values={'precision': 0.99})
    # Check again that early stopping works
    assert experiment_group.should_stop_early() is True
    # Add another metric
    metric2 = ExperimentMetric.objects.create(experiment=experiment2,
                                              values={'loss': 0.01})
    # Check again that early stopping still works
    assert experiment_group.should_stop_early() is True
    # Delete metric1
    metric1.delete()
    # Delete metric2
    metric2.delete()
    # Check again that early stopping still works
    # (last_metric still has the last values even after the metric rows are deleted)
    assert experiment_group.should_stop_early() is True
    # Add another metric below the early-stopping thresholds
    ExperimentMetric.objects.create(experiment=experiment1, values={'precision': 0.8})
    ExperimentMetric.objects.create(experiment=experiment2, values={'loss': 0.2})
    assert experiment_group.should_stop_early() is False
def setUp(self):
    """Create a group with one running experiment and resolve its stop URL."""
    super().setUp()
    project = ProjectFactory(user=self.auth_client.user)
    with patch('hpsearch.tasks.grid.hp_grid_search_start.apply_async') as grid_start_mock, \
            patch('scheduler.dockerizer_scheduler.create_build_job') as create_build_mock:
        build = BuildJobFactory()
        BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
        create_build_mock.return_value = build, True, True
        self.object = self.factory_class(project=project)
    assert grid_start_mock.call_count == 1
    # Add a running experiment
    experiment = ExperimentFactory(experiment_group=self.object)
    ExperimentStatusFactory(experiment=experiment, status=ExperimentLifeCycle.RUNNING)
    self.url = '/{}/{}/{}/groups/{}/stop'.format(API_V1,
                                                 project.user.username,
                                                 project.name,
                                                 self.object.id)
def test_independent_experiment_creation_triggers_experiment_scheduling(
        self):
    """An independent experiment walks CREATED -> SCHEDULED -> FAILED on creation."""
    content = ExperimentSpecification.read(exec_experiment_spec_content)
    with patch('scheduler.dockerizer_scheduler.create_build_job') as build_mock:
        build_mock.return_value = BuildJobFactory(), True, True
        experiment = ExperimentFactory(content=content.raw_data)
    assert experiment.is_independent is True
    statuses = ExperimentStatus.objects.filter(experiment=experiment)
    assert statuses.count() == 3
    assert list(statuses.values_list('status', flat=True)) == [
        ExperimentLifeCycle.CREATED,
        ExperimentLifeCycle.SCHEDULED,
        ExperimentLifeCycle.FAILED,
    ]
    experiment.refresh_from_db()
    assert experiment.last_status == ExperimentLifeCycle.FAILED
def test_stop_pending_experiments(self, create_build_job):
    """Stopping with pending=True drains the pending pool but leaves running experiments."""
    build = BuildJobFactory()
    BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
    create_build_job.return_value = build, True, True
    with patch('hpsearch.tasks.random.hp_random_search_start.apply_async') as start_mock:
        experiment_group = ExperimentGroupFactory(
            content=experiment_group_spec_content_early_stopping)
        experiment = ExperimentFactory(experiment_group=experiment_group)
        ExperimentStatusFactory(experiment=experiment,
                                status=ExperimentLifeCycle.RUNNING)
    assert start_mock.call_count == 1
    assert experiment_group.pending_experiments.count() == 2
    assert experiment_group.running_experiments.count() == 1
    experiments_group_stop_experiments(experiment_group_id=experiment_group.id,
                                       pending=True)
    # Only the pending experiments were stopped.
    assert experiment_group.pending_experiments.count() == 0
    assert experiment_group.running_experiments.count() == 1
def test_get_requirements_and_setup_path_works_as_expected(self):
    """Generator prefers polyaxon_-prefixed files, then falls back to plain names."""
    build_job = BuildJobFactory()
    # Create a repo folder
    repo_path = os.path.join(conf.get('REPOS_MOUNT_PATH'), 'repo')
    os.mkdir(repo_path)

    def make_builder():
        return DockerFileGenerator(repo_path=repo_path,
                                   from_image='busybox',
                                   build_steps=build_job.build_steps,
                                   env_vars=build_job.env_vars)

    # Empty repo: neither path is resolved.
    builder = make_builder()
    assert builder.polyaxon_requirements_path is None
    assert builder.polyaxon_setup_path is None
    builder.clean()

    # polyaxon_requirements.txt / polyaxon_setup.sh are picked up when present.
    Path(os.path.join(repo_path, 'polyaxon_requirements.txt')).touch()
    Path(os.path.join(repo_path, 'polyaxon_setup.sh')).touch()
    builder = make_builder()
    assert builder.polyaxon_requirements_path == 'repo/polyaxon_requirements.txt'
    assert builder.polyaxon_setup_path == 'repo/polyaxon_setup.sh'
    builder.clean()

    # Remove them and verify the generic file names act as fallbacks.
    os.remove(os.path.join(repo_path, 'polyaxon_requirements.txt'))
    os.remove(os.path.join(repo_path, 'polyaxon_setup.sh'))
    Path(os.path.join(repo_path, 'requirements.txt')).touch()
    Path(os.path.join(repo_path, 'setup.sh')).touch()
    builder = make_builder()
    assert builder.polyaxon_requirements_path == 'repo/requirements.txt'
    assert builder.polyaxon_setup_path == 'repo/setup.sh'
    builder.clean()
def test_independent_experiment_creation_with_run_triggers_experiment_scheduling( self): config = ExperimentSpecification.read(exec_experiment_spec_content) # Create a repo for the project repo = RepoFactory() with patch('scheduler.tasks.experiments.experiments_build.apply_async' ) as mock_build: experiment = ExperimentFactory(config=config.parsed_data, project=repo.project) assert mock_build.call_count == 1 assert experiment.project.repo is not None assert experiment.is_independent is True assert ExperimentStatus.objects.filter( experiment=experiment).count() == 1 assert list( ExperimentStatus.objects.filter(experiment=experiment).values_list( 'status', flat=True)) == [ExperimentLifeCycle.CREATED] with patch('scheduler.dockerizer_scheduler.create_build_job' ) as mock_start: build = BuildJobFactory() BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build) mock_start.return_value = build, True, True experiments_build(experiment_id=experiment.id) assert mock_start.call_count == 1 assert ExperimentStatus.objects.filter( experiment=experiment).count() == 3 assert list( ExperimentStatus.objects.filter(experiment=experiment).values_list( 'status', flat=True)) == [ ExperimentLifeCycle.CREATED, ExperimentLifeCycle.SCHEDULED, ExperimentLifeCycle.FAILED ] experiment.refresh_from_db() assert experiment.last_status == ExperimentLifeCycle.FAILED
def test_create_job_with_valid_spec(self, spawner_mock):
    """A job created from a valid spec ends up CREATED then SCHEDULED."""
    config = JobSpecification.read(job_spec_content)
    mock_instance = spawner_mock.return_value
    mock_instance.start_job.return_value = {'pod': 'pod_content'}
    mock_instance.spec = config
    with patch('scheduler.dockerizer_scheduler.create_build_job') as build_mock:
        build = BuildJobFactory()
        BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
        build_mock.return_value = build, True, True
        job = JobFactory(config=config.parsed_data)
    statuses = JobStatus.objects.filter(job=job)
    assert statuses.count() == 2
    assert list(statuses.values_list('status', flat=True)) == [
        JobLifeCycle.CREATED, JobLifeCycle.SCHEDULED]
    job.refresh_from_db()
    assert job.last_status == JobLifeCycle.SCHEDULED
def test_get_requirements_and_setup_path_works_as_expected(self, _):
    """Builder resolves polyaxon_-prefixed files first, then plain requirements/setup."""
    build_job = BuildJobFactory()
    # Create a repo folder
    repo_path = os.path.join(settings.REPOS_MOUNT_PATH, 'repo')
    os.mkdir(repo_path)

    def new_builder():
        return DockerBuilder(build_job=build_job,
                             repo_path=repo_path,
                             from_image='busybox')

    # Empty repo: no requirements/setup paths resolved.
    builder = new_builder()
    assert builder.polyaxon_requirements_path is None
    assert builder.polyaxon_setup_path is None
    builder.clean()

    # polyaxon_requirements.txt / polyaxon_setup.sh win when present.
    Path(os.path.join(repo_path, 'polyaxon_requirements.txt')).touch()
    Path(os.path.join(repo_path, 'polyaxon_setup.sh')).touch()
    builder = new_builder()
    assert builder.polyaxon_requirements_path == 'repo/polyaxon_requirements.txt'
    assert builder.polyaxon_setup_path == 'repo/polyaxon_setup.sh'
    builder.clean()

    # Remove them and fall back to the generic file names.
    os.remove(os.path.join(repo_path, 'polyaxon_requirements.txt'))
    os.remove(os.path.join(repo_path, 'polyaxon_setup.sh'))
    Path(os.path.join(repo_path, 'requirements.txt')).touch()
    Path(os.path.join(repo_path, 'setup.sh')).touch()
    builder = new_builder()
    assert builder.polyaxon_requirements_path == 'repo/requirements.txt'
    assert builder.polyaxon_setup_path == 'repo/setup.sh'
    builder.clean()
def test_scheduler_create_build_job_image_already_exists(self):
    """Check the case when the image is already built."""
    config = {'image': 'busybox:tag'}
    BuildJobFactory(
        project=self.project,
        user=self.project.user,
        code_reference=self.code_reference,
        config=BuildSpecification.create_specification(config))
    assert BuildJob.objects.count() == 1

    with patch('scheduler.dockerizer_scheduler.start_dockerizer') as start_mock, \
            patch('scheduler.dockerizer_scheduler.check_image') as check_mock:
        check_mock.return_value = True
        build_job, image_exists, build_status = dockerizer_scheduler.create_build_job(
            user=self.project.user,
            project=self.project,
            config=config,
            code_reference=self.code_reference)
        # Image already present: no dockerizer run is scheduled...
        assert start_mock.call_count == 0
        assert check_mock.call_count == 1
        assert image_exists is True
        assert build_status is False
        # ...and the existing build job is reused instead of creating a new one.
        assert BuildJob.objects.count() == 1
def test_archive(self, create_build_job):
    """Archiving a group hides it and its experiments; restore brings them back."""
    build = BuildJobFactory()
    BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
    create_build_job.return_value = build, True, True
    with patch('hpsearch.tasks.grid.hp_grid_search_start.apply_async') as mock_fct:
        experiment_group = ExperimentGroupFactory()
    assert mock_fct.call_count == 2
    assert experiment_group.deleted is False
    assert Experiment.objects.filter(experiment_group=experiment_group).count() == 2
    assert ExperimentGroup.objects.count() == 1
    assert ExperimentGroup.all.count() == 1
    experiment_group.archive()
    assert experiment_group.deleted is True
    # Default managers hide archived rows; the `all` managers still see them.
    assert ExperimentGroup.objects.count() == 0
    assert ExperimentGroup.all.count() == 1
    assert Experiment.objects.filter(experiment_group=experiment_group).count() == 0
    assert Experiment.all.filter(experiment_group=experiment_group).count() == 2
    assert experiment_group.experiments.count() == 0
    assert experiment_group.all_experiments.count() == 2
    experiment_group.restore()
    assert experiment_group.deleted is False
    # Restore makes the group and its experiments visible again everywhere.
    assert ExperimentGroup.objects.count() == 1
    assert ExperimentGroup.all.count() == 1
    assert Experiment.objects.filter(experiment_group=experiment_group).count() == 2
    assert Experiment.all.filter(experiment_group=experiment_group).count() == 2
    assert experiment_group.experiments.count() == 2
    assert experiment_group.all_experiments.count() == 2
def test_experiment_group_deletion_triggers_stopping_for_running_experiment(
        self, create_build_job):
    """Deleting a group stops its scheduled experiment and removes all experiments."""
    build = BuildJobFactory()
    BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
    create_build_job.return_value = build, True, True
    with patch('hpsearch.tasks.grid.hp_grid_search_start.apply_async') as start_mock:
        experiment_group = ExperimentGroupFactory()
    assert start_mock.call_count == 1
    experiment = ExperimentFactory(project=experiment_group.project,
                                   experiment_group=experiment_group)
    # Set this experiment to scheduled
    experiment.set_status(ExperimentLifeCycle.SCHEDULED)
    # Add job
    ExperimentJobFactory(experiment=experiment)
    assert Experiment.objects.filter(experiment_group=experiment_group).count() == 3
    with patch('scheduler.tasks.experiments.experiments_stop.apply_async') as stop_mock:
        experiment_group.delete()
    # Only one experiment was stopped
    assert stop_mock.call_count == 1
    assert Experiment.objects.filter(experiment_group=experiment_group).count() == 0
def test_spec_creation_triggers_experiments_creations_and_scheduling(
        self, create_build_job):
    """A group spec creates its experiments and tracks their status transitions;
    resuming a finished experiment re-queues a build."""
    build = BuildJobFactory()
    BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
    create_build_job.return_value = build, True, True
    with patch('hpsearch.tasks.grid.hp_grid_search_start.apply_async') as mock_fct:
        experiment_group = ExperimentGroupFactory()
        assert Experiment.objects.filter(experiment_group=experiment_group).count() == 2
        assert mock_fct.call_count == 2
    assert experiment_group.iteration_config.num_suggestions == 2
    assert experiment_group.pending_experiments.count() == 2
    assert experiment_group.running_experiments.count() == 0
    experiment = Experiment.objects.filter(experiment_group=experiment_group).first()
    # Marking one experiment RUNNING moves it out of the pending pool.
    ExperimentStatusFactory(experiment=experiment, status=ExperimentLifeCycle.RUNNING)
    assert experiment_group.pending_experiments.count() == 1
    assert experiment_group.running_experiments.count() == 1
    with patch('scheduler.experiment_scheduler.stop_experiment') as _:  # noqa
        ExperimentStatusFactory(experiment=experiment,
                                status=ExperimentLifeCycle.SUCCEEDED)
    assert experiment_group.pending_experiments.count() == 1
    assert experiment_group.running_experiments.count() == 0
    assert experiment_group.succeeded_experiments.count() == 1
    with patch('scheduler.tasks.experiments.experiments_build.apply_async') as start_build:
        experiment.resume()
        assert start_build.call_count == 1
    # Resume puts the experiment back in the pending pool; the succeeded record stays.
    assert experiment_group.pending_experiments.count() == 2
    assert experiment_group.running_experiments.count() == 0
    assert experiment_group.succeeded_experiments.count() == 1
class TestBuildLogsViewV1(BaseViewTest):
    """Tests for the build-job logs endpoint: temp logs for running jobs,
    final logs for finished jobs."""
    # Number of fake log lines written per log file.
    num_log_lines = 10
    HAS_AUTH = True

    def setUp(self):
        super().setUp()
        project = ProjectFactory(user=self.auth_client.user)
        self.logs = []
        self.job = BuildJobFactory(project=project)
        self.url = '/{}/{}/{}/builds/{}/logs'.format(
            API_V1,
            project.user.username,
            project.name,
            self.job.id)

    def create_logs(self, temp):
        """Write num_log_lines fake sentences to the job's temp or final log file."""
        log_path = stores.get_job_logs_path(job_name=self.job.unique_name, temp=temp)
        stores.create_job_logs_path(job_name=self.job.unique_name, temp=temp)
        fake = Faker()
        self.logs = []
        for _ in range(self.num_log_lines):
            self.logs.append(fake.sentence())
        with open(log_path, 'w') as file:
            for line in self.logs:
                file.write(line)
                file.write('\n')

    def test_get_done_job(self):
        """A finished job serves only the final (non-temp) log file."""
        self.job.set_status(JobLifeCycle.SUCCEEDED)
        self.assertTrue(self.job.is_done)
        # No logs
        resp = self.auth_client.get(self.url)
        assert resp.status_code == status.HTTP_404_NOT_FOUND
        # Check that it does not return the temp file
        self.create_logs(temp=True)
        resp = self.auth_client.get(self.url)
        assert resp.status_code == status.HTTP_404_NOT_FOUND
        # Check it returns the correct file
        self.create_logs(temp=False)
        resp = self.auth_client.get(self.url)
        assert resp.status_code == status.HTTP_200_OK
        # The streaming response exposes its payload via a private iterator.
        data = [i for i in resp._iterator]  # pylint:disable=protected-access
        data = [d for d in data[0].decode('utf-8').split('\n') if d]
        assert len(data) == len(self.logs)
        assert data == self.logs

    @patch('api.build_jobs.views.process_logs')
    def test_get_non_done_job(self, _):
        """A running job serves only the temp log file."""
        self.assertFalse(self.job.is_done)
        # No logs
        resp = self.auth_client.get(self.url)
        assert resp.status_code == status.HTTP_404_NOT_FOUND
        # Check that it does not return the non-temp file
        self.create_logs(temp=False)
        resp = self.auth_client.get(self.url)
        assert resp.status_code == status.HTTP_404_NOT_FOUND
        # Check it returns the correct file
        self.create_logs(temp=True)
        resp = self.auth_client.get(self.url)
        assert resp.status_code == status.HTTP_200_OK
        data = [i for i in resp._iterator]  # pylint:disable=protected-access
        data = [d for d in data[0].decode('utf-8').split('\n') if d]
        assert len(data) == len(self.logs)
        assert data == self.logs
def test_build_job_creation_triggers_status_creation(self):
    """Creating a build job records an initial CREATED status row."""
    job = BuildJobFactory()
    status_rows = BuildJobStatus.objects.filter(job=job)
    assert status_rows.count() == 1
    assert job.last_status == JobLifeCycle.CREATED
def test_build_job_creation_triggers_status_creation_mock(self):
    """The creation path calls set_status exactly once."""
    with patch.object(BuildJob, 'set_status') as set_status_mock:
        BuildJobFactory()
    assert set_status_mock.call_count == 1
def test_creation_with_bad_config(self):
    """An invalid content payload is rejected with a ValidationError."""
    with self.assertRaises(ValidationError):
        BuildJobFactory(content='foo')
def test_hyperband_rescheduling(self, create_build_job):
    """Hyperband groups reschedule new suggestions after a bracket completes,
    and reduce configs instead when the bracket iteration calls for it."""
    build = BuildJobFactory()
    BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
    create_build_job.return_value = build, True, True
    with patch('hpsearch.tasks.hyperband.hp_hyperband_start.apply_async') as mock_fct:
        ExperimentGroupFactory(content=experiment_group_spec_content_hyperband)
    assert mock_fct.call_count == 2
    with patch.object(GroupChecks, 'is_checked') as mock_is_check:
        with patch('hpsearch.tasks.hyperband.hp_hyperband_iterate.apply_async') as mock_fct1:
            with patch('scheduler.tasks.experiments.'
                       'experiments_build.apply_async') as mock_fct2:
                mock_is_check.return_value = False
                experiment_group = ExperimentGroupFactory(
                    content=experiment_group_spec_content_hyperband_trigger_reschedule)
                assert experiment_group.iteration_config.num_suggestions == 9
                assert mock_fct1.call_count == 2
                # 9 experiments, but since we are mocking the scheduling function,
                # it's ~ 3 x calls; every call to start tries to schedule again,
                # but in reality it's just 9 calls
                assert mock_fct2.call_count >= 9 * 2
    # Fake reschedule
    with patch('hpsearch.tasks.hyperband.hp_hyperband_start.apply_async') as mock_fct:
        experiment_group = ExperimentGroupFactory(
            content=experiment_group_spec_content_hyperband_trigger_reschedule)
    # One start call per chunk of experiments, plus one.
    self.assertEqual(
        mock_fct.call_count,
        math.ceil(experiment_group.experiments.count() / conf.get('GROUP_CHUNKS')) + 1)
    # Force an iteration state that triggers rescheduling on the next start.
    ExperimentGroupIteration.objects.create(
        experiment_group=experiment_group,
        data={'iteration': 0, 'bracket_iteration': 21, 'num_suggestions': 9})
    experiment_group.iteration.experiments.set(
        experiment_group.experiments.values_list('id', flat=True))
    # Mark experiments as done
    with patch('scheduler.experiment_scheduler.stop_experiment') as _:  # noqa
        with patch('hpsearch.tasks.hyperband.'
                   'hp_hyperband_start.apply_async') as xp_trigger_start:
            for xp in experiment_group.experiments.all():
                ExperimentStatusFactory(experiment=xp,
                                        status=ExperimentLifeCycle.SUCCEEDED)
            assert xp_trigger_start.call_count == experiment_group.experiments.count()
        with patch('hpsearch.tasks.hyperband.hp_hyperband_create.apply_async') as mock_fct1:
            hp_hyperband_start(experiment_group.id)
        # The completed bracket triggers exactly one create-suggestions task.
        assert mock_fct1.call_count == 1
    # Fake reduce
    with patch('hpsearch.tasks.hyperband.hp_hyperband_start.apply_async') as mock_fct:
        experiment_group = ExperimentGroupFactory(
            content=experiment_group_spec_content_hyperband_trigger_reschedule)
    self.assertEqual(
        mock_fct.call_count,
        math.ceil(experiment_group.experiments.count() / conf.get('GROUP_CHUNKS')) + 1)
    assert experiment_group.non_done_experiments.count() == 9
    # Mark experiment as done
    with patch('scheduler.experiment_scheduler.stop_experiment') as _:  # noqa
        with patch('hpsearch.tasks.hyperband.'
                   'hp_hyperband_start.apply_async') as xp_trigger_start:
            for xp in experiment_group.experiments.all():
                ExperimentStatusFactory(experiment=xp,
                                        status=ExperimentLifeCycle.SUCCEEDED)
            assert xp_trigger_start.call_count == experiment_group.experiments.count()
    with patch('hpsearch.tasks.hyperband.hp_hyperband_start.apply_async') as mock_fct2:
        with patch.object(HyperbandIterationManager, 'reduce_configs') as mock_fct3:
            hp_hyperband_start(experiment_group.id)
        # Without the forced iteration state, the manager reduces configs once.
        assert mock_fct2.call_count == 1
        assert mock_fct3.call_count == 1
def setUp(self):
    """Create a build job and resolve its heartbeat endpoint URL."""
    super().setUp()
    owner_project = ProjectFactory(user=self.auth_client.user)
    self.build = BuildJobFactory(project=owner_project)
    self.url = '/{}/{}/{}/builds/{}/_heartbeat'.format(
        API_V1,
        owner_project.user.username,
        owner_project.name,
        self.build.id)
def get_instance():
    """Provide the instance under test: a freshly created build job."""
    instance = BuildJobFactory()
    return instance
def test_render_works_as_expected(self):
    """The rendered Dockerfile reflects base image, env vars, and build steps."""
    build_job = BuildJobFactory()
    # Create a repo folder
    repo_path = os.path.join(conf.get('REPOS_MOUNT_PATH'), 'repo')
    os.mkdir(repo_path)
    # By default it should use the FROM image, declare WORKDIR and COPY code
    builder = DockerFileGenerator(repo_path=repo_path,
                                  from_image='busybox',
                                  build_steps=build_job.build_steps,
                                  env_vars=build_job.build_env_vars)
    dockerfile = builder.render()
    builder.clean()
    assert 'FROM busybox' in dockerfile
    assert 'WORKDIR {}'.format(builder.WORKDIR) in dockerfile
    assert 'COPY {}'.format(builder.folder_name) in dockerfile
    # Add env vars
    builder = DockerFileGenerator(repo_path=repo_path,
                                  from_image='busybox',
                                  build_steps=build_job.build_steps,
                                  env_vars=[('BLA', 'BLA')])
    dockerfile = builder.render()
    assert 'ENV BLA BLA' in dockerfile
    builder.clean()
    # Add a polyaxon_requirements.txt and polyaxon_setup.sh files to repo path
    Path(os.path.join(repo_path, 'polyaxon_requirements.txt')).touch()
    Path(os.path.join(repo_path, 'polyaxon_setup.sh')).touch()
    # Add step to act on them
    build_steps = [
        'pip install -r polyaxon_requirements.txt',
        './polyaxon_setup.sh'
    ]
    builder = DockerFileGenerator(repo_path=repo_path,
                                  from_image='busybox',
                                  env_vars=build_job.build_env_vars,
                                  build_steps=build_steps)
    dockerfile = builder.render()
    assert 'COPY {} {}'.format(
        builder.polyaxon_requirements_path, builder.WORKDIR) in dockerfile
    assert 'COPY {} {}'.format(
        builder.polyaxon_setup_path, builder.WORKDIR) in dockerfile
    assert 'RUN {}'.format(build_steps[0]) in dockerfile
    assert 'RUN {}'.format(build_steps[1]) in dockerfile
    builder.clean()
    # Add conda env
    # NOTE(review): the step text references 'environment.yml' while the file
    # touched is 'conda_env.yml' — the assertions below only check the step text
    # verbatim, so this passes either way; confirm whether the mismatch is intended.
    Path(os.path.join(repo_path, 'conda_env.yml')).touch()
    build_steps.append('conda env update -n base -f environment.yml')
    builder = DockerFileGenerator(repo_path=repo_path,
                                  from_image='busybox',
                                  env_vars=build_job.build_env_vars,
                                  build_steps=build_steps)
    dockerfile = builder.render()
    assert 'COPY {} {}'.format(
        builder.polyaxon_requirements_path, builder.WORKDIR) in dockerfile
    assert 'COPY {} {}'.format(
        builder.polyaxon_setup_path, builder.WORKDIR) in dockerfile
    assert 'COPY {} {}'.format(
        builder.polyaxon_conda_env_path, builder.WORKDIR) in dockerfile
    assert 'RUN {}'.format(build_steps[0]) in dockerfile
    assert 'RUN {}'.format(build_steps[1]) in dockerfile
    builder.clean()
def setUp(self):
    """Create the build job shared by every test in this case."""
    super().setUp()
    self.build_job = BuildJobFactory()
class TestBuildJobStatuses(BaseTest):
    """Build-job status transitions propagate to every entity that depends on
    the build (notebook, tensorboard, job, experiment)."""

    def setUp(self):
        super().setUp()
        self.project = ProjectFactory()
        self.build_job = BuildJobFactory(project=self.project)
        # One dependent of each kind, all tied to the same build job.
        self.notebook = NotebookJobFactory(project=self.project, build_job=self.build_job)
        self.tensorboard = TensorboardJobFactory(project=self.project,
                                                 build_job=self.build_job)
        self.job = JobFactory(project=self.project, build_job=self.build_job)
        self.experiment = ExperimentFactory(project=self.project,
                                            build_job=self.build_job)

    def test_build_job_failed_sets_dependency_to_failed(self):
        """A FAILED build marks every dependent entity FAILED."""
        assert self.build_job.last_status != JobLifeCycle.FAILED
        assert self.notebook.last_status != JobLifeCycle.FAILED
        assert self.tensorboard.last_status != JobLifeCycle.FAILED
        assert self.job.last_status != JobLifeCycle.FAILED
        assert self.experiment.last_status != ExperimentLifeCycle.FAILED
        self.build_job.set_status(JobLifeCycle.FAILED)
        assert self.build_job.last_status == JobLifeCycle.FAILED
        self.notebook.refresh_from_db()
        assert self.notebook.last_status == JobLifeCycle.FAILED
        self.tensorboard.refresh_from_db()
        assert self.tensorboard.last_status == JobLifeCycle.FAILED
        self.job.refresh_from_db()
        assert self.job.last_status == JobLifeCycle.FAILED
        self.experiment.refresh_from_db()
        assert self.experiment.last_status == ExperimentLifeCycle.FAILED

    def test_build_job_stopped_sets_dependency_to_stopped(self):
        """A STOPPED build marks every dependent entity STOPPED."""
        assert self.build_job.last_status != JobLifeCycle.STOPPED
        assert self.notebook.last_status != JobLifeCycle.STOPPED
        assert self.tensorboard.last_status != JobLifeCycle.STOPPED
        assert self.job.last_status != JobLifeCycle.STOPPED
        assert self.experiment.last_status != ExperimentLifeCycle.STOPPED
        self.build_job.set_status(JobLifeCycle.STOPPED)
        assert self.build_job.last_status == JobLifeCycle.STOPPED
        self.notebook.refresh_from_db()
        assert self.notebook.last_status == JobLifeCycle.STOPPED
        self.tensorboard.refresh_from_db()
        assert self.tensorboard.last_status == JobLifeCycle.STOPPED
        self.job.refresh_from_db()
        assert self.job.last_status == JobLifeCycle.STOPPED
        self.experiment.refresh_from_db()
        assert self.experiment.last_status == ExperimentLifeCycle.STOPPED

    def test_build_job_succeeded_starts_dependency(self):
        """A SUCCEEDED build triggers the start path of each dependent entity."""
        assert self.build_job.last_status != JobLifeCycle.SUCCEEDED
        assert self.notebook.last_status != JobLifeCycle.SUCCEEDED
        assert self.tensorboard.last_status != JobLifeCycle.SUCCEEDED
        assert self.job.last_status != JobLifeCycle.SUCCEEDED
        assert self.experiment.last_status != ExperimentLifeCycle.SUCCEEDED
        # Patch all four start entry points so no real scheduling happens.
        with patch('scheduler.notebook_scheduler.start_notebook') as mock_notebook:
            with patch('scheduler.tensorboard_scheduler.start_tensorboard') as mock_tensorboard:
                with patch('scheduler.experiment_scheduler.start_experiment') as mock_experiment:
                    with patch('scheduler.job_scheduler.start_job') as mock_job:
                        self.build_job.set_status(JobLifeCycle.SUCCEEDED)
        assert self.build_job.last_status == JobLifeCycle.SUCCEEDED
        assert mock_notebook.call_count == 1
        assert mock_tensorboard.call_count == 1
        assert mock_experiment.call_count == 1
        assert mock_job.call_count == 1
def get_job_object(self, job_state):
    """Materialize the project and build job referenced by the job state's labels."""
    labels = job_state.details.labels
    project = ProjectFactory(uuid=labels.project_uuid.hex)
    return BuildJobFactory(uuid=labels.job_uuid.hex, project=project)
def setUp(self):
    # NOTE(review): the build job is created *before* super().setUp() runs,
    # unlike sibling setUps in this file — presumably deliberate (e.g. to run
    # before patches applied in the base setUp); confirm before reordering.
    self.build_job = BuildJobFactory(project=ProjectFactory())
    super().setUp()