Example #1
0
    def test_scheduler_create_build_job_of_already_done_job(self):
        """Check the case when the job is already done and
        we need to create a new job."""
        config = {'image': 'busybox:tag'}
        stopped_job = BuildJobFactory(
            project=self.project,
            user=self.project.user,
            code_reference=self.code_reference,
            config=BuildSpecification.create_specification(config))
        stopped_job.set_status(JobLifeCycle.STOPPED)

        assert BuildJob.objects.count() == 1
        with patch('scheduler.dockerizer_scheduler.start_dockerizer') as mock_start, \
                patch('scheduler.dockerizer_scheduler.check_image') as mock_check:
            mock_start.return_value = True
            mock_check.return_value = False
            build_job, image_exists, build_status = dockerizer_scheduler.create_build_job(
                user=self.project.user,
                project=self.project,
                config=config,
                code_reference=self.code_reference)

        # The stopped job cannot be reused, so a fresh build job was created.
        assert mock_start.call_count == 1
        assert mock_check.call_count == 1
        assert image_exists is False
        assert build_status is True
        assert BuildJob.objects.count() == 2
    def test_set_dockerfile(self):
        """Setting a dockerfile by build-job uuid persists it on the model."""
        build_job = BuildJobFactory()
        assert build_job.dockerfile is None

        build_jobs_set_dockerfile(build_job_uuid=build_job.uuid.hex, dockerfile='dockerfile')

        build_job.refresh_from_db()
        assert build_job.dockerfile == 'dockerfile'
Example #3
0
    def test_archive(self):
        """Archiving a project hides its runs from the default managers;
        unarchiving restores them; the `all_*` managers always see them."""
        project = ProjectFactory()
        ExperimentGroupFactory(project=project)
        ExperimentFactory(project=project)
        JobFactory(project=project)
        BuildJobFactory(project=project)
        NotebookJobFactory(project=project)
        TensorboardJobFactory(project=project)

        # Related managers affected by archiving vs. the all-inclusive ones.
        default_managers = ('experiments', 'experiment_groups', 'jobs',
                            'build_jobs', 'notebook_jobs', 'tensorboard_jobs')
        all_managers = ('all_experiments', 'all_experiment_groups',
                        'all_notebook_jobs', 'all_tensorboard_jobs')

        assert project.deleted is False
        for rel in default_managers:
            assert getattr(project, rel).count() == 1
        for rel in all_managers:
            assert getattr(project, rel).count() == 1

        project.archive()
        assert project.deleted is True
        for rel in default_managers:
            assert getattr(project, rel).count() == 0
        for rel in all_managers:
            assert getattr(project, rel).count() == 1

        project.unarchive()
        assert project.deleted is False
        for rel in default_managers:
            assert getattr(project, rel).count() == 1
        for rel in all_managers:
            assert getattr(project, rel).count() == 1
Example #4
0
    def test_delete_archives_and_schedules_deletion(self,
                                                    xp_group_scheduler_mock,
                                                    xp_scheduler_mock,
                                                    job_scheduler_mock,
                                                    build_scheduler_mock,
                                                    notebook_scheduler_mock,
                                                    tensorboard_scheduler_mock):
        """Deleting the project archives it and invokes every scheduler
        for its live runs."""
        # Two of each run type on top of the fixture's defaults.
        for _ in range(2):
            JobFactory(project=self.object)
            BuildJobFactory(project=self.object)
            TensorboardJobFactory(project=self.object)
            NotebookJobFactory(project=self.object)

        # Mark one run of each kind as live so the schedulers must fire.
        self.object.experiment_groups.first().set_status(ExperimentGroupLifeCycle.RUNNING)
        self.object.experiments.first().set_status(ExperimentLifeCycle.RUNNING)
        self.object.jobs.first().set_status(JobLifeCycle.RUNNING)
        self.object.build_jobs.first().set_status(JobLifeCycle.RUNNING)
        self.object.notebook_jobs.first().set_status(JobLifeCycle.RUNNING)
        self.object.tensorboard_jobs.first().set_status(JobLifeCycle.RUNNING)

        assert self.queryset.count() == 1
        for model in (ExperimentGroup, Experiment, Job,
                      BuildJob, NotebookJob, TensorboardJob):
            assert model.objects.count() == 2

        resp = self.auth_client.delete(self.url)
        assert xp_group_scheduler_mock.call_count == 2
        assert xp_scheduler_mock.call_count == 1
        assert job_scheduler_mock.called
        assert build_scheduler_mock.called
        assert notebook_scheduler_mock.called
        assert tensorboard_scheduler_mock.called

        assert resp.status_code == status.HTTP_204_NO_CONTENT
        assert self.queryset.count() == 0
        assert Project.all.filter(user=self.object.user).count() == 0
        for model in (ExperimentGroup, Experiment, Job,
                      BuildJob, TensorboardJob, NotebookJob):
            assert model.all.count() == 0
Example #5
0
    def test_non_independent_experiment_creation_doesnt_trigger_start(
            self, create_build_job):
        """An experiment created inside a group must not schedule its own start."""
        build = BuildJobFactory()
        BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
        create_build_job.return_value = build, True, True

        with patch('hpsearch.tasks.hp_create.apply_async') as hp_create_mock:
            experiment_group = ExperimentGroupFactory()
        assert hp_create_mock.call_count == 1

        with patch('scheduler.tasks.experiments.experiments_start.apply_async') as start_mock:
            with patch.object(Experiment, 'set_status') as set_status_mock:
                ExperimentFactory(experiment_group=experiment_group)

        # No start was scheduled; only the initial status update happened.
        assert start_mock.call_count == 0
        assert set_status_mock.call_count == 1
Example #6
0
    def setUp(self):
        """Create a build job, its logs path, and write a few fake log lines."""
        super().setUp()
        project = ProjectFactory(user=self.auth_client.user)
        job = BuildJobFactory(project=project)
        self.url = '/{}/{}/{}/builds/{}/logs'.format(API_V1,
                                                     project.user.username,
                                                     project.name, job.id)

        log_path = get_job_logs_path(job.unique_name)
        create_job_logs_path(job_name=job.unique_name)
        fake = Faker()
        self.logs = [fake.sentence() for _ in range(self.num_log_lines)]
        # One sentence per line, newline-terminated (same bytes as
        # writing each line followed by '\n').
        with open(log_path, 'w') as log_file:
            log_file.write('\n'.join(self.logs))
            log_file.write('\n')
Example #7
0
    def test_create_experiment_with_resources_spec(self, spawner_mock):
        """An independent experiment with a resources spec reaches STARTING
        and creates its three jobs (master/worker/ps) with resources."""
        spec = compiler.compile(kind=kinds.EXPERIMENT,
                                values=exec_experiment_resources_content)
        spawner = spawner_mock.return_value
        spawner.start_experiment.return_value = start_experiment_value
        spawner.job_uuids = {
            'master': ['fa6203c189a855dd977019854a7ffcc3'],
            'worker': ['3a9c9b0bd56b5e9fbdbd1a3d43d57960'],
            'ps': ['59e3601232b85a3d8be2511f23a62945']
        }
        spawner.spec = spec

        with patch('scheduler.dockerizer_scheduler.create_build_job') as build_mock:
            build_mock.return_value = BuildJobFactory(), True, True
            experiment = ExperimentFactory(content=spec.raw_data)
        assert experiment.is_independent is True

        statuses = ExperimentStatus.objects.filter(experiment=experiment)
        assert statuses.count() == 3
        assert list(statuses.values_list('status', flat=True)) == [
            ExperimentLifeCycle.CREATED,
            ExperimentLifeCycle.SCHEDULED,
            ExperimentLifeCycle.STARTING,
        ]

        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.STARTING

        # Assert 3 jobs were created with resources
        jobs = ExperimentJob.objects.filter(experiment=experiment)
        assert jobs.count() == 3
        assert JobResources.objects.count() == 3
        job_statuses = ExperimentJob.objects.values_list('statuses__status',
                                                         flat=True)
        assert set(job_statuses) == {JobLifeCycle.CREATED}
        assert experiment.calculated_status == ExperimentLifeCycle.STARTING

        for job in jobs:
            # Each job's latest status is CREATED.
            assert job.last_status == JobLifeCycle.CREATED
Example #8
0
    def test_experiment_group_should_stop_early(self, create_build_job):
        """Early stopping triggers on matching metrics and keeps holding
        even after the metric rows themselves are deleted."""
        build = BuildJobFactory()
        BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
        create_build_job.return_value = build, True, True

        with patch('hpsearch.tasks.random.hp_random_search_start.apply_async') as mock_fct:
            experiment_group = ExperimentGroupFactory(
                content=experiment_group_spec_content_early_stopping)

        assert mock_fct.call_count == 1
        assert experiment_group.should_stop_early() is False
        assert experiment_group.pending_experiments.count() == 2
        assert experiment_group.iteration_config.num_suggestions == 2

        # A precision metric on the first experiment satisfies early stopping.
        experiment1, experiment2 = list(experiment_group.experiments.all())
        precision_metric = ExperimentMetric.objects.create(
            experiment=experiment1, values={'precision': 0.99})
        assert experiment_group.should_stop_early() is True

        # A loss metric on the second experiment also satisfies it.
        loss_metric = ExperimentMetric.objects.create(
            experiment=experiment2, values={'loss': 0.01})
        assert experiment_group.should_stop_early() is True

        # Deleting the metric rows does not reset the decision: the
        # experiments' last_metric still holds the last values.
        precision_metric.delete()
        loss_metric.delete()
        assert experiment_group.should_stop_early() is True

        # Fresh metrics outside the stopping thresholds flip it back.
        ExperimentMetric.objects.create(experiment=experiment1,
                                        values={'precision': 0.8})
        ExperimentMetric.objects.create(experiment=experiment2,
                                        values={'loss': 0.2})
        assert experiment_group.should_stop_early() is False
Example #9
0
    def setUp(self):
        """Create a group (with the build mocked as succeeded), add one
        running experiment, and build the group-stop URL."""
        super().setUp()
        project = ProjectFactory(user=self.auth_client.user)
        with patch('hpsearch.tasks.grid.hp_grid_search_start.apply_async') as search_mock:
            with patch('scheduler.dockerizer_scheduler.create_build_job') as build_mock:
                build = BuildJobFactory()
                BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
                build_mock.return_value = build, True, True
                self.object = self.factory_class(project=project)

        assert search_mock.call_count == 1
        # Add a running experiment
        experiment = ExperimentFactory(experiment_group=self.object)
        ExperimentStatusFactory(experiment=experiment, status=ExperimentLifeCycle.RUNNING)
        self.url = '/{}/{}/{}/groups/{}/stop'.format(API_V1,
                                                     project.user.username,
                                                     project.name,
                                                     self.object.id)
Example #10
0
    def test_independent_experiment_creation_triggers_experiment_scheduling(
            self):
        """An independent experiment is scheduled on creation; with the
        build call mocked it ends up FAILED."""
        content = ExperimentSpecification.read(exec_experiment_spec_content)
        with patch('scheduler.dockerizer_scheduler.create_build_job') as build_mock:
            build_mock.return_value = BuildJobFactory(), True, True
            experiment = ExperimentFactory(content=content.raw_data)
        assert experiment.is_independent is True

        statuses = ExperimentStatus.objects.filter(experiment=experiment)
        assert statuses.count() == 3
        assert list(statuses.values_list('status', flat=True)) == [
            ExperimentLifeCycle.CREATED,
            ExperimentLifeCycle.SCHEDULED,
            ExperimentLifeCycle.FAILED,
        ]
        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.FAILED
Example #11
0
    def test_stop_pending_experiments(self, create_build_job):
        """Stopping with pending=True clears pending experiments but
        leaves running ones untouched."""
        build = BuildJobFactory()
        BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
        create_build_job.return_value = build, True, True

        with patch('hpsearch.tasks.random.hp_random_search_start.apply_async') as search_mock:
            experiment_group = ExperimentGroupFactory(
                content=experiment_group_spec_content_early_stopping)
        running_experiment = ExperimentFactory(experiment_group=experiment_group)
        ExperimentStatusFactory(experiment=running_experiment,
                                status=ExperimentLifeCycle.RUNNING)

        assert search_mock.call_count == 1
        assert experiment_group.pending_experiments.count() == 2
        assert experiment_group.running_experiments.count() == 1

        experiments_group_stop_experiments(experiment_group_id=experiment_group.id,
                                           pending=True)

        assert experiment_group.pending_experiments.count() == 0
        assert experiment_group.running_experiments.count() == 1
Example #12
0
    def test_get_requirements_and_setup_path_works_as_expected(self):
        """Requirements/setup paths resolve to polyaxon_*-prefixed files
        first, then plain requirements.txt/setup.sh, else None."""
        build_job = BuildJobFactory()
        # Create a repo folder
        repo_path = os.path.join(conf.get('REPOS_MOUNT_PATH'), 'repo')
        os.mkdir(repo_path)

        def make_builder():
            # Fresh generator so it re-scans the repo contents.
            return DockerFileGenerator(repo_path=repo_path,
                                       from_image='busybox',
                                       build_steps=build_job.build_steps,
                                       env_vars=build_job.env_vars)

        builder = make_builder()
        assert builder.polyaxon_requirements_path is None
        assert builder.polyaxon_setup_path is None
        builder.clean()

        # Add a polyaxon_requirements.txt and polyaxon_setup.sh files to repo path
        Path(os.path.join(repo_path, 'polyaxon_requirements.txt')).touch()
        Path(os.path.join(repo_path, 'polyaxon_setup.sh')).touch()
        builder = make_builder()
        assert builder.polyaxon_requirements_path == 'repo/polyaxon_requirements.txt'
        assert builder.polyaxon_setup_path == 'repo/polyaxon_setup.sh'
        builder.clean()

        # Swap the prefixed files for the unprefixed variants.
        os.remove(os.path.join(repo_path, 'polyaxon_requirements.txt'))
        os.remove(os.path.join(repo_path, 'polyaxon_setup.sh'))
        Path(os.path.join(repo_path, 'requirements.txt')).touch()
        Path(os.path.join(repo_path, 'setup.sh')).touch()
        builder = make_builder()
        assert builder.polyaxon_requirements_path == 'repo/requirements.txt'
        assert builder.polyaxon_setup_path == 'repo/setup.sh'
        builder.clean()
Example #13
0
    def test_independent_experiment_creation_with_run_triggers_experiment_scheduling(
            self):
        """Creation only schedules the build task; running that task then
        drives the experiment through scheduling (to FAILED here)."""
        config = ExperimentSpecification.read(exec_experiment_spec_content)
        # Create a repo for the project
        repo = RepoFactory()

        with patch('scheduler.tasks.experiments.experiments_build.apply_async') as mock_build:
            experiment = ExperimentFactory(config=config.parsed_data,
                                           project=repo.project)

        assert mock_build.call_count == 1
        assert experiment.project.repo is not None
        assert experiment.is_independent is True

        statuses = ExperimentStatus.objects.filter(experiment=experiment)
        assert statuses.count() == 1
        assert list(statuses.values_list('status', flat=True)) == [
            ExperimentLifeCycle.CREATED]

        with patch('scheduler.dockerizer_scheduler.create_build_job') as mock_start:
            build = BuildJobFactory()
            BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED,
                                          job=build)
            mock_start.return_value = build, True, True
            experiments_build(experiment_id=experiment.id)

        assert mock_start.call_count == 1
        assert statuses.count() == 3
        assert list(statuses.values_list('status', flat=True)) == [
            ExperimentLifeCycle.CREATED,
            ExperimentLifeCycle.SCHEDULED,
            ExperimentLifeCycle.FAILED,
        ]
        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.FAILED
Example #14
0
    def test_create_job_with_valid_spec(self, spawner_mock):
        """A job created from a valid spec is scheduled right away."""
        config = JobSpecification.read(job_spec_content)

        spawner = spawner_mock.return_value
        spawner.start_job.return_value = {'pod': 'pod_content'}
        spawner.spec = config

        with patch('scheduler.dockerizer_scheduler.create_build_job') as build_mock:
            build = BuildJobFactory()
            BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
            build_mock.return_value = build, True, True
            job = JobFactory(config=config.parsed_data)

        statuses = JobStatus.objects.filter(job=job)
        assert statuses.count() == 2
        assert list(statuses.values_list('status', flat=True)) == [
            JobLifeCycle.CREATED, JobLifeCycle.SCHEDULED]
        job.refresh_from_db()
        assert job.last_status == JobLifeCycle.SCHEDULED
Example #15
0
    def test_get_requirements_and_setup_path_works_as_expected(self, _):
        """Requirements/setup paths resolve to polyaxon_*-prefixed files
        first, then plain requirements.txt/setup.sh, else None."""
        build_job = BuildJobFactory()
        # Create a repo folder
        repo_path = os.path.join(settings.REPOS_MOUNT_PATH, 'repo')
        os.mkdir(repo_path)

        def make_builder():
            # Fresh builder so it re-scans the repo contents.
            return DockerBuilder(build_job=build_job,
                                 repo_path=repo_path,
                                 from_image='busybox')

        builder = make_builder()
        assert builder.polyaxon_requirements_path is None
        assert builder.polyaxon_setup_path is None
        builder.clean()

        # Add a polyaxon_requirements.txt and polyaxon_setup.sh files to repo path
        Path(os.path.join(repo_path, 'polyaxon_requirements.txt')).touch()
        Path(os.path.join(repo_path, 'polyaxon_setup.sh')).touch()
        builder = make_builder()
        assert builder.polyaxon_requirements_path == 'repo/polyaxon_requirements.txt'
        assert builder.polyaxon_setup_path == 'repo/polyaxon_setup.sh'
        builder.clean()

        # Swap the prefixed files for the unprefixed variants.
        os.remove(os.path.join(repo_path, 'polyaxon_requirements.txt'))
        os.remove(os.path.join(repo_path, 'polyaxon_setup.sh'))
        Path(os.path.join(repo_path, 'requirements.txt')).touch()
        Path(os.path.join(repo_path, 'setup.sh')).touch()
        builder = make_builder()
        assert builder.polyaxon_requirements_path == 'repo/requirements.txt'
        assert builder.polyaxon_setup_path == 'repo/setup.sh'
        builder.clean()
Example #16
0
    def test_scheduler_create_build_job_image_already_exists(self):
        """Check the case when the image is already built."""
        config = {'image': 'busybox:tag'}
        BuildJobFactory(project=self.project,
                        user=self.project.user,
                        code_reference=self.code_reference,
                        config=BuildSpecification.create_specification(config))

        assert BuildJob.objects.count() == 1
        with patch('scheduler.dockerizer_scheduler.start_dockerizer') as mock_start, \
                patch('scheduler.dockerizer_scheduler.check_image') as mock_check:
            mock_check.return_value = True
            build_job, image_exists, build_status = dockerizer_scheduler.create_build_job(
                user=self.project.user,
                project=self.project,
                config=config,
                code_reference=self.code_reference)

        # Image already present: no dockerizer started, job count unchanged.
        assert mock_start.call_count == 0
        assert mock_check.call_count == 1
        assert image_exists is True
        assert build_status is False
        assert BuildJob.objects.count() == 1
Example #17
0
    def test_archive(self, create_build_job):
        """Archiving a group hides it and its experiments from the default
        managers; restore brings everything back; `all` always sees them."""
        build = BuildJobFactory()
        BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
        create_build_job.return_value = build, True, True

        with patch('hpsearch.tasks.grid.hp_grid_search_start.apply_async') as mock_fct:
            experiment_group = ExperimentGroupFactory()
        assert mock_fct.call_count == 2

        # Lazy querysets: re-evaluated at every count() below.
        group_experiments = Experiment.objects.filter(experiment_group=experiment_group)
        all_group_experiments = Experiment.all.filter(experiment_group=experiment_group)

        assert experiment_group.deleted is False
        assert group_experiments.count() == 2
        assert ExperimentGroup.objects.count() == 1
        assert ExperimentGroup.all.count() == 1

        experiment_group.archive()
        assert experiment_group.deleted is True
        assert ExperimentGroup.objects.count() == 0
        assert ExperimentGroup.all.count() == 1
        assert group_experiments.count() == 0
        assert all_group_experiments.count() == 2
        assert experiment_group.experiments.count() == 0
        assert experiment_group.all_experiments.count() == 2

        experiment_group.restore()
        assert experiment_group.deleted is False
        assert ExperimentGroup.objects.count() == 1
        assert ExperimentGroup.all.count() == 1
        assert group_experiments.count() == 2
        assert all_group_experiments.count() == 2
        assert experiment_group.experiments.count() == 2
        assert experiment_group.all_experiments.count() == 2
Example #18
0
    def test_experiment_group_deletion_triggers_stopping_for_running_experiment(self,
                                                                                create_build_job):
        """Deleting a group stops only its scheduled/running experiments."""
        build = BuildJobFactory()
        BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
        create_build_job.return_value = build, True, True
        with patch('hpsearch.tasks.grid.hp_grid_search_start.apply_async') as search_mock:
            experiment_group = ExperimentGroupFactory()
        assert search_mock.call_count == 1

        # One extra experiment that is actually scheduled and has a job.
        experiment = ExperimentFactory(project=experiment_group.project,
                                       experiment_group=experiment_group)
        experiment.set_status(ExperimentLifeCycle.SCHEDULED)
        ExperimentJobFactory(experiment=experiment)

        assert Experiment.objects.filter(experiment_group=experiment_group).count() == 3

        with patch('scheduler.tasks.experiments.experiments_stop.apply_async') as stop_mock:
            experiment_group.delete()

        assert stop_mock.call_count == 1  # Only one experiment was stopped
        assert Experiment.objects.filter(experiment_group=experiment_group).count() == 0
Example #19
0
    def test_spec_creation_triggers_experiments_creations_and_scheduling(
            self, create_build_job):
        """A group spec fans out experiments; status changes move them
        between pending/running/succeeded, and resume re-queues a build."""
        build = BuildJobFactory()
        BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
        create_build_job.return_value = build, True, True
        with patch('hpsearch.tasks.grid.hp_grid_search_start.apply_async') as mock_fct:
            experiment_group = ExperimentGroupFactory()

        group_experiments = Experiment.objects.filter(experiment_group=experiment_group)
        assert group_experiments.count() == 2
        assert mock_fct.call_count == 2
        assert experiment_group.iteration_config.num_suggestions == 2
        assert experiment_group.pending_experiments.count() == 2
        assert experiment_group.running_experiments.count() == 0

        experiment = group_experiments.first()
        ExperimentStatusFactory(experiment=experiment,
                                status=ExperimentLifeCycle.RUNNING)
        assert experiment_group.pending_experiments.count() == 1
        assert experiment_group.running_experiments.count() == 1

        with patch('scheduler.experiment_scheduler.stop_experiment') as _:  # noqa
            ExperimentStatusFactory(experiment=experiment,
                                    status=ExperimentLifeCycle.SUCCEEDED)
        assert experiment_group.pending_experiments.count() == 1
        assert experiment_group.running_experiments.count() == 0
        assert experiment_group.succeeded_experiments.count() == 1

        with patch('scheduler.tasks.experiments.experiments_build.apply_async') as start_build:
            experiment.resume()

        assert start_build.call_count == 1
        assert experiment_group.pending_experiments.count() == 2
        assert experiment_group.running_experiments.count() == 0
        assert experiment_group.succeeded_experiments.count() == 1
Example #20
0
class TestBuildLogsViewV1(BaseViewTest):
    """Tests for the build-job logs endpoint.

    The endpoint serves the archived log file for finished jobs and the
    temp (in-progress) log file for jobs that are still running.
    """
    # Number of fake log lines written per fixture file.
    num_log_lines = 10
    HAS_AUTH = True

    def setUp(self):
        super().setUp()
        project = ProjectFactory(user=self.auth_client.user)
        self.logs = []
        self.job = BuildJobFactory(project=project)
        self.url = '/{}/{}/{}/builds/{}/logs'.format(
            API_V1,
            project.user.username,
            project.name,
            self.job.id)

    def create_logs(self, temp):
        """Write ``num_log_lines`` fake sentences to the job's log file.

        ``temp`` selects the temp (running-job) vs archived (done-job)
        log location used by the stores backend.
        """
        log_path = stores.get_job_logs_path(job_name=self.job.unique_name, temp=temp)
        stores.create_job_logs_path(job_name=self.job.unique_name, temp=temp)
        fake = Faker()
        self.logs = []
        for _ in range(self.num_log_lines):
            self.logs.append(fake.sentence())
        with open(log_path, 'w') as file:
            for line in self.logs:
                file.write(line)
                file.write('\n')

    def test_get_done_job(self):
        """A finished job serves only the archived (non-temp) log file."""
        self.job.set_status(JobLifeCycle.SUCCEEDED)
        self.assertTrue(self.job.is_done)
        # No logs yet
        resp = self.auth_client.get(self.url)
        assert resp.status_code == status.HTTP_404_NOT_FOUND
        # Check that it does not return the temp file
        self.create_logs(temp=True)
        resp = self.auth_client.get(self.url)
        assert resp.status_code == status.HTTP_404_NOT_FOUND
        # Check that it returns the correct (archived) file
        self.create_logs(temp=False)
        resp = self.auth_client.get(self.url)
        assert resp.status_code == status.HTTP_200_OK

        # Response is streamed; collect chunks and split back into lines.
        data = [i for i in resp._iterator]  # pylint:disable=protected-access
        data = [d for d in data[0].decode('utf-8').split('\n') if d]
        assert len(data) == len(self.logs)
        assert data == self.logs

    @patch('api.build_jobs.views.process_logs')
    def test_get_non_done_job(self, _):
        """A running job serves only the temp log file."""
        self.assertFalse(self.job.is_done)
        # No logs yet
        resp = self.auth_client.get(self.url)
        assert resp.status_code == status.HTTP_404_NOT_FOUND
        # Check that it does not return the non-temp (archived) file
        self.create_logs(temp=False)
        resp = self.auth_client.get(self.url)
        assert resp.status_code == status.HTTP_404_NOT_FOUND
        # Check that it returns the correct (temp) file
        self.create_logs(temp=True)
        resp = self.auth_client.get(self.url)
        assert resp.status_code == status.HTTP_200_OK

        # Response is streamed; collect chunks and split back into lines.
        data = [i for i in resp._iterator]  # pylint:disable=protected-access
        data = [d for d in data[0].decode('utf-8').split('\n') if d]
        assert len(data) == len(self.logs)
        assert data == self.logs
Example #21
0
 def test_build_job_creation_triggers_status_creation(self):
     """A freshly created build job gets exactly one status: CREATED."""
     new_job = BuildJobFactory()
     job_statuses = BuildJobStatus.objects.filter(job=new_job)
     assert job_statuses.count() == 1
     assert new_job.last_status == JobLifeCycle.CREATED
Example #22
0
 def test_build_job_creation_triggers_status_creation_mock(self):
     """Creating a build job calls ``BuildJob.set_status`` exactly once."""
     with patch.object(BuildJob, 'set_status') as set_status_mock:
         BuildJobFactory()
     assert set_status_mock.call_count == 1
Example #23
0
 def test_creation_with_bad_config(self):
     """Creating a build job with invalid content raises ValidationError."""
     with self.assertRaises(ValidationError):
         BuildJobFactory(content='foo')
Example #24
0
    def test_hyperband_rescheduling(self, create_build_job):
        """Exercise hyperband iteration scheduling end to end:
        initial suggestion scheduling, a faked bracket reschedule, and a
        faked reduce step.

        ``create_build_job`` is a mock injected by a patch decorator (not
        visible in this chunk); it reports a successful, already-available
        build so experiments can be scheduled immediately.
        """
        build = BuildJobFactory()
        BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build)
        create_build_job.return_value = build, True, True

        with patch('hpsearch.tasks.hyperband.hp_hyperband_start.apply_async'
                   ) as mock_fct:
            ExperimentGroupFactory(
                content=experiment_group_spec_content_hyperband)

        assert mock_fct.call_count == 2

        # Force the group checks to report "not checked" so iteration and
        # build scheduling both fire during group creation.
        with patch.object(GroupChecks, 'is_checked') as mock_is_check:
            with patch(
                    'hpsearch.tasks.hyperband.hp_hyperband_iterate.apply_async'
            ) as mock_fct1:
                with patch('scheduler.tasks.experiments.'
                           'experiments_build.apply_async') as mock_fct2:
                    mock_is_check.return_value = False
                    experiment_group = ExperimentGroupFactory(
                        content=
                        experiment_group_spec_content_hyperband_trigger_reschedule
                    )

        assert experiment_group.iteration_config.num_suggestions == 9
        assert mock_fct1.call_count == 2
        # 9 experiments, but since we are mocking the scheduling function, it's ~ 3 x calls,
        # every call to start tries to schedule again, but in reality it's just 9 calls
        assert mock_fct2.call_count >= 9 * 2

        # Fake reschedule: recreate the group, then install an iteration
        # record deep into the bracket so the next start must reschedule.
        with patch('hpsearch.tasks.hyperband.hp_hyperband_start.apply_async'
                   ) as mock_fct:
            experiment_group = ExperimentGroupFactory(
                content=
                experiment_group_spec_content_hyperband_trigger_reschedule)
        self.assertEqual(
            mock_fct.call_count,
            math.ceil(experiment_group.experiments.count() /
                      conf.get('GROUP_CHUNKS')) + 1)
        ExperimentGroupIteration.objects.create(
            experiment_group=experiment_group,
            data={
                'iteration': 0,
                'bracket_iteration': 21,
                'num_suggestions': 9
            })

        experiment_group.iteration.experiments.set(
            experiment_group.experiments.values_list('id', flat=True))

        # Mark experiments as done
        with patch(
                'scheduler.experiment_scheduler.stop_experiment') as _:  # noqa
            with patch('hpsearch.tasks.hyperband.'
                       'hp_hyperband_start.apply_async') as xp_trigger_start:
                for xp in experiment_group.experiments.all():
                    ExperimentStatusFactory(
                        experiment=xp, status=ExperimentLifeCycle.SUCCEEDED)

        assert xp_trigger_start.call_count == experiment_group.experiments.count(
        )
        # With every experiment done, starting hyperband creates the next
        # bracket iteration exactly once.
        with patch('hpsearch.tasks.hyperband.hp_hyperband_create.apply_async'
                   ) as mock_fct1:
            hp_hyperband_start(experiment_group.id)

        assert mock_fct1.call_count == 1

        # Fake reduce: fresh group, no deep iteration record this time.
        with patch('hpsearch.tasks.hyperband.hp_hyperband_start.apply_async'
                   ) as mock_fct:
            experiment_group = ExperimentGroupFactory(
                content=
                experiment_group_spec_content_hyperband_trigger_reschedule)
        self.assertEqual(
            mock_fct.call_count,
            math.ceil(experiment_group.experiments.count() /
                      conf.get('GROUP_CHUNKS')) + 1)
        assert experiment_group.non_done_experiments.count() == 9

        # Mark experiment as done
        with patch(
                'scheduler.experiment_scheduler.stop_experiment') as _:  # noqa
            with patch('hpsearch.tasks.hyperband.'
                       'hp_hyperband_start.apply_async') as xp_trigger_start:
                for xp in experiment_group.experiments.all():
                    ExperimentStatusFactory(
                        experiment=xp, status=ExperimentLifeCycle.SUCCEEDED)

        assert xp_trigger_start.call_count == experiment_group.experiments.count(
        )
        # This time the manager reduces the configs instead of creating a
        # new bracket iteration.
        with patch('hpsearch.tasks.hyperband.hp_hyperband_start.apply_async'
                   ) as mock_fct2:
            with patch.object(HyperbandIterationManager,
                              'reduce_configs') as mock_fct3:
                hp_hyperband_start(experiment_group.id)
        assert mock_fct2.call_count == 1
        assert mock_fct3.call_count == 1
Example #25
0
 def setUp(self):
     """Create a build job and the heartbeat endpoint URL for it."""
     super().setUp()
     owner_project = ProjectFactory(user=self.auth_client.user)
     self.build = BuildJobFactory(project=owner_project)
     self.url = '/{}/{}/{}/builds/{}/_heartbeat'.format(
         API_V1,
         owner_project.user.username,
         owner_project.name,
         self.build.id)
 def get_instance():
     """Return a freshly created build-job fixture."""
     return BuildJobFactory()
Example #27
0
    def test_render_works_as_expected(self):
        """DockerFileGenerator.render produces the expected FROM/WORKDIR/COPY,
        ENV, and RUN directives as env vars, requirement files, setup
        scripts, and a conda env file are added to the repo.
        """
        build_job = BuildJobFactory()

        # Create a repo folder
        repo_path = os.path.join(conf.get('REPOS_MOUNT_PATH'), 'repo')
        os.mkdir(repo_path)

        # By default it should user FROM image declare WORKDIR and COPY code
        builder = DockerFileGenerator(repo_path=repo_path,
                                      from_image='busybox',
                                      build_steps=build_job.build_steps,
                                      env_vars=build_job.build_env_vars)

        dockerfile = builder.render()
        builder.clean()

        assert 'FROM busybox' in dockerfile
        assert 'WORKDIR {}'.format(builder.WORKDIR) in dockerfile
        assert 'COPY {}'.format(builder.folder_name) in dockerfile

        # Add env vars
        builder = DockerFileGenerator(repo_path=repo_path,
                                      from_image='busybox',
                                      build_steps=build_job.build_steps,
                                      env_vars=[('BLA', 'BLA')])

        dockerfile = builder.render()
        assert 'ENV BLA BLA' in dockerfile
        builder.clean()

        # Add a polyaxon_requirements.txt and polyaxon_setup.sh files to repo path
        Path(os.path.join(repo_path, 'polyaxon_requirements.txt')).touch()
        Path(os.path.join(repo_path, 'polyaxon_setup.sh')).touch()
        # Add step to act on them
        build_steps = [
            'pip install -r polyaxon_requirements.txt',
            './polyaxon_setup.sh'
        ]

        builder = DockerFileGenerator(repo_path=repo_path,
                                      from_image='busybox',
                                      env_vars=build_job.build_env_vars,
                                      build_steps=build_steps)

        dockerfile = builder.render()
        assert 'COPY {} {}'.format(
            builder.polyaxon_requirements_path, builder.WORKDIR) in dockerfile
        assert 'COPY {} {}'.format(
            builder.polyaxon_setup_path, builder.WORKDIR) in dockerfile

        assert 'RUN {}'.format(build_steps[0]) in dockerfile
        assert 'RUN {}'.format(build_steps[1]) in dockerfile
        builder.clean()

        # Add conda env
        # NOTE(review): the touched file is 'conda_env.yml' but the build
        # step references 'environment.yml' -- looks inconsistent; confirm
        # which filename the generator actually detects.
        Path(os.path.join(repo_path, 'conda_env.yml')).touch()
        build_steps.append('conda env update -n base -f environment.yml')
        builder = DockerFileGenerator(repo_path=repo_path,
                                      from_image='busybox',
                                      env_vars=build_job.build_env_vars,
                                      build_steps=build_steps)

        dockerfile = builder.render()
        assert 'COPY {} {}'.format(
            builder.polyaxon_requirements_path, builder.WORKDIR) in dockerfile
        assert 'COPY {} {}'.format(
            builder.polyaxon_setup_path, builder.WORKDIR) in dockerfile
        assert 'COPY {} {}'.format(
            builder.polyaxon_conda_env_path, builder.WORKDIR) in dockerfile

        assert 'RUN {}'.format(build_steps[0]) in dockerfile
        assert 'RUN {}'.format(build_steps[1]) in dockerfile
        builder.clean()
Example #28
0
 def setUp(self):
     """Create the build-job fixture shared by the tests."""
     super().setUp()
     self.build_job = BuildJobFactory()
Example #29
0
class TestBuildJobStatuses(BaseTest):
    """Status propagation from a build job to everything that depends on
    it: a notebook, a tensorboard, a job, and an experiment all sharing
    the same ``build_job``.
    """

    def setUp(self):
        super().setUp()
        self.project = ProjectFactory()
        self.build_job = BuildJobFactory(project=self.project)
        # All four dependents reference the same build job.
        self.notebook = NotebookJobFactory(project=self.project,
                                           build_job=self.build_job)
        self.tensorboard = TensorboardJobFactory(project=self.project,
                                                 build_job=self.build_job)
        self.job = JobFactory(project=self.project, build_job=self.build_job)
        self.experiment = ExperimentFactory(project=self.project,
                                            build_job=self.build_job)

    def test_build_job_failed_sets_dependency_to_failed(self):
        """A FAILED build marks every dependent entity FAILED."""
        assert self.build_job.last_status != JobLifeCycle.FAILED
        assert self.notebook.last_status != JobLifeCycle.FAILED
        assert self.tensorboard.last_status != JobLifeCycle.FAILED
        assert self.job.last_status != JobLifeCycle.FAILED
        assert self.experiment.last_status != ExperimentLifeCycle.FAILED

        self.build_job.set_status(JobLifeCycle.FAILED)

        assert self.build_job.last_status == JobLifeCycle.FAILED
        self.notebook.refresh_from_db()
        assert self.notebook.last_status == JobLifeCycle.FAILED
        self.tensorboard.refresh_from_db()
        assert self.tensorboard.last_status == JobLifeCycle.FAILED
        self.job.refresh_from_db()
        assert self.job.last_status == JobLifeCycle.FAILED
        self.experiment.refresh_from_db()
        assert self.experiment.last_status == ExperimentLifeCycle.FAILED

    def test_build_job_stopped_sets_dependency_to_stopped(self):
        """A STOPPED build marks every dependent entity STOPPED."""
        assert self.build_job.last_status != JobLifeCycle.STOPPED
        assert self.notebook.last_status != JobLifeCycle.STOPPED
        assert self.tensorboard.last_status != JobLifeCycle.STOPPED
        assert self.job.last_status != JobLifeCycle.STOPPED
        assert self.experiment.last_status != ExperimentLifeCycle.STOPPED

        self.build_job.set_status(JobLifeCycle.STOPPED)

        assert self.build_job.last_status == JobLifeCycle.STOPPED
        self.notebook.refresh_from_db()
        assert self.notebook.last_status == JobLifeCycle.STOPPED
        self.tensorboard.refresh_from_db()
        assert self.tensorboard.last_status == JobLifeCycle.STOPPED
        self.job.refresh_from_db()
        assert self.job.last_status == JobLifeCycle.STOPPED
        self.experiment.refresh_from_db()
        assert self.experiment.last_status == ExperimentLifeCycle.STOPPED

    def test_build_job_succeeded_starts_dependency(self):
        """A SUCCEEDED build triggers each dependent scheduler's start
        exactly once (schedulers mocked out, only call counts checked)."""
        assert self.build_job.last_status != JobLifeCycle.SUCCEEDED
        assert self.notebook.last_status != JobLifeCycle.SUCCEEDED
        assert self.tensorboard.last_status != JobLifeCycle.SUCCEEDED
        assert self.job.last_status != JobLifeCycle.SUCCEEDED
        assert self.experiment.last_status != ExperimentLifeCycle.SUCCEEDED

        with patch('scheduler.notebook_scheduler.start_notebook'
                   ) as mock_notebook:
            with patch('scheduler.tensorboard_scheduler.start_tensorboard'
                       ) as mock_tensorboard:
                with patch('scheduler.experiment_scheduler.start_experiment'
                           ) as mock_experiment:
                    with patch(
                            'scheduler.job_scheduler.start_job') as mock_job:
                        self.build_job.set_status(JobLifeCycle.SUCCEEDED)

        assert self.build_job.last_status == JobLifeCycle.SUCCEEDED
        assert mock_notebook.call_count == 1
        assert mock_tensorboard.call_count == 1
        assert mock_experiment.call_count == 1
        assert mock_job.call_count == 1
Example #30
0
    def get_job_object(self, job_state):
        """Build a BuildJob whose uuids match the labels carried by
        ``job_state``."""
        labels = job_state.details.labels
        owning_project = ProjectFactory(uuid=labels.project_uuid.hex)
        return BuildJobFactory(uuid=labels.job_uuid.hex,
                               project=owning_project)
Example #31
0
 def setUp(self):
     # NOTE(review): the fixture is created BEFORE super().setUp(),
     # unlike sibling setUp methods in this file -- presumably the base
     # setUp needs self.build_job to exist; confirm this ordering is
     # intentional.
     self.build_job = BuildJobFactory(project=ProjectFactory())
     super().setUp()