Beispiel #1
0
    def test_independent_experiment_creation_with_run_triggers_experiment_scheduling(self):
        """Create an independent experiment with a run section and follow its statuses.

        Creation must enqueue exactly one build task. Running the build task with
        `create_build_job` patched to hand back a build job that already has a
        SUCCEEDED status must leave the experiment with the statuses CREATED,
        SCHEDULED and FAILED, in that order.
        """
        spec = ExperimentSpecification.read(exec_experiment_spec_content)
        # The project owning the experiment needs a repo
        project_repo = RepoFactory()

        build_task_path = 'scheduler.tasks.experiments.experiments_build.apply_async'
        with patch(build_task_path) as build_task_mock:
            experiment = ExperimentFactory(content=spec.raw_data,
                                           project=project_repo.project)

        def status_records():
            # A fresh queryset per call, so every check queries the database again
            return ExperimentStatus.objects.filter(experiment=experiment)

        assert build_task_mock.call_count == 1
        assert experiment.project.repo is not None
        assert experiment.is_independent is True

        assert status_records().count() == 1
        created_only = [ExperimentLifeCycle.CREATED]
        assert list(status_records().values_list('status', flat=True)) == created_only

        with patch('scheduler.dockerizer_scheduler.create_build_job') as create_job_mock:
            finished_build = BuildJobFactory()
            BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=finished_build)
            create_job_mock.return_value = (finished_build, True, True)
            experiments_build(experiment_id=experiment.id)

        assert create_job_mock.call_count == 1
        assert status_records().count() == 3
        full_trail = [
            ExperimentLifeCycle.CREATED,
            ExperimentLifeCycle.SCHEDULED,
            ExperimentLifeCycle.FAILED,
        ]
        assert list(status_records().values_list('status', flat=True)) == full_trail
        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.FAILED
Beispiel #2
0
    def test_create_experiment_with_resources_spec(self, spawner_mock):
        """Create an experiment from the resources spec and check statuses and jobs.

        The spawner mock is primed with a start result and one job uuid for each of
        `master`, `worker` and `ps`. After creation the experiment must have the
        statuses CREATED, SCHEDULED and STARTING, three experiment jobs must exist
        for it, three job resources must exist, and every job must be CREATED.
        """
        spec = ExperimentSpecification.read(exec_experiment_resources_content)
        spawner = spawner_mock.return_value
        spawner.start_experiment.return_value = start_experiment_value
        spawner.job_uuids = {
            'master': ['fa6203c189a855dd977019854a7ffcc3'],
            'worker': ['3a9c9b0bd56b5e9fbdbd1a3d43d57960'],
            'ps': ['59e3601232b85a3d8be2511f23a62945'],
        }
        spawner.spec = spec

        experiment = ExperimentFactory(content=spec.raw_data)
        assert experiment.is_independent is True

        expected_trail = [
            ExperimentLifeCycle.CREATED,
            ExperimentLifeCycle.SCHEDULED,
            ExperimentLifeCycle.STARTING,
        ]
        assert ExperimentStatus.objects.filter(experiment=experiment).count() == 3
        recorded_trail = ExperimentStatus.objects.filter(
            experiment=experiment).values_list('status', flat=True)
        assert list(recorded_trail) == expected_trail

        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.STARTING

        # Three jobs, each with resources, must have been created
        assert ExperimentJob.objects.filter(experiment=experiment).count() == 3
        assert JobResources.objects.count() == 3
        every_job_status = ExperimentJob.objects.values_list('statuses__status', flat=True)
        assert set(every_job_status) == {JobLifeCycle.CREATED}
        assert experiment.calculated_status == ExperimentLifeCycle.STARTING

        # Each job of the experiment is still in its initial status
        for experiment_job in ExperimentJob.objects.filter(experiment=experiment):
            assert experiment_job.last_status == JobLifeCycle.CREATED
Beispiel #3
0
    def test_set_metrics(self):
        """Send metrics as a single dict, then as a list, and count the stored metrics."""
        spec = ExperimentSpecification.read(experiment_spec_content)
        experiment = ExperimentFactory(content=spec.raw_data)
        assert experiment.metrics.count() == 0

        now = timezone.now()

        def metric_entry():
            # A new dict per call, so no payload object is shared between entries
            return {
                'created_at': now,
                'values': {
                    'accuracy': 0.9,
                    'precision': 0.9
                }
            }

        # One payload passed as a plain dict stores one metric
        experiments_set_metrics(experiment_id=experiment.id, data=metric_entry())
        assert experiment.metrics.count() == 1

        # Two payloads passed as a list store two more
        experiments_set_metrics(experiment_id=experiment.id,
                                data=[metric_entry(), metric_entry()])
        assert experiment.metrics.count() == 3
Beispiel #4
0
    def test_independent_experiment_creation_with_run_triggers_experiment_building(self):
        """Create an independent experiment with a run section and run its build task.

        Creation must enqueue exactly one build task. Running the build task with
        `start_dockerizer` patched must call it once and leave the experiment with
        the statuses CREATED and BUILDING, in that order.
        """
        spec = ExperimentSpecification.read(exec_experiment_spec_content)
        # The project owning the experiment needs a repo
        project_repo = RepoFactory()

        build_task_path = 'scheduler.tasks.experiments.experiments_build.apply_async'
        with patch(build_task_path) as build_task_mock:
            experiment = ExperimentFactory(content=spec.raw_data,
                                           project=project_repo.project)

        def status_records():
            # A fresh queryset per call, so every check queries the database again
            return ExperimentStatus.objects.filter(experiment=experiment)

        assert build_task_mock.call_count == 1
        assert experiment.project.repo is not None
        assert experiment.is_independent is True

        assert status_records().count() == 1
        created_only = [ExperimentLifeCycle.CREATED]
        assert list(status_records().values_list('status', flat=True)) == created_only

        with patch('scheduler.dockerizer_scheduler.start_dockerizer') as dockerizer_mock:
            experiments_build(experiment_id=experiment.id)

        assert dockerizer_mock.call_count == 1
        assert status_records().count() == 2
        building_trail = [ExperimentLifeCycle.CREATED, ExperimentLifeCycle.BUILDING]
        assert list(status_records().values_list('status', flat=True)) == building_trail
        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.BUILDING
Beispiel #5
0
def experiments_stop(self,
                     project_name,
                     project_uuid,
                     experiment_name,
                     experiment_group_name,
                     experiment_group_uuid,
                     experiment_uuid,
                     specification,
                     update_status=True,
                     collect_logs=True,
                     is_managed=True,
                     message=None):
    """Stop an experiment and, when requested, record the STOPPED status.

    Steps, in order:
      1. For managed experiments with `collect_logs` set, collect the job logs;
         a collection failure is only logged as a warning.
      2. For managed experiments with a specification, ask the experiment
         scheduler to stop the experiment. Otherwise the experiment is treated
         as already deleted.
      3. If the scheduler reported a failed deletion and fewer than two retries
         were made, retry after `Intervals.EXPERIMENTS_SCHEDULER`.
      4. If `update_status` is set and the experiment still exists (deleted ones
         included in the lookup), set its status to STOPPED.

    `self` must expose `request.retries` and `retry()`; presumably this is a
    bound task instance -- confirm against the decorator, which is not shown here.
    `message` overrides the default status message 'Experiment was stopped'.
    """
    should_collect_logs = collect_logs and is_managed
    if should_collect_logs:
        try:
            collectors.logs_collect_experiment_jobs(experiment_uuid=experiment_uuid)
        except (OSError, VolumeNotFoundError, PolyaxonStoresException):
            _logger.warning('Scheduler could not collect the logs for experiment `%s`.',
                            experiment_name)

    # Nothing to tear down unless the experiment is managed and has a specification
    deleted = True
    if specification and is_managed:
        parsed_specification = ExperimentSpecification.read(specification)
        deleted = experiment_scheduler.stop_experiment(
            project_name=project_name,
            project_uuid=project_uuid,
            experiment_name=experiment_name,
            experiment_group_name=experiment_group_name,
            experiment_group_uuid=experiment_group_uuid,
            experiment_uuid=experiment_uuid,
            specification=parsed_specification,
        )

    can_retry = not deleted and self.request.retries < 2
    if can_retry:
        _logger.info('Trying again to delete job `%s` in experiment.', experiment_name)
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
        return

    if update_status:
        experiment = get_valid_experiment(experiment_uuid=experiment_uuid,
                                          include_deleted=True)
        if experiment:
            # Record on the experiment that it has been stopped
            experiment.set_status(ExperimentLifeCycle.STOPPED,
                                  message=message or 'Experiment was stopped')
        else:
            _logger.info('Something went wrong, the Experiment `%s` does not exist anymore.',
                         experiment_uuid)
Beispiel #6
0
    def test_independent_experiment_creation_triggers_experiment_scheduling(self):
        """Create an independent experiment and check the statuses it went through.

        The experiment must end up with the statuses CREATED, SCHEDULED and FAILED,
        in that order, with FAILED as its last status.
        """
        spec = ExperimentSpecification.read(experiment_spec_content)
        experiment = ExperimentFactory(content=spec.raw_data)
        assert experiment.is_independent is True

        expected_trail = [
            ExperimentLifeCycle.CREATED,
            ExperimentLifeCycle.SCHEDULED,
            ExperimentLifeCycle.FAILED,
        ]
        assert ExperimentStatus.objects.filter(experiment=experiment).count() == 3
        recorded_trail = ExperimentStatus.objects.filter(
            experiment=experiment).values_list('status', flat=True)
        assert list(recorded_trail) == expected_trail

        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.FAILED
Beispiel #7
0
def validate_experiment_spec_config(config, raise_for_rest: bool = False):
    """Parse `config` into an experiment specification, translating parsing errors.

    Args:
        config: raw specification content passed to `ExperimentSpecification.read`.
        raise_for_rest: when true, a parsing failure is raised as `ValidationError`;
            otherwise it is raised as `DjangoValidationError`.

    Returns:
        The object returned by `ExperimentSpecification.read(config)`.

    Raises:
        ValidationError: the config is not valid and `raise_for_rest` is true.
        DjangoValidationError: the config is not valid and `raise_for_rest` is false.
    """
    try:
        spec = ExperimentSpecification.read(config)
    except (MarshmallowValidationError, PolyaxonfileError,
            PolyaxonConfigurationError) as e:
        message_error = 'Received non valid specification config. %s' % e
        error_class = ValidationError if raise_for_rest else DjangoValidationError
        # Chain explicitly so the parsing error is reported as the direct cause
        # instead of as an unrelated error raised while handling it.
        raise error_class(message_error) from e

    return spec
Beispiel #8
0
    def test_independent_experiment_creation_triggers_experiment_scheduling(self):
        """Create an independent experiment while `create_build_job` is patched.

        The patched call hands back a factory-made build job. The experiment must
        end up with the statuses CREATED, SCHEDULED and FAILED, in that order, with
        FAILED as its last status.
        """
        spec = ExperimentSpecification.read(exec_experiment_spec_content)
        with patch('scheduler.dockerizer_scheduler.create_build_job') as create_job_mock:
            create_job_mock.return_value = (BuildJobFactory(), True, True)
            experiment = ExperimentFactory(content=spec.raw_data)
        assert experiment.is_independent is True

        expected_trail = [
            ExperimentLifeCycle.CREATED,
            ExperimentLifeCycle.SCHEDULED,
            ExperimentLifeCycle.FAILED,
        ]
        assert ExperimentStatus.objects.filter(experiment=experiment).count() == 3
        recorded_trail = ExperimentStatus.objects.filter(
            experiment=experiment).values_list('status', flat=True)
        assert list(recorded_trail) == expected_trail

        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.FAILED
Beispiel #9
0
    def test_set_metrics(self):
        """Send metrics as a dict, then as a list, checking the count and `last_metric`.

        After the single payload, `last_metric` must equal that payload's values.
        After the list of two payloads, `last_metric` must hold the newest accuracy
        and precision together with the `foo` and `bar` values.
        """
        spec = ExperimentSpecification.read(exec_experiment_spec_content)
        experiment = ExperimentFactory(content=spec.raw_data)
        assert experiment.metrics.count() == 0

        now = timezone.now()

        def metric_entry(values):
            # Wrap metric values in the payload shape used throughout this test
            return {'created_at': now, 'values': values}

        experiments_set_metrics(
            experiment_id=experiment.id,
            data=metric_entry({'accuracy': 0.9, 'precision': 0.9}))

        assert experiment.metrics.count() == 1
        experiment.refresh_from_db()
        assert experiment.last_metric == {'accuracy': 0.9, 'precision': 0.9}

        batch = [
            metric_entry({'accuracy': 0.92, 'precision': 0.93, 'foo': 1}),
            metric_entry({'accuracy': 0.95, 'precision': 0.96, 'bar': 8}),
        ]
        experiments_set_metrics(experiment_id=experiment.id, data=batch)

        assert experiment.metrics.count() == 3
        experiment.refresh_from_db()
        expected_last_metric = {
            'accuracy': 0.95,
            'precision': 0.96,
            'bar': 8,
            'foo': 1
        }
        assert experiment.last_metric == expected_last_metric
Beispiel #10
0
    def test_serialize_with_environment_section(self):
        """Serialize an experiment whose specification has an environment section.

        The serialized data must contain a `resources` key.
        """
        yaml_spec = """---
            version: 1

            kind: experiment

            framework: pytorch

            environment:
              resources:
                cpu:
                  requests: 2
                  limits: 4
                memory:
                  requests: 4096
                  limits: 10240

              replicas:
                n_workers: 2
                default_worker:
                  resources:
                    cpu:
                      requests: 2
                      limits: 4
                    memory:
                      requests: 4096
                      limits: 10240

            build:
                image: foo

            run:
              cmd: video_prediction_train --model=DNA --num_masks=1
        """
        parsed = ExperimentSpecification.read(yaml_spec)

        instance = self.factory_class(content=parsed.raw_data)
        # Serialize the object as fetched through the details query
        detailed = queries.experiments_details.get(id=instance.id)
        payload = self.serializer_class(detailed).data
        assert 'resources' in payload
Beispiel #11
0
# Raw YAML for a minimal experiment specification fixture: version 1, kind
# `experiment`, a `fixtures` tag, a build section using image `my_image` and
# a run command.
exec_experiment_spec_content = """---
    version: 1
    
    kind: experiment
    
    tags: [fixtures]

    build:
      image: my_image
    
    run:
      cmd: video_prediction_train --model=DNA --num_masks=1
"""

# Specification object read from the YAML above; built when the module is loaded.
exec_experiment_spec_parsed_content = ExperimentSpecification.read(
    exec_experiment_spec_content)
# NOTE(review): presumably parse_data() completes parsing of the specification
# in place -- confirm against ExperimentSpecification.
exec_experiment_spec_parsed_content.parse_data()

# Raw YAML for a second experiment specification fixture.
# NOTE(review): the fields below match `exec_experiment_spec_content`; no
# repo-specific setting is visible here despite the name -- confirm this is intended.
exec_experiment_ext_repo_spec_content = """---
    version: 1

    kind: experiment

    tags: [fixtures]

    build:
      image: my_image

    run:
      cmd: video_prediction_train --model=DNA --num_masks=1
"""
Beispiel #12
0
 def create_experiment(self, config):
     """Read `config` as an experiment specification and create an experiment from it.

     The experiment is created through `ExperimentFactory` in `self.project`.
     """
     spec = ExperimentSpecification.read(config)
     experiment = ExperimentFactory(content=spec.raw_data, project=self.project)
     return experiment
Beispiel #13
0
# Raw YAML for a minimal experiment specification fixture: version 1, kind
# `experiment`, a `fixtures` tag, a build section using image `my_image` and
# a run command.
exec_experiment_spec_content = """---
    version: 1
    
    kind: experiment
    
    tags: [fixtures]

    build:
      image: my_image
    
    run:
      cmd: video_prediction_train --model=DNA --num_masks=1
"""

# Specification object read from the YAML above; built when the module is loaded.
exec_experiment_spec_parsed_content = ExperimentSpecification.read(exec_experiment_spec_content)
# NOTE(review): presumably apply_context() resolves the specification's context
# in place -- confirm against ExperimentSpecification.
exec_experiment_spec_parsed_content.apply_context()

exec_experiment_spec_content_regression_artifact_refs = """---
    version: 1

    kind: experiment

    tags: [fixtures]

    environment:
      artifact_refs: ['outputs-s3-temp']

    build:
      image: my_image
Beispiel #14
0
 def specification(self) -> 'ExperimentSpecification':
     """Return the specification built from `self.content`.

     Returns None when `self.content` is empty or unset.
     """
     if not self.content:
         return None
     return ExperimentSpecification(values=self.content)