Beispiel #1
0
    def get_init_container(self, persistence_outputs):
        """Build the init containers that prepare the experiment outputs path.

        Returns an empty list when the experiment is a clone using the RESUME
        strategy. Otherwise returns a one-element list holding a container
        that runs either the COPY init command (clone with the COPY strategy)
        or the CREATE init command.
        """
        is_clone = self.original_name is not None
        if is_clone and self.cloning_strategy == CloningStrategy.RESUME:
            # A resumed run gets no init container at all.
            return []

        # Default to creating a fresh path; switch to a copy for COPY clones.
        command = InitCommands.CREATE
        original_outputs_path = None
        if is_clone and self.cloning_strategy == CloningStrategy.COPY:
            command = InitCommands.COPY
            original_outputs_path = stores.get_experiment_outputs_path(
                persistence=persistence_outputs,
                experiment_name=self.original_name)

        outputs_path = stores.get_experiment_outputs_path(
            persistence=persistence_outputs,
            experiment_name=self.experiment_name)
        # Only the mounts half of the returned pair is used here.
        _volumes, mounts = get_pod_outputs_volume(
            persistence_outputs=persistence_outputs)

        init_args = to_list(
            get_output_args(command=command,
                            outputs_path=outputs_path,
                            original_outputs_path=original_outputs_path))
        init_container = client.V1Container(
            name=self.init_container_name,
            image=self.init_docker_image,
            command=["/bin/sh", "-c"],
            args=init_args,
            volume_mounts=mounts)
        return [init_container]
Beispiel #2
0
    def get_init_path_args(self, persistence_outputs):
        """Return the init command args for preparing the outputs path.

        * RESUME clone: CREATE command targeting the original experiment's path.
        * COPY clone: COPY command from the original experiment's path to
          this experiment's path.
        * Anything else: CREATE command targeting this experiment's path.
        """
        is_clone = self.original_name is not None

        if is_clone and self.cloning_strategy == CloningStrategy.RESUME:
            return get_output_args(
                command=InitCommands.CREATE,
                outputs_path=stores.get_experiment_outputs_path(
                    persistence=persistence_outputs,
                    experiment_name=self.original_name),
                original_outputs_path=None)

        if is_clone and self.cloning_strategy == CloningStrategy.COPY:
            return get_output_args(
                command=InitCommands.COPY,
                outputs_path=stores.get_experiment_outputs_path(
                    persistence=persistence_outputs,
                    experiment_name=self.experiment_name),
                original_outputs_path=stores.get_experiment_outputs_path(
                    persistence=persistence_outputs,
                    experiment_name=self.original_name))

        return get_output_args(
            command=InitCommands.CREATE,
            outputs_path=stores.get_experiment_outputs_path(
                persistence=persistence_outputs,
                experiment_name=self.experiment_name),
            original_outputs_path=None)
Beispiel #3
0
    def get(self, request, *args, **kwargs):
        """Stream one experiment outputs file back as an attachment.

        The file is selected by the ``path`` query parameter. Raises
        ``ValidationError`` when the parameter is missing, and answers with a
        404 response when the resolved file cannot be opened.
        """
        filepath = request.query_params.get('path')
        if not filepath:
            raise ValidationError('Files view expect a path to the file.')

        experiment = self.experiment
        download_filepath = archive_outputs_file(
            persistence_outputs=experiment.persistence_outputs,
            outputs_path=stores.get_experiment_outputs_path(
                persistence=experiment.persistence_outputs,
                experiment_name=experiment.unique_name,
                original_name=experiment.original_unique_name,
                cloning_strategy=experiment.cloning_strategy),
            namepath=experiment.unique_name,
            filepath=filepath)

        filename = os.path.basename(download_filepath)
        chunk_size = 8192
        try:
            wrapped_file = FileWrapper(open(download_filepath, 'rb'), chunk_size)
            content_type = mimetypes.guess_type(download_filepath)[0]
            response = StreamingHttpResponse(wrapped_file, content_type=content_type)
            response['Content-Length'] = os.path.getsize(download_filepath)
            response['Content-Disposition'] = "attachment; filename={}".format(filename)
            return response
        except FileNotFoundError:
            _logger.warning('Outputs file not found: log_path=%s', download_filepath)
            not_found_message = 'Outputs file not found: log_path={}'.format(download_filepath)
            return Response(status=status.HTTP_404_NOT_FOUND, data=not_found_message)
Beispiel #4
0
    def test_copying_an_experiment(self):
        """Copying a cloned experiment creates its outputs and copies the files."""
        with patch('scheduler.tasks.experiments.experiments_build.apply_async') as _:  # noqa
            experiment1 = ExperimentFactory()

        # We create some outputs files for the experiment
        path = stores.create_experiment_outputs_path(
            persistence=experiment1.persistence_outputs,
            experiment_name=experiment1.unique_name)
        # Use a context manager so the handle is closed (and the file flushed)
        # before the copy runs, instead of leaking an open file object.
        with open(os.path.join(path, 'file'), 'w+'):
            pass

        # Create a new experiment that is a clone of the previous
        with patch('scheduler.tasks.experiments.experiments_build.apply_async') as _:  # noqa
            experiment2 = ExperimentFactory(original_experiment=experiment1)

        # Check that outputs path for experiment2 does not exist yet
        experiment2_outputs_path = stores.get_experiment_outputs_path(
            persistence=experiment2.persistence_outputs,
            experiment_name=experiment2.unique_name)
        assert os.path.exists(experiment2_outputs_path) is False

        # Copying should create the outputs and copy the content of experiment 1
        copy_experiment(experiment2)

        assert os.path.exists(experiment2_outputs_path) is True
        assert os.path.exists(os.path.join(experiment2_outputs_path, 'file')) is True
Beispiel #5
0
    def get_experiments_outputs_spec(self):
        """Map each related experiment id to an ``OutputsRefsSpec``.

        Returns ``None`` when there are no related experiments. Experiment
        unique names are rebuilt from the queried values (user, project,
        optional group id, experiment id) rather than loaded from instances.
        """
        import stores

        if not self.experiments.count():
            return None

        rows = self.experiments.annotate(
            persistence_outputs=KeyTransform('outputs', 'persistence'),
        ).values_list('id',
                      'experiment_group__id',
                      'project__user__username',
                      'project__name',
                      'persistence_outputs')

        outputs_spec_data = {}
        for experiment_id, group_id, username, project, persistence in rows:
            parent_name = PROJECT_UNIQUE_NAME_FORMAT.format(user=username,
                                                            project=project)
            if group_id:
                # Experiments inside a group are namespaced under the group.
                parent_name = GROUP_UNIQUE_NAME_FORMAT.format(
                    project_name=parent_name, id=group_id)
            experiment_name = EXPERIMENT_UNIQUE_NAME_FORMAT.format(
                parent_name=parent_name, id=experiment_id)
            outputs_spec_data[experiment_id] = OutputsRefsSpec(
                path=stores.get_experiment_outputs_path(
                    persistence=persistence, experiment_name=experiment_name),
                persistence=persistence)

        return outputs_spec_data
Beispiel #6
0
def get_config_map(namespace, project_name, experiment_group_name,
                   experiment_name, project_uuid, experiment_group_uuid,
                   experiment_uuid, original_name, cloning_strategy,
                   cluster_def, persistence_outputs, persistence_data, params,
                   log_level):
    """Build the ``V1ConfigMap`` describing an experiment run.

    The config map carries the JSON-encoded cluster definition, params and
    experiment labels, the log level, the outputs/logs/data paths, and the
    API HTTP/WS endpoints.

    ``params`` set to ``None`` is serialized as ``'{}'``. The previous
    ``json.dumps(params) or '{}'`` could never reach its fallback, because
    ``json.dumps(None)`` returns the non-empty string ``'null'``.
    """
    name = constants.CONFIG_MAP_NAME.format(uuid=experiment_uuid)
    labels = get_map_labels(project_name, experiment_group_name,
                            experiment_name, project_uuid,
                            experiment_group_uuid, experiment_uuid)
    metadata = client.V1ObjectMeta(name=name,
                                   labels=labels,
                                   namespace=namespace)
    experiment_outputs_path = stores.get_experiment_outputs_path(
        persistence=persistence_outputs,
        experiment_name=experiment_name,
        original_name=original_name,
        cloning_strategy=cloning_strategy)
    experiment_logs_path = stores.get_experiment_logs_path(
        experiment_name=experiment_name, temp=False)
    # Fall back to an empty JSON object only when no params were given at all.
    params_json = json.dumps(params) if params is not None else '{}'
    data = {
        constants.CONFIG_MAP_CLUSTER_KEY_NAME: json.dumps(cluster_def),
        constants.CONFIG_MAP_PARAMS_KEY_NAME: params_json,
        constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME: json.dumps(labels),
        constants.CONFIG_MAP_LOG_LEVEL_KEY_NAME: log_level,
        constants.CONFIG_MAP_RUN_OUTPUTS_PATH_KEY_NAME:
        experiment_outputs_path,
        constants.CONFIG_MAP_RUN_LOGS_PATH_KEY_NAME: experiment_logs_path,
        constants.CONFIG_MAP_RUN_DATA_PATHS_KEY_NAME: persistence_data,
        API_HTTP_URL: get_settings_http_api_url(),
        API_WS_HOST: get_settings_ws_api_url(),
    }
    return client.V1ConfigMap(api_version=k8s_constants.K8S_API_VERSION_V1,
                              kind=k8s_constants.K8S_CONFIG_MAP_KIND,
                              metadata=metadata,
                              data=data)
Beispiel #7
0
 def get(self, request, *args, **kwargs):
     """List the contents of the experiment outputs path.

     An optional ``path`` query parameter is joined onto the outputs path
     before listing. Store and volume failures are reported as
     ``ValidationError``.
     """
     try:
         store_manager = stores.get_outputs_store(
             persistence_outputs=self.experiment.persistence_outputs)
     except (PolyaxonStoresException, VolumeNotFoundError) as error:
         raise ValidationError(error)

     listing_path = stores.get_experiment_outputs_path(
         persistence=self.experiment.persistence_outputs,
         experiment_name=self.experiment.unique_name,
         original_name=self.experiment.original_unique_name,
         cloning_strategy=self.experiment.cloning_strategy)
     sub_path = request.query_params.get('path')
     if sub_path:
         listing_path = os.path.join(listing_path, sub_path)

     try:
         data = store_manager.ls(listing_path)
     except VolumeNotFoundError:
         raise ValidationError(
             'Store manager could not load the volume requested,'
             ' to get the outputs data.')
     except Exception:
         raise ValidationError(
             'Experiment outputs path does not exists or bad configuration.'
         )
     return Response(data=data, status=200)
Beispiel #8
0
    def get_init_container(self,
                           init_command,
                           init_args,
                           env_vars,
                           context_mounts,
                           persistence_outputs,
                           persistence_data):
        """Pod init container for setting outputs path.

        Args:
            init_command: Optional command override; defaults to
                ``["/bin/sh", "-c"]`` when falsy.
            init_args: Optional args override; when falsy the args are derived
                from the COPY/CREATE outputs command. The passed list is
                never modified.
            env_vars: Environment variables for the container (``None`` allowed).
            context_mounts: Extra volume mounts appended after the outputs
                mounts (``None`` allowed).
            persistence_outputs: Outputs persistence used to resolve paths and
                the outputs volume mount.
            persistence_data: Unused by this method.

        Returns:
            An empty list for a RESUME clone, otherwise a one-element list
            with the init ``V1Container``.
        """
        env_vars = to_list(env_vars, check_none=True)
        if self.original_name is not None and self.cloning_strategy == CloningStrategy.RESUME:
            return []
        if self.original_name is not None and self.cloning_strategy == CloningStrategy.COPY:
            command = InitCommands.COPY
            original_outputs_path = stores.get_experiment_outputs_path(
                persistence=persistence_outputs,
                experiment_name=self.original_name)
        else:
            command = InitCommands.CREATE
            original_outputs_path = None

        outputs_path = stores.get_experiment_outputs_path(
            persistence=persistence_outputs,
            experiment_name=self.experiment_name)
        _, outputs_volume_mount = get_pod_outputs_volume(persistence_outputs=persistence_outputs)
        volume_mounts = outputs_volume_mount + to_list(context_mounts, check_none=True)
        init_command = init_command or ["/bin/sh", "-c"]
        base_args = init_args or to_list(
            get_output_args(command=command,
                            outputs_path=outputs_path,
                            original_outputs_path=original_outputs_path))
        # Build a new list instead of using ``+=``: when the caller supplies
        # ``init_args``, an in-place extend would append the auth args to the
        # caller's own list and accumulate them across repeated calls.
        all_args = list(base_args) + to_list(
            get_auth_context_args(entity='experiment',
                                  entity_name=self.experiment_name))
        return [
            client.V1Container(
                name=self.init_container_name,
                image=self.init_docker_image,
                image_pull_policy=self.init_docker_image_pull_policy,
                command=init_command,
                # The shell receives all args as one concatenated string.
                args=[''.join(all_args)],
                env=env_vars,
                resources=get_init_resources(),
                volume_mounts=volume_mounts)
        ]
Beispiel #9
0
 def get_env_vars(self, task_type, task_idx):
     """Return the ``TF_CONFIG`` env var for the given task type and index."""
     cluster = self.get_cluster()
     task = {'type': task_type, 'index': task_idx}
     model_dir = stores.get_experiment_outputs_path(
         persistence=self.persistence_config.outputs,
         experiment_name=self.experiment_name,
         cloning_strategy=self.cloning_strategy)
     tf_config = {
         'cluster': cluster,
         'task': task,
         'model_dir': model_dir,
         'environment': 'cloud'
     }
     return get_env_var(name='TF_CONFIG', value=tf_config)
 def test_experiment_outputs_path_creation_deletion(self):
     """Outputs path is absent, then created, then removed by deletion."""
     experiment = self.experiment
     outputs_path = stores.get_experiment_outputs_path(
         persistence=experiment.persistence_outputs,
         experiment_name=experiment.unique_name)
     # Nothing exists before the path is explicitly created.
     assert os.path.exists(outputs_path) is False

     stores.create_experiment_outputs_path(
         persistence=experiment.persistence_outputs,
         experiment_name=experiment.unique_name)
     assert os.path.exists(outputs_path) is True

     stores_schedule_outputs_deletion(persistence=None, subpath=experiment.subpath)
     assert os.path.exists(outputs_path) is False
Beispiel #11
0
 def get_named_experiment_outputs_path(experiment):
     """Return the outputs refs and a ``name:path`` string for an experiment.

     The first element is a one-item list of ``OutputsRefsSpec``; the second
     is ``"<unique_name>:<outputs_path>"``.
     """
     persistence = experiment.persistence_outputs
     outputs_path = stores.get_experiment_outputs_path(
         persistence=persistence,
         experiment_name=experiment.unique_name,
         original_name=experiment.original_unique_name,
         cloning_strategy=experiment.cloning_strategy)
     named_path = '{}:{}'.format(experiment.unique_name, outputs_path)
     refs = [OutputsRefsSpec(path=outputs_path, persistence=persistence)]
     return refs, named_path
Beispiel #12
0
    def _get_experiment_outputs_path(self) -> Tuple[List, str]:
        """Return the experiment's outputs refs together with its outputs path.

        The persistence value is validated before the path is resolved.
        """
        import stores

        from stores.validators import validate_persistence_outputs

        experiment = self.experiment
        persistence = validate_persistence_outputs(
            persistence_outputs=experiment.persistence_outputs)
        outputs_path = stores.get_experiment_outputs_path(
            persistence=persistence,
            experiment_name=experiment.unique_name,
            original_name=experiment.original_unique_name,
            cloning_strategy=experiment.cloning_strategy)

        refs = [OutputsRefsSpec(path=outputs_path, persistence=persistence)]
        return refs, outputs_path
Beispiel #13
0
 def get(self, request, *args, **kwargs):
     """Record the download event, archive the outputs, and redirect to the archive."""
     experiment = self.experiment
     auditor.record(event_type=EXPERIMENT_OUTPUTS_DOWNLOADED,
                    instance=experiment,
                    actor_id=self.request.user.id,
                    actor_name=self.request.user.username)

     outputs_path = stores.get_experiment_outputs_path(
         persistence=experiment.persistence_outputs,
         experiment_name=experiment.unique_name,
         original_name=experiment.original_unique_name,
         cloning_strategy=experiment.cloning_strategy)
     archived_path, archive_name = archive_outputs(
         outputs_path=outputs_path,
         name=experiment.unique_name)

     redirect_path = '{}/{}'.format(archived_path, archive_name)
     return self.redirect(path=redirect_path)
Beispiel #14
0
    def _get_named_experiment_outputs_path(experiment, persistence) -> Tuple[List, str]:
        """Return the outputs refs and a ``name:path`` string for an experiment.

        ``persistence`` is validated first; the validated value is used both
        to resolve the path and in the returned ``OutputsRefsSpec``.
        """
        import stores

        from stores.validators import validate_persistence_outputs

        persistence = validate_persistence_outputs(persistence_outputs=persistence)
        outputs_path = stores.get_experiment_outputs_path(
            persistence=persistence,
            experiment_name=experiment.unique_name,
            original_name=experiment.original_unique_name,
            cloning_strategy=experiment.cloning_strategy)

        named_path = '{}:{}'.format(experiment.unique_name, outputs_path)
        refs = [OutputsRefsSpec(path=outputs_path, persistence=persistence)]
        return refs, named_path
Beispiel #15
0
    def get_absolute_outputs_paths(self) -> str:
        """Return the outputs path of the most specific related entity.

        Precedence: experiment, then experiment group, then project.
        """
        import stores

        if self.experiment:
            experiment = self.experiment
            outputs_path = stores.get_experiment_outputs_path(
                persistence=experiment.persistence_outputs,
                experiment_name=experiment.unique_name,
                original_name=experiment.original_unique_name,
                cloning_strategy=experiment.cloning_strategy)
        elif self.experiment_group:
            group = self.experiment_group
            outputs_path = stores.get_experiment_group_outputs_path(
                persistence=group.persistence_outputs,
                experiment_group_name=group.unique_name)
        else:
            outputs_path = stores.get_project_outputs_path(
                persistence_outputs=None, project_name=self.project.unique_name)

        return outputs_path
Beispiel #16
0
 def test_experiment_group_outputs_path_creation_deletion(self):
     """Deleting the group subpath removes both experiment and group outputs."""
     group = self.experiment_group
     experiment = ExperimentFactory(user=self.project.user,
                                    project=self.project,
                                    experiment_group=group)
     stores.create_experiment_outputs_path(
         persistence=experiment.persistence_outputs,
         experiment_name=experiment.unique_name)

     experiment_outputs_path = stores.get_experiment_outputs_path(
         persistence=experiment.persistence_outputs,
         experiment_name=experiment.unique_name)
     group_outputs_path = stores.get_experiment_group_outputs_path(
         persistence=group.persistence_outputs,
         experiment_group_name=group.unique_name)
     created_paths = (experiment_outputs_path, group_outputs_path)

     for created_path in created_paths:
         assert os.path.exists(created_path) is True

     stores_schedule_outputs_deletion(persistence=None, subpath=group.subpath)

     for created_path in created_paths:
         assert os.path.exists(created_path) is False
Beispiel #17
0
 def test_project_outputs_path_creation_deletion(self):
     """Deleting the project subpath removes both experiment and project outputs."""
     project = self.project
     with patch('scheduler.tasks.experiments.experiments_build.apply_async'
                ) as _:  # noqa
         experiment = ExperimentFactory(user=project.user, project=project)
     stores.create_experiment_outputs_path(
         persistence=experiment.persistence_outputs,
         experiment_name=experiment.unique_name)

     experiment_outputs_path = stores.get_experiment_outputs_path(
         persistence=experiment.persistence_outputs,
         experiment_name=experiment.unique_name)
     project_outputs_path = stores.get_project_outputs_path(
         persistence=None, project_name=project.unique_name)
     created_paths = (experiment_outputs_path, project_outputs_path)

     for created_path in created_paths:
         assert os.path.exists(created_path) is True

     stores_schedule_outputs_deletion(persistence='outputs',
                                      subpath=project.subpath)

     for created_path in created_paths:
         assert os.path.exists(created_path) is False
Beispiel #18
0
    def get(self, request, *args, **kwargs):
        """Stream one experiment outputs file selected by the ``path`` query param.

        Raises ``ValidationError`` when ``path`` is missing. Answers with a
        404 response when ``archive_outputs_file`` yields no downloadable
        path; otherwise delegates to ``stream_file``.
        """
        filepath = request.query_params.get('path')
        if not filepath:
            raise ValidationError('Files view expect a path to the file.')

        experiment_outputs_path = stores.get_experiment_outputs_path(
            persistence=self.experiment.persistence_outputs,
            experiment_name=self.experiment.unique_name,
            original_name=self.experiment.original_unique_name,
            cloning_strategy=self.experiment.cloning_strategy)

        download_filepath = archive_outputs_file(
            persistence_outputs=self.experiment.persistence_outputs,
            outputs_path=experiment_outputs_path,
            namepath=self.experiment.unique_name,
            filepath=filepath)

        if not download_filepath:
            # ``download_filepath`` is falsy in this branch, so report the
            # requested path instead; interpolating the empty value told the
            # client nothing about which file was missing.
            return Response(status=status.HTTP_404_NOT_FOUND,
                            data='Outputs file not found: log_path={}'.format(filepath))

        return stream_file(file_path=download_filepath, logger=_logger)
Beispiel #19
0
 def _get_outputs_path(self, persistence_outputs):
     """Resolve this experiment's outputs path for the given persistence."""
     outputs_path = stores.get_experiment_outputs_path(
         persistence=persistence_outputs,
         experiment_name=self.experiment_name,
         original_name=self.original_name,
         cloning_strategy=self.cloning_strategy,
     )
     return outputs_path
Beispiel #20
0
    def get_pod_container(self,
                          volume_mounts,
                          env_vars=None,
                          command=None,
                          args=None,
                          persistence_outputs=None,
                          persistence_data=None,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          secret_refs=None,
                          configmap_refs=None,
                          resources=None,
                          ephemeral_token=None):
        """Build the job ``V1Container`` for a task pod.

        Environment variables are assembled in this order: caller-supplied
        ``env_vars``, job env vars (paths, log level, outputs refs, token),
        cluster/declarations/experiment-info entries, then resource env vars.
        ``secret_refs`` and ``configmap_refs`` are turned into ``env_from``.
        """
        assert self.cluster_def is not None

        # Resolve the paths the job env vars need.
        experiment_logs_path = stores.get_experiment_logs_path(
            experiment_name=self.experiment_name, temp=False)
        experiment_outputs_path = stores.get_experiment_outputs_path(
            persistence=persistence_outputs,
            experiment_name=self.experiment_name,
            original_name=self.original_name,
            cloning_strategy=self.cloning_strategy)

        env_vars = to_list(env_vars, check_none=True)
        env_vars.extend(get_job_env_vars(
            persistence_outputs=persistence_outputs,
            outputs_path=experiment_outputs_path,
            persistence_data=persistence_data,
            log_level=self.log_level,
            logs_path=experiment_logs_path,
            outputs_refs_jobs=outputs_refs_jobs,
            outputs_refs_experiments=outputs_refs_experiments,
            ephemeral_token=ephemeral_token,
        ))
        config_entries = (
            (constants.CONFIG_MAP_CLUSTER_KEY_NAME, json.dumps(self.cluster_def)),
            (constants.CONFIG_MAP_DECLARATIONS_KEY_NAME, self.declarations),
            (constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
             json.dumps(self.experiment_labels)),
        )
        env_vars.extend(
            get_env_var(name=key, value=value) for key, value in config_entries)
        env_vars.extend(get_resources_env_vars(resources=resources))

        # Env from configmap and secret refs
        env_from = get_pod_env_from(secret_refs=secret_refs,
                                    configmap_refs=configmap_refs)

        container_ports = []
        for port_number in self.ports:
            container_ports.append(
                client.V1ContainerPort(container_port=port_number))

        return client.V1Container(name=self.job_container_name,
                                  image=self.job_docker_image,
                                  command=command,
                                  args=args,
                                  ports=container_ports,
                                  env=env_vars,
                                  env_from=env_from,
                                  resources=get_resources(resources),
                                  volume_mounts=volume_mounts)