Example #1
    def _add_dataset(self, key, spec, spec_options=None):

        owner = self._check_metadata(spec.metadata)

        permissions = self.__compute_permissions(spec.permissions, owner)

        # copy the opener and description files into local storage
        dataset_file_path = self._db.save_file(spec.data_opener, key)
        dataset_description_path = self._db.save_file(spec.description, key)

        # create the dataset model instance
        asset = models.Dataset(
            key=key,
            owner=owner,
            name=spec.name,
            objective_key=spec.objective_key if spec.objective_key else "",
            permissions={
                "process": {
                    "public": permissions.public,
                    "authorized_ids": permissions.authorized_ids,
                },
            },
            type=spec.type,
            train_data_sample_keys=list(),
            test_data_sample_keys=list(),
            opener={
                "checksum": fs.hash_file(dataset_file_path),
                "storage_address": dataset_file_path
            },
            description={
                "checksum": fs.hash_file(dataset_description_path),
                "storage_address": dataset_description_path
            },
            metadata=spec.metadata if spec.metadata else dict(),
        )
        return self._db.add(asset)
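
Both the opener and the description end up stored as {"checksum", "storage_address"} pairs computed with fs.hash_file. A minimal, self-contained sketch of what such a helper plausibly looks like (the SHA-256 digest and chunked read are assumptions, not the library's confirmed implementation):

    import hashlib

    def hash_file(path, chunk_size=64 * 1024):
        """Hex digest of a file's contents, read in fixed-size chunks."""
        digest = hashlib.sha256()  # assumed algorithm; the real fs.hash_file may differ
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()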
Example #2
    def _add_objective(self, key, spec, spec_options):

        owner = self._check_metadata(spec.metadata)
        permissions = self.__compute_permissions(spec.permissions, owner)

        # validate spec
        test_dataset = None
        if spec.test_data_manager_key:
            dataset = self._db.get(schemas.Type.Dataset,
                                   spec.test_data_manager_key)
            # validate test data samples
            if spec.test_data_sample_keys is None:
                spec.test_data_sample_keys = list()
            self.__check_same_data_manager(spec.test_data_manager_key,
                                           spec.test_data_sample_keys)

            test_dataset = {
                "data_manager_key": spec.test_data_manager_key,
                "data_sample_keys": spec.test_data_sample_keys,
                "metadata": dataset.metadata,
                "worker": dataset.owner,
            }
            if not dataset.objective_key:
                dataset.objective_key = key
            else:
                raise substra.exceptions.InvalidRequest(
                    "dataManager is already associated with an objective", 400)

        # Copy files to the local dir
        objective_file_path = self._db.save_file(spec.metrics, key)
        objective_description_path = self._db.save_file(spec.description, key)

        # create objective model instance
        objective = models.Objective(
            key=key,
            name=spec.name,
            owner=owner,
            test_dataset=test_dataset,
            permissions={
                "process": {
                    "public": permissions.public,
                    "authorized_ids": permissions.authorized_ids,
                },
            },
            description={
                "checksum": fs.hash_file(objective_description_path),
                "storage_address": objective_description_path
            },
            metrics={
                "name": spec.metrics_name,
                "checksum": fs.hash_file(objective_file_path),
                "storage_address": objective_file_path
            },
            metadata=spec.metadata if spec.metadata else dict(),
        )

        # add the objective to storage and optionally update the associated dataset
        objective = self._db.add(objective)

        return objective
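
The validation above enforces a one-to-one link: a dataset can be associated with at most one objective. A standalone sketch of that guard, with a stand-in for substra.exceptions.InvalidRequest (illustration only, not the library's code):

    from types import SimpleNamespace

    class InvalidRequest(Exception):
        """Stand-in mimicking substra.exceptions.InvalidRequest."""
        def __init__(self, message, status_code):
            super().__init__(message)
            self.status_code = status_code

    def link_objective(dataset, objective_key):
        # refuse to overwrite an existing association
        if dataset.objective_key:
            raise InvalidRequest(
                "dataManager is already associated with an objective", 400)
        dataset.objective_key = objective_key

    dataset = SimpleNamespace(objective_key="")
    link_objective(dataset, "objective-1")    # ok
    # link_objective(dataset, "objective-2")  # would raise InvalidRequest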
Example #3
    def __add_algo(self, model_class, key, spec, owner, spec_options=None):

        permissions = self.__compute_permissions(spec.permissions, owner)
        algo_file_path = self._db.save_file(spec.file, key)
        algo_description_path = self._db.save_file(spec.description, key)
        algo = model_class(
            key=key,
            name=spec.name,
            owner=owner,
            permissions={
                "process": {
                    "public": permissions.public,
                    "authorized_ids": permissions.authorized_ids,
                },
            },
            content={
                "checksum": fs.hash_file(algo_file_path),
                "storage_address": algo_file_path
            },
            description={
                "checksum": fs.hash_file(algo_description_path),
                "storage_address": algo_description_path
            },
            metadata=spec.metadata if spec.metadata else dict(),
        )
        return self._db.add(algo)
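
__add_algo receives the concrete model_class as an argument, so a single code path can build every algo variant that shares these fields. A small illustration of that indirection, using hypothetical dataclasses rather than the real models:

    from dataclasses import dataclass

    @dataclass
    class Algo:  # hypothetical, mirrors only the shape of the real models
        key: str
        name: str

    @dataclass
    class CompositeAlgo(Algo):
        pass

    def add_algo(model_class, key, name):
        # the same builder serves every sibling type
        return model_class(key=key, name=name)

    print(add_algo(Algo, "k1", "simple-algo"))
    print(add_algo(CompositeAlgo, "k2", "composite-algo"))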
Example #4
    def _save_output_model(self, tuple_, model_name,
                           models_volume) -> models.OutModel:
        # copy the model out of the execution volume into the backend's
        # local models directory, keyed by the tuple that produced it
        tmp_path = os.path.join(models_volume, model_name)
        model_dir = _mkdir(os.path.join(self._wdir, "models", tuple_.key))
        model_path = os.path.join(model_dir, model_name)
        shutil.copy(tmp_path, model_path)
        return models.OutModel(key=str(uuid.uuid4()),
                               checksum=fs.hash_file(model_path),
                               storage_address=model_path)
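
The snippet relies on an _mkdir helper that is not shown. A plausible stand-in, assuming it idempotently creates the directory tree and returns the path so it can be used inline:

    import os

    def _mkdir(path):
        # assumed behaviour: create parents if missing, never fail if
        # the directory already exists, and return the path
        os.makedirs(path, exist_ok=True)
        return path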