def _add_dataset(self, key, spec, spec_options=None):
    owner = self._check_metadata(spec.metadata)
    permissions = self.__compute_permissions(spec.permissions, owner)

    # copy the opener script and the description file to local storage
    dataset_file_path = self._db.save_file(spec.data_opener, key)
    dataset_description_path = self._db.save_file(spec.description, key)

    asset = models.Dataset(
        key=key,
        owner=owner,
        name=spec.name,
        objective_key=spec.objective_key if spec.objective_key else "",
        permissions={
            "process": {
                "public": permissions.public,
                "authorized_ids": permissions.authorized_ids,
            },
        },
        type=spec.type,
        train_data_sample_keys=list(),
        test_data_sample_keys=list(),
        opener={
            "checksum": fs.hash_file(dataset_file_path),
            "storage_address": dataset_file_path
        },
        description={
            "checksum": fs.hash_file(dataset_description_path),
            "storage_address": dataset_description_path
        },
        metadata=spec.metadata if spec.metadata else dict(),
    )
    return self._db.add(asset)
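
# Illustrative note, not part of the original module: the fields read above imply
# that the dataset spec exposes at least `name`, `data_opener`, `description`,
# `type` and `permissions`, plus optional `objective_key` and `metadata`;
# `data_opener` and `description` are file paths that get copied into local
# storage through `self._db.save_file`. A caller would then do something like
# (hypothetical key and spec object):
#
#     self._add_dataset(key=str(uuid.uuid4()), spec=dataset_spec)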
def _add_objective(self, key, spec, spec_options):
    owner = self._check_metadata(spec.metadata)
    permissions = self.__compute_permissions(spec.permissions, owner)

    # validate spec
    test_dataset = None
    if spec.test_data_manager_key:
        dataset = self._db.get(schemas.Type.Dataset, spec.test_data_manager_key)
        # validate test data samples
        if spec.test_data_sample_keys is None:
            spec.test_data_sample_keys = list()
        self.__check_same_data_manager(spec.test_data_manager_key, spec.test_data_sample_keys)

        test_dataset = {
            "data_manager_key": spec.test_data_manager_key,
            "data_sample_keys": spec.test_data_sample_keys,
            "metadata": dataset.metadata,
            "worker": dataset.owner,
        }
        if not dataset.objective_key:
            dataset.objective_key = key
        else:
            raise substra.exceptions.InvalidRequest(
                "dataManager is already associated with an objective", 400)

    # Copy files to the local dir
    objective_file_path = self._db.save_file(spec.metrics, key)
    objective_description_path = self._db.save_file(spec.description, key)

    # create objective model instance
    objective = models.Objective(
        key=key,
        name=spec.name,
        owner=owner,
        test_dataset=test_dataset,
        permissions={
            "process": {
                "public": permissions.public,
                "authorized_ids": permissions.authorized_ids,
            },
        },
        description={
            "checksum": fs.hash_file(objective_description_path),
            "storage_address": objective_description_path
        },
        metrics={
            "name": spec.metrics_name,
            "checksum": fs.hash_file(objective_file_path),
            "storage_address": objective_file_path
        },
        metadata=spec.metadata if spec.metadata else dict(),
    )

    # add objective to storage and optionally update the associated dataset
    objective = self._db.add(objective)
    return objective
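
# Illustrative note (an assumption, not stated in the original module): setting
# `dataset.objective_key = key` mutates the dataset instance returned by
# `self._db.get(...)` in place, which is presumably why no explicit "update
# dataset" call follows; later reads of that dataset from the local db would
# already reflect the new association.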
def __add_algo(self, model_class, key, spec, owner, spec_options=None):
    permissions = self.__compute_permissions(spec.permissions, owner)

    # copy the algo archive and the description file to local storage
    algo_file_path = self._db.save_file(spec.file, key)
    algo_description_path = self._db.save_file(spec.description, key)

    algo = model_class(
        key=key,
        name=spec.name,
        owner=owner,
        permissions={
            "process": {
                "public": permissions.public,
                "authorized_ids": permissions.authorized_ids,
            },
        },
        content={
            "checksum": fs.hash_file(algo_file_path),
            "storage_address": algo_file_path
        },
        description={
            "checksum": fs.hash_file(algo_description_path),
            "storage_address": algo_description_path
        },
        metadata=spec.metadata if spec.metadata else dict(),
    )
    return self._db.add(algo)
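
# Illustrative sketch (an assumption, not taken from the original module): the
# name-mangled __add_algo helper is designed to be shared by thin per-type
# wrappers that resolve the owner and pick the concrete model class, roughly:
#
#     def _add_algo(self, key, spec, spec_options=None):
#         owner = self._check_metadata(spec.metadata)
#         return self.__add_algo(models.Algo, key, spec, owner, spec_options=spec_options)
#
# The wrapper name and `models.Algo` class are assumed here for illustration.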
def _save_output_model(self, tuple_, model_name, models_volume) -> models.OutModel:
    tmp_path = os.path.join(models_volume, model_name)
    model_dir = _mkdir(os.path.join(self._wdir, "models", tuple_.key))
    model_path = os.path.join(model_dir, model_name)
    # copy the model produced in the temporary volume into the backend working dir
    shutil.copy(tmp_path, model_path)

    return models.OutModel(
        key=str(uuid.uuid4()),
        checksum=fs.hash_file(model_path),
        storage_address=model_path,
    )
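
# Illustrative sketch (an assumption, not from the original module): once a tuple's
# container run has written its output model into `models_volume`, the backend
# would register it along these lines, "model" being the output file name assumed
# for the example:
#
#     out_model = self._save_output_model(tuple_, "model", models_volume)
#     tuple_.out_model = out_model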