def migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x.

    Moves every dataset metadata file found under ``data/`` into the
    ``.renku`` datasets directory, removes the obsolete git submodule
    that mirrored the dataset, repairs stale file paths, and re-creates
    the dataset ref pointing at the new metadata location.

    :param client: Local client with ``path``, ``renku_datasets_path``,
        ``repo`` and metadata helpers.
    """
    for old_path in dataset_pre_0_3(client):
        # The dataset name is its directory path relative to ``data/``.
        name = str(old_path.parent.relative_to(client.path / 'data'))

        dataset = Dataset.from_yaml(old_path, client=client)
        new_path = (client.renku_datasets_path / dataset.uid / client.METADATA)
        new_path.parent.mkdir(parents=True, exist_ok=True)

        # Drop the submodule that used to mirror this project's data.
        with client.with_metadata(read_only=True) as meta:
            for module in client.repo.submodules:
                if Path(module.url).name == meta.name:
                    module.remove()

        # Repair file paths that no longer resolve: look for the file
        # under ``data/<dataset name>/`` and store a repo-relative path.
        for file_ in dataset.files:
            if not Path(file_.path).exists():
                expected_path = (
                    client.path / 'data' / dataset.name / file_.path
                )
                if expected_path.exists():
                    file_.path = expected_path.relative_to(client.path)

        # FIX: store the reference relative to the repository root so the
        # metadata stays valid when the repository is cloned or moved —
        # consistent with the sibling ``_migrate_datasets_pre_v0_3``,
        # which uses ``new_path.relative_to(client.path)``.
        dataset.__reference__ = new_path.relative_to(client.path)
        dataset.to_yaml()

        Path(old_path).unlink()
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(name),
            force=True,
        )
        ref.set_reference(new_path)
def _migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x."""
    for metadata_path in get_pre_0_3_4_datasets_metadata(client):
        # Dataset name == its directory relative to the data directory.
        dataset_name = str(
            metadata_path.parent.relative_to(client.path / DATA_DIR)
        )

        dataset = Dataset.from_yaml(metadata_path, client=client)
        dataset.title = dataset_name
        dataset.name = dataset_name

        target_path = (
            client.renku_datasets_path / dataset.identifier / client.METADATA
        )
        target_path.parent.mkdir(parents=True, exist_ok=True)

        # Remove the submodule that used to mirror this project.
        with client.with_metadata(read_only=True) as project_meta:
            for submodule in client.repo.submodules:
                if Path(submodule.url).name == project_meta.name:
                    submodule.remove()

        # Point stale file entries back at data/<dataset>/<file>.
        for dataset_file in dataset.files:
            if Path(dataset_file.path).exists():
                continue
            candidate = (
                client.path / DATA_DIR / dataset.name / dataset_file.path
            )
            if candidate.exists():
                dataset_file.path = candidate.relative_to(client.path)

        dataset.to_yaml(target_path)
        Path(metadata_path).unlink()

        ref = LinkReference.create(
            client=client,
            name="datasets/{0}".format(dataset_name),
            force=True,
        )
        ref.set_reference(target_path)
def create_dataset(
    self,
    short_name=None,
    title=None,
    description=None,
    creators=None,
    keywords=None,
):
    """Create a dataset."""
    # Validate inputs before touching the filesystem.
    if not short_name:
        raise errors.ParameterError('Dataset short_name must be provided.')
    if not is_dataset_short_name_valid(short_name):
        raise errors.ParameterError(
            'Dataset short_name "{}" is not valid.'.format(short_name))
    if self.load_dataset(short_name=short_name):
        raise errors.DatasetExistsError(
            'Dataset exists: "{}".'.format(short_name))

    # Title falls back to the short name.
    title = title or short_name

    identifier = str(uuid.uuid4())
    metadata_path = self.renku_datasets_path / identifier / self.METADATA
    if metadata_path.exists():
        raise errors.DatasetExistsError(
            'Dataset with reference {} exists'.format(metadata_path))
    metadata_path.parent.mkdir(parents=True, exist_ok=True)

    if creators is None:
        creators = [Person.from_git(self.repo)]
    if not keywords:
        keywords = ()

    with with_reference(metadata_path):
        dataset = Dataset(
            client=self,
            identifier=identifier,
            short_name=short_name,
            name=title,
            description=description,
            creator=creators,
            keywords=keywords,
        )

    reference = LinkReference.create(
        client=self, name='datasets/' + short_name)
    reference.set_reference(metadata_path)

    dataset.path = Path(dataset.path).relative_to(self.path)
    dataset.to_yaml()
    return dataset, metadata_path, reference
def _migrate_broken_dataset_paths(client):
    """Ensure all paths are using correct directory structure."""
    for dataset in client.datasets.values():
        dataset_path = Path(dataset.path)
        # Canonical location: <repo>/.renku/datasets/<url-quoted identifier>.
        expected_path = (client.path / client.renku_datasets_path /
                         Path(quote(dataset.identifier, safe='')))

        # migrate the refs
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(dataset.short_name),
            force=True,
        )
        ref.set_reference(expected_path / client.METADATA)

        if not dataset_path.exists():
            # Fall back to the legacy layout keyed by the bare UUID hex.
            dataset_path = (client.path / client.renku_datasets_path /
                            uuid.UUID(dataset.identifier).hex)

        if not expected_path.exists():
            shutil.move(str(dataset_path), str(expected_path))

        # NOTE(review): nesting reconstructed from a flattened source —
        # confirm the two assignments below are meant to run even when
        # ``expected_path`` already existed.
        dataset.path = expected_path
        dataset.__reference__ = expected_path / client.METADATA

        for file_ in dataset.files:
            file_path = Path(file_.path)
            # Repair entries whose stored path escapes the repo ('..'-style).
            if not file_path.exists() and file_.path.startswith('..'):
                new_path = (client.path / client.renku_datasets_path /
                            dataset.uid / file_path).resolve().relative_to(
                                client.path)
                file_.path = new_path

                _, commit, _ = client.resolve_in_submodules(
                    client.find_previous_commit(file_.path, revision='HEAD'),
                    file_.path,
                )

                # RENKU_DOMAIN overrides the remote-derived host.
                host = client.remote.get('host') or 'localhost'
                host = os.environ.get('RENKU_DOMAIN') or host

                # always set the id by the identifier
                file_._id = urllib.parse.urljoin(
                    'https://{host}'.format(host=host),
                    posixpath.join('/blob/{hexsha}/{path}'.format(
                        hexsha=commit.hexsha, path=new_path)))
                file_._label = '{}@{}'.format(new_path, commit.hexsha)

        dataset.to_yaml()
def migrate_broken_dataset_paths(client):
    """Ensure all paths are using correct directory structure."""
    for dataset in client.datasets.values():
        dataset_path = Path(dataset.path)
        # Canonical location: renku_datasets_path/<url-quoted identifier>.
        expected_path = (
            client.renku_datasets_path /
            Path(quote(dataset.identifier, safe=''))
        )

        # migrate the refs
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(dataset.short_name),
            force=True,
        )
        ref.set_reference(expected_path / client.METADATA)

        if not dataset_path.exists():
            # Fall back to the legacy layout keyed by the bare UUID hex.
            dataset_path = (
                client.renku_datasets_path / uuid.UUID(dataset.identifier).hex
            )

        if not expected_path.exists():
            shutil.move(str(dataset_path), str(expected_path))

        # NOTE(review): nesting reconstructed from a flattened source —
        # confirm the two assignments below are meant to run even when
        # ``expected_path`` already existed.
        dataset.path = expected_path
        dataset.__reference__ = expected_path / client.METADATA

        for file_ in dataset.files:
            file_path = Path(file_.path)
            # Repair entries whose stored path escapes the repo ('..'-style).
            if not file_path.exists() and file_.path.startswith('..'):
                # NOTE(review): ``.resolve()`` on a relative base resolves
                # against the process CWD — presumably the repo root; verify.
                new_path = (
                    client.renku_datasets_path / dataset.uid / file_path
                ).resolve().relative_to(client.path)
                file_.path = new_path
                file_._label = new_path

                _, commit, _ = client.resolve_in_submodules(
                    client.find_previous_commit(file_.path, revision='HEAD'),
                    file_.path,
                )

                # Id encodes the last commit touching the file.
                id_format = 'blob/{commit}/{path}'
                file_._id = id_format.format(
                    commit=commit.hexsha, path=new_path
                )

        dataset.to_yaml()
def create_dataset(self, name, short_name=None, description='',
                   creators=None):
    """Create a dataset."""
    if not name:
        raise errors.ParameterError('Dataset name must be provided.')

    # Derive a short name from the display name when none is given.
    if not short_name:
        short_name = generate_default_short_name(name, None)

    if not is_dataset_name_valid(short_name):
        raise errors.ParameterError(
            'Dataset name "{}" is not valid.'.format(short_name))
    if self.load_dataset(name=short_name):
        raise errors.DatasetExistsError(
            'Dataset exists: "{}".'.format(short_name))

    identifier = str(uuid.uuid4())
    metadata_path = self.renku_datasets_path / identifier / self.METADATA
    if metadata_path.exists():
        raise errors.DatasetExistsError(
            'Dataset with reference {} exists'.format(metadata_path))
    metadata_path.parent.mkdir(parents=True, exist_ok=True)

    if creators is None:
        creators = [Person.from_git(self.repo)]

    with with_reference(metadata_path):
        dataset = Dataset(
            client=self,
            identifier=identifier,
            name=name,
            short_name=short_name,
            description=description,
            creator=creators,
        )

    reference = LinkReference.create(
        client=self, name='datasets/' + short_name)
    reference.set_reference(metadata_path)

    dataset.to_yaml()
    return dataset, metadata_path, reference
def _migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x."""

    def _legacy_metadata_paths(client):
        """Return paths of dataset metadata for pre 0.3.4."""
        # Project schema version 2 introduced the new layout.
        if int(client.project.version) < 2:
            return (client.path / DATA_DIR).rglob(client.METADATA)
        return []

    for metadata_path in _legacy_metadata_paths(client):
        dataset_name = str(
            metadata_path.parent.relative_to(client.path / DATA_DIR)
        )

        dataset = Dataset.from_yaml(metadata_path, client=client)
        target_path = (
            client.renku_datasets_path / dataset.uid / client.METADATA
        )
        target_path.parent.mkdir(parents=True, exist_ok=True)

        # Remove the submodule that used to mirror this project.
        with client.with_metadata(read_only=True) as project_meta:
            for submodule in client.repo.submodules:
                if Path(submodule.url).name == project_meta.name:
                    submodule.remove()

        # Point stale file entries back at data/<dataset>/<file>.
        for dataset_file in dataset.files:
            if Path(dataset_file.path).exists():
                continue
            candidate = (
                client.path / DATA_DIR / dataset.name / dataset_file.path
            )
            if candidate.exists():
                dataset_file.path = candidate.relative_to(client.path)

        # Reference is stored relative to the repository root.
        dataset.__reference__ = target_path.relative_to(client.path)
        dataset.to_yaml()

        Path(metadata_path).unlink()
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(dataset_name),
            force=True,
        )
        ref.set_reference(target_path)
def _migrate_broken_dataset_paths(client):
    """Ensure all paths are using correct directory structure."""
    for dataset in get_client_datasets(client):
        expected_path = client.renku_datasets_path / dataset.identifier

        # Older metadata may lack a name; reuse the title.
        if not dataset.name:
            dataset.name = dataset.title

        # migrate the refs
        ref = LinkReference.create(
            client=client,
            name="datasets/{0}".format(dataset.name),
            force=True,
        )
        ref.set_reference(expected_path / client.METADATA)

        dataset.path = os.path.relpath(expected_path, client.path)

        # Legacy layout keyed the directory by the bare UUID hex.
        old_dataset_path = (
            client.renku_datasets_path / uuid.UUID(dataset.identifier).hex
        )
        if not expected_path.exists():
            shutil.move(old_dataset_path, expected_path)

        for dataset_file in dataset.files:
            current = Path(dataset_file.path)
            # Repair missing or repo-escaping ('..'-style) entries.
            if not current.exists() or dataset_file.path.startswith(".."):
                absolute = Path(os.path.abspath(
                    client.renku_datasets_path / dataset.identifier / current
                ))
                dataset_file.path = absolute.relative_to(client.path)
                dataset_file.name = os.path.basename(dataset_file.path)

        dataset.to_yaml(expected_path / client.METADATA)
def set_name(client, name, path, force):
    """Sets the <name> for remote <path>."""
    from renku.core.models.refs import LinkReference

    # Create (optionally overwriting) the ref, then point it at path.
    ref = LinkReference.create(client=client, name=_ref(name), force=force)
    ref.set_reference(path)