def _migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x."""
    for old_path in get_pre_0_3_4_datasets_metadata(client):
        name = str(old_path.parent.relative_to(client.path / DATA_DIR))

        dataset = Dataset.from_yaml(old_path, client=client)
        dataset.title = name
        dataset.name = name
        new_path = client.renku_datasets_path / dataset.identifier / client.METADATA
        new_path.parent.mkdir(parents=True, exist_ok=True)

        with client.with_metadata(read_only=True) as meta:
            for module in client.repo.submodules:
                if Path(module.url).name == meta.name:
                    module.remove()

        for file_ in dataset.files:
            if not Path(file_.path).exists():
                expected_path = client.path / DATA_DIR / dataset.name / file_.path
                if expected_path.exists():
                    file_.path = expected_path.relative_to(client.path)

        dataset.to_yaml(new_path)

        Path(old_path).unlink()
        ref = LinkReference.create(
            client=client,
            name="datasets/{0}".format(name),
            force=True,
        )
        ref.set_reference(new_path)
def migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x."""
    for old_path in dataset_pre_0_3(client):
        name = str(old_path.parent.relative_to(client.path / 'data'))

        dataset = Dataset.from_yaml(old_path, client=client)
        new_path = (client.renku_datasets_path / dataset.uid / client.METADATA)
        new_path.parent.mkdir(parents=True, exist_ok=True)

        with client.with_metadata(read_only=True) as meta:
            for module in client.repo.submodules:
                if Path(module.url).name == meta.name:
                    module.remove()

        for file_ in dataset.files:
            if not Path(file_.path).exists():
                expected_path = (
                    client.path / 'data' / dataset.name / file_.path
                )
                if expected_path.exists():
                    file_.path = expected_path.relative_to(client.path)

        dataset.__reference__ = new_path
        dataset.to_yaml()

        Path(old_path).unlink()
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(name),
            force=True,
        )
        ref.set_reference(new_path)
def is_dataset_short_name_valid(short_name):
    """A valid short_name is a valid Git reference name with no /."""
    # TODO make short_name RFC 3986-compatible and migrate old projects
    return (
        short_name
        and LinkReference.check_ref_format(short_name, no_slashes=True)
        and '/' not in short_name
    )
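# A minimal usage sketch for the validator above (values are illustrative;
# the underlying rules come from LinkReference.check_ref_format, which is
# expected to enforce Git's ref-name format):
#
#   is_dataset_short_name_valid('my-dataset')    # valid
#   is_dataset_short_name_valid('my/dataset')    # rejected: contains '/'
#   is_dataset_short_name_valid('bad..name')     # rejected: invalid Git ref
#   is_dataset_short_name_valid(None)            # falsy result for empty input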
def create_dataset(
    self,
    short_name=None,
    title=None,
    description=None,
    creators=None,
    keywords=None,
):
    """Create a dataset."""
    if not short_name:
        raise errors.ParameterError('Dataset short_name must be provided.')

    if not is_dataset_short_name_valid(short_name):
        raise errors.ParameterError(
            'Dataset short_name "{}" is not valid.'.format(short_name)
        )

    if self.load_dataset(short_name=short_name):
        raise errors.DatasetExistsError(
            'Dataset exists: "{}".'.format(short_name)
        )

    if not title:
        title = short_name

    identifier = str(uuid.uuid4())
    path = self.renku_datasets_path / identifier / self.METADATA
    if path.exists():
        raise errors.DatasetExistsError(
            'Dataset with reference {} exists'.format(path)
        )
    path.parent.mkdir(parents=True, exist_ok=True)

    if creators is None:
        creators = [Person.from_git(self.repo)]

    keywords = keywords or ()

    with with_reference(path):
        dataset = Dataset(
            client=self,
            identifier=identifier,
            short_name=short_name,
            name=title,
            description=description,
            creator=creators,
            keywords=keywords,
        )

    dataset_ref = LinkReference.create(client=self, name='datasets/' + short_name)
    dataset_ref.set_reference(path)
    dataset.path = Path(dataset.path).relative_to(self.path)
    dataset.to_yaml()

    return dataset, path, dataset_ref
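# A sketch of a typical call, assuming `client` is the object this method is
# bound to (illustrative values):
#
#   dataset, path, ref = client.create_dataset(
#       short_name='flights',
#       title='Flight data 2019',
#       keywords=['aviation'],
#   )
#   # `path` points at .renku/datasets/<identifier>/<METADATA> and `ref` is
#   # the LinkReference registered under datasets/flights.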
def get_dataset_path(self, name):
    """Get dataset path from name."""
    path = self.renku_datasets_path / name / self.METADATA
    if not path.exists():
        try:
            path = LinkReference(client=self, name="datasets/" + name).reference
        except errors.ParameterError:
            return None

    return path
def dataset_remove(
    client,
    short_names,
    with_output=False,
    datasetscontext=contextlib.nullcontext,
    referencescontext=contextlib.nullcontext,
    commit_message=None,
):
    """Delete a dataset."""
    datasets = {name: client.get_dataset_path(name) for name in short_names}

    if not datasets:
        raise ParameterError(
            'use dataset short_name or identifier', param_hint='short_names'
        )

    unknown = [
        name for name, path in datasets.items()
        if not path or not path.exists()
    ]
    if unknown:
        raise ParameterError(
            'unknown datasets ' + ', '.join(unknown), param_hint='short_names'
        )

    datasets = set(datasets.values())

    references = list(LinkReference.iter_items(client, common_path='datasets'))

    if not with_output:
        for dataset in datasets:
            if dataset and dataset.exists():
                dataset.unlink()

        for ref in references:
            if ref.reference in datasets:
                ref.delete()

        return datasets, references

    datasets_c = datasetscontext(datasets)

    with datasets_c as bar:
        for dataset in bar:
            if dataset and dataset.exists():
                dataset.unlink()

    references_c = referencescontext(references)

    with references_c as bar:
        for ref in bar:
            if ref.reference in datasets:
                ref.delete()
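# The *context parameters default to contextlib.nullcontext (Python 3.7+), so
# a caller that wants progress reporting can inject a context-manager factory
# instead. A sketch using click.progressbar (one possible choice, not part of
# this API):
#
#   dataset_remove(
#       client,
#       ['flights'],
#       with_output=True,
#       datasetscontext=lambda items: click.progressbar(items, label='datasets'),
#       referencescontext=lambda items: click.progressbar(items, label='refs'),
#   )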
def _migrate_broken_dataset_paths(client):
    """Ensure all paths are using correct directory structure."""
    for dataset in client.datasets.values():
        dataset_path = Path(dataset.path)
        expected_path = (
            client.path / client.renku_datasets_path /
            Path(quote(dataset.identifier, safe=''))
        )

        # migrate the refs
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(dataset.short_name),
            force=True,
        )
        ref.set_reference(expected_path / client.METADATA)

        if not dataset_path.exists():
            dataset_path = (
                client.path / client.renku_datasets_path /
                uuid.UUID(dataset.identifier).hex
            )

        if not expected_path.exists():
            shutil.move(str(dataset_path), str(expected_path))
            dataset.path = expected_path
            dataset.__reference__ = expected_path / client.METADATA

        for file_ in dataset.files:
            file_path = Path(file_.path)
            if not file_path.exists() and file_.path.startswith('..'):
                new_path = (
                    client.path / client.renku_datasets_path / dataset.uid /
                    file_path
                ).resolve().relative_to(client.path)
                file_.path = new_path

                _, commit, _ = client.resolve_in_submodules(
                    client.find_previous_commit(file_.path, revision='HEAD'),
                    file_.path,
                )

                host = client.remote.get('host') or 'localhost'
                host = os.environ.get('RENKU_DOMAIN') or host

                # always set the id by the identifier
                file_._id = urllib.parse.urljoin(
                    'https://{host}'.format(host=host),
                    posixpath.join(
                        '/blob/{hexsha}/{path}'.format(
                            hexsha=commit.hexsha, path=new_path
                        )
                    )
                )
                file_._label = '{}@{}'.format(new_path, commit.hexsha)

        dataset.to_yaml()
def migrate_broken_dataset_paths(client):
    """Ensure all paths are using correct directory structure."""
    for dataset in client.datasets.values():
        dataset_path = Path(dataset.path)
        expected_path = (
            client.renku_datasets_path /
            Path(quote(dataset.identifier, safe=''))
        )

        # migrate the refs
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(dataset.short_name),
            force=True,
        )
        ref.set_reference(expected_path / client.METADATA)

        if not dataset_path.exists():
            dataset_path = (
                client.renku_datasets_path /
                uuid.UUID(dataset.identifier).hex
            )

        if not expected_path.exists():
            shutil.move(str(dataset_path), str(expected_path))
            dataset.path = expected_path
            dataset.__reference__ = expected_path / client.METADATA

        for file_ in dataset.files:
            file_path = Path(file_.path)
            if not file_path.exists() and file_.path.startswith('..'):
                new_path = (
                    client.renku_datasets_path / dataset.uid / file_path
                ).resolve().relative_to(client.path)
                file_.path = new_path
                file_._label = new_path

                _, commit, _ = client.resolve_in_submodules(
                    client.find_previous_commit(file_.path, revision='HEAD'),
                    file_.path,
                )

                id_format = 'blob/{commit}/{path}'
                file_._id = id_format.format(
                    commit=commit.hexsha, path=new_path
                )

        dataset.to_yaml()
def create_dataset(self, name, short_name=None, description='', creators=None):
    """Create a dataset."""
    if not name:
        raise errors.ParameterError('Dataset name must be provided.')

    if not short_name:
        short_name = generate_default_short_name(name, None)

    if not is_dataset_name_valid(short_name):
        raise errors.ParameterError(
            'Dataset name "{}" is not valid.'.format(short_name)
        )

    if self.load_dataset(name=short_name):
        raise errors.DatasetExistsError(
            'Dataset exists: "{}".'.format(short_name)
        )

    identifier = str(uuid.uuid4())
    path = self.renku_datasets_path / identifier / self.METADATA
    if path.exists():
        raise errors.DatasetExistsError(
            'Dataset with reference {} exists'.format(path)
        )
    path.parent.mkdir(parents=True, exist_ok=True)

    if creators is None:
        creators = [Person.from_git(self.repo)]

    with with_reference(path):
        dataset = Dataset(
            client=self,
            identifier=identifier,
            name=name,
            short_name=short_name,
            description=description,
            creator=creators,
        )

    dataset_ref = LinkReference.create(client=self, name='datasets/' + short_name)
    dataset_ref.set_reference(path)
    dataset.to_yaml()

    return dataset, path, dataset_ref
def workflow(ctx, client):
    """List or manage workflows with subcommands."""
    if ctx.invoked_subcommand is None:
        from renku.core.models.refs import LinkReference

        names = defaultdict(list)
        for ref in LinkReference.iter_items(client, common_path='workflows'):
            names[ref.reference.name].append(ref.name)

        for path in client.workflow_path.glob('*.cwl'):
            click.echo(
                '{path}: {names}'.format(
                    path=path.name,
                    names=', '.join(
                        click.style(_deref(name), fg='green')
                        for name in names[path.name]
                    ),
                )
            )
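# With refs in place, the bare `renku workflow` invocation above prints one
# line per CWL file; roughly (illustrative output, assuming _deref strips the
# 'workflows/' prefix):
#
#   train.cwl: train, train-v2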
def check_missing_references(client):
    """Find missing references."""
    from renku.core.models.refs import LinkReference

    missing = [
        ref for ref in LinkReference.iter_items(client)
        if not ref.reference.exists()
    ]

    if not missing:
        return True, None

    problems = (
        '\n' + WARNING + 'There are missing references.'
        '\n  (use "git rm <name>" to clean them)\n\n\t' + '\n\t'.join(
            click.style(str(ref.path), fg='yellow') + ' -> ' +
            click.style(str(ref.reference), fg='red')
            for ref in missing
        ) + '\n'
    )
    return False, problems
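# When a ref's target is gone, the report assembled above renders after the
# WARNING prefix along these lines (paths illustrative):
#
#   There are missing references.
#     (use "git rm <name>" to clean them)
#
#       .renku/refs/datasets/flights -> .renku/datasets/<identifier>/metadata.yml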
def _migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x."""

    def _dataset_pre_0_3(client):
        """Return paths of dataset metadata for pre 0.3.4."""
        project_is_pre_0_3 = int(client.project.version) < 2
        if project_is_pre_0_3:
            return (client.path / DATA_DIR).rglob(client.METADATA)
        return []

    for old_path in _dataset_pre_0_3(client):
        name = str(old_path.parent.relative_to(client.path / DATA_DIR))

        dataset = Dataset.from_yaml(old_path, client=client)
        new_path = (client.renku_datasets_path / dataset.uid / client.METADATA)
        new_path.parent.mkdir(parents=True, exist_ok=True)

        with client.with_metadata(read_only=True) as meta:
            for module in client.repo.submodules:
                if Path(module.url).name == meta.name:
                    module.remove()

        for file_ in dataset.files:
            if not Path(file_.path).exists():
                expected_path = (
                    client.path / DATA_DIR / dataset.name / file_.path
                )
                if expected_path.exists():
                    file_.path = expected_path.relative_to(client.path)

        dataset.__reference__ = new_path.relative_to(client.path)
        dataset.to_yaml()

        Path(old_path).unlink()
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(name),
            force=True,
        )
        ref.set_reference(new_path)
def _migrate_broken_dataset_paths(client):
    """Ensure all paths are using correct directory structure."""
    for dataset in get_client_datasets(client):
        expected_path = client.renku_datasets_path / dataset.identifier

        if not dataset.name:
            dataset.name = dataset.title

        # migrate the refs
        ref = LinkReference.create(
            client=client,
            name="datasets/{0}".format(dataset.name),
            force=True,
        )
        ref.set_reference(expected_path / client.METADATA)

        old_dataset_path = client.renku_datasets_path / uuid.UUID(
            dataset.identifier
        ).hex

        dataset.path = os.path.relpath(expected_path, client.path)
        if not expected_path.exists():
            shutil.move(old_dataset_path, expected_path)

        for file_ in dataset.files:
            file_path = Path(file_.path)
            if not file_path.exists() or file_.path.startswith(".."):
                new_path = Path(
                    os.path.abspath(
                        client.renku_datasets_path / dataset.identifier /
                        file_path
                    )
                ).relative_to(client.path)
                file_.path = new_path
                file_.name = os.path.basename(file_.path)

        dataset.to_yaml(expected_path / client.METADATA)
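# Schematically, this migration moves metadata from the hex-named directory to
# one named after the dashed identifier (layout inferred from the code above):
#
#   .renku/datasets/<uuid.hex>/<METADATA>  ->  .renku/datasets/<str(uuid)>/<METADATA>
#
# and re-points the datasets/<name> ref at the new location.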
def set_name(client, name, path, force):
    """Sets the <name> for remote <path>."""
    from renku.core.models.refs import LinkReference

    LinkReference.create(client=client, name=_ref(name), force=force).set_reference(path)
def workflow_names(self):
    """Return index of workflow names."""
    names = defaultdict(list)
    for ref in LinkReference.iter_items(self, common_path='workflows'):
        names[str(ref.reference.relative_to(self.path))].append(ref.name)
    return names
def remove(client, name):
    """Remove the remote named <name>."""
    from renku.core.models.refs import LinkReference

    LinkReference(client=client, name=_ref(name)).delete()
def rename(client, old, new, force):
    """Rename the workflow named <old> to <new>."""
    from renku.core.models.refs import LinkReference

    LinkReference(client=client, name=_ref(old)).rename(_ref(new), force=force)
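# Taken together, set_name/remove/rename cover the LinkReference lifecycle for
# workflow refs. A sketch, assuming _ref prefixes names with 'workflows/':
#
#   set_name(client, 'train', client.workflow_path / 'train.cwl', force=False)
#   rename(client, 'train', 'train-v2', force=False)
#   remove(client, 'train-v2')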