Exemple #1
0
def _migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x.

    Every metadata file yielded by ``get_pre_0_3_4_datasets_metadata`` is
    loaded, named after its directory relative to the data directory,
    written to ``<renku_datasets_path>/<identifier>/<METADATA>`` and removed
    from its old location. A ``datasets/<name>`` reference is then
    (re)created, with ``force=True``, pointing at the new metadata file.
    """
    for legacy_metadata in get_pre_0_3_4_datasets_metadata(client):
        legacy_dir = legacy_metadata.parent
        name = str(legacy_dir.relative_to(client.path / DATA_DIR))

        dataset = Dataset.from_yaml(legacy_metadata, client=client)
        dataset.title = name
        dataset.name = name

        new_path = (
            client.renku_datasets_path / dataset.identifier / client.METADATA
        )
        new_path.parent.mkdir(parents=True, exist_ok=True)

        # Remove submodules whose URL basename equals the name of the
        # metadata object yielded by ``client.with_metadata``.
        with client.with_metadata(read_only=True) as meta:
            for module in client.repo.submodules:
                if Path(module.url).name != meta.name:
                    continue
                module.remove()

        # Files that cannot be found as recorded are looked up under the
        # dataset's own data directory and re-pointed there when present.
        for file_ in dataset.files:
            if Path(file_.path).exists():
                continue
            candidate = client.path / DATA_DIR / dataset.name / file_.path
            if candidate.exists():
                file_.path = candidate.relative_to(client.path)

        dataset.to_yaml(new_path)
        Path(legacy_metadata).unlink()

        reference_name = "datasets/{0}".format(name)
        ref = LinkReference.create(
            client=client, name=reference_name, force=True
        )
        ref.set_reference(new_path)
Exemple #2
0
def migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x.

    For each metadata file yielded by ``dataset_pre_0_3`` the dataset is
    loaded, re-serialized under ``<renku_datasets_path>/<uid>/<METADATA>``,
    the old file is deleted and a ``datasets/<name>`` reference is
    (re)created pointing at the new location.
    """
    for legacy_metadata in dataset_pre_0_3(client):
        data_root = client.path / 'data'
        name = str(legacy_metadata.parent.relative_to(data_root))

        dataset = Dataset.from_yaml(legacy_metadata, client=client)

        new_path = client.renku_datasets_path / dataset.uid / client.METADATA
        new_path.parent.mkdir(parents=True, exist_ok=True)

        # Remove submodules whose URL basename equals the name of the
        # metadata object yielded by ``client.with_metadata``.
        with client.with_metadata(read_only=True) as meta:
            for module in client.repo.submodules:
                if Path(module.url).name != meta.name:
                    continue
                module.remove()

        # Files missing at their recorded path are searched for under
        # ``data/<dataset name>/`` and re-pointed there when found.
        for file_ in dataset.files:
            if Path(file_.path).exists():
                continue
            candidate = client.path / 'data' / dataset.name / file_.path
            if candidate.exists():
                file_.path = candidate.relative_to(client.path)

        # ``to_yaml()`` is called without a path, so the target is set
        # through ``__reference__`` first.
        dataset.__reference__ = new_path
        dataset.to_yaml()

        Path(legacy_metadata).unlink()

        reference_name = 'datasets/{0}'.format(name)
        ref = LinkReference.create(
            client=client, name=reference_name, force=True
        )
        ref.set_reference(new_path)
def is_dataset_short_name_valid(short_name):
    """Return whether ``short_name`` is usable as a dataset short name.

    A valid short_name is a non-empty, valid Git reference name with no /.

    :param short_name: candidate name (``None`` and ``''`` are invalid).
    :returns: always a real ``bool``; empty input yields ``False`` rather
        than leaking the falsy input value itself to the caller.
    """
    # TODO make short_name an RFC 3986 compatible and migrate old projects
    # The cheap checks come first so that obviously invalid names are
    # rejected without consulting ``LinkReference`` at all.
    if not short_name or '/' in short_name:
        return False
    return bool(LinkReference.check_ref_format(short_name, no_slashes=True))
Exemple #4
0
    def create_dataset(
        self,
        short_name=None,
        title=None,
        description=None,
        creators=None,
        keywords=None,
    ):
        """Create a dataset.

        :param short_name: required; must pass
            ``is_dataset_short_name_valid`` and must not already be loadable
            through ``self.load_dataset``.
        :param title: stored as the dataset ``name``; falls back to
            ``short_name`` when empty.
        :param description: passed through to ``Dataset``.
        :param creators: defaults to the single person derived from
            ``self.repo`` when ``None``.
        :param keywords: defaults to an empty tuple when falsy.
        :returns: tuple ``(dataset, metadata path, dataset reference)``.
        :raises errors.ParameterError: missing or invalid ``short_name``.
        :raises errors.DatasetExistsError: the short name or the generated
            metadata path is already in use.
        """
        if not short_name:
            raise errors.ParameterError('Dataset short_name must be provided.')

        if not is_dataset_short_name_valid(short_name):
            invalid_msg = 'Dataset short_name "{}" is not valid.'.format(
                short_name)
            raise errors.ParameterError(invalid_msg)

        already_present = self.load_dataset(short_name=short_name)
        if already_present:
            exists_msg = 'Dataset exists: "{}".'.format(short_name)
            raise errors.DatasetExistsError(exists_msg)

        title = title or short_name

        # A fresh UUID names the directory that holds the metadata file.
        identifier = str(uuid.uuid4())
        path = self.renku_datasets_path / identifier / self.METADATA
        if path.exists():
            raise errors.DatasetExistsError(
                'Dataset with reference {} exists'.format(path))
        path.parent.mkdir(parents=True, exist_ok=True)

        if creators is None:
            creators = [Person.from_git(self.repo)]

        with with_reference(path):
            dataset = Dataset(
                client=self,
                identifier=identifier,
                short_name=short_name,
                name=title,
                description=description,
                creator=creators,
                keywords=keywords or (),
            )

        reference_name = 'datasets/' + short_name
        dataset_ref = LinkReference.create(client=self, name=reference_name)
        dataset_ref.set_reference(path)

        # Store the dataset path relative to the client root before saving.
        dataset.path = Path(dataset.path).relative_to(self.path)
        dataset.to_yaml()

        return dataset, path, dataset_ref
Exemple #5
0
    def get_dataset_path(self, name):
        """Get dataset path from name.

        ``name`` is first tried as a directory name below
        ``self.renku_datasets_path``; if no metadata file exists there, the
        ``datasets/<name>`` reference is resolved instead.

        :returns: the metadata path, or ``None`` when resolving the
            reference raises ``errors.ParameterError``.
        """
        direct_path = self.renku_datasets_path / name / self.METADATA
        if direct_path.exists():
            return direct_path

        try:
            return LinkReference(client=self, name="datasets/" + name).reference
        except errors.ParameterError:
            return None
Exemple #6
0
def dataset_remove(
    client,
    short_names,
    with_output=False,
    datasetscontext=contextlib.nullcontext,
    referencescontext=contextlib.nullcontext,
    commit_message=None
):
    """Delete dataset metadata files and the references pointing at them.

    :param client: object providing ``get_dataset_path``.
    :param short_names: dataset short names or identifiers to delete.
    :param with_output: when true, ``datasetscontext`` and
        ``referencescontext`` wrap the items being processed (e.g. to
        report progress); when false both are ignored.
    :param datasetscontext: callable taking the set of metadata paths and
        returning a context manager that yields an iterable over them.
    :param referencescontext: same as ``datasetscontext`` but for the list
        of dataset references.
    :param commit_message: not used inside this function body.
    :returns: ``(datasets, references)``, the set of metadata paths and
        the list of all ``datasets`` references that were inspected. The
        tuple is returned whether or not ``with_output`` is set.
    :raises ParameterError: if no names were given or any name cannot be
        resolved to an existing metadata file.
    """
    datasets = {name: client.get_dataset_path(name) for name in short_names}

    if not datasets:
        raise ParameterError(
            'use dataset short_name or identifier', param_hint='short_names'
        )

    unknown = [
        name
        for name, path in datasets.items() if not path or not path.exists()
    ]
    if unknown:
        raise ParameterError(
            'unknown datasets ' + ', '.join(unknown), param_hint='short_names'
        )

    datasets = set(datasets.values())
    references = list(LinkReference.iter_items(client, common_path='datasets'))

    if not with_output:
        # Without output the wrappers must not be used; ``nullcontext``
        # simply yields the collection it was given.
        datasetscontext = referencescontext = contextlib.nullcontext

    with datasetscontext(datasets) as bar:
        for dataset in bar:
            if dataset and dataset.exists():
                dataset.unlink()

    # Only references that point at one of the removed metadata files are
    # deleted; unrelated dataset references are left alone.
    with referencescontext(references) as bar:
        for ref in bar:
            if ref.reference in datasets:
                ref.delete()

    return datasets, references
def _migrate_broken_dataset_paths(client):
    """Ensure all paths are using correct directory structure.

    For every dataset in ``client.datasets`` the ``datasets/<short_name>``
    reference is re-created to point at the metadata file inside the
    directory named after the URL-quoted identifier, the dataset directory
    is moved there if that directory does not exist yet, and file entries
    whose relative ``..`` paths no longer exist are rewritten relative to
    the client root. The dataset is saved afterwards.
    """
    for dataset in client.datasets.values():
        current_dir = Path(dataset.path)

        datasets_root = client.path / client.renku_datasets_path
        target_dir = datasets_root / Path(quote(dataset.identifier, safe=''))

        # migrate the refs
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(dataset.short_name),
            force=True,
        )
        ref.set_reference(target_dir / client.METADATA)

        if not current_dir.exists():
            # Fall back to a directory named by the identifier's hex form.
            hex_name = uuid.UUID(dataset.identifier).hex
            current_dir = client.path / client.renku_datasets_path / hex_name

        if not target_dir.exists():
            shutil.move(str(current_dir), str(target_dir))
            dataset.path = target_dir
            dataset.__reference__ = target_dir / client.METADATA

        for file_ in dataset.files:
            file_path = Path(file_.path)
            if file_path.exists() or not file_.path.startswith('..'):
                continue

            absolute = (
                client.path / client.renku_datasets_path / dataset.uid /
                file_path
            ).resolve()
            new_path = absolute.relative_to(client.path)
            file_.path = new_path

            previous_commit = client.find_previous_commit(
                file_.path, revision='HEAD'
            )
            _, commit, _ = client.resolve_in_submodules(
                previous_commit, file_.path
            )

            # ``RENKU_DOMAIN`` takes precedence over the remote's host.
            remote_host = client.remote.get('host') or 'localhost'
            host = os.environ.get('RENKU_DOMAIN') or remote_host

            # always set the id by the identifier
            base_url = 'https://{host}'.format(host=host)
            blob_path = posixpath.join('/blob/{hexsha}/{path}'.format(
                hexsha=commit.hexsha, path=new_path))
            file_._id = urllib.parse.urljoin(base_url, blob_path)
            file_._label = '{}@{}'.format(new_path, commit.hexsha)

        dataset.to_yaml()
Exemple #8
0
def migrate_broken_dataset_paths(client):
    """Ensure all paths are using correct directory structure.

    For every dataset in ``client.datasets`` the ``datasets/<short_name>``
    reference is re-created to point at the metadata file inside the
    directory named after the URL-quoted identifier, the dataset directory
    is moved there if that directory does not exist yet, and file entries
    whose relative ``..`` paths no longer exist get a new path, label and
    id. The dataset is saved afterwards.
    """
    for dataset in client.datasets.values():
        current_dir = Path(dataset.path)

        quoted_identifier = Path(quote(dataset.identifier, safe=''))
        target_dir = client.renku_datasets_path / quoted_identifier

        # migrate the refs
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(dataset.short_name),
            force=True,
        )
        ref.set_reference(target_dir / client.METADATA)

        if not current_dir.exists():
            # Fall back to a directory named by the identifier's hex form.
            hex_name = uuid.UUID(dataset.identifier).hex
            current_dir = client.renku_datasets_path / hex_name

        if not target_dir.exists():
            shutil.move(str(current_dir), str(target_dir))
            dataset.path = target_dir
            dataset.__reference__ = target_dir / client.METADATA

        for file_ in dataset.files:
            file_path = Path(file_.path)
            if file_path.exists() or not file_.path.startswith('..'):
                continue

            absolute = (
                client.renku_datasets_path / dataset.uid / file_path
            ).resolve()
            new_path = absolute.relative_to(client.path)

            file_.path = new_path
            file_._label = new_path

            previous_commit = client.find_previous_commit(
                file_.path, revision='HEAD'
            )
            _, commit, _ = client.resolve_in_submodules(
                previous_commit, file_.path
            )

            # The id is built from the resolved commit and the new path.
            id_format = 'blob/{commit}/{path}'
            file_._id = id_format.format(commit=commit.hexsha, path=new_path)

        dataset.to_yaml()
Exemple #9
0
    def create_dataset(
        self,
        name,
        short_name=None,
        description='',
        creators=None,
    ):
        """Create a dataset.

        :param name: required dataset name.
        :param short_name: generated from ``name`` through
            ``generate_default_short_name`` when not given; must pass
            ``is_dataset_name_valid`` and must not already be loadable
            through ``self.load_dataset``.
        :param description: passed through to ``Dataset``.
        :param creators: defaults to the single person derived from
            ``self.repo`` when ``None``.
        :returns: tuple ``(dataset, metadata path, dataset reference)``.
        :raises errors.ParameterError: missing name or invalid short name.
        :raises errors.DatasetExistsError: the short name or the generated
            metadata path is already in use.
        """
        if not name:
            raise errors.ParameterError('Dataset name must be provided.')

        short_name = short_name or generate_default_short_name(name, None)

        if not is_dataset_name_valid(short_name):
            invalid_msg = 'Dataset name "{}" is not valid.'.format(short_name)
            raise errors.ParameterError(invalid_msg)

        already_present = self.load_dataset(name=short_name)
        if already_present:
            exists_msg = 'Dataset exists: "{}".'.format(short_name)
            raise errors.DatasetExistsError(exists_msg)

        # A fresh UUID names the directory that holds the metadata file.
        identifier = str(uuid.uuid4())
        path = self.renku_datasets_path / identifier / self.METADATA
        if path.exists():
            raise errors.DatasetExistsError(
                'Dataset with reference {} exists'.format(path))
        path.parent.mkdir(parents=True, exist_ok=True)

        if creators is None:
            creators = [Person.from_git(self.repo)]

        with with_reference(path):
            dataset = Dataset(
                client=self,
                identifier=identifier,
                name=name,
                short_name=short_name,
                description=description,
                creator=creators,
            )

        reference_name = 'datasets/' + short_name
        dataset_ref = LinkReference.create(client=self, name=reference_name)
        dataset_ref.set_reference(path)
        dataset.to_yaml()

        return dataset, path, dataset_ref
Exemple #10
0
def workflow(ctx, client):
    """List or manage workflows with subcommands.

    When no subcommand was invoked, print one line per ``*.cwl`` file in
    ``client.workflow_path`` followed by the names of the ``workflows``
    references that point at a file with that name.
    """
    if ctx.invoked_subcommand is not None:
        return

    from renku.core.models.refs import LinkReference

    # Group reference names by the file name of their target.
    names_by_file = defaultdict(list)
    for ref in LinkReference.iter_items(client, common_path='workflows'):
        names_by_file[ref.reference.name].append(ref.name)

    for cwl_file in client.workflow_path.glob('*.cwl'):
        styled_names = [
            click.style(_deref(name), fg='green')
            for name in names_by_file[cwl_file.name]
        ]
        line = '{path}: {names}'.format(
            path=cwl_file.name,
            names=', '.join(styled_names),
        )
        click.echo(line)
def check_missing_references(client):
    """Find missing references.

    :returns: ``(True, None)`` when the target of every reference exists,
        otherwise ``(False, message)`` where ``message`` lists each broken
        reference as ``<reference path> -> <missing target>``.
    """
    from renku.core.models.refs import LinkReference

    missing = []
    for ref in LinkReference.iter_items(client):
        if not ref.reference.exists():
            missing.append(ref)

    if not missing:
        return True, None

    header = ('\n' + WARNING + 'There are missing references.'
              '\n  (use "git rm <name>" to clean them)\n\n\t')
    entries = [
        click.style(str(ref.path), fg='yellow') + ' -> ' +
        click.style(str(ref.reference), fg='red')
        for ref in missing
    ]
    problems = header + '\n\t'.join(entries) + '\n'
    return False, problems
Exemple #12
0
def check_missing_references(client):
    """Find missing references.

    :returns: ``(True, None)`` when the target of every reference exists,
        otherwise ``(False, message)`` where ``message`` lists each broken
        reference as ``<reference path> -> <missing target>``.
    """
    from renku.core.models.refs import LinkReference

    def _describe(ref):
        """Render one broken reference as a coloured ``path -> target``."""
        source = click.style(str(ref.path), fg="yellow")
        target = click.style(str(ref.reference), fg="red")
        return source + " -> " + target

    missing = [
        ref for ref in LinkReference.iter_items(client)
        if not ref.reference.exists()
    ]
    if not missing:
        return True, None

    intro = ("\n" + WARNING + "There are missing references."
             '\n  (use "git rm <name>" to clean them)\n\n\t')
    listing = "\n\t".join(_describe(ref) for ref in missing)
    return False, intro + listing + "\n"
def _migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x.

    Only projects whose integer ``client.project.version`` is below 2 are
    touched. For those, every metadata file found recursively under the
    data directory is re-serialized under
    ``<renku_datasets_path>/<uid>/<METADATA>``, the old file is deleted and
    a ``datasets/<name>`` reference is (re)created pointing at the new
    location.
    """
    # Paths of dataset metadata for pre 0.3.4 projects; nothing otherwise.
    if int(client.project.version) < 2:
        legacy_metadata_files = (client.path / DATA_DIR).rglob(client.METADATA)
    else:
        legacy_metadata_files = []

    for legacy_metadata in legacy_metadata_files:
        data_root = client.path / DATA_DIR
        name = str(legacy_metadata.parent.relative_to(data_root))

        dataset = Dataset.from_yaml(legacy_metadata, client=client)

        new_path = client.renku_datasets_path / dataset.uid / client.METADATA
        new_path.parent.mkdir(parents=True, exist_ok=True)

        # Remove submodules whose URL basename equals the name of the
        # metadata object yielded by ``client.with_metadata``.
        with client.with_metadata(read_only=True) as meta:
            for module in client.repo.submodules:
                if Path(module.url).name != meta.name:
                    continue
                module.remove()

        # Files missing at their recorded path are searched for under the
        # dataset's data directory and re-pointed there when found.
        for file_ in dataset.files:
            if Path(file_.path).exists():
                continue
            candidate = client.path / DATA_DIR / dataset.name / file_.path
            if candidate.exists():
                file_.path = candidate.relative_to(client.path)

        # ``to_yaml()`` is called without a path, so the target is set
        # through ``__reference__`` (relative to the client root) first.
        dataset.__reference__ = new_path.relative_to(client.path)
        dataset.to_yaml()

        Path(legacy_metadata).unlink()

        reference_name = 'datasets/{0}'.format(name)
        ref = LinkReference.create(
            client=client, name=reference_name, force=True
        )
        ref.set_reference(new_path)
Exemple #14
0
def _migrate_broken_dataset_paths(client):
    """Ensure all paths are using correct directory structure.

    For every dataset returned by ``get_client_datasets`` the
    ``datasets/<name>`` reference is re-created, the dataset directory is
    moved from its hex-named location to ``<renku_datasets_path>/<identifier>``
    when the latter does not exist, file paths that are missing or start
    with ``..`` are rewritten relative to the client root, and the dataset
    is written to the metadata file in the expected directory.
    """
    for dataset in get_client_datasets(client):
        target_dir = client.renku_datasets_path / dataset.identifier

        # Datasets without a name fall back to their title.
        if not dataset.name:
            dataset.name = dataset.title

        # migrate the refs
        reference_name = "datasets/{0}".format(dataset.name)
        ref = LinkReference.create(
            client=client, name=reference_name, force=True
        )
        ref.set_reference(target_dir / client.METADATA)

        hex_name = uuid.UUID(dataset.identifier).hex
        legacy_dir = client.renku_datasets_path / hex_name

        dataset.path = os.path.relpath(target_dir, client.path)

        if not target_dir.exists():
            shutil.move(legacy_dir, target_dir)

        for file_ in dataset.files:
            file_path = Path(file_.path)
            needs_rewrite = (
                not file_path.exists() or file_.path.startswith("..")
            )
            if needs_rewrite:
                # ``abspath`` collapses ``..`` segments without touching
                # the filesystem.
                absolute = os.path.abspath(
                    client.renku_datasets_path / dataset.identifier /
                    file_path
                )
                file_.path = Path(absolute).relative_to(client.path)

            file_.name = os.path.basename(file_.path)

        dataset.to_yaml(target_dir / client.METADATA)
Exemple #15
0
def set_name(client, name, path, force):
    """Sets the <name> for remote <path>.

    Creates the reference named ``_ref(name)`` (passing ``force`` through
    to ``LinkReference.create``) and points it at ``path``.
    """
    from renku.core.models.refs import LinkReference

    reference = LinkReference.create(
        client=client,
        name=_ref(name),
        force=force,
    )
    reference.set_reference(path)
Exemple #16
0
 def workflow_names(self):
     """Return index of workflow names.

     The result maps each reference target, expressed as a string path
     relative to ``self.path``, to the list of ``workflows`` reference
     names that point at it.
     """
     index = defaultdict(list)
     for ref in LinkReference.iter_items(self, common_path='workflows'):
         target = str(ref.reference.relative_to(self.path))
         index[target].append(ref.name)
     return index
Exemple #17
0
def remove(client, name):
    """Remove the remote named <name>.

    Deletes the reference whose name is ``_ref(name)``.
    """
    from renku.core.models.refs import LinkReference

    reference = LinkReference(client=client, name=_ref(name))
    reference.delete()
Exemple #18
0
def rename(client, old, new, force):
    """Rename the workflow named <old> to <new>.

    Both names are mapped through ``_ref`` first; ``force`` is passed on
    to ``LinkReference.rename``.
    """
    from renku.core.models.refs import LinkReference

    reference = LinkReference(client=client, name=_ref(old))
    reference.rename(_ref(new), force=force)