Example #1
0
def migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x."""
    for old_path in dataset_pre_0_3(client):
        name = str(old_path.parent.relative_to(client.path / 'data'))

        dataset = Dataset.from_yaml(old_path, client=client)
        new_path = (client.renku_datasets_path / dataset.uid / client.METADATA)
        new_path.parent.mkdir(parents=True, exist_ok=True)

        with client.with_metadata(read_only=True) as meta:
            for module in client.repo.submodules:
                if Path(module.url).name == meta.name:
                    module.remove()

        for file_ in dataset.files:
            if not Path(file_.path).exists():
                expected_path = (
                    client.path / 'data' / dataset.name / file_.path
                )
                if expected_path.exists():
                    file_.path = expected_path.relative_to(client.path)

        dataset.__reference__ = new_path
        dataset.to_yaml()

        Path(old_path).unlink()
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(name),
            force=True,
        )
        ref.set_reference(new_path)
Example #2
0
def _migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x."""

    for old_path in get_pre_0_3_4_datasets_metadata(client):
        name = str(old_path.parent.relative_to(client.path / DATA_DIR))

        dataset = Dataset.from_yaml(old_path, client=client)
        dataset.title = name
        dataset.name = name
        new_path = client.renku_datasets_path / dataset.identifier / client.METADATA
        new_path.parent.mkdir(parents=True, exist_ok=True)

        with client.with_metadata(read_only=True) as meta:
            for module in client.repo.submodules:
                if Path(module.url).name == meta.name:
                    module.remove()

        for file_ in dataset.files:
            if not Path(file_.path).exists():
                expected_path = client.path / DATA_DIR / dataset.name / file_.path
                if expected_path.exists():
                    file_.path = expected_path.relative_to(client.path)

        dataset.to_yaml(new_path)

        Path(old_path).unlink()
        ref = LinkReference.create(
            client=client,
            name="datasets/{0}".format(name),
            force=True,
        )
        ref.set_reference(new_path)
Example #3
0
    def create_dataset(
        self,
        short_name=None,
        title=None,
        description=None,
        creators=None,
        keywords=None,
    ):
        """Create a dataset."""
        if not short_name:
            raise errors.ParameterError('Dataset short_name must be provided.')

        if not is_dataset_short_name_valid(short_name):
            raise errors.ParameterError(
                'Dataset short_name "{}" is not valid.'.format(short_name))

        if self.load_dataset(short_name=short_name):
            raise errors.DatasetExistsError(
                'Dataset exists: "{}".'.format(short_name))

        if not title:
            title = short_name

        identifier = str(uuid.uuid4())

        path = self.renku_datasets_path / identifier / self.METADATA

        if path.exists():
            raise errors.DatasetExistsError(
                'Dataset with reference {} exists'.format(path))

        path.parent.mkdir(parents=True, exist_ok=True)

        if creators is None:
            creators = [Person.from_git(self.repo)]

        keywords = keywords or ()

        with with_reference(path):
            dataset = Dataset(
                client=self,
                identifier=identifier,
                short_name=short_name,
                name=title,
                description=description,
                creator=creators,
                keywords=keywords,
            )

        dataset_ref = LinkReference.create(client=self,
                                           name='datasets/' + short_name)

        dataset_ref.set_reference(path)
        dataset.path = Path(dataset.path).relative_to(self.path)
        dataset.to_yaml()

        return dataset, path, dataset_ref
def _migrate_broken_dataset_paths(client):
    """Ensure all paths are using correct directory structure."""
    for dataset in client.datasets.values():
        dataset_path = Path(dataset.path)

        expected_path = (client.path / client.renku_datasets_path /
                         Path(quote(dataset.identifier, safe='')))

        # migrate the refs
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(dataset.short_name),
            force=True,
        )
        ref.set_reference(expected_path / client.METADATA)

        if not dataset_path.exists():
            dataset_path = (client.path / client.renku_datasets_path /
                            uuid.UUID(dataset.identifier).hex)

        if not expected_path.exists():
            shutil.move(str(dataset_path), str(expected_path))
            dataset.path = expected_path
            dataset.__reference__ = expected_path / client.METADATA

        for file_ in dataset.files:
            file_path = Path(file_.path)
            if not file_path.exists() and file_.path.startswith('..'):
                new_path = (client.path / client.renku_datasets_path /
                            dataset.uid / file_path).resolve().relative_to(
                                client.path)

                file_.path = new_path

                _, commit, _ = client.resolve_in_submodules(
                    client.find_previous_commit(file_.path, revision='HEAD'),
                    file_.path,
                )
                host = client.remote.get('host') or 'localhost'
                host = os.environ.get('RENKU_DOMAIN') or host

                # always set the id by the identifier
                file_._id = urllib.parse.urljoin(
                    'https://{host}'.format(host=host),
                    posixpath.join('/blob/{hexsha}/{path}'.format(
                        hexsha=commit.hexsha, path=new_path)))
                file_._label = '{}@{}'.format(new_path, commit.hexsha)

        dataset.to_yaml()
Example #5
0
def migrate_broken_dataset_paths(client):
    """Ensure all paths are using correct directory structure."""
    for dataset in client.datasets.values():
        dataset_path = Path(dataset.path)

        expected_path = (
            client.renku_datasets_path /
            Path(quote(dataset.identifier, safe=''))
        )

        # migrate the refs
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(dataset.short_name),
            force=True,
        )
        ref.set_reference(expected_path / client.METADATA)

        if not dataset_path.exists():
            dataset_path = (
                client.renku_datasets_path / uuid.UUID(dataset.identifier).hex
            )

        if not expected_path.exists():
            shutil.move(str(dataset_path), str(expected_path))
            dataset.path = expected_path
            dataset.__reference__ = expected_path / client.METADATA

        for file_ in dataset.files:
            file_path = Path(file_.path)
            if not file_path.exists() and file_.path.startswith('..'):
                new_path = (
                    client.renku_datasets_path / dataset.uid / file_path
                ).resolve().relative_to(client.path)

                file_.path = new_path
                file_._label = new_path

                _, commit, _ = client.resolve_in_submodules(
                    client.find_previous_commit(file_.path, revision='HEAD'),
                    file_.path,
                )
                id_format = 'blob/{commit}/{path}'
                file_._id = id_format.format(
                    commit=commit.hexsha, path=new_path
                )

        dataset.to_yaml()
Example #6
0
    def create_dataset(self,
                       name,
                       short_name=None,
                       description='',
                       creators=None):
        """Create a dataset."""
        if not name:
            raise errors.ParameterError('Dataset name must be provided.')

        if not short_name:
            short_name = generate_default_short_name(name, None)

        if not is_dataset_name_valid(short_name):
            raise errors.ParameterError(
                'Dataset name "{}" is not valid.'.format(short_name))

        if self.load_dataset(name=short_name):
            raise errors.DatasetExistsError(
                'Dataset exists: "{}".'.format(short_name))

        identifier = str(uuid.uuid4())

        path = self.renku_datasets_path / identifier / self.METADATA

        if path.exists():
            raise errors.DatasetExistsError(
                'Dataset with reference {} exists'.format(path))

        path.parent.mkdir(parents=True, exist_ok=True)

        if creators is None:
            creators = [Person.from_git(self.repo)]

        with with_reference(path):
            dataset = Dataset(client=self,
                              identifier=identifier,
                              name=name,
                              short_name=short_name,
                              description=description,
                              creator=creators)

        dataset_ref = LinkReference.create(client=self,
                                           name='datasets/' + short_name)

        dataset_ref.set_reference(path)
        dataset.to_yaml()

        return dataset, path, dataset_ref
def _migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x."""
    def _dataset_pre_0_3(client):
        """Return paths of dataset metadata for pre 0.3.4."""
        project_is_pre_0_3 = int(client.project.version) < 2
        if project_is_pre_0_3:
            return (client.path / DATA_DIR).rglob(client.METADATA)
        return []

    for old_path in _dataset_pre_0_3(client):
        name = str(old_path.parent.relative_to(client.path / DATA_DIR))

        dataset = Dataset.from_yaml(old_path, client=client)
        new_path = (client.renku_datasets_path / dataset.uid / client.METADATA)
        new_path.parent.mkdir(parents=True, exist_ok=True)

        with client.with_metadata(read_only=True) as meta:
            for module in client.repo.submodules:
                if Path(module.url).name == meta.name:
                    module.remove()

        for file_ in dataset.files:
            if not Path(file_.path).exists():
                expected_path = (client.path / DATA_DIR / dataset.name /
                                 file_.path)
                if expected_path.exists():
                    file_.path = expected_path.relative_to(client.path)

        dataset.__reference__ = new_path.relative_to(client.path)
        dataset.to_yaml()

        Path(old_path).unlink()
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(name),
            force=True,
        )
        ref.set_reference(new_path)
Example #8
0
def _migrate_broken_dataset_paths(client):
    """Ensure all paths are using correct directory structure."""
    for dataset in get_client_datasets(client):
        expected_path = client.renku_datasets_path / dataset.identifier
        if not dataset.name:
            dataset.name = dataset.title

        # migrate the refs
        ref = LinkReference.create(
            client=client,
            name="datasets/{0}".format(dataset.name),
            force=True,
        )
        ref.set_reference(expected_path / client.METADATA)

        old_dataset_path = client.renku_datasets_path / uuid.UUID(
            dataset.identifier).hex

        dataset.path = os.path.relpath(expected_path, client.path)

        if not expected_path.exists():
            shutil.move(old_dataset_path, expected_path)

        for file_ in dataset.files:
            file_path = Path(file_.path)
            if not file_path.exists() or file_.path.startswith(".."):
                new_path = Path(
                    os.path.abspath(client.renku_datasets_path /
                                    dataset.identifier /
                                    file_path)).relative_to(client.path)

                file_.path = new_path

            file_.name = os.path.basename(file_.path)

        dataset.to_yaml(expected_path / client.METADATA)
Example #9
0
def set_name(client, name, path, force):
    """Sets the <name> for remote <path>."""
    from renku.core.models.refs import LinkReference
    LinkReference.create(client=client, name=_ref(name),
                         force=force).set_reference(path)