Example #1
def migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x."""
    # dataset_pre_0_3() yields paths of pre-0.3.4 dataset metadata files;
    # it appears as the inline helper _dataset_pre_0_3 in Example #5 below.
    for old_path in dataset_pre_0_3(client):
        name = str(old_path.parent.relative_to(client.path / 'data'))

        dataset = Dataset.from_yaml(old_path, client=client)
        new_path = (client.renku_datasets_path / dataset.uid / client.METADATA)
        new_path.parent.mkdir(parents=True, exist_ok=True)

        with client.with_metadata(read_only=True) as meta:
            for module in client.repo.submodules:
                if Path(module.url).name == meta.name:
                    module.remove()

        for file_ in dataset.files:
            if not Path(file_.path).exists():
                expected_path = (
                    client.path / 'data' / dataset.name / file_.path
                )
                if expected_path.exists():
                    file_.path = expected_path.relative_to(client.path)

        dataset.__reference__ = new_path
        dataset.to_yaml()

        Path(old_path).unlink()
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(name),
            force=True,
        )
        ref.set_reference(new_path)
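A minimal sketch of how this migration might be driven, assuming a LocalClient rooted at the project directory (the LocalClient import path and the call site below are assumptions, not shown in the example):

# Hypothetical driver for the migration above.
from renku.core.management import LocalClient  # import path assumed

client = LocalClient(path='.')  # root of a Renku 0.3.x project
migrate_datasets_pre_v0_3(client)  # moves each dataset's metadata under .renku/datasets/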
Example #2
def test_creators_with_same_email(tmp_path):
    """Test creators with different names and same email address."""
    from renku.core.models.datasets import Dataset
    from renku.core.models.provenance.agents import Person  # import path assumed

    creators = [
        Person(name="me", email="*****@*****.**"),
        Person(name="me2", email="*****@*****.**"),
    ]
    dataset = Dataset(name="dataset", creators=creators)
    path = tmp_path / "dataset.yml"
    dataset.__reference__ = path
    dataset.to_yaml()

    dataset = Dataset.from_yaml(path)
    assert 1 == len(dataset.creators)
    assert dataset.creators[0].name in ["me", "me2"]
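The behaviour under test (two Person entries sharing an email address collapse into a single creator across a YAML round trip) can be reproduced outside pytest. A minimal sketch, assuming the Person import path below; the email is a placeholder:

import tempfile
from pathlib import Path

from renku.core.models.datasets import Dataset
from renku.core.models.provenance.agents import Person  # import path assumed

with tempfile.TemporaryDirectory() as tmp:
    path = Path(tmp) / "dataset.yml"
    dataset = Dataset(
        name="dataset",
        creators=[
            Person(name="me", email="me@example.com"),
            Person(name="me2", email="me@example.com"),
        ],
    )
    dataset.__reference__ = path
    dataset.to_yaml()
    assert len(Dataset.from_yaml(path).creators) == 1  # deduplicated by email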
Example #3
    def datasets_from_commit(self, commit=None):
        """Return datasets defined in a commit."""
        commit = commit or self.repo.head.commit

        try:
            datasets = commit.tree / self.renku_home / self.DATASETS
        except KeyError:
            return

        for tree in datasets:
            try:
                blob = tree / self.METADATA
            except KeyError:
                continue
            dataset = Dataset.from_yaml(self.path / Path(blob.path), client=self)
            dataset.commit = commit
            yield dataset
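Since datasets_from_commit is a generator, callers iterate over it. A short sketch, assuming client is an initialized repository client (construction not shown):

# Datasets at HEAD (the default)...
for dataset in client.datasets_from_commit():
    print(dataset.name, dataset.commit.hexsha)

# ...and at an older commit (assumes HEAD has a parent).
parent = client.repo.head.commit.parents[0]
for dataset in client.datasets_from_commit(commit=parent):
    print(dataset.name)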
Example #4
def test_dataset_deserialization(client, dataset):
    """Test Dataset deserialization."""
    import datetime

    from renku.core.models.datasets import Dataset

    dataset_ = Dataset.from_yaml(client.get_dataset_path("dataset"), client=client)

    dataset_types = {
        "date_created": [datetime.datetime],
        "creators": [list],
        "description": [str, type(None)],
        "files": [list],
        "identifier": [str],
        "keywords": [list],
    }

    for attribute, type_ in dataset_types.items():
        assert type(getattr(dataset_, attribute)) in type_

    creator_types = {"email": str, "_id": str, "name": str, "affiliation": str}

    creator = dataset.creators[0]

    for attribute, type_ in creator_types.items():
        assert type(getattr(creator, attribute)) is type_
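The same dictionary-driven pattern is handy for ad-hoc sanity checks on any loaded dataset. A minimal sketch, assuming a client that already has a dataset named "dataset" (client setup is not shown in the test):

# Hypothetical spot-check outside pytest.
dataset_ = Dataset.from_yaml(client.get_dataset_path("dataset"), client=client)
assert isinstance(dataset_.files, list)
assert isinstance(dataset_.identifier, str)

Example #5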
def _migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x."""
    def _dataset_pre_0_3(client):
        """Return paths of dataset metadata for pre 0.3.4."""
        project_is_pre_0_3 = int(client.project.version) < 2
        if project_is_pre_0_3:
            return (client.path / DATA_DIR).rglob(client.METADATA)
        return []

    for old_path in _dataset_pre_0_3(client):
        name = str(old_path.parent.relative_to(client.path / DATA_DIR))

        dataset = Dataset.from_yaml(old_path, client=client)
        new_path = (client.renku_datasets_path / dataset.uid / client.METADATA)
        new_path.parent.mkdir(parents=True, exist_ok=True)

        with client.with_metadata(read_only=True) as meta:
            for module in client.repo.submodules:
                if Path(module.url).name == meta.name:
                    module.remove()

        for file_ in dataset.files:
            if not Path(file_.path).exists():
                expected_path = (client.path / DATA_DIR / dataset.name /
                                 file_.path)
                if expected_path.exists():
                    file_.path = expected_path.relative_to(client.path)

        dataset.__reference__ = new_path.relative_to(client.path)
        dataset.to_yaml()

        Path(old_path).unlink()
        ref = LinkReference.create(
            client=client,
            name='datasets/{0}'.format(name),
            force=True,
        )
        ref.set_reference(new_path)
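Relative to Example #1, this version inlines the _dataset_pre_0_3 helper and gates the migration on the project metadata version. The guard on its own looks like this (a sketch; client is assumed as above):

# Run the migration only for pre-0.3.4 project layouts.
if int(client.project.version) < 2:
    _migrate_datasets_pre_v0_3(client)

Example #6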
def test_dataset_deserialization(client, dataset):
    """Test Dataset deserialization."""
    import datetime

    from renku.core.models.datasets import Dataset

    dataset_ = Dataset.from_yaml(client.get_dataset_path('dataset'),
                                 client=client)

    dataset_types = {
        'created': [datetime.datetime],
        'creator': [list],
        'description': [str, type(None)],
        'files': [list],
        'identifier': [str],
        'keywords': [list],
    }

    for attribute, type_ in dataset_types.items():
        assert type(getattr(dataset_, attribute)) in type_

    creator_types = {'email': str, '_id': str, 'name': str, 'affiliation': str}

    creator = dataset.creator[0]

    for attribute, type_ in creator_types.items():
        assert type(getattr(creator, attribute)) is type_
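This older variant of the deserialization test predates the attribute renames visible in Example #4 ('created' versus 'date_created', 'creator' versus 'creators'). Code that must work across both generations could hedge over the spellings; a sketch, not a renku API:

# Hypothetical helper tolerating both attribute generations.
creators = getattr(dataset, 'creators', None) or getattr(dataset, 'creator', [])
assert isinstance(creators, list)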
Example #7
    def load_dataset_from_path(self, path, commit=None):
        """Return a dataset from a given path."""
        path = Path(path)
        if not path.is_absolute():
            path = self.path / path
        return Dataset.from_yaml(path, client=self, commit=commit)
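A minimal sketch of calling this helper with a relative path and with a pinned commit (the client instance is assumed and '<uid>' is a placeholder, not a real identifier):

# Relative paths are resolved against client.path before loading.
ds = client.load_dataset_from_path('.renku/datasets/<uid>/metadata.yml')

# Loading the dataset as it existed at a specific commit.
ds_old = client.load_dataset_from_path(
    client.renku_datasets_path / '<uid>' / client.METADATA,
    commit=client.repo.head.commit,
)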