Example #1
0
    def with_dataset(self, name=None):
        """Yield an editable metadata object for a dataset.

        Loads an existing dataset by ``name``; when none exists, a new
        one is created under a fresh UUID identifier and, if named, a
        ``datasets/<name>`` link reference is registered.  After the
        caller finishes editing, the metadata is serialized to YAML.
        """
        from renku.models._locals import with_reference
        from renku.models.refs import LinkReference

        dataset = self.load_dataset(name=name)

        if dataset is None:
            # No existing dataset: mint a new identifier and metadata path.
            identifier = str(uuid.uuid4())
            metadata_path = self.renku_datasets_path / identifier / self.METADATA
            metadata_path.parent.mkdir(parents=True, exist_ok=True)

            with with_reference(metadata_path):
                dataset = Dataset(identifier=identifier, name=name, client=self)

            if name:
                # Register a human-readable pointer to the metadata file.
                ref = LinkReference.create(client=self, name='datasets/' + name)
                ref.set_reference(metadata_path)

        # Make sure the data directory for this dataset exists.
        data_dir = self.path / self.datadir / dataset.name
        data_dir.mkdir(parents=True, exist_ok=True)

        yield dataset

        # TODO
        # if path is None:
        #     path = dataset_path / self.METADATA
        #     if path.exists():
        #         raise ValueError('Dataset already exists')

        dataset.to_yaml()
Example #2
0
def datasets(ctx, client):
    """Migrate dataset metadata."""
    from renku.models._jsonld import asjsonld
    from renku.models.datasets import Dataset
    from renku.models.refs import LinkReference

    from ._checks.location_datasets import _dataset_metadata_pre_0_3_4

    for old_path in _dataset_metadata_pre_0_3_4(client):
        dataset = Dataset.from_yaml(old_path, client=client)

        # The reference name comes from the location under ``data/``.
        name = str(old_path.parent.relative_to(client.path / 'data'))

        new_path = client.renku_datasets_path / dataset.uid / client.METADATA
        new_path.parent.mkdir(parents=True, exist_ok=True)

        # File keys were relative to the old metadata location; rebase
        # them onto the new metadata directory.
        def _rebase(key):
            return os.path.relpath(
                str(old_path.parent / key), start=str(new_path.parent)
            )

        dataset = dataset.rename_files(_rebase)

        with new_path.open('w') as fp:
            yaml.dump(asjsonld(dataset), fp, default_flow_style=False)

        old_path.unlink()

        ref = LinkReference.create(client=client, name='datasets/' + name)
        ref.set_reference(new_path)
Example #3
0
    def as_dataset(self, client):
        """Deserialize `ZenodoRecordSerializer` to `Dataset`."""
        record_files = self.get_files()
        dataset = Dataset.from_jsonld(self.get_jsonld(), client=client)

        # Wrap every remote record file in a ``DatasetFile``.
        dataset.files = [
            DatasetFile(
                url=record_file.remote_url,
                id=record_file.id,
                checksum=record_file.checksum,
                filename=record_file.filename,
                filesize=record_file.filesize,
                filetype=record_file.type,
                dataset=dataset.name,
                path='',
            )
            for record_file in record_files
        ]

        # Normalize a JSON-LD style ``{'_id': ...}`` URL into a plain string.
        if isinstance(dataset.url, dict) and '_id' in dataset.url:
            parsed = urllib.parse.urlparse(dataset.url.pop('_id'))
            dataset.url = parsed.geturl()

        return dataset
Example #4
0
 def datasets(self):
     """Return mapping from path to dataset.

     Recursively scans ``renku_datasets_path`` for metadata files and
     deserializes each one from its YAML/JSON-LD representation.
     """
     datasets_by_path = {}
     for metadata_path in self.renku_datasets_path.rglob(self.METADATA):
         with metadata_path.open('r') as fp:
             source = yaml.load(fp)
         datasets_by_path[metadata_path] = Dataset.from_jsonld(source)
     return datasets_by_path
Example #5
0
    def with_dataset(self, name=None):
        """Yield an editable metadata object for a dataset.

        :param name: Optional dataset name.  When given, an existing
            dataset is looked up first at its direct metadata path,
            then via a ``datasets/<name>`` link reference; otherwise a
            new dataset is created.

        The (possibly edited) metadata is merged back into the loaded
        source mapping and written to YAML after the ``yield``.
        """
        from renku.models.refs import LinkReference

        # Serialize access to the on-disk metadata.
        with self.lock:
            path = None
            dataset = None

            if name:
                path = self.renku_datasets_path / name / self.METADATA

                # Fall back to the link-reference location when the
                # direct path does not exist.
                if not path.exists():
                    path = LinkReference(client=self,
                                         name='datasets/' + name).reference

                if path.exists():
                    # An empty file deserializes to an empty mapping.
                    with path.open('r') as f:
                        source = yaml.load(f) or {}
                    dataset = Dataset.from_jsonld(source)

            if dataset is None:
                # Create a new dataset stored under its identifier.
                source = {}
                dataset = Dataset(name=name)

                path = (self.renku_datasets_path / dataset.identifier.hex /
                        self.METADATA)
                path.parent.mkdir(parents=True, exist_ok=True)

                if name:
                    # Register a named pointer to the metadata file.
                    LinkReference.create(client=self, name='datasets/' +
                                         name).set_reference(path)

            # Ensure the dataset's data directory exists.
            dataset_path = self.path / self.datadir / dataset.name
            dataset_path.mkdir(parents=True, exist_ok=True)

            yield dataset

            # Merge the caller's edits back into the source mapping.
            source.update(**asjsonld(dataset))

            # TODO
            # if path is None:
            #     path = dataset_path / self.METADATA
            #     if path.exists():
            #         raise ValueError('Dataset already exists')

            with path.open('w') as f:
                yaml.dump(source, f, default_flow_style=False)
Example #6
0
    def with_dataset(self, name=None):
        """Yield an editable metadata object for a dataset.

        Looks up the dataset metadata under ``<datadir>/<name>`` and
        creates a new dataset when none is found.  The (possibly
        edited) metadata is written back to YAML after the ``yield``.

        NOTE(review): ``name`` is joined into ``dataset_path`` before
        the ``if name:`` guard, so ``name=None`` would fail on the
        path join — confirm callers always pass a name.
        """
        with self.lock:
            from renku.models._jsonld import asjsonld
            from renku.models.datasets import Dataset
            path = None
            dataset = None

            dataset_path = self.path / self.datadir / name

            if name:
                path = dataset_path / self.METADATA
                if path.exists():
                    # An empty file deserializes to an empty mapping.
                    with open(path, 'r') as f:
                        source = yaml.load(f) or {}
                    dataset = Dataset.from_jsonld(source)

            if dataset is None:
                source = {}
                dataset = Dataset(name=name)
                try:
                    # NOTE(review): ``exist_ok=True`` already suppresses
                    # FileExistsError for an existing directory, so this
                    # except can only fire when a non-directory blocks
                    # the path — confirm this is the intended semantics.
                    dataset_path.mkdir(parents=True, exist_ok=True)
                except FileExistsError:
                    raise FileExistsError('This dataset already exists.')

            yield dataset

            # Merge caller edits back, excluding the ``datadir``
            # attribute from the serialized form.
            source.update(
                **asjsonld(
                    dataset,
                    filter=lambda attr, _: attr.name != 'datadir',
                )
            )

            # TODO
            # if path is None:
            #     path = dataset_path / self.METADATA
            #     if path.exists():
            #         raise ValueError('Dataset already exists')

            with open(path, 'w') as f:
                yaml.dump(source, f, default_flow_style=False)
Example #7
0
def edit(client, id):
    """Edit dataset metadata."""
    dataset_ = client.load_dataset(id)

    # Nothing to edit when the dataset cannot be loaded.
    if not dataset_:
        return

    # Open the serialized editable metadata in the user's editor.
    original = yaml.safe_dump(dataset_.editable)
    edited_text = editor.edit(contents=bytes(original, encoding='utf-8'))

    updated_ = Dataset(client=client, **yaml.safe_load(edited_text))
    dataset_.update_metadata(updated_)
    dataset_.to_yaml()
Example #8
0
def dataset_serialization(client, dataset, data_file):
    """Test deserializing a dataset object."""
    metadata_file = dataset.path / 'metadata.yml'
    with open(metadata_file, 'r') as f:
        source = yaml.load(f)

    deserialized = Dataset.from_jsonld(source)
    assert deserialized.path == dataset.path

    as_dict = deserialized.to_dict()

    # Core keys survive the round trip; no files yet.
    for key in ('name', 'identifier', 'files'):
        assert key in as_dict
    assert not as_dict['files']

    # Adding data makes files appear in the serialized form.
    client.add_data_to_dataset(deserialized, str(data_file))
    as_dict = deserialized.to_dict()
    assert as_dict['files']
Example #9
0
    def datasets_from_commit(self, commit=None):
        """Return datasets defined in a commit."""
        # Default to the current HEAD commit.
        if not commit:
            commit = self.repo.head.commit

        try:
            dataset_trees = commit.tree / self.renku_home / self.DATASETS
        except KeyError:
            # This commit has no datasets directory.
            return

        for dataset_tree in dataset_trees:
            try:
                metadata_blob = dataset_tree / self.METADATA
            except KeyError:
                # Skip entries without a metadata file.
                continue
            dataset = Dataset.from_yaml(
                self.path / Path(metadata_blob.path), client=self
            )
            dataset.commit = commit
            yield dataset
Example #10
0
    def as_dataset(self, client):
        """Deserialize `DataverseRecordSerializer` to `Dataset`."""
        record_files = self.get_files()
        dataset = Dataset.from_jsonld(self._json, client=client)

        # Wrap every remote record file in a ``DatasetFile``; fall back
        # to the file name when no ``_id`` is present.
        dataset.files = [
            DatasetFile(
                url=record_file.remote_url,
                id=record_file._id or record_file.name,
                filename=record_file.name,
                filesize=record_file.content_size,
                filetype=record_file.file_format,
                dataset=dataset.name,
                path='',
            )
            for record_file in record_files
        ]

        return dataset
Example #11
0
 def get_dataset(self, path, commit=None):
     """Return a dataset from a given path."""
     # Resolve relative paths against the repository root.
     full_path = path if path.is_absolute() else self.path / path
     return Dataset.from_yaml(full_path, client=self, commit=commit)