Ejemplo n.º 1
0
def test_dataset_creator_email(dataset_metadata):
    """Check that a ``mailto:None`` creator id does not survive a reload.

    The original intent, per the previous docstring, is that creators
    without an email end up with a blank-node id instead.
    """
    original = Dataset.from_jsonld(dataset_metadata, client=LocalClient("."))

    # Tamper with the first creator so it looks like it has no email.
    first_creator = original.creators[0]
    first_creator._id = "mailto:None"

    # Round-trip through JSON-LD and inspect the id that comes back.
    serialized = original.as_jsonld()
    dataset_broken = Dataset.from_jsonld(serialized, client=LocalClient("."))
    reloaded_id = dataset_broken.creators[0]._id
    assert "mailto:None" not in reloaded_id
Ejemplo n.º 2
0
    def as_dataset(self, client):
        """Deserialize `ZenodoRecordSerializer` to `Dataset`.

        :param client: forwarded to ``Dataset.from_jsonld``.
        :returns: the loaded dataset, with ``files`` replaced by
            ``DatasetFile`` entries built from ``self.get_files()`` and
            ``url`` flattened to a string when it was loaded as a dict
            holding an ``_id`` key.
        """
        record_files = self.get_files()
        jsonld = self.get_jsonld()
        dataset = Dataset.from_jsonld(jsonld, client=client)

        # Files come from the record itself, so they carry no local path yet.
        dataset.files = [
            DatasetFile(
                url=entry.remote_url.geturl(),
                id=entry.id,
                checksum=entry.checksum,
                filename=entry.filename,
                filesize=entry.filesize,
                filetype=entry.type,
                path='',
            )
            for entry in record_files
        ]

        url = dataset.url
        if not isinstance(url, dict) or '_id' not in url:
            return dataset

        # The URL was loaded as a node dict; keep only its id, as a string.
        dataset.url = urllib.parse.urlparse(url.pop('_id')).geturl()
        return dataset
Ejemplo n.º 3
0
    def as_dataset(self, client):
        """Deserialize `DataverseRecordSerializer` to `Dataset`.

        :param client: forwarded to ``Dataset.from_jsonld``.
        :returns: the loaded dataset with whitespace-only descriptions and
            empty creator affiliations replaced by ``None``, and ``files``
            rebuilt from ``self.get_files()``.
        """
        record_files = self.get_files()
        dataset = Dataset.from_jsonld(
            self._json,
            client=client,
            schema_class=_DataverseDatasetSchema,
        )

        # A description made only of whitespace is treated as missing.
        description = dataset.description
        if description and not description.strip():
            dataset.description = None

        # Same for empty-string affiliations.
        for person in dataset.creator:
            if person.affiliation == '':
                person.affiliation = None

        # Fall back to the file name when the record gives no id.
        dataset.files = [
            DatasetFile(
                url=entry.remote_url.geturl(),
                id=entry._id or entry.name,
                filename=entry.name,
                filesize=entry.content_size,
                filetype=entry.file_format,
                path='',
            )
            for entry in record_files
        ]

        return dataset
Ejemplo n.º 4
0
def test_dataset_files_empty_metadata(dataset_metadata):
    """Check that falsy file names, if any, include ``None``."""
    dataset = Dataset.from_jsonld(dataset_metadata, client=LocalClient("."))

    # Collect the filename of every file whose filename is falsy.
    empty_names = []
    for entry in dataset.files:
        name = entry.filename
        if not name:
            empty_names.append(name)

    # Nothing to check when every file has a name.
    if empty_names:
        assert None in empty_names
def test_uuid_migration(dataset_metadata, client):
    """Check that a UUID identifier is reflected in the dataset id."""
    dataset = Dataset.from_jsonld(dataset_metadata, client=client)
    identifier = dataset.identifier

    assert is_uuid(identifier)

    # The id is expected to be the identifier under the datasets base URL.
    expected_id = urljoin('https://localhost/datasets/', identifier)
    assert expected_id == dataset._id
Ejemplo n.º 6
0
def test_migration_broken_urls(dataset_metadata):
    """Check that every dataset file URL converts to a string."""
    local_client = LocalClient('.')
    dataset = Dataset.from_jsonld(dataset_metadata, client=local_client)

    for entry in dataset.files:
        converted = url_to_string(entry.url)
        assert isinstance(converted, str)
Ejemplo n.º 7
0
def test_doi_migration(dataset_metadata):
    """Check the id and ``same_as`` of a dataset identified by a DOI."""
    local_client = LocalClient('.')
    dataset = Dataset.from_jsonld(dataset_metadata, client=local_client)
    identifier = dataset.identifier

    assert is_doi(identifier)

    # The identifier is fully percent-encoded (no safe characters) in the id.
    escaped = quote(identifier, safe='')
    assert urljoin('https://localhost', 'datasets/' + escaped) == dataset._id

    doi_url = urljoin('https://doi.org', identifier)
    assert dataset.same_as == doi_url
Ejemplo n.º 8
0
def test_calamus(client, dataset_metadata_before_calamus):
    """Check the ``external`` flag and URL of files loaded from old metadata."""
    dataset = Dataset.from_jsonld(dataset_metadata_before_calamus, client=LocalClient("."))

    # (path inside the dataset, expected external flag, expected URL)
    expectations = (
        ("data/dataverse/external/data.txt", True, "file://../../../../tmp/data.txt"),
        ("data/dataverse/local/result.csv", False, "file://../../../../tmp/result.csv"),
    )

    for path, is_external, expected_url in expectations:
        file_ = dataset.find_file(path)
        assert file_.external is is_external
        assert expected_url == file_.url
Ejemplo n.º 9
0
def test_dataset_doi_metadata(dataset_metadata):
    """Check that the dataset id and ``same_as`` agree with its identifier."""
    from renku.core.utils.doi import is_doi

    local_client = LocalClient('.')
    dataset = Dataset.from_jsonld(dataset_metadata, client=local_client)
    identifier = dataset.identifier

    # Only DOI identifiers are expected to resolve through doi.org.
    if is_doi(identifier):
        doi_url = urljoin('https://doi.org', identifier)
        assert doi_url == dataset.same_as

    # The id always ends with the fully percent-encoded identifier.
    id_suffix = 'datasets/{}'.format(quote(identifier, safe=''))
    assert dataset._id.endswith(id_suffix)
def test_calamus(client, dataset_metadata_before_calamus):
    """Check that old dataset metadata loads with the expected values.

    Covers dataset-level attributes, the first creator and tag, the license,
    and four files looked up by path.
    """
    dataset = Dataset.from_jsonld(
        dataset_metadata_before_calamus,
        client=LocalClient('.'),
    )

    # Dataset-level attributes; identifier and label share the same UUID.
    uuid = '51db02ad-3cba-47e2-84d0-5ee5914bd654'
    assert dataset.name == 'Open Source at Harvard'
    assert dataset.identifier == uuid
    assert dataset._label == uuid

    author = dataset.creator[0]
    assert author.affiliation == 'Harvard University'
    assert author.name == 'Durbin, Philip'
    assert author.label == 'Durbin, Philip'

    assert dataset.created is None
    assert dataset.date_published.isoformat('T') == '2019-07-03T00:00:00'
    assert 'The tabular file contains information' in dataset.description
    assert dataset.same_as.url == 'https://doi.org/10.7910/DVN/TJCLKP'

    tag = dataset.tags[0]
    assert tag.name == '3'
    assert tag.description == 'Tag 3 created by renku import'

    assert isinstance(dataset.license, dict)
    license_text = str(dataset.license)
    assert 'https://creativecommons.org/publicdomain/zero/1.0/' in license_text

    # File imported straight from Dataverse: has a creator, no origin.
    unf = dataset.find_file('data/dataverse/IQSS-UNF.json')
    assert (unf.url ==
            'https://dataverse.harvard.edu/api/access/datafile/3371500')
    assert unf.added.isoformat('T') == '2020-06-15T08:37:04.571573+00:00'
    assert unf.creator[0]._id == 'https://orcid.org/0000-0002-9528-9470'
    assert unf.based_on is None

    # File added from a git repository: its origin points back at the repo.
    repo_url = 'https://github.com/SwissDataScienceCenter/r10e-ds-py.git'
    notebook = dataset.find_file('data/dataverse/git/index.ipynb')
    origin = notebook.based_on
    assert origin.url == repo_url
    assert (origin._label ==
            'notebooks/index.ipynb@f98325d81c700f4b86ee05c2154e94d43ca068b8')
    assert origin.based_on is None
    assert 'mailto:cramakri@' in origin.creator[0]._id
    assert notebook.url == repo_url

    # (path inside the dataset, expected external flag, expected URL)
    expectations = (
        ('data/dataverse/external/data.txt', True,
         'file://../../../../tmp/data.txt'),
        ('data/dataverse/local/result.csv', False,
         'file://../../../../tmp/result.csv'),
    )
    for path, is_external, expected_url in expectations:
        file_ = dataset.find_file(path)
        assert file_.external is is_external
        assert file_.url == expected_url
Ejemplo n.º 11
0
def test_dataset_serialization(dataset):
    """Check that a dataset survives a JSON-LD round trip."""
    dataset_metadata = dataset.asjsonld()
    dataset = Dataset.from_jsonld(dataset_metadata)

    # Every attribute found in the metadata must be set on the instance.
    populated = (
        'created', 'creator', 'identifier', 'name', 'path', '_project',
    )
    for attribute in populated:
        assert getattr(dataset, attribute)

    # Compare the loaded values against the serialized ones.
    created = str(dataset.created.isoformat())
    assert created == dataset_metadata.get('created')

    email = dataset.creator[0].email
    assert email == dataset_metadata.get('creator')[0].get('email')

    for key in ('identifier', 'name', 'path'):
        assert getattr(dataset, key) == dataset_metadata.get(key)
Ejemplo n.º 12
0
    def as_dataset(self, client):
        """Deserialize `DataverseRecordSerializer` to `Dataset`.

        :param client: forwarded to ``Dataset.from_jsonld``.
        :returns: the loaded dataset with ``files`` rebuilt from
            ``self.get_files()``.
        """
        record_files = self.get_files()
        dataset = Dataset.from_jsonld(self._json, client=client)

        # Fall back to the file name when the record gives no id.
        dataset.files = [
            DatasetFile(
                url=entry.remote_url.geturl(),
                id=entry._id or entry.name,
                filename=entry.name,
                filesize=entry.content_size,
                filetype=entry.file_format,
                path='',
            )
            for entry in record_files
        ]

        return dataset
Ejemplo n.º 13
0
def test_dataset_serialization(dataset):
    """Check that a dataset survives a round trip through IRI-keyed JSON-LD."""
    dataset_metadata = dataset.asjsonld()
    dataset = Dataset.from_jsonld(dataset_metadata)

    # Every attribute found in the metadata must be set on the instance.
    populated = (
        'created', 'creator', 'identifier', 'name', 'path', '_project',
    )
    for attribute in populated:
        assert getattr(dataset, attribute)

    # Compare the loaded values against the serialized ones, which are keyed
    # by full property IRIs.
    lookup = dataset_metadata.get

    created = str(dataset.created.isoformat())
    assert created == lookup('http://schema.org/dateCreated')

    email = dataset.creator[0].email
    first_creator = lookup('http://schema.org/creator')[0]
    assert email == first_creator.get('http://schema.org/email')

    assert dataset.identifier == lookup('http://schema.org/identifier')
    assert dataset.name == lookup('http://schema.org/name')
    assert dataset.path == lookup('http://www.w3.org/ns/prov#atLocation')
Ejemplo n.º 14
0
def test_dataset_serialization(dataset):
    """Check that a dataset survives a round trip through flattened JSON-LD."""
    flattened_metadata = dataset.as_jsonld()
    dataset = Dataset.from_jsonld(flattened_metadata)

    # Every attribute found in the metadata must be set on the instance.
    populated = (
        "date_created", "creators", "identifier", "title", "path", "_project",
    )
    for attribute in populated:
        assert getattr(dataset, attribute)

    # Pick the first node of the flattened graph whose type mentions Dataset.
    dataset_nodes = []
    for node in flattened_metadata:
        if "Dataset" in str(node["@type"]):
            dataset_nodes.append(node)
    dataset_metadata = dataset_nodes[0]

    def read_value(key):
        """Return the ``@value`` of the first entry stored under ``key``."""
        values = dataset_metadata.get(key)
        return values[0].get("@value")

    # Compare the loaded values against the serialized ones.
    created = str(dataset.date_created.isoformat())
    assert created == read_value("http://schema.org/dateCreated")
    assert dataset.identifier == read_value("http://schema.org/identifier")
    assert dataset.title == read_value("http://schema.org/name")
    assert dataset.path == read_value("http://www.w3.org/ns/prov#atLocation")