def test_dataset_creator_email(dataset_metadata):
    """Check that creators without an email are assigned a blank node."""
    # Load the dataset and force the first creator's id into the broken
    # ``mailto:None`` form that legacy metadata could contain.
    original = Dataset.from_jsonld(dataset_metadata, client=LocalClient("."))
    original.creators[0]._id = "mailto:None"

    # Round-tripping through JSON-LD must replace the broken id.
    reloaded = Dataset.from_jsonld(original.as_jsonld(), client=LocalClient("."))
    assert "mailto:None" not in reloaded.creators[0]._id
def as_dataset(self, client):
    """Deserialize `ZenodoRecordSerializer` to `Dataset`."""
    remote_files = self.get_files()
    dataset = Dataset.from_jsonld(self.get_jsonld(), client=client)

    # Convert every remote Zenodo file into a ``DatasetFile`` record.
    dataset.files = [
        DatasetFile(
            url=remote.remote_url.geturl(),
            id=remote.id,
            checksum=remote.checksum,
            filename=remote.filename,
            filesize=remote.filesize,
            filetype=remote.type,
            path='',
        )
        for remote in remote_files
    ]

    # Legacy metadata may serialize the url as ``{'_id': ...}``; normalize
    # it back to a plain URL string.
    if isinstance(dataset.url, dict) and '_id' in dataset.url:
        dataset.url = urllib.parse.urlparse(dataset.url.pop('_id')).geturl()

    return dataset
def as_dataset(self, client):
    """Deserialize `DataverseRecordSerializer` to `Dataset`."""
    remote_files = self.get_files()
    dataset = Dataset.from_jsonld(
        self._json, client=client, schema_class=_DataverseDatasetSchema
    )

    # Normalize whitespace-only descriptions to ``None``.
    if dataset.description and not dataset.description.strip():
        dataset.description = None

    # Dataverse may export empty-string affiliations; treat those as unset.
    for creator in dataset.creator:
        if creator.affiliation == '':
            creator.affiliation = None

    # Convert every remote Dataverse file into a ``DatasetFile`` record,
    # falling back to the file name when it carries no id.
    dataset.files = [
        DatasetFile(
            url=remote.remote_url.geturl(),
            id=remote._id if remote._id else remote.name,
            filename=remote.name,
            filesize=remote.content_size,
            filetype=remote.file_format,
            path='',
        )
        for remote in remote_files
    ]

    return dataset
def test_dataset_files_empty_metadata(dataset_metadata):
    """Check parsing metadata of dataset files with empty filename."""
    dataset = Dataset.from_jsonld(dataset_metadata, client=LocalClient("."))

    # Collect the (falsy) filenames of files whose filename is missing.
    missing = [f.filename for f in dataset.files if not f.filename]
    if missing:
        assert None in missing
def test_uuid_migration(dataset_metadata, client):
    """Test migration of id with UUID."""
    dataset = Dataset.from_jsonld(dataset_metadata, client=client)

    assert is_uuid(dataset.identifier)
    # The migrated id must point at the local datasets namespace.
    expected_id = urljoin('https://localhost/datasets/', dataset.identifier)
    assert expected_id == dataset._id
def test_migration_broken_urls(dataset_metadata):
    """Check that migration of broken dataset file URLs is string."""
    dataset = Dataset.from_jsonld(dataset_metadata, client=LocalClient('.'))

    # Every file URL must have been migrated to a plain string.
    assert all(isinstance(url_to_string(f.url), str) for f in dataset.files)
def test_doi_migration(dataset_metadata):
    """Test migration of id with doi."""
    dataset = Dataset.from_jsonld(dataset_metadata, client=LocalClient('.'))

    identifier = dataset.identifier
    assert is_doi(identifier)
    # The DOI is percent-encoded into the local dataset id, while
    # ``same_as`` points at the canonical doi.org resolver.
    assert dataset._id == urljoin(
        'https://localhost', 'datasets/' + quote(identifier, safe='')
    )
    assert dataset.same_as == urljoin('https://doi.org', identifier)
def test_calamus(client, dataset_metadata_before_calamus):
    """Check Calamus loads project correctly."""
    dataset = Dataset.from_jsonld(
        dataset_metadata_before_calamus, client=LocalClient(".")
    )

    # An externally-linked file keeps its ``file://`` URL.
    external = dataset.find_file("data/dataverse/external/data.txt")
    assert external.external is True
    assert external.url == "file://../../../../tmp/data.txt"

    # A local file is not marked external but keeps the same URL form.
    local = dataset.find_file("data/dataverse/local/result.csv")
    assert local.external is False
    assert local.url == "file://../../../../tmp/result.csv"
def test_dataset_doi_metadata(dataset_metadata):
    """Check dataset metadata for correct DOI."""
    from renku.core.utils.doi import is_doi

    dataset = Dataset.from_jsonld(dataset_metadata, client=LocalClient('.'))

    # DOI identifiers must resolve through doi.org via ``same_as``.
    if is_doi(dataset.identifier):
        assert dataset.same_as == urljoin('https://doi.org', dataset.identifier)

    # The local id always ends with the percent-encoded identifier.
    assert dataset._id.endswith(
        'datasets/{}'.format(quote(dataset.identifier, safe=''))
    )
def test_calamus(client, dataset_metadata_before_calamus):
    """Check Calamus loads project correctly."""
    dataset = Dataset.from_jsonld(
        dataset_metadata_before_calamus, client=LocalClient('.')
    )

    # Dataset-level metadata.
    assert dataset.name == 'Open Source at Harvard'
    assert dataset.identifier == '51db02ad-3cba-47e2-84d0-5ee5914bd654'
    assert dataset._label == '51db02ad-3cba-47e2-84d0-5ee5914bd654'
    assert dataset.creator[0].affiliation == 'Harvard University'
    assert dataset.creator[0].name == 'Durbin, Philip'
    assert dataset.creator[0].label == 'Durbin, Philip'
    assert dataset.created is None
    assert dataset.date_published.isoformat('T') == '2019-07-03T00:00:00'
    assert 'The tabular file contains information' in dataset.description
    assert dataset.same_as.url == 'https://doi.org/10.7910/DVN/TJCLKP'
    assert dataset.tags[0].name == '3'
    assert dataset.tags[0].description == 'Tag 3 created by renku import'
    assert isinstance(dataset.license, dict)
    assert (
        'https://creativecommons.org/publicdomain/zero/1.0/'
        in str(dataset.license)
    )

    # A plain file imported from Dataverse.
    imported = dataset.find_file('data/dataverse/IQSS-UNF.json')
    assert (
        imported.url
        == 'https://dataverse.harvard.edu/api/access/datafile/3371500'
    )
    assert imported.added.isoformat('T') == '2020-06-15T08:37:04.571573+00:00'
    assert imported.creator[0]._id == 'https://orcid.org/0000-0002-9528-9470'
    assert imported.based_on is None

    # A file imported from a git repository carries ``based_on`` provenance.
    from_git = dataset.find_file('data/dataverse/git/index.ipynb')
    assert (
        from_git.based_on.url
        == 'https://github.com/SwissDataScienceCenter/r10e-ds-py.git'
    )
    assert (
        from_git.based_on._label
        == 'notebooks/index.ipynb@f98325d81c700f4b86ee05c2154e94d43ca068b8'
    )
    assert from_git.based_on.based_on is None
    assert 'mailto:cramakri@' in from_git.based_on.creator[0]._id
    assert (
        from_git.url
        == 'https://github.com/SwissDataScienceCenter/r10e-ds-py.git'
    )

    # External vs. local files.
    external = dataset.find_file('data/dataverse/external/data.txt')
    assert external.external is True
    assert external.url == 'file://../../../../tmp/data.txt'

    local = dataset.find_file('data/dataverse/local/result.csv')
    assert local.external is False
    assert local.url == 'file://../../../../tmp/result.csv'
def test_dataset_serialization(dataset):
    """Test dataset (de)serialization."""
    metadata = dataset.asjsonld()
    dataset = Dataset.from_jsonld(metadata)

    # All attributes found in the metadata must be set on the instance.
    assert dataset.created
    assert dataset.creator
    assert dataset.identifier
    assert dataset.name
    assert dataset.path
    assert dataset._project

    # The deserialized values must match the serialized metadata.
    assert metadata.get('created') == str(dataset.created.isoformat())
    assert metadata.get('creator')[0].get('email') == dataset.creator[0].email
    assert metadata.get('identifier') == dataset.identifier
    assert metadata.get('name') == dataset.name
    assert metadata.get('path') == dataset.path
def as_dataset(self, client):
    """Deserialize `DataverseRecordSerializer` to `Dataset`."""
    remote_files = self.get_files()
    dataset = Dataset.from_jsonld(self._json, client=client)

    # Convert every remote Dataverse file into a ``DatasetFile`` record,
    # falling back to the file name when it carries no id.
    dataset.files = [
        DatasetFile(
            url=remote.remote_url.geturl(),
            id=remote._id if remote._id else remote.name,
            filename=remote.name,
            filesize=remote.content_size,
            filetype=remote.file_format,
            path='',
        )
        for remote in remote_files
    ]

    return dataset
def test_dataset_serialization(dataset):
    """Test dataset (de)serialization."""
    metadata = dataset.asjsonld()
    dataset = Dataset.from_jsonld(metadata)

    # All attributes found in the metadata must be set on the instance.
    assert dataset.created
    assert dataset.creator
    assert dataset.identifier
    assert dataset.name
    assert dataset.path
    assert dataset._project

    # The deserialized values must match the expanded JSON-LD metadata.
    assert str(dataset.created.isoformat()) == metadata.get(
        'http://schema.org/dateCreated'
    )
    creator_metadata = metadata.get('http://schema.org/creator')[0]
    assert dataset.creator[0].email == creator_metadata.get(
        'http://schema.org/email'
    )
    assert dataset.identifier == metadata.get('http://schema.org/identifier')
    assert dataset.name == metadata.get('http://schema.org/name')
    assert dataset.path == metadata.get('http://www.w3.org/ns/prov#atLocation')
def test_dataset_serialization(dataset):
    """Test dataset (de)serialization."""
    flattened = dataset.as_jsonld()
    dataset = Dataset.from_jsonld(flattened)

    # All attributes found in the metadata must be set on the instance.
    assert dataset.date_created
    assert dataset.creators
    assert dataset.identifier
    assert dataset.title
    assert dataset.path
    assert dataset._project

    # Pick the flattened node that describes the Dataset itself.
    dataset_node = [
        m for m in flattened if 'Dataset' in str(m['@type'])
    ][0]

    def value_of(key):
        """Return the first ``@value`` stored under ``key``."""
        return dataset_node.get(key)[0].get('@value')

    # The deserialized values must match the flattened JSON-LD metadata.
    assert str(dataset.date_created.isoformat()) == value_of(
        'http://schema.org/dateCreated'
    )
    assert dataset.identifier == value_of('http://schema.org/identifier')
    assert dataset.title == value_of('http://schema.org/name')
    assert dataset.path == value_of('http://www.w3.org/ns/prov#atLocation')