def test_minimal_dataset(dataverse_minimal_json_record):
    """Check ``asdict()`` for a Dataset with one author, contact and description.

    The record is built with only ``authors``, ``contacts``,
    ``description``, ``subjects`` and ``title`` and its dictionary form is
    compared against the ``dataverse_minimal_json_record`` fixture.
    """
    finches = Dataset(
        authors=[
            Author(authorName="Finch, Fiona", authorAffiliation="Birds Inc."),
        ],
        contacts=[
            Contact(
                datasetContactName="Finch, Fiona",
                datasetContactEmail="*****@*****.**",
            ),
        ],
        description=[
            Description(
                dsDescriptionValue="Darwin's finches (also known"
                " as the Galápagos finches) are a group of about"
                " fifteen species of passerine birds."),
        ],
        subjects=["Medicine, Health and Life Sciences"],
        title="Darwin's Finches",
    )

    serialized = finches.asdict()
    assert serialized == dataverse_minimal_json_record
def create(self, dataset: Dataset, parent: str = "root") -> Tuple[int, str]:
    """Create ``dataset`` under ``parent`` and return its identifiers.

    A request is built with ``self.api.create_dataset`` from ``parent`` and
    the dictionary form of ``dataset``, then sent through
    ``self.transport``.

    Args:
        dataset: Record to create; serialized with ``dataset.asdict()``.
        parent: Identifier passed to ``create_dataset`` as the first
            argument. Defaults to ``"root"``.

    Returns:
        The ``id`` and ``persistentId`` values found under the ``data`` key
        of the JSON response, in that order.
    """
    request = self.api.create_dataset(parent, dataset.asdict())
    created = self.transport.send(request).json()["data"]
    return created["id"], created["persistentId"]
def test_create_dataset_from_minimal_dataverse_json(shared_datadir):
    """Check ``create_from_dataverse_json`` against the minimal JPAL record.

    Loads ``jpal/jpal_minimal_record.json`` from ``shared_datadir``, converts
    it, and compares the result with a hand-built ``Dataset``.
    """
    record_path = shared_datadir / "jpal/jpal_minimal_record.json"
    # Decode explicitly as UTF-8 instead of relying on the locale default:
    # the expected description below contains a non-ASCII character, so a
    # platform-dependent decoding could make this comparison fail.
    json_record = json.loads(record_path.read_text(encoding="utf-8"))

    actual = create_from_dataverse_json(json_record)

    expected = Dataset(
        alternativeURL="https://doi.org/00.0000/DVN/00002",
        authors=[
            Author(authorName="Finch, Fiona", authorAffiliation="Birds Inc.")
        ],
        contacts=[
            Contact(
                datasetContactName="Finch, Fiona",
                datasetContactEmail="*****@*****.**",
            )
        ],
        description=[
            Description(
                dsDescriptionValue="Darwin's finches (also known"
                " as the Galápagos finches) are a group of about"
                " fifteen species of passerine birds.",
            )
        ],
        distributionDate="2020-01-01",
        distributors=[
            Distributor(
                distributorName=
                "The Abdul Latif Jameel Poverty Action Lab Dataverse",
                distributorURL="https://dataverse.harvard.edu/dataverse/jpal",
            ),
        ],
        subjects=["Medicine, Health and Life Sciences"],
        title="Darwin's Finches",
    )
    assert expected == actual
def test_inadequate_dataset(dataverse_minimal_json_record):
    """Check that a Dataset with an empty ``authors`` list raises ValueError.

    ``dataverse_minimal_json_record`` is not read by the body; the parameter
    is kept so the test's signature and requested fixtures are unchanged.
    """
    # Build the valid sub-records outside the ``pytest.raises`` block so a
    # ValueError raised while constructing them fails the test instead of
    # satisfying it for the wrong reason.
    contact = Contact(
        datasetContactName="Finch, Fiona",
        datasetContactEmail="*****@*****.**",
    )
    description = Description(
        dsDescriptionValue="Darwin's finches (also known"
        " as the Galápagos finches) are a group of about"
        " fifteen species of passerine birds.")

    with pytest.raises(ValueError):
        new_record = Dataset(
            authors=[],
            contacts=[contact],
            description=[description],
            subjects=["Medicine, Health and Life Sciences"],
            title="Darwin's Finches",
        )
        # NOTE(review): serialization stays inside the block because it is
        # not visible here whether validation runs at construction time or
        # in ``asdict()`` -- confirm and narrow further if possible.
        new_record.asdict()
def test_asdict_removes_null_values(dataverse_partial_json_record):
    """Compare ``asdict()`` of a sparsely populated Dataset with the fixture.

    Each sub-record sets only some of its fields and ``kindOfData`` is passed
    as ``None``. The JSON dump of ``asdict()`` (keys sorted) must equal the
    dump of ``dataverse_partial_json_record``; per the test name, unset
    values are expected to be absent from that output.
    """
    author = Author(
        authorName="LastAuthor1, FirstAuthor1",
        authorAffiliation="AuthorAffiliation1",
    )
    contact = Contact(
        datasetContactName="LastContact1, FirstContact1",
        datasetContactEmail="*****@*****.**",
    )
    description = Description(dsDescriptionValue="DescriptionText 1")
    contributor = Contributor(
        contributorName="LastContributor1, FirstContributor1")
    distributor = Distributor(
        distributorName="LastDistributor1, FirstDistributor1")
    keyword = Keyword(keywordValue="KeywordTerm1")
    grant_number = GrantNumber(
        grantNumberValue="GrantInformationGrantNumber1")
    other_id = OtherId(otherIdValue="OtherIDIdentifier1")
    producer = Producer(producerName="LastProducer1, FirstProducer1")
    publication = Publication(
        publicationCitation="RelatedPublicationCitation1",
        publicationURL="http://RelatedPublicationURL1.org",
    )
    series = Series(seriesInformation="SeriesInformation")

    new_record = Dataset(
        authors=[author],
        contacts=[contact],
        description=[description],
        subjects=["Agricultural Sciences"],
        title="Replication Data for: Title",
        keywords=[keyword],
        otherIds=[other_id],
        publications=[publication],
        producers=[producer],
        contributors=[contributor],
        grantNumbers=[grant_number],
        distributors=[distributor],
        kindOfData=None,
        series=series,
    )

    # Compare canonical JSON text so key order cannot affect the result.
    actual_json = json.dumps(new_record.asdict(), sort_keys=True)
    expected_json = json.dumps(dataverse_partial_json_record, sort_keys=True)
    assert actual_json == expected_json
def test_full_dataset(dataverse_full_json_record):
    """Compare ``asdict()`` of a fully populated Dataset with the fixture.

    One instance of every sub-record type used here is built with all the
    fields shown, and the JSON dump of the resulting ``asdict()`` (keys
    sorted) must equal the dump of ``dataverse_full_json_record``.
    """
    author = Author(
        authorName="LastAuthor1, FirstAuthor1",
        authorAffiliation="AuthorAffiliation1",
        authorIdentifier="AuthorIdentifier1",
        authorIdentifierScheme="ORCID",
    )
    contact = Contact(
        datasetContactName="LastContact1, FirstContact1",
        datasetContactEmail="*****@*****.**",
        datasetContactAffiliation="ContactAffiliation1",
    )
    description = Description(
        dsDescriptionValue="DescriptionText 1",
        dsDescriptionDate="1000-01-01",
    )
    contributor = Contributor(
        contributorName="LastContributor1, FirstContributor1",
        contributorType="Data Collector",
    )
    distributor = Distributor(
        distributorName="LastDistributor1, FirstDistributor1",
        distributorURL="http://DistributorURL1.org",
    )
    keyword = Keyword(keywordValue="KeywordTerm1")
    grant_number = GrantNumber(
        grantNumberValue="GrantInformationGrantNumber1",
        grantNumberAgency="GrantInformationGrantAgency1",
        grantNumberInformation="GrantInformationInformation1",
    )
    other_id = OtherId(
        otherIdValue="OtherIDIdentifier1",
        otherIdAgency="OtherIDAgency1",
    )
    producer = Producer(
        producerName="LastProducer1, FirstProducer1",
        producerURL="http://ProducerURL1.org",
    )
    publication = Publication(
        publicationCitation="RelatedPublicationCitation1",
        publicationIDNumber="RelatedPublicationIDNumber1",
        publicationIDType="ark",
        publicationURL="http://RelatedPublicationURL1.org",
    )
    series = Series(
        seriesName="SeriesName",
        seriesInformation="SeriesInformation",
    )
    time_period = TimePeriodCovered(
        timePeriodCoveredStart="1005-01-01",
        timePeriodCoveredEnd="1005-01-02",
    )

    new_record = Dataset(
        authors=[author],
        alternativeURL="http://AlternativeURL.org",
        contacts=[contact],
        description=[description],
        subjects=[
            "Agricultural Sciences",
            "Business and Management",
            "Engineering",
            "Law",
        ],
        title="Replication Data for: Title",
        keywords=[keyword],
        otherIds=[other_id],
        publications=[publication],
        notesText="Notes1",
        producers=[producer],
        productionPlace="ProductionPlace",
        contributors=[contributor],
        grantNumbers=[grant_number],
        distributors=[distributor],
        distributionDate="1004-01-01",
        language=["English"],
        timePeriodsCovered=[time_period],
        kindOfData=["KindOfData1", "KindOfData2"],
        series=series,
        license="CC0",
        termsOfUse="CC0 Waiver",
    )

    # Compare canonical JSON text so key order cannot affect the result.
    actual_json = json.dumps(new_record.asdict(), sort_keys=True)
    expected_json = json.dumps(dataverse_full_json_record, sort_keys=True)
    assert actual_json == expected_json
def create_from_dataverse_json(data: dict) -> Dataset:
    """Build a ``Dataset`` from a Dataverse dataset JSON record.

    Top-level ``persistentUrl`` and ``publicationDate`` become
    ``alternativeURL`` and ``distributionDate``; ``license`` and
    ``termsOfUse`` are read from ``datasetVersion``. The remaining values
    come from the entries of
    ``datasetVersion.metadataBlocks.citation.fields``, selected by their
    ``typeName``. Entries with a ``typeName`` not handled below are ignored.

    The JPAL Dataverse is always the first distributor; distributors found
    in the record are appended after it.

    Args:
        data: Decoded JSON record. Must contain ``datasetVersion`` with the
            nested ``metadataBlocks``/``citation``/``fields`` keys.

    Returns:
        The ``Dataset`` constructed from the collected keyword arguments.

    Raises:
        KeyError: If one of the keys accessed by subscript is missing, e.g.
            ``datasetVersion`` or an author's ``authorName``.
    """
    # Dataset fields
    kwargs: Dict[str, Any] = {
        "alternativeURL": data.get("persistentUrl"),
        "distributionDate": data.get("publicationDate"),
        "distributors": [
            Distributor(
                distributorName=
                "The Abdul Latif Jameel Poverty Action Lab Dataverse",
                distributorURL="https://dataverse.harvard.edu/dataverse/jpal",
            )
        ],
    }
    version = data["datasetVersion"]
    kwargs["license"] = version.get("license")
    kwargs["termsOfUse"] = version.get("termsOfUse")

    # Citation fields
    for field in version["metadataBlocks"]["citation"]["fields"]:
        type_name = field["typeName"]

        if type_name in ("kindOfData", "language", "notesText",
                         "productionPlace", "title"):
            # Copied verbatim under a keyword equal to the typeName.
            kwargs[type_name] = field["value"]
        elif type_name == "subject":
            kwargs["subjects"] = field["value"]
        elif type_name == "series":
            series_value = field["value"]
            kwargs["series"] = Series(
                seriesName=get_optional_value(series_value, "seriesName"),
                seriesInformation=get_optional_value(
                    series_value, "seriesInformation"),
            )
        elif type_name == "author":
            kwargs["authors"] = [
                Author(
                    authorAffiliation=entry["authorAffiliation"]["value"],
                    authorIdentifier=get_optional_value(
                        entry, "authorIdentifier"),
                    authorIdentifierScheme=get_optional_value(
                        entry, "authorIdentifierScheme"),
                    authorName=entry["authorName"]["value"],
                )
                for entry in field["value"]
            ]
        elif type_name == "datasetContact":
            kwargs["contacts"] = [
                Contact(
                    datasetContactAffiliation=get_optional_value(
                        entry, "datasetContactAffiliation"),
                    datasetContactEmail=entry["datasetContactEmail"]["value"],
                    datasetContactName=entry["datasetContactName"]["value"],
                )
                for entry in field["value"]
            ]
        elif type_name == "contributor":
            kwargs["contributors"] = [
                Contributor(
                    contributorName=get_optional_value(
                        entry, "contributorName"),
                    contributorType=get_optional_value(
                        entry, "contributorType"),
                )
                for entry in field["value"]
            ]
        elif type_name == "dsDescription":
            kwargs["description"] = [
                Description(
                    dsDescriptionDate=get_optional_value(
                        entry, "dsDescriptionDate"),
                    dsDescriptionValue=entry["dsDescriptionValue"]["value"],
                )
                for entry in field["value"]
            ]
        elif type_name == "distributor":
            # Appended after the JPAL distributor added above.
            kwargs["distributors"] += [
                Distributor(
                    distributorName=get_optional_value(
                        entry, "distributorName"),
                    distributorURL=get_optional_value(
                        entry, "distributorURL"),
                )
                for entry in field["value"]
            ]
        elif type_name == "grantNumber":
            kwargs["grantNumbers"] = [
                GrantNumber(
                    grantNumberAgency=get_optional_value(
                        entry, "grantNumberAgency"),
                    grantNumberValue=get_optional_value(
                        entry, "grantNumberValue"),
                )
                for entry in field["value"]
            ]
        elif type_name == "keyword":
            kwargs["keywords"] = [
                Keyword(
                    keywordValue=get_optional_value(entry, "keywordValue"))
                for entry in field["value"]
            ]
        elif type_name == "otherId":
            kwargs["otherIds"] = [
                OtherId(
                    otherIdAgency=get_optional_value(entry, "otherIdAgency"),
                    otherIdValue=get_optional_value(entry, "otherIdValue"),
                )
                for entry in field["value"]
            ]
        elif type_name == "producer":
            kwargs["producers"] = [
                Producer(
                    producerName=get_optional_value(entry, "producerName"),
                    producerURL=get_optional_value(entry, "producerURL"),
                )
                for entry in field["value"]
            ]
        elif type_name == "publication":
            kwargs["publications"] = [
                Publication(
                    publicationCitation=get_optional_value(
                        entry, "publicationCitation"),
                    publicationIDNumber=get_optional_value(
                        entry, "publicationIDNumber"),
                    publicationIDType=get_optional_value(
                        entry, "publicationIDType"),
                    publicationURL=get_optional_value(
                        entry, "publicationURL"),
                )
                for entry in field["value"]
            ]
        elif type_name == "timePeriodCovered":
            kwargs["timePeriodsCovered"] = [
                TimePeriodCovered(
                    timePeriodCoveredStart=get_optional_value(
                        entry, "timePeriodCoveredStart"),
                    timePeriodCoveredEnd=get_optional_value(
                        entry, "timePeriodCoveredEnd"),
                )
                for entry in field["value"]
            ]

    return Dataset(**kwargs)
def test_create_dataset_from_full_dataverse_json(shared_datadir):
    """Check ``create_from_dataverse_json`` against the complete JPAL record.

    Loads ``jpal/jpal_complete_record.json`` from ``shared_datadir``,
    converts it, and compares the result with a hand-built ``Dataset``.
    """
    record_path = shared_datadir / "jpal/jpal_complete_record.json"
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    json_record = json.loads(record_path.read_text(encoding="utf-8"))

    actual = create_from_dataverse_json(json_record)

    expected = Dataset(
        alternativeURL="https://doi.org/00.0000/DVN/00001",
        authors=[
            Author(
                authorAffiliation="AuthorAffiliation1",
                authorIdentifier="AuthorIdentifier1",
                authorIdentifierScheme="ORCID",
                authorName="LastAuthor1, FirstAuthor1",
            ),
            Author(
                authorAffiliation="AuthorAffiliation2",
                authorIdentifier="AuthorIdentifier2",
                authorIdentifierScheme="ORCID",
                authorName="LastAuthor2, FirstAuthor2",
            ),
        ],
        contacts=[
            Contact(
                datasetContactAffiliation="ContactAffiliation1",
                datasetContactEmail="*****@*****.**",
                datasetContactName="LastContact1, FirstContact1",
            ),
            Contact(
                datasetContactAffiliation="ContactAffiliation2",
                datasetContactEmail="*****@*****.**",
                datasetContactName="LastContact2, FirstContact2",
            ),
        ],
        contributors=[
            Contributor(
                contributorName="LastContributor1, FirstContributor1",
                contributorType="Data Collector",
            ),
            Contributor(
                contributorName="LastContributor2, FirstContributor2",
                contributorType="Researcher",
            ),
        ],
        description=[
            Description(
                dsDescriptionDate="2020-01-01",
                dsDescriptionValue="DescriptionText 1",
            ),
            Description(dsDescriptionValue="DescriptionText 2"),
        ],
        distributionDate="2020-06-27",
        distributors=[
            Distributor(
                distributorName=
                "The Abdul Latif Jameel Poverty Action Lab Dataverse",
                distributorURL="https://dataverse.harvard.edu/dataverse/jpal",
            ),
            Distributor(
                distributorName="LastDistributor1, FirstDistributor1",
                distributorURL="http://DistributorURL1.org",
            ),
            Distributor(
                distributorName="LastDistributor2, FirstDistributor2",
                distributorURL="http://DistributorURL2.org",
            ),
        ],
        grantNumbers=[
            GrantNumber(
                grantNumberAgency="GrantInformationGrantAgency1",
                grantNumberValue="GrantInformationGrantNumber1",
            )
        ],
        keywords=[
            Keyword(keywordValue="KeywordTerm1"),
            Keyword(keywordValue="KeywordTerm2"),
        ],
        kindOfData=["KindOfData1", "KindOfData2"],
        language=["English", "Swahili"],
        license="CC0",
        notesText="Notes1",
        otherIds=[
            OtherId(
                otherIdAgency="OtherIDAgency1",
                otherIdValue="OtherIDIdentifier1",
            ),
            OtherId(
                otherIdAgency="OtherIDAgency2",
                otherIdValue="OtherIDIdentifier2",
            ),
        ],
        producers=[
            Producer(
                producerName="LastProducer1, FirstProducer1",
                producerURL="http://ProducerURL1.org",
            ),
            Producer(
                producerName="LastProducer2, FirstProducer2",
                producerURL="http://ProducerURL2.org",
            ),
        ],
        productionPlace="ProductionPlace",
        publications=[
            Publication(
                publicationCitation="RelatedPublicationCitation1",
                publicationIDNumber="RelatedPublicationIDNumber1",
                publicationIDType="ark",
                publicationURL="http://RelatedPublicationURL1.org",
            ),
            Publication(
                publicationCitation="RelatedPublicationCitation2",
                publicationIDNumber="RelatedPublicationIDNumber2",
                publicationIDType="doi",
                publicationURL="https://doi.org/RelatedPublicationURL2",
            ),
        ],
        series=Series(
            seriesInformation="SeriesInformation",
            seriesName="SeriesName",
        ),
        subjects=[
            "Agricultural Sciences",
            "Business and Management",
            "Engineering",
            "Law",
        ],
        termsOfUse="CC0 Waiver",
        timePeriodsCovered=[
            TimePeriodCovered(
                timePeriodCoveredStart="1005-01-01",
                timePeriodCoveredEnd="1005-01-02",
            ),
            TimePeriodCovered(
                timePeriodCoveredStart="2020-01-01",
                timePeriodCoveredEnd="2020-01-02",
            ),
        ],
        title="Replication Data for: Title",
    )
    assert expected == actual