Beispiel #1
0
def test_create_dataset_from_minimal_dataverse_json(shared_datadir):
    """Convert the minimal JPAL Dataverse record and compare to a Dataset.

    The JSON file ``jpal/jpal_minimal_record.json`` is read from the
    ``shared_datadir`` fixture, passed through
    ``create_from_dataverse_json`` and the result is checked for equality
    with a hand-built ``Dataset``.
    """
    record_path = shared_datadir / "jpal/jpal_minimal_record.json"
    source_record = json.loads(record_path.read_text())
    converted = create_from_dataverse_json(source_record)

    # Expected nested objects, built separately to keep the Dataset call flat.
    finch_author = Author(
        authorName="Finch, Fiona",
        authorAffiliation="Birds Inc.",
    )
    finch_contact = Contact(
        datasetContactName="Finch, Fiona",
        datasetContactEmail="*****@*****.**",
    )
    finch_description = Description(
        dsDescriptionValue="Darwin's finches (also known"
        " as the Galápagos finches) are a group of about"
        " fifteen species of passerine birds.", )
    jpal_distributor = Distributor(
        distributorName=
        "The Abdul Latif Jameel Poverty Action Lab Dataverse",
        distributorURL="https://dataverse.harvard.edu/dataverse/jpal",
    )

    wanted = Dataset(
        alternativeURL="https://doi.org/00.0000/DVN/00002",
        authors=[finch_author],
        contacts=[finch_contact],
        description=[finch_description],
        distributionDate="2020-01-01",
        distributors=[jpal_distributor],
        subjects=["Medicine, Health and Life Sciences"],
        title="Darwin's Finches",
    )

    assert wanted == converted
Beispiel #2
0
def test_minimal_dataset(dataverse_minimal_json_record):
    """Serialize a Dataset with five fields and compare it to the fixture.

    The Dataset is built from one author, one contact, one description, one
    subject and a title; its ``asdict()`` output must equal
    ``dataverse_minimal_json_record``.
    """
    finch_summary = ("Darwin's finches (also known"
                     " as the Galápagos finches) are a group of about"
                     " fifteen species of passerine birds.")
    minimal_dataset = Dataset(
        authors=[
            Author(authorName="Finch, Fiona", authorAffiliation="Birds Inc.")
        ],
        contacts=[
            Contact(
                datasetContactName="Finch, Fiona",
                datasetContactEmail="*****@*****.**",
            )
        ],
        description=[Description(dsDescriptionValue=finch_summary)],
        subjects=["Medicine, Health and Life Sciences"],
        title="Darwin's Finches",
    )
    serialized = minimal_dataset.asdict()
    assert serialized == dataverse_minimal_json_record
Beispiel #3
0
def test_asdict_removes_null_values(dataverse_partial_json_record):
    """Compare ``asdict()`` of a partially filled Dataset with the fixture.

    Each nested object is created with only one or two keyword arguments and
    ``kindOfData`` is passed explicitly as ``None``. As the test name
    indicates, such empty values are expected to be absent from the
    serialized output. Both sides are dumped to key-sorted JSON before
    comparison.
    """
    # Single nested objects; each is wrapped in a list where Dataset needs one.
    first_author = Author(
        authorName="LastAuthor1, FirstAuthor1",
        authorAffiliation="AuthorAffiliation1",
    )
    first_contact = Contact(
        datasetContactName="LastContact1, FirstContact1",
        datasetContactEmail="*****@*****.**",
    )
    first_description = Description(dsDescriptionValue="DescriptionText 1")
    contributor = Contributor(
        contributorName="LastContributor1, FirstContributor1")
    distributor = Distributor(
        distributorName="LastDistributor1, FirstDistributor1")
    keyword = Keyword(keywordValue="KeywordTerm1")
    grant_number = GrantNumber(
        grantNumberValue="GrantInformationGrantNumber1")
    other_id = OtherId(otherIdValue="OtherIDIdentifier1")
    producer = Producer(producerName="LastProducer1, FirstProducer1")
    publication = Publication(
        publicationCitation="RelatedPublicationCitation1",
        publicationURL="http://RelatedPublicationURL1.org",
    )
    series_info = Series(seriesInformation="SeriesInformation")

    partial_dataset = Dataset(
        authors=[first_author],
        contacts=[first_contact],
        description=[first_description],
        subjects=["Agricultural Sciences"],
        title="Replication Data for: Title",
        keywords=[keyword],
        otherIds=[other_id],
        publications=[publication],
        producers=[producer],
        contributors=[contributor],
        grantNumbers=[grant_number],
        distributors=[distributor],
        kindOfData=None,
        series=series_info,
    )

    serialized_json = json.dumps(partial_dataset.asdict(), sort_keys=True)
    fixture_json = json.dumps(dataverse_partial_json_record, sort_keys=True)
    assert serialized_json == fixture_json
Beispiel #4
0
def test_full_dataset(dataverse_full_json_record):
    """Compare ``asdict()`` of a fully populated Dataset with the fixture.

    Every nested object and Dataset keyword used here is given a value. The
    serialized record and ``dataverse_full_json_record`` are both dumped to
    key-sorted JSON and must match.
    """
    # Single nested objects; each is wrapped in a list where Dataset needs one.
    first_author = Author(
        authorName="LastAuthor1, FirstAuthor1",
        authorAffiliation="AuthorAffiliation1",
        authorIdentifier="AuthorIdentifier1",
        authorIdentifierScheme="ORCID",
    )
    first_contact = Contact(
        datasetContactName="LastContact1, FirstContact1",
        datasetContactEmail="*****@*****.**",
        datasetContactAffiliation="ContactAffiliation1",
    )
    first_description = Description(
        dsDescriptionValue="DescriptionText 1",
        dsDescriptionDate="1000-01-01",
    )
    contributor = Contributor(
        contributorName="LastContributor1, FirstContributor1",
        contributorType="Data Collector",
    )
    distributor = Distributor(
        distributorName="LastDistributor1, FirstDistributor1",
        distributorURL="http://DistributorURL1.org",
    )
    keyword = Keyword(keywordValue="KeywordTerm1")
    grant_number = GrantNumber(
        grantNumberValue="GrantInformationGrantNumber1",
        grantNumberAgency="GrantInformationGrantAgency1",
        grantNumberInformation="GrantInformationInformation1",
    )
    other_id = OtherId(
        otherIdValue="OtherIDIdentifier1",
        otherIdAgency="OtherIDAgency1",
    )
    producer = Producer(
        producerName="LastProducer1, FirstProducer1",
        producerURL="http://ProducerURL1.org",
    )
    publication = Publication(
        publicationCitation="RelatedPublicationCitation1",
        publicationIDNumber="RelatedPublicationIDNumber1",
        publicationIDType="ark",
        publicationURL="http://RelatedPublicationURL1.org",
    )
    series_info = Series(
        seriesName="SeriesName",
        seriesInformation="SeriesInformation",
    )
    covered_period = TimePeriodCovered(
        timePeriodCoveredStart="1005-01-01",
        timePeriodCoveredEnd="1005-01-02",
    )
    subject_terms = [
        "Agricultural Sciences",
        "Business and Management",
        "Engineering",
        "Law",
    ]

    full_dataset = Dataset(
        authors=[first_author],
        alternativeURL="http://AlternativeURL.org",
        contacts=[first_contact],
        description=[first_description],
        subjects=subject_terms,
        title="Replication Data for: Title",
        keywords=[keyword],
        otherIds=[other_id],
        publications=[publication],
        notesText="Notes1",
        producers=[producer],
        productionPlace="ProductionPlace",
        contributors=[contributor],
        grantNumbers=[grant_number],
        distributors=[distributor],
        distributionDate="1004-01-01",
        language=["English"],
        timePeriodsCovered=[covered_period],
        kindOfData=["KindOfData1", "KindOfData2"],
        series=series_info,
        license="CC0",
        termsOfUse="CC0 Waiver",
    )

    serialized_json = json.dumps(full_dataset.asdict(), sort_keys=True)
    fixture_json = json.dumps(dataverse_full_json_record, sort_keys=True)
    assert serialized_json == fixture_json
Beispiel #5
0
def test_create_whoas_dim_xml(whoas_oai_server,
                              dspace_oai_xml_series_name_record):
    """Check Datasets created from WHOAS DIM XML records.

    Records 0, 1, 4 and 5 of the ``whoas_oai_server`` fixture are converted
    with ``create_from_whoas_dim_xml`` and compared attribute by attribute
    with the expected values built below. A mocked OAI ``GetRecord``
    response returning ``dspace_oai_xml_series_name_record`` is registered
    for the client's base URL.
    """
    with requests_mock.Mocker() as mocker:
        mocker.get(
            "http+mock://example.com/oai?verb=GetRecord&metadataPrefix=dim"
            "&identifier=oai%3Adarchive.mblwhoilibrary.org:1912/6867",
            text=dspace_oai_xml_series_name_record,
        )
        oai_client = OAIClient("http+mock://example.com/oai", "dim",
                               "Test_Collection")

        # Expected values shared by the records checked below.
        expected_title = (
            "Animals on the Move and Deep‐Sea Vents: Dataset for Spherical Display "
            "Systems")
        expected_authors = [
            Author(
                authorName="Beaulieu, Stace E.",
                authorAffiliation="Woods Hole",
                authorIdentifierScheme=None,
                authorIdentifier=None,
            ),
            Author(
                authorName="Brickley, Annette",
                authorAffiliation="Woods Hole",
                authorIdentifierScheme=None,
                authorIdentifier=None,
            ),
        ]
        expected_contacts = [
            Contact(
                datasetContactName="Woods Hole Open Access Server",
                datasetContactEmail="*****@*****.**",
            )
        ]
        expected_description = [
            Description(
                dsDescriptionValue="This educational package was developed.",
                dsDescriptionDate=None,
            ),
            Description(
                dsDescriptionValue="Sample abstract",
                dsDescriptionDate=None,
            ),
        ]
        expected_distributors = [
            Distributor(distributorName="Esteemed Publishing Conglomerate")
        ]
        expected_grant_numbers = [
            GrantNumber(
                grantNumberInformation="Funding for this educational package.")
        ]
        expected_keywords = [
            Keyword(keywordValue="Migration"),
            Keyword(keywordValue="Larval dispersal"),
        ]
        expected_notes = ("This zipped file contains educational materials. "
                          "This educational package is Copyright ©2019 Woods"
                          " Hole Oceanographic Institution.")
        # Other IDs differ per record: full, no-description and invalid-date.
        other_ids_full = [
            OtherId(otherIdValue="https://hdl.handle.net/1912/2368",
                    otherIdAgency=None),
            OtherId(otherIdValue="10.26025/8ke9-av98", otherIdAgency=None),
        ]
        other_ids_no_description = [
            OtherId(otherIdValue="https://hdl.handle.net/1912/2371",
                    otherIdAgency=None)
        ]
        other_ids_invalid_date = [
            OtherId(otherIdValue="https://hdl.handle.net/1912/2372",
                    otherIdAgency=None)
        ]
        expected_publications = [
            Publication(publicationCitation="Associated publication")
        ]
        expected_series = Series(
            seriesName="Series Title",
            seriesInformation="https://hdl.handle.net/1912/6867",
        )
        complete_periods = [
            TimePeriodCovered(
                timePeriodCoveredStart="2019-06-04",
                timePeriodCoveredEnd="2019-06-04",
            )
        ]
        start_only_periods = [
            TimePeriodCovered(timePeriodCoveredStart="2019-06-04")
        ]

        # minimal record
        expected_subjects = ["Earth and Environmental Sciences"]
        dataset = create_from_whoas_dim_xml(whoas_oai_server[0], oai_client)
        assert dataset.title == expected_title
        assert dataset.authors == expected_authors
        assert dataset.contacts == expected_contacts
        assert dataset.description == expected_description
        assert dataset.subjects == expected_subjects

        # full record
        dataset = create_from_whoas_dim_xml(whoas_oai_server[1], oai_client)
        # No attribute may be left as an empty list.
        for attribute_value in dataset.__dict__.values():
            assert attribute_value != []
        assert dataset.title == expected_title
        assert dataset.authors == expected_authors
        assert dataset.contacts == expected_contacts
        assert dataset.description == expected_description
        assert dataset.subjects == expected_subjects
        assert dataset.distributors == expected_distributors
        assert dataset.grantNumbers == expected_grant_numbers
        assert dataset.keywords == expected_keywords
        assert dataset.language == ["English"]
        assert dataset.notesText == expected_notes
        assert dataset.otherIds == other_ids_full
        assert dataset.publications == expected_publications
        assert dataset.series == expected_series
        assert dataset.timePeriodsCovered == complete_periods
        assert dataset.license == "Attribution 4.0 International"
        assert dataset.termsOfUse == "Attribution 4.0 International"

        # record with no description
        dataset = create_from_whoas_dim_xml(whoas_oai_server[4], oai_client)
        for attribute_value in dataset.__dict__.values():
            assert attribute_value != []
        assert dataset.title == expected_title
        assert dataset.authors == expected_authors
        assert dataset.contacts == expected_contacts
        # The title is expected to stand in for the missing description.
        assert dataset.description == [
            Description(dsDescriptionValue=expected_title)
        ]
        assert dataset.subjects == expected_subjects
        assert dataset.distributors == expected_distributors
        assert dataset.grantNumbers == expected_grant_numbers
        assert dataset.keywords == expected_keywords
        assert dataset.language == ["English"]
        assert dataset.otherIds == other_ids_no_description
        assert dataset.publications == expected_publications
        assert dataset.series == expected_series
        assert dataset.timePeriodsCovered == complete_periods
        assert dataset.license == "Attribution 4.0 International"
        assert dataset.termsOfUse == "Attribution 4.0 International"

        # record with invalid date
        dataset = create_from_whoas_dim_xml(whoas_oai_server[5], oai_client)
        for attribute_value in dataset.__dict__.values():
            assert attribute_value != []
        assert dataset.title == expected_title
        assert dataset.authors == expected_authors
        assert dataset.contacts == expected_contacts
        assert dataset.description == expected_description
        assert dataset.subjects == expected_subjects
        assert dataset.distributors == expected_distributors
        assert dataset.grantNumbers == expected_grant_numbers
        assert dataset.keywords == expected_keywords
        assert dataset.language == ["English"]
        assert dataset.notesText == expected_notes
        assert dataset.otherIds == other_ids_invalid_date
        assert dataset.publications == expected_publications
        assert dataset.series == expected_series
        # Only the start date is expected for this record.
        assert dataset.timePeriodsCovered == start_only_periods
        assert dataset.license == "Attribution 4.0 International"
        assert dataset.termsOfUse == "Attribution 4.0 International"
Beispiel #6
0
def create_from_dataverse_json(data: dict) -> Dataset:
    """Build a ``Dataset`` from a parsed Dataverse JSON record.

    Args:
        data: Parsed JSON for one dataset. ``persistentUrl`` and
            ``publicationDate`` are read if present; ``datasetVersion`` and
            its ``metadataBlocks.citation.fields`` list are required.

    Returns:
        A ``Dataset`` created from the collected keyword arguments. Citation
        fields whose ``typeName`` is not handled below are ignored.

    Raises:
        KeyError: If a key accessed by subscript is missing, e.g.
            ``datasetVersion``, a field's ``typeName`` or an author's
            ``authorName``.
    """
    dataset_args: Dict[str, Any] = {}

    # Dataset fields
    dataset_args["alternativeURL"] = data.get("persistentUrl")
    dataset_args["distributionDate"] = data.get("publicationDate")
    # The JPAL Dataverse is always listed first; distributors found in the
    # citation block are appended to this list further down.
    dataset_args["distributors"] = [
        Distributor(
            distributorName=
            "The Abdul Latif Jameel Poverty Action Lab Dataverse",
            distributorURL="https://dataverse.harvard.edu/dataverse/jpal",
        )
    ]
    version = data["datasetVersion"]
    dataset_args["license"] = version.get("license")
    dataset_args["termsOfUse"] = version.get("termsOfUse")

    # Citation fields
    citation_fields = version["metadataBlocks"]["citation"]["fields"]

    for field in citation_fields:
        type_name = field["typeName"]

        if type_name == "author":
            dataset_args["authors"] = [
                Author(
                    authorAffiliation=entry["authorAffiliation"]["value"],
                    authorIdentifier=get_optional_value(
                        entry, "authorIdentifier"),
                    authorIdentifierScheme=get_optional_value(
                        entry, "authorIdentifierScheme"),
                    authorName=entry["authorName"]["value"],
                ) for entry in field["value"]
            ]

        elif type_name == "datasetContact":
            dataset_args["contacts"] = [
                Contact(
                    datasetContactAffiliation=get_optional_value(
                        entry, "datasetContactAffiliation"),
                    datasetContactEmail=entry["datasetContactEmail"]["value"],
                    datasetContactName=entry["datasetContactName"]["value"],
                ) for entry in field["value"]
            ]

        elif type_name == "contributor":
            dataset_args["contributors"] = [
                Contributor(
                    contributorName=get_optional_value(
                        entry, "contributorName"),
                    contributorType=get_optional_value(
                        entry, "contributorType"),
                ) for entry in field["value"]
            ]

        elif type_name == "dsDescription":
            dataset_args["description"] = [
                Description(
                    dsDescriptionDate=get_optional_value(
                        entry, "dsDescriptionDate"),
                    dsDescriptionValue=entry["dsDescriptionValue"]["value"],
                ) for entry in field["value"]
            ]

        elif type_name == "distributor":
            record_distributors = [
                Distributor(
                    distributorName=get_optional_value(
                        entry, "distributorName"),
                    distributorURL=get_optional_value(
                        entry, "distributorURL"),
                ) for entry in field["value"]
            ]
            dataset_args["distributors"].extend(record_distributors)

        elif type_name == "grantNumber":
            dataset_args["grantNumbers"] = [
                GrantNumber(
                    grantNumberAgency=get_optional_value(
                        entry, "grantNumberAgency"),
                    grantNumberValue=get_optional_value(
                        entry, "grantNumberValue"),
                ) for entry in field["value"]
            ]

        elif type_name == "keyword":
            dataset_args["keywords"] = [
                Keyword(
                    keywordValue=get_optional_value(entry, "keywordValue"))
                for entry in field["value"]
            ]

        elif type_name == "kindOfData":
            dataset_args["kindOfData"] = field["value"]

        elif type_name == "language":
            dataset_args["language"] = field["value"]

        elif type_name == "notesText":
            dataset_args["notesText"] = field["value"]

        elif type_name == "otherId":
            dataset_args["otherIds"] = [
                OtherId(
                    otherIdAgency=get_optional_value(entry, "otherIdAgency"),
                    otherIdValue=get_optional_value(entry, "otherIdValue"),
                ) for entry in field["value"]
            ]

        elif type_name == "producer":
            dataset_args["producers"] = [
                Producer(
                    producerName=get_optional_value(entry, "producerName"),
                    producerURL=get_optional_value(entry, "producerURL"),
                ) for entry in field["value"]
            ]

        elif type_name == "productionPlace":
            dataset_args["productionPlace"] = field["value"]

        elif type_name == "publication":
            dataset_args["publications"] = [
                Publication(
                    publicationCitation=get_optional_value(
                        entry, "publicationCitation"),
                    publicationIDNumber=get_optional_value(
                        entry, "publicationIDNumber"),
                    publicationIDType=get_optional_value(
                        entry, "publicationIDType"),
                    publicationURL=get_optional_value(
                        entry, "publicationURL"),
                ) for entry in field["value"]
            ]

        elif type_name == "series":
            # Unlike the list-valued fields, series holds a single compound
            # value, so the lookups go straight to field["value"].
            dataset_args["series"] = Series(
                seriesName=get_optional_value(field["value"], "seriesName"),
                seriesInformation=get_optional_value(field["value"],
                                                     "seriesInformation"),
            )

        elif type_name == "subject":
            dataset_args["subjects"] = field["value"]

        elif type_name == "timePeriodCovered":
            dataset_args["timePeriodsCovered"] = [
                TimePeriodCovered(
                    timePeriodCoveredStart=get_optional_value(
                        entry, "timePeriodCoveredStart"),
                    timePeriodCoveredEnd=get_optional_value(
                        entry, "timePeriodCoveredEnd"),
                ) for entry in field["value"]
            ]

        elif type_name == "title":
            dataset_args["title"] = field["value"]

    return Dataset(**dataset_args)
Beispiel #7
0
def test_create_dataset_from_full_dataverse_json(shared_datadir):
    """Convert the complete JPAL Dataverse record and compare to a Dataset.

    The JSON file ``jpal/jpal_complete_record.json`` is read from the
    ``shared_datadir`` fixture, passed through
    ``create_from_dataverse_json`` and the result is checked for equality
    with a ``Dataset`` assembled from the expected pieces below.
    """
    record_path = shared_datadir / "jpal/jpal_complete_record.json"
    source_record = json.loads(record_path.read_text())
    converted = create_from_dataverse_json(source_record)

    expected_authors = [
        Author(
            authorAffiliation="AuthorAffiliation1",
            authorIdentifier="AuthorIdentifier1",
            authorIdentifierScheme="ORCID",
            authorName="LastAuthor1, FirstAuthor1",
        ),
        Author(
            authorAffiliation="AuthorAffiliation2",
            authorIdentifier="AuthorIdentifier2",
            authorIdentifierScheme="ORCID",
            authorName="LastAuthor2, FirstAuthor2",
        ),
    ]
    expected_contacts = [
        Contact(
            datasetContactAffiliation="ContactAffiliation1",
            datasetContactEmail="*****@*****.**",
            datasetContactName="LastContact1, FirstContact1",
        ),
        Contact(
            datasetContactAffiliation="ContactAffiliation2",
            datasetContactEmail="*****@*****.**",
            datasetContactName="LastContact2, FirstContact2",
        ),
    ]
    expected_contributors = [
        Contributor(
            contributorName="LastContributor1, FirstContributor1",
            contributorType="Data Collector",
        ),
        Contributor(
            contributorName="LastContributor2, FirstContributor2",
            contributorType="Researcher",
        ),
    ]
    expected_description = [
        Description(
            dsDescriptionDate="2020-01-01",
            dsDescriptionValue="DescriptionText 1",
        ),
        Description(dsDescriptionValue="DescriptionText 2"),
    ]
    # The JPAL Dataverse distributor comes first, followed by the two
    # distributors expected from the record.
    expected_distributors = [
        Distributor(
            distributorName=
            "The Abdul Latif Jameel Poverty Action Lab Dataverse",
            distributorURL="https://dataverse.harvard.edu/dataverse/jpal",
        ),
        Distributor(
            distributorName="LastDistributor1, FirstDistributor1",
            distributorURL="http://DistributorURL1.org",
        ),
        Distributor(
            distributorName="LastDistributor2, FirstDistributor2",
            distributorURL="http://DistributorURL2.org",
        ),
    ]
    expected_grant_numbers = [
        GrantNumber(
            grantNumberAgency="GrantInformationGrantAgency1",
            grantNumberValue="GrantInformationGrantNumber1",
        )
    ]
    expected_keywords = [
        Keyword(keywordValue="KeywordTerm1"),
        Keyword(keywordValue="KeywordTerm2"),
    ]
    expected_other_ids = [
        OtherId(
            otherIdAgency="OtherIDAgency1",
            otherIdValue="OtherIDIdentifier1",
        ),
        OtherId(
            otherIdAgency="OtherIDAgency2",
            otherIdValue="OtherIDIdentifier2",
        ),
    ]
    expected_producers = [
        Producer(
            producerName="LastProducer1, FirstProducer1",
            producerURL="http://ProducerURL1.org",
        ),
        Producer(
            producerName="LastProducer2, FirstProducer2",
            producerURL="http://ProducerURL2.org",
        ),
    ]
    expected_publications = [
        Publication(
            publicationCitation="RelatedPublicationCitation1",
            publicationIDNumber="RelatedPublicationIDNumber1",
            publicationIDType="ark",
            publicationURL="http://RelatedPublicationURL1.org",
        ),
        Publication(
            publicationCitation="RelatedPublicationCitation2",
            publicationIDNumber="RelatedPublicationIDNumber2",
            publicationIDType="doi",
            publicationURL="https://doi.org/RelatedPublicationURL2",
        ),
    ]
    expected_series = Series(
        seriesInformation="SeriesInformation",
        seriesName="SeriesName",
    )
    expected_subjects = [
        "Agricultural Sciences",
        "Business and Management",
        "Engineering",
        "Law",
    ]
    expected_periods = [
        TimePeriodCovered(
            timePeriodCoveredStart="1005-01-01",
            timePeriodCoveredEnd="1005-01-02",
        ),
        TimePeriodCovered(
            timePeriodCoveredStart="2020-01-01",
            timePeriodCoveredEnd="2020-01-02",
        ),
    ]

    wanted = Dataset(
        alternativeURL="https://doi.org/00.0000/DVN/00001",
        authors=expected_authors,
        contacts=expected_contacts,
        contributors=expected_contributors,
        description=expected_description,
        distributionDate="2020-06-27",
        distributors=expected_distributors,
        grantNumbers=expected_grant_numbers,
        keywords=expected_keywords,
        kindOfData=["KindOfData1", "KindOfData2"],
        language=["English", "Swahili"],
        license="CC0",
        notesText="Notes1",
        otherIds=expected_other_ids,
        producers=expected_producers,
        productionPlace="ProductionPlace",
        publications=expected_publications,
        series=expected_series,
        subjects=expected_subjects,
        termsOfUse="CC0 Waiver",
        timePeriodsCovered=expected_periods,
        title="Replication Data for: Title",
    )

    assert wanted == converted