Ejemplo n.º 1
0
def test_minimal_dataset(dataverse_minimal_json_record):
    """Serialize a Dataset built from one author, contact, description,
    subject and title, and compare it with the minimal JSON fixture."""
    finch_author = Author(
        authorName="Finch, Fiona",
        authorAffiliation="Birds Inc.",
    )
    finch_contact = Contact(
        datasetContactName="Finch, Fiona",
        datasetContactEmail="*****@*****.**",
    )
    finch_description = Description(
        dsDescriptionValue="Darwin's finches (also known"
        " as the Galápagos finches) are a group of about"
        " fifteen species of passerine birds.")

    record = Dataset(
        title="Darwin's Finches",
        subjects=["Medicine, Health and Life Sciences"],
        authors=[finch_author],
        contacts=[finch_contact],
        description=[finch_description],
    )

    serialized = record.asdict()
    assert serialized == dataverse_minimal_json_record
Ejemplo n.º 2
0
 def create(self, dataset: Dataset, parent: str = "root") -> Tuple[int, str]:
     """Submit ``dataset`` for creation under ``parent``.

     The request is built by ``self.api.create_dataset`` from the parent
     identifier and ``dataset.asdict()``, then sent through
     ``self.transport``.

     Returns:
         The ``id`` and ``persistentId`` values found under the ``data``
         key of the JSON response, as a 2-tuple.
     """
     request = self.api.create_dataset(parent, dataset.asdict())
     response = self.transport.send(request)
     created = response.json()["data"]
     return (created["id"], created["persistentId"])
Ejemplo n.º 3
0
def test_create_dataset_from_minimal_dataverse_json(shared_datadir):
    """Convert the minimal JPAL JSON record and compare the result with a
    hand-built Dataset."""
    record_path = shared_datadir / "jpal/jpal_minimal_record.json"
    json_record = json.loads(record_path.read_text())
    actual = create_from_dataverse_json(json_record)

    finch_author = Author(
        authorName="Finch, Fiona",
        authorAffiliation="Birds Inc.",
    )
    finch_contact = Contact(
        datasetContactName="Finch, Fiona",
        datasetContactEmail="*****@*****.**",
    )
    finch_description = Description(
        dsDescriptionValue="Darwin's finches (also known"
        " as the Galápagos finches) are a group of about"
        " fifteen species of passerine birds.")
    jpal_distributor = Distributor(
        distributorName="The Abdul Latif Jameel Poverty Action Lab Dataverse",
        distributorURL="https://dataverse.harvard.edu/dataverse/jpal",
    )

    expected = Dataset(
        title="Darwin's Finches",
        subjects=["Medicine, Health and Life Sciences"],
        alternativeURL="https://doi.org/00.0000/DVN/00002",
        distributionDate="2020-01-01",
        authors=[finch_author],
        contacts=[finch_contact],
        description=[finch_description],
        distributors=[jpal_distributor],
    )

    assert expected == actual
Ejemplo n.º 4
0
def test_inadequate_dataset(dataverse_minimal_json_record):
    """A Dataset given an empty ``authors`` list must raise ``ValueError``.

    The ``dataverse_minimal_json_record`` fixture is still requested so the
    test's signature is unchanged, but the body does not read it: once the
    expected exception fires there is no serialized record to compare.
    """
    # Build the valid pieces outside ``pytest.raises`` so that a ValueError
    # coming from either of them fails the test instead of satisfying it.
    contact = Contact(
        datasetContactName="Finch, Fiona",
        datasetContactEmail="*****@*****.**",
    )
    description = Description(
        dsDescriptionValue="Darwin's finches (also known"
        " as the Galápagos finches) are a group of about"
        " fifteen species of passerine birds.")

    with pytest.raises(ValueError):
        # NOTE(review): it is not visible here whether validation runs in
        # the constructor or in asdict(), so both stay inside the block.
        Dataset(
            authors=[],
            contacts=[contact],
            description=[description],
            subjects=["Medicine, Health and Life Sciences"],
            title="Darwin's Finches",
        ).asdict()
Ejemplo n.º 5
0
def test_asdict_removes_null_values(dataverse_partial_json_record):
    """Serialize a Dataset whose sub-records set only some of their fields
    (and whose ``kindOfData`` is ``None``) and compare the key-sorted JSON
    with the partial fixture."""
    first_author = Author(
        authorName="LastAuthor1, FirstAuthor1",
        authorAffiliation="AuthorAffiliation1",
    )
    first_contact = Contact(
        datasetContactName="LastContact1, FirstContact1",
        datasetContactEmail="*****@*****.**",
    )
    first_description = Description(dsDescriptionValue="DescriptionText 1")
    first_contributor = Contributor(
        contributorName="LastContributor1, FirstContributor1")
    first_distributor = Distributor(
        distributorName="LastDistributor1, FirstDistributor1")
    first_keyword = Keyword(keywordValue="KeywordTerm1")
    first_grant = GrantNumber(grantNumberValue="GrantInformationGrantNumber1")
    first_other_id = OtherId(otherIdValue="OtherIDIdentifier1")
    first_producer = Producer(producerName="LastProducer1, FirstProducer1")
    first_publication = Publication(
        publicationCitation="RelatedPublicationCitation1",
        publicationURL="http://RelatedPublicationURL1.org",
    )
    series_info = Series(seriesInformation="SeriesInformation")

    record = Dataset(
        title="Replication Data for: Title",
        subjects=["Agricultural Sciences"],
        authors=[first_author],
        contacts=[first_contact],
        description=[first_description],
        contributors=[first_contributor],
        distributors=[first_distributor],
        keywords=[first_keyword],
        grantNumbers=[first_grant],
        otherIds=[first_other_id],
        producers=[first_producer],
        publications=[first_publication],
        series=series_info,
        kindOfData=None,
    )

    # Compare canonical (key-sorted) JSON text rather than the raw dicts.
    actual_json = json.dumps(record.asdict(), sort_keys=True)
    expected_json = json.dumps(dataverse_partial_json_record, sort_keys=True)
    assert actual_json == expected_json
Ejemplo n.º 6
0
def test_full_dataset(dataverse_full_json_record):
    """Serialize a Dataset with every field used by this test populated and
    compare the key-sorted JSON with the full fixture."""
    first_author = Author(
        authorName="LastAuthor1, FirstAuthor1",
        authorAffiliation="AuthorAffiliation1",
        authorIdentifier="AuthorIdentifier1",
        authorIdentifierScheme="ORCID",
    )
    first_contact = Contact(
        datasetContactName="LastContact1, FirstContact1",
        datasetContactEmail="*****@*****.**",
        datasetContactAffiliation="ContactAffiliation1",
    )
    first_description = Description(
        dsDescriptionValue="DescriptionText 1",
        dsDescriptionDate="1000-01-01",
    )
    first_contributor = Contributor(
        contributorName="LastContributor1, FirstContributor1",
        contributorType="Data Collector",
    )
    first_distributor = Distributor(
        distributorName="LastDistributor1, FirstDistributor1",
        distributorURL="http://DistributorURL1.org",
    )
    first_keyword = Keyword(keywordValue="KeywordTerm1")
    first_grant = GrantNumber(
        grantNumberValue="GrantInformationGrantNumber1",
        grantNumberAgency="GrantInformationGrantAgency1",
        grantNumberInformation="GrantInformationInformation1",
    )
    first_other_id = OtherId(
        otherIdValue="OtherIDIdentifier1",
        otherIdAgency="OtherIDAgency1",
    )
    first_producer = Producer(
        producerName="LastProducer1, FirstProducer1",
        producerURL="http://ProducerURL1.org",
    )
    first_publication = Publication(
        publicationCitation="RelatedPublicationCitation1",
        publicationIDNumber="RelatedPublicationIDNumber1",
        publicationIDType="ark",
        publicationURL="http://RelatedPublicationURL1.org",
    )
    series_info = Series(
        seriesName="SeriesName",
        seriesInformation="SeriesInformation",
    )
    covered_period = TimePeriodCovered(
        timePeriodCoveredStart="1005-01-01",
        timePeriodCoveredEnd="1005-01-02",
    )

    record = Dataset(
        title="Replication Data for: Title",
        subjects=[
            "Agricultural Sciences",
            "Business and Management",
            "Engineering",
            "Law",
        ],
        alternativeURL="http://AlternativeURL.org",
        authors=[first_author],
        contacts=[first_contact],
        description=[first_description],
        contributors=[first_contributor],
        distributors=[first_distributor],
        distributionDate="1004-01-01",
        keywords=[first_keyword],
        grantNumbers=[first_grant],
        otherIds=[first_other_id],
        producers=[first_producer],
        productionPlace="ProductionPlace",
        publications=[first_publication],
        notesText="Notes1",
        language=["English"],
        timePeriodsCovered=[covered_period],
        kindOfData=["KindOfData1", "KindOfData2"],
        series=series_info,
        license="CC0",
        termsOfUse="CC0 Waiver",
    )

    # Compare canonical (key-sorted) JSON text rather than the raw dicts.
    actual_json = json.dumps(record.asdict(), sort_keys=True)
    expected_json = json.dumps(dataverse_full_json_record, sort_keys=True)
    assert actual_json == expected_json
Ejemplo n.º 7
0
def create_from_dataverse_json(data: dict) -> Dataset:
    """Build a ``Dataset`` from a Dataverse JSON record.

    The top-level ``persistentUrl`` and ``publicationDate`` values become
    ``alternativeURL`` and ``distributionDate``; ``license`` and
    ``termsOfUse`` are read from ``datasetVersion``. Each entry under
    ``datasetVersion.metadataBlocks.citation.fields`` is then mapped
    according to its ``typeName``; entries whose ``typeName`` is not
    handled below are skipped.

    The JPAL Dataverse distributor is always first in ``distributors``;
    any distributors found in the record are appended after it.
    """
    kwargs: Dict[str, Any] = {
        "alternativeURL": data.get("persistentUrl"),
        "distributionDate": data.get("publicationDate"),
        "distributors": [
            Distributor(
                distributorName=(
                    "The Abdul Latif Jameel Poverty Action Lab Dataverse"),
                distributorURL="https://dataverse.harvard.edu/dataverse/jpal",
            )
        ],
    }

    version = data["datasetVersion"]
    kwargs["license"] = version.get("license")
    kwargs["termsOfUse"] = version.get("termsOfUse")

    for field in version["metadataBlocks"]["citation"]["fields"]:
        kind = field["typeName"]

        # Fields whose value is copied through under the same name.
        if kind in ("kindOfData", "language", "notesText",
                    "productionPlace", "title"):
            kwargs[kind] = field["value"]

        # Same pass-through, but stored under the plural keyword.
        elif kind == "subject":
            kwargs["subjects"] = field["value"]

        elif kind == "author":
            entries = field["value"]
            kwargs["authors"] = [
                Author(
                    authorAffiliation=entry["authorAffiliation"]["value"],
                    authorIdentifier=get_optional_value(
                        entry, "authorIdentifier"),
                    authorIdentifierScheme=get_optional_value(
                        entry, "authorIdentifierScheme"),
                    authorName=entry["authorName"]["value"],
                ) for entry in entries
            ]

        elif kind == "datasetContact":
            entries = field["value"]
            kwargs["contacts"] = [
                Contact(
                    datasetContactAffiliation=get_optional_value(
                        entry, "datasetContactAffiliation"),
                    datasetContactEmail=entry["datasetContactEmail"]["value"],
                    datasetContactName=entry["datasetContactName"]["value"],
                ) for entry in entries
            ]

        elif kind == "contributor":
            entries = field["value"]
            kwargs["contributors"] = [
                Contributor(
                    contributorName=get_optional_value(
                        entry, "contributorName"),
                    contributorType=get_optional_value(
                        entry, "contributorType"),
                ) for entry in entries
            ]

        elif kind == "dsDescription":
            entries = field["value"]
            kwargs["description"] = [
                Description(
                    dsDescriptionDate=get_optional_value(
                        entry, "dsDescriptionDate"),
                    dsDescriptionValue=entry["dsDescriptionValue"]["value"],
                ) for entry in entries
            ]

        elif kind == "distributor":
            # Appended to the list seeded above, never replacing it.
            kwargs["distributors"] += [
                Distributor(
                    distributorName=get_optional_value(
                        entry, "distributorName"),
                    distributorURL=get_optional_value(
                        entry, "distributorURL"),
                ) for entry in field["value"]
            ]

        elif kind == "grantNumber":
            entries = field["value"]
            kwargs["grantNumbers"] = [
                GrantNumber(
                    grantNumberAgency=get_optional_value(
                        entry, "grantNumberAgency"),
                    grantNumberValue=get_optional_value(
                        entry, "grantNumberValue"),
                ) for entry in entries
            ]

        elif kind == "keyword":
            entries = field["value"]
            kwargs["keywords"] = [
                Keyword(keywordValue=get_optional_value(entry, "keywordValue"))
                for entry in entries
            ]

        elif kind == "otherId":
            entries = field["value"]
            kwargs["otherIds"] = [
                OtherId(
                    otherIdAgency=get_optional_value(entry, "otherIdAgency"),
                    otherIdValue=get_optional_value(entry, "otherIdValue"),
                ) for entry in entries
            ]

        elif kind == "producer":
            entries = field["value"]
            kwargs["producers"] = [
                Producer(
                    producerName=get_optional_value(entry, "producerName"),
                    producerURL=get_optional_value(entry, "producerURL"),
                ) for entry in entries
            ]

        elif kind == "publication":
            entries = field["value"]
            kwargs["publications"] = [
                Publication(
                    publicationCitation=get_optional_value(
                        entry, "publicationCitation"),
                    publicationIDNumber=get_optional_value(
                        entry, "publicationIDNumber"),
                    publicationIDType=get_optional_value(
                        entry, "publicationIDType"),
                    publicationURL=get_optional_value(
                        entry, "publicationURL"),
                ) for entry in entries
            ]

        elif kind == "series":
            # The field's value is passed to the helper directly rather
            # than being iterated like the list-valued fields.
            series_value = field["value"]
            kwargs["series"] = Series(
                seriesName=get_optional_value(series_value, "seriesName"),
                seriesInformation=get_optional_value(
                    series_value, "seriesInformation"),
            )

        elif kind == "timePeriodCovered":
            entries = field["value"]
            kwargs["timePeriodsCovered"] = [
                TimePeriodCovered(
                    timePeriodCoveredStart=get_optional_value(
                        entry, "timePeriodCoveredStart"),
                    timePeriodCoveredEnd=get_optional_value(
                        entry, "timePeriodCoveredEnd"),
                ) for entry in entries
            ]

    return Dataset(**kwargs)
Ejemplo n.º 8
0
def test_create_dataset_from_full_dataverse_json(shared_datadir):
    """Convert the complete JPAL JSON record and compare the result with a
    hand-built Dataset."""
    record_path = shared_datadir / "jpal/jpal_complete_record.json"
    json_record = json.loads(record_path.read_text())
    actual = create_from_dataverse_json(json_record)

    expected_authors = [
        Author(
            authorAffiliation="AuthorAffiliation1",
            authorIdentifier="AuthorIdentifier1",
            authorIdentifierScheme="ORCID",
            authorName="LastAuthor1, FirstAuthor1",
        ),
        Author(
            authorAffiliation="AuthorAffiliation2",
            authorIdentifier="AuthorIdentifier2",
            authorIdentifierScheme="ORCID",
            authorName="LastAuthor2, FirstAuthor2",
        ),
    ]
    expected_contacts = [
        Contact(
            datasetContactAffiliation="ContactAffiliation1",
            datasetContactEmail="*****@*****.**",
            datasetContactName="LastContact1, FirstContact1",
        ),
        Contact(
            datasetContactAffiliation="ContactAffiliation2",
            datasetContactEmail="*****@*****.**",
            datasetContactName="LastContact2, FirstContact2",
        ),
    ]
    expected_contributors = [
        Contributor(
            contributorName="LastContributor1, FirstContributor1",
            contributorType="Data Collector",
        ),
        Contributor(
            contributorName="LastContributor2, FirstContributor2",
            contributorType="Researcher",
        ),
    ]
    expected_descriptions = [
        Description(
            dsDescriptionDate="2020-01-01",
            dsDescriptionValue="DescriptionText 1",
        ),
        Description(dsDescriptionValue="DescriptionText 2"),
    ]
    # The JPAL Dataverse entry comes first, ahead of the numbered ones.
    expected_distributors = [
        Distributor(
            distributorName=(
                "The Abdul Latif Jameel Poverty Action Lab Dataverse"),
            distributorURL="https://dataverse.harvard.edu/dataverse/jpal",
        ),
        Distributor(
            distributorName="LastDistributor1, FirstDistributor1",
            distributorURL="http://DistributorURL1.org",
        ),
        Distributor(
            distributorName="LastDistributor2, FirstDistributor2",
            distributorURL="http://DistributorURL2.org",
        ),
    ]
    expected_grants = [
        GrantNumber(
            grantNumberAgency="GrantInformationGrantAgency1",
            grantNumberValue="GrantInformationGrantNumber1",
        )
    ]
    expected_keywords = [
        Keyword(keywordValue="KeywordTerm1"),
        Keyword(keywordValue="KeywordTerm2"),
    ]
    expected_other_ids = [
        OtherId(
            otherIdAgency="OtherIDAgency1",
            otherIdValue="OtherIDIdentifier1",
        ),
        OtherId(
            otherIdAgency="OtherIDAgency2",
            otherIdValue="OtherIDIdentifier2",
        ),
    ]
    expected_producers = [
        Producer(
            producerName="LastProducer1, FirstProducer1",
            producerURL="http://ProducerURL1.org",
        ),
        Producer(
            producerName="LastProducer2, FirstProducer2",
            producerURL="http://ProducerURL2.org",
        ),
    ]
    expected_publications = [
        Publication(
            publicationCitation="RelatedPublicationCitation1",
            publicationIDNumber="RelatedPublicationIDNumber1",
            publicationIDType="ark",
            publicationURL="http://RelatedPublicationURL1.org",
        ),
        Publication(
            publicationCitation="RelatedPublicationCitation2",
            publicationIDNumber="RelatedPublicationIDNumber2",
            publicationIDType="doi",
            publicationURL="https://doi.org/RelatedPublicationURL2",
        ),
    ]
    expected_series = Series(
        seriesInformation="SeriesInformation",
        seriesName="SeriesName",
    )
    expected_periods = [
        TimePeriodCovered(
            timePeriodCoveredStart="1005-01-01",
            timePeriodCoveredEnd="1005-01-02",
        ),
        TimePeriodCovered(
            timePeriodCoveredStart="2020-01-01",
            timePeriodCoveredEnd="2020-01-02",
        ),
    ]

    expected = Dataset(
        title="Replication Data for: Title",
        subjects=[
            "Agricultural Sciences",
            "Business and Management",
            "Engineering",
            "Law",
        ],
        alternativeURL="https://doi.org/00.0000/DVN/00001",
        distributionDate="2020-06-27",
        license="CC0",
        termsOfUse="CC0 Waiver",
        notesText="Notes1",
        productionPlace="ProductionPlace",
        kindOfData=["KindOfData1", "KindOfData2"],
        language=["English", "Swahili"],
        authors=expected_authors,
        contacts=expected_contacts,
        contributors=expected_contributors,
        description=expected_descriptions,
        distributors=expected_distributors,
        grantNumbers=expected_grants,
        keywords=expected_keywords,
        otherIds=expected_other_ids,
        producers=expected_producers,
        publications=expected_publications,
        series=expected_series,
        timePeriodsCovered=expected_periods,
    )

    assert expected == actual