def test_fetch_bag_failure():
    """A 500 response from the patched ``urlopen`` makes ``fetch`` raise.

    The test expects a ``ContentProviderException`` whose message matches
    "Failed to download bag. status code 500.".
    """
    with hydroshare_archive(), patch.object(
        Hydroshare,
        "urlopen",
        side_effect=[MockResponse("application/html", 500)],
    ):
        provider = Hydroshare()
        bag_spec = {
            "host": {
                "hostname": [
                    "https://www.hydroshare.org/resource/",
                    "http://www.hydroshare.org/resource/",
                ],
                "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
            },
            "resource": "123456789",
        }
        with TemporaryDirectory() as output_dir, pytest.raises(
            ContentProviderException,
            match=r"Failed to download bag\. status code 500\.",
        ):
            # fetch yields as it goes, so it has to be iterated for the
            # failure to surface
            for _ in provider.fetch(bag_spec, output_dir):
                pass
Exemple #2
0
def test_fetch_bag():
    """After ``fetch`` runs, the output directory holds the expected files.

    ``urlopen`` and ``_urlretrieve`` are both patched so that no network
    access happens: a local ZIP file stands in for a Hydroshare resource.
    """
    with hydroshare_archive() as archive_path:
        with patch.object(
            Hydroshare,
            "urlopen",
            side_effect=[
                MockResponse("application/html", 200),
                MockResponse("application/zip", 200),
            ],
        ), patch.object(
            Hydroshare, "_urlretrieve", side_effect=[(archive_path, None)]
        ):
            provider = Hydroshare()
            provider.resource_id = "b8f6eae9d89241cf8b5904033460af61"
            bag_spec = {
                "host": {
                    "hostname": [
                        "https://www.hydroshare.org/resource/",
                        "http://www.hydroshare.org/resource/",
                    ],
                    "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
                },
                "resource": "123456789",
            }

            with TemporaryDirectory() as output_dir:
                # drain the iterator; the yielded lines are not inspected
                for _ in provider.fetch(bag_spec, output_dir):
                    pass

                unpacked_files = set(os.listdir(output_dir))
                expected = {"some-other-file.txt", "some-file.txt"}
                assert expected == unpacked_files
def test_fetch_bag_timeout():
    """``fetch`` called with ``timeout=0`` raises a "taking too long" error.

    The patched ``urlopen`` returns a single 200 "application/html"
    response; the test expects a ``ContentProviderException``.
    """
    with hydroshare_archive(), patch.object(
        Hydroshare,
        "urlopen",
        side_effect=[MockResponse("application/html", 200)],
    ):
        provider = Hydroshare()
        bag_spec = {
            "host": {
                "hostname": [
                    "https://www.hydroshare.org/resource/",
                    "http://www.hydroshare.org/resource/",
                ],
                "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
            },
            "resource": "123456789",
        }
        with TemporaryDirectory() as output_dir, pytest.raises(
            ContentProviderException,
            match=r"Bag taking too long to prepare, exiting now, try again later\.",
        ):
            # fetch yields as it goes, so it has to be iterated for the
            # timeout error to surface
            for _ in provider.fetch(bag_spec, output_dir, timeout=0):
                pass
Exemple #4
0
def test_content_id():
    """After ``detect`` on a Hydroshare DOI, ``content_id`` is the resource id."""
    with patch.object(Hydroshare, "urlopen") as mocked_urlopen:
        # the mocked response reports the resource page as its final URL
        mocked_response = mocked_urlopen.return_value
        mocked_response.url = "https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"

        provider = Hydroshare()
        provider.detect("10.4211/hs.b8f6eae9d89241cf8b5904033460af61")

        assert provider.content_id == "b8f6eae9d89241cf8b5904033460af61"
Exemple #5
0
def test_content_id(requests_mock):
    """After ``detect`` on a Hydroshare DOI, ``content_id`` is "<id>.v<version>"."""
    # Register the catch-all matcher first and the doi.org matcher second,
    # in the same order as before.
    mocked_endpoints = (
        ("https://", hydroshare_data),
        ("https://doi.org", doi_resolver),
    )
    for url_pattern, json_payload in mocked_endpoints:
        requests_mock.get(re.compile(url_pattern), json=json_payload)

    provider = Hydroshare()
    provider.detect("10.4211/hs.b8f6eae9d89241cf8b5904033460af61")

    assert provider.content_id == "b8f6eae9d89241cf8b5904033460af61.v1569427757"
def test_content_id():
    """After ``detect`` on a Hydroshare DOI, ``content_id`` is "<id>.v<version>".

    The patched ``urlopen`` response exposes both a ``url`` and a ``read()``
    returning a JSON document with a "modified" date entry.
    """
    metadata_json = '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'

    with patch.object(Hydroshare, "urlopen") as mocked_urlopen:
        mocked_response = mocked_urlopen.return_value
        mocked_response.url = "https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"
        mocked_response.read = lambda: metadata_json

        provider = Hydroshare()
        provider.detect("10.4211/hs.b8f6eae9d89241cf8b5904033460af61")

        assert provider.content_id == "b8f6eae9d89241cf8b5904033460af61.v1569427757"
Exemple #7
0
def test_fetch_bag_timeout():
    """With ``timeout=0`` the last item yielded by ``fetch`` is the timeout message."""
    with hydroshare_archive(), patch.object(
        Hydroshare,
        "urlopen",
        side_effect=[MockResponse("application/html", 200)],
    ):
        provider = Hydroshare()
        bag_spec = {
            "host": {
                "hostname": [
                    "https://www.hydroshare.org/resource/",
                    "http://www.hydroshare.org/resource/",
                ],
                "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
            },
            "resource": "123456789",
        }
        with TemporaryDirectory() as output_dir:
            # collect everything fetch yields so the final message can be checked
            messages = list(provider.fetch(bag_spec, output_dir, timeout=0))
            expected_message = "Bag taking too long to prepare, exiting now, try again later."
            assert expected_message == messages[-1]
Exemple #8
0
def test_fetch_bag_failure():
    """On a 500 response the last item yielded by ``fetch`` is the failure message."""
    with hydroshare_archive(), patch.object(
        Hydroshare,
        "urlopen",
        side_effect=[MockResponse("application/html", 500)],
    ):
        provider = Hydroshare()
        bag_spec = {
            "host": {
                "hostname": [
                    "https://www.hydroshare.org/resource/",
                    "http://www.hydroshare.org/resource/",
                ],
                "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
            },
            "resource": "123456789",
        }
        with TemporaryDirectory() as output_dir:
            # collect everything fetch yields so the final message can be checked
            messages = list(provider.fetch(bag_spec, output_dir))
            expected_message = "Failed to download bag. status code 500.\n"
            assert expected_message == messages[-1]
def test_detect_hydroshare():
    """``detect`` returns a spec for Hydroshare URLs/DOIs and ``None`` otherwise.

    ``urlopen`` is patched throughout; the cumulative call count is checked
    after each successful detection.
    """
    metadata_json = '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'

    with patch.object(Hydroshare, "urlopen") as fake_urlopen:
        fake_response = fake_urlopen.return_value
        fake_response.url = "https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"
        fake_response.read = lambda: metadata_json

        # valid Hydroshare DOIs trigger this content provider
        expected = {
            "host": {
                "hostname": [
                    "https://www.hydroshare.org/resource/",
                    "http://www.hydroshare.org/resource/",
                ],
                "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
                "version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
            },
            "resource": "b8f6eae9d89241cf8b5904033460af61",
            "version": "1569427757",
        }

        # (identifier, cumulative urlopen call count after detecting it)
        detection_cases = (
            # resource URL: a single call, to fetch the version
            (
                "https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61",
                1,
            ),
            # bare DOI: 2 more calls, one to resolve the DOI and one for the version
            ("10.4211/hs.b8f6eae9d89241cf8b5904033460af61", 3),
            # doi.org URL: 2 more calls, one to resolve the DOI and one for the version
            ("https://doi.org/10.4211/hs.b8f6eae9d89241cf8b5904033460af61", 5),
        )
        for identifier, cumulative_calls in detection_cases:
            assert Hydroshare().detect(identifier) == expected
            assert fake_urlopen.call_count == cumulative_calls

    with patch.object(Hydroshare, "urlopen") as fake_urlopen:
        # Don't trigger the Hydroshare content provider
        for unrelated in ("/some/path/here", "https://example.com/path/here"):
            assert Hydroshare().detect(unrelated) is None

        # don't handle DOIs that aren't from Hydroshare
        fake_response = fake_urlopen.return_value
        fake_response.url = "http://joss.theoj.org/papers/10.21105/joss.01277"
        fake_response.read = lambda: metadata_json
        assert Hydroshare().detect("https://doi.org/10.21105/joss.01277") is None
Exemple #10
0
def test_detect_hydroshare(requests_mock):
    """``detect`` returns a spec for Hydroshare URLs/DOIs and ``None`` otherwise.

    HTTP traffic is served by ``requests_mock``; the number of mocked
    requests is checked, then reset, after each successful detection.
    """
    # Register the catch-all matcher first and the doi.org matcher second,
    # in the same order as before.
    requests_mock.get(re.compile("https://"), json=hydroshare_data)
    requests_mock.get(re.compile("https://doi.org"), json=doi_resolver)

    # valid Hydroshare DOIs trigger this content provider
    expected = {
        "host": {
            "hostname": [
                "https://www.hydroshare.org/resource/",
                "http://www.hydroshare.org/resource/",
            ],
            "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
            "version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
        },
        "resource": "b8f6eae9d89241cf8b5904033460af61",
        "version": "1569427757",
    }

    # (identifier, number of mocked requests that detecting it should make)
    detection_cases = (
        # resource URL: one request, to fetch the version
        ("https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61", 1),
        # bare DOI: 2 requests to resolve the DOI (302 + 200), 1 for the version
        ("10.4211/hs.b8f6eae9d89241cf8b5904033460af61", 3),
        # doi.org URL: 2 requests to resolve the DOI, 1 for the version
        ("https://doi.org/10.4211/hs.b8f6eae9d89241cf8b5904033460af61", 3),
    )
    for identifier, request_count in detection_cases:
        assert Hydroshare().detect(identifier) == expected
        assert requests_mock.call_count == request_count
        # start the next case from a zeroed counter
        requests_mock.reset_mock()

    not_hydroshare = (
        # Don't trigger the Hydroshare content provider
        "/some/path/here",
        "https://example.com/path/here",
        # don't handle DOIs that aren't from Hydroshare
        "https://doi.org/10.21105/joss.01277",
    )
    for identifier in not_hydroshare:
        assert Hydroshare().detect(identifier) is None
Exemple #11
0
def test_detect_hydroshare(test_input, expected):
    """``detect`` maps each of the first three ``test_input`` entries to ``expected``.

    Also checks that non-Hydroshare paths, URLs and DOIs are rejected
    (``detect`` returns ``None``).
    """
    with patch.object(Hydroshare, "urlopen") as fake_urlopen:
        fake_urlopen.return_value.url = test_input[0]
        # valid Hydroshare DOIs trigger this content provider
        for position in range(3):
            assert Hydroshare().detect(test_input[position]) == expected
        # only two of the three calls above have to resolve a DOI
        assert fake_urlopen.call_count == 2

    with patch.object(Hydroshare, "urlopen") as fake_urlopen:
        # Don't trigger the Hydroshare content provider
        for unrelated in ("/some/path/here", "https://example.com/path/here"):
            assert Hydroshare().detect(unrelated) is None

        # don't handle DOIs that aren't from Hydroshare
        fake_urlopen.return_value.url = "http://joss.theoj.org/papers/10.21105/joss.01277"
        assert Hydroshare().detect("https://doi.org/10.21105/joss.01277") is None