def test_fetch_bag_failure():
    """``fetch`` raises ContentProviderException when the bag request answers 500."""
    spec = {
        "host": {
            "hostname": [
                "https://www.hydroshare.org/resource/",
                "http://www.hydroshare.org/resource/",
            ],
            "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
        },
        "resource": "123456789",
    }
    # The single mocked urlopen() reply is built with status 500.
    with hydroshare_archive(), patch.object(
        Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 500)]
    ):
        provider = Hydroshare()
        with TemporaryDirectory() as workdir, pytest.raises(
            ContentProviderException,
            match=r"Failed to download bag\. status code 500\.",
        ):
            # fetch() yields its progress, so it must be iterated for the
            # error to surface.
            for _ in provider.fetch(spec, workdir):
                pass
def test_fetch_bag():
    """Fetching a bag leaves the archive's files in the output directory.

    ``urlopen`` and ``_urlretrieve`` are patched so that no network access
    happens: the "download" hands back the path of a local ZIP produced by
    ``hydroshare_archive``.
    """
    spec = {
        "host": {
            "hostname": [
                "https://www.hydroshare.org/resource/",
                "http://www.hydroshare.org/resource/",
            ],
            "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
        },
        "resource": "123456789",
    }
    # we "fetch" a local ZIP file to simulate a Hydroshare resource
    with hydroshare_archive() as hydro_path:
        with patch.object(
            Hydroshare,
            "urlopen",
            side_effect=[
                MockResponse("application/html", 200),
                MockResponse("application/zip", 200),
            ],
        ), patch.object(Hydroshare, "_urlretrieve", side_effect=[(hydro_path, None)]):
            hydro = Hydroshare()
            hydro.resource_id = "b8f6eae9d89241cf8b5904033460af61"
            with TemporaryDirectory() as d:
                # The yielded messages are not inspected here; fetch() only
                # has to be iterated to completion so the unpacking happens.
                for _ in hydro.fetch(spec, d):
                    pass
                unpacked_files = set(os.listdir(d))
                expected = {"some-other-file.txt", "some-file.txt"}
                assert expected == unpacked_files
def test_fetch_bag_timeout():
    """``fetch`` called with ``timeout=0`` raises a "taking too long" error."""
    spec = {
        "host": {
            "hostname": [
                "https://www.hydroshare.org/resource/",
                "http://www.hydroshare.org/resource/",
            ],
            "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
        },
        "resource": "123456789",
    }
    with hydroshare_archive(), patch.object(
        Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 200)]
    ):
        provider = Hydroshare()
        with TemporaryDirectory() as workdir, pytest.raises(
            ContentProviderException,
            match=r"Bag taking too long to prepare, exiting now, try again later\.",
        ):
            # fetch() yields its progress, so it must be iterated for the
            # error to surface.
            for _ in provider.fetch(spec, workdir, timeout=0):
                pass
def test_content_id():
    """After detecting a Hydroshare DOI, ``content_id`` equals the resource id."""
    resolved_url = "https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"
    with patch.object(Hydroshare, "urlopen") as mocked_urlopen:
        # Whatever urlopen() is asked for, the reply reports the resource page URL.
        mocked_urlopen.return_value.url = resolved_url
        provider = Hydroshare()
        provider.detect("10.4211/hs.b8f6eae9d89241cf8b5904033460af61")
        assert provider.content_id == "b8f6eae9d89241cf8b5904033460af61"
def test_content_id(requests_mock):
    """After detecting a Hydroshare DOI, ``content_id`` is ``<resource id>.v<version>``."""
    # Two GET matchers are registered, in this order: one for any https URL
    # and one for doi.org URLs, each answering with canned JSON.
    any_https = re.compile("https://")
    doi_org = re.compile("https://doi.org")
    requests_mock.get(any_https, json=hydroshare_data)
    requests_mock.get(doi_org, json=doi_resolver)

    provider = Hydroshare()
    provider.detect("10.4211/hs.b8f6eae9d89241cf8b5904033460af61")
    assert provider.content_id == "b8f6eae9d89241cf8b5904033460af61.v1569427757"
def test_content_id():
    """After detecting a Hydroshare DOI, ``content_id`` is ``<resource id>.v<version>``."""
    # 1569427757 is the Unix timestamp of this "modified" date (to the second).
    metadata_json = '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
    with patch.object(Hydroshare, "urlopen") as mocked_urlopen:
        response = mocked_urlopen.return_value
        response.url = (
            "https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"
        )
        # Every read() on the mocked reply hands back the canned metadata.
        response.read = lambda: metadata_json

        provider = Hydroshare()
        provider.detect("10.4211/hs.b8f6eae9d89241cf8b5904033460af61")
        assert provider.content_id == "b8f6eae9d89241cf8b5904033460af61.v1569427757"
def test_fetch_bag_timeout():
    """With ``timeout=0`` the last message yielded by ``fetch`` reports the timeout."""
    spec = {
        "host": {
            "hostname": [
                "https://www.hydroshare.org/resource/",
                "http://www.hydroshare.org/resource/",
            ],
            "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
        },
        "resource": "123456789",
    }
    # The archive path is not needed here, so the context value is not bound.
    with hydroshare_archive(), patch.object(
        Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 200)]
    ):
        hydro = Hydroshare()
        with TemporaryDirectory() as d:
            # Collect everything fetch() yields; only the final message is checked.
            output = list(hydro.fetch(spec, d, timeout=0))
            assert (
                output[-1]
                == "Bag taking too long to prepare, exiting now, try again later."
            )
def test_fetch_bag_failure():
    """On a status-500 reply the last message yielded by ``fetch`` reports the failure."""
    spec = {
        "host": {
            "hostname": [
                "https://www.hydroshare.org/resource/",
                "http://www.hydroshare.org/resource/",
            ],
            "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
        },
        "resource": "123456789",
    }
    # The archive path is not needed here, so the context value is not bound.
    with hydroshare_archive(), patch.object(
        Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 500)]
    ):
        hydro = Hydroshare()
        with TemporaryDirectory() as d:
            # Collect everything fetch() yields; only the final message is checked.
            output = list(hydro.fetch(spec, d))
            assert output[-1] == "Failed to download bag. status code 500.\n"
def test_detect_hydroshare():
    """``detect`` returns a spec for Hydroshare URLs/DOIs and ``None`` for anything else."""

    def read():
        # Canned metadata body served by every mocked reply.
        return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'

    expected = {
        "host": {
            "hostname": [
                "https://www.hydroshare.org/resource/",
                "http://www.hydroshare.org/resource/",
            ],
            "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
            "version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
        },
        "resource": "b8f6eae9d89241cf8b5904033460af61",
        "version": "1569427757",
    }
    # (identifier, cumulative urlopen call count once it has been detected):
    # the plain resource URL costs one call (fetch the version); each DOI
    # form costs two more (resolve the DOI, then fetch the version).
    valid_cases = (
        ("https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61", 1),
        ("10.4211/hs.b8f6eae9d89241cf8b5904033460af61", 3),
        ("https://doi.org/10.4211/hs.b8f6eae9d89241cf8b5904033460af61", 5),
    )

    with patch.object(Hydroshare, "urlopen") as fake_urlopen:
        reply = fake_urlopen.return_value
        reply.url = (
            "https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"
        )
        reply.read = read
        # valid Hydroshare DOIs trigger this content provider
        for identifier, calls_so_far in valid_cases:
            assert Hydroshare().detect(identifier) == expected
            assert fake_urlopen.call_count == calls_so_far

    with patch.object(Hydroshare, "urlopen") as fake_urlopen:
        # Don't trigger the Hydroshare content provider
        for unrelated in ("/some/path/here", "https://example.com/path/here"):
            assert Hydroshare().detect(unrelated) is None
        # don't handle DOIs that aren't from Hydroshare
        reply = fake_urlopen.return_value
        reply.url = "http://joss.theoj.org/papers/10.21105/joss.01277"
        reply.read = read
        assert Hydroshare().detect("https://doi.org/10.21105/joss.01277") is None
def test_detect_hydroshare(requests_mock):
    """``detect`` returns a spec for Hydroshare URLs/DOIs and ``None`` for anything else."""
    # Two GET matchers are registered, in this order: one for any https URL
    # and one for doi.org URLs, each answering with canned JSON.
    requests_mock.get(re.compile("https://"), json=hydroshare_data)
    requests_mock.get(re.compile("https://doi.org"), json=doi_resolver)

    expected = {
        "host": {
            "hostname": [
                "https://www.hydroshare.org/resource/",
                "http://www.hydroshare.org/resource/",
            ],
            "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
            "version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
        },
        "resource": "b8f6eae9d89241cf8b5904033460af61",
        "version": "1569427757",
    }
    # (identifier, number of mocked requests expected for that detection):
    # the plain resource URL needs 1 request (fetch the version); each DOI
    # form needs 3 (2 to resolve the DOI, 302 + 200, and 1 for the version).
    valid_cases = (
        ("https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61", 1),
        ("10.4211/hs.b8f6eae9d89241cf8b5904033460af61", 3),
        ("https://doi.org/10.4211/hs.b8f6eae9d89241cf8b5904033460af61", 3),
    )
    # valid Hydroshare DOIs trigger this content provider
    for identifier, request_count in valid_cases:
        assert Hydroshare().detect(identifier) == expected
        assert requests_mock.call_count == request_count
        # start counting from zero again for the next case
        requests_mock.reset_mock()

    # Don't trigger the Hydroshare content provider
    for unrelated in ("/some/path/here", "https://example.com/path/here"):
        assert Hydroshare().detect(unrelated) is None
    # don't handle DOIs that aren't from Hydroshare
    assert Hydroshare().detect("https://doi.org/10.21105/joss.01277") is None
def test_detect_hydroshare(test_input, expected):
    """``detect`` maps each of three identifiers to ``expected`` and rejects others.

    NOTE(review): ``test_input`` and ``expected`` are presumably supplied by a
    parametrize decorator or fixtures outside this view; the body only relies
    on ``test_input`` being indexable at positions 0, 1 and 2.
    """
    with patch.object(Hydroshare, "urlopen") as mocked_urlopen:
        # Every mocked urlopen() reply reports the first input as its URL.
        mocked_urlopen.return_value.url = test_input[0]
        # valid Hydroshare DOIs trigger this content provider
        for position in (0, 1, 2):
            assert Hydroshare().detect(test_input[position]) == expected
        # only two of the three calls above have to resolve a DOI
        assert mocked_urlopen.call_count == 2

    with patch.object(Hydroshare, "urlopen") as mocked_urlopen:
        # Don't trigger the Hydroshare content provider
        for unrelated in ("/some/path/here", "https://example.com/path/here"):
            assert Hydroshare().detect(unrelated) is None
        # don't handle DOIs that aren't from Hydroshare
        mocked_urlopen.return_value.url = "http://joss.theoj.org/papers/10.21105/joss.01277"
        assert Hydroshare().detect("https://doi.org/10.21105/joss.01277") is None