Exemple #1
0
def fetch_images(n_images, output_dir, image_size, page_offset):
    """Download ``n_images`` images, one page at a time.

    The request is split into pages of at most ``MAX_PAGESIZE`` images; only
    the final page of the run may be smaller. Pages are fetched sequentially
    through the persistent HTTP client session, which is closed again before
    this function returns, even when a download fails.

    Args:
        n_images: Total number of images to download.
        output_dir: Forwarded to ``download_images_from_page`` as ``dir``.
        image_size: Forwarded unchanged to ``download_images_from_page``.
        page_offset: Number of pages to skip; the first page requested is
            ``page_offset + 1``.
    """
    n_pages = math.ceil(n_images / MAX_PAGESIZE)

    def get_page_size(page_index):
        # ``page_index`` is the zero-based position within THIS run (not the
        # absolute page number), so the last index is always n_pages - 1.
        if page_index != n_pages - 1:
            return MAX_PAGESIZE
        # An exact multiple of MAX_PAGESIZE leaves remainder 0, which means
        # the last page is a full one.
        return n_images % MAX_PAGESIZE or MAX_PAGESIZE

    print(f"Downloading {n_images} images from {n_pages} pages...")
    http.start_persistent_client_session()

    async def run():
        # Why not do these concurrently, you ask?
        # Because it kills the API :(
        for current_page in tqdm(range(n_pages)):
            await download_images_from_page(
                page=(current_page + page_offset + 1),
                # Size is decided by the run-local index; adding page_offset
                # here would truncate the wrong page (or none at all).
                page_size=get_page_size(current_page),
                image_size=image_size,
                dir=output_dir)

    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(run())
    finally:
        # Always release the session, even if a page download raised.
        loop.run_until_complete(http.close_persistent_client_session())
Exemple #2
0
async def test_redirect():
    """fetch_redirect_url reports the URL the label lookup resolves to."""
    expected = "https://id.loc.gov/authorities/subjects/sh85101552.html"
    start_persistent_client_session()
    try:
        response = await fetch_redirect_url(
            "https://id.loc.gov/authorities/label/Physical geography")
    finally:
        # Close the session even when the request raises, so a failure here
        # does not leave the persistent session open.
        await close_persistent_client_session()
    assert response["url"] == expected
Exemple #3
0
async def test_non_redirect():
    """fetch_redirect_url returns ``iiif_url`` unchanged in ``response["url"]``."""
    start_persistent_client_session()
    try:
        response = await fetch_redirect_url(iiif_url)
    finally:
        # Close the session even when the request raises, so a failure here
        # does not leave the persistent session open.
        await close_persistent_client_session()
    assert response["url"] == iiif_url
Exemple #4
0
async def test_bad_json():
    """fetch_url_json raises ValueError when given ``image_url``.

    NOTE(review): presumably because the image response body is not JSON;
    confirm against fetch_url_json.
    """
    start_persistent_client_session()
    try:
        with pytest.raises(ValueError):
            await fetch_url_json(image_url)
    finally:
        # Runs whether or not the expected ValueError was raised, so the
        # persistent session is never left open by a failing test.
        await close_persistent_client_session()
Exemple #5
0
async def test_get_json():
    """fetch_url_json returns a 200 response object and serialisable JSON."""
    start_persistent_client_session()
    try:
        response = await fetch_url_json(iiif_url)
    finally:
        # Close the session even when the request raises, so a failure here
        # does not leave the persistent session open.
        await close_persistent_client_session()
    assert response["object"].status == 200
    # json.dumps raises on non-serialisable data and returns a non-empty
    # string otherwise, so this checks the payload round-trips.
    assert json.dumps(response["json"])
Exemple #6
0
async def test_get_bytes():
    """fetch_url_bytes returns a 200 response object and a ``bytes`` payload."""
    start_persistent_client_session()
    try:
        response = await fetch_url_bytes(image_url)
    finally:
        # Close the session even when the request raises, so a failure here
        # does not leave the persistent session open.
        await close_persistent_client_session()
    assert response["object"].status == 200
    assert isinstance(response["bytes"], bytes)
Exemple #7
0
async def test_fails_nonexistant_remote_image():
    """get_image_from_url raises ValueError for a string that is not a URL."""
    start_persistent_client_session()
    try:
        with pytest.raises(ValueError):
            await get_image_from_url("this isn't a url")
    finally:
        # Runs whether or not the expected ValueError was raised, so the
        # persistent session is never left open by a failing test.
        await close_persistent_client_session()
Exemple #8
0
async def test_fails_nonexistant_local_image():
    """get_image_from_url raises ValueError for a missing local file name."""
    query_url = "nonexistent_image.jpg"
    start_persistent_client_session()
    try:
        with pytest.raises(ValueError):
            await get_image_from_url(query_url)
    finally:
        # Runs whether or not the expected ValueError was raised, so the
        # persistent session is never left open by a failing test.
        await close_persistent_client_session()
Exemple #9
0
async def test_get_iiif_image():
    """get_image_from_url returns an ``Image`` instance for ``iiif_url``."""
    start_persistent_client_session()
    try:
        image = await get_image_from_url(iiif_url)
    finally:
        # Close the session even when the request raises, so a failure here
        # does not leave the persistent session open.
        await close_persistent_client_session()
    assert isinstance(image, Image)
Exemple #10
0
async def test_is_invalid_image():
    """is_valid_image rejects the response fetched from ``invalid_url``."""
    start_persistent_client_session()
    try:
        response = await fetch_url_bytes(invalid_url)
    finally:
        # Close the session even when the request raises, so a failure here
        # does not leave the persistent session open.
        await close_persistent_client_session()
    assert not is_valid_image(response["object"])