def test_get_icon(signed_in_client, mock_s3):
    """Upload a sample icon, then check it is served with a 200 and no cookie."""
    icon_uuid = uuid4()
    sample_png = path.join(test_data_path, "wikipedia-32px.png")

    # Put the sample image into the icon bucket under a fresh UUID.
    with open(sample_png, "r+b") as png_file:
        bucket = file_storage.get_icon_bucket()
        file_storage.upload_icon(bucket, icon_uuid, png_file)

    icon_endpoint = flask.url_for(
        "quarchive-icons.icon_by_uuid", icon_uuid=icon_uuid
    )
    response = signed_in_client.get(icon_endpoint)

    assert response.status_code == 200
    # If cookies are set CDNs and other caches generally will not cache the
    # image
    assert "Set-Cookie" not in response.headers
def icon_by_uuid(icon_uuid: UUID) -> flask.Response:
    """Return the stored icon for ``icon_uuid`` as a publicly cacheable PNG.

    This endpoint exists for completeness only: in production icons should
    not be served from Python, so every call logs a warning.
    """
    log.warning("serving icon %s directly", icon_uuid)

    icon_body = file_storage.download_icon(
        file_storage.get_icon_bucket(), icon_uuid
    )
    response = flask.Response(icon_body, mimetype="image/png")

    # If we do end up serving these, let caches keep them so that each icon
    # only needs to be served once.
    seconds_per_day = 24 * 60 * 60
    response.cache_control.max_age = 366 * seconds_per_day
    response.cache_control.public = True
    return response
def test_new_icon_found_for_page_icon(
    session, requests_mock, bg_client: TestAdapter[PickleMessage], mock_s3
):
    """A previously unseen page icon is retrieved, indexed and stored.

    Sends a ``NewIconFound`` event carrying both the icon URL and the page
    URL, then checks the database rows and the uploaded S3 object.
    """
    page = URL.from_string(f"http://{random_string()}.example.com/")
    icon_url = page.follow("/favicon.png")

    # Read the random image once; the same bytes feed both the expected hash
    # and the mocked HTTP response body.
    image_bytes = random_image_fileobj().read()
    expected_hash = hashlib.blake2b(image_bytes).digest()

    requests_mock.add(
        responses.GET,
        url=icon_url.to_string(),
        body=image_bytes,
        status=200,
        stream=True,
    )
    requests_mock.start()

    for known_url in (page, icon_url):
        upsert_url(session, known_url)
    session.commit()

    message = PickleMessage.from_obj(
        NewIconFound(icon_url_uuid=icon_url.url_uuid, page_url_uuid=page.url_uuid)
    )
    bg_client.send(message)

    joined = session.query(Icon, URLIcon).join(URLIcon)
    icon, url_icon = joined.filter(URLIcon.url_uuid == page.url_uuid).first()
    assert icon.source_blake2b_hash == expected_hash
    assert url_icon.url_uuid == page.url_uuid

    # Exactly one object should exist under the icon's key.
    expected_key = f"{icon.icon_uuid}.png"
    matches = list(file_storage.get_icon_bucket().objects.filter(Prefix=expected_key))
    (s3_obj,) = matches
    assert s3_obj.key == expected_key

    http_headers = s3_obj.get()["ResponseMetadata"]["HTTPHeaders"]
    assert http_headers["content-type"] == "image/png"
def test_new_icon_found_domain(
    session, requests_mock, bg_client: TestAdapter[PickleMessage], mock_s3
):
    """A previously unseen icon sent without a page URL is stored as a domain icon.

    Sends a ``NewIconFound`` event carrying only the icon URL, then checks
    the ``DomainIcon`` row and the uploaded S3 object.
    """
    icon_url = URL.from_string(f"http://{random_string()}.example.com/favicon.ico")

    # Read the random image once; the same bytes feed both the expected hash
    # and the mocked HTTP response body.
    image_bytes = random_image_fileobj().read()
    expected_hash = hashlib.blake2b(image_bytes).digest()

    requests_mock.add(
        responses.GET,
        url=icon_url.to_string(),
        body=image_bytes,
        status=200,
        stream=True,
    )
    requests_mock.start()

    upsert_url(session, icon_url)
    session.commit()

    message = PickleMessage.from_obj(NewIconFound(icon_url_uuid=icon_url.url_uuid))
    bg_client.send(message)

    joined = session.query(Icon, DomainIcon).join(DomainIcon)
    icon, domain_icon = joined.filter(
        DomainIcon.scheme == icon_url.scheme,
        DomainIcon.netloc == icon_url.netloc,
    ).first()
    assert icon.source_blake2b_hash == expected_hash
    assert domain_icon.scheme == icon_url.scheme
    assert domain_icon.netloc == icon_url.netloc

    # Exactly one object should exist under the icon's key.
    expected_key = f"{icon.icon_uuid}.png"
    matches = list(file_storage.get_icon_bucket().objects.filter(Prefix=expected_key))
    (s3_obj,) = matches
    assert s3_obj.key == expected_key

    http_headers = s3_obj.get()["ResponseMetadata"]["HTTPHeaders"]
    assert http_headers["content-type"] == "image/png"
def index_icon(
    session: Session,
    icon_url: URL,
    filelike: BinaryIO,
    blake2b,
    page_url: Optional[URL],
) -> None:
    """Record an icon against a domain or a page, uploading it if it is new.

    The icon is always recorded: as a domain icon when ``page_url`` is
    ``None``, otherwise as a page icon for ``page_url``.  The image itself is
    converted and uploaded only when no icon with the same hash is already
    known.

    Args:
        session: Database session passed to the lookup and record helpers.
        icon_url: URL the icon was fetched from.
        filelike: Binary file object holding the fetched icon.
        blake2b: Hash object exposing ``digest()`` and ``hexdigest()``
            (presumably computed over the contents of ``filelike`` by the
            caller).
        page_url: Page the icon belongs to, or ``None`` for a domain icon.
    """
    digest = blake2b.digest()
    for_domain = page_url is None

    # The lookup must happen before recording, which may itself make the
    # hash known.
    already_stored = have_icon_by_hash(session, digest)
    if already_stored:
        log.info("already have icon: %s (hash: %s)", icon_url, blake2b.hexdigest())

    if for_domain:
        icon_uuid = record_domain_icon(session, icon_url, digest)
    else:
        icon_uuid = record_page_icon(session, icon_url, cast(URL, page_url), digest)

    if already_stored:
        # The image is already known by hash; nothing to convert or upload.
        return

    bucket = file_storage.get_icon_bucket()
    converted = convert_icon(filelike, ICON_SIZE)
    file_storage.upload_icon(bucket, icon_uuid, converted)

    if for_domain:
        log.info("indexed domain icon: %s (hash: %s)", icon_url, blake2b.hexdigest())
        return

    log.info(
        "indexed page icon: %s for %s (hash: %s)",
        icon_url,
        page_url,
        blake2b.hexdigest(),
    )