Example #1
0
def test_get_icon(signed_in_client, mock_s3):
    """Check that an uploaded icon is served with a 200 and no cookies.

    A fixture PNG is uploaded to the icon bucket under a fresh UUID and then
    requested through the ``quarchive-icons.icon_by_uuid`` endpoint using the
    signed-in test client.
    """
    icon_uuid = uuid4()
    # Read-only binary mode: the fixture is only read for the upload, so there
    # is no reason to ask for write access to the test data file.
    with open(path.join(test_data_path, "wikipedia-32px.png"),
              "rb") as wikipedia_icon_f:
        file_storage.upload_icon(file_storage.get_icon_bucket(), icon_uuid,
                                 wikipedia_icon_f)

    response = signed_in_client.get(
        flask.url_for("quarchive-icons.icon_by_uuid", icon_uuid=icon_uuid))
    assert response.status_code == 200

    # If cookies are set CDNs and other caches generally will not cache the image
    assert "Set-Cookie" not in response.headers
Example #2
0
def icon_by_uuid(icon_uuid: UUID) -> flask.Response:
    """Return the stored icon for ``icon_uuid`` as a publicly cacheable PNG.

    The icon is downloaded from the icon bucket and wrapped in a
    ``flask.Response`` with mimetype ``image/png``.  A warning is logged on
    every call because icons are not meant to be served from Python in
    production; this endpoint exists only for completeness.
    """
    log.warning("serving icon %s directly", icon_uuid)

    icon_body = file_storage.download_icon(file_storage.get_icon_bucket(),
                                           icon_uuid)
    resp = flask.Response(icon_body, mimetype="image/png")

    # Since we are serving it anyway, let caches keep it for a long time so it
    # only has to be served once: 366 days, expressed in seconds.
    seconds_per_day = 24 * 60 * 60
    resp.cache_control.max_age = 366 * seconds_per_day
    resp.cache_control.public = True
    return resp
Example #3
0
def test_new_icon_found_for_page_icon(session, requests_mock,
                                      bg_client: TestAdapter[PickleMessage],
                                      mock_s3):
    """A newly found page icon that matches no existing icon should be
    retrieved, indexed and stored.

    """
    page_url = URL.from_string(f"http://{random_string()}.example.com/")
    favicon_url = page_url.follow("/favicon.png")

    image_fileobj = random_image_fileobj()
    expected_hash = hashlib.blake2b(image_fileobj.read()).digest()
    # Rewind so the registered response body is read from the start again.
    image_fileobj.seek(0)
    image_bytes = image_fileobj.read()
    requests_mock.add(
        responses.GET,
        url=favicon_url.to_string(),
        body=image_bytes,
        status=200,
        stream=True,
    )
    requests_mock.start()

    for known_url in (page_url, favicon_url):
        upsert_url(session, known_url)
    session.commit()

    found_event = NewIconFound(icon_url_uuid=favicon_url.url_uuid,
                               page_url_uuid=page_url.url_uuid)
    bg_client.send(PickleMessage.from_obj(found_event))

    # The handler should have linked an Icon row to the page via URLIcon.
    icon_query = session.query(Icon, URLIcon).join(URLIcon).filter(
        URLIcon.url_uuid == page_url.url_uuid)
    icon, url_icon = icon_query.first()
    assert icon.source_blake2b_hash == expected_hash

    assert url_icon.url_uuid == page_url.url_uuid

    # Exactly one object is expected in the icon bucket under "<uuid>.png".
    expected_key = f"{icon.icon_uuid}.png"
    stored_objects = list(
        file_storage.get_icon_bucket().objects.filter(Prefix=expected_key))
    (s3_obj, ) = stored_objects
    assert s3_obj.key == expected_key
    http_headers = s3_obj.get()["ResponseMetadata"]["HTTPHeaders"]
    assert http_headers["content-type"] == "image/png"
Example #4
0
def test_new_icon_found_domain(session, requests_mock,
                               bg_client: TestAdapter[PickleMessage], mock_s3):
    """A newly found icon sent without a page URL should be indexed as a
    domain icon and stored in the icon bucket as a PNG.

    """
    favicon_url = URL.from_string(
        f"http://{random_string()}.example.com/favicon.ico")

    image_fileobj = random_image_fileobj()
    expected_hash = hashlib.blake2b(image_fileobj.read()).digest()
    # Rewind so the registered response body is read from the start again.
    image_fileobj.seek(0)
    image_bytes = image_fileobj.read()
    requests_mock.add(
        responses.GET,
        url=favicon_url.to_string(),
        body=image_bytes,
        status=200,
        stream=True,
    )
    requests_mock.start()

    upsert_url(session, favicon_url)
    session.commit()

    # No page_url_uuid is supplied with this event.
    found_event = NewIconFound(icon_url_uuid=favicon_url.url_uuid)
    bg_client.send(PickleMessage.from_obj(found_event))

    icon_query = session.query(Icon, DomainIcon).join(DomainIcon).filter(
        DomainIcon.scheme == favicon_url.scheme,
        DomainIcon.netloc == favicon_url.netloc)
    icon, domain_icon = icon_query.first()
    assert icon.source_blake2b_hash == expected_hash

    assert domain_icon.scheme == favicon_url.scheme
    assert domain_icon.netloc == favicon_url.netloc

    # Exactly one object is expected in the icon bucket under "<uuid>.png".
    expected_key = f"{icon.icon_uuid}.png"
    stored_objects = list(
        file_storage.get_icon_bucket().objects.filter(Prefix=expected_key))
    (s3_obj, ) = stored_objects
    assert s3_obj.key == expected_key
    http_headers = s3_obj.get()["ResponseMetadata"]["HTTPHeaders"]
    assert http_headers["content-type"] == "image/png"
Example #5
0
def index_icon(
    session: Session,
    icon_url: URL,
    filelike: BinaryIO,
    blake2b,
    page_url: Optional[URL],
) -> None:
    """Record an icon and, if its hash is not already known, upload it.

    Args:
        session: Database session passed through to the lookup and record
            helpers.
        icon_url: URL the icon was fetched from.
        filelike: Binary file object holding the icon data; only read (via
            ``convert_icon``) when the icon is not already known by hash.
        blake2b: Hash object exposing ``digest()`` and ``hexdigest()`` --
            presumably a ``hashlib.blake2b`` over the icon bytes; confirm
            against the caller.
        page_url: Page the icon belongs to, or ``None`` for a domain icon.
    """
    is_domain_icon = page_url is None

    # The hash lookup runs before any record_* call, so it reflects the state
    # prior to this icon being recorded.
    already_stored = have_icon_by_hash(session, blake2b.digest())
    if already_stored:
        log.info("already have icon: %s (hash: %s)", icon_url,
                 blake2b.hexdigest())

    # Recording happens whether or not the icon bytes are already stored.
    if is_domain_icon:
        icon_uuid = record_domain_icon(session, icon_url, blake2b.digest())
    else:
        icon_uuid = record_page_icon(session, icon_url, cast(URL, page_url),
                                     blake2b.digest())

    # Only convert and upload when the hash was not found above.
    if not already_stored:
        bucket = file_storage.get_icon_bucket()
        file_storage.upload_icon(bucket, icon_uuid,
                                 convert_icon(filelike, ICON_SIZE))

    if is_domain_icon:
        log.info("indexed domain icon: %s (hash: %s)", icon_url,
                 blake2b.hexdigest())
        return

    log.info(
        "indexed page icon: %s for %s (hash: %s)",
        icon_url,
        page_url,
        blake2b.hexdigest(),
    )