Ejemplo n.º 1
0
def test_parse_document_raises_if_uri_not_a_string(es_annotation_doc):
    """A non-string target source is rejected with reason "uri_not_a_string"."""
    # Replace the first target's source with an integer.
    first_target = es_annotation_doc["_source"]["target"][0]
    first_target["source"] = 52

    with pytest.raises(util.InvalidAnnotationError) as raised:
        util.parse_document(es_annotation_doc)

    reason = raised.value.reason
    assert reason == "uri_not_a_string"
Ejemplo n.º 2
0
def test_parse_document_raises_if_no_uri():
    """An empty "_source" is rejected with reason "annotation_has_no_uri"."""
    # The "_source" body is empty, so it carries no "uri".
    document_without_uri = {"_id": "annotation_id", "_source": {}}

    with pytest.raises(util.InvalidAnnotationError) as raised:
        util.parse_document(document_without_uri)

    assert raised.value.reason == "annotation_has_no_uri"
Ejemplo n.º 3
0
def test_parse_document_raises_if_no_uri():
    """parse_document() reports "annotation_has_no_uri" for an empty source."""
    es_document = dict(_id="annotation_id", _source={})  # No "uri".

    with pytest.raises(util.InvalidAnnotationError) as error_info:
        util.parse_document(es_document)

    failure_reason = error_info.value.reason
    assert failure_reason == "annotation_has_no_uri"
Ejemplo n.º 4
0
def test_parse_document_raises_if_no_uri(es_annotation_doc):
    """Removing the first target's "source" yields "annotation_has_no_uri"."""
    first_target = es_annotation_doc["_source"]["target"][0]
    del first_target["source"]

    with pytest.raises(util.InvalidAnnotationError) as raised:
        util.parse_document(es_annotation_doc)

    reason = raised.value.reason
    assert reason == "annotation_has_no_uri"
Ejemplo n.º 5
0
def test_parse_document_raises_if_uri_not_a_string():
    """An integer target source is rejected with reason "uri_not_a_string"."""
    # "uri" isn't a string: the only target's source is the integer 52.
    non_string_target = {"source": 52}
    es_document = {
        "_id": "annotation_id",
        "_source": {"target": [non_string_target]},
    }

    with pytest.raises(util.InvalidAnnotationError) as raised:
        util.parse_document(es_document)

    assert raised.value.reason == "uri_not_a_string"
Ejemplo n.º 6
0
def test_parse_document_raises_when_uri_from_web_uri_not_string_for_pdfs():
    """A PDF-fingerprint target with a non-string web_uri is rejected.

    The target source is a "urn:x-pdf:" URN and the document's "web_uri" is
    an integer; parse_document() must fail with reason "uri_not_a_string".
    """
    source = {
        "target": [{"source": "urn:x-pdf:the-fingerprint"}],
        "document": {"web_uri": 52},
    }
    es_document = {"_id": "annotation_id", "_source": source}

    with pytest.raises(util.InvalidAnnotationError) as raised:
        util.parse_document(es_document)

    assert raised.value.reason == "uri_not_a_string"
Ejemplo n.º 7
0
def test_parse_document_raises_when_uri_from_web_uri_not_string_for_pdfs(
    es_annotation_doc,
):
    """A PDF-fingerprint target with a non-string web_uri is rejected.

    The fixture document is changed so its first target points at a
    "urn:x-pdf:" URN and its "document" carries an integer "web_uri".
    """
    source = es_annotation_doc["_source"]
    source["target"][0]["source"] = "urn:x-pdf:the-fingerprint"
    source["document"] = {"web_uri": 52}

    with pytest.raises(util.InvalidAnnotationError) as raised:
        util.parse_document(es_annotation_doc)

    reason = raised.value.reason
    assert reason == "uri_not_a_string"
Ejemplo n.º 8
0
def test_parse_document_raises_if_annotated_deleted():
    """parse_document() raises DeletedAnnotationError for a deleted document."""
    # When an annotation is deleted in h it isn't immediately removed from the
    # search index. Its Elasticsearch document is temporarily updated to just
    # {'deleted': True}.
    deleted_document = {
        "_id": "annotation_id",
        "_source": {
            "deleted": True,
        },
    }

    # Only the exception type is checked, so the ExceptionInfo is not bound.
    with pytest.raises(util.DeletedAnnotationError):
        util.parse_document(deleted_document)
Ejemplo n.º 9
0
def test_parse_document_raises_if_uri_not_a_string():
    """parse_document() reports "uri_not_a_string" for an integer source."""
    # "uri" isn't a string.
    source = {"target": [{"source": 52}]}
    es_document = {"_id": "annotation_id", "_source": source}

    with pytest.raises(util.InvalidAnnotationError) as error_info:
        util.parse_document(es_document)

    failure_reason = error_info.value.reason
    assert failure_reason == "uri_not_a_string"
Ejemplo n.º 10
0
def test_parse_document_raises_when_uri_from_web_uri_not_string_for_pdfs():
    """An integer web_uri on a "urn:x-pdf:" annotation is rejected.

    parse_document() must raise InvalidAnnotationError with reason
    "uri_not_a_string".
    """
    pdf_target = {"source": "urn:x-pdf:the-fingerprint"}
    document_metadata = {"web_uri": 52}
    es_document = {
        "_id": "annotation_id",
        "_source": {"target": [pdf_target], "document": document_metadata},
    }

    with pytest.raises(util.InvalidAnnotationError) as error_info:
        util.parse_document(es_document)

    assert error_info.value.reason == "uri_not_a_string"
Ejemplo n.º 11
0
def test_parse_document_returns_annotation_id():
    """The first element of parse_document()'s result is the document "_id"."""
    es_document = {
        "_id": "annotation_id",
        "_source": {"target": [{"source": "http://example.com/example.html"}]},
    }

    parsed = util.parse_document(es_document)

    assert parsed[0] == "annotation_id"
Ejemplo n.º 12
0
def test_parse_document_returns_document_uri_from_web_uri_when_pdf(es_annotation_doc):
    """For a "urn:x-pdf:" target, "document_uri" is the document's web_uri."""
    source = es_annotation_doc["_source"]
    source["target"][0]["source"] = "urn:x-pdf:the-fingerprint"
    source["document"] = {"web_uri": "http://example.com/foo.pdf"}

    parsed = util.parse_document(es_annotation_doc)

    assert parsed["document_uri"] == "http://example.com/foo.pdf"
Ejemplo n.º 13
0
def test_parse_document_returns_boilerplate_quote_when_no_quote(
    es_annotation_doc, selector
):
    """The parsed "quote" is the boilerplate text for the supplied selector.

    ``selector`` is provided from outside this function (presumably by a
    parametrize decorator -- confirm against the full file). A truthy value
    is installed on the first target before parsing; a falsy value leaves
    the fixture document untouched.
    """
    if selector:
        first_target = es_annotation_doc["_source"]["target"][0]
        first_target["selector"] = selector

    parsed = util.parse_document(es_annotation_doc)

    assert parsed["quote"] == "Hypothesis annotation for example.com"
Ejemplo n.º 14
0
def test_parse_document_returns_document_uri():
    """The second element of parse_document()'s result is the target source."""
    es_document = {
        "_id": "annotation_id",
        "_source": {"target": [{"source": "http://example.com/example.html"}]},
    }

    parsed = util.parse_document(es_document)

    assert parsed[1] == "http://example.com/example.html"
Ejemplo n.º 15
0
def test_parse_document_returns_quote(es_annotation_doc):
    """The parsed "quote" is the "exact" text of a TextQuoteSelector."""
    text_quote_selector = {"type": "TextQuoteSelector", "exact": "test_quote"}
    first_target = es_annotation_doc["_source"]["target"][0]
    first_target["selector"] = [text_quote_selector]

    parsed = util.parse_document(es_annotation_doc)

    assert parsed["quote"] == "test_quote"
Ejemplo n.º 16
0
    def annotation(self):
        """Build the template data for the annotation named in the request.

        The annotation's Elasticsearch document is fetched using the "id"
        entry of the request's matchdict and parsed into an annotation id
        and a document URI.

        Returns a dict with two keys: "data", a JSON string holding
        "chromeExtensionId", "viaUrl" and "extensionUrl"; and "pretty_url",
        the document URI's netloc cut to NETLOC_MAX_LENGTH characters (with
        an ellipsis appended when it was longer).

        Raises HTTPNotFound if Elasticsearch has no such document, and
        HTTPUnprocessableEntity if the document cannot be parsed or its URI
        is not an http:// or https:// URL. A statsd counter is incremented
        for each of these outcomes and for success.
        """
        settings = self.request.registry.settings

        try:
            es_client = util.elasticsearch_client(settings)
            document = es_client.get(
                index=settings["elasticsearch_index"],
                doc_type="annotation",
                id=self.request.matchdict["id"],
            )
        except exceptions.NotFoundError:
            statsd.incr("views.annotation.404.annotation_not_found")
            raise httpexceptions.HTTPNotFound(_("Annotation not found"))

        try:
            annotation_id, document_uri = util.parse_document(document)
        except util.InvalidAnnotationError as exc:
            counter_name = "views.annotation.422.{}".format(exc.reason)
            statsd.incr(counter_name)
            raise httpexceptions.HTTPUnprocessableEntity(str(exc))

        # Drop any #fragment the URI already has; ours is appended below.
        document_uri, _fragment = parse.urldefrag(document_uri)

        if not document_uri.startswith(("http://", "https://")):
            statsd.incr("views.annotation.422.not_an_http_or_https_document")
            message = _(
                "Sorry, but it looks like this annotation was made on a "
                "document that is not publicly available."
            )
            raise httpexceptions.HTTPUnprocessableEntity(message)

        via_url = "{via_base_url}/{uri}#annotations:{id}".format(
            via_base_url=settings["via_base_url"],
            uri=document_uri,
            id=annotation_id,
        )
        extension_url = "{uri}#annotations:{id}".format(
            uri=document_uri, id=annotation_id
        )

        # Show only the host part of the URI, shortened if it is too long.
        netloc = parse.urlparse(document_uri).netloc
        if len(netloc) > NETLOC_MAX_LENGTH:
            pretty_url = netloc[:NETLOC_MAX_LENGTH] + jinja2.Markup("…")
        else:
            pretty_url = netloc[:NETLOC_MAX_LENGTH]

        statsd.incr("views.annotation.200.annotation_found")

        # Warning: variable names change from python_style to
        # javaScriptStyle here!
        client_data = {
            "chromeExtensionId": settings["chrome_extension_id"],
            "viaUrl": via_url,
            "extensionUrl": extension_url,
        }
        return {"data": json.dumps(client_data), "pretty_url": pretty_url}
Ejemplo n.º 17
0
def test_parse_document_returns_document_uri():
    """Index 1 of parse_document()'s result holds the first target's source."""
    html_target = {"source": "http://example.com/example.html"}
    es_document = {"_id": "annotation_id", "_source": {"target": [html_target]}}

    result = util.parse_document(es_document)
    document_uri = result[1]

    assert document_uri == "http://example.com/example.html"
Ejemplo n.º 18
0
def test_parse_document_returns_annotation_id():
    """Index 0 of parse_document()'s result holds the document's "_id"."""
    html_target = {"source": "http://example.com/example.html"}
    es_document = {"_id": "annotation_id", "_source": {"target": [html_target]}}

    result = util.parse_document(es_document)
    annotation_id = result[0]

    assert annotation_id == "annotation_id"
Ejemplo n.º 19
0
def test_parse_document_returns_document_uri_from_web_uri_when_pdf():
    """For a "urn:x-pdf:" target, the returned URI is the document's web_uri."""
    source = {
        "target": [{"source": "urn:x-pdf:the-fingerprint"}],
        "document": {"web_uri": "http://example.com/foo.pdf"},
    }
    es_document = {"_id": "annotation_id", "_source": source}

    parsed = util.parse_document(es_document)

    # The document URI is the second element of the returned sequence.
    assert parsed[1] == "http://example.com/foo.pdf"
Ejemplo n.º 20
0
    def annotation(self):
        """Return the template data for the annotation the request asks for.

        Looks up the Elasticsearch document whose id is the "id" entry of
        the request's matchdict, parses it into an annotation id and a
        document URI, and derives the URLs handed to the page.

        The result is a dict with "data" (a JSON string containing
        "chromeExtensionId", "viaUrl" and "extensionUrl") and "pretty_url"
        (the URI's netloc, at most NETLOC_MAX_LENGTH characters, followed by
        an ellipsis when it had to be shortened).

        Raises HTTPNotFound when the document is missing from Elasticsearch
        and HTTPUnprocessableEntity when it cannot be parsed or its URI is
        neither http:// nor https://. Each outcome increments a statsd
        counter.
        """
        request = self.request
        settings = request.registry.settings

        try:
            client = util.elasticsearch_client(settings)
            document = client.get(
                index=settings["elasticsearch_index"],
                doc_type="annotation",
                id=request.matchdict["id"],
            )
        except exceptions.NotFoundError:
            statsd.incr("views.annotation.404.annotation_not_found")
            raise httpexceptions.HTTPNotFound(_("Annotation not found"))

        try:
            annotation_id, document_uri = util.parse_document(document)
        except util.InvalidAnnotationError as exc:
            statsd.incr("views.annotation.422.{}".format(exc.reason))
            raise httpexceptions.HTTPUnprocessableEntity(str(exc))

        # Strip an existing #fragment identifier so that we can append our
        # own one below.
        document_uri = parse.urldefrag(document_uri)[0]

        is_web_document = document_uri.startswith(("http://", "https://"))
        if not is_web_document:
            statsd.incr("views.annotation.422.not_an_http_or_https_document")
            raise httpexceptions.HTTPUnprocessableEntity(
                _(
                    "Sorry, but it looks like this annotation was made on a "
                    "document that is not publicly available."
                )
            )

        via_url = "{via_base_url}/{uri}#annotations:{id}".format(
            via_base_url=settings["via_base_url"],
            uri=document_uri,
            id=annotation_id,
        )
        extension_url = "{uri}#annotations:{id}".format(
            uri=document_uri,
            id=annotation_id,
        )

        # Only the host is displayed; over-long hosts are cut and marked.
        host = parse.urlparse(document_uri).netloc
        pretty_url = host[:NETLOC_MAX_LENGTH]
        if len(host) > NETLOC_MAX_LENGTH:
            pretty_url = pretty_url + jinja2.Markup("…")

        statsd.incr("views.annotation.200.annotation_found")

        # Warning: variable names change from python_style to
        # javaScriptStyle here!
        payload = json.dumps(
            {
                "chromeExtensionId": settings["chrome_extension_id"],
                "viaUrl": via_url,
                "extensionUrl": extension_url,
            }
        )
        return {"data": payload, "pretty_url": pretty_url}
Ejemplo n.º 21
0
def test_parse_document_returns_document_uri_from_web_uri_when_pdf():
    """A PDF-fingerprint annotation resolves to its document's web_uri."""
    pdf_target = {"source": "urn:x-pdf:the-fingerprint"}
    document_metadata = {"web_uri": "http://example.com/foo.pdf"}
    es_document = {
        "_id": "annotation_id",
        "_source": {"target": [pdf_target], "document": document_metadata},
    }

    result = util.parse_document(es_document)
    document_uri = result[1]

    assert document_uri == "http://example.com/foo.pdf"
Ejemplo n.º 22
0
def test_parse_document_returns_text(es_annotation_doc):
    """The "text" stored in the document source comes back under "text"."""
    source = es_annotation_doc["_source"]
    source["text"] = "test_text"

    parsed = util.parse_document(es_annotation_doc)

    assert parsed["text"] == "test_text"
Ejemplo n.º 23
0
def test_parse_document_returns_authority(es_annotation_doc):
    """The unmodified fixture document parses to authority "hypothes.is"."""
    parsed = util.parse_document(es_annotation_doc)

    assert parsed["authority"] == "hypothes.is"
Ejemplo n.º 24
0
def test_parse_document_returns_annotation_id(es_annotation_doc):
    """The unmodified fixture document parses to id "annotation_id"."""
    parsed = util.parse_document(es_annotation_doc)

    assert parsed["annotation_id"] == "annotation_id"
Ejemplo n.º 25
0
def test_parse_document_returns_document_uri(es_annotation_doc):
    """The unmodified fixture document parses to the example.com HTML URI."""
    parsed = util.parse_document(es_annotation_doc)

    assert parsed["document_uri"] == "http://example.com/example.html"
Ejemplo n.º 26
0
    def annotation(self):
        """Build the template data for the annotation named in the request.

        The Elasticsearch document whose id is the "id" entry of the
        request's matchdict is fetched through ``self.request.es`` and
        parsed with ``util.parse_document``.

        Returns a dict with the keys "data" (a JSON string holding
        "chromeExtensionId", "viaUrl" and "extensionUrl"), "show_metadata",
        "pretty_url", "quote", "text" and "title". "viaUrl" is None unless
        ``_can_use_proxy`` allows the annotation's authority and
        ``url_embeds_client`` is false for the document URI.

        Raises HTTPNotFound when the document is missing or marked deleted,
        and HTTPUnprocessableEntity when it is invalid or its URI fails
        ``_is_valid_http_url``.
        """
        request = self.request
        settings = request.registry.settings

        try:
            document = request.es.get(
                index=settings["elasticsearch_index"],
                doc_type="annotation",
                id=request.matchdict["id"],
            )
        except exceptions.NotFoundError:
            raise httpexceptions.HTTPNotFound(_("Annotation not found"))

        try:
            parsed = util.parse_document(document)
        except util.DeletedAnnotationError:
            raise httpexceptions.HTTPNotFound(_("Annotation not found"))
        except util.InvalidAnnotationError as exc:
            raise httpexceptions.HTTPUnprocessableEntity(str(exc))

        authority = parsed["authority"]
        annotation_id = parsed["annotation_id"]
        document_uri = parsed["document_uri"]
        show_metadata = parsed["show_metadata"]
        quote = parsed["quote"]
        text = parsed["text"]

        # Drop any #fragment the URI already has; ours is appended below.
        document_uri = parse.urldefrag(document_uri)[0]

        if not _is_valid_http_url(document_uri):
            message = _(
                "Sorry, but it looks like this annotation was made on a "
                "document that is not publicly available."
            )
            raise httpexceptions.HTTPUnprocessableEntity(message)

        # A proxied URL is only offered when the proxy may be used for this
        # authority and url_embeds_client() is false for the URI.
        proxy_applies = _can_use_proxy(
            settings, authority=authority
        ) and not url_embeds_client(document_uri)
        if proxy_applies:
            via_url = "{via_base_url}/{uri}#annotations:{id}".format(
                via_base_url=settings["via_base_url"],
                uri=document_uri,
                id=annotation_id,
            )
        else:
            via_url = None

        extension_url = "{uri}#annotations:{id}".format(
            uri=document_uri,
            id=annotation_id,
        )
        pretty_url = util.get_pretty_url(document_uri)
        title = util.get_boilerplate_quote(document_uri)

        # Warning: variable names change from python_style to
        # javaScriptStyle here!
        client_data = {
            "chromeExtensionId": settings["chrome_extension_id"],
            "viaUrl": via_url,
            "extensionUrl": extension_url,
        }
        return {
            "data": json.dumps(client_data),
            "show_metadata": show_metadata,
            "pretty_url": pretty_url,
            "quote": quote,
            "text": text,
            "title": title,
        }
Ejemplo n.º 27
0
def test_parse_document_returns_boilerplate_when_no_text(es_annotation_doc):
    """The unmodified fixture parses to util.ANNOTATION_BOILERPLATE_TEXT."""
    parsed = util.parse_document(es_annotation_doc)

    assert parsed["text"] == util.ANNOTATION_BOILERPLATE_TEXT
Ejemplo n.º 28
0
def test_parse_document_returns_show_metadata_true_when_shared_and_world(
    es_annotation_doc,
):
    """The unmodified fixture document parses to show_metadata True.

    NOTE(review): per the test name the fixture is presumably a shared,
    world-readable annotation -- confirm against the fixture definition.
    """
    parsed = util.parse_document(es_annotation_doc)

    assert parsed["show_metadata"] is True