Esempio n. 1
0
def view(entity_id):
    """
    ---
    get:
      summary: Get an entity
      description: Return the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    # NOTE(review): the docstring above is an OpenAPI operation spec written
    # in YAML, presumably consumed by an API-doc generator -- keep it
    # machine-readable when editing.
    enable_cache()

    # Load the entity with the READ permission marker from the request's
    # authz; the "text" and "numeric.*" patterns are passed as excludes.
    entity = get_index_entity(
        entity_id,
        request.authz.READ,
        excludes=["text", "numeric.*"],
    )
    tag_request(collection_id=entity.get("collection_id"))

    # Read the first value of each property needed to sanitise the HTML
    # body, in the order: body, source URL, encoding. ``quiet=True`` is
    # forwarded to every lookup.
    proxy = model.get_proxy(entity)
    raw_html, base_url, charset = (
        proxy.first(prop, quiet=True)
        for prop in ("bodyHtml", "sourceUrl", "encoding")
    )

    # Attach the sanitised markup and flag the entity as non-shallow
    # before serialising it.
    entity["safeHtml"] = sanitize_html(raw_html, base_url, encoding=charset)
    entity["shallow"] = False
    return EntitySerializer.jsonify(entity)
Esempio n. 2
0
 def test_sanitize_html(self):
     """Check what ``sanitize_html`` removes and rewrites in a hostile page.

     The fixture contains style, script, img and video elements, an inline
     ``onmouseover`` handler, and one relative link. The assertions require
     that those elements and the handler are gone, that the single
     ``<article>`` survives, and that the link is made absolute against the
     base URL and carries ``target="_blank"`` plus a ``nofollow`` rel.
     """
     html_str = '<!doctype html><html><head><title>Article</title><style type="text/css">body { }</style><script>alert("We love Angular")</script><link rel="stylesheet" href="http://xss.rocks/xss.css"></head><body><article id="story"><h1>We welcome our new React overlords</h1><img src="&#14;  javascript:alert(\'XSS\');" alt="" /><p>Published on <time onmouseover="alert(\'XSS\')">1 January 2018</time></p><p>Really the only thing better than the <a href="/blockchain">blockchain</a> is ReactJS.</p></article><video> <source onerror = "javascript: alert (XSS)"></video></body></html>'  # noqa
     base_url = "https://example.org/welcome-react"
     doc = document_fromstring(sanitize_html(html_str, base_url))

     # Elements that must not survive sanitisation.
     for tag in ("img", "video", "style", "script"):
         assert doc.find(".//%s" % tag) is None, doc

     # The article content itself is kept, exactly once.
     articles = doc.findall(".//article")
     assert len(articles) == 1, doc

     # Inline event handlers are dropped from elements that are kept.
     time_el = doc.find(".//time")
     assert time_el.get("onmouseover") is None, doc

     # The relative link is resolved against the base URL and hardened.
     link = doc.find(".//a")
     assert link.get("href") == "https://example.org/blockchain", doc
     assert link.get("target") == "_blank", doc
     assert "nofollow" in link.get("rel"), doc