def view(entity_id):
    """
    ---
    get:
      summary: Get an entity
      description: Return the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    # NOTE(review): the docstring above is an OpenAPI operation spec written
    # as YAML (note the leading `---`); presumably it is parsed by a spec
    # generator at runtime, so keep it valid YAML -- confirm before editing.
    enable_cache()

    # Fetch the entity from the index, leaving out the "text" and "numeric.*"
    # fields; `request.authz.READ` is passed as the second argument
    # (presumably the access level required -- confirm in get_index_entity).
    entity = get_index_entity(
        entity_id,
        request.authz.READ,
        excludes=["text", "numeric.*"],
    )
    tag_request(collection_id=entity.get("collection_id"))

    # Read the first value of each HTML-related property from the proxy,
    # in the order: body HTML, source URL, encoding.
    entity_proxy = model.get_proxy(entity)
    body_html, source_url, encoding = (
        entity_proxy.first(prop, quiet=True)
        for prop in ("bodyHtml", "sourceUrl", "encoding")
    )

    # Attach the sanitized HTML and mark the payload as non-shallow before
    # serializing it into the response.
    entity["safeHtml"] = sanitize_html(body_html, source_url, encoding=encoding)
    entity["shallow"] = False
    return EntitySerializer.jsonify(entity)
def test_sanitize_html(self):
    """Check what ``sanitize_html`` strips, keeps and rewrites.

    The fixture contains ``<img>``, ``<video>``, ``<style>`` and
    ``<script>`` elements, an inline ``onmouseover`` handler and a relative
    link. The assertions expect those four elements and the handler to be
    gone, the single ``<article>`` to remain, and the link to come back
    absolute (resolved against the URL passed in), with ``target="_blank"``
    and a ``rel`` containing ``nofollow``.
    """
    # Adjacent literals concatenate into the same single-line document the
    # sanitizer is fed; the split is only for readability.
    html_str = (
        '<!doctype html><html><head><title>Article</title>'
        '<style type="text/css">body { }</style>'
        '<script>alert("We love Angular")</script>'
        '<link rel="stylesheet" href="http://xss.rocks/xss.css"></head>'
        '<body><article id="story">'
        '<h1>We welcome our new React overlords</h1>'
        '<img src=" javascript:alert(\'XSS\');" alt="" />'
        '<p>Published on <time onmouseover="alert(\'XSS\')">'
        '1 January 2018</time></p>'
        '<p>Really the only thing better than the '
        '<a href="/blockchain">blockchain</a> is ReactJS.</p>'
        '</article><video> <source onerror = "javascript: alert (XSS)">'
        '</video>'
        '</body></html>'
    )
    cleaned = sanitize_html(html_str, "https://example.org/welcome-react")
    html = document_fromstring(cleaned)

    # Elements that must not be present in the sanitized output.
    for removed_path in (".//img", ".//video", ".//style", ".//script"):
        assert html.find(removed_path) is None, html

    # The article itself must still be there, exactly once.
    articles = html.findall(".//article")
    assert len(articles) == 1, html

    # The inline event handler attribute must be gone.
    handler = html.find(".//time").get("onmouseover")
    assert handler is None, html

    # The relative link is made absolute and gets target/rel attributes.
    link = html.find(".//a")
    assert link.get("href") == "https://example.org/blockchain", html
    assert link.get("target") == "_blank", html
    assert "nofollow" in link.get("rel"), html