def test_clean_tag_node_with_data_linktype(self):
    """A link with data-linktype/data-id is reduced to its id and linktype."""
    markup = '<a data-linktype="document" data-id="1" irrelevant="baz">foo</a>'
    soup = BeautifulSoup(markup, 'html5lib')
    anchor = soup.a

    DbWhitelister.clean_tag_node(soup, anchor)

    # The data- prefixes are gone and the unknown attribute is dropped.
    self.assertEqual(str(anchor), '<a id="1" linktype="document">foo</a>')
def editor_js():
    """Return the script markup that wires DOMPurify into the Hallo editor.

    The result is the ``<script src>`` includes for the bundled JS files
    followed by an inline script registering the ``dompurify`` Hallo plugin
    with the list of tag names DbWhitelister has rules for.
    """
    script_names = (
        'waf/purify.1.0.2.min.js',
        'waf/rangy-core.1.3.0.js',
        'waf/rangy-selectionsaverestore.1.3.0.js',
        'waf/hallo-dompurify.js',
    )
    script_tags = format_html_join(
        '\n',
        '<script src="{0}{1}"></script>',
        ((settings.STATIC_URL, script_name) for script_name in script_names),
    )

    # Run a throwaway clean so that the construct_whitelister_element_rules
    # hooks have been applied before element_rules is inspected.
    DbWhitelister.clean('')

    allowed_tags = ['#text']
    allowed_tags.extend(
        rule_name
        for rule_name in DbWhitelister.element_rules
        if re.match('[a-z]+', rule_name)
    )

    # DOMPurify ends up more permissive than DbWhitelister because attributes
    # are not filtered here; deriving an attribute list from DbWhitelister's
    # rules is not practical.
    quoted_tags = format_html_join(', ', "'{}'", ((tag, ) for tag in allowed_tags))
    init_script = format_html(
        """
        <script>
            (function() {{
                var config = {{
                    ALLOWED_TAGS: [{allowed_tags}],
                    KEEP_CONTENT: true
                }};
                registerHalloPlugin('dompurify', config);
            }})();
        </script>
        """,
        allowed_tags=quoted_tags,
    )
    return script_tags + init_script
def test_clean_tag_node_with_data_linktype(self):
    """A link with data-linktype/data-id is reduced to its id and linktype."""
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser is installed (and warns), so the parsed tree is not guaranteed
    # to be the same on every machine.
    soup = BeautifulSoup(
        '<a data-linktype="document" data-id="1" irrelevant="baz">foo</a>',
        'html5lib'
    )
    tag = soup.a

    DbWhitelister.clean_tag_node(soup, tag)

    self.assertEqual(str(tag), '<a id="1" linktype="document">foo</a>')
def test_clean_tag_node(self):
    """Attributes with no whitelist rule are stripped from a plain link."""
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser is installed (and warns), so the parsed tree is not guaranteed
    # to be the same on every machine.
    soup = BeautifulSoup('<a irrelevant="baz">foo</a>', 'html5lib')
    tag = soup.a

    DbWhitelister.clean_tag_node(soup, tag)

    self.assertEqual(str(tag), '<a>foo</a>')
def test_clean_tag_node_div(self):
    """Cleaning a <div> node renames it to <p>."""
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser is installed (and warns), so the parsed tree is not guaranteed
    # to be the same on every machine.
    soup = BeautifulSoup('<div>foo</div>', 'html5lib')
    tag = soup.div
    self.assertEqual(tag.name, 'div')

    DbWhitelister.clean_tag_node(soup, tag)

    self.assertEqual(tag.name, 'p')
def test_clean_tag_node_with_data_embedtype(self):
    """An element carrying data-embedtype is replaced by an <embed> tag."""
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser is installed (and warns), so the parsed tree is not guaranteed
    # to be the same on every machine.
    soup = BeautifulSoup(
        '<p><a data-embedtype="image" data-id=1 data-format="left" data-alt="bar" irrelevant="baz">foo</a></p>',
        'html5lib'
    )
    tag = soup.p

    DbWhitelister.clean_tag_node(soup, tag)

    self.assertEqual(str(tag), '<p><embed alt="bar" embedtype="image" format="left" id="1"/></p>')
def test_clean_tag_node_with_data_embedtype(self):
    """An element carrying data-embedtype is replaced by an <embed> tag."""
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser is installed (and warns), so the parsed tree is not guaranteed
    # to be the same on every machine.
    soup = BeautifulSoup(
        '<p><a data-embedtype="image" data-id=1 data-format="left" data-alt="bar" irrelevant="baz">foo</a></p>',
        'html5lib'
    )
    tag = soup.p

    DbWhitelister.clean_tag_node(soup, tag)

    self.assertEqual(
        str(tag),
        '<p><embed alt="bar" embedtype="image" format="left" id="1"/></p>')
def test_whitelist_hooks(self):
    """Hook-added rules apply to DbWhitelister but not to the base Whitelister."""
    # The same markup is fed to both cleaners.
    input_html = (
        '<blockquote>I would put a tax on all people who'
        ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1"'
        ' target="_blank" tea="darjeeling">stand in water</a>.</blockquote>'
        '<p>- <character>Gumby</character></p>'
    )

    # wagtail.tests.wagtail_hooks overrides the whitelist to permit
    # <blockquote> and <a target="...">
    expected_with_hooks = (
        '<blockquote>I would put a tax on all people who'
        ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1"'
        ' target="_blank">stand in water</a>.</blockquote><p>- Gumby</p>'
    )
    self.assertHtmlEqual(expected_with_hooks, DbWhitelister.clean(input_html))

    # check that the base Whitelister class is unaffected by these custom
    # whitelist rules
    expected_without_hooks = (
        'I would put a tax on all people who'
        ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1">'
        'stand in water</a>.<p>- Gumby</p>'
    )
    self.assertHtmlEqual(expected_without_hooks, Whitelister.clean(input_html))
def test_document_link_is_rewritten(self):
    """A document link is cleaned down to its linktype and id, without href."""
    editor_markup = (
        '<p>Look at our <a data-linktype="document" data-id="1" href="/documents/1/brochure.pdf">'
        'horribly oversized brochure</a></p>'
    )
    stored_markup = '<p>Look at our <a linktype="document" id="1">horribly oversized brochure</a></p>'

    cleaned = DbWhitelister.clean(editor_markup)

    self.assertHtmlEqual(stored_markup, cleaned)
def test_whitelist_hooks(self):
    """Check that DbWhitelister keeps the extra elements and attributes.

    The additional allowed elements and attributes are registered in
    v1.wagtail_hooks, so cleaning this markup should return it unchanged.
    """
    markup = '''
        <span class="schema-container" itemprop="step" itemscope="" itemtype="http://schema.org/HowToSection">
            <h4 itemprop="name">Step 1: Learn about the debt</h4>
            <span class="schema-container" itemprop="itemListElement">
                <table>
                    <thead>
                        <tr>
                            <th>Col 1 header</th>
                            <th>Col 2 header</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td>Row 1 Col 1</td>
                            <td>Row 1 Col 2</td>
                        </tr>
                        <tr>
                            <td>Row 2 Col 1</td>
                            <td>Row 2 Col 2</td>
                        </tr>
                    </tbody>
                </table>
            </span>
        </span>
    '''

    cleaned = DbWhitelister.clean(markup)

    self.assertHTMLEqual(markup, cleaned)
def test_document_link_is_rewritten(self):
    """Cleaning a document link drops href and the data- attribute prefixes."""
    source = (
        '<p>Look at our <a data-linktype="document" data-id="1" href="/documents/1/brochure.pdf">'
        'horribly oversized brochure</a></p>'
    )
    wanted = '<p>Look at our <a linktype="document" id="1">horribly oversized brochure</a></p>'

    self.assertHtmlEqual(wanted, DbWhitelister.clean(source))
def value_from_datadict(self, data, files, name):
    """Return the submitted rich text in the form that gets stored.

    The raw value comes from the parent widget. ``None`` is passed through
    unchanged. On Wagtail 2 and later the value goes through
    ``self.converter.to_database_format``; on older versions it goes through
    ``DbWhitelister.clean``.
    """
    original_value = super(FroalaRichTextArea, self).value_from_datadict(data, files, name)
    if original_value is None:
        return None

    # Compare the numeric major version. A plain string comparison is
    # lexicographic, so e.g. "10.0" >= "2.0" evaluates to False.
    try:
        uses_converter = int(str(WAGTAIL_VERSION).split(".", 1)[0]) >= 2
    except ValueError:
        # Version is not in "<major>.<rest>" form: keep the previous check.
        uses_converter = WAGTAIL_VERSION >= "2.0"

    if uses_converter:
        return self.converter.to_database_format(original_value)
    return DbWhitelister.clean(original_value)
def test_page_link_is_rewritten(self):
    """A page link loses its href; an ordinary external link is untouched."""
    editor_markup = (
        '<p>Look at the <a data-linktype="page" data-id="2" href="/">lovely homepage</a>'
        ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>'
    )
    stored_markup = (
        '<p>Look at the <a linktype="page" id="2">lovely homepage</a>'
        ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>'
    )

    cleaned = DbWhitelister.clean(editor_markup)

    self.assertHtmlEqual(stored_markup, cleaned)
def value_from_datadict(self, data, files, name):
    """Return the submitted rich text in the form that gets stored.

    ``None`` from the parent widget is passed through. When
    ``wagtail_version`` equals 2 the value is converted with
    ``self.converter``; otherwise it is passed to ``DbWhitelister.clean``.
    """
    submitted = super().value_from_datadict(data, files, name)
    if submitted is None:
        return None

    if wagtail_version == 2:
        return self.converter.to_database_format(submitted)
    return DbWhitelister.clean(submitted)
def test_image_embed_is_rewritten(self):
    """An image <figure> from the editor is stored as a single <embed> tag."""
    editor_markup = (
        '<p>OMG look at this picture of a kitten:</p><figure data-embedtype="image" data-id="5"'
        ' data-format="image-with-caption" data-alt="A cute kitten" class="fancy-image">'
        '<img src="/media/images/kitten.jpg" width="320" height="200" alt="A cute kitten" />'
        '<figcaption>A kitten, yesterday.</figcaption></figure>'
    )
    stored_markup = (
        '<p>OMG look at this picture of a kitten:</p><embed embedtype="image" id="5"'
        ' format="image-with-caption" alt="A cute kitten" />'
    )

    cleaned = DbWhitelister.clean(editor_markup)

    self.assertHtmlEqual(stored_markup, cleaned)
def test_page_link_is_rewritten(self):
    """Cleaning rewrites the page link and leaves the external link alone."""
    source = (
        '<p>Look at the <a data-linktype="page" data-id="2" href="/">lovely homepage</a>'
        ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>'
    )
    wanted = (
        '<p>Look at the <a linktype="page" id="2">lovely homepage</a>'
        ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>'
    )

    self.assertHtmlEqual(wanted, DbWhitelister.clean(source))
def test_media_embed_is_rewritten(self):
    """A media <iframe> from the editor is stored as an <embed> with its url."""
    source = (
        '<p>OMG look at this video of a kitten: '
        '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" width="640"'
        ' height="480" src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></p>'
    )
    wanted = (
        '<p>OMG look at this video of a kitten:'
        ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p>'
    )

    cleaned = DbWhitelister.clean(source)

    self.assertHtmlEqual(wanted, cleaned)
def value_from_datadict(self, data, files, name):
    """
    Return the submitted value passed through DbWhitelister.clean.

    Wagtail calls this method when the page is saved. Cf
    https://github.com/torchbox/wagtail/blob/master/wagtail/wagtailadmin/rich_text.py#L29

    A ``None`` value from the parent widget is returned as-is.
    """
    submitted = super(MediumRichTextArea, self).value_from_datadict(data, files, name)
    return None if submitted is None else DbWhitelister.clean(submitted)
def test_image_embed_is_rewritten(self):
    """Cleaning collapses an image figure, img and caption into one <embed>."""
    source = (
        '<p>OMG look at this picture of a kitten:</p><figure data-embedtype="image" data-id="5"'
        ' data-format="image-with-caption" data-alt="A cute kitten" class="fancy-image">'
        '<img src="/media/images/kitten.jpg" width="320" height="200" alt="A cute kitten" />'
        '<figcaption>A kitten, yesterday.</figcaption></figure>'
    )
    wanted = (
        '<p>OMG look at this picture of a kitten:</p><embed embedtype="image" id="5"'
        ' format="image-with-caption" alt="A cute kitten" />'
    )

    self.assertHtmlEqual(wanted, DbWhitelister.clean(source))
def test_div_conversion(self):
    """A <div> becomes a <p> while the other cleaning rules still apply."""
    # DIVs should be converted to P, and all whitelist / conversion rules
    # still applied
    source = (
        '<p>before</p><div class="shiny">OMG <b>look</b> at this <blink>video</blink> of a kitten: '
        '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ"'
        ' width="640" height="480"'
        ' src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></div><p>after</p>'
    )
    wanted = (
        '<p>before</p><p>OMG <b>look</b> at this video of a kitten:'
        ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p><p>after</p>'
    )

    cleaned = DbWhitelister.clean(source)

    self.assertHtmlEqual(wanted, cleaned)
def __update_page(self, file_id, title, date):
    """
    Create or update the page that mirrors the remote file ``file_id``.

    If SyncMeta already maps the file to a page, that page's date and body
    are refreshed (its title is left as stored). Otherwise a new page is
    built through the page class's edit form, added under the parent page,
    and the file-to-page mapping is recorded.
    """
    # Fetch the file contents first, as the body for either path.
    body = self.__get_resource(file_id)
    page_class = self.page_class
    parent_page = self.parent_page

    sync_key = 'file:' + file_id
    existing = SyncMeta.get_value(sync_key)

    # Anything other than False is treated as the primary key of an
    # already-synced page.
    if existing is not False:
        synced_page = page_class.objects.get(pk=existing)
        synced_page.date = date
        synced_page.body = DbWhitelister.clean(body)
        synced_page.save()
        return

    # No mapping yet: build a fresh page through its edit form.
    page = page_class()
    form_class = page_class.get_edit_handler().get_form_class(page_class)
    form_data = {
        'title': title,
        'date': date,
        'body': body,
        'slug': self.__generate_slug(title, parent_page)
    }
    form = form_class(data=form_data, instance=page, parent_page=parent_page)

    if form.is_valid():
        page = form.save(commit=False)
        parent_page.add_child(instance=page)

        # Keep the synced matches.
        SyncMeta.set_value(sync_key, page.id)
def test_clean_tag_node_div(self):
    """Cleaning a <div> node renames it to <p>."""
    soup = BeautifulSoup("<div>foo</div>", "html5lib")
    node = soup.div

    # Sanity check before cleaning: the parser really produced a div.
    self.assertEqual(node.name, "div")

    DbWhitelister.clean_tag_node(soup, node)

    self.assertEqual(node.name, "p")
def value_from_datadict(self, data, files, name):
    """Return the submitted value passed through DbWhitelister.clean.

    A ``None`` value from the parent widget is returned as-is.
    """
    submitted = super(RichTextArea, self).value_from_datadict(data, files, name)
    return None if submitted is None else DbWhitelister.clean(submitted)
def test_clean_tag_node(self):
    """Attributes with no whitelist rule are stripped from a plain link."""
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser is installed (and warns), so the parsed tree is not guaranteed
    # to be the same on every machine.
    soup = BeautifulSoup('<a irrelevant="baz">foo</a>', 'html5lib')
    tag = soup.a

    DbWhitelister.clean_tag_node(soup, tag)

    self.assertEqual(str(tag), '<a>foo</a>')
def test_clean_tag_node_div(self):
    """Cleaning a <div> node renames it to <p>."""
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser is installed (and warns), so the parsed tree is not guaranteed
    # to be the same on every machine.
    soup = BeautifulSoup('<div>foo</div>', 'html5lib')
    tag = soup.div
    self.assertEqual(tag.name, 'div')

    DbWhitelister.clean_tag_node(soup, tag)

    self.assertEqual(tag.name, 'p')
def value_from_datadict(self, data, files, name):
    """Return the submitted value passed through DbWhitelister.clean.

    A ``None`` value from the parent widget is returned as-is.
    """
    raw_value = super(HalloRichTextArea, self).value_from_datadict(data, files, name)
    if raw_value is not None:
        return DbWhitelister.clean(raw_value)
    return None