class TestDbWhitelister(TestCase):
    """
    Tests for the whitelister exposed by EditorHTMLConverter: link and embed
    rewriting, div conversion, and whitelists restricted by a feature list.
    """

    def setUp(self):
        # Every test cleans markup through the whitelister of a default converter.
        default_converter = EditorHTMLConverter()
        self.whitelister = default_converter.whitelister

    def assertHtmlEqual(self, str1, str2):
        """
        Compare two HTML strings as parsed html5lib trees instead of raw text
        (attribute output order is not guaranteed, so a plain string
        comparison would be unreliable).
        """
        first_tree = BeautifulSoup(str1, "html5lib")
        second_tree = BeautifulSoup(str2, "html5lib")
        self.assertEqual(first_tree, second_tree)

    def test_page_link_is_rewritten(self):
        # The page link loses its href and has data-linktype / data-id renamed
        # to linktype / id; the plain external link passes through unchanged.
        markup = (
            '<p>Look at the <a data-linktype="page" data-id="2" href="/">lovely homepage</a>'
            ' of my <a href="http://wagtail.org/">Wagtail</a> site</p>'
        )
        wanted = (
            '<p>Look at the <a linktype="page" id="2">lovely homepage</a>'
            ' of my <a href="http://wagtail.org/">Wagtail</a> site</p>'
        )
        self.assertHtmlEqual(wanted, self.whitelister.clean(markup))

    def test_document_link_is_rewritten(self):
        # Same rewrite as for page links, this time with linktype "document".
        markup = (
            '<p>Look at our <a data-linktype="document" data-id="1" href="/documents/1/brochure.pdf">'
            "horribly oversized brochure</a></p>"
        )
        wanted = '<p>Look at our <a linktype="document" id="1">horribly oversized brochure</a></p>'
        self.assertHtmlEqual(wanted, self.whitelister.clean(markup))

    def test_image_embed_is_rewritten(self):
        # The whole <figure> (img, figcaption and class included) is expected to
        # collapse into a single <embed> carrying embedtype / id / format / alt.
        markup = (
            '<p>OMG look at this picture of a kitten:</p><figure data-embedtype="image" data-id="5"'
            ' data-format="image-with-caption" data-alt="A cute kitten" class="fancy-image">'
            '<img src="/media/images/kitten.jpg" width="320" height="200" alt="A cute kitten" />'
            "<figcaption>A kitten, yesterday.</figcaption></figure>"
        )
        wanted = (
            '<p>OMG look at this picture of a kitten:</p><embed embedtype="image" id="5"'
            ' format="image-with-caption" alt="A cute kitten" />'
        )
        self.assertHtmlEqual(wanted, self.whitelister.clean(markup))

    def test_media_embed_is_rewritten(self):
        # The media <iframe> is expected to become an <embed> that keeps only
        # embedtype and url.
        markup = (
            "<p>OMG look at this video of a kitten: "
            '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" width="640"'
            ' height="480" src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></p>'
        )
        wanted = (
            "<p>OMG look at this video of a kitten:"
            ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p>'
        )
        self.assertHtmlEqual(wanted, self.whitelister.clean(markup))

    def test_div_conversion(self):
        # A DIV must come out as a P, with the whitelist / conversion rules still
        # applied to its contents (<blink> stripped, iframe turned into an embed).
        markup = (
            '<p>before</p><div class="shiny">OMG <b>look</b> at this <blink>video</blink> of a kitten: '
            '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ"'
            ' width="640" height="480"'
            ' src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></div><p>after</p>'
        )
        wanted = (
            "<p>before</p><p>OMG <b>look</b> at this video of a kitten:"
            ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p><p>after</p>'
        )
        self.assertHtmlEqual(wanted, self.whitelister.clean(markup))

    def test_whitelist_with_feature_list(self):
        # With only these features enabled, <h2> and <i> are expected to be
        # stripped (text kept) while <h1>, <b> and both kinds of link survive.
        enabled_features = ["h1", "bold", "link", "something_i_just_made_up"]
        restricted_converter = EditorHTMLConverter(features=enabled_features)
        markup = (
            "<h1>this heading is allowed</h1> <h2>but not this one</h2> "
            "<p><b>bold</b> <i>italic</i></p>"
            '<p><a href="http://torchbox.com">external link</a> <a data-linktype="page" data-id="2" href="/">internal link</a></p>'
        )
        wanted = (
            "<h1>this heading is allowed</h1> but not this one "
            "<p><b>bold</b> italic</p>"
            '<p><a href="http://torchbox.com">external link</a> <a linktype="page" id="2">internal link</a></p>'
        )
        self.assertHtmlEqual(wanted, restricted_converter.to_database_format(markup))
class TestDbWhitelister(TestCase):
    """
    Tests for the whitelister exposed by EditorHTMLConverter: link and embed
    rewriting, div conversion, hook-provided whitelist rules, and whitelists
    restricted by a feature list.
    """

    def setUp(self):
        # Every test cleans markup through the whitelister of a default converter.
        default_converter = EditorHTMLConverter()
        self.whitelister = default_converter.whitelister

    def assertHtmlEqual(self, str1, str2):
        """
        Compare two HTML strings as parsed html5lib trees instead of raw text
        (attribute output order is not guaranteed, so a plain string
        comparison would be unreliable).
        """
        first_tree = BeautifulSoup(str1, 'html5lib')
        second_tree = BeautifulSoup(str2, 'html5lib')
        self.assertEqual(first_tree, second_tree)

    def test_page_link_is_rewritten(self):
        # The page link loses its href and has data-linktype / data-id renamed
        # to linktype / id; the plain external link passes through unchanged.
        markup = (
            '<p>Look at the <a data-linktype="page" data-id="2" href="/">lovely homepage</a>'
            ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>'
        )
        wanted = (
            '<p>Look at the <a linktype="page" id="2">lovely homepage</a>'
            ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>'
        )
        self.assertHtmlEqual(wanted, self.whitelister.clean(markup))

    def test_document_link_is_rewritten(self):
        # Same rewrite as for page links, this time with linktype "document".
        markup = (
            '<p>Look at our <a data-linktype="document" data-id="1" href="/documents/1/brochure.pdf">'
            'horribly oversized brochure</a></p>'
        )
        wanted = '<p>Look at our <a linktype="document" id="1">horribly oversized brochure</a></p>'
        self.assertHtmlEqual(wanted, self.whitelister.clean(markup))

    def test_image_embed_is_rewritten(self):
        # The whole <figure> (img, figcaption and class included) is expected to
        # collapse into a single <embed> carrying embedtype / id / format / alt.
        markup = (
            '<p>OMG look at this picture of a kitten:</p><figure data-embedtype="image" data-id="5"'
            ' data-format="image-with-caption" data-alt="A cute kitten" class="fancy-image">'
            '<img src="/media/images/kitten.jpg" width="320" height="200" alt="A cute kitten" />'
            '<figcaption>A kitten, yesterday.</figcaption></figure>'
        )
        wanted = (
            '<p>OMG look at this picture of a kitten:</p><embed embedtype="image" id="5"'
            ' format="image-with-caption" alt="A cute kitten" />'
        )
        self.assertHtmlEqual(wanted, self.whitelister.clean(markup))

    def test_media_embed_is_rewritten(self):
        # The media <iframe> is expected to become an <embed> that keeps only
        # embedtype and url.
        markup = (
            '<p>OMG look at this video of a kitten: '
            '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" width="640"'
            ' height="480" src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></p>'
        )
        wanted = (
            '<p>OMG look at this video of a kitten:'
            ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p>'
        )
        self.assertHtmlEqual(wanted, self.whitelister.clean(markup))

    def test_div_conversion(self):
        # A DIV must come out as a P, with the whitelist / conversion rules still
        # applied to its contents (<blink> stripped, iframe turned into an embed).
        markup = (
            '<p>before</p><div class="shiny">OMG <b>look</b> at this <blink>video</blink> of a kitten: '
            '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ"'
            ' width="640" height="480"'
            ' src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></div><p>after</p>'
        )
        wanted = (
            '<p>before</p><p>OMG <b>look</b> at this video of a kitten:'
            ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p><p>after</p>'
        )
        self.assertHtmlEqual(wanted, self.whitelister.clean(markup))

    def test_whitelist_hooks(self):
        # wagtail.tests.wagtail_hooks overrides the whitelist to permit <blockquote> and <a target="...">,
        # so both survive here while the unknown attribute and <character> tag are dropped.
        hooked_markup = (
            '<blockquote>I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1"'
            ' target="_blank" tea="darjeeling">'
            'stand in water</a>.</blockquote><p>- <character>Gumby</character></p>'
        )
        hooked_wanted = (
            '<blockquote>I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1"'
            ' target="_blank">stand in water</a>.</blockquote><p>- Gumby</p>'
        )
        self.assertHtmlEqual(hooked_wanted, self.whitelister.clean(hooked_markup))

        # The base Whitelister class must not pick up those custom rules:
        # the <blockquote> wrapper and the target attribute are both expected to go.
        base_markup = (
            '<blockquote>I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1" target="_blank"'
            ' tea="darjeeling">stand in water</a>.</blockquote><p>- <character>Gumby</character></p>'
        )
        base_wanted = (
            'I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1">'
            'stand in water</a>.<p>- Gumby</p>'
        )
        self.assertHtmlEqual(base_wanted, Whitelister().clean(base_markup))

    def test_whitelist_with_feature_list(self):
        # With only these features enabled, <h2> and <i> are expected to be
        # stripped (text kept) while <h1>, <b> and both kinds of link survive.
        enabled_features = ['h1', 'bold', 'link', 'something_i_just_made_up']
        restricted_converter = EditorHTMLConverter(features=enabled_features)
        markup = (
            '<h1>this heading is allowed</h1> <h2>but not this one</h2> '
            '<p><b>bold</b> <i>italic</i></p>'
            '<p><a href="http://torchbox.com">external link</a> <a data-linktype="page" data-id="2" href="/">internal link</a></p>'
        )
        wanted = (
            '<h1>this heading is allowed</h1> but not this one '
            '<p><b>bold</b> italic</p>'
            '<p><a href="http://torchbox.com">external link</a> <a linktype="page" id="2">internal link</a></p>'
        )
        self.assertHtmlEqual(wanted, restricted_converter.to_database_format(markup))
class TestDbWhitelister(TestCase):
    """
    Tests for the whitelister exposed by EditorHTMLConverter: link and embed
    rewriting, div conversion, hook-provided whitelist rules, and whitelists
    restricted by a feature list.
    """

    def setUp(self):
        # A fresh default converter supplies the whitelister used by each test.
        html_converter = EditorHTMLConverter()
        self.whitelister = html_converter.whitelister

    def assertHtmlEqual(self, str1, str2):
        """
        Assert DOM-level equality of two HTML strings by parsing both with
        html5lib first; raw string comparison would be sensitive to the
        order in which attributes happen to be output.
        """
        left_dom = BeautifulSoup(str1, 'html5lib')
        right_dom = BeautifulSoup(str2, 'html5lib')
        self.assertEqual(left_dom, right_dom)

    def test_page_link_is_rewritten(self):
        # Page link: href removed, data-linktype / data-id renamed to
        # linktype / id. The ordinary external link is expected to stay as is.
        editor_html = (
            '<p>Look at the <a data-linktype="page" data-id="2" href="/">lovely homepage</a>'
            ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>'
        )
        db_html = (
            '<p>Look at the <a linktype="page" id="2">lovely homepage</a>'
            ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>'
        )
        self.assertHtmlEqual(db_html, self.whitelister.clean(editor_html))

    def test_document_link_is_rewritten(self):
        # Document link: same attribute rewrite as page links.
        editor_html = (
            '<p>Look at our <a data-linktype="document" data-id="1" href="/documents/1/brochure.pdf">'
            'horribly oversized brochure</a></p>'
        )
        db_html = '<p>Look at our <a linktype="document" id="1">horribly oversized brochure</a></p>'
        self.assertHtmlEqual(db_html, self.whitelister.clean(editor_html))

    def test_image_embed_is_rewritten(self):
        # The <figure> and everything inside it should be replaced by one
        # <embed> holding embedtype / id / format / alt.
        editor_html = (
            '<p>OMG look at this picture of a kitten:</p><figure data-embedtype="image" data-id="5"'
            ' data-format="image-with-caption" data-alt="A cute kitten" class="fancy-image">'
            '<img src="/media/images/kitten.jpg" width="320" height="200" alt="A cute kitten" />'
            '<figcaption>A kitten, yesterday.</figcaption></figure>'
        )
        db_html = (
            '<p>OMG look at this picture of a kitten:</p><embed embedtype="image" id="5"'
            ' format="image-with-caption" alt="A cute kitten" />'
        )
        self.assertHtmlEqual(db_html, self.whitelister.clean(editor_html))

    def test_media_embed_is_rewritten(self):
        # The media <iframe> should be replaced by an <embed> with just
        # embedtype and url.
        editor_html = (
            '<p>OMG look at this video of a kitten: '
            '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" width="640"'
            ' height="480" src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></p>'
        )
        db_html = (
            '<p>OMG look at this video of a kitten:'
            ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p>'
        )
        self.assertHtmlEqual(db_html, self.whitelister.clean(editor_html))

    def test_div_conversion(self):
        # DIVs should be converted to P, and all whitelist / conversion rules
        # still applied inside them (<blink> removed, iframe made into an embed).
        editor_html = (
            '<p>before</p><div class="shiny">OMG <b>look</b> at this <blink>video</blink> of a kitten: '
            '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ"'
            ' width="640" height="480"'
            ' src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></div><p>after</p>'
        )
        db_html = (
            '<p>before</p><p>OMG <b>look</b> at this video of a kitten:'
            ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p><p>after</p>'
        )
        self.assertHtmlEqual(db_html, self.whitelister.clean(editor_html))

    def test_whitelist_hooks(self):
        # wagtail.tests.wagtail_hooks overrides the whitelist to permit <blockquote> and <a target="...">;
        # the unrecognised "tea" attribute and <character> tag are still expected to be removed.
        customised_input = (
            '<blockquote>I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1"'
            ' target="_blank" tea="darjeeling">'
            'stand in water</a>.</blockquote><p>- <character>Gumby</character></p>'
        )
        customised_result = (
            '<blockquote>I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1"'
            ' target="_blank">stand in water</a>.</blockquote><p>- Gumby</p>'
        )
        self.assertHtmlEqual(customised_result, self.whitelister.clean(customised_input))

        # check that the base Whitelister class is unaffected by these custom whitelist rules
        # (no <blockquote> and no target attribute in its output)
        stock_input = (
            '<blockquote>I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1" target="_blank"'
            ' tea="darjeeling">stand in water</a>.</blockquote><p>- <character>Gumby</character></p>'
        )
        stock_result = (
            'I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1">'
            'stand in water</a>.<p>- Gumby</p>'
        )
        self.assertHtmlEqual(stock_result, Whitelister().clean(stock_input))

    def test_whitelist_with_feature_list(self):
        # Only h1 / bold / link (plus one unknown feature name) are enabled, so
        # <h2> and <i> are expected to be unwrapped while the rest is retained.
        feature_names = ['h1', 'bold', 'link', 'something_i_just_made_up']
        limited_converter = EditorHTMLConverter(features=feature_names)
        editor_html = (
            '<h1>this heading is allowed</h1> <h2>but not this one</h2> '
            '<p><b>bold</b> <i>italic</i></p>'
            '<p><a href="http://torchbox.com">external link</a> <a data-linktype="page" data-id="2" href="/">internal link</a></p>'
        )
        db_html = (
            '<h1>this heading is allowed</h1> but not this one '
            '<p><b>bold</b> italic</p>'
            '<p><a href="http://torchbox.com">external link</a> <a linktype="page" id="2">internal link</a></p>'
        )
        self.assertHtmlEqual(db_html, limited_converter.to_database_format(editor_html))