def test_clean_tag_node_disallows_nested_unrecognised_node(self):
    """
    <foo> tags should be removed, even when nested.

    Only the unrecognised element itself is dropped; the expected output
    keeps its text content inside the surrounding <b>.
    """
    # Name the parser explicitly. Without it BeautifulSoup picks whichever
    # parser happens to be installed, so the parsed tree can differ between
    # environments and bs4 emits a "no parser was explicitly specified"
    # warning.
    soup = BeautifulSoup('<b><foo>bar</foo></b>', 'html5lib')
    tag = soup.b
    Whitelister.clean_tag_node(tag, tag)
    self.assertEqual(str(tag), '<b>bar</b>')
def test_clean_unknown_node(self):
    """
    clean_unknown_node should take the given node out of its parent:
    after the call, <bar> and its contents are gone from <foo>.
    """
    document = BeautifulSoup('<foo><bar>baz</bar>quux</foo>', 'html5lib')
    parent = document.foo
    unknown = document.bar
    Whitelister.clean_unknown_node('', unknown)
    rendered = str(parent)
    self.assertEqual(rendered, '<foo>quux</foo>')
def test_clean_tag_node_disallows_nested_unrecognised_node(self):
    """
    An unrecognised <foo> element nested inside a <b> is stripped by
    clean_tag_node, leaving its text behind in the <b>.
    """
    document = BeautifulSoup('<b><foo>bar</foo></b>', 'html5lib')
    bold = document.b
    # The same element is passed as both arguments.
    Whitelister.clean_tag_node(bold, bold)
    rendered = str(bold)
    self.assertEqual(rendered, '<b>bar</b>')
def test_clean_unknown_node(self):
    """
    Unknown node should remove a node from the parent document.

    After the call, <bar> and its contents are expected to be gone from
    <foo>, while the sibling text "quux" remains.
    """
    # Name the parser explicitly. Without it BeautifulSoup picks whichever
    # parser happens to be installed, so the parsed tree can differ between
    # environments and bs4 emits a "no parser was explicitly specified"
    # warning.
    soup = BeautifulSoup('<foo><bar>baz</bar>quux</foo>', 'html5lib')
    tag = soup.foo
    Whitelister.clean_unknown_node('', soup.bar)
    self.assertEqual(str(tag), '<foo>quux</foo>')
def test_clean_tag_node_cleans_nested_recognised_node(self):
    """
    A <b> nested inside another <b> is kept, but its attributes are not:
    the inner tag's ``class`` is expected to be stripped.
    """
    document = BeautifulSoup('<b><b class="delete me">foo</b></b>', 'html5lib')
    outer_bold = document.b
    # The same element is passed as both arguments.
    Whitelister.clean_tag_node(outer_bold, outer_bold)
    rendered = str(outer_bold)
    self.assertEqual(rendered, '<b><b>foo</b></b>')
def test_clean_tag_node_cleans_nested_recognised_node(self):
    """
    <b> tags are allowed without attributes. This remains true
    when tags are nested.

    The nested <b> is expected to survive cleaning with its ``class``
    attribute removed.
    """
    # Name the parser explicitly. Without it BeautifulSoup picks whichever
    # parser happens to be installed, so the parsed tree can differ between
    # environments and bs4 emits a "no parser was explicitly specified"
    # warning.
    soup = BeautifulSoup('<b><b class="delete me">foo</b></b>', 'html5lib')
    tag = soup.b
    Whitelister.clean_tag_node(tag, tag)
    self.assertEqual(str(tag), '<b><b>foo</b></b>')
def test_whitelist_hooks(self):
    """
    Custom whitelist rules apply to DbWhitelister only.

    The same markup is cleaned twice: once through DbWhitelister, which is
    expected to keep <blockquote> and the link's ``target``, and once
    through the base Whitelister, which is expected to drop both.
    """
    # wagtail.tests.wagtail_hooks overrides the whitelist to permit
    # <blockquote> and <a target="...">
    hooked_markup = (
        '<blockquote>I would put a tax on all people who'
        ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1"'
        ' target="_blank" tea="darjeeling">'
        'stand in water</a>.</blockquote><p>- <character>Gumby</character></p>'
    )
    hooked_expected = (
        '<blockquote>I would put a tax on all people who'
        ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1"'
        ' target="_blank">stand in water</a>.</blockquote><p>- Gumby</p>'
    )
    hooked_result = DbWhitelister.clean(hooked_markup)
    self.assertHtmlEqual(hooked_expected, hooked_result)

    # The base Whitelister class must be unaffected by these custom
    # whitelist rules.
    base_markup = (
        '<blockquote>I would put a tax on all people who'
        ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1" target="_blank"'
        ' tea="darjeeling">stand in water</a>.</blockquote><p>- <character>Gumby</character></p>'
    )
    base_expected = (
        'I would put a tax on all people who'
        ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1">'
        'stand in water</a>.<p>- Gumby</p>'
    )
    base_result = Whitelister.clean(base_markup)
    self.assertHtmlEqual(base_expected, base_result)
def test_clean(self):
    """
    Whitelister.clean takes an HTML string and should return it with
    disallowed tags and attributes stripped out.
    """
    dirty = '<b foo="bar">snowman <barbecue>Yorkshire</barbecue></b>'
    self.assertEqual(Whitelister.clean(dirty), '<b>snowman Yorkshire</b>')
def test_clean_comments(self):
    """
    Whitelister.clean should drop HTML comments (here an MS Word
    conditional comment) from the output.
    """
    dirty = '<b>snowman Yorkshire<!--[if gte mso 10]>MS word junk<![endif]--></b>'
    self.assertEqual(Whitelister.clean(dirty), '<b>snowman Yorkshire</b>')
def test_clean_node_does_not_change_navigable_strings(self):
    """
    Passing a tag's text node through clean_node should leave the text
    exactly as it was.
    """
    document = BeautifulSoup('<b>bar</b>', 'html5lib')
    text_node = document.b.string
    # The same node is passed as both arguments.
    Whitelister.clean_node(text_node, text_node)
    rendered = str(text_node)
    self.assertEqual(rendered, 'bar')
def test_clean_string_node_does_nothing(self):
    """
    clean_string_node should leave a tag's text node untouched.
    """
    # Name the parser explicitly. Without it BeautifulSoup picks whichever
    # parser happens to be installed, so the parsed tree can differ between
    # environments and bs4 emits a "no parser was explicitly specified"
    # warning.
    soup = BeautifulSoup('<b>bar</b>', 'html5lib')
    string = soup.b.string
    Whitelister.clean_string_node(string, string)
    self.assertEqual(str(string), 'bar')
def test_clean_node_does_not_change_navigable_strings(self):
    """
    clean_node should leave a tag's text node untouched.
    """
    # Name the parser explicitly. Without it BeautifulSoup picks whichever
    # parser happens to be installed, so the parsed tree can differ between
    # environments and bs4 emits a "no parser was explicitly specified"
    # warning.
    soup = BeautifulSoup('<b>bar</b>', 'html5lib')
    string = soup.b.string
    Whitelister.clean_node(string, string)
    self.assertEqual(str(string), 'bar')