def test_escaping(self): """ bibformat - tests escaping HTML characters""" text = "Is 5 < 6 ? For sure! And what about True && False == True?" result = bibformat_engine.escape_field(text, mode=0) self.assertEqual(result, text) result = bibformat_engine.escape_field(text, mode=1) self.assertEqual(result, 'Is 5 < 6 ? For sure! And what about True && False == True?')
def test_washing(self): """ bibformat - test washing HTML tags""" text = '''Hi dude, <br>, <strong>please login</strong>:<br/> <a onclick="http://www.mycrappywebsite.com" href="login.html">login here</a></a><SCRIPT>alert("XSS");</SCRIPT>''' # Keep only basic tags result = bibformat_engine.escape_field(text, mode=2) self.assert_('script' not in result.lower()) self.assert_('onclick' not in result.lower()) self.assert_('mycrappywebsite' not in result.lower()) self.assert_('<br>' in result.lower()) self.assert_('<br/>' in result.lower().replace(' ', '')) # Keep only basic tags only if value starts with <!--HTML--> # directive. Otherwise escape (which is the case here) result = bibformat_engine.escape_field(text, mode=3) self.assert_('<script' not in result.lower()) self.assert_('<' not in result.lower()) result = bibformat_engine.escape_field(text, mode=5) self.assert_('<script' not in result.lower()) self.assert_('<br' in result.lower()) # Remove all HTML tags result = bibformat_engine.escape_field(text, mode=4) self.assert_('script' not in result.lower()) self.assert_('onclick' not in result.lower()) self.assert_('mycrappywebsite' not in result.lower()) self.assert_('strong' not in result.lower()) self.assert_('<br>' not in result.lower()) self.assert_('<br/>' not in result.lower().replace(' ', '')) self.assert_('login here' in result.lower()) # Keep basic tags + some others (like <img>) result = bibformat_engine.escape_field(text, mode=5) self.assert_('script' not in result.lower()) self.assert_('onclick' not in result.lower()) self.assert_('mycrappywebsite' not in result.lower()) self.assert_('<br' in result.lower()) self.assert_('login here' in result.lower()) text2 = text + ' <img src="loginicon" alt="login icon"/>' result = bibformat_engine.escape_field(text2, mode=5) self.assert_('<img' in result.lower()) self.assert_('src=' in result.lower()) self.assert_('alt="login icon"' in result.lower()) # Keep some tags only if value starts with <!--HTML--> # directive. Otherwise escape (which is the case here) result = bibformat_engine.escape_field(text, mode=6) self.assert_('<script' not in result.lower()) self.assert_('<' not in result.lower()) result = bibformat_engine.escape_field('<!--HTML-->'+text, mode=6) self.assert_('<script' not in result.lower()) self.assert_('<br>' in result.lower()) self.assert_('mycrappywebsite' not in result.lower()) # When the value cannot be parsed by our not so smart parser, # just escape everything text3 = """Ok, let't try with something unparsable < hehe <a onclick="http://www.mycrappywebsite.com" href="login.html">login</a>""" result = bibformat_engine.escape_field(text3, mode=2) self.assert_('mycrappywebsite' not in result.lower() or \ '<a' not in result.lower()) result = bibformat_engine.escape_field(text3, mode=3) self.assert_('<a' not in result.lower()) result = bibformat_engine.escape_field(text3, mode=5) self.assert_('mycrappywebsite' not in result.lower() or \ '<a' not in result.lower()) result = bibformat_engine.escape_field(text3, mode=6) self.assert_('<a' not in result.lower())
def test_washing(self): """ bibformat - test washing HTML tags""" text = '''Hi dude, <br>, <strong>please login</strong>:<br/> <a onclick="http://www.mycrappywebsite.com" href="login.html">login here</a></a><SCRIPT>alert("XSS");</SCRIPT>''' # Keep only basic tags result = bibformat_engine.escape_field(text, mode=2) self.assert_('script' not in result.lower()) self.assert_('onclick' not in result.lower()) self.assert_('mycrappywebsite' not in result.lower()) self.assert_('<br>' in result.lower()) self.assert_('<br/>' in result.lower().replace(' ', '')) # Keep only basic tags only if value starts with <!--HTML--> # directive. Otherwise escape (which is the case here) result = bibformat_engine.escape_field(text, mode=3) self.assert_('<script' not in result.lower()) self.assert_('<' not in result.lower()) result = bibformat_engine.escape_field(text, mode=5) self.assert_('<script' not in result.lower()) self.assert_('<br' in result.lower()) # Remove all HTML tags result = bibformat_engine.escape_field(text, mode=4) self.assert_('script' not in result.lower()) self.assert_('onclick' not in result.lower()) self.assert_('mycrappywebsite' not in result.lower()) self.assert_('strong' not in result.lower()) self.assert_('<br>' not in result.lower()) self.assert_('<br/>' not in result.lower().replace(' ', '')) self.assert_('login here' in result.lower()) # Keep basic tags + some others (like <img>) result = bibformat_engine.escape_field(text, mode=5) self.assert_('script' not in result.lower()) self.assert_('onclick' not in result.lower()) self.assert_('mycrappywebsite' not in result.lower()) self.assert_('<br' in result.lower()) self.assert_('login here' in result.lower()) text2 = text + ' <img src="loginicon" alt="login icon"/>' result = bibformat_engine.escape_field(text2, mode=5) self.assert_('<img' in result.lower()) self.assert_('src=' in result.lower()) self.assert_('alt="login icon"' in result.lower()) # Keep some tags only if value starts with <!--HTML--> # directive. Otherwise escape (which is the case here) result = bibformat_engine.escape_field(text, mode=6) self.assert_('<script' not in result.lower()) self.assert_('<' not in result.lower()) result = bibformat_engine.escape_field('<!--HTML-->' + text, mode=6) self.assert_('<script' not in result.lower()) self.assert_('<br>' in result.lower()) self.assert_('mycrappywebsite' not in result.lower()) # When the value cannot be parsed by our not so smart parser, # just escape everything text3 = """Ok, let't try with something unparsable < hehe <a onclick="http://www.mycrappywebsite.com" href="login.html">login</a>""" result = bibformat_engine.escape_field(text3, mode=2) self.assert_('mycrappywebsite' not in result.lower() or '<a' not in result.lower()) result = bibformat_engine.escape_field(text3, mode=3) self.assert_('<a' not in result.lower()) result = bibformat_engine.escape_field(text3, mode=5) self.assert_('mycrappywebsite' not in result.lower() or '<a' not in result.lower()) result = bibformat_engine.escape_field(text3, mode=6) self.assert_('<a' not in result.lower())