def test_html_invalid_utf8_entity_encoded(self):
    """
    Check that htmldecode() does not raise for a set of valid and
    invalid character samples.

    Only the absence of exceptions is verified; the decoded value is
    not inspected. The first sample that raises fails the test with a
    message naming the sample's description.
    """
    samples = {
        'Valid ASCII': u"a",
        'Valid 2 Octet Sequence': u"쎱",
        'Invalid 2 Octet Sequence': u"쌨",
        'Invalid Sequence Identifier': u"ꂡ",
        'Valid 3 Octet Sequence': u"�",
        'Invalid 3 Octet Sequence (in 2nd Octet)': u"�",
        'Invalid 3 Octet Sequence (in 3rd Octet)': u"�",
        'Valid 4 Octet Sequence': u"�",
        'Invalid 4 Octet Sequence (in 2nd Octet)': u"�",
        'Invalid 4 Octet Sequence (in 3rd Octet)': u"�",
        'Invalid 4 Octet Sequence (in 4th Octet)': u"�",
        'Valid 5 Octet Sequence (but not Unicode!)': u" � ",
        'Valid 6 Octet Sequence (but not Unicode!)': u" � ",
        'Invalid unicode FFFE': u"",
        'Invalid unicode FFFF': u"",
    }

    msg = 'Exception "%s" was raised when trying to htmldecode() a "%s".'

    # items() is available on both Python 2 and Python 3 dicts, unlike
    # iteritems(); the dict is tiny so the extra list on Python 2 is free.
    for desc, sample in samples.items():
        try:
            htmldecode(sample)
        except Exception as e:
            # Any exception type counts as a failure here, so the broad
            # catch is deliberate; fail() reports it explicitly.
            self.fail(msg % (e, desc))
def html_unescape(t):
    """
    Decoder doing HTML unescaping.

    Delegates to encode_decode.htmldecode() and returns its result.

    >>> encode_decode.htmldecode('<script>')
    u'<script>'
    >>>
    """
    unescaped = encode_decode.htmldecode(t)
    return unescaped
def test_bug_trigger_case01(self):
    """
    Regression test: a unicode string holding non-ASCII characters must
    come back from htmldecode() unchanged.

    The failure this guards against was reported as:
        UnicodeEncodeError: 'ascii' codec can't encode character ...
        ordinal not in range(128)
    """
    original = u'Aquí encontrará'
    decoded = htmldecode(original)
    self.assertEqual(decoded, original)
def _parse_xssed_vuln_page(self, xss_report_response):
    """
    Parse the HTTP response for a vulnerability page such as
    http://www.xssed.com/mirror/76754/ and create the vulnerability
    object to the KB.

    For every URL matched by self.XSSED_URL_RE in the response body:
        * a Vuln is created and appended to the KB under 'xss'
        * a FuzzableRequest for the URL is put in self.output_queue

    :param xss_report_response: The HTTP response of the xssed.com report
                                page; its body, URL and id are read.
    :return: None
    """
    body = xss_report_response.get_body()
    url_matches = self.XSSED_URL_RE.findall(body)

    if not url_matches:
        return

    # Severity, verb and description only depend on the response, not on
    # each matched URL, so they are computed once instead of per match.
    if self.UNFIXED in body:
        vuln_severity = severity.HIGH
        verb = 'contains'
    else:
        vuln_severity = severity.LOW
        verb = 'contained'

    desc_fmt = ('According to xssed.com the target domain %s a XSS'
                ' vulnerability, see %s for more information')
    desc = desc_fmt % (verb, xss_report_response.get_url())

    for xss_url in url_matches:

        # Ugly but required because of how xssed.com writes stuff
        xss_url = xss_url.replace('<br>', '')
        xss_url = htmldecode(xss_url)
        xss_url = urllib2.unquote(xss_url)
        xss_url = URL(xss_url)

        # A new Vuln per URL: set_url() is called on it below, so the
        # object cannot be shared between matches.
        v = Vuln('Potential XSS vulnerability', desc, vuln_severity,
                 xss_report_response.id, self.get_name())
        v.set_url(xss_url)

        #
        # Add the fuzzable request, this is useful if I have the
        # XSS plugin enabled because it will re-test this and
        # possibly confirm the vulnerability
        #
        fr = FuzzableRequest(xss_url)
        self.output_queue.put(fr)

        # Save the vuln to the KB and print to output
        self.kb_append(self, 'xss', v)
def test_tilde(self):
    """Accented characters in a unicode string survive htmldecode()."""
    decoded = htmldecode(u'hólá múndó')
    self.assertEqual(decoded, u'hólá múndó')
def test_bug_trigger_case02(self):
    """
    A UTF-8 encoded byte string passed to htmldecode() must yield the
    same value as decoding those bytes with the 'utf-8' codec.
    """
    html_utf8_raw = 'Aqu\xc3\xad encontrar\xc3\xa1'
    # Expected value: the very same bytes, decoded as UTF-8
    html_unicode = html_utf8_raw.decode('utf-8')

    decoded = htmldecode(html_utf8_raw)
    self.assertEqual(decoded, html_unicode)
def test_bug_trigger_case04(self):
    """A lone non-ASCII unicode character is returned unchanged."""
    original = u'\xed'
    decoded = htmldecode(original)
    self.assertEqual(decoded, original)
def test_html_encoded(self):
    """htmldecode() must produce the accented character for this input."""
    decoded = htmldecode(u'á')
    self.assertEqual(decoded, u'á')
def test_special_char(self):
    """htmldecode() must produce the special character for this input."""
    decoded = htmldecode(u'hola ƻ')
    self.assertEqual(decoded, u'hola ƻ')
def test_charref(self):
    """htmldecode() must produce 'hola mundo A' for this input."""
    decoded = htmldecode(u'hola mundo A')
    self.assertEqual(decoded, u'hola mundo A')
def test_simple(self):
    """Plain text without anything to decode is returned as-is."""
    decoded = htmldecode('hola mundo')
    self.assertEqual(decoded, 'hola mundo')