def __init__(self, codecs=None):
    """
    Build a DefaultEncoder together with the codecs it delegates to.

    @param codecs: optional list of Codec instances to use for
        canonicalization. When omitted, the HTML entity, percent and
        JavaScript codecs are used.
    @raises TypeError: if an entry of codecs is not a Codec instance
    """
    Encoder.__init__(self)

    # One codec instance per supported output context.
    self.html_codec = HTMLEntityCodec()
    self.percent_codec = PercentCodec()
    self.javascript_codec = JavascriptCodec()
    self.vbscript_codec = VBScriptCodec()
    self.css_codec = CSSCodec()
    self.ldap_codec = LDAPCodec()
    self.ldap_dn_codec = LDAPDNCodec()

    self.logger = ESAPI.logger("Encoder")

    # Codecs consulted during canonicalization
    self.codecs = []
    if codecs is not None:
        # Caller-supplied list: accept it only if every entry is a Codec.
        for candidate in codecs:
            if not isinstance(candidate, Codec):
                raise TypeError(
                    _("Codecs in list must be instances of children of Codec"))
            self.codecs.append(candidate)
        return

    # Default set.
    # Leaving out css_codec because it eats / characters
    # Leaving out vbscript_codec because it eats " characters
    self.codecs.extend((
        self.html_codec,
        self.percent_codec,
        self.javascript_codec,
    ))
def test_codec_for_vbscript(self):
    """
    Exercise VBScript encoding both through the encoder returned by
    ESAPI.encoder() and through a VBScriptCodec used directly.
    """
    instance = ESAPI.encoder()

    ### High level
    self.assertEqual(None, instance.encode_for_vbscript(None))
    self.assertEqual(
        "chrw(60)&\"script\"&chrw(62)",
        instance.encode_for_vbscript("<script>"))
    self.assertEqual(
        "x\"&chrw(32)&chrw(33)&chrw(64)&chrw(36)&chrw(37)&chrw(40)&chrw(41)&chrw(61)&chrw(43)&chrw(123)&chrw(125)&chrw(91)&chrw(93)",
        instance.encode_for_vbscript("x !@$%()=+{}[]"))
    self.assertEqual(
        "alert\"&chrw(40)&chrw(39)&\"ESAPI\"&chrw(32)&\"test\"&chrw(33)&chrw(39)&chrw(41)",
        instance.encode_for_vbscript("alert('ESAPI test!')"))
    # The input must contain the '@' that the expected value encodes as
    # chrw(64); a masked address such as "*****@*****.**" can never produce
    # this expected output.
    self.assertEqual(
        "jeff.williams\"&chrw(64)&\"aspectsecurity.com",
        instance.encode_for_vbscript("jeff.williams@aspectsecurity.com"))
    self.assertEqual(
        "test\"&chrw(32)&chrw(60)&chrw(62)&chrw(32)&\"test",
        instance.encode_for_vbscript("test <> test"))

    ### Low level
    codec = VBScriptCodec()

    # Cases that must survive an encode/decode round trip.
    cases = (
        # PLAIN - ENCODED - ALT_ENCODINGS
        ('', '', ()),  # 0 length string
        ('t', 't', ()),
        ('test', 'test', ()),
        (unichr(12345), unichr(12345), ()),
    )

    for case in cases:
        if case[ENCODED] is not None:
            self.assertEqual(case[ENCODED], codec.encode('', case[PLAIN]))
            self.assertEqual(case[PLAIN], codec.decode(case[ENCODED]))
        for encoding in case[ALT_ENCODINGS]:
            # Report the offending alternate encoding on failure instead of
            # printing every one to stdout.
            self.assertEqual(
                case[PLAIN], codec.decode(encoding),
                "encoding=%r" % (encoding,))

    # Cases that are only checked in the encode direction.
    encode_only_cases = (
        # PLAIN - ENCODED - ALT_ENCODINGS
        ('<script>', 'chrw(60)&"script"&chrw(62)', ()),
        ('!@#$%^&*(){}[]?+/=|\\',
         'chrw(33)&chrw(64)&chrw(35)&chrw(36)&chrw(37)&chrw(94)&chrw(38)&chrw(42)&chrw(40)&chrw(41)&chrw(123)&chrw(125)&chrw(91)&chrw(93)&chrw(63)&chrw(43)&chrw(47)&chrw(61)&chrw(124)&chrw(92)',
         ()),
        ('"`~1234_-',
         'chrw(34)&chrw(96)&chrw(126)&"1234"&chrw(95)&chrw(45)',
         ()),
        (unichr(9), "chrw(9)", ()),
        # ALT_ENCODINGS is a one-element tuple; ('\\') would be a bare string.
        ('\\', 'chrw(92)', ('\\',)),
    )

    for case in encode_only_cases:
        if case[ENCODED] is not None:
            self.assertEqual(case[ENCODED], codec.encode('', case[PLAIN]))
class DefaultEncoder(Encoder):
    """
    Reference implementation of the Encoder interface. This implementation
    takes a whitelist approach to encoding, meaning that everything not
    specifically identified in a list of "immune" characters is encoded.

    @author: Craig Younkins ([email protected])
    """

    # "Immune" character sets handed to the codecs, one per output context.
    IMMUNE_HTML = ',.-_ '
    IMMUNE_HTMLATTR = ',.-_'
    IMMUNE_CSS = ''
    IMMUNE_JAVASCRIPT = ',._'
    IMMUNE_VBSCRIPT = ',._'
    IMMUNE_XML = ',.-_ '
    IMMUNE_SQL = ' '
    IMMUNE_OS = '-'
    IMMUNE_XMLATTR = ',.-_'
    IMMUNE_XPATH = ',.-_ '
    IMMUNE_LDAP = ''
    IMMUNE_LDAP_DN = ''

    # Unreserved characters as specified in RFC 3986
    IMMUNE_URL = '-_.~'

    def __init__(self, codecs=None):
        """
        Instantiates a new DefaultEncoder.

        @param codecs: a list of codec instances to use for
            canonicalization. When omitted, the HTML entity, percent and
            JavaScript codecs are used.
        @raises TypeError: if an entry of codecs is not a Codec instance
        """
        Encoder.__init__(self)

        self.html_codec = HTMLEntityCodec()
        self.percent_codec = PercentCodec()
        self.javascript_codec = JavascriptCodec()
        self.vbscript_codec = VBScriptCodec()
        self.css_codec = CSSCodec()
        self.ldap_codec = LDAPCodec()
        self.ldap_dn_codec = LDAPDNCodec()

        self.logger = ESAPI.logger("Encoder")

        # Used for canonicalization
        self.codecs = []
        if codecs is None:
            self.codecs.append(self.html_codec)
            self.codecs.append(self.percent_codec)
            self.codecs.append(self.javascript_codec)
            # Leaving out css_codec because it eats / characters
            # Leaving out vbscript_codec because it eats " characters
        else:
            for codec in codecs:
                if not isinstance(codec, Codec):
                    raise TypeError(
                        _("Codecs in list must be instances of children of Codec"))
                self.codecs.append(codec)

    def canonicalize(self, input_, strict=True):
        """
        Decode input_ with every configured codec, pass after pass, until a
        complete pass leaves it unchanged.

        @param input_: the text to canonicalize; None is returned unchanged
        @param strict: if True, multiple and/or mixed encoding raises an
            IntrusionException; if False it is only logged as a warning
        @return: the decoded text
        @raises IntrusionException: in strict mode, when the input was
            decoded in two or more passes or by more than one codec
        """
        if input_ is None:
            return None

        working = input_[:]
        codecs_found = []
        found_count = 0
        clean = False

        while not clean:
            clean = True

            # Try each codec and keep track of which ones work
            for codec in self.codecs:
                old = working[:]
                working = codec.decode(working)
                if old != working:
                    codec_name = codec.__class__.__name__
                    if codec_name not in codecs_found:
                        codecs_found.append(codec_name)
                    # Count each pass at most once, however many codecs
                    # changed the text during it.
                    if clean:
                        found_count += 1
                    clean = False

        multiple = found_count >= 2
        mixed = len(codecs_found) > 1
        # Mapping-style formatting ignores keys a template does not use, so
        # one dict serves all three messages.
        details = {
            'times_encoded': found_count,
            'codecs_found': str(codecs_found),
            'input': input_,
        }

        if multiple and mixed:
            self._report_encoding_problem(
                _("Multiple (%(times_encoded)sx) and mixed encoding (%(codecs_found)s) detected in %(input)s")
                % details,
                strict)
        elif multiple:
            self._report_encoding_problem(
                _("Multiple (%(times_encoded)sx) encoding detected in %(input)s")
                % details,
                strict)
        elif mixed:
            self._report_encoding_problem(
                _("Mixed encoding (%(codecs_found)s) detected in %(input)s")
                % details,
                strict)

        return working

    def _report_encoding_problem(self, message, strict):
        """
        Raise an IntrusionException carrying message when strict is true,
        otherwise log message as a security-failure warning.
        """
        if strict:
            raise IntrusionException(_("Input validation failure"), message)
        self.logger.warning(Logger.SECURITY_FAILURE, message)

    def encode_for_css(self, input_):
        """Encode input_ with the CSS codec."""
        return self.css_codec.encode(DefaultEncoder.IMMUNE_CSS, input_)

    def encode_for_html(self, input_):
        """Encode input_ with the HTML entity codec for HTML content."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_HTML, input_)

    def encode_for_html_attribute(self, input_):
        """Encode input_ with the HTML entity codec for an HTML attribute."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_HTMLATTR, input_)

    def encode_for_javascript(self, input_):
        """Encode input_ with the JavaScript codec."""
        return self.javascript_codec.encode(
            DefaultEncoder.IMMUNE_JAVASCRIPT, input_)

    def encode_for_vbscript(self, input_):
        """Encode input_ with the VBScript codec."""
        return self.vbscript_codec.encode(
            DefaultEncoder.IMMUNE_VBSCRIPT, input_)

    def encode_for_sql(self, codec, input_):
        """Encode input_ with the caller-supplied SQL codec."""
        return codec.encode(DefaultEncoder.IMMUNE_SQL, input_)

    def encode_for_os(self, codec, input_):
        """Encode input_ with the caller-supplied OS codec."""
        return codec.encode(DefaultEncoder.IMMUNE_OS, input_)

    def encode_for_ldap(self, input_):
        """Encode input_ with the LDAP codec."""
        return self.ldap_codec.encode(DefaultEncoder.IMMUNE_LDAP, input_)

    def encode_for_dn(self, input_):
        """Encode input_ with the LDAP DN codec."""
        return self.ldap_dn_codec.encode(DefaultEncoder.IMMUNE_LDAP_DN, input_)

    def encode_for_xpath(self, input_):
        """Encode input_ for XPath using the HTML entity codec."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_XPATH, input_)

    def encode_for_xml(self, input_):
        """Encode input_ for XML content using the HTML entity codec."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_XML, input_)

    def encode_for_xml_attribute(self, input_):
        """Encode input_ for an XML attribute using the HTML entity codec."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_XMLATTR, input_)

    def encode_for_url(self, input_):
        """Percent-encode input_, leaving the RFC 3986 unreserved set alone."""
        return self.percent_codec.encode(DefaultEncoder.IMMUNE_URL, input_)

    def decode_from_url(self, input_):
        """
        Canonicalize input_ (strict mode) and then percent-decode the result.

        @return: the decoded text, or None when input_ is None
        """
        if input_ is None:
            return None
        canonical = self.canonicalize(input_)
        return self.percent_codec.decode(canonical)

    def encode_for_base64(self, input_):
        """
        Base64-encode input_.

        @return: the encoded value, or None if encoding fails
        """
        try:
            return base64.b64encode(input_)
        except Exception:
            # Best effort by design; unlike a bare except this lets
            # KeyboardInterrupt and SystemExit propagate.
            return None

    def decode_from_base64(self, input_):
        """
        Base64-decode input_.

        @return: the decoded value, or None if decoding fails
        """
        try:
            return base64.b64decode(input_)
        except Exception:
            # Best effort by design; unlike a bare except this lets
            # KeyboardInterrupt and SystemExit propagate.
            return None