Ejemplo n.º 1
0
 def encode_character(self, immune, char):
     """
     Return the HTML entity encoding of a single character.

     Characters in immune, characters for which codec.is_8bit() is false,
     and alphanumerics are returned unchanged. Control characters other
     than tab, line feed and carriage return are replaced by one space.
     Anything else becomes a named entity when
     self.entity_values_to_names has one for its ordinal, otherwise a
     lowercase hexadecimal character reference.

     @param immune: collection of characters that must not be encoded
     @param char: the single character to encode
     @return: char itself, a space, or an entity string
     """
     if char in immune:
         return char

     code_point = ord(char)

     # Outside the 8-bit range, or alphanumeric: nothing to encode.
     if not codec.is_8bit(code_point) or char.isalnum():
         return char

     # Illegal control characters are blanked out rather than encoded.
     if codec.is_control_char(code_point) and char not in ("\t", "\n", "\r"):
         return " "

     name = self.entity_values_to_names.get(code_point)
     if name is None:
         # No named entity defined: fall back to the hex form.
         return "&#x" + codec.get_hex_for_char(code_point).lower() + ";"
     return "&" + name + ";"
Ejemplo n.º 2
0
 def encode_character(self, immune, char):
     """
     Percent-encodes a single character according to the spec at
     U{W3.org<http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1>}.
     Spaces are replaced by '+'. All other characters not in immune are
     escaped as described in
     U{this document<http://tools.ietf.org/html/rfc3986#section-2.1>}.

     Only ASCII letters and digits are passed through unescaped. The
     isalnum() string method is Unicode-aware and also accepts 8-bit
     characters such as e-acute (U+00E9) or superscript two (U+00B2);
     those are not unreserved characters in RFC 3986, so they are escaped
     like every other non-alphanumeric 8-bit character.

     Characters for which codec.is_8bit() is false are returned unchanged.

     @param immune: collection of characters that must not be encoded
     @param char: the single character to encode
     @return: char itself, '+', or '%' followed by uppercase hex digits
     """
     # check for immunes
     if char in immune:
         return char

     if char == ' ':
         return '+'

     ord_char = ord(char)

     # Only look at 8-bit
     if not codec.is_8bit(ord_char):
         return char

     # Pass ASCII alphanumerics only; the range test keeps Latin-1
     # letters and digits from slipping through unescaped.
     if ord_char < 0x80 and char.isalnum():
         return char

     hex_str = codec.get_hex_for_char(ord_char).upper()
     # A single hex digit needs a leading zero to form the two-digit escape.
     if ord_char < 0x10:
         hex_str = '0' + hex_str

     return '%' + hex_str
Ejemplo n.º 3
0
 def encode_character(self, immune, char):
     """
     Encode one character as a backslash-escaped numeric sequence.

     Backslash character escapes are deliberately not produced, as these
     can be used in attacks. Characters in immune, characters for which
     codec.is_8bit() is false, and alphanumerics are returned unchanged.

     @param immune: collection of characters that must not be encoded
     @param char: the single character to encode
     @return: char itself, or its numeric escape with uppercase hex digits
     """
     if char in immune:
         return char

     code_point = ord(char)

     # Outside the 8-bit range, or alphanumeric: nothing to encode.
     if not codec.is_8bit(code_point) or char.isalnum():
         return char

     digits = codec.get_hex_for_char(code_point).upper()

     if code_point < 256:
         # Two zero-padded hex digits: \xHH
         prefix, width = u"\\x", 2
     else:
         # Four zero-padded hex digits: \uHHHH. Per the original author's
         # note this is never reached, because 8-bit implies < 256.
         prefix, width = u"\\u", 4

     return prefix + digits.rjust(width, '0')
Ejemplo n.º 4
0
 def encode_character(self, immune, char):
     """
     Encode one character using the CSS backslash style.

     Characters in immune, characters for which codec.is_8bit() is false,
     and alphanumerics are returned unchanged. Everything else becomes a
     backslash, the value from codec.get_hex_for_char(), and one space.

     @param immune: collection of characters that must not be encoded
     @param char: the single character to encode
     @return: char itself, or its backslash escape
     """
     if char in immune:
         return char

     code_point = ord(char)

     passes_through = not codec.is_8bit(code_point) or char.isalnum()
     if passes_through:
         return char

     # The trailing whitespace terminates the hex escape.
     return "\\" + codec.get_hex_for_char(code_point) + " "