Beispiel #1
0
    def _populate_class_variables():
        """Build the entity lookup tables and the substitution regex.

        Returns a 3-tuple ``(lookup, reverse_lookup, pattern)``:

        * ``lookup`` maps a character to its entity name for every entry of
          ``codepoint2name`` except the double quote (34) and the single
          quote (39).
        * ``reverse_lookup`` maps an entity name to its character for every
          entry of ``codepoint2name`` plus ``apos``.
        * ``pattern`` is a compiled character class made of exactly the
          characters that are keys of ``lookup``.
        """
        # &apos; is an XHTML and HTML 5 entity but not an HTML 4 one: it is
        # recognized on the way in but never produced on the way out.
        #
        # TODO: Ideally every HTML 5 named entity would be recognized here.
        entries = list(codepoint2name.items())
        entries.append((39, 'apos'))

        # Quotes are left out of the outgoing table: turning them into
        # &quot; / &apos; only matters inside attribute values, which this
        # function does not deal with.
        quotes = (34, 39)
        outgoing = [(unichr(code), entity)
                    for code, entity in entries if code not in quotes]

        lookup = dict(outgoing)
        # Every entity, quotes included, is convertible back to a character.
        reverse_lookup = dict(
            (entity, unichr(code)) for code, entity in entries)
        char_class = "[%s]" % "".join(char for char, _ in outgoing)
        return lookup, reverse_lookup, re.compile(char_class)
Beispiel #2
0
 def _populate_class_variables():
     """Build entity tables and a regex from ``codepoint2name``.

     Returns ``(lookup, reverse_lookup, pattern)`` where ``lookup`` maps
     character -> entity name, ``reverse_lookup`` maps entity name ->
     character, and ``pattern`` is a compiled character class of all the
     characters in ``lookup``. The double quote (codepoint 34) is left out
     of all three.
     """
     char_to_entity = {}
     entity_to_char = {}
     class_members = []
     for code, entity in codepoint2name.items():
         # Turning the quotation mark into &quot; is only useful inside an
         # attribute value, so it is not registered here at all.
         if code != 34:
             char = unichr(code)
             char_to_entity[char] = entity
             entity_to_char[entity] = char
             class_members.append(char)
     return (
         char_to_entity,
         entity_to_char,
         re.compile("[" + "".join(class_members) + "]"),
     )
Beispiel #3
0
 def _populate_class_variables():
     """Derive the entity tables and matching regex from ``codepoint2name``.

     Returns ``(lookup, reverse_lookup, pattern)``: character -> entity
     name, entity name -> character, and a compiled character class that
     matches the characters in ``lookup``. Codepoint 34 (the double quote)
     is skipped entirely.
     """
     # There is no point in turning the quotation mark into &quot; unless
     # it sits inside an attribute value, so it is filtered out up front.
     pairs = [(unichr(code), entity)
              for code, entity in codepoint2name.items()
              if code != 34]

     lookup = dict(pairs)
     reverse_lookup = dict((entity, char) for char, entity in pairs)
     char_class = "[%s]" % "".join(char for char, _ in pairs)
     return lookup, reverse_lookup, re.compile(char_class)
 def _populate_class_variables():
     """Build the outgoing and incoming entity tables plus the regex.

     Returns ``(lookup, reverse_lookup, pattern)``:

     * ``lookup``: character -> entity name, without the double quote.
     * ``reverse_lookup``: entity name -> character, for every entry of
       ``codepoint2name`` (``quot`` included).
     * ``pattern``: compiled character class of the keys of ``lookup``.
     """
     lookup, reverse_lookup = {}, {}
     escapable = []
     for code, entity in codepoint2name.items():
         char = unichr(code)
         # Every entity, &quot; included, is turned back into its character.
         reverse_lookup[entity] = char
         if code == 34:
             # The quotation mark itself is not replaced by &quot;; that
             # only matters inside attribute values.
             continue
         escapable.append(char)
         lookup[char] = entity
     pattern = re.compile("[%s]" % "".join(escapable))
     return lookup, reverse_lookup, pattern
Beispiel #5
0
 def _populate_class_variables(self):
     """Return ``(lookup, reverse_lookup, pattern)`` built from ``codepoint2name``.

     ``lookup`` maps character -> entity name and omits the double quote
     (codepoint 34); ``reverse_lookup`` maps entity name -> character for
     every entry; ``pattern`` is a compiled character class containing the
     keys of ``lookup``. ``self`` is not used.
     """
     # Decode each codepoint once, keeping the number for the filter below.
     decoded = [(code, unichr(code), entity)
                for code, entity in codepoint2name.items()]

     # On the way in, every entity (&quot; too) maps back to a character.
     reverse_lookup = dict((entity, char) for _, char, entity in decoded)

     # On the way out, the quotation mark is not turned into &quot;; that
     # is only needed inside attribute values.
     escapable = [(char, entity)
                  for code, char, entity in decoded if code != 34]
     lookup = dict(escapable)

     re_definition = "[%s]" % "".join(char for char, _ in escapable)
     return lookup, reverse_lookup, re.compile(re_definition)
Beispiel #6
0
 def _populate_class_variables():
     """Build entity substitution tables and the regex that drives them.

     Returns a tuple ``(lookup, reverse_lookup, pattern)``. ``lookup`` maps
     each character to its entity name, skipping the double quote
     (codepoint 34). ``reverse_lookup`` maps each entity name, ``quot``
     included, back to its character. ``pattern`` is a compiled character
     class listing the characters that are keys of ``lookup``.
     """
     lookup = {}
     reverse_lookup = {}
     # The character class is assembled piecewise: "[" + members + "]".
     pieces = ["["]
     for code, entity in codepoint2name.items():
         char = unichr(code)
         # Incoming direction: every named entity is known.
         reverse_lookup[entity] = char
         # Outgoing direction: everything but the quotation mark.
         if code != 34:
             lookup[char] = entity
             pieces.append(char)
     pieces.append("]")
     return lookup, reverse_lookup, re.compile("".join(pieces))
Beispiel #7
0
def encode_htmlentities(string, encoding="utf-8"):
    """
    Encode the string with HTML entities.

    Every character that has a named entity in ``cp2n`` is replaced by
    ``&name;``. A text (unicode) string is processed and returned as text.
    Any other input is decoded with ``encoding`` first and the result is
    encoded back with the same ``encoding`` before it is returned.

    :param string: text or byte string to encode
    :param encoding: codec used to decode and re-encode non-text input
    :return: the entity-encoded string, of the same kind as the input
    :raises UnicodeError: if non-text input cannot be decoded, or the
        result cannot be re-encoded, with ``encoding``
    """
    was_unicode = isinstance(string, unicode)
    if not was_unicode:
        string = string.decode(encoding)

    # Ampersands have to be escaped first, and only once: every replacement
    # made in the loop below introduces new "&" characters that must stay
    # as they are. This is why the loop skips "amp".
    string = string.replace("&", "&amp;")

    for codepoint, name in cp2n.items():
        if name != "amp":
            string = string.replace(unichr(codepoint), "&%s;" % name)

    if not was_unicode:
        string = string.encode(encoding)
    return string