Example #1
    def sanitize_token(self, token):

        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in tokenTypes.keys():
            token_type = tokenTypes[token_type]

        if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
                          tokenTypes["EmptyTag"]):
            if token["name"] in self.allowed_elements:
                if "data" in token:
                    attrs = dict([(name,val) for name,val in
                                  token["data"][::-1] 
                                  if name in self.allowed_attributes])
                    for attr in self.attr_val_is_uri:
                        if attr not in attrs:
                            continue
                        val_unescaped = re.sub(r"[`\000-\040\177-\240\s]+", '',
                                               unescape(attrs[attr])).lower()
                        #remove replacement characters from unescaped characters
                        val_unescaped = val_unescaped.replace(u"\ufffd", "")
                        if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and
                            (val_unescaped.split(':')[0] not in 
                             self.allowed_protocols)):
                            del attrs[attr]
                    for attr in self.svg_attr_val_allows_ref:
                        if attr in attrs:
                            attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                                 ' ',
                                                 unescape(attrs[attr]))
                    if (token["name"] in self.svg_allow_local_href and
                        'xlink:href' in attrs and re.search(r'^\s*[^#\s].*',
                                                            attrs['xlink:href'])):
                        del attrs['xlink:href']
                    if 'style' in attrs:
                        attrs['style'] = self.sanitize_css(attrs['style'])
                    token["data"] = [[name,val] for name,val in attrs.items()]
                return token
            else:
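                # element is not allowed: turn the tag back into literal text and emit it as character data rather than dropping it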
                if token_type == tokenTypes["EndTag"]:
                    token["data"] = "</%s>" % token["name"]
                elif token["data"]:
                    attrs = ''.join([' %s="%s"' % (k,escape(v)) for k,v in token["data"]])
                    token["data"] = "<%s%s>" % (token["name"],attrs)
                else:
                    token["data"] = "<%s>" % token["name"]
                if token.get("selfClosing"):
                    token["data"]=token["data"][:-1] + "/>"

                if token["type"] in tokenTypes.keys():
                    token["type"] = "Characters"
                else:
                    token["type"] = tokenTypes["Characters"]

                del token["name"]
                return token
        elif token_type == tokenTypes["Comment"]:
            pass
        else:
            return token
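
The URI handling above boils down to a protocol allow-list: the attribute value is entity-unescaped, backticks, control characters and whitespace are stripped, and if what remains starts with a "scheme:" whose scheme is not in allowed_protocols, the attribute is dropped. Below is a minimal standalone sketch of that check; the allow-list contents are illustrative rather than html5lib's defaults, and unescape is assumed to be xml.sax.saxutils.unescape as in the snippets here.

    import re
    from xml.sax.saxutils import unescape  # assumed source of unescape() in the examples

    ALLOWED_PROTOCOLS = {"http", "https", "mailto"}  # illustrative subset, not html5lib's default list

    def uri_value_is_allowed(value):
        # strip backticks, control characters and whitespace, as the regex above does
        cleaned = re.sub(r"[`\000-\040\177-\240\s]+", "", unescape(value)).lower()
        cleaned = cleaned.replace("\ufffd", "")
        # no "scheme:" prefix means a relative URI, which is kept
        if not re.match(r"^[a-z0-9][-+.a-z0-9]*:", cleaned):
            return True
        return cleaned.split(":")[0] in ALLOWED_PROTOCOLS

    print(uri_value_is_allowed("https://example.com/img.png"))  # True
    print(uri_value_is_allowed("java\tscript:alert(1)"))        # False: the tab is stripped, leaving a javascript: scheme
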
Example #2
    def sanitize_token(self, token):

        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in tokenTypes.keys():
            token_type = tokenTypes[token_type]

        if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
                          tokenTypes["EmptyTag"]):
            if token["name"] in self.allowed_elements:
                if "data" in token:
                    attrs = dict([(name,val) for name,val in
                                  token["data"][::-1] 
                                  if name in self.allowed_attributes])
                    for attr in self.attr_val_is_uri:
                        if attr not in attrs:
                            continue
                        val_unescaped = re.sub(r"[`\000-\040\177-\240\s]+", '',
                                               unescape(attrs[attr])).lower()
                        #remove replacement characters from unescaped characters
                        val_unescaped = val_unescaped.replace(u"\ufffd", "")
                        if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and
                            (val_unescaped.split(':')[0] not in 
                             self.allowed_protocols)):
                            del attrs[attr]
                    for attr in self.svg_attr_val_allows_ref:
                        if attr in attrs:
                            attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                                 ' ',
                                                 unescape(attrs[attr]))
                    if (token["name"] in self.svg_allow_local_href and
                        'xlink:href' in attrs and re.search(r'^\s*[^#\s].*',
                                                            attrs['xlink:href'])):
                        del attrs['xlink:href']
                    if 'style' in attrs:
                        attrs['style'] = self.sanitize_css(attrs['style'])
                    token["data"] = [[name,val] for name,val in attrs.items()]
                return token
            else:
                if token_type == tokenTypes["EndTag"]:
                    token["data"] = "</%s>" % token["name"]
                elif token["data"]:
                    attrs = ''.join([' %s="%s"' % (k,escape(v)) for k,v in token["data"]])
                    token["data"] = "<%s%s>" % (token["name"],attrs)
                else:
                    token["data"] = "<%s>" % token["name"]
                if token.get("selfClosing"):
                    token["data"]=token["data"][:-1] + "/>"

                if token["type"] in tokenTypes.keys():
                    token["type"] = "Characters"
                else:
                    token["type"] = tokenTypes["Characters"]

                del token["name"]
                return token
        elif token_type == tokenTypes["Comment"]:
            pass
        else:
            return token
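
For elements that are not on the allow-list, neither example discards the token: the tag is re-serialized into its literal source text and relabelled as character data, so it is rendered as visible text rather than interpreted as markup. The distilled sketch below shows that branch in isolation with a concrete input and output; the token layout is a simplified stand-in for the dictionaries used above, and escape is assumed to be xml.sax.saxutils.escape.

    from xml.sax.saxutils import escape  # assumed source of escape() in the examples

    def escape_disallowed_tag(token):
        # rebuild the literal tag text from the token
        if token["type"] == "EndTag":
            token["data"] = "</%s>" % token["name"]
        elif token.get("data"):
            attrs = "".join(' %s="%s"' % (k, escape(v)) for k, v in token["data"])
            token["data"] = "<%s%s>" % (token["name"], attrs)
        else:
            token["data"] = "<%s>" % token["name"]
        if token.get("selfClosing"):
            token["data"] = token["data"][:-1] + "/>"
        # downgrade the token to plain character data
        token["type"] = "Characters"
        del token["name"]
        return token

    print(escape_disallowed_tag(
        {"type": "StartTag", "name": "script", "data": [["src", "evil.js"]]}))
    # {'type': 'Characters', 'data': '<script src="evil.js">'}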