def sanitize_token(self, token): # accommodate filters which use token_type differently token_type = token["type"] if token_type in tokenTypes.keys(): token_type = tokenTypes[token_type] if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"], tokenTypes["EmptyTag"]): if token["name"] in self.allowed_elements: if token.has_key("data"): attrs = dict([(name,val) for name,val in token["data"][::-1] if name in self.allowed_attributes]) for attr in self.attr_val_is_uri: if not attrs.has_key(attr): continue val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '', unescape(attrs[attr])).lower() #remove replacement characters from unescaped characters val_unescaped = val_unescaped.replace(u"\ufffd", "") if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and (val_unescaped.split(':')[0] not in self.allowed_protocols)): del attrs[attr] for attr in self.svg_attr_val_allows_ref: if attr in attrs: attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', ' ', unescape(attrs[attr])) if (token["name"] in self.svg_allow_local_href and 'xlink:href' in attrs and re.search('^\s*[^#\s].*', attrs['xlink:href'])): del attrs['xlink:href'] if attrs.has_key('style'): attrs['style'] = self.sanitize_css(attrs['style']) token["data"] = [[name,val] for name,val in attrs.items()] return token else: if token_type == tokenTypes["EndTag"]: token["data"] = "</%s>" % token["name"] elif token["data"]: attrs = ''.join([' %s="%s"' % (k,escape(v)) for k,v in token["data"]]) token["data"] = "<%s%s>" % (token["name"],attrs) else: token["data"] = "<%s>" % token["name"] if token.get("selfClosing"): token["data"]=token["data"][:-1] + "/>" if token["type"] in tokenTypes.keys(): token["type"] = "Characters" else: token["type"] = tokenTypes["Characters"] del token["name"] return token elif token_type == tokenTypes["Comment"]: pass else: return token
def sanitize_token(self, token): # accommodate filters which use token_type differently token_type = token["type"] if token_type in tokenTypes.keys(): token_type = tokenTypes[token_type] if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"], tokenTypes["EmptyTag"]): if token["name"] in self.allowed_elements: if "data" in token: attrs = dict([(name,val) for name,val in token["data"][::-1] if name in self.allowed_attributes]) for attr in self.attr_val_is_uri: if attr not in attrs: continue val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '', unescape(attrs[attr])).lower() #remove replacement characters from unescaped characters val_unescaped = val_unescaped.replace(u"\ufffd", "") if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and (val_unescaped.split(':')[0] not in self.allowed_protocols)): del attrs[attr] for attr in self.svg_attr_val_allows_ref: if attr in attrs: attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', ' ', unescape(attrs[attr])) if (token["name"] in self.svg_allow_local_href and 'xlink:href' in attrs and re.search('^\s*[^#\s].*', attrs['xlink:href'])): del attrs['xlink:href'] if 'style' in attrs: attrs['style'] = self.sanitize_css(attrs['style']) token["data"] = [[name,val] for name,val in attrs.items()] return token else: if token_type == tokenTypes["EndTag"]: token["data"] = "</%s>" % token["name"] elif token["data"]: attrs = ''.join([' %s="%s"' % (k,escape(v)) for k,v in token["data"]]) token["data"] = "<%s%s>" % (token["name"],attrs) else: token["data"] = "<%s>" % token["name"] if token.get("selfClosing"): token["data"]=token["data"][:-1] + "/>" if token["type"] in tokenTypes.keys(): token["type"] = "Characters" else: token["type"] = tokenTypes["Characters"] del token["name"] return token elif token_type == tokenTypes["Comment"]: pass else: return token