def sanitize_token(self, token): # accommodate filters which use token_type differently token_type = token["type"] if token_type in tokenTypes.keys(): token_type = tokenTypes[token_type] if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"], tokenTypes["EmptyTag"]): if token["name"] in self.allowed_elements: if token.has_key("data"): attrs = dict([(name,val) for name,val in token["data"][::-1] if name in self.allowed_attributes]) for attr in self.attr_val_is_uri: if not attrs.has_key(attr): continue val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '', unescape(attrs[attr])).lower() #remove replacement characters from unescaped characters val_unescaped = val_unescaped.replace(u"\ufffd", "") if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and (val_unescaped.split(':')[0] not in self.allowed_protocols)): del attrs[attr] for attr in self.svg_attr_val_allows_ref: if attr in attrs: attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', ' ', unescape(attrs[attr])) if (token["name"] in self.svg_allow_local_href and 'xlink:href' in attrs and re.search('^\s*[^#\s].*', attrs['xlink:href'])): del attrs['xlink:href'] if attrs.has_key('style'): attrs['style'] = self.sanitize_css(attrs['style']) token["data"] = [[name,val] for name,val in attrs.items()] return token else: token["data"] = "" if token["type"] in tokenTypes.keys(): token["type"] = "Characters" else: token["type"] = tokenTypes["Characters"] del token["name"] return token elif token_type == tokenTypes["Comment"]: pass else: return token
def sanitize_token(self, token): # accommodate filters which use token_type differently token_type = token["type"] if token_type in tokenTypes.keys(): token_type = tokenTypes[token_type] if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"], tokenTypes["EmptyTag"]): token["name"] = token["name"].lower() if token["name"] in self.allowed_elements: if "data" in token: attrs = { name: val for name, val in token["data"][::-1] if name in self.allowed_attributes } for attr in self.attr_val_is_uri: if attr not in attrs: continue val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '', unescape(attrs[attr])).lower() # remove replacement characters from unescaped # characters val_unescaped = val_unescaped.replace(u"\ufffd", "") if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and (val_unescaped.split(':')[0] not in self.allowed_protocols)): del attrs[attr] for attr in self.svg_attr_val_allows_ref: if attr in attrs: attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', ' ', unescape(attrs[attr])) if (token["name"] in self.svg_allow_local_href and 'xlink:href' in attrs and re.search( '^\s*[^#\s].*', attrs['xlink:href'])): del attrs['xlink:href'] if 'style' in attrs: attrs['style'] = self.sanitize_css(attrs['style']) token["data"] = [[name, val] for name, val in attrs.items()] return token else: if token["name"] in self.remove_tags: token["name"] = "toberemoved" if token_type == tokenTypes["EndTag"]: token["data"] = "</{0!s}>".format(token["name"]) elif token["data"]: attrs = ''.join([ ' {0!s}="{1!s}"'.format(k, escape(v)) for k, v in token["data"] ]) token["data"] = "<{0!s}{1!s}>".format( token["name"], attrs) else: token["data"] = "<{0!s}>".format(token["name"]) if token.get("selfClosing"): token["data"] = token["data"][:-1] + "/>" if token["type"] in tokenTypes.keys(): token["type"] = "Characters" else: token["type"] = tokenTypes["Characters"] if "name" in token and token["name"] == "style": print "style", token["data"], dir(token) return token elif token_type == tokenTypes["Comment"]: pass else: return token
def sanitize_token(self, token): # accommodate filters which use token_type differently token_type = token["type"] if token_type in tokenTypes.keys(): token_type = tokenTypes[token_type] if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"], tokenTypes["EmptyTag"]): token["name"] = token["name"].lower() if token["name"] in self.allowed_elements: if "data" in token: attrs = {name: val for name, val in token["data"][::-1] if name in self.allowed_attributes} for attr in self.attr_val_is_uri: if attr not in attrs: continue val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '', unescape(attrs[attr])).lower() # remove replacement characters from unescaped # characters val_unescaped = val_unescaped.replace(u"\ufffd", "") if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and (val_unescaped.split(':')[0] not in self.allowed_protocols)): del attrs[attr] for attr in self.svg_attr_val_allows_ref: if attr in attrs: attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', ' ', unescape(attrs[attr])) if (token["name"] in self.svg_allow_local_href and 'xlink:href' in attrs and re.search('^\s*[^#\s].*', attrs['xlink:href'])): del attrs['xlink:href'] if 'style' in attrs: attrs['style'] = self.sanitize_css(attrs['style']) token["data"] = [[name, val] for name, val in attrs.items()] return token else: if token["name"] in self.remove_tags: token["name"] = "toberemoved" if token_type == tokenTypes["EndTag"]: token["data"] = "</{0!s}>".format(token["name"]) elif token["data"]: attrs = ''.join([' {0!s}="{1!s}"'.format(k, escape(v)) for k, v in token["data"]]) token["data"] = "<{0!s}{1!s}>".format(token["name"], attrs) else: token["data"] = "<{0!s}>".format(token["name"]) if token.get("selfClosing"): token["data"] = token["data"][:-1] + "/>" if token["type"] in tokenTypes.keys(): token["type"] = "Characters" else: token["type"] = tokenTypes["Characters"] if "name" in token and token["name"] == "style": print "style", token["data"], dir(token) return token elif token_type == tokenTypes["Comment"]: pass else: return token