Beispiel #1
0
 def sanitize_token(self, token):
     """Sanitize one tokenizer token and return it, or None to drop it.

     Start, end and empty tags whose name is in ``self.allowed_elements``
     keep only the attributes listed in ``self.allowed_attributes``; URI
     attributes with a scheme outside ``self.allowed_protocols`` are
     deleted, non-local ``url(...)`` references in SVG attributes are
     blanked, and ``style`` is passed through ``self.sanitize_css``.
     Any other tag is rewritten in place into an empty ``Characters``
     token. Comment tokens are dropped (``None`` is returned). Every
     other token is returned unchanged.

     The token dict is modified in place.
     """
     # accommodate filters which use token_type differently
     token_type = token["type"]
     if token_type in tokenTypes:
         token_type = tokenTypes[token_type]

     if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"], tokenTypes["EmptyTag"]):
         if token["name"] in self.allowed_elements:
             if "data" in token:
                 # The list is walked in reverse so that, for a duplicated
                 # attribute, the first occurrence ends up in the dict.
                 attrs = dict((name, val) for name, val in token["data"][::-1]
                              if name in self.allowed_attributes)
                 for attr in self.attr_val_is_uri:
                     if attr not in attrs:
                         continue
                     # Strip backticks, control characters and whitespace so
                     # an obfuscated scheme cannot slip past the check below.
                     val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '', unescape(attrs[attr])).lower()
                     # remove replacement characters from unescaped characters
                     val_unescaped = val_unescaped.replace(u"\ufffd", "")
                     if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and (val_unescaped.split(':')[0] not in self.allowed_protocols)):
                         del attrs[attr]
                 # This pass must run exactly once per token, whether or not
                 # any URI attribute is present, so it sits outside the loop
                 # above.
                 for attr in self.svg_attr_val_allows_ref:
                     if attr in attrs:
                         attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', ' ', unescape(attrs[attr]))
                 # Drop xlink:href values whose first non-blank character is
                 # not '#' on elements listed in svg_allow_local_href.
                 if (token["name"] in self.svg_allow_local_href and 'xlink:href' in attrs and re.search('^\s*[^#\s].*', attrs['xlink:href'])):
                     del attrs['xlink:href']
                 if 'style' in attrs:
                     attrs['style'] = self.sanitize_css(attrs['style'])
                 token["data"] = [[name, val] for name, val in attrs.items()]
             return token
         else:
             # Disallowed tag: replace it with an empty text token.
             token["data"] = ""

             # Keep the same representation (name or numeric code) the
             # incoming token used for its type.
             if token["type"] in tokenTypes:
                 token["type"] = "Characters"
             else:
                 token["type"] = tokenTypes["Characters"]
             del token["name"]
             return token
     elif token_type == tokenTypes["Comment"]:
         # Comments are dropped.
         return None
     else:
         return token
Beispiel #2
0
    def sanitize_token(self, token):
        """Sanitize one tokenizer token and return it, or None to drop it.

        For start, end and empty tags the tag name is lower-cased first.

        * Tags in ``self.allowed_elements`` keep only attributes listed in
          ``self.allowed_attributes``. URI attributes whose scheme is not
          in ``self.allowed_protocols`` are deleted, non-local ``url(...)``
          references in SVG attributes are blanked, and ``style`` goes
          through ``self.sanitize_css``.
        * Tags in ``self.remove_tags`` are renamed to ``toberemoved`` and
          rewritten in place as a ``Characters`` token whose data is the
          textual form of that tag (attributes escaped).
        * Any other tag, and every comment, is dropped (``None``).

        All remaining token types are returned unchanged. The token dict
        is modified in place.
        """
        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in tokenTypes:
            token_type = tokenTypes[token_type]

        if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
                          tokenTypes["EmptyTag"]):
            token["name"] = token["name"].lower()
            if token["name"] in self.allowed_elements:
                if "data" in token:
                    # Walk the list in reverse so that the first occurrence
                    # of a duplicated attribute is the one kept.
                    attrs = {
                        name: val
                        for name, val in token["data"][::-1]
                        if name in self.allowed_attributes
                    }
                    for attr in self.attr_val_is_uri:
                        if attr not in attrs:
                            continue
                        # Strip backticks, control characters and whitespace
                        # before looking for a scheme.
                        val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                               unescape(attrs[attr])).lower()
                        # remove replacement characters from unescaped
                        # characters
                        val_unescaped = val_unescaped.replace(u"\ufffd", "")
                        if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped)
                                and (val_unescaped.split(':')[0]
                                     not in self.allowed_protocols)):
                            del attrs[attr]
                    for attr in self.svg_attr_val_allows_ref:
                        if attr in attrs:
                            attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                                 ' ', unescape(attrs[attr]))
                    # Drop xlink:href values whose first non-blank character
                    # is not '#'.
                    if (token["name"] in self.svg_allow_local_href
                            and 'xlink:href' in attrs and re.search(
                                '^\s*[^#\s].*', attrs['xlink:href'])):
                        del attrs['xlink:href']
                    if 'style' in attrs:
                        attrs['style'] = self.sanitize_css(attrs['style'])
                    token["data"] = [[name, val]
                                     for name, val in attrs.items()]
                return token

            if token["name"] in self.remove_tags:
                token["name"] = "toberemoved"

                if token_type == tokenTypes["EndTag"]:
                    token["data"] = "</{0!s}>".format(token["name"])
                elif token.get("data"):
                    # .get(): a tag token without a "data" key is treated
                    # like one with no attributes instead of raising.
                    attrs = ''.join([
                        ' {0!s}="{1!s}"'.format(k, escape(v))
                        for k, v in token["data"]
                    ])
                    token["data"] = "<{0!s}{1!s}>".format(
                        token["name"], attrs)
                else:
                    token["data"] = "<{0!s}>".format(token["name"])
                if token.get("selfClosing"):
                    token["data"] = token["data"][:-1] + "/>"

                # Keep the same representation (name or numeric code) the
                # incoming token used for its type.
                if token["type"] in tokenTypes:
                    token["type"] = "Characters"
                else:
                    token["type"] = tokenTypes["Characters"]

                return token

            # Neither allowed nor listed in remove_tags: drop the tag.
            return None
        elif token_type == tokenTypes["Comment"]:
            # Comments are dropped.
            return None
        else:
            return token
Beispiel #3
0
    def sanitize_token(self, token):
        """Sanitize one tokenizer token and return it, or None to drop it.

        Start, end and empty tags have their name lower-cased and are then
        handled according to that name:

        * in ``self.allowed_elements``: attributes are filtered and
          cleaned, and the token is returned;
        * in ``self.remove_tags``: the token is rewritten in place as a
          ``Characters`` token holding the text of a ``toberemoved`` tag;
        * otherwise: the token is dropped (``None``).

        Comment tokens are dropped as well. Every other token type is
        returned unchanged. The token dict is modified in place.
        """
        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in tokenTypes:
            token_type = tokenTypes[token_type]

        if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
                          tokenTypes["EmptyTag"]):
            token["name"] = token["name"].lower()
            if token["name"] in self.allowed_elements:
                if "data" in token:
                    token["data"] = self._sanitize_attributes(
                        token["name"], token["data"])
                return token
            if token["name"] in self.remove_tags:
                return self._removed_tag_as_text(token, token_type)
            # Neither allowed nor listed in remove_tags: drop the tag.
            return None
        elif token_type == tokenTypes["Comment"]:
            # Comments are dropped.
            return None
        else:
            return token

    def _sanitize_attributes(self, tag_name, data):
        """Return the allowed, cleaned ``[name, value]`` pairs from *data*.

        *data* is a sequence of ``(name, value)`` pairs and *tag_name* is
        the (already lower-cased) element name they belong to.
        """
        # Walk the list in reverse so that the first occurrence of a
        # duplicated attribute is the one kept.
        attrs = {name: val for name, val in data[::-1]
                 if name in self.allowed_attributes}
        for attr in self.attr_val_is_uri:
            if attr not in attrs:
                continue
            # Strip backticks, control characters and whitespace before
            # looking for a scheme.
            val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                   unescape(attrs[attr])).lower()
            # remove replacement characters from unescaped characters
            val_unescaped = val_unescaped.replace(u"\ufffd", "")
            if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
                    (val_unescaped.split(':')[0] not in
                     self.allowed_protocols)):
                del attrs[attr]
        for attr in self.svg_attr_val_allows_ref:
            if attr in attrs:
                attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(attrs[attr]))
        # Drop xlink:href values whose first non-blank character is not '#'.
        if (tag_name in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
            del attrs['xlink:href']
        if 'style' in attrs:
            attrs['style'] = self.sanitize_css(attrs['style'])
        return [[name, val] for name, val in attrs.items()]

    def _removed_tag_as_text(self, token, token_type):
        """Rewrite *token* in place as text of a ``toberemoved`` tag.

        *token_type* is the numeric type the token had on entry; it
        decides whether an opening or a closing tag is rendered. Returns
        the same token object, now of type ``Characters``.
        """
        token["name"] = "toberemoved"

        if token_type == tokenTypes["EndTag"]:
            token["data"] = "</{0!s}>".format(token["name"])
        elif token.get("data"):
            # .get(): a tag token without a "data" key is treated like one
            # with no attributes instead of raising.
            attrs = ''.join([' {0!s}="{1!s}"'.format(k, escape(v))
                             for k, v in token["data"]])
            token["data"] = "<{0!s}{1!s}>".format(token["name"], attrs)
        else:
            token["data"] = "<{0!s}>".format(token["name"])
        if token.get("selfClosing"):
            token["data"] = token["data"][:-1] + "/>"

        # Keep the same representation (name or numeric code) the incoming
        # token used for its type.
        if token["type"] in tokenTypes:
            token["type"] = "Characters"
        else:
            token["type"] = tokenTypes["Characters"]

        return token