Ejemplo n.º 1
0
    def substitute_entity(match):
        """Return the UTF-8 replacement text for one matched HTML entity.

        ``match`` must expose three groups: group 1 equals "#" for a numeric
        character reference, and groups 2 and 3 are concatenated to form the
        entity text (group 3 is presumably the optional trailing ";" --
        confirm against the regex that drives this callback).

        Numeric references made only of decimal digits are converted with
        ``unichr``; everything else is looked up in ``html5``, whose values
        are decoded with the "unicode-escape" codec.  Whenever the entity
        cannot be resolved, the matched text is returned unchanged.
        """
        is_numeric = match.group(1) == "#"
        ent = match.group(2) + match.group(3)
        res = ""

        if not is_numeric:
            # A bare '&' may simply be an argument separator in a URL inside
            # the data, so the match can carry extra trailing characters.
            # Trim them one at a time until a known entity name (or a ';'
            # terminated one) is left; the trimmed tail is kept in `res` and
            # re-appended after the replacement.  Stop when nothing is left.
            while ent and ent not in html5 and not ent.endswith(";"):
                res = ent[-1] + res
                ent = ent[:-1]

        if is_numeric:
            digits = ent.replace(";", "")
            # Only decimal bodies are converted; something like "x41;" would
            # make int() raise, so it falls through to the lookup below.
            if digits.isdigit():
                try:
                    return unichr(int(digits)).encode('utf-8')
                except (ValueError, OverflowError):
                    # Code point outside the range unichr() accepts.
                    return match.group()

        cp = html5.get(ent)
        if cp:
            return cp.decode("unicode-escape").encode('utf-8') + res
        return match.group()
Ejemplo n.º 2
0
    def substitute_entity(match):
        """Return the UTF-8 replacement text for one matched HTML entity.

        ``match`` must expose three groups: group 1 equals "#" for a numeric
        character reference, and groups 2 and 3 are concatenated to form the
        entity text (group 3 is presumably the optional trailing ";" --
        confirm against the regex that drives this callback).

        A numeric reference whose body is all decimal digits is converted
        with ``unichr``.  Any other entity is looked up in ``html5`` and its
        value decoded with the "unicode-escape" codec.  If nothing resolves,
        the matched text is returned unchanged.
        """
        numeric_ref = match.group(1) == "#"
        ent = match.group(2) + match.group(3)
        res = ""

        # Exception for when '&' is used as an argument in the URLs contained
        # in the data: drop trailing characters until `ent` is a known entity
        # name or ends with ';', remembering the dropped tail in `res`.
        # Numeric references are never trimmed, and an exhausted `ent` ends
        # the loop explicitly instead of relying on a caught IndexError.
        while (not numeric_ref and ent
               and ent not in html5 and not ent.endswith(";")):
            res = ent[-1] + res
            ent = ent[:-1]

        body = ent.replace(";", "")
        if numeric_ref and body.isdigit():
            try:
                char = unichr(int(body))
            except (ValueError, OverflowError):
                # The number is not a code point unichr() can represent;
                # leave the original text untouched rather than abort.
                return match.group()
            return char.encode('utf-8')

        cp = html5.get(ent)
        if not cp:
            return match.group()
        return cp.decode("unicode-escape").encode('utf-8') + res