def substitute_entity(match):
    """Return the UTF-8 replacement text for one matched entity reference.

    Written to be used as the replacement callback of a regex substitution.

    Args:
        match: Regex match object. Group 1 is compared against "#" to detect
            a numeric reference; groups 2 and 3 are concatenated to form the
            entity text (presumably the name/number plus an optional ";" --
            confirm against the pattern that feeds this callback).

    Returns:
        For a decimal numeric reference, the corresponding character encoded
        as UTF-8. For any other reference, the ``html5`` entry for the entity
        (decoded with "unicode-escape" and re-encoded as UTF-8) followed by
        the trailing characters that were trimmed off to find a known name.
        When nothing can be decoded, the matched text is returned unchanged.
    """
    is_numeric = match.group(1) == "#"
    ent = match.group(2) + match.group(3)
    res = ""

    if is_numeric:
        digits = ent.replace(";", "")
        # Only plain decimal references are converted; anything else (for
        # example "x41;") used to crash in int() and now falls through to
        # the lookup below.
        if digits.isdigit():
            try:
                return unichr(int(digits)).encode('utf-8')
            except (ValueError, OverflowError):
                # Number is not a code point unichr() accepts: keep the
                # original text instead of aborting the whole substitution.
                return match.group()
    else:
        # Exception for when '&' is used as an argument in the URLs contained
        # in the data: strip trailing characters one at a time (keeping them
        # in `res`) until what is left is a known entity, ends with ";", or
        # is empty.
        while ent and ent not in html5 and not ent.endswith(";"):
            res = ent[-1] + res
            ent = ent[:-1]

    cp = html5.get(ent)
    if cp:
        return cp.decode("unicode-escape").encode('utf-8') + res
    return match.group()
def substitute_entity(match):
    """Decode a single matched HTML entity reference into UTF-8 text.

    Meant to be passed as the replacement function of a regex substitution.

    Args:
        match: Regex match object. Group 1 equal to "#" marks a numeric
            reference; the entity text is group 2 followed by group 3
            (presumably the name/number and an optional ";" -- verify
            against the pattern used by the caller).

    Returns:
        The UTF-8 encoded character for a decimal numeric reference, or the
        ``html5`` value for the entity (decoded with "unicode-escape" and
        re-encoded as UTF-8) plus any trailing characters that were removed
        while searching for a known name. If neither applies, the matched
        text is returned as-is.
    """
    entity = match.group(2) + match.group(3)
    numeric = match.group(1) == "#"
    trailing = ""

    # Exception for when '&' is used as an argument in the urls contained in
    # the data: shorten the candidate from the right, remembering what was
    # cut, until it is a known entity, ends with ";", or runs out. The
    # explicit emptiness check replaces a bare `except:` that was only there
    # to catch the IndexError raised by indexing an empty string.
    while (not numeric and entity and entity not in html5
           and not entity.endswith(";")):
        trailing = entity[-1] + trailing
        entity = entity[:-1]

    if numeric:
        number = entity.replace(";", "")
        if number.isdigit():
            try:
                return unichr(int(number)).encode('utf-8')
            except (ValueError, OverflowError):
                # Out-of-range code point: leave the reference untouched
                # rather than letting the exception abort the substitution.
                return match.group()
        # Non-decimal "#" references fall through to the name lookup.

    escaped = html5.get(entity)
    if not escaped:
        return match.group()
    return escaped.decode("unicode-escape").encode('utf-8') + trailing