def _substitute_entity(match):
    """Return the replacement text for one matched HTML entity reference.

    Intended as the callable replacement argument of a regex substitution.

    Args:
        match: A regex match object with three groups: group(1) is ``"#"``
            for a numeric reference, group(2) is the hexadecimal marker
            (``"x"``) or empty, and group(3) is the entity body (digits or
            an entity name).

    Returns:
        The decoded character, or the full matched text unchanged when the
        reference cannot be decoded (unknown name, malformed number, or a
        code point outside the valid range).
    """
    marker = match.group(1)
    prefix = match.group(2) or ''
    ent = match.group(3)

    if marker == "#":
        # Numeric character reference: pick the radix from the prefix.
        if prefix == '':
            base = 10
        elif prefix in ('x', 'X'):
            base = 16
        else:
            # Unrecognised prefix: leave the text alone instead of
            # returning None from the callback.
            return match.group()
        try:
            return chr(int(ent, base))
        except (ValueError, OverflowError):
            # Non-numeric body or a value chr() cannot represent.
            return match.group()

    # Named reference. Assumes a pattern such as ``&(#?)(x?)(\w+);`` (the
    # pattern is not defined in this function -- confirm), where an optional
    # hex-marker group swallows the leading "x" of names like "xi"; put it
    # back before the lookup so those names resolve.
    cp = name2codepoint.get(prefix + ent)
    if cp:
        return chr(cp)
    return match.group()
def test_html_entities(self):
    """Run ``perform`` on a snippet containing spaces and angle brackets.

    Before the comparison, the expected text is checked to contain both an
    ordinary space (code point 32) and a non-breaking space (code point 160).
    """
    # NOTE(review): as displayed, neither literal contains a visible entity
    # reference (e.g. "&nbsp;", "&lt;"); confirm the literals still hold the
    # intended entities / U+00A0 character and were not altered in transit.
    markup = "<div>Spam Spam < Spam > Spam</div>"
    expected = '<div>Spam Spam < Spam > Spam</div>'

    # 32 is the normal space, 160 the non-breaking space.
    for code_point in (32, 160):
        assert chr(code_point) in expected

    perform(markup, expected)