def check_in_range(self, from_, to, encode=False):
    """Check that every code point in ``from_``..``to`` can be stripped and stored.

    For each integer in the inclusive range, the character produced by
    ``unicode_chr`` (optionally encoded as UTF-8 when ``encode`` is true) is
    passed through ``strip_invalid_characters``; the result is then assigned
    as both the text and the ``some_attr`` attribute of a scratch element.

    A ``UnicodeEncodeError`` whose reason is ``'surrogates not allowed'`` is
    ignored (presumably lone surrogates, which cannot be UTF-8 encoded); any
    other ``UnicodeEncodeError`` propagates. Every other exception is
    reported through ``self.fail`` together with the offending code point.
    """
    node = etree.Element('test')

    for code_point in range(from_, to + 1):
        try:
            text = unicode_chr(code_point)
            candidate = text.encode('utf-8') if encode else text
            cleaned = strip_invalid_characters(candidate)
            node.text = cleaned
            node.set('some_attr', cleaned)
        except UnicodeEncodeError as err:
            # Only the surrogate case is tolerated; anything else is a
            # genuine encoding problem and must surface unchanged.
            if err.reason == 'surrogates not allowed':
                continue
            raise
        except Exception as err:
            message = r'Failed on unicode char \0x{char:x}: {e}'.format(
                char=code_point, e=err)
            self.fail(message)
def is_valid_js_identifier(identifier, escape=r'\u', ucd_cat=category):
    """Return whether ``identifier`` is a valid Javascript identifier.

    Parameters:
        identifier: the candidate name. Values that are not already
            ``unicode_type`` are decoded as UTF-8; undecodable input is
            rejected.
        escape: the prefix that introduces a four-hex-digit escape sequence
            inside the identifier. Each occurrence must be followed by
            exactly four ASCII hexadecimal digits, which are replaced by the
            character they denote before validation.
        ucd_cat: callable mapping a character to its Unicode general
            category.

    Returns:
        ``True`` when the unescaped identifier is non-empty, is not a
        reserved word according to ``is_reserved_js_word``, its first
        character is in ``valid_jsid_chars`` or has a category listed in
        ``valid_jsid_categories_start``, and every following character is in
        ``valid_jsid_chars`` or has a category listed in
        ``valid_jsid_categories``. ``False`` otherwise.
    """
    if not identifier:
        return False

    if not isinstance(identifier, unicode_type):
        try:
            identifier = unicode_type(identifier, 'utf-8')
        except UnicodeDecodeError:
            return False

    if escape in identifier:
        ascii_hex = '0123456789abcdefABCDEF'
        segments = identifier.split(escape)
        # Text before the first escape is kept verbatim.
        pieces = [segments[0]]
        for segment in segments[1:]:
            code = segment[:4]
            # int(..., 16) alone is too lenient: it tolerates surrounding
            # whitespace, an underscore after the prefix and non-ASCII
            # decimal digits, so the four digits are checked explicitly.
            if len(code) < 4 or not all(digit in ascii_hex for digit in code):
                return False
            try:
                pieces.append(unicode_chr(int(code, 16)))
            except ValueError:
                return False
            pieces.append(segment[4:])
        identifier = u''.join(pieces)

    if is_reserved_js_word(identifier):
        return False

    first_char = identifier[0]
    if not (first_char in valid_jsid_chars
            or ucd_cat(first_char) in valid_jsid_categories_start):
        return False

    return all(
        char in valid_jsid_chars or ucd_cat(char) in valid_jsid_categories
        for char in identifier[1:]
    )
def is_unicode_32bit_supported():
    """Report whether ``unicode_chr`` accepts the code point 0x10FFFF.

    Returns ``True`` when ``unicode_chr(0x10FFFF)`` succeeds and ``False``
    when it raises ``ValueError`` (presumably the case on interpreters
    limited to 16-bit code units -- confirm against ``unicode_chr``).
    """
    try:
        unicode_chr(0x10FFFF)
    except ValueError:
        return False
    else:
        return True