def test_category_by_codepoint():
    """Check category lookups against the unicode tree and the ASCII tree."""
    full_tree = charstree.unicode_tree()
    digit_category = charstree.category_by_codepoint(full_tree, ord(u'1'))
    assert digit_category == 'Nd'
    letter_category = charstree.category_by_codepoint(full_tree, ord(u'я'))
    assert letter_category == 'Ll'

    # The same Cyrillic letter looked up in the ASCII tree must yield None.
    narrow_tree = charstree.ascii_tree()
    missing = charstree.category_by_codepoint(narrow_tree, ord(u'я'))
    assert missing is None
def test_unicode_tree_categories():
    """Check the unicode tree's categories against ``unicodedata``.

    The expected set is built by asking ``unicodedata.category`` for every
    code point from 0 through ``sys.maxunicode``.
    """
    known_categories = set(
        unicodedata.category(hunichr(codepoint))
        for codepoint in range(sys.maxunicode + 1)
    )
    reported = charstree.categories(charstree.unicode_tree())
    assert sorted(known_categories) == sorted(reported)
def __init__(
    self,
    whitelist_categories=None,
    blacklist_categories=None,
    blacklist_characters=None,
    min_codepoint=None,
    max_codepoint=None,
):
    """Build the filtered ASCII, unicode and spaces trees for this object.

    Each filter argument is normalised and then handed to
    ``charstree.filter_tree``.

    :param whitelist_categories: iterable of categories, converted to a
        set; ``None`` becomes an empty set.
    :param blacklist_categories: iterable of categories, converted to a
        set; ``None`` becomes an empty set.
    :param blacklist_characters: iterable of characters, converted to a
        set; ``None`` becomes an empty set.
    :param min_codepoint: lower code point bound; ``None`` means ``0``.
    :param max_codepoint: upper code point bound; ``None`` means
        ``sys.maxunicode``.
    :raises InvalidArgument: if the filtered unicode tree is empty.
    """
    whitelist_categories = set(whitelist_categories or [])
    blacklist_categories = set(blacklist_categories or [])
    blacklist_characters = set(blacklist_characters or [])

    # Test for None explicitly: ``max_codepoint or sys.maxunicode`` treated
    # an explicit 0 as "not given" and silently lifted the upper bound.
    if min_codepoint is None:
        min_codepoint = 0
    if max_codepoint is None:
        max_codepoint = sys.maxunicode
    min_codepoint = int(min_codepoint)
    max_codepoint = int(max_codepoint)

    self.ascii_tree = charstree.filter_tree(
        charstree.ascii_tree(),
        whitelist_categories,
        blacklist_categories,
        blacklist_characters,
        min_codepoint,
        max_codepoint,
    )
    self.unicode_tree = charstree.filter_tree(
        charstree.unicode_tree(),
        whitelist_categories,
        blacklist_categories,
        blacklist_characters,
        min_codepoint,
        max_codepoint,
    )
    # The spaces tree is derived from the already-filtered unicode tree,
    # narrowed further to the "Zs" and "Cc" categories.
    self.spaces_tree = charstree.filter_tree(
        self.unicode_tree,
        whitelist_categories={"Zs", "Cc"},
        blacklist_characters=blacklist_characters,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
    )

    self.blacklist_characters = blacklist_characters
    self.min_codepoint = min_codepoint
    self.max_codepoint = max_codepoint

    if not self.unicode_tree:
        raise InvalidArgument(
            "No characters could be produced."
            " Try to reduce white/black categories list"
            " or min/max allowed code points."
        )
def __init__(self, whitelist_categories=None, blacklist_categories=None,
             blacklist_characters=None, min_codepoint=None,
             max_codepoint=None):
    """Build the filtered ASCII, unicode and spaces trees for this object.

    Each filter argument is normalised and then handed to
    ``charstree.filter_tree``.

    :param whitelist_categories: iterable of categories, converted to a
        set; ``None`` becomes an empty set.
    :param blacklist_categories: iterable of categories, converted to a
        set; ``None`` becomes an empty set.
    :param blacklist_characters: iterable of characters, converted to a
        set; ``None`` becomes an empty set.
    :param min_codepoint: lower code point bound; ``None`` means ``0``.
    :param max_codepoint: upper code point bound; ``None`` means
        ``sys.maxunicode``.
    :raises InvalidArgument: if the filtered unicode tree is empty.
    """
    whitelist_categories = set(whitelist_categories or [])
    blacklist_categories = set(blacklist_categories or [])
    blacklist_characters = set(blacklist_characters or [])

    # Test for None explicitly: ``max_codepoint or sys.maxunicode`` treated
    # an explicit 0 as "not given" and silently lifted the upper bound.
    if min_codepoint is None:
        min_codepoint = 0
    if max_codepoint is None:
        max_codepoint = sys.maxunicode
    min_codepoint = int(min_codepoint)
    max_codepoint = int(max_codepoint)

    self.ascii_tree = charstree.filter_tree(
        charstree.ascii_tree(),
        whitelist_categories,
        blacklist_categories,
        blacklist_characters,
        min_codepoint,
        max_codepoint,
    )
    self.unicode_tree = charstree.filter_tree(
        charstree.unicode_tree(),
        whitelist_categories,
        blacklist_categories,
        blacklist_characters,
        min_codepoint,
        max_codepoint,
    )
    # The spaces tree is derived from the already-filtered unicode tree,
    # narrowed further to the 'Zs' and 'Cc' categories.
    self.spaces_tree = charstree.filter_tree(
        self.unicode_tree,
        whitelist_categories={'Zs', 'Cc'},
        blacklist_characters=blacklist_characters,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
    )

    self.blacklist_characters = blacklist_characters
    self.min_codepoint = min_codepoint
    self.max_codepoint = max_codepoint

    if not self.unicode_tree:
        raise InvalidArgument('No characters could be produced.'
                              ' Try to reduce white/black categories list'
                              ' or min/max allowed code points.')
def test_unicode_tree_codepoints():
    """Check the unicode tree yields every code point up to sys.maxunicode."""
    every_codepoint = list(range(sys.maxunicode + 1))
    from_tree = sorted(charstree.codepoints(charstree.unicode_tree()))
    assert every_codepoint == from_tree
def test_unicode_tree():
    """Check that ``charstree.unicode_tree`` returns a dict."""
    assert isinstance(charstree.unicode_tree(), dict)