def __init__(
    self,
    whitelist_categories=None,
    blacklist_categories=None,
    blacklist_characters=None,
    min_codepoint=None,
    max_codepoint=None,
):
    """Build the filtered character trees used by this instance.

    Args:
        whitelist_categories: Iterable of category names handed to
            ``charstree.filter_tree`` as the whitelist; ``None`` or empty
            becomes an empty set.
        blacklist_categories: Iterable of category names handed to
            ``charstree.filter_tree`` as the blacklist; ``None`` or empty
            becomes an empty set.
        blacklist_characters: Iterable of characters to exclude; ``None``
            or empty becomes an empty set.
        min_codepoint: Lower codepoint bound; ``None`` means 0.
        max_codepoint: Upper codepoint bound; ``None`` means
            ``sys.maxunicode``. An explicit ``0`` is honoured as a real
            bound.

    Raises:
        InvalidArgument: If the filtered unicode tree is empty.
    """
    whitelist_categories = set(whitelist_categories or [])
    blacklist_categories = set(blacklist_categories or [])
    blacklist_characters = set(blacklist_characters or [])

    # Compare against None explicitly: ``max_codepoint or sys.maxunicode``
    # would silently turn a requested upper bound of 0 into "no limit".
    min_codepoint = 0 if min_codepoint is None else int(min_codepoint)
    max_codepoint = (
        sys.maxunicode if max_codepoint is None else int(max_codepoint)
    )

    self.ascii_tree = charstree.filter_tree(
        charstree.ascii_tree(),
        whitelist_categories,
        blacklist_categories,
        blacklist_characters,
        min_codepoint,
        max_codepoint,
    )
    self.unicode_tree = charstree.filter_tree(
        charstree.unicode_tree(),
        whitelist_categories,
        blacklist_categories,
        blacklist_characters,
        min_codepoint,
        max_codepoint,
    )
    # Derived from the already-filtered unicode tree, restricted to the
    # Unicode "Zs" (space separator) and "Cc" (control) categories.
    self.spaces_tree = charstree.filter_tree(
        self.unicode_tree,
        whitelist_categories={"Zs", "Cc"},
        blacklist_characters=blacklist_characters,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
    )

    self.blacklist_characters = blacklist_characters
    self.min_codepoint = min_codepoint
    self.max_codepoint = max_codepoint

    if not self.unicode_tree:
        raise InvalidArgument(
            "No characters could be produced."
            " Try to reduce white/black categories list"
            " or min/max allowed code points."
        )
def test_filter_tree():
    """Filtering the ASCII tree to '0'..'9' yields exactly the digit codepoints."""
    lowest, highest = ord('0'), ord('9')
    digits_tree = charstree.filter_tree(
        charstree.ascii_tree(),
        min_codepoint=lowest,
        max_codepoint=highest,
    )
    # Both bounds are expected to be inclusive, hence ``highest + 1``.
    wanted = list(range(lowest, highest + 1))
    produced = list(charstree.codepoints(digits_tree))
    assert wanted == produced
def __init__(self, whitelist_categories=None, blacklist_categories=None,
             blacklist_characters=None, min_codepoint=None,
             max_codepoint=None):
    """Build the filtered character trees used by this instance.

    Args:
        whitelist_categories: Iterable of category names handed to
            ``charstree.filter_tree`` as the whitelist; ``None`` or empty
            becomes an empty set.
        blacklist_categories: Iterable of category names handed to
            ``charstree.filter_tree`` as the blacklist; ``None`` or empty
            becomes an empty set.
        blacklist_characters: Iterable of characters to exclude; ``None``
            or empty becomes an empty set.
        min_codepoint: Lower codepoint bound; ``None`` means 0.
        max_codepoint: Upper codepoint bound; ``None`` means
            ``sys.maxunicode``. An explicit ``0`` is honoured as a real
            bound.

    Raises:
        InvalidArgument: If the filtered unicode tree is empty.
    """
    whitelist_categories = set(whitelist_categories or [])
    blacklist_categories = set(blacklist_categories or [])
    blacklist_characters = set(blacklist_characters or [])

    # Use explicit None checks: with ``max_codepoint or sys.maxunicode`` a
    # caller asking for an upper bound of 0 would get no upper bound at all.
    if min_codepoint is None:
        min_codepoint = 0
    else:
        min_codepoint = int(min_codepoint)
    if max_codepoint is None:
        max_codepoint = sys.maxunicode
    else:
        max_codepoint = int(max_codepoint)

    self.ascii_tree = charstree.filter_tree(
        charstree.ascii_tree(),
        whitelist_categories,
        blacklist_categories,
        blacklist_characters,
        min_codepoint,
        max_codepoint,
    )
    self.unicode_tree = charstree.filter_tree(
        charstree.unicode_tree(),
        whitelist_categories,
        blacklist_categories,
        blacklist_characters,
        min_codepoint,
        max_codepoint,
    )
    # Derived from the already-filtered unicode tree, restricted to the
    # Unicode 'Zs' (space separator) and 'Cc' (control) categories.
    self.spaces_tree = charstree.filter_tree(
        self.unicode_tree,
        whitelist_categories={'Zs', 'Cc'},
        blacklist_characters=blacklist_characters,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
    )

    self.blacklist_characters = blacklist_characters
    self.min_codepoint = min_codepoint
    self.max_codepoint = max_codepoint

    if not self.unicode_tree:
        raise InvalidArgument('No characters could be produced.'
                              ' Try to reduce white/black categories list'
                              ' or min/max allowed code points.')
def try_ascii(self, random, template):
    """Yield ASCII characters whose codepoints lie between the zero point and ``template``.

    Generator. Produces nothing when ``self.ascii_tree`` is empty or when
    ``template`` is the character at ``self.zero_point``. Otherwise the
    ASCII tree is narrowed to the codepoint span bounded by
    ``self.zero_point`` and ``ord(template)``, and every codepoint that
    ``charstree.codepoints`` reports for that subtree is yielded as a
    character.

    Args:
        random: Unused here; kept as part of the method's signature.
        template: Single character marking one end of the span.
    """
    ascii_tree = self.ascii_tree
    if not ascii_tree:
        return

    pivot = self.zero_point
    target = ord(template)
    if not (target < pivot or target > pivot):
        # Template sits exactly on the zero point: nothing to offer.
        return

    # Order the two endpoints so the lower one is always the minimum bound.
    if target < pivot:
        lower, upper = target, pivot
    else:
        lower, upper = pivot, target

    narrowed = charstree.filter_tree(
        ascii_tree, min_codepoint=lower, max_codepoint=upper
    )
    for codepoint in charstree.codepoints(narrowed):
        yield hunichr(codepoint)