def test_unicode_tree_categories():
    """Check the unicode tree's categories against ``unicodedata``.

    The expected categories are those that ``unicodedata.category`` reports
    for every codepoint from 0 through ``sys.maxunicode`` inclusive.
    """
    every_codepoint = range(0, sys.maxunicode + 1)
    # A set collapses the per-codepoint results down to distinct categories.
    wanted = sorted(set(
        unicodedata.category(hunichr(cp)) for cp in every_codepoint
    ))
    found = sorted(charstree.categories(charstree.unicode_tree()))
    assert wanted == found
def is_good(self, char):
    """Return whether ``char`` is acceptable for this instance.

    A character is rejected when it appears in ``self.blacklist_characters``,
    when its ``unicodedata`` category is not among the categories of
    ``self.unicode_tree``, or when its codepoint lies outside the inclusive
    range ``self.min_codepoint`` .. ``self.max_codepoint``.
    """
    if char in self.blacklist_characters:
        return False

    allowed_categories = charstree.categories(self.unicode_tree)
    category_ok = unicodedata.category(char) in allowed_categories
    # The codepoint range is only consulted once the category check passes.
    return (
        category_ok
        and self.min_codepoint <= ord(char) <= self.max_codepoint
    )
def draw_parameter(self, random):
    """Draw a tuple of characters to serve as an alphabet.

    The number of characters is ``1 + dist.geometric(random, 0.1)``. Each
    character comes from one of three trees (ASCII, spaces, or the full
    unicode tree); which one is decided per character by rolling
    ``random.randint(1, 10)`` against thresholds drawn once up front.
    After the main loop a newline may be appended as an extra entry.

    NOTE: the order of calls on ``random`` is significant; reordering them
    changes which alphabet a given random state produces.
    """
    ascii_cats = charstree.categories(self.ascii_tree)
    unicode_cats = charstree.categories(self.unicode_tree)
    spaces_cats = charstree.categories(self.spaces_tree)

    target_size = 1 + dist.geometric(random, 0.1)
    buckets = 10

    # Rolls in 1..ascii_chance pick the ASCII tree.
    ascii_chance = random.randint(1, buckets)
    # Spaces get a share of the remaining buckets, if any remain and the
    # spaces tree has categories at all.
    space_chance = (
        random.randint(1, buckets - ascii_chance)
        if spaces_cats and ascii_chance < buckets
        else 0
    )

    chars = []
    while len(chars) < target_size:
        roll = random.randint(1, buckets)
        if ascii_cats and roll <= ascii_chance:
            pool, tree = ascii_cats, self.ascii_tree
        elif spaces_cats and roll <= ascii_chance + space_chance:
            pool, tree = spaces_cats, self.spaces_tree
        else:
            # Fallback for every other roll, including the case where the
            # preferred tree has no categories.
            pool, tree = unicode_cats, self.unicode_tree
        category = random.choice(pool)
        codepoint = charstree.random_codepoint(tree, category, random)
        chars.append(hunichr(codepoint))

    # Add a newline when it is absent, ``randint(0, 6)`` comes up 0, and
    # ``is_good`` accepts it; short-circuiting skips the draw when the
    # newline is already present.
    if (u'\n' not in chars and not random.randint(0, 6)
            and self.is_good(u'\n')):
        chars.append(u'\n')
    return tuple(chars)
def test_ascii_tree_categories():
    """Check the ASCII tree's categories against ``unicodedata``.

    The expected categories are those that ``unicodedata.category`` reports
    for codepoints 0 through 127.
    """
    # A set collapses the per-codepoint results down to distinct categories.
    wanted = sorted(set(
        unicodedata.category(hunichr(cp)) for cp in range(0, 128)
    ))
    found = sorted(charstree.categories(charstree.ascii_tree()))
    assert wanted == found