Beispiel #1
0
def test_unicode_tree_categories():
    """Check the unicode tree reports every category unicodedata knows.

    The expected set is built by asking ``unicodedata`` for the category of
    each codepoint from 0 through ``sys.maxunicode`` and is compared,
    order-insensitively, with ``charstree.categories`` of the unicode tree.
    """
    tree = charstree.unicode_tree()

    # Collect each distinct category once; duplicates collapse in the set.
    seen_categories = set()
    for codepoint in range(sys.maxunicode + 1):
        seen_categories.add(unicodedata.category(hunichr(codepoint)))

    reported = charstree.categories(tree)
    assert sorted(seen_categories) == sorted(reported)
Beispiel #2
0
    def is_good(self, char):
        """Return whether ``char`` passes all of this object's filters.

        A character is rejected when it is in ``self.blacklist_characters``,
        when its ``unicodedata`` category is not among the categories of
        ``self.unicode_tree``, or when its codepoint lies outside the
        inclusive range ``self.min_codepoint`` .. ``self.max_codepoint``.
        """
        if char in self.blacklist_characters:
            return False

        permitted = charstree.categories(self.unicode_tree)
        if unicodedata.category(char) in permitted:
            # Category is allowed; the codepoint range is the final check.
            value = ord(char)
            return self.min_codepoint <= value and value <= self.max_codepoint

        return False
    def is_good(self, char):
        """Return whether ``char`` passes all of this object's filters.

        A character is rejected when it is in ``self.blacklist_characters``,
        when its ``unicodedata`` category is not among the categories of
        ``self.unicode_tree``, or when its codepoint lies outside the
        inclusive range ``self.min_codepoint`` .. ``self.max_codepoint``.
        """
        if char in self.blacklist_characters:
            return False

        permitted = charstree.categories(self.unicode_tree)
        if unicodedata.category(char) in permitted:
            # Category is allowed; the codepoint range is the final check.
            value = ord(char)
            return self.min_codepoint <= value and value <= self.max_codepoint

        return False
Beispiel #4
0
    def draw_parameter(self, random):
        """Draw a tuple of characters to serve as an alphabet.

        The alphabet length is ``1 + dist.geometric(random, 0.1)``. Each
        character is produced by rolling a number from 1 to 10: rolls up to
        ``ascii_limit`` pick from the ASCII tree, rolls up to
        ``ascii_limit + space_share`` pick from the spaces tree, and anything
        else picks from the unicode tree. A source whose category list is
        empty is skipped, so its rolls fall through to the next branch.

        After the loop a newline may be appended if it is not already
        present, ``random.randint(0, 6)`` returns 0, and ``self.is_good``
        accepts it.

        :param random: source of randomness; must provide ``randint`` and
            ``choice`` and is also passed to ``dist.geometric`` and
            ``charstree.random_codepoint``.
        :returns: tuple of the drawn characters.
        """
        ascii_pool = charstree.categories(self.ascii_tree)
        unicode_pool = charstree.categories(self.unicode_tree)
        spaces_pool = charstree.categories(self.spaces_tree)

        target_size = 1 + dist.geometric(random, 0.1)
        buckets = 10
        ascii_limit = random.randint(1, buckets)

        # Spaces only get a share when categories exist for them and ASCII
        # has not already claimed every bucket.
        space_share = 0
        if spaces_pool and ascii_limit < buckets:
            space_share = random.randint(1, buckets - ascii_limit)
        space_limit = ascii_limit + space_share

        chars = []
        while len(chars) < target_size:
            roll = random.randint(1, buckets)

            if ascii_pool and roll <= ascii_limit:
                pool, tree = ascii_pool, self.ascii_tree
            elif spaces_pool and roll <= space_limit:
                pool, tree = spaces_pool, self.spaces_tree
            else:
                pool, tree = unicode_pool, self.unicode_tree

            category = random.choice(pool)
            chars.append(
                hunichr(charstree.random_codepoint(tree, category, random)))

        # Short-circuit order matters: randint is consulted only when the
        # newline is absent, and is_good only when the roll came up zero.
        if (u'\n' not in chars
                and random.randint(0, 6) == 0
                and self.is_good(u'\n')):
            chars.append(u'\n')

        return tuple(chars)
    def draw_parameter(self, random):
        """Draw a tuple of characters to serve as an alphabet.

        The alphabet length is ``1 + dist.geometric(random, 0.1)``. Each
        character is produced by rolling a number from 1 to 10: rolls up to
        ``ascii_limit`` pick from the ASCII tree, rolls up to
        ``ascii_limit + space_share`` pick from the spaces tree, and anything
        else picks from the unicode tree. A source whose category list is
        empty is skipped, so its rolls fall through to the next branch.

        After the loop a newline may be appended if it is not already
        present, ``random.randint(0, 6)`` returns 0, and ``self.is_good``
        accepts it.

        :param random: source of randomness; must provide ``randint`` and
            ``choice`` and is also passed to ``dist.geometric`` and
            ``charstree.random_codepoint``.
        :returns: tuple of the drawn characters.
        """
        ascii_pool = charstree.categories(self.ascii_tree)
        unicode_pool = charstree.categories(self.unicode_tree)
        spaces_pool = charstree.categories(self.spaces_tree)

        target_size = 1 + dist.geometric(random, 0.1)
        buckets = 10
        ascii_limit = random.randint(1, buckets)

        # Spaces only get a share when categories exist for them and ASCII
        # has not already claimed every bucket.
        space_share = 0
        if spaces_pool and ascii_limit < buckets:
            space_share = random.randint(1, buckets - ascii_limit)
        space_limit = ascii_limit + space_share

        chars = []
        while len(chars) < target_size:
            roll = random.randint(1, buckets)

            if ascii_pool and roll <= ascii_limit:
                pool, tree = ascii_pool, self.ascii_tree
            elif spaces_pool and roll <= space_limit:
                pool, tree = spaces_pool, self.spaces_tree
            else:
                pool, tree = unicode_pool, self.unicode_tree

            category = random.choice(pool)
            chars.append(
                hunichr(charstree.random_codepoint(tree, category, random)))

        # Short-circuit order matters: randint is consulted only when the
        # newline is absent, and is_good only when the roll came up zero.
        if (u'\n' not in chars
                and random.randint(0, 6) == 0
                and self.is_good(u'\n')):
            chars.append(u'\n')

        return tuple(chars)
Beispiel #6
0
def test_ascii_tree_categories():
    """Check the ASCII tree reports exactly the categories of codepoints 0-127.

    The expected set comes from ``unicodedata.category`` applied to each of
    the first 128 codepoints and is compared, order-insensitively, with
    ``charstree.categories`` of the ASCII tree.
    """
    tree = charstree.ascii_tree()

    # Collect each distinct category once; duplicates collapse in the set.
    seen_categories = set()
    for codepoint in range(128):
        seen_categories.add(unicodedata.category(hunichr(codepoint)))

    reported = charstree.categories(tree)
    assert sorted(seen_categories) == sorted(reported)