def test_category_by_codepoint():
    """Look up categories by codepoint in the unicode and ASCII trees.

    In the unicode tree the digit u'1' must map to 'Nd' and the non-ASCII
    letter used below to 'Ll'; in the ASCII tree looking up that same letter
    must give None.
    """
    lookup = charstree.category_by_codepoint

    full_tree = charstree.unicode_tree()
    assert lookup(full_tree, ord(u'1')) == 'Nd'
    assert lookup(full_tree, ord(u'я')) == 'Ll'

    ascii_only = charstree.ascii_tree()
    found = lookup(ascii_only, ord(u'я'))
    assert found is None
def test_filter_tree():
    """Filter the ASCII tree down to the codepoints of '0' through '9'.

    The filtered tree's codepoints, in the order charstree.codepoints yields
    them, must equal the ascending run ord('0')..ord('9').
    """
    lowest, highest = ord('0'), ord('9')
    digits_tree = charstree.filter_tree(
        charstree.ascii_tree(),
        min_codepoint=lowest,
        max_codepoint=highest,
    )
    assert list(range(lowest, highest + 1)) == list(
        charstree.codepoints(digits_tree))
Exemple #3
0
    def __init__(
        self,
        whitelist_categories=None,
        blacklist_categories=None,
        blacklist_characters=None,
        min_codepoint=None,
        max_codepoint=None,
    ):
        """Build the filtered character trees stored on this instance.

        The three category/character arguments may be None (treated as
        empty) or any iterable; each is converted to a set before being
        handed to ``charstree.filter_tree``.

        Args:
            whitelist_categories: categories passed to ``filter_tree`` as
                the whitelist for the ASCII and unicode trees.
            blacklist_categories: categories passed to ``filter_tree`` as
                the blacklist for the ASCII and unicode trees.
            blacklist_characters: characters passed to ``filter_tree`` as
                the character blacklist for all three trees.
            min_codepoint: lower codepoint bound; None (or any falsy value)
                means 0.
            max_codepoint: upper codepoint bound; None means
                ``sys.maxunicode``. An explicit 0 is honoured.

        Raises:
            InvalidArgument: if the filtered unicode tree is empty.
        """
        whitelist_categories = set(whitelist_categories or [])
        blacklist_categories = set(blacklist_categories or [])
        blacklist_characters = set(blacklist_characters or [])

        min_codepoint = int(min_codepoint or 0)
        # Compare against None explicitly: with ``max_codepoint or
        # sys.maxunicode`` an explicit upper bound of 0 was falsy and was
        # silently replaced by sys.maxunicode.
        if max_codepoint is None:
            max_codepoint = sys.maxunicode
        else:
            max_codepoint = int(max_codepoint)

        self.ascii_tree = charstree.filter_tree(
            charstree.ascii_tree(),
            whitelist_categories,
            blacklist_categories,
            blacklist_characters,
            min_codepoint,
            max_codepoint,
        )
        self.unicode_tree = charstree.filter_tree(
            charstree.unicode_tree(),
            whitelist_categories,
            blacklist_categories,
            blacklist_characters,
            min_codepoint,
            max_codepoint,
        )
        # The spaces tree is derived from the already-filtered unicode tree,
        # narrowed further to the "Zs" and "Cc" categories.
        self.spaces_tree = charstree.filter_tree(
            self.unicode_tree,
            whitelist_categories=set(["Zs", "Cc"]),
            blacklist_characters=blacklist_characters,
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
        )
        self.blacklist_characters = blacklist_characters
        self.min_codepoint = min_codepoint
        self.max_codepoint = max_codepoint
        if not self.unicode_tree:
            raise InvalidArgument(
                "No characters could be produced."
                " Try to reduce white/black categories list"
                " or min/max allowed code points."
            )
    def __init__(self,
                 whitelist_categories=None,
                 blacklist_categories=None,
                 blacklist_characters=None,
                 min_codepoint=None,
                 max_codepoint=None):
        """Build the filtered character trees stored on this instance.

        The three category/character arguments may be None (treated as
        empty) or any iterable; each is converted to a set before being
        handed to ``charstree.filter_tree``.

        Args:
            whitelist_categories: categories passed to ``filter_tree`` as
                the whitelist for the ASCII and unicode trees.
            blacklist_categories: categories passed to ``filter_tree`` as
                the blacklist for the ASCII and unicode trees.
            blacklist_characters: characters passed to ``filter_tree`` as
                the character blacklist for all three trees.
            min_codepoint: lower codepoint bound; None (or any falsy value)
                means 0.
            max_codepoint: upper codepoint bound; None means
                ``sys.maxunicode``. An explicit 0 is honoured.

        Raises:
            InvalidArgument: if the filtered unicode tree is empty.
        """
        whitelist_categories = set(whitelist_categories or [])
        blacklist_categories = set(blacklist_categories or [])
        blacklist_characters = set(blacklist_characters or [])

        min_codepoint = int(min_codepoint or 0)
        # Compare against None explicitly: with ``max_codepoint or
        # sys.maxunicode`` an explicit upper bound of 0 was falsy and was
        # silently replaced by sys.maxunicode.
        if max_codepoint is None:
            max_codepoint = sys.maxunicode
        else:
            max_codepoint = int(max_codepoint)

        self.ascii_tree = charstree.filter_tree(
            charstree.ascii_tree(),
            whitelist_categories,
            blacklist_categories,
            blacklist_characters,
            min_codepoint,
            max_codepoint,
        )
        self.unicode_tree = charstree.filter_tree(
            charstree.unicode_tree(),
            whitelist_categories,
            blacklist_categories,
            blacklist_characters,
            min_codepoint,
            max_codepoint,
        )
        # The spaces tree is derived from the already-filtered unicode tree,
        # narrowed further to the 'Zs' and 'Cc' categories.
        self.spaces_tree = charstree.filter_tree(
            self.unicode_tree,
            whitelist_categories=set(['Zs', 'Cc']),
            blacklist_characters=blacklist_characters,
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
        )
        self.blacklist_characters = blacklist_characters
        self.min_codepoint = min_codepoint
        self.max_codepoint = max_codepoint
        if not self.unicode_tree:
            raise InvalidArgument('No characters could be produced.'
                                  ' Try to reduce white/black categories list'
                                  ' or min/max allowed code points.')
def test_ascii_tree_codepoints():
    """The ASCII tree's codepoints, once sorted, must be exactly 0..127."""
    ascii_codepoints = charstree.codepoints(charstree.ascii_tree())
    assert list(range(128)) == sorted(ascii_codepoints)
def test_ascii_tree_categories():
    """Compare the ASCII tree's categories with those from unicodedata.

    The distinct unicodedata categories of codepoints 0..127 must match,
    ignoring order, what charstree.categories reports for the ASCII tree.
    """
    ascii_tree = charstree.ascii_tree()
    wanted = set(unicodedata.category(hunichr(cp)) for cp in range(128))
    reported = charstree.categories(ascii_tree)
    assert sorted(wanted) == sorted(reported)
def test_ascii_tree():
    """Check that charstree.ascii_tree() returns a dict."""
    tree = charstree.ascii_tree()
    assert isinstance(tree, dict)