def tokenize(cls, string):
        """Break a string into a stream of comparable tokens.

        The input is reduced to lower-case ASCII text, then cut up with
        ``ALNUM_EXTRACTOR``. Only the purely alphanumeric pieces survive;
        separators and empty fragments are dropped.

        Yields one ``Token`` per surviving piece, in order of appearance.
        ``Token`` is presumably what makes strings and integers comparable
        with each other (confirm against the ``Token`` class). That gives
        natural, user-friendly ordering of version numbers, which plain
        Python comparisons can't provide:

            >>> '2019.0.1' > '9.3'
            False
            >>> ('2019', '0', '1') > ('9', '3')
            False
            >>> (2019, 0, 1) > (9, 3)
            True
        """
        # asciify() hands back bytes, hence the decode once case is folded.
        lowered = strutils.asciify(string).lower()
        text = lowered.decode()

        for chunk in ALNUM_EXTRACTOR.split(text):
            # Skip anything that is not strictly letters and/or digits.
            if not chunk.isalnum():
                continue
            yield Token(chunk)
Exemple #2
0
def test_asciify():
    """Check that ``strutils.asciify`` de-accents text without changing its length."""
    ref = u'Beyoncé'
    b = strutils.asciify(ref)
    # Compare against the input: the previous ``len(b) == len(b)`` was a
    # tautology and could never fail.
    assert len(b) == len(ref)
    # The result is sliced and decoded as bytes; the accented final
    # character must have been replaced by its plain ASCII counterpart.
    assert b[-1:].decode('ascii') == 'e'
Exemple #3
0
def test_asciify():
    """Verify ``strutils.asciify`` maps accented input to same-length ASCII bytes."""
    ref = u'Beyoncé'
    b = strutils.asciify(ref)
    # The original assertion compared ``len(b)`` with itself, which is always
    # true; the meaningful check is against the length of the input text.
    assert len(b) == len(ref)
    # Last byte, decoded as ASCII, should be the unaccented letter.
    assert b[-1:].decode('ascii') == 'e'