def tokenize(cls, string):
    """Lazily break ``string`` into ``Token`` objects.

    The input goes through ``strutils.asciify``, is lower-cased and decoded,
    then cut into segments with ``ALNUM_EXTRACTOR.split``. Segments that are
    not purely alphanumeric are dropped; every remaining one is wrapped in a
    ``Token`` and yielded in its original order. This is a generator, so
    callers wanting a tuple must materialize it themselves.

    Tokens exist so that strings and integers can be compared with each
    other, which gives a natural, user-friendly ordering of version
    numbers. Plain Python values only get there once the parts are
    integers:

        >>> '2019.0.1' > '9.3'
        False
        >>> ('2019', '0', '1') > ('9', '3')
        False
        >>> (2019, 0, 1) > (9, 3)
        True
    """
    # Case is ignored: everything is lower-cased before splitting.
    lowered = strutils.asciify(string).lower()
    text = lowered.decode()
    pieces = ALNUM_EXTRACTOR.split(text)
    for piece in pieces:
        # Skip separators and empty strings left over from the split.
        if not piece.isalnum():
            continue
        yield Token(piece)
def test_asciify():
    """Check that ``strutils.asciify`` maps an accented string to ASCII.

    The accented final character of the reference string must come out as
    a plain ``e``, and the result must have as many elements as the input
    has characters (one replacement per character, nothing dropped).
    """
    ref = u'Beyoncé'
    b = strutils.asciify(ref)
    # Compare against the input: the previous ``len(b) == len(b)`` was a
    # tautology that could never fail.
    assert len(b) == len(ref)
    # Slice rather than index so the check works on a bytes result.
    assert b[-1:].decode('ascii') == 'e'