def test_clean_fix_unicode_before_truncate():
    """Check that one call can report both a Unicode fix and a truncation."""
    # Note from adamhooper (2019-12-13): the *ordering* of "fix Unicode" and
    # "truncate" probably cannot be tested in pure Python. That would take a
    # string with invalid UTF-8 encoding, and there is no known way to build
    # one here. What we can build is invalid _Unicode_ (a lone surrogate) ...
    # but the replacement character happens to have the same number of bytes
    # as an erroneous surrogate.
    #
    # So this test settles for proving that is_unicode_fixed and is_truncated
    # can both be set on the same result.
    actual = clean_colname("\ud800abcd", settings=MockSettings(4))
    expected = CleanColname("�a", is_unicode_fixed=True, is_truncated=True)
    assert actual == expected
def test_clean_ascii_before_truncate():
    """Newlines are dropped before the length limit is applied.

    With a limit of 3, the expected name keeps the "c" that follows the two
    newlines, which is only possible if cleaning happens before truncation.
    """
    actual = clean_colname("ab\n\ncd", settings=MockSettings(3))
    expected = CleanColname("abc", is_ascii_cleaned=True, is_truncated=True)
    assert actual == expected
def test_clean_truncate_allow_full_unicode_character():
    """A name ending in a non-ASCII character is left intact at limit 4."""
    # NOTE(review): the limit is presumably counted in UTF-8 bytes (the
    # accented letter would take two) -- confirm against MockSettings.
    actual = clean_colname("acé", settings=MockSettings(4))
    expected = CleanColname("acé")
    assert actual == expected
def test_clean_truncate_nix_partial_unicode_character():
    """At limit 3 the trailing non-ASCII character is dropped entirely."""
    # NOTE(review): presumably the limit is in UTF-8 bytes, so the accented
    # letter cannot fit and is removed whole rather than split -- confirm
    # against MockSettings.
    actual = clean_colname("acé", settings=MockSettings(3))
    expected = CleanColname("ac", is_truncated=True)
    assert actual == expected
def test_clean_fix_unicode():
    """Each surrogate code point in the input becomes a replacement character."""
    # The input holds two surrogate code points between "ab" and "cd"; the
    # expected name has two replacement characters in the same position.
    actual = clean_colname("ab\ud800\udc00cd")
    expected = CleanColname("ab��cd", is_unicode_fixed=True)
    assert actual == expected
def test_clean_ascii_control_characters():
    """NUL, newline and tab are removed and the result is flagged as cleaned."""
    actual = clean_colname("ab\0\n\tcd")
    expected = CleanColname("abcd", is_ascii_cleaned=True)
    assert actual == expected
def test_clean_empty_str():
    """An empty name comes back as an empty name with no flags passed."""
    actual = clean_colname("")
    expected = CleanColname("")
    assert actual == expected