def test_clean_fix_unicode_before_truncate():
    """A single name can be flagged as both Unicode-fixed and truncated."""
    # NOTE [adamhooper, 2019-12-13]: the "fix Unicode *before* truncating"
    # ordering probably cannot be proven from pure Python. Doing so would
    # need a string whose UTF-8 encoding is invalid, and there is no known
    # way to build one here. What we _can_ build is invalid Unicode using
    # surrogates -- but the replacement character happens to occupy the same
    # number of bytes as an erroneous surrogate, so the order of the two
    # steps is not observable in the output.
    #
    # So this test settles for less: it checks that is_unicode_fixed and
    # is_truncated can be reported together.
    raw_name = "\ud800abcd"
    actual = clean_colname(raw_name, settings=MockSettings(4))
    expected = CleanColname("�a", is_unicode_fixed=True, is_truncated=True)
    assert actual == expected
def test_clean_ascii_before_truncate():
    """Newlines are stripped first, so the limit of 3 keeps "abc"."""
    # Had truncation run first, the two newlines would have used up part of
    # the limit and the "c" could not appear in the result.
    actual = clean_colname("ab\n\ncd", settings=MockSettings(3))
    expected = CleanColname("abc", is_ascii_cleaned=True, is_truncated=True)
    assert actual == expected
def test_clean_truncate_allow_full_unicode_character():
    """With a limit of 4, "acé" comes back unchanged and unflagged."""
    actual = clean_colname("acé", settings=MockSettings(4))
    expected = CleanColname("acé")
    assert actual == expected
def test_clean_truncate_nix_partial_unicode_character():
    """With a limit of 3, the "é" is dropped whole rather than split."""
    # "acé" is three characters long, yet a limit of 3 still truncates it to
    # "ac" -- so the limit is evidently not counted in characters
    # (presumably it is counted in encoded bytes).
    actual = clean_colname("acé", settings=MockSettings(3))
    expected = CleanColname("ac", is_truncated=True)
    assert actual == expected
def test_clean_fix_unicode():
    """Each surrogate code point is replaced by its own U+FFFD character."""
    raw_name = "ab\ud800\udc00cd"
    actual = clean_colname(raw_name)
    expected = CleanColname("ab��cd", is_unicode_fixed=True)
    assert actual == expected
def test_clean_ascii_control_characters():
    """NUL, newline and tab are removed and the result is flagged as cleaned."""
    raw_name = "ab\0\n\tcd"
    actual = clean_colname(raw_name)
    expected = CleanColname("abcd", is_ascii_cleaned=True)
    assert actual == expected
def test_clean_empty_str():
    """An empty name is returned as-is, with no flags set."""
    actual = clean_colname("")
    expected = CleanColname("")
    assert actual == expected