def test_really_unicode():
    """Check that h.really_unicode and h._attempt_encodings yield unicode text."""
    here_dir = path.dirname(__file__)
    # the input starts with the UTF-8 byte-order mark; it must come back as U+FEFF
    s = h.really_unicode('\xef\xbb\xbf<?xml version="1.0" encoding="utf-8" ?>')
    assert s.startswith(u'\ufeff')
    # read the fixture inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector
    with open(path.join(here_dir, 'data/unicode_test.txt')) as fixture:
        s = h.really_unicode(fixture.read())
    assert isinstance(s, unicode)
    # try non-ascii string in legacy 8bit encoding
    # (no assertion on the result: this only verifies the call does not raise)
    h.really_unicode(u'\u0410\u0401'.encode('cp1251'))
    # ensure invalid encodings are handled gracefully
    # ('LKDJFLDK' is not a real codec name; a unicode result is still expected)
    s = h._attempt_encodings('foo', ['LKDJFLDK'])
    assert isinstance(s, unicode)
Esempio n. 2
0
def test_really_unicode():
    """Check that h.really_unicode and h._attempt_encodings yield unicode text."""
    here_dir = path.dirname(__file__)
    # the input starts with the UTF-8 byte-order mark; it must come back as U+FEFF
    s = h.really_unicode('\xef\xbb\xbf<?xml version="1.0" encoding="utf-8" ?>')
    assert s.startswith(u'\ufeff')
    # read the fixture inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector
    with open(path.join(here_dir, 'data/unicode_test.txt')) as fixture:
        s = h.really_unicode(fixture.read())
    assert isinstance(s, unicode)
    # try non-ascii string in legacy 8bit encoding
    # (no assertion on the result: this only verifies the call does not raise)
    h.really_unicode(u'\u0410\u0401'.encode('cp1251'))
    # ensure invalid encodings are handled gracefully
    # ('LKDJFLDK' is not a real codec name; a unicode result is still expected)
    s = h._attempt_encodings('foo', ['LKDJFLDK'])
    assert isinstance(s, unicode)
Esempio n. 3
0
def test_really_unicode():
    """Check h.really_unicode / h._attempt_encodings across several input types."""
    here_dir = path.dirname(__file__)
    # plain ASCII bytes decode to the same characters
    s = h.really_unicode(b'asdf')
    assert s.startswith('asdf'), repr(s)
    # the input starts with the UTF-8 byte-order mark; it must come back as U+FEFF
    s = h.really_unicode(b'\xef\xbb\xbf<?xml version="1.0" encoding="utf-8" ?>')
    assert s.startswith('\ufeff'), repr(s)
    # read the fixture inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector
    # NOTE(review): the file is opened in the default (text) mode, so on
    # Python 3 read() returns str rather than bytes -- confirm this is intended
    with open(path.join(here_dir, 'data/unicode_test.txt')) as fixture:
        s = h.really_unicode(fixture.read())
    assert isinstance(s, six.text_type)
    # try non-ascii string in legacy 8bit encoding
    # (no assertion on the result: this only verifies the call does not raise)
    h.really_unicode('\u0410\u0401'.encode('cp1251'))
    # ensure invalid encodings are handled gracefully
    # ('LKDJFLDK' is not a real codec name; a text result is still expected)
    s = h._attempt_encodings(b'foo', ['LKDJFLDK'])
    assert isinstance(s, six.text_type)
    # unicode stays the same
    assert_equals(h.really_unicode('¬∂•°‹'), '¬∂•°‹')
    # other types are handled too
    assert_equals(h.really_unicode(1234), '1234')
    assert_equals(h.really_unicode(datetime(2020, 1, 1)), '2020-01-01 00:00:00')
    assert_equals(h.really_unicode(None), '')