def test_vlen_dtype():
    """Check that vlen dtypes record their element type and are classified by it."""
    # Unicode flavour: the metadata and both predicates must agree.
    unicode_dtype = strings.create_vlen_dtype(unicode_type)
    assert unicode_dtype.metadata['element_type'] == unicode_type
    assert strings.is_unicode_dtype(unicode_dtype)
    assert not strings.is_bytes_dtype(unicode_dtype)
    assert strings.check_vlen_dtype(unicode_dtype) is unicode_type

    # Bytes flavour: the same checks with the predicates flipped.
    bytes_dtype = strings.create_vlen_dtype(bytes_type)
    assert bytes_dtype.metadata['element_type'] == bytes_type
    assert not strings.is_unicode_dtype(bytes_dtype)
    assert strings.is_bytes_dtype(bytes_dtype)
    assert strings.check_vlen_dtype(bytes_dtype) is bytes_type

    # A plain object dtype is expected to report no element type at all.
    plain_object_dtype = np.dtype(object)
    assert strings.check_vlen_dtype(plain_object_dtype) is None
def test_vlen_dtype():
    """Check vlen dtype creation and classification for ``str`` and ``bytes``."""
    # Each case pairs an element type with whether the resulting dtype should
    # be reported as unicode; the bytes predicate must give the opposite answer.
    cases = ((str, True), (bytes, False))
    for element_type, unicode_expected in cases:
        dtype = strings.create_vlen_dtype(element_type)
        assert dtype.metadata["element_type"] == element_type
        assert bool(strings.is_unicode_dtype(dtype)) == unicode_expected
        assert bool(strings.is_bytes_dtype(dtype)) != unicode_expected
        assert strings.check_vlen_dtype(dtype) is element_type

    # A plain object dtype is expected to report no element type at all.
    assert strings.check_vlen_dtype(np.dtype(object)) is None
def test_EncodedStringCoder_encode():
    """Exercise ``EncodedStringCoder.encode`` with and without unicode support."""
    vlen_str = strings.create_vlen_dtype(str)
    unicode_values = np.array(["abc", "ß∂µ∆"], dtype=vlen_str)
    utf8_values = np.array(
        [item.encode("utf-8") for item in unicode_values], dtype=object
    )
    # The encoded form: UTF-8 bytes tagged with the ``_Encoding`` attribute.
    encoded_var = Variable(("x",), utf8_values, attrs={"_Encoding": "utf-8"})

    unicode_coder = strings.EncodedStringCoder(allows_unicode=True)

    # With an "S1" dtype encoding the coder is expected to encode to bytes.
    s1_var = Variable(("x",), unicode_values, encoding={"dtype": "S1"})
    assert_identical(unicode_coder.encode(s1_var), encoded_var)

    # Without that encoding the variable should pass through unchanged.
    plain_var = Variable(("x",), unicode_values)
    assert_identical(unicode_coder.encode(plain_var), plain_var)

    # A coder that does not allow unicode is expected to encode it.
    bytes_only_coder = strings.EncodedStringCoder(allows_unicode=False)
    assert_identical(bytes_only_coder.encode(plain_var), encoded_var)
def test_EncodedStringCoder_encode():
    """Exercise ``EncodedStringCoder.encode`` with and without unicode support."""
    dims = ('x', )
    text_data = np.array(
        [u'abc', u'ß∂µ∆'], dtype=strings.create_vlen_dtype(unicode_type)
    )
    byte_data = np.array(
        [text.encode('utf-8') for text in text_data], dtype=object
    )
    # The encoded form: UTF-8 bytes tagged with the ``_Encoding`` attribute.
    encoded = Variable(dims, byte_data, attrs={'_Encoding': 'utf-8'})

    permissive = strings.EncodedStringCoder(allows_unicode=True)

    # With an 'S1' dtype encoding the coder is expected to encode to bytes.
    requested_s1 = Variable(dims, text_data, encoding={'dtype': 'S1'})
    assert_identical(permissive.encode(requested_s1), encoded)

    # Without that encoding the variable should pass through unchanged.
    untouched = Variable(dims, text_data)
    assert_identical(permissive.encode(untouched), untouched)

    # A coder that does not allow unicode is expected to encode it.
    strict = strings.EncodedStringCoder(allows_unicode=False)
    assert_identical(strict.encode(untouched), encoded)
def test_EncodedStringCoder_encode():
    """Check which inputs ``EncodedStringCoder.encode`` converts to UTF-8 bytes."""
    unicode_dtype = strings.create_vlen_dtype(unicode_type)
    source = np.array([u'abc', u'ß∂µ∆'], dtype=unicode_dtype)
    source_as_utf8 = np.array(
        [element.encode('utf-8') for element in source], dtype=object
    )

    # Case 1: unicode is allowed, but an 'S1' dtype encoding is set on the
    # variable; the result is expected to be bytes with ``_Encoding`` recorded.
    coder = strings.EncodedStringCoder(allows_unicode=True)
    var_with_encoding = Variable(('x',), source, encoding={'dtype': 'S1'})
    result = coder.encode(var_with_encoding)
    target = Variable(('x',), source_as_utf8, attrs={'_Encoding': 'utf-8'})
    assert_identical(result, target)

    # Case 2: unicode is allowed and no encoding is set; expected unchanged.
    var_without_encoding = Variable(('x',), source)
    assert_identical(coder.encode(var_without_encoding), var_without_encoding)

    # Case 3: unicode is not allowed; the same variable is expected to encode.
    coder = strings.EncodedStringCoder(allows_unicode=False)
    assert_identical(coder.encode(var_without_encoding), target)
@pytest.mark.parametrize('original', [
    Variable(('x', ), [b'ab', b'cdef']),
    Variable((), b'ab'),
    Variable(('x', ), [b'a', b'b']),
    Variable((), b'a'),
])
def test_CharacterArrayCoder_roundtrip(original):
    """Encoding then decoding with CharacterArrayCoder must return the input."""
    coder = strings.CharacterArrayCoder()
    encoded = coder.encode(original)
    decoded = coder.decode(encoded)
    assert_identical(original, decoded)


@pytest.mark.parametrize('data', [
    np.array([b'a', b'bc']),
    np.array([b'a', b'bc'], dtype=strings.create_vlen_dtype(bytes_type)),
])
def test_CharacterArrayCoder_encode(data):
    """Encoding byte strings is expected to add a trailing 'string2' dimension."""
    char_var = strings.CharacterArrayCoder().encode(Variable(('x', ), data))
    # One row per input string, one single-byte cell per character.
    expected_chars = np.array([[b'a', b''], [b'b', b'c']])
    assert_identical(char_var, Variable(('x', 'string2'), expected_chars))


def test_StackedBytesArray():
    """The stacked array's dtype must match that of the joined byte strings."""
    char_array = np.array([[b'a', b'b', b'c'], [b'd', b'e', b'f']], dtype='S')
    stacked = strings.StackedBytesArray(char_array)
    joined = np.array([b'abc', b'def'], dtype='S')
    assert stacked.dtype == joined.dtype
@pytest.mark.parametrize('original', [
    Variable(('x',), [b'ab', b'cdef']),
    Variable((), b'ab'),
    Variable(('x',), [b'a', b'b']),
    Variable((), b'a'),
])
def test_CharacterArrayCoder_roundtrip(original):
    """Check that ``decode(encode(v))`` reproduces ``v`` for CharacterArrayCoder."""
    char_coder = strings.CharacterArrayCoder()
    as_characters = char_coder.encode(original)
    restored = char_coder.decode(as_characters)
    assert_identical(original, restored)


@pytest.mark.parametrize('data', [
    np.array([b'a', b'bc']),
    np.array([b'a', b'bc'], dtype=strings.create_vlen_dtype(bytes_type)),
])
def test_CharacterArrayCoder_encode(data):
    """Check the character layout produced by ``CharacterArrayCoder.encode``."""
    char_coder = strings.CharacterArrayCoder()
    source_var = Variable(('x',), data)
    encoded_var = char_coder.encode(source_var)

    # Expected: a second dimension named 'string2' holding single bytes.
    character_grid = np.array([[b'a', b''], [b'b', b'c']])
    wanted = Variable(('x', 'string2'), character_grid)
    assert_identical(encoded_var, wanted)


def test_StackedBytesArray():
    """Compare StackedBytesArray's dtype with that of the joined byte strings."""
    single_bytes = np.array(
        [[b'a', b'b', b'c'], [b'd', b'e', b'f']],
        dtype='S',
    )
    wrapped = strings.StackedBytesArray(single_bytes)
    whole_strings = np.array([b'abc', b'def'], dtype='S')
    assert wrapped.dtype == whole_strings.dtype
Variable((), b"ab"), Variable(("x", ), [b"a", b"b"]), Variable((), b"a"), ], ) def test_CharacterArrayCoder_roundtrip(original): coder = strings.CharacterArrayCoder() roundtripped = coder.decode(coder.encode(original)) assert_identical(original, roundtripped) @pytest.mark.parametrize( "data", [ np.array([b"a", b"bc"]), np.array([b"a", b"bc"], dtype=strings.create_vlen_dtype(bytes)), ], ) def test_CharacterArrayCoder_encode(data): coder = strings.CharacterArrayCoder() raw = Variable(("x", ), data) actual = coder.encode(raw) expected = Variable(("x", "string2"), np.array([[b"a", b""], [b"b", b"c"]])) assert_identical(actual, expected) @pytest.mark.parametrize( ["original", "expected_char_dim_name"], [ (Variable(("x", ), [b"ab", b"cdef"]), "string4"),
def test_numpy_subclass_handling(numpy_str_type) -> None:
    """``create_vlen_dtype`` must raise ``TypeError`` for ``numpy_str_type``."""
    # Pattern the raised message is matched against by ``pytest.raises``.
    expected_message = "unsupported type for vlen_dtype"
    with pytest.raises(TypeError, match=expected_message):
        strings.create_vlen_dtype(numpy_str_type)