Beispiel #1
0
def test_EncodedStringCoder_encode():
    """Exercise ``EncodedStringCoder.encode`` for both ``allows_unicode`` modes.

    Three expectations are asserted:

    * with ``allows_unicode=True`` and an ``"S1"`` dtype in the variable's
      encoding, the result is the UTF-8 bytes variable carrying an
      ``_Encoding`` attribute;
    * with ``allows_unicode=True`` and no encoding, the variable comes back
      identical to the input;
    * with ``allows_unicode=False`` and no encoding, the result is again the
      UTF-8 bytes variable.
    """
    vlen_str = strings.create_vlen_dtype(str)
    unicode_values = np.array(["abc", "ß∂µ∆"], dtype=vlen_str)
    utf8_values = np.array(
        [text.encode("utf-8") for text in unicode_values], dtype=object
    )

    # Case 1: unicode is allowed, but the encoding asks for an "S1" dtype.
    unicode_coder = strings.EncodedStringCoder(allows_unicode=True)
    tagged_var = Variable(("x",), unicode_values, encoding={"dtype": "S1"})
    encoded = unicode_coder.encode(tagged_var)
    bytes_var = Variable(("x",), utf8_values, attrs={"_Encoding": "utf-8"})
    assert_identical(encoded, bytes_var)

    # Case 2: unicode is allowed and nothing is requested via the encoding.
    plain_var = Variable(("x",), unicode_values)
    assert_identical(unicode_coder.encode(plain_var), plain_var)

    # Case 3: unicode is not allowed, same un-tagged input as case 2.
    bytes_only_coder = strings.EncodedStringCoder(allows_unicode=False)
    assert_identical(bytes_only_coder.encode(plain_var), bytes_var)
def test_EncodedStringCoder_encode():
    """Exercise ``EncodedStringCoder.encode`` for both ``allows_unicode`` modes.

    The input is a variable-length ``unicode_type`` array. The test asserts
    that it is turned into UTF-8 bytes (with an ``_Encoding`` attribute)
    when the encoding requests an ``'S1'`` dtype or when the coder does not
    allow unicode, and that it is returned identical to the input otherwise.
    """
    vlen_unicode = strings.create_vlen_dtype(unicode_type)
    unicode_values = np.array([u'abc', u'ß∂µ∆'], dtype=vlen_unicode)
    utf8_values = np.array(
        [text.encode('utf-8') for text in unicode_values], dtype=object)

    # Unicode is allowed, but the encoding asks for an 'S1' dtype.
    unicode_coder = strings.EncodedStringCoder(allows_unicode=True)
    tagged_var = Variable(('x', ), unicode_values, encoding={'dtype': 'S1'})
    encoded = unicode_coder.encode(tagged_var)
    bytes_var = Variable(('x', ), utf8_values, attrs={'_Encoding': 'utf-8'})
    assert_identical(encoded, bytes_var)

    # Unicode is allowed and the variable carries no encoding.
    plain_var = Variable(('x', ), unicode_values)
    assert_identical(unicode_coder.encode(plain_var), plain_var)

    # Unicode is not allowed, same un-tagged input as above.
    bytes_only_coder = strings.EncodedStringCoder(allows_unicode=False)
    assert_identical(bytes_only_coder.encode(plain_var), bytes_var)
def test_EncodedStringCoder_decode():
    """Check ``EncodedStringCoder.decode`` on a UTF-8 tagged bytes variable.

    The decoded variable must be identical to the object-dtype unicode
    variable, and decoding the first element of that result must be
    identical to the first expected element.
    """
    utf8_bytes = np.array([b'abc', u'ß∂µ∆'.encode('utf-8')])
    encoded_var = Variable(('x', ), utf8_bytes, {'_Encoding': 'utf-8'})
    unicode_values = np.array([u'abc', u'ß∂µ∆'], dtype=object)

    coder = strings.EncodedStringCoder()
    decoded = coder.decode(encoded_var)

    unicode_var = Variable(('x', ), unicode_values)
    assert_identical(decoded, unicode_var)

    # Feed an element of the already decoded result through decode again.
    first_decoded = coder.decode(decoded[0])
    assert_identical(first_decoded, unicode_var[0])
Beispiel #4
0
def test_EncodedStringCoder_decode():
    """Check ``EncodedStringCoder.decode`` on a UTF-8 tagged bytes variable.

    The decoded variable must be identical to the object-dtype string
    variable, and decoding the first element of that result must be
    identical to the first expected element.
    """
    utf8_bytes = np.array([b"abc", "ß∂µ∆".encode()])
    encoded_var = Variable(("x", ), utf8_bytes, {"_Encoding": "utf-8"})
    text_values = np.array(["abc", "ß∂µ∆"], dtype=object)

    coder = strings.EncodedStringCoder()
    decoded = coder.decode(encoded_var)

    text_var = Variable(("x", ), text_values)
    assert_identical(decoded, text_var)

    # Feed an element of the already decoded result through decode again.
    first_decoded = coder.decode(decoded[0])
    assert_identical(first_decoded, text_var[0])
def test_EncodedStringCoder_decode_dask():
    """Check ``EncodedStringCoder.decode`` on a chunked bytes variable.

    Besides the value comparison, the test asserts that the decoded data,
    and the data obtained by decoding its first element, are ``da.Array``
    instances.
    """
    utf8_bytes = np.array([b'abc', u'ß∂µ∆'.encode('utf-8')])
    in_memory_var = Variable(('x', ), utf8_bytes, {'_Encoding': 'utf-8'})
    chunked_var = in_memory_var.chunk()
    unicode_values = np.array([u'abc', u'ß∂µ∆'], dtype=object)

    coder = strings.EncodedStringCoder()
    decoded = coder.decode(chunked_var)
    assert isinstance(decoded.data, da.Array)

    unicode_var = Variable(('x', ), unicode_values)
    assert_identical(decoded, unicode_var)

    # Decode an element of the already decoded result and re-check its type.
    first_decoded = coder.decode(decoded[0])
    assert isinstance(first_decoded.data, da.Array)
    assert_identical(first_decoded, unicode_var[0])
Beispiel #6
0
def test_EncodedStringCoder_decode_dask():
    """Check ``EncodedStringCoder.decode`` on a chunked bytes variable.

    Besides the value comparison, the test asserts that the decoded data,
    and the data obtained by decoding its first element, are ``da.Array``
    instances.
    """
    utf8_bytes = np.array([b"abc", "ß∂µ∆".encode()])
    in_memory_var = Variable(("x", ), utf8_bytes, {"_Encoding": "utf-8"})
    chunked_var = in_memory_var.chunk()
    text_values = np.array(["abc", "ß∂µ∆"], dtype=object)

    coder = strings.EncodedStringCoder()
    decoded = coder.decode(chunked_var)
    assert isinstance(decoded.data, da.Array)

    text_var = Variable(("x", ), text_values)
    assert_identical(decoded, text_var)

    # Decode an element of the already decoded result and re-check its type.
    first_decoded = coder.decode(decoded[0])
    assert isinstance(first_decoded.data, da.Array)
    assert_identical(first_decoded, text_var[0])