def test_partition_set_dictionary_type():
    """Check the array type exposed by ``PartitionSet.dictionary``.

    A set built from unicode keys must expose a ``pa.StringArray`` and a set
    built from integer keys a ``pa.IntegerArray``.  Reading ``dictionary`` on
    a set built from ``datetime`` keys must raise ``TypeError``.
    """
    string_keys = pq.PartitionSet('key1', [u('foo'), u('bar'), u('baz')])
    integer_keys = pq.PartitionSet('key2', [2007, 2008, 2009])

    assert isinstance(string_keys.dictionary, pa.StringArray)
    assert isinstance(integer_keys.dictionary, pa.IntegerArray)

    datetime_keys = pq.PartitionSet('key2', [datetime.datetime(2007, 1, 1)])
    with pytest.raises(TypeError):
        # The bare attribute access is the operation under test.
        datetime_keys.dictionary
def test_string_unicode(self):
    """Unicode inputs index to ``A.StringValue`` and convert back to ``str``."""
    strings = A.from_pylist([u('foo'), None, u('bar')])

    first = strings[0]
    assert isinstance(first, A.StringValue)
    assert first.as_py() == 'foo'

    # The None entry must be the very object ``A.NA``.
    assert strings[1] is A.NA

    last_py = strings[2].as_py()
    assert last_py == u('bar')
    assert isinstance(last_py, str)
def test_bytes_to_binary(self):
    """A column mixing unicode and byte strings converts to ``pa.binary()``.

    The frame is then handed to ``_check_pandas_roundtrip`` together with an
    all-bytes frame (presumably the expected roundtrip result -- confirm
    against the helper).
    """
    mixed = [u('qux'), b'foo', None, 'bar', 'qux', np.nan]
    frame = pd.DataFrame({'strings': mixed})

    converted = pa.Table.from_pandas(frame)
    assert converted[0].type == pa.binary()

    all_bytes = [b'qux', b'foo', None, b'bar', b'qux', np.nan]
    expected_frame = pd.DataFrame({'strings': all_bytes})
    self._check_pandas_roundtrip(frame, expected_frame)
def test_bytes(self):
    """Mixed bytes/unicode input indexes to ``A.BinaryValue`` and ``bytes``."""
    binaries = A.from_pylist([b'foo', None, u('bar')])

    first = binaries[0]
    assert isinstance(first, A.BinaryValue)
    assert first.as_py() == b'foo'

    # The None entry must be the very object ``A.NA``.
    assert binaries[1] is A.NA

    # The unicode input is expected back as bytes.
    last_py = binaries[2].as_py()
    assert last_py == b'bar'
    assert isinstance(last_py, bytes)
def test_bytes(self):
    """Mixed bytes/unicode input indexes to ``pa.BinaryValue`` and ``bytes``."""
    binaries = pa.array([b'foo', None, u('bar')])

    first = binaries[0]
    assert isinstance(first, pa.BinaryValue)
    assert first.as_py() == b'foo'

    # The None entry must be the very object ``pa.NA``.
    assert binaries[1] is pa.NA

    # The unicode input is expected back as bytes.
    last_py = binaries[2].as_py()
    assert last_py == b'bar'
    assert isinstance(last_py, bytes)
def test_unicode(self):
    """Unicode strings plus one None convert to a string-typed array.

    Checks length, null count, type, and that ``to_pylist`` returns the
    input values.
    """
    words = [u("foo"), u("bar"), None, u("arrow")]
    converted = pyarrow.from_pylist(words)

    assert len(converted) == 4
    assert converted.null_count == 1
    assert converted.type == pyarrow.string()

    # Compared against a fresh literal rather than the input list object.
    roundtripped = converted.to_pylist()
    assert roundtripped == [u("foo"), u("bar"), None, u("arrow")]
def test_unicode(self):
    """Unicode strings plus one None convert to a string-typed array.

    Checks length, null count, type, and that ``to_pylist`` returns the
    input values.
    """
    words = [u('foo'), u('bar'), None, u('arrow')]
    converted = pyarrow.from_pylist(words)

    assert len(converted) == 4
    assert converted.null_count == 1
    assert converted.type == pyarrow.string()

    # Compared against a fresh literal rather than the input list object.
    roundtripped = converted.to_pylist()
    assert roundtripped == [u('foo'), u('bar'), None, u('arrow')]
def test_string(self):
    """Native and unicode strings index to ``A.StringValue`` and ``str``."""
    strings = A.from_pylist(['foo', None, u('bar')])

    first = strings[0]
    assert isinstance(first, A.StringValue)
    assert repr(first) == "'foo'"
    assert first.as_py() == 'foo'

    # The None entry must be the very object ``A.NA``.
    assert strings[1] is A.NA

    last_py = strings[2].as_py()
    assert last_py == 'bar'
    assert isinstance(last_py, str)
def test_bytes(self):
    """Binary scalars agree with their ``bytes`` payload in every view.

    Each non-null element is checked for type, ``as_py``, ``str``/``repr``,
    equality/inequality, and its ``as_buffer`` contents.
    """
    binaries = pa.array([b'foo', None, u('bar')])

    def _assert_binary_scalar(scalar, payload):
        # Scalar type and Python conversion.
        assert isinstance(scalar, pa.BinaryValue)
        assert scalar.as_py() == payload
        # Text representations mirror those of the raw bytes.
        assert str(scalar) == str(payload)
        assert repr(scalar) == repr(payload)
        # Direct comparison against bytes objects.
        assert scalar == payload
        assert scalar != b'xxxxx'
        # Buffer view of the same data.
        buffer = scalar.as_buffer()
        assert isinstance(buffer, pa.Buffer)
        assert buffer.to_pybytes() == payload

    _assert_binary_scalar(binaries[0], b'foo')
    assert binaries[1] is pa.NA
    _assert_binary_scalar(binaries[2], b'bar')
def test_large_bytes(self):
    """Large-binary scalars agree with their ``bytes`` payload in every view.

    The array is built with ``type=pa.large_binary()``; each non-null element
    is checked for type, ``as_py``, ``str``/``repr``, equality/inequality,
    and its ``as_buffer`` contents.
    """
    binaries = pa.array([b'foo', None, u('bar')], type=pa.large_binary())

    def _assert_large_binary_scalar(scalar, payload):
        # Scalar type and Python conversion.
        assert isinstance(scalar, pa.LargeBinaryValue)
        assert scalar.as_py() == payload
        # Text representations mirror those of the raw bytes.
        assert str(scalar) == str(payload)
        assert repr(scalar) == repr(payload)
        # Direct comparison against bytes objects.
        assert scalar == payload
        assert scalar != b'xxxxx'
        # Buffer view of the same data.
        buffer = scalar.as_buffer()
        assert isinstance(buffer, pa.Buffer)
        assert buffer.to_pybytes() == payload

    _assert_large_binary_scalar(binaries[0], b'foo')
    assert binaries[1] is pa.NA
    _assert_large_binary_scalar(binaries[2], b'bar')
def test_bytes_reader_non_bytes():
    """Constructing ``pa.BufferReader`` from unicode text raises ``TypeError``."""
    with pytest.raises(TypeError):
        # Kept inside the context so the whole expression is covered.
        text = u('some sample data')
        pa.BufferReader(text)
def test_unicode(self):
    """Run a repeated unicode/None/NaN string column through the roundtrip check.

    The five base values are repeated 1000 times before being passed to
    ``_check_pandas_roundtrip``.
    """
    base_values = [u('foo'), None, u('bar'), u('qux'), np.nan]
    repeat_count = 1000

    column = base_values * repeat_count
    frame = pd.DataFrame({'strings': column})
    self._check_pandas_roundtrip(frame)
def test_bytes_reader_non_bytes():
    """Constructing ``io.BufferReader`` from unicode text raises ``ValueError``."""
    with pytest.raises(ValueError):
        # Kept inside the context so the whole expression is covered.
        text = u('some sample data')
        io.BufferReader(text)
def test_bytes_reader_non_bytes():
    """Constructing ``io.BytesReader`` from unicode text raises ``ValueError``."""
    with pytest.raises(ValueError):
        # Kept inside the context so the whole expression is covered.
        text = u('some sample data')
        io.BytesReader(text)