def test_max_times(self, duckdb_cursor):
    """Round-trip maximal time32/time64 values through DuckDB.

    Each Arrow time unit is fed to DuckDB and read back as Arrow; the
    round-tripped column is compared against the same instant expressed
    as time64[us] (presumably DuckDB normalises all time units to
    microseconds -- the expected tables below are built on that basis).

    Fix: removed leftover debug ``print()`` calls before the final assert.
    """
    if not can_run:
        return

    # Max seconds value of time32[s], expressed in microseconds.
    data = pa.array([2147483647000000], type=pa.time64('us'))
    result = pa.Table.from_arrays([data], ['a'])
    # Max Sec
    data = pa.array([2147483647], type=pa.time32('s'))
    arrow_table = pa.Table.from_arrays([data], ['a'])
    rel = duckdb.from_arrow_table(arrow_table).arrow()
    assert (rel['a'] == result['a'])

    # Max MSec: max milliseconds value, expressed in microseconds.
    data = pa.array([2147483647000], type=pa.time64('us'))
    result = pa.Table.from_arrays([data], ['a'])
    data = pa.array([2147483647], type=pa.time32('ms'))
    arrow_table = pa.Table.from_arrays([data], ['a'])
    rel = duckdb.from_arrow_table(arrow_table).arrow()
    assert (rel['a'] == result['a'])

    # Max NSec: largest nanosecond value that survives truncation to us.
    data = pa.array([9223372036854774], type=pa.time64('us'))
    result = pa.Table.from_arrays([data], ['a'])
    data = pa.array([9223372036854774000], type=pa.time64('ns'))
    arrow_table = pa.Table.from_arrays([data], ['a'])
    rel = duckdb.from_arrow_table(arrow_table).arrow()
    assert (rel['a'] == result['a'])
def test_pytime_from_pandas(self): pytimes = [time(1, 2, 3, 1356), time(4, 5, 6, 1356)] # microseconds t1 = pa.time64('us') aobjs = np.array(pytimes + [None], dtype=object) parr = pa.array(aobjs) assert parr.type == t1 assert parr[0].as_py() == pytimes[0] assert parr[1].as_py() == pytimes[1] assert parr[2] is pa.NA # DataFrame df = pd.DataFrame({'times': aobjs}) batch = pa.RecordBatch.from_pandas(df) assert batch[0].equals(parr) # Test ndarray of int64 values arr = np.array([_pytime_to_micros(v) for v in pytimes], dtype='int64') a1 = pa.array(arr, type=pa.time64('us')) assert a1[0].as_py() == pytimes[0] a2 = pa.array(arr * 1000, type=pa.time64('ns')) assert a2[0].as_py() == pytimes[0] a3 = pa.array((arr / 1000).astype('i4'), type=pa.time32('ms')) assert a3[0].as_py() == pytimes[0].replace(microsecond=1000) a4 = pa.array((arr / 1000000).astype('i4'), type=pa.time32('s')) assert a4[0].as_py() == pytimes[0].replace(microsecond=0)
def _from_jvm_time_type(jvm_type):
    """
    Convert a JVM time type to its Python equivalent.

    Parameters
    ----------
    jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$Time

    Returns
    -------
    typ: pyarrow.DataType

    Notes
    -----
    Fix: the bit width must be read through the Java getter
    ``getBitWidth()``. The previous ``jvm_type.bitWidth == 32`` compared
    the bound method object itself against an int (always False), so the
    assertion never validated the actual width. This also matches the
    sibling implementation of this converter elsewhere in the codebase.
    """
    time_unit = jvm_type.getUnit().toString()
    if time_unit == 'SECOND':
        assert jvm_type.getBitWidth() == 32
        return pa.time32('s')
    elif time_unit == 'MILLISECOND':
        assert jvm_type.getBitWidth() == 32
        return pa.time32('ms')
    elif time_unit == 'MICROSECOND':
        assert jvm_type.getBitWidth() == 64
        return pa.time64('us')
    elif time_unit == 'NANOSECOND':
        assert jvm_type.getBitWidth() == 64
        return pa.time64('ns')
    # Unknown units fall through and return None, matching the original
    # behaviour (callers are expected to pass a valid ArrowType$Time).
def _from_jvm_time_type(jvm_type):
    """
    Convert a JVM time type to its Python equivalent.

    Parameters
    ----------
    jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$Time

    Returns
    -------
    typ: pyarrow.DataType

    Notes
    -----
    Unknown time units fall through and implicitly return ``None``;
    callers are expected to pass one of the four JVM time units.
    """
    time_unit = jvm_type.getUnit().toString()
    if time_unit == 'SECOND':
        # Second/millisecond times are 32-bit in Arrow.
        assert jvm_type.getBitWidth() == 32
        return pa.time32('s')
    elif time_unit == 'MILLISECOND':
        assert jvm_type.getBitWidth() == 32
        return pa.time32('ms')
    elif time_unit == 'MICROSECOND':
        # Micro/nanosecond times are 64-bit in Arrow.
        assert jvm_type.getBitWidth() == 64
        return pa.time64('us')
    elif time_unit == 'NANOSECOND':
        assert jvm_type.getBitWidth() == 64
        return pa.time64('ns')
def test_time32_python(self):
    """
    Python -> Rust -> Python
    """
    source = pyarrow.array([None, 1, 2], pyarrow.time32('s'))
    # The Rust side concatenates the array with itself.
    result = arrow_pyarrow_integration_testing.concatenate(source)
    expected = pyarrow.array([None, 1, 2] + [None, 1, 2],
                             pyarrow.time32('s'))
    self.assertEqual(result, expected)
def test_table(n, types=None, offset=None, length=None, nullable=True):
    """Build a pyarrow Table with one generated column per type.

    Parameters
    ----------
    n : int
        Number of rows to generate per column.
    types : list of pyarrow.DataType, optional
        Types to include; defaults to a broad set of primitive, temporal,
        dictionary and nested types.
    offset, length : int, optional
        When ``offset`` is given, every array is sliced to exercise
        offset/length handling downstream.
    nullable : bool
        Also emit a second, nullable variant of every column
        (named ``"<type> (null)"``).

    NOTE(review): uses the legacy ``pyarrow.column`` /
    ``Table.from_arrays(columns)`` API of older pyarrow releases --
    confirm the targeted version before modernising.
    """
    if types is None:
        types = [
            pyarrow.null(),
            pyarrow.bool_(),
            pyarrow.int8(),
            pyarrow.int16(),
            pyarrow.int32(),
            pyarrow.int64(),
            pyarrow.uint8(),
            pyarrow.uint16(),
            pyarrow.uint32(),
            pyarrow.uint64(),
            pyarrow.float16(),
            pyarrow.float32(),
            pyarrow.float64(),
            pyarrow.date32(),
            pyarrow.date64(),
            pyarrow.timestamp('s'),
            pyarrow.timestamp('ms'),
            pyarrow.timestamp('us'),
            pyarrow.timestamp('ns'),
            pyarrow.time32('s'),
            pyarrow.time32('ms'),
            pyarrow.time64('us'),
            pyarrow.time64('ns'),
            pyarrow.string(),
            pyarrow.binary(),
            pyarrow.binary(4),
            # Dictionary types, ordered and unordered.
            pyarrow.dictionary(pyarrow.int32(), pyarrow.string(), True),
            pyarrow.dictionary(pyarrow.int64(), pyarrow.int64(), True),
            pyarrow.dictionary(pyarrow.int32(), pyarrow.string(), False),
            pyarrow.dictionary(pyarrow.int64(), pyarrow.int64(), False),
            # Nested types.
            pyarrow.list_(pyarrow.int32()),
            pyarrow.struct([pyarrow.field('int32', pyarrow.int32())]),
            pyarrow.list_(
                pyarrow.struct([pyarrow.field('int32', pyarrow.int32())])),
            pyarrow.struct(
                [pyarrow.field('int32', pyarrow.list_(pyarrow.int32()))]),
        ]
    data = list()
    for t in types:
        # Non-nullable variant.
        name = str(t)
        array = TestArrayGenerator(n, t, False).array
        if offset is not None:
            array = array.slice(offset, length)
        data.append(pyarrow.column(name, array))
        if nullable:
            # Nullable variant of the same type.
            name = str(t) + ' (null)'
            array = TestArrayGenerator(n, t, True).array
            if offset is not None:
                array = array.slice(offset, length)
            data.append(pyarrow.column(name, array))
    return pyarrow.Table.from_arrays(data)
def test_date_time_types(tmpdir):
    """Round-trip date/time/timestamp columns through an in-memory parquet
    file, verifying parquet's type coercions (date64 -> date32,
    time32[s] -> time32[ms]) and that time64[ns] is rejected.

    Fix: the sixth column was also named ``'time32[s]'``, duplicating the
    fourth column's name; renamed to ``'time32_from64[s]'`` (matching the
    sibling variant of this test) and the name list is now shared so the
    written and expected tables cannot diverge.
    """
    buf = io.BytesIO()

    t1 = pa.date32()
    data1 = np.array([17259, 17260, 17261], dtype='int32')
    a1 = pa.Array.from_pandas(data1, type=t1)

    t2 = pa.date64()
    # date64 counts milliseconds since epoch.
    data2 = data1.astype('int64') * 86400000
    a2 = pa.Array.from_pandas(data2, type=t2)

    t3 = pa.timestamp('us')
    start = pd.Timestamp('2000-01-01').value / 1000
    data3 = np.array([start, start + 1, start + 2], dtype='int64')
    a3 = pa.Array.from_pandas(data3, type=t3)

    t4 = pa.time32('ms')
    data4 = np.arange(3, dtype='i4')
    a4 = pa.Array.from_pandas(data4, type=t4)

    t5 = pa.time64('us')
    a5 = pa.Array.from_pandas(data4.astype('int64'), type=t5)

    t6 = pa.time32('s')
    a6 = pa.Array.from_pandas(data4, type=t6)

    # Parquet widens time32[s] to time32[ms].
    ex_t6 = pa.time32('ms')
    ex_a6 = pa.Array.from_pandas(data4 * 1000, type=ex_t6)

    names = ['date32', 'date64', 'timestamp[us]', 'time32[s]',
             'time64[us]', 'time32_from64[s]']

    table = pa.Table.from_arrays([a1, a2, a3, a4, a5, a6], names)

    # date64 as date32
    # time32[s] to time32[ms]
    expected = pa.Table.from_arrays([a1, a1, a3, a4, a5, ex_a6], names)

    pq.write_table(table, buf, version="2.0")
    buf.seek(0)

    result = pq.read_table(buf)
    assert result.equals(expected)

    # Unsupported stuff
    def _assert_unsupported(array):
        # time64[ns] has no parquet representation; writing must raise.
        table = pa.Table.from_arrays([array], ['unsupported'])
        buf = io.BytesIO()
        with pytest.raises(NotImplementedError):
            pq.write_table(table, buf, version="2.0")

    t7 = pa.time64('ns')
    a7 = pa.Array.from_pandas(data4.astype('int64'), type=t7)

    _assert_unsupported(a7)
def test_time32_units():
    """time32 accepts only second/millisecond units; others must raise."""
    for unit in ('s', 'ms'):
        assert pa.time32(unit).unit == unit

    for unit in ('m', 'us', 'ns'):
        expected_msg = 'Invalid time unit for time32: {!r}'.format(unit)
        with pytest.raises(ValueError, match=expected_msg):
            pa.time32(unit)
def test_time32_units():
    """Only 's' and 'ms' are valid time32 units; the rest raise ValueError."""
    valid = ('s', 'ms')
    invalid = ('m', 'us', 'ns')

    for unit in valid:
        assert pa.time32(unit).unit == unit

    for unit in invalid:
        message = 'Invalid TimeUnit for time32: {}'.format(unit)
        with pytest.raises(ValueError, match=message):
            pa.time32(unit)
def test_time():
    """pa.scalar round-trips datetime.time values for all four time types."""
    sample_times = [datetime.time(18, 0), datetime.time(21, 0)]
    time_types = [pa.time32('s'), pa.time32('ms'),
                  pa.time64('us'), pa.time64('ns')]

    for ty in time_types:
        for value in sample_times:
            scalar = pa.scalar(value, type=ty)
            assert scalar.as_py() == value
def test_time_null(self, duckdb_cursor):
    """Round-trip all-null columns of every Arrow time unit through DuckDB.

    NOTE(review): every round-tripped column is compared against the
    original time64[us] column 'c' -- presumably DuckDB converts all time
    units to time64[us] on output; confirm against the converter.
    """
    if not can_run:
        return
    # One single-null array per time unit: s, ms, us, ns.
    data = (pa.array([None], type=pa.time32('s')),
            pa.array([None], type=pa.time32('ms')),
            pa.array([None], pa.time64('us')),
            pa.array([None], pa.time64('ns')))
    arrow_table = pa.Table.from_arrays([data[0], data[1], data[2], data[3]],
                                       ['a', 'b', 'c', 'd'])
    rel = duckdb.from_arrow(arrow_table).arrow()
    assert (rel['a'] == arrow_table['c'])
    assert (rel['b'] == arrow_table['c'])
    assert (rel['c'] == arrow_table['c'])
    assert (rel['d'] == arrow_table['c'])
def test_time32_python():
    """
    Python -> Rust -> Python
    """
    source = pa.array([None, 1, 2], pa.time32("s"))
    # The Rust side concatenates the array with itself.
    result = rust.concatenate(source)
    expected = pa.array([None, 1, 2] + [None, 1, 2], pa.time32("s"))
    assert result == expected
    # Drop references explicitly so buffer lifetimes end here.
    del source
    del result
    del expected
def test_date_time_types():
    """Round-trip date/time/timestamp columns through parquet.

    Verifies parquet's coercions: date64 is stored as date32 and
    time32[s] is widened to time32[ms], so the expected table differs
    from the written one for those columns; time64[ns] must be rejected.
    """
    t1 = pa.date32()
    data1 = np.array([17259, 17260, 17261], dtype='int32')
    a1 = pa.Array.from_pandas(data1, type=t1)

    t2 = pa.date64()
    # date64 counts milliseconds since epoch.
    data2 = data1.astype('int64') * 86400000
    a2 = pa.Array.from_pandas(data2, type=t2)

    t3 = pa.timestamp('us')
    start = pd.Timestamp('2000-01-01').value / 1000
    data3 = np.array([start, start + 1, start + 2], dtype='int64')
    a3 = pa.Array.from_pandas(data3, type=t3)

    t4 = pa.time32('ms')
    data4 = np.arange(3, dtype='i4')
    a4 = pa.Array.from_pandas(data4, type=t4)

    t5 = pa.time64('us')
    a5 = pa.Array.from_pandas(data4.astype('int64'), type=t5)

    t6 = pa.time32('s')
    a6 = pa.Array.from_pandas(data4, type=t6)

    # Expected after round trip: time32[s] widened to time32[ms].
    ex_t6 = pa.time32('ms')
    ex_a6 = pa.Array.from_pandas(data4 * 1000, type=ex_t6)

    table = pa.Table.from_arrays([a1, a2, a3, a4, a5, a6],
                                 ['date32', 'date64', 'timestamp[us]',
                                  'time32[s]', 'time64[us]',
                                  'time32_from64[s]'])

    # date64 as date32
    # time32[s] to time32[ms]
    expected = pa.Table.from_arrays([a1, a1, a3, a4, a5, ex_a6],
                                    ['date32', 'date64', 'timestamp[us]',
                                     'time32[s]', 'time64[us]',
                                     'time32_from64[s]'])

    _check_roundtrip(table, expected=expected, version='2.0')

    # Unsupported stuff
    def _assert_unsupported(array):
        # Writing the given array must raise NotImplementedError.
        table = pa.Table.from_arrays([array], ['unsupported'])
        buf = io.BytesIO()
        with pytest.raises(NotImplementedError):
            _write_table(table, buf, version="2.0")

    # time64[ns] has no parquet representation.
    t7 = pa.time64('ns')
    a7 = pa.Array.from_pandas(data4.astype('int64'), type=t7)

    _assert_unsupported(a7)
def test_arrow_time_to_pandas(self):
    """Arrow time arrays of every unit convert to pandas with nulls
    preserved and sub-second precision truncated per unit.

    Fix: removed a verbatim duplicate of the ``arr = np.array(...)``
    statement (the same expression was assigned twice in a row).
    """
    pytimes = [time(1, 2, 3, 1356),
               time(4, 5, 6, 1356),
               time(0, 0, 0)]

    # The last entry is masked out, so expectations carry a trailing None.
    expected = np.array(pytimes[:2] + [None])
    expected_ms = np.array([x.replace(microsecond=1000)
                            for x in pytimes[:2]] + [None])
    expected_s = np.array([x.replace(microsecond=0)
                           for x in pytimes[:2]] + [None])

    arr = np.array([_pytime_to_micros(v) for v in pytimes],
                   dtype='int64')

    null_mask = np.array([False, False, True], dtype=bool)

    a1 = pa.array(arr, mask=null_mask, type=pa.time64('us'))
    a2 = pa.array(arr * 1000, mask=null_mask, type=pa.time64('ns'))
    # Coarser units truncate the 1356us component.
    a3 = pa.array((arr / 1000).astype('i4'), mask=null_mask,
                  type=pa.time32('ms'))
    a4 = pa.array((arr / 1000000).astype('i4'), mask=null_mask,
                  type=pa.time32('s'))

    names = ['time64[us]', 'time64[ns]', 'time32[ms]', 'time32[s]']
    batch = pa.RecordBatch.from_arrays([a1, a2, a3, a4], names)

    arr = a1.to_pandas()
    assert (arr == expected).all()

    arr = a2.to_pandas()
    assert (arr == expected).all()

    arr = a3.to_pandas()
    assert (arr == expected_ms).all()

    arr = a4.to_pandas()
    assert (arr == expected_s).all()

    df = batch.to_pandas()
    expected_df = pd.DataFrame({'time64[us]': expected,
                                'time64[ns]': expected,
                                'time32[ms]': expected_ms,
                                'time32[s]': expected_s},
                               columns=names)

    tm.assert_frame_equal(df, expected_df)
def test_arrow_time_to_pandas(self):
    """Arrow time arrays built via ``Array.from_pandas`` convert back to
    pandas with nulls preserved and precision truncated per unit.

    Fix: removed a verbatim duplicate of the ``arr = np.array(...)``
    statement (the same expression was assigned twice in a row).
    """
    pytimes = [time(1, 2, 3, 1356),
               time(4, 5, 6, 1356),
               time(0, 0, 0)]

    # The last entry is masked out, so expectations carry a trailing None.
    expected = np.array(pytimes[:2] + [None])
    expected_ms = np.array([x.replace(microsecond=1000)
                            for x in pytimes[:2]] + [None])
    expected_s = np.array([x.replace(microsecond=0)
                           for x in pytimes[:2]] + [None])

    arr = np.array([_pytime_to_micros(v) for v in pytimes],
                   dtype='int64')

    null_mask = np.array([False, False, True], dtype=bool)

    a1 = pa.Array.from_pandas(arr, mask=null_mask, type=pa.time64('us'))
    a2 = pa.Array.from_pandas(arr * 1000, mask=null_mask,
                              type=pa.time64('ns'))
    # Coarser units truncate the 1356us component.
    a3 = pa.Array.from_pandas((arr / 1000).astype('i4'), mask=null_mask,
                              type=pa.time32('ms'))
    a4 = pa.Array.from_pandas((arr / 1000000).astype('i4'), mask=null_mask,
                              type=pa.time32('s'))

    names = ['time64[us]', 'time64[ns]', 'time32[ms]', 'time32[s]']
    batch = pa.RecordBatch.from_arrays([a1, a2, a3, a4], names)

    arr = a1.to_pandas()
    assert (arr == expected).all()

    arr = a2.to_pandas()
    assert (arr == expected).all()

    arr = a3.to_pandas()
    assert (arr == expected_ms).all()

    arr = a4.to_pandas()
    assert (arr == expected_s).all()

    df = batch.to_pandas()
    expected_df = pd.DataFrame({'time64[us]': expected,
                                'time64[ns]': expected,
                                'time32[ms]': expected_ms,
                                'time32[s]': expected_s},
                               columns=names)

    tm.assert_frame_equal(df, expected_df)
def test_time32_python(self):
    """
    Python -> Rust -> Python, additionally checking that no C++ memory
    is leaked across the round trip.
    """
    allocated_before = pyarrow.total_allocated_bytes()

    source = pyarrow.array([None, 1, 2], pyarrow.time32('s'))
    combined = arrow_pyarrow_integration_testing.concatenate(source)
    expected = pyarrow.array([None, 1, 2] + [None, 1, 2],
                             pyarrow.time32('s'))
    self.assertEqual(combined, expected)

    del source
    del combined
    del expected
    # No leak of C++ memory
    self.assertEqual(allocated_before, pyarrow.total_allocated_bytes())
def _map_arrow_type(arrow_type):
    """Map a pyarrow DataType to Deephaven type metadata.

    Returns a ``{"deephaven:type": <dh type name>}`` dict, or raises
    DHError when the Arrow type has no Deephaven equivalent.

    NOTE(review): empty-string values in the table mark unsupported
    types, but the TimestampType fallback below also matches
    ``pa.timestamp(..., tz=None)`` even though those map to '' here --
    confirm whether tz-naive timestamps are intentionally accepted.
    """
    # '' means "no Deephaven mapping"; such lookups fall into the
    # `if not dh_type` branch below.
    arrow_to_dh = {
        pa.null(): '',
        pa.bool_(): '',
        pa.int8(): 'byte',
        pa.int16(): 'short',
        pa.int32(): 'int',
        pa.int64(): 'long',
        pa.uint8(): '',
        pa.uint16(): 'char',
        pa.uint32(): '',
        pa.uint64(): '',
        pa.float16(): '',
        pa.float32(): 'float',
        pa.float64(): 'double',
        pa.time32('s'): '',
        pa.time32('ms'): '',
        pa.time64('us'): '',
        pa.time64('ns'): 'io.deephaven.time.DateTime',
        pa.timestamp('us', tz=None): '',
        pa.timestamp('ns', tz=None): '',
        pa.date32(): 'java.time.LocalDate',
        pa.date64(): 'java.time.LocalDate',
        pa.binary(): '',
        pa.string(): 'java.lang.String',
        pa.utf8(): 'java.lang.String',
        pa.large_binary(): '',
        pa.large_string(): '',
        pa.large_utf8(): '',
        # decimal128(int precision, int scale=0)
        # list_(value_type, int list_size=-1)
        # large_list(value_type)
        # map_(key_type, item_type[, keys_sorted])
        # struct(fields)
        # dictionary(index_type, value_type, ...)
        # field(name, type, bool nullable = True[, metadata])
        # schema(fields[, metadata])
        # from_numpy_dtype(dtype)
    }

    dh_type = arrow_to_dh.get(arrow_type)
    if not dh_type:
        # if this is a case of timestamp with tz specified
        if isinstance(arrow_type, pa.TimestampType):
            dh_type = "io.deephaven.time.DateTime"

    if not dh_type:
        raise DHError(f'unsupported arrow data type : {arrow_type}')

    return {"deephaven:type": dh_type}
def get_many_types():
    """Return a tuple covering one instance of (almost) every Arrow type.

    NOTE(review): ``pa.dictionary(pa.int32(), pa.array([...]))`` is the
    legacy dictionary-type API of older pyarrow releases -- confirm the
    targeted version before modernising.
    """
    # returning them from a function is required because of pa.dictionary
    # type holds a pyarrow array and test_array.py::test_toal_bytes_allocated
    # checks that the default memory pool has zero allocated bytes
    return (pa.null(),
            pa.bool_(),
            pa.int32(),
            pa.time32('s'),
            pa.time64('us'),
            pa.date32(),
            pa.timestamp('us'),
            pa.timestamp('us', tz='UTC'),
            pa.timestamp('us', tz='Europe/Paris'),
            pa.float16(),
            pa.float32(),
            pa.float64(),
            pa.decimal128(19, 4),
            pa.string(),
            pa.binary(),
            pa.binary(10),
            pa.list_(pa.int32()),
            pa.struct([
                pa.field('a', pa.int32()),
                pa.field('b', pa.int8()),
                pa.field('c', pa.string())
            ]),
            # Same struct but with non-nullable leading fields.
            pa.struct([
                pa.field('a', pa.int32(), nullable=False),
                pa.field('b', pa.int8(), nullable=False),
                pa.field('c', pa.string())
            ]),
            pa.union(
                [pa.field('a', pa.binary(10)), pa.field('b', pa.string())],
                mode=pa.lib.UnionMode_DENSE),
            pa.union(
                [pa.field('a', pa.binary(10)), pa.field('b', pa.string())],
                mode=pa.lib.UnionMode_SPARSE),
            pa.union([
                pa.field('a', pa.binary(10), nullable=False),
                pa.field('b', pa.string())
            ], mode=pa.lib.UnionMode_SPARSE),
            pa.dictionary(pa.int32(), pa.array(['a', 'b', 'c'])))
def test_type_schema_pickling():
    """Types, fields and whole schemas must survive a pickle round trip.

    NOTE(review): ``pa.decimal(12, 2)`` is the legacy spelling from older
    pyarrow releases (later renamed ``decimal128``) -- confirm the
    targeted version before modernising.
    """
    cases = [
        pa.int8(),
        pa.string(),
        pa.binary(),
        pa.binary(10),
        pa.list_(pa.string()),
        pa.struct([
            pa.field('a', 'int8'),
            pa.field('b', 'string')
        ]),
        pa.time32('s'),
        pa.time64('us'),
        pa.date32(),
        pa.date64(),
        pa.timestamp('ms'),
        pa.timestamp('ns'),
        pa.decimal(12, 2),
        pa.field('a', 'string', metadata={b'foo': b'bar'})
    ]

    # Each type/field pickles back to an equal object.
    for val in cases:
        roundtripped = pickle.loads(pickle.dumps(val))
        assert val == roundtripped

    # A schema built from all of the above also round-trips,
    # including its metadata.
    fields = []
    for i, f in enumerate(cases):
        if isinstance(f, pa.Field):
            fields.append(f)
        else:
            fields.append(pa.field('_f{}'.format(i), f))

    schema = pa.schema(fields, metadata={b'foo': b'bar'})
    roundtripped = pickle.loads(pickle.dumps(schema))
    assert schema == roundtripped
def test_type_ids():
    # Having this fixed is very important because internally we rely on
    # this id to parse from python
    expected_ids = [
        (0, pa.null()),
        (1, pa.bool_()),
        (2, pa.uint8()),
        (3, pa.int8()),
        (4, pa.uint16()),
        (5, pa.int16()),
        (6, pa.uint32()),
        (7, pa.int32()),
        (8, pa.uint64()),
        (9, pa.int64()),
        (10, pa.float16()),
        (11, pa.float32()),
        (12, pa.float64()),
        (13, pa.string()),
        (13, pa.utf8()),  # utf8 is an alias of string
        (14, pa.binary()),
        (16, pa.date32()),
        (17, pa.date64()),
        (18, pa.timestamp("us")),
        (19, pa.time32("s")),
        (20, pa.time64("us")),
        (23, pa.decimal128(8, 1)),
        (34, pa.large_utf8()),
        (35, pa.large_binary()),
    ]
    for type_id, arrow_type in expected_ids:
        assert arrow_type.id == type_id
def test_statistics_convert_logical_types(tempdir):
    """Parquet column statistics must decode back to logical Python values
    (unsigned ints, unicode, times, timestamps), not raw physical ints."""
    # ARROW-5166, ARROW-4139

    # (min, max, type)
    cases = [(10, 11164359321221007157, pa.uint64()),
             (10, 4294967295, pa.uint32()),
             ("ähnlich", "öffentlich", pa.utf8()),
             (datetime.time(10, 30, 0, 1000), datetime.time(15, 30, 0, 1000),
              pa.time32('ms')),
             (datetime.time(10, 30, 0, 1000), datetime.time(15, 30, 0, 1000),
              pa.time64('us')),
             (datetime.datetime(2019, 6, 24, 0, 0, 0, 1000),
              datetime.datetime(2019, 6, 25, 0, 0, 0, 1000),
              pa.timestamp('ms')),
             (datetime.datetime(2019, 6, 24, 0, 0, 0, 1000),
              datetime.datetime(2019, 6, 25, 0, 0, 0, 1000),
              pa.timestamp('us'))]

    for i, (min_val, max_val, typ) in enumerate(cases):
        t = pa.Table.from_arrays([pa.array([min_val, max_val], type=typ)],
                                 ['col'])
        path = str(tempdir / ('example{}.parquet'.format(i)))
        pq.write_table(t, path, version='2.0')
        pf = pq.ParquetFile(path)
        stats = pf.metadata.row_group(0).column(0).statistics
        # Statistics must already be converted to logical Python values.
        assert stats.min == min_val
        assert stats.max == max_val
def test_cast_time32_to_int():
    """Casting time32[s] down to int32 recovers the raw second counts."""
    seconds = pa.array(np.array([0, 1, 2], dtype='int32'),
                       type=pa.time32('s'))
    as_ints = seconds.cast('i4')
    assert as_ints.equals(pa.array([0, 1, 2], type='i4'))
def test_cast_time32_to_int():
    """time32[s] values cast to int32 yield the underlying integers."""
    time_values = np.array([0, 1, 2], dtype='int32')
    time_arr = pa.array(time_values, type=pa.time32('s'))

    expected_ints = pa.array([0, 1, 2], type='i4')
    assert time_arr.cast('i4').equals(expected_ints)
def test_is_temporal_date_time_timestamp():
    """is_temporal accepts dates, times, timestamps and durations, while
    each specific predicate accepts only its own kind."""
    kind_checks = [
        ([pa.date32(), pa.date64()], types.is_date),
        ([pa.time32('s'), pa.time64('ns')], types.is_time),
        ([pa.timestamp('ms')], types.is_timestamp),
        ([pa.duration('ms')], types.is_duration),
    ]

    # Every temporal type is recognised by is_temporal.
    for members, _ in kind_checks:
        for case in members:
            assert types.is_temporal(case)

    # Each specific predicate matches only its own group.
    for members, own_predicate in kind_checks:
        for case in members:
            assert own_predicate(case)
            for _, other_predicate in kind_checks:
                if other_predicate is not own_predicate:
                    assert not other_predicate(case)

    assert not types.is_temporal(pa.int32())
def test_load_table_columnar_arrow_all(self, con):
    """Load an Arrow table covering every supported column type via
    ``con.load_table_arrow`` into a freshly created ``all_types`` table.

    Only verifies that the load call succeeds; the table is dropped
    again afterwards.
    """
    c = con.cursor()
    c.execute('drop table if exists all_types;')
    create = textwrap.dedent('''\
    create table all_types (
        boolean_ BOOLEAN,
        smallint_ SMALLINT,
        int_ INT,
        bigint_ BIGINT,
        float_ FLOAT,
        double_ DOUBLE,
        varchar_ VARCHAR(40),
        text_ TEXT,
        time_ TIME,
        timestamp_ TIMESTAMP,
        date_ DATE
    );''')
    # skipping decimal for now
    c.execute(create)

    names = [
        'boolean_',
        'smallint_',
        'int_',
        'bigint_',
        'float_',
        'double_',
        'varchar_',
        'text_',
        'time_',
        'timestamp_',
        'date_',
    ]

    # One 3-row Arrow array per column, each with a trailing null.
    columns = [
        pa.array([True, False, None], type=pa.bool_()),
        pa.array([1, 0, None]).cast(pa.int16()),
        pa.array([1, 0, None]).cast(pa.int32()),
        pa.array([1, 0, None]),
        pa.array([1.0, 1.1, None]).cast(pa.float32()),
        pa.array([1.0, 1.1, None]),
        # no fixed-width string
        pa.array(['a', 'b', None]),
        pa.array(['a', 'b', None]),
        # TIME column: ints cast through int32 into time32[s].
        (pa.array([1, 2, None]).cast(pa.int32()).cast(pa.time32('s'))),
        pa.array([
            datetime.datetime(2016, 1, 1, 12, 12, 12),
            datetime.datetime(2017, 1, 1),
            None,
        ]),
        pa.array(
            [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1), None]),
    ]
    table = pa.Table.from_arrays(columns, names=names)
    con.load_table_arrow("all_types", table)
    c.execute('drop table if exists all_types;')
def test_from_time_arrow() -> None:
    """An Arrow time32[s] column converts to Python time values via polars."""
    raw_seconds = pa.array([10, 20, 30], type=pa.time32("s"))
    times_table = pa.table([raw_seconds], names=["times"])

    expected = [time(0, 0, 10), time(0, 0, 20), time(0, 0, 30)]
    assert pl.from_arrow(times_table).to_series().to_list() == expected
def test_type_for_alias():
    """Every documented string alias resolves to its DataType via
    ``pa.type_for_alias``."""
    cases = [
        ('i1', pa.int8()),
        ('int8', pa.int8()),
        ('i2', pa.int16()),
        ('int16', pa.int16()),
        ('i4', pa.int32()),
        ('int32', pa.int32()),
        ('i8', pa.int64()),
        ('int64', pa.int64()),
        ('u1', pa.uint8()),
        ('uint8', pa.uint8()),
        ('u2', pa.uint16()),
        ('uint16', pa.uint16()),
        ('u4', pa.uint32()),
        ('uint32', pa.uint32()),
        ('u8', pa.uint64()),
        ('uint64', pa.uint64()),
        ('f4', pa.float32()),
        ('float32', pa.float32()),
        ('f8', pa.float64()),
        ('float64', pa.float64()),
        ('date32', pa.date32()),
        ('date64', pa.date64()),
        ('string', pa.string()),
        ('str', pa.string()),
        ('binary', pa.binary()),
        ('time32[s]', pa.time32('s')),
        ('time32[ms]', pa.time32('ms')),
        ('time64[us]', pa.time64('us')),
        ('time64[ns]', pa.time64('ns')),
        ('timestamp[s]', pa.timestamp('s')),
        ('timestamp[ms]', pa.timestamp('ms')),
        ('timestamp[us]', pa.timestamp('us')),
        ('timestamp[ns]', pa.timestamp('ns')),
        ('duration[s]', pa.duration('s')),
        ('duration[ms]', pa.duration('ms')),
        ('duration[us]', pa.duration('us')),
        ('duration[ns]', pa.duration('ns')),
        ('month_day_nano_interval', pa.month_day_nano_interval()),
    ]
    for val, expected in cases:
        assert pa.type_for_alias(val) == expected
def test_time_types():
    """Check unit and string repr of the four Arrow time types, and that
    time32/time64 reject units of the wrong width."""
    seconds32 = pa.time32('s')
    millis32 = pa.time32('ms')
    micros64 = pa.time64('us')
    nanos64 = pa.time64('ns')

    unit_pairs = [(seconds32, 's'), (millis32, 'ms'),
                  (micros64, 'us'), (nanos64, 'ns')]
    for ty, expected_unit in unit_pairs:
        assert ty.unit == expected_unit

    assert str(seconds32) == 'time32[s]'
    assert str(nanos64) == 'time64[ns]'

    # Sub-millisecond units need 64 bits; coarse units need only 32.
    with pytest.raises(ValueError):
        pa.time32('us')

    with pytest.raises(ValueError):
        pa.time64('s')
def _to_arrow_type(field):
    """Convert a ``flink_fn_execution_pb2`` schema field to a pyarrow.field.

    Parameters
    ----------
    field :
        Proto schema field carrying ``name``, ``type.type_name`` and
        ``type.nullable``.

    Returns
    -------
    pyarrow.Field

    Raises
    ------
    ValueError
        When the field's type name has no Arrow mapping.

    Notes
    -----
    Fix: the TIME branch previously read ``field.type.time_type.precision``
    for precisions 1-6 while the precision-0 check used
    ``field.type.time_info.precision``; the mismatch would raise
    AttributeError for any sub-second precision. ``time_info`` is now used
    consistently (assumed to be the proto field name -- TODO confirm
    against flink_fn_execution_pb2).
    """
    if field.type.type_name == flink_fn_execution_pb2.Schema.TypeName.TINYINT:
        return pa.field(field.name, pa.int8(), field.type.nullable)
    elif field.type.type_name == flink_fn_execution_pb2.Schema.TypeName.SMALLINT:
        return pa.field(field.name, pa.int16(), field.type.nullable)
    elif field.type.type_name == flink_fn_execution_pb2.Schema.TypeName.INT:
        return pa.field(field.name, pa.int32(), field.type.nullable)
    elif field.type.type_name == flink_fn_execution_pb2.Schema.TypeName.BIGINT:
        return pa.field(field.name, pa.int64(), field.type.nullable)
    elif field.type.type_name == flink_fn_execution_pb2.Schema.TypeName.BOOLEAN:
        return pa.field(field.name, pa.bool_(), field.type.nullable)
    elif field.type.type_name == flink_fn_execution_pb2.Schema.TypeName.FLOAT:
        return pa.field(field.name, pa.float32(), field.type.nullable)
    elif field.type.type_name == flink_fn_execution_pb2.Schema.TypeName.DOUBLE:
        return pa.field(field.name, pa.float64(), field.type.nullable)
    elif field.type.type_name == flink_fn_execution_pb2.Schema.TypeName.VARCHAR:
        return pa.field(field.name, pa.utf8(), field.type.nullable)
    elif field.type.type_name == flink_fn_execution_pb2.Schema.TypeName.VARBINARY:
        return pa.field(field.name, pa.binary(), field.type.nullable)
    elif field.type.type_name == flink_fn_execution_pb2.Schema.TypeName.DECIMAL:
        return pa.field(field.name,
                        pa.decimal128(field.type.decimal_info.precision,
                                      field.type.decimal_info.scale),
                        field.type.nullable)
    elif field.type.type_name == flink_fn_execution_pb2.Schema.TypeName.DATE:
        return pa.field(field.name, pa.date32(), field.type.nullable)
    elif field.type.type_name == flink_fn_execution_pb2.Schema.TypeName.TIME:
        # Choose the narrowest Arrow time type able to hold the declared
        # fractional-second precision.
        precision = field.type.time_info.precision
        if precision == 0:
            return pa.field(field.name, pa.time32('s'), field.type.nullable)
        elif 1 <= precision <= 3:
            return pa.field(field.name, pa.time32('ms'), field.type.nullable)
        elif 4 <= precision <= 6:
            return pa.field(field.name, pa.time64('us'), field.type.nullable)
        else:
            return pa.field(field.name, pa.time64('ns'), field.type.nullable)
    else:
        raise ValueError("field_type %s is not supported." % field.type)
def test_type_for_alias():
    """Every documented string alias resolves to its DataType via
    ``pa.type_for_alias`` (older variant without duration/interval aliases)."""
    cases = [
        ('i1', pa.int8()),
        ('int8', pa.int8()),
        ('i2', pa.int16()),
        ('int16', pa.int16()),
        ('i4', pa.int32()),
        ('int32', pa.int32()),
        ('i8', pa.int64()),
        ('int64', pa.int64()),
        ('u1', pa.uint8()),
        ('uint8', pa.uint8()),
        ('u2', pa.uint16()),
        ('uint16', pa.uint16()),
        ('u4', pa.uint32()),
        ('uint32', pa.uint32()),
        ('u8', pa.uint64()),
        ('uint64', pa.uint64()),
        ('f4', pa.float32()),
        ('float32', pa.float32()),
        ('f8', pa.float64()),
        ('float64', pa.float64()),
        ('date32', pa.date32()),
        ('date64', pa.date64()),
        ('string', pa.string()),
        ('str', pa.string()),
        ('binary', pa.binary()),
        ('time32[s]', pa.time32('s')),
        ('time32[ms]', pa.time32('ms')),
        ('time64[us]', pa.time64('us')),
        ('time64[ns]', pa.time64('ns')),
        ('timestamp[s]', pa.timestamp('s')),
        ('timestamp[ms]', pa.timestamp('ms')),
        ('timestamp[us]', pa.timestamp('us')),
        ('timestamp[ns]', pa.timestamp('ns')),
    ]
    for val, expected in cases:
        assert pa.type_for_alias(val) == expected
def test_type_schema_pickling():
    """Types (including unions, maps, decimals and parameterised lists),
    fields and whole schemas must survive a pickle round trip."""
    cases = [
        pa.int8(),
        pa.string(),
        pa.binary(),
        pa.binary(10),
        pa.list_(pa.string()),
        pa.map_(pa.string(), pa.int8()),
        pa.struct([
            pa.field('a', 'int8'),
            pa.field('b', 'string')
        ]),
        pa.union([
            pa.field('a', pa.int8()),
            pa.field('b', pa.int16())
        ], pa.lib.UnionMode_SPARSE),
        pa.union([
            pa.field('a', pa.int8()),
            pa.field('b', pa.int16())
        ], pa.lib.UnionMode_DENSE),
        pa.time32('s'),
        pa.time64('us'),
        pa.date32(),
        pa.date64(),
        pa.timestamp('ms'),
        pa.timestamp('ns'),
        pa.decimal128(12, 2),
        pa.decimal256(76, 38),
        pa.field('a', 'string', metadata={b'foo': b'bar'}),
        # Parameterised element fields must keep their names.
        pa.list_(pa.field("element", pa.int64())),
        pa.large_list(pa.field("element", pa.int64())),
        pa.map_(pa.field("key", pa.string(), nullable=False),
                pa.field("value", pa.int8()))
    ]

    # Each type/field pickles back to an equal object.
    for val in cases:
        roundtripped = pickle.loads(pickle.dumps(val))
        assert val == roundtripped

    # A schema built from all of the above also round-trips,
    # including its metadata.
    fields = []
    for i, f in enumerate(cases):
        if isinstance(f, pa.Field):
            fields.append(f)
        else:
            fields.append(pa.field('_f{}'.format(i), f))

    schema = pa.schema(fields, metadata={b'foo': b'bar'})
    roundtripped = pickle.loads(pickle.dumps(schema))
    assert schema == roundtripped
def test_pytime_from_pandas(self): pytimes = [time(1, 2, 3, 1356), time(4, 5, 6, 1356)] # microseconds t1 = pa.time64('us') aobjs = np.array(pytimes + [None], dtype=object) parr = pa.array(aobjs) assert parr.type == t1 assert parr[0].as_py() == pytimes[0] assert parr[1].as_py() == pytimes[1] assert parr[2] is pa.NA # DataFrame df = pd.DataFrame({'times': aobjs}) batch = pa.RecordBatch.from_pandas(df) assert batch[0].equals(parr) # Test ndarray of int64 values arr = np.array([_pytime_to_micros(v) for v in pytimes], dtype='int64') a1 = pa.array(arr, type=pa.time64('us')) assert a1[0].as_py() == pytimes[0] a2 = pa.array(arr * 1000, type=pa.time64('ns')) assert a2[0].as_py() == pytimes[0] a3 = pa.array((arr / 1000).astype('i4'), type=pa.time32('ms')) assert a3[0].as_py() == pytimes[0].replace(microsecond=1000) a4 = pa.array((arr / 1000000).astype('i4'), type=pa.time32('s')) assert a4[0].as_py() == pytimes[0].replace(microsecond=0)
def test_string_to_arrow_bijection_for_primitive_types(self):
    """``string_to_arrow`` and ``_arrow_to_datasets_dtype`` must be
    mutual inverses for supported primitive types, and both directions
    must raise ValueError on unsupported inputs."""
    # DataType -> string -> DataType round trip.
    supported_pyarrow_datatypes = [
        pa.time32("s"),
        pa.time64("us"),
        pa.timestamp("s"),
        pa.timestamp("ns", tz="America/New_York"),
        pa.date32(),
        pa.date64(),
        pa.duration("s"),
        pa.decimal128(10, 2),
        pa.decimal256(40, -3),
        pa.string(),
        pa.int32(),
        pa.float64(),
    ]
    for dt in supported_pyarrow_datatypes:
        self.assertEqual(dt, string_to_arrow(_arrow_to_datasets_dtype(dt)))

    # Nested types have no string form.
    unsupported_pyarrow_datatypes = [pa.list_(pa.float64())]
    for dt in unsupported_pyarrow_datatypes:
        with self.assertRaises(ValueError):
            string_to_arrow(_arrow_to_datasets_dtype(dt))

    # string -> DataType -> string round trip.
    supported_datasets_dtypes = [
        "time32[s]",
        "timestamp[ns]",
        "timestamp[ns, tz=+07:30]",
        "duration[us]",
        "decimal128(30, -4)",
        "int32",
        "float64",
    ]
    for sdt in supported_datasets_dtypes:
        self.assertEqual(sdt, _arrow_to_datasets_dtype(string_to_arrow(sdt)))

    # Malformed unit/tz/precision strings must be rejected.
    unsupported_datasets_dtypes = [
        "time32[ns]",
        "timestamp[blob]",
        "timestamp[[ns]]",
        "timestamp[ns, tz=[ns]]",
        "duration[[us]]",
        "decimal20(30, -4)",
        "int",
    ]
    for sdt in unsupported_datasets_dtypes:
        with self.assertRaises(ValueError):
            string_to_arrow(sdt)
def test_types_hashable():
    """Every DataType hashes consistently and works as a dict key."""
    sample_types = [
        pa.null(),
        pa.int32(),
        pa.time32('s'),
        pa.time64('us'),
        pa.date32(),
        pa.timestamp('us'),
        pa.string(),
        pa.binary(),
        pa.binary(10),
        pa.list_(pa.int32()),
        pa.struct([pa.field('a', pa.int32()),
                   pa.field('b', pa.int8()),
                   pa.field('c', pa.string())])
    ]

    seen = {}
    for index, dtype in enumerate(sample_types):
        # Hash must be stable across calls.
        assert hash(dtype) == hash(dtype)
        seen[dtype] = index
        assert seen[dtype] == index
def test_types_hashable():
    """DataTypes must be hashable: stable hashes, usable as dict keys."""
    candidates = [
        pa.null(),
        pa.int32(),
        pa.time32('s'),
        pa.time64('us'),
        pa.date32(),
        pa.timestamp('us'),
        pa.string(),
        pa.binary(),
        pa.binary(10),
        pa.list_(pa.int32()),
        pa.struct([pa.field('a', pa.int32()),
                   pa.field('b', pa.int8()),
                   pa.field('c', pa.string())])
    ]

    lookup = {}
    position = 0
    for dtype in candidates:
        assert hash(dtype) == hash(dtype)
        lookup[dtype] = position
        # The freshly inserted key must be retrievable by hash.
        assert lookup[dtype] == position
        position += 1
def get_many_types():
    """Return a tuple covering one instance of (almost) every Arrow type,
    including unions, nested structs and a dictionary type."""
    # returning them from a function is required because of pa.dictionary
    # type holds a pyarrow array and test_array.py::test_toal_bytes_allocated
    # checks that the default memory pool has zero allocated bytes
    return (
        pa.null(),
        pa.bool_(),
        pa.int32(),
        pa.time32('s'),
        pa.time64('us'),
        pa.date32(),
        pa.timestamp('us'),
        pa.timestamp('us', tz='UTC'),
        pa.timestamp('us', tz='Europe/Paris'),
        pa.float16(),
        pa.float32(),
        pa.float64(),
        pa.decimal128(19, 4),
        pa.string(),
        pa.binary(),
        pa.binary(10),
        pa.list_(pa.int32()),
        pa.struct([pa.field('a', pa.int32()),
                   pa.field('b', pa.int8()),
                   pa.field('c', pa.string())]),
        # Same struct but with non-nullable leading fields.
        pa.struct([pa.field('a', pa.int32(), nullable=False),
                   pa.field('b', pa.int8(), nullable=False),
                   pa.field('c', pa.string())]),
        pa.union([pa.field('a', pa.binary(10)),
                  pa.field('b', pa.string())],
                 mode=pa.lib.UnionMode_DENSE),
        pa.union([pa.field('a', pa.binary(10)),
                  pa.field('b', pa.string())],
                 mode=pa.lib.UnionMode_SPARSE),
        pa.union([pa.field('a', pa.binary(10), nullable=False),
                  pa.field('b', pa.string())],
                 mode=pa.lib.UnionMode_SPARSE),
        pa.dictionary(pa.int32(), pa.string())
    )
def test_is_temporal_date_time_timestamp():
    """is_temporal covers dates, times and timestamps, while each specific
    predicate accepts only its own kind."""
    groups = [
        ([pa.date32(), pa.date64()], types.is_date),
        ([pa.time32('s'), pa.time64('ns')], types.is_time),
        ([pa.timestamp('ms')], types.is_timestamp),
    ]

    # All of the above are temporal.
    for members, _ in groups:
        for case in members:
            assert types.is_temporal(case)

    # Each specific predicate matches only its own group.
    for members, own_predicate in groups:
        for case in members:
            assert own_predicate(case)
            for _, other_predicate in groups:
                if other_predicate is not own_predicate:
                    assert not other_predicate(case)

    assert not types.is_temporal(pa.int32())
(pa.uint16(), 'uint16'), (pa.uint32(), 'uint32'), (pa.uint64(), 'uint64'), (pa.float16(), 'float16'), (pa.float32(), 'float32'), (pa.float64(), 'float64'), (pa.date32(), 'date'), (pa.date64(), 'date'), (pa.binary(), 'bytes'), (pa.binary(length=4), 'bytes'), (pa.string(), 'unicode'), (pa.list_(pa.list_(pa.int16())), 'list[list[int16]]'), (pa.decimal128(18, 3), 'decimal'), (pa.timestamp('ms'), 'datetime'), (pa.timestamp('us', 'UTC'), 'datetimetz'), (pa.time32('s'), 'time'), (pa.time64('us'), 'time') ] ) def test_logical_type(type, expected): assert get_logical_type(type) == expected def test_array_uint64_from_py_over_range(): arr = pa.array([2 ** 63], type=pa.uint64()) expected = pa.array(np.array([2 ** 63], dtype='u8')) assert arr.equals(expected) def test_array_conversions_no_sentinel_values(): arr = np.array([1, 2, 3, 4], dtype='int8')
pa.float32(), pa.float64() ]) decimal_type = st.builds( pa.decimal128, precision=st.integers(min_value=1, max_value=38), scale=st.integers(min_value=1, max_value=38) ) numeric_types = st.one_of(integer_types, floating_types, decimal_type) date_types = st.sampled_from([ pa.date32(), pa.date64() ]) time_types = st.sampled_from([ pa.time32('s'), pa.time32('ms'), pa.time64('us'), pa.time64('ns') ]) timestamp_types = st.builds( pa.timestamp, unit=st.sampled_from(['s', 'ms', 'us', 'ns']), tz=tzst.timezones() ) temporal_types = st.one_of(date_types, time_types, timestamp_types) primitive_types = st.one_of( null_type, bool_type, binary_type,
"INT64", pyarrow.uint8().id: "INT64", pyarrow.uint16().id: "INT64", pyarrow.uint32().id: "INT64", pyarrow.uint64().id: "INT64", pyarrow.float16().id: "FLOAT64", pyarrow.float32().id: "FLOAT64", pyarrow.float64().id: "FLOAT64", pyarrow.time32("ms").id: "TIME", pyarrow.time64("ns").id: "TIME", pyarrow.timestamp("ns").id: "TIMESTAMP", pyarrow.date32().id: "DATE", pyarrow.date64().id: "DATETIME", # because millisecond resolution pyarrow.binary().id: "BYTES", pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() # The exact scale and precision don't matter, see below. pyarrow.decimal128(38, scale=9).id:
def dataframe_with_lists(include_index=False, parquet_compatible=False):
    """
    Dataframe with list columns of every possible primitive type.

    Parameters
    ----------
    include_index : bool
        If True, append an '__index_level_0__' int64 field to the schema.
    parquet_compatible : bool
        Exclude types not supported by parquet (here: time64[ns] lists).

    Returns
    -------
    df: pandas.DataFrame
    schema: pyarrow.Schema
        Arrow schema definition that is in line with the constructed df.
    """
    arrays = OrderedDict()
    fields = []

    # Every column exercises the same edge cases: a full list, a shorter
    # list, a None (null) entry, and an empty list.
    fields.append(pa.field('int64', pa.list_(pa.int64())))
    arrays['int64'] = [
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4],
        None,
        [],
        # strided (non-contiguous) ndarray input
        np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9] * 2, dtype=np.int64)[::2]
    ]
    fields.append(pa.field('double', pa.list_(pa.float64())))
    arrays['double'] = [
        [0., 1., 2., 3., 4., 5., 6., 7., 8., 9.],
        [0., 1., 2., 3., 4.],
        None,
        [],
        np.array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.] * 2)[::2],
    ]
    fields.append(pa.field('bytes_list', pa.list_(pa.binary())))
    arrays['bytes_list'] = [
        [b"1", b"f"],
        None,
        [b"1"],
        [b"1", b"2", b"3"],
        [],
    ]
    fields.append(pa.field('str_list', pa.list_(pa.string())))
    arrays['str_list'] = [
        [u"1", u"ä"],  # non-ASCII entry to exercise unicode handling
        None,
        [u"1"],
        [u"1", u"2", u"3"],
        [],
    ]

    date_data = [
        [],
        [date(2018, 1, 1), date(2032, 12, 30)],
        [date(2000, 6, 7)],
        None,
        [date(1969, 6, 9), date(1972, 7, 3)]
    ]
    time_data = [
        [time(23, 11, 11), time(1, 2, 3), time(23, 59, 59)],
        [],
        [time(22, 5, 59)],
        None,
        [time(0, 0, 0), time(18, 0, 2), time(12, 7, 3)]
    ]

    # The same date/time fixtures are reused across all temporal list types;
    # the column name is derived from the value type (e.g. 'time32[s]_list').
    temporal_pairs = [
        (pa.date32(), date_data),
        (pa.date64(), date_data),
        (pa.time32('s'), time_data),
        (pa.time32('ms'), time_data),
        (pa.time64('us'), time_data)
    ]
    if not parquet_compatible:
        # time64[ns] is excluded when parquet compatibility is requested.
        temporal_pairs += [
            (pa.time64('ns'), time_data),
        ]
    for value_type, data in temporal_pairs:
        field_name = '{}_list'.format(value_type)
        field_type = pa.list_(value_type)
        field = pa.field(field_name, field_type)
        fields.append(field)
        arrays[field_name] = data

    if include_index:
        fields.append(pa.field('__index_level_0__', pa.int64()))

    df = pd.DataFrame(arrays)
    schema = pa.schema(fields)

    return df, schema
def test_date_time_types():
    """Round-trip all supported temporal types through parquet, checking the
    type coercions parquet forces (date64->date32, time32[s]->time32[ms],
    timestamp[ns]->timestamp[us] or INT96), and that time64[ns] writing
    raises NotImplementedError.
    """
    t1 = pa.date32()
    data1 = np.array([17259, 17260, 17261], dtype='int32')  # days since epoch
    a1 = pa.array(data1, type=t1)

    t2 = pa.date64()
    data2 = data1.astype('int64') * 86400000  # same dates in milliseconds
    a2 = pa.array(data2, type=t2)

    t3 = pa.timestamp('us')
    start = pd.Timestamp('2000-01-01').value / 1000  # ns -> us
    data3 = np.array([start, start + 1, start + 2], dtype='int64')
    a3 = pa.array(data3, type=t3)

    t4 = pa.time32('ms')
    data4 = np.arange(3, dtype='i4')
    a4 = pa.array(data4, type=t4)

    t5 = pa.time64('us')
    a5 = pa.array(data4.astype('int64'), type=t5)

    t6 = pa.time32('s')
    a6 = pa.array(data4, type=t6)

    # Expected read-back form of a6: parquet stores seconds as milliseconds.
    ex_t6 = pa.time32('ms')
    ex_a6 = pa.array(data4 * 1000, type=ex_t6)

    t7 = pa.timestamp('ns')
    start = pd.Timestamp('2001-01-01').value
    data7 = np.array([start, start + 1000, start + 2000], dtype='int64')
    a7 = pa.array(data7, type=t7)

    # Expected read-back form of a7 when ns is downcast to us.
    t7_us = pa.timestamp('us')
    start = pd.Timestamp('2001-01-01').value
    data7_us = np.array([start, start + 1000, start + 2000],
                        dtype='int64') // 1000
    a7_us = pa.array(data7_us, type=t7_us)

    table = pa.Table.from_arrays([a1, a2, a3, a4, a5, a6, a7],
                                 ['date32', 'date64', 'timestamp[us]',
                                  'time32[s]', 'time64[us]',
                                  'time32_from64[s]', 'timestamp[ns]'])

    # date64 as date32
    # time32[s] to time32[ms]
    # 'timestamp[ns]' to 'timestamp[us]'
    expected = pa.Table.from_arrays([a1, a1, a3, a4, a5, ex_a6, a7_us],
                                    ['date32', 'date64', 'timestamp[us]',
                                     'time32[s]', 'time64[us]',
                                     'time32_from64[s]', 'timestamp[ns]'])

    _check_roundtrip(table, expected=expected, version='2.0')

    # date64 as date32
    # time32[s] to time32[ms]
    # 'timestamp[ns]' is saved as INT96 timestamp
    expected = pa.Table.from_arrays([a1, a1, a3, a4, a5, ex_a6, a7],
                                    ['date32', 'date64', 'timestamp[us]',
                                     'time32[s]', 'time64[us]',
                                     'time32_from64[s]', 'timestamp[ns]'])

    _check_roundtrip(table, expected=expected, version='2.0',
                     use_deprecated_int96_timestamps=True)

    # Check that setting flavor to 'spark' uses int96 timestamps
    _check_roundtrip(table, expected=expected, version='2.0', flavor='spark')

    # Unsupported stuff
    def _assert_unsupported(array):
        # Writing this array to parquet must raise NotImplementedError.
        table = pa.Table.from_arrays([array], ['unsupported'])
        buf = io.BytesIO()
        with pytest.raises(NotImplementedError):
            _write_table(table, buf, version="2.0")

    t7 = pa.time64('ns')
    a7 = pa.array(data4.astype('int64'), type=t7)
    _assert_unsupported(a7)
# (continuation) tail of a hashing test begun above this chunk: every field
# placed in in_dict must round-trip to its insertion index.
assert len(in_dict) == len(fields)
for i, field in enumerate(fields):
    assert in_dict[field] == i


def test_fields_weakrefable():
    """A pa.field must be collectable once all strong references are gone."""
    field = pa.field('a', pa.int32())
    wr = weakref.ref(field)
    assert wr() is not None
    del field
    assert wr() is None


# One predicate per exact primitive type; each must accept the type it is
# named after (pyarrow.types.is_* helpers).
@pytest.mark.parametrize('t,check_func', [(pa.date32(), types.is_date32),
                                          (pa.date64(), types.is_date64),
                                          (pa.time32('s'), types.is_time32),
                                          (pa.time64('ns'), types.is_time64),
                                          (pa.int8(), types.is_int8),
                                          (pa.int16(), types.is_int16),
                                          (pa.int32(), types.is_int32),
                                          (pa.int64(), types.is_int64),
                                          (pa.uint8(), types.is_uint8),
                                          (pa.uint16(), types.is_uint16),
                                          (pa.uint32(), types.is_uint32),
                                          (pa.uint64(), types.is_uint64),
                                          (pa.float16(), types.is_float16),
                                          (pa.float32(), types.is_float32),
                                          (pa.float64(), types.is_float64)])
def test_exact_primitive_types(t, check_func):
    assert check_func(t)
("string", None, pa.StringScalar, pa.StringValue), (b"bytes", None, pa.BinaryScalar, pa.BinaryValue), ("largestring", pa.large_string(), pa.LargeStringScalar, pa.LargeStringValue), (b"largebytes", pa.large_binary(), pa.LargeBinaryScalar, pa.LargeBinaryValue), (b"abc", pa.binary(3), pa.FixedSizeBinaryScalar, pa.FixedSizeBinaryValue), ([1, 2, 3], None, pa.ListScalar, pa.ListValue), ([1, 2, 3, 4], pa.large_list( pa.int8()), pa.LargeListScalar, pa.LargeListValue), ([1, 2, 3, 4, 5], pa.list_( pa.int8(), 5), pa.FixedSizeListScalar, pa.FixedSizeListValue), (datetime.date.today(), None, pa.Date32Scalar, pa.Date32Value), (datetime.date.today(), pa.date64(), pa.Date64Scalar, pa.Date64Value), (datetime.datetime.now(), None, pa.TimestampScalar, pa.TimestampValue), (datetime.datetime.now().time().replace(microsecond=0), pa.time32('s'), pa.Time32Scalar, pa.Time32Value), (datetime.datetime.now().time(), None, pa.Time64Scalar, pa.Time64Value), (datetime.timedelta(days=1), None, pa.DurationScalar, pa.DurationValue), ({ 'a': 1, 'b': [1, 2] }, None, pa.StructScalar, pa.StructValue), ([('a', 1), ('b', 2)], pa.map_(pa.string(), pa.int8()), pa.MapScalar, pa.MapValue), ]) def test_basics(value, ty, klass, deprecated): s = pa.scalar(value, type=ty) assert isinstance(s, klass) assert s.as_py() == value assert s == pa.scalar(value, type=ty)
# jvm_spec = om.writeValueAsString(field) @pytest.mark.parametrize('pa_type,jvm_spec', [ (pa.null(), '{"name":"null"}'), (pa.bool_(), '{"name":"bool"}'), (pa.int8(), '{"name":"int","bitWidth":8,"isSigned":true}'), (pa.int16(), '{"name":"int","bitWidth":16,"isSigned":true}'), (pa.int32(), '{"name":"int","bitWidth":32,"isSigned":true}'), (pa.int64(), '{"name":"int","bitWidth":64,"isSigned":true}'), (pa.uint8(), '{"name":"int","bitWidth":8,"isSigned":false}'), (pa.uint16(), '{"name":"int","bitWidth":16,"isSigned":false}'), (pa.uint32(), '{"name":"int","bitWidth":32,"isSigned":false}'), (pa.uint64(), '{"name":"int","bitWidth":64,"isSigned":false}'), (pa.float16(), '{"name":"floatingpoint","precision":"HALF"}'), (pa.float32(), '{"name":"floatingpoint","precision":"SINGLE"}'), (pa.float64(), '{"name":"floatingpoint","precision":"DOUBLE"}'), (pa.time32('s'), '{"name":"time","unit":"SECOND","bitWidth":32}'), (pa.time32('ms'), '{"name":"time","unit":"MILLISECOND","bitWidth":32}'), (pa.time64('us'), '{"name":"time","unit":"MICROSECOND","bitWidth":64}'), (pa.time64('ns'), '{"name":"time","unit":"NANOSECOND","bitWidth":64}'), (pa.timestamp('s'), '{"name":"timestamp","unit":"SECOND",' '"timezone":null}'), (pa.timestamp('ms'), '{"name":"timestamp","unit":"MILLISECOND",' '"timezone":null}'), (pa.timestamp('us'), '{"name":"timestamp","unit":"MICROSECOND",' '"timezone":null}'), (pa.timestamp('ns'), '{"name":"timestamp","unit":"NANOSECOND",' '"timezone":null}'), (pa.timestamp('ns', tz='UTC'), '{"name":"timestamp","unit":"NANOSECOND"' ',"timezone":"UTC"}'), (pa.timestamp('ns', tz='Europe/Paris'), '{"name":"timestamp",' '"unit":"NANOSECOND","timezone":"Europe/Paris"}'),
# (continuation) last entry of a list begun above this chunk -- presumably a
# local `types` collection of DataTypes under a hashing test; inside that
# scope the name shadows the pyarrow.types module used below.
    pa.struct([pa.field('a', pa.int32()),
               pa.field('b', pa.int8()),
               pa.field('c', pa.string())])
]

in_dict = {}
for i, type_ in enumerate(types):
    # Hashing must be deterministic and each type usable as a dict key.
    assert hash(type_) == hash(type_)
    in_dict[type_] = i
    assert in_dict[type_] == i


# One predicate per exact primitive type; each must accept the type it is
# named after (pyarrow.types.is_* helpers).
@pytest.mark.parametrize('t,check_func', [
    (pa.date32(), types.is_date32),
    (pa.date64(), types.is_date64),
    (pa.time32('s'), types.is_time32),
    (pa.time64('ns'), types.is_time64),
    (pa.int8(), types.is_int8),
    (pa.int16(), types.is_int16),
    (pa.int32(), types.is_int32),
    (pa.int64(), types.is_int64),
    (pa.uint8(), types.is_uint8),
    (pa.uint16(), types.is_uint16),
    (pa.uint32(), types.is_uint32),
    (pa.uint64(), types.is_uint64),
    (pa.float16(), types.is_float16),
    (pa.float32(), types.is_float32),
    (pa.float64(), types.is_float64)
])
def test_exact_primitive_types(t, check_func):
    assert check_func(t)