def test_partition_set_dictionary_type():
    """Check the Arrow array type exposed by ``PartitionSet.dictionary``.

    Unicode string keys are expected to produce a ``pa.StringArray`` and
    integer keys a ``pa.IntegerArray``. Accessing ``dictionary`` on a set
    built from ``datetime`` keys is expected to raise ``TypeError``.
    """
    string_keys = pq.PartitionSet('key1', [u('foo'), u('bar'), u('baz')])
    integer_keys = pq.PartitionSet('key2', [2007, 2008, 2009])

    assert isinstance(string_keys.dictionary, pa.StringArray)
    assert isinstance(integer_keys.dictionary, pa.IntegerArray)

    datetime_keys = pq.PartitionSet('key2', [datetime.datetime(2007, 1, 1)])
    # The attribute access itself is what must raise, so the bare
    # expression statement inside the context manager is intentional.
    with pytest.raises(TypeError):
        datetime_keys.dictionary
def test_arrow_schema_convertion_with_int_partitions():
    """Check that digit-only partition keys become an ``np.int64`` field.

    Builds a mocked parquet dataset with a single ``int8`` column and one
    partition named ``part_name`` whose keys are the strings '0', '1', '2',
    then asserts that the unischema derived from it reports ``np.int64``
    as the numpy dtype of ``part_name``.
    """
    int8_field = pa.field('int8', pa.int8())
    arrow_schema = pa.schema([int8_field])

    partitions = [pq.PartitionSet('part_name', ['0', '1', '2'])]
    mock_dataset = _mock_parquet_dataset(partitions, arrow_schema)

    unischema = Unischema.from_arrow_schema(mock_dataset)
    assert unischema.part_name.numpy_dtype == np.int64