예제 #1
0
def test_partition_set_dictionary_type():
    """Check the array type exposed by ``PartitionSet.dictionary``.

    String keys are expected to produce a ``pa.StringArray`` and integer
    keys a ``pa.IntegerArray``; accessing ``dictionary`` on a partition set
    built from ``datetime`` keys is expected to raise ``TypeError``.
    """
    # Each entry pairs a partition set with the array class its
    # dictionary should be an instance of.
    expectations = [
        (pq.PartitionSet('key1', [u('foo'), u('bar'), u('baz')]),
         pa.StringArray),
        (pq.PartitionSet('key2', [2007, 2008, 2009]),
         pa.IntegerArray),
    ]
    for partition_set, array_cls in expectations:
        assert isinstance(partition_set.dictionary, array_cls)

    datetime_keyed = pq.PartitionSet('key2', [datetime.datetime(2007, 1, 1)])
    with pytest.raises(TypeError):
        # The attribute access itself is what must fail; the value is unused.
        datetime_keyed.dictionary
예제 #2
0
def test_arrow_schema_convertion_with_int_partitions():
    """Check the dtype inferred for a partition with integer-like string keys.

    A mocked dataset is built with a single ``int8`` column and one
    partition named ``part_name`` whose keys are the strings ``'0'``,
    ``'1'`` and ``'2'``. After ``Unischema.from_arrow_schema``, the
    ``part_name`` field is expected to report ``np.int64`` as its numpy dtype.
    """
    schema = pa.schema([pa.field('int8', pa.int8())])
    partitions = [pq.PartitionSet('part_name', ['0', '1', '2'])]
    dataset = _mock_parquet_dataset(partitions, schema)

    converted = Unischema.from_arrow_schema(dataset)

    assert converted.part_name.numpy_dtype == np.int64