Ejemplo n.º 1
0
    assert rg_meta.num_columns == ncols + 1  # +1 for index


@parquet
@pytest.mark.parametrize(
    'data, dtype, min_value, max_value, null_count, num_values',
    [
        # Unsigned integer columns, one null each.
        ([1, 2, 2, None, 4], np.uint8, u'1', u'4', 1, 4),
        ([1, 2, 2, None, 4], np.uint16, u'1', u'4', 1, 4),
        ([1, 2, 2, None, 4], np.uint32, u'1', u'4', 1, 4),
        ([1, 2, 2, None, 4], np.uint64, u'1', u'4', 1, 4),
        # Signed integer columns, one null each.
        ([-1, 2, 2, None, 4], np.int16, u'-1', u'4', 1, 4),
        ([-1, 2, 2, None, 4], np.int32, u'-1', u'4', 1, 4),
        ([-1, 2, 2, None, 4], np.int64, u'-1', u'4', 1, 4),
        # Floating point columns, one null each.
        ([-1.1, 2.2, 2.3, None, 4.4], np.float32, u'-1.1', u'4.4', 1, 4),
        ([-1.1, 2.2, 2.3, None, 4.4], np.float64, u'-1.1', u'4.4', 1, 4),
        # Unicode strings, including a non-ASCII code point.
        # NOTE(review): the data holds an empty string while the expected
        # minimum is a single space (and the maximum carries a trailing
        # space) -- confirm these expectations are intentional.
        (
            [u'', u'b', unichar(1000), None, u'aaa'],
            str, u' ', frombytes((unichar(1000) + u' ').encode('utf-8')), 1, 4
        ),
        # Use the builtin ``bool``: ``np.bool`` was only an alias for it,
        # was deprecated in NumPy 1.20 and removed in NumPy 1.24, where
        # referencing it raises AttributeError at collection time.
        ([True, False, False, True, True], bool, u'0', u'1', 0, 5),
    ]
)
def test_parquet_column_statistics_api(data, dtype, min_value, max_value,
                                       null_count, num_values):
    """Fetch the column metadata used to check Parquet column statistics.

    For each parametrized case a single-column DataFrame named ``data`` is
    built with the requested ``dtype`` and handed to ``make_sample_file``;
    the metadata of the first column of the first row group is then looked
    up on the returned file handle.

    Parameters
    ----------
    data : list
        Column values, possibly containing ``None``.
    dtype : type
        dtype passed to ``pd.DataFrame``.
    min_value, max_value : str
        Expected minimum / maximum for the column statistics.
    null_count : int
        Expected number of nulls.
    num_values : int
        Expected number of values.
    """
    # NOTE(review): none of the expected-value parameters are asserted on
    # in the lines visible here -- confirm the checks exist further down.
    df = pd.DataFrame({'data': data}, dtype=dtype)

    fileh = make_sample_file(df)

    meta = fileh.metadata

    rg_meta = meta.row_group(0)
    col_meta = rg_meta.column(0)
Ejemplo n.º 2
0
@parquet
@pytest.mark.parametrize(
    'data, dtype, min_value, max_value, null_count, num_values',
    [
        ([1, 2, 2, None, 4], np.uint8, u'1', u'4', 1, 4),
        ([1, 2, 2, None, 4], np.uint16, u'1', u'4', 1, 4),
        ([1, 2, 2, None, 4], np.uint32, u'1', u'4', 1, 4),
        ([1, 2, 2, None, 4], np.uint64, u'1', u'4', 1, 4),
        ([-1, 2, 2, None, 4], np.int16, u'-1', u'4', 1, 4),
        ([-1, 2, 2, None, 4], np.int32, u'-1', u'4', 1, 4),
        ([-1, 2, 2, None, 4], np.int64, u'-1', u'4', 1, 4),
        ([-1.1, 2.2, 2.3, None, 4.4], np.float32, u'-1.1', u'4.4', 1, 4),
        ([-1.1, 2.2, 2.3, None, 4.4], np.float64, u'-1.1', u'4.4', 1, 4),
        (
            [u'', u'b', unichar(1000), None, u'aaa'],
            str, u' ', frombytes((unichar(1000) + u' ').encode('utf-8')), 1, 4
        ),
        ([True, False, False, True, True], np.bool, u'0', u'1', 0, 5),
    ]
)
def test_parquet_column_statistics_api(
        data,
        dtype,
        min_value,
        max_value,
        null_count,
        num_values):
    df = pd.DataFrame({'data': data}, dtype=dtype)

    fileh = make_sample_file(df)