コード例 #1
0
    mp2 = MetaPartition(
        label="second",
        data=pd.DataFrame({"a": [1, 1], "b": ["a", "b"]}, dtype="category"),
        metadata_version=4,
        partition_keys=["a"],
    )

    new_mp = MetaPartition.concat_metapartitions([mp1, mp2])

    assert new_mp.table_name == "table"
    assert pd.api.types.is_categorical_dtype(new_mp.data["b"].dtype)


# Null columns cannot be partitioned on (gh-262), so "null" is excluded here.
@pytest.mark.parametrize(
    "col",
    sorted(set(get_dataframe_not_nested().columns).difference({"null"})),
)
def test_partition_on_scalar_intermediate(df_not_nested, col):
    """
    Regression test for a bug where grouping leaves a scalar value.

    Partitions a single-row frame on ``col`` and checks that the result
    still has length one.
    """
    # The scenario only applies to a frame with exactly one row.
    assert len(df_not_nested) == 1

    partition = MetaPartition(
        label="somelabel",
        data=df_not_nested,
        metadata_version=4,
    )
    partitioned = partition.partition_on(col)

    assert len(partitioned) == 1


def test_partition_on_with_primary_index_invalid(df_not_nested):
    mp = MetaPartition(
        label="pkey=1/pkey2=2/base_label",
        data=df_not_nested,
コード例 #2
0
ファイル: conftest.py プロジェクト: trucnguyenlam/kartothek
def df_not_nested():
    """Return the frame built by ``get_dataframe_not_nested()``."""
    frame = get_dataframe_not_nested()
    return frame
コード例 #3
0
    assert_frame_equal(ddf_expected_simple.compute(), ddf.compute())


@pytest.fixture()
def setup_reconstruct_dask_index_types(store_factory, df_not_nested):
    """
    Store ``df_not_nested`` as the dataset ``"dataset_uuid"``.

    Every column of the frame except ``"null"`` is passed as a secondary
    index. Returns the result of ``store_dataframes_as_dataset``.
    """
    indexed_columns = [*df_not_nested.columns]
    # The "null" column is left out of the secondary indices.
    indexed_columns.remove("null")

    dataset = store_dataframes_as_dataset(
        dfs=[df_not_nested],
        store=store_factory,
        dataset_uuid="dataset_uuid",
        secondary_indices=indexed_columns,
    )
    return dataset


@pytest.mark.parametrize("col", get_dataframe_not_nested().columns)
def test_reconstruct_dask_index_types(
    store_factory, setup_reconstruct_dask_index_types, col
):
    """
    Read the stored dataset back with ``dask_index_on=col``.

    Checks that the resulting frame has known divisions and that its index
    is named after ``col``. The ``"null"`` column is marked as an expected
    failure.
    """
    if col == "null":
        pytest.xfail(reason="Cannot index null column")

    stored_dataset = setup_reconstruct_dask_index_types
    frame = read_dataset_as_ddf(
        store=store_factory,
        dataset_uuid=stored_dataset.uuid,
        table="table",
        dask_index_on=col,
    )

    assert frame.known_divisions
    assert frame.index.name == col


def test_reconstruct_dask_index_sorting(store_factory, monkeypatch):