mp2 = MetaPartition( label="second", data=pd.DataFrame({"a": [1, 1], "b": ["a", "b"]}, dtype="category"), metadata_version=4, partition_keys=["a"], ) new_mp = MetaPartition.concat_metapartitions([mp1, mp2]) assert new_mp.table_name == "table" assert pd.api.types.is_categorical_dtype(new_mp.data["b"].dtype) # We can't partition on null columns (gh-262) @pytest.mark.parametrize( "col", sorted(set(get_dataframe_not_nested().columns) - {"null"}) ) def test_partition_on_scalar_intermediate(df_not_nested, col): """ Test against a bug where grouping leaves a scalar value """ assert len(df_not_nested) == 1 mp = MetaPartition(label="somelabel", data=df_not_nested, metadata_version=4) new_mp = mp.partition_on(col) assert len(new_mp) == 1 def test_partition_on_with_primary_index_invalid(df_not_nested): mp = MetaPartition( label="pkey=1/pkey2=2/base_label", data=df_not_nested,
def df_not_nested():
    """Provide the shared non-nested example DataFrame used by these tests."""
    frame = get_dataframe_not_nested()
    return frame
assert_frame_equal(ddf_expected_simple.compute(), ddf.compute()) @pytest.fixture() def setup_reconstruct_dask_index_types(store_factory, df_not_nested): indices = list(df_not_nested.columns) indices.remove("null") return store_dataframes_as_dataset( store=store_factory, dataset_uuid="dataset_uuid", dfs=[df_not_nested], secondary_indices=indices, ) @pytest.mark.parametrize("col", get_dataframe_not_nested().columns) def test_reconstruct_dask_index_types(store_factory, setup_reconstruct_dask_index_types, col): if col == "null": pytest.xfail(reason="Cannot index null column") ddf = read_dataset_as_ddf( dataset_uuid=setup_reconstruct_dask_index_types.uuid, store=store_factory, table="table", dask_index_on=col, ) assert ddf.known_divisions assert ddf.index.name == col def test_reconstruct_dask_index_sorting(store_factory, monkeypatch):