Example #1
0
def collect_stats(cube, store, datasets=None):
    """
    Gather statistics for the datasets of a cube.

    Parameters
    ----------
    cube: Cube
        Cube specification; its ``uuid_prefix`` is used for dataset discovery.
    store: simplekv.KeyValueStore
        KV store that preserves the cube. If a callable is passed, it is invoked without arguments and its result
        is used as the store.
    datasets: Union[None, Iterable[str], Dict[str, kartothek.core.dataset.DatasetMetadata]]
        Datasets to query, must all be part of the cube. A dict (e.g. the result of :meth:`discover_datasets`) is
        used as-is; anything else (a list of Ktk_cube dataset IDs or ``None``) is handed to the discovery routine
        as a filter, where ``None`` means auto-discovery.

    Returns
    -------
    stats: Dict[str, Dict[str, int]]
        Statistics per ktk_cube dataset ID.
    """
    # Resolve a store factory into an actual store before any use.
    kv_store = store() if callable(store) else store

    if isinstance(datasets, dict):
        # Already-discovered datasets are taken verbatim.
        resolved = datasets
    else:
        resolved = discover_datasets_unchecked(
            uuid_prefix=cube.uuid_prefix,
            store=kv_store,
            filter_ktk_cube_dataset_ids=datasets,
        )

    metapartitions = get_metapartitions_for_stats(resolved)
    # All metapartitions are processed as one single block, then reduced.
    block_stats = collect_stats_block(metapartitions, kv_store)
    return reduce_stats([block_stats])
Example #2
0
def _reduce_stats(nested_stats):
    """
    Flatten one level of nesting and reduce the result.

    Parameters
    ----------
    nested_stats: Iterable[Iterable]
        Iterable whose elements are themselves iterables of stats entries.

    Returns
    -------
    reduced: list
        Single-element list holding the result of ``reduce_stats`` applied to the flattened entries.
    """
    flattened = []
    for group in nested_stats:
        flattened.extend(group)
    return [reduce_stats(flattened)]