def collect_stats(cube, store, datasets=None):
    """
    Gather statistics for every dataset of the given cube.

    Parameters
    ----------
    cube: Cube
        Cube specification.
    store: simplekv.KeyValueStore
        KV store that holds the cube. A callable is also accepted; it is
        invoked without arguments and its return value is used as the store.
    datasets: Union[None, Iterable[str], Dict[str, kartothek.core.dataset.DatasetMetadata]]
        Datasets to query, must all be part of the cube. May be either the
        result of :meth:`discover_datasets`, a list of Ktk_cube dataset ID or
        ``None`` (in which case auto-discovery will be used).

    Returns
    -------
    stats: Dict[str, Dict[str, int]]
        Statistics per ktk_cube dataset ID.
    """
    # Resolve a store factory into the actual store object.
    store = store() if callable(store) else store

    # Anything that is not already a dict (``None`` or an iterable of IDs) is
    # handed to discovery as the dataset-ID filter.
    if not isinstance(datasets, dict):
        datasets = discover_datasets_unchecked(
            uuid_prefix=cube.uuid_prefix,
            store=store,
            filter_ktk_cube_dataset_ids=datasets,
        )

    metapartitions = get_metapartitions_for_stats(datasets)

    # All metapartitions are processed as one single block, so the reduction
    # receives a one-element list.
    block_stats = collect_stats_block(metapartitions, store)
    return reduce_stats([block_stats])
def _reduce_stats(nested_stats):
    """
    Flatten one level of nested statistics and reduce them to a single result.

    Parameters
    ----------
    nested_stats: Iterable[Iterable]
        Iterable whose elements are themselves iterables of statistics
        objects accepted by ``reduce_stats``.

    Returns
    -------
    reduced: list
        Single-element list holding the result of ``reduce_stats`` applied to
        the flattened statistics.
    """
    collected = []
    for group in nested_stats:
        collected.extend(group)

    # NOTE(review): the result is wrapped in a list, presumably so it can be
    # consumed as a partition by the caller -- confirm against call sites.
    return [reduce_stats(collected)]