def _quick_concat_or_none(dfs, dimension_columns, partition_columns): dfs = list(dfs) if dfs: return quick_concat( dfs=dfs, dimension_columns=dimension_columns, partition_columns=partition_columns, ) else: return None
def query_cube(
    cube,
    store,
    conditions=None,
    datasets=None,
    dimension_columns=None,
    partition_by=None,
    payload_columns=None,
):
    """
    Query cube.

    .. note::
        With ``partition_by=None`` (the default) only a single partition is
        generated. If that partition turns out to be empty (e.g. because of
        the provided conditions), an empty list is returned; otherwise a
        single-element list is returned.

    Parameters
    ----------
    cube: Cube
        Cube specification.
    store: simplekv.KeyValueStore
        KV store that preserves the cube.
    conditions: Union[None, Condition, Iterable[Condition], Conjunction]
        Conditions that should be applied, optional.
    datasets: Union[None, Iterable[str], Dict[str, kartothek.core.dataset.DatasetMetadata]]
        Datasets to query, must all be part of the cube. May be either the
        result of :meth:`discover_datasets`, a list of Ktk_cube dataset ID or
        ``None`` (in which case auto-discovery will be used).
    dimension_columns: Union[None, str, Iterable[str]]
        Dimension columns of the query, may result in projection. If not
        provided, dimension columns from cube specification will be used.
    partition_by: Union[None, str, Iterable[str]]
        By which column logical partitions should be formed. If not provided,
        a single partition will be generated.
    payload_columns: Union[None, str, Iterable[str]]
        Which columns apart from ``dimension_columns`` and ``partition_by``
        should be returned.

    Returns
    -------
    dfs: List[pandas.DataFrame]
        List of non-empty DataFrames, ordered by ``partition_by``. Columns of
        each DataFrame are alphabetically ordered. Data types are provided on
        best effort (they are restored based on the preserved data, but may be
        different due to Pandas NULL-handling, e.g. integer columns may be
        floats).
    """
    # Only the query intention and the groups are needed here; the second
    # element of the plan is ignored.
    intention, _unused, groups = plan_query(
        cube=cube,
        store=store,
        conditions=conditions,
        datasets=datasets,
        dimension_columns=dimension_columns,
        partition_by=partition_by,
        payload_columns=payload_columns,
    )

    # Load one DataFrame per group and keep only those that contain rows.
    loaded = (load_group(group=group, store=store, cube=cube) for group in groups)
    non_empty = [frame for frame in loaded if not frame.empty]

    # With an explicit ``partition_by`` each group stays its own partition;
    # with no data at all there is nothing to merge either.
    if intention.partition_by or not non_empty:
        return non_empty

    # No logical partitioning requested: collapse everything into a single
    # DataFrame, returned as a one-element list.
    merged = quick_concat(
        dfs=non_empty,
        dimension_columns=intention.dimension_columns,
        partition_columns=cube.partition_columns,
    )
    return [merged]