def flatten_dataset_frequent_strings(dataset_summary: DatasetSummary):
    """
    Collect per-column frequent-string estimates from a dataset summary.

    Iterates over ``dataset_summary.columns`` and, for each column, looks up
    ``string_summary`` -> ``frequent`` through ``getter`` and reads its
    ``items``.

    Returns a dict keyed by column name. Each value maps an item's ``value``
    to its ``estimate`` converted with ``int``. Columns for which the lookup
    raises ``KeyError``, or whose resulting mapping is empty, are left out.
    """
    flattened = {}

    for name, column in dataset_summary.columns.items():
        try:
            string_summary = getter(column, "string_summary")
            frequent = getter(string_summary, "frequent")
            estimates = {}
            for entry in frequent.items:
                # Read the key before the estimate, then store the pair.
                key = entry.value
                estimates[key] = int(entry.estimate)
        except KeyError:
            # presumably getter signals a missing field with KeyError;
            # such columns are simply skipped.
            continue

        if estimates:
            flattened[name] = estimates

    return flattened
# Example #2
# 0
def flatten_dataset_histograms(dataset_summary: DatasetSummary):
    """
    Collect per-column histograms from a dataset summary.

    Iterates over ``dataset_summary.columns`` and, for each column, looks up
    ``number_summary`` -> ``histogram`` through ``getter``.

    Returns a dict keyed by column name. Each value is a dict with
    ``"bin_edges"`` (a list copy of the histogram's ``bins``) and
    ``"counts"`` (a list copy of its ``counts``). Columns for which the
    lookup raises ``KeyError``, or whose histogram has at most one bin
    entry, are left out.
    """
    flattened = {}

    for name, column in dataset_summary.columns.items():
        try:
            number_summary = getter(column, "number_summary")
            histogram = getter(number_summary, "histogram")
            if len(histogram.bins) <= 1:
                # Nothing useful to report for this column.
                continue
            edges = list(histogram.bins)
            counts = list(histogram.counts)
            flattened[name] = {"bin_edges": edges, "counts": counts}
        except KeyError:
            # presumably getter signals a missing field with KeyError;
            # such columns are simply skipped.
            continue

    return flattened
def flatten_dataset_string_quantiles(dataset_summary: DatasetSummary):
    """
    Collect per-column quantiles from a dataset summary.

    Iterates over ``dataset_summary.columns`` and, for each column, looks up
    ``number_summary`` -> ``quantiles`` through ``getter``.

    Returns a dict keyed by column name. Each value is an ``OrderedDict``
    pairing the labels produced by ``_quantile_strings(quantiles)`` with the
    corresponding ``quantile_values``, in order. Columns for which the
    lookup raises ``KeyError`` are left out.
    """
    flattened = {}

    for name, column in dataset_summary.columns.items():
        try:
            number_summary = getter(column, "number_summary")
            quantile_summary = getter(number_summary, "quantiles")
            labels = _quantile_strings(quantile_summary.quantiles)
            # zip stops at the shorter of the two sequences, exactly like
            # a pairwise fill loop would.
            flattened[name] = OrderedDict(
                zip(labels, quantile_summary.quantile_values)
            )
        except KeyError:
            # presumably getter signals a missing field with KeyError;
            # such columns are simply skipped.
            continue

    return flattened