def render_real(summary):
    """Build the report components for a real-valued variable.

    Args:
        summary: Mapping holding the statistics of one variable. It must
            provide the identification keys ("varid", "varname", "warnings",
            "description", "warn_fields"), every statistic shown in the
            tables below and "histogram_data". The key
            "histogram_bins_bayesian_blocks" is optional; when present a
            second, variable-bin histogram is added.

    Returns:
        The object returned by ``render_common(summary)``, extended with a
        "top" entry (overview grid) and a "bottom" entry (detail tabs).
    """
    varid = summary["varid"]
    template_variables = render_common(summary)
    image_format = config["plot"]["image_format"].get(str)

    def flagged(label, key, fmt="fmt"):
        # Overview row highlighted when its key is listed in warn_fields.
        return {
            "name": label,
            "value": summary[key],
            "fmt": fmt,
            "alert": key in summary["warn_fields"],
        }

    def plain(label, key, fmt="fmt"):
        # Overview row that is never highlighted.
        return {
            "name": label,
            "value": summary[key],
            "fmt": fmt,
            "alert": False,
        }

    def numeric(label, key):
        # Detail row: numeric formatting, no alert flag at all.
        return {"name": label, "value": summary[key], "fmt": "fmt_numeric"}

    # Variables without negative values get the restricted-domain label.
    name = "Real number (&Ropf;)"
    if summary["min"] >= 0:
        name = "Real number (&Ropf;<sub>&ge;0</sub>)"

    # Top
    info = VariableInfo(
        summary["varid"],
        summary["varname"],
        name,
        summary["warnings"],
        summary["description"],
    )

    table1 = Table([
        flagged("Distinct count", "n_unique"),
        flagged("Unique (%)", "p_unique", "fmt_percent"),
        flagged("Missing", "n_missing"),
        flagged("Missing (%)", "p_missing", "fmt_percent"),
        flagged("Infinite", "n_infinite"),
        flagged("Infinite (%)", "p_infinite", "fmt_percent"),
    ])

    table2 = Table([
        plain("Mean", "mean"),
        plain("Minimum", "min"),
        plain("Maximum", "max"),
        flagged("Zeros", "n_zeros"),
        flagged("Zeros (%)", "p_zeros", "fmt_percent"),
        plain("Memory size", "memory_size", "fmt_bytesize"),
    ])

    histogram_bins = 10

    # TODO: replace with SmallImage...
    mini_histo = Image(
        mini_histogram(summary["histogram_data"], summary, histogram_bins),
        image_format=image_format,
        alt="Mini histogram",
    )

    overview = [info, table1, table2, mini_histo]
    template_variables["top"] = Container(overview, sequence_type="grid")

    # Bottom: statistics tab
    quantile_statistics = Table(
        [
            numeric(label, key) for label, key in (
                ("Minimum", "min"),
                ("5-th percentile", "5%"),
                ("Q1", "25%"),
                ("median", "50%"),
                ("Q3", "75%"),
                ("95-th percentile", "95%"),
                ("Maximum", "max"),
                ("Range", "range"),
                ("Interquartile range (IQR)", "iqr"),
            )
        ],
        name="Quantile statistics",
    )

    descriptive_rows = [
        numeric(label, key) for label, key in (
            ("Standard deviation", "std"),
            ("Coefficient of variation (CV)", "cv"),
            ("Kurtosis", "kurtosis"),
            ("Mean", "mean"),
            ("Median Absolute Deviation (MAD)", "mad"),
        )
    ]
    # Skewness is the only detail row carrying a "class" entry.
    skewness_row = numeric("Skewness", "skewness")
    skewness_row["class"] = (
        "alert" if "skewness" in summary["warn_fields"] else "")
    descriptive_rows.append(skewness_row)
    descriptive_rows.append(numeric("Sum", "sum"))
    descriptive_rows.append(numeric("Variance", "variance"))

    descriptive_statistics = Table(descriptive_rows,
                                   name="Descriptive statistics")

    statistics = Container(
        [quantile_statistics, descriptive_statistics],
        anchor_id=f"{varid}statistics",
        name="Statistics",
        sequence_type="grid",
    )

    # Bottom: histogram tab (the fixed-bin plot is always present)
    seqs = [
        Image(
            histogram(summary["histogram_data"], summary, histogram_bins),
            image_format=image_format,
            alt="Histogram",
            caption=
            f"<strong>Histogram with fixed size bins</strong> (bins={histogram_bins})",
            name="Histogram",
            anchor_id=f"{varid}histogram",
        )
    ]

    # Bottom: frequency tabs
    fq = FrequencyTable(
        template_variables["freq_table_rows"],
        name="Common values",
        anchor_id=f"{varid}common_values",
    )

    lowest_values = FrequencyTable(
        template_variables["firstn_expanded"],
        name="Minimum 5 values",
        anchor_id=f"{varid}firstn",
    )
    highest_values = FrequencyTable(
        template_variables["lastn_expanded"],
        name="Maximum 5 values",
        anchor_id=f"{varid}lastn",
    )
    evs = Container(
        [lowest_values, highest_values],
        sequence_type="tabs",
        name="Extreme values",
        anchor_id=f"{varid}extreme_values",
    )

    if "histogram_bins_bayesian_blocks" in summary:
        dynamic_caption = '<strong>Histogram with variable size bins</strong> (bins={}, <a href="https://ui.adsabs.harvard.edu/abs/2013ApJ...764..167S/abstract" target="_blank">"bayesian blocks"</a> binning strategy used)'
        seqs.append(
            Image(
                histogram(
                    summary["histogram_data"],
                    summary,
                    summary["histogram_bins_bayesian_blocks"],
                ),
                image_format=image_format,
                alt="Histogram",
                caption=dynamic_caption.format(
                    fmt_array(summary["histogram_bins_bayesian_blocks"],
                              threshold=5)),
                name="Dynamic Histogram",
                anchor_id=f"{varid}dynamic_histogram",
            ))

    histograms = Container(
        seqs,
        sequence_type="tabs",
        name="Histogram(s)",
        anchor_id=f"{varid}histograms",
    )

    template_variables["bottom"] = Container(
        [statistics, histograms, fq, evs],
        sequence_type="tabs",
        anchor_id=f"{varid}bottom",
    )

    return template_variables
def render_real(summary):
    """Build the report components for a real-valued variable.

    Args:
        summary: Mapping holding the statistics of one variable. It must
            provide the identification keys ("varid", "varname", "warnings",
            "description", "warn_fields"), every statistic shown in the
            tables below and "histogram", which is unpacked as the
            positional arguments of the plotting helpers.

    Returns:
        The object returned by ``render_common(summary)``, extended with a
        "top" entry (overview grid) and a "bottom" entry (detail tabs).
    """
    varid = summary["varid"]
    template_variables = render_common(summary)
    image_format = config["plot"]["image_format"].get(str)

    def flagged(label, key, fmt="fmt"):
        # Overview row highlighted when its key is listed in warn_fields.
        return {
            "name": label,
            "value": summary[key],
            "fmt": fmt,
            "alert": key in summary["warn_fields"],
        }

    def plain(label, key, fmt):
        # Overview row that is never highlighted.
        return {
            "name": label,
            "value": summary[key],
            "fmt": fmt,
            "alert": False,
        }

    def detail(label, key, fmt="fmt_numeric"):
        # Detail row without an alert flag.
        return {"name": label, "value": summary[key], "fmt": fmt}

    # Variables without negative values get the restricted-domain label.
    name = "Real number (&Ropf;)"
    if summary["min"] >= 0:
        name = "Real number (&Ropf;<sub>&ge;0</sub>)"

    # Top
    info = VariableInfo(
        summary["varid"],
        summary["varname"],
        name,
        summary["warnings"],
        summary["description"],
    )

    table1 = Table([
        flagged("Distinct", "n_distinct"),
        flagged("Distinct (%)", "p_distinct", "fmt_percent"),
        flagged("Missing", "n_missing"),
        flagged("Missing (%)", "p_missing", "fmt_percent"),
        flagged("Infinite", "n_infinite"),
        flagged("Infinite (%)", "p_infinite", "fmt_percent"),
        plain("Mean", "mean", "fmt_numeric"),
    ])

    table2 = Table([
        plain("Minimum", "min", "fmt_numeric"),
        plain("Maximum", "max", "fmt_numeric"),
        flagged("Zeros", "n_zeros"),
        flagged("Zeros (%)", "p_zeros", "fmt_percent"),
        plain("Negative", "n_negative", "fmt"),
        plain("Negative (%)", "p_negative", "fmt_percent"),
        plain("Memory size", "memory_size", "fmt_bytesize"),
    ])

    mini_histo = Image(
        mini_histogram(*summary["histogram"]),
        image_format=image_format,
        alt="Mini histogram",
    )

    overview = [info, table1, table2, mini_histo]
    template_variables["top"] = Container(overview, sequence_type="grid")

    # Bottom: statistics tab
    quantile_statistics = Table(
        [
            detail(label, key) for label, key in (
                ("Minimum", "min"),
                ("5-th percentile", "5%"),
                ("Q1", "25%"),
                ("median", "50%"),
                ("Q3", "75%"),
                ("95-th percentile", "95%"),
                ("Maximum", "max"),
                ("Range", "range"),
                ("Interquartile range (IQR)", "iqr"),
            )
        ],
        name="Quantile statistics",
    )

    descriptive_rows = [
        detail(label, key) for label, key in (
            ("Standard deviation", "std"),
            ("Coefficient of variation (CV)", "cv"),
            ("Kurtosis", "kurtosis"),
            ("Mean", "mean"),
            ("Median Absolute Deviation (MAD)", "mad"),
        )
    ]
    # Skewness is the only detail row carrying a "class" entry.
    skewness_row = detail("Skewness", "skewness")
    skewness_row["class"] = (
        "alert" if "skewness" in summary["warn_fields"] else "")
    descriptive_rows.append(skewness_row)
    descriptive_rows.append(detail("Sum", "sum"))
    descriptive_rows.append(detail("Variance", "variance"))
    descriptive_rows.append(
        detail("Monotonicity", "monotonic", "fmt_monotonic"))

    descriptive_statistics = Table(descriptive_rows,
                                   name="Descriptive statistics")

    statistics = Container(
        [quantile_statistics, descriptive_statistics],
        anchor_id=f"{varid}statistics",
        name="Statistics",
        sequence_type="grid",
    )

    # Bottom: histogram tab. The caption reports one bin fewer than the
    # length of the second element of summary["histogram"].
    hist = Image(
        histogram(*summary["histogram"]),
        image_format=image_format,
        alt="Histogram",
        caption=
        f"<strong>Histogram with fixed size bins</strong> (bins={len(summary['histogram'][1]) - 1})",
        name="Histogram",
        anchor_id=f"{varid}histogram",
    )

    # Bottom: frequency tabs
    fq = FrequencyTable(
        template_variables["freq_table_rows"],
        name="Common values",
        anchor_id=f"{varid}common_values",
        redact=False,
    )

    lowest_values = FrequencyTable(
        template_variables["firstn_expanded"],
        name="Minimum 5 values",
        anchor_id=f"{varid}firstn",
        redact=False,
    )
    highest_values = FrequencyTable(
        template_variables["lastn_expanded"],
        name="Maximum 5 values",
        anchor_id=f"{varid}lastn",
        redact=False,
    )
    evs = Container(
        [lowest_values, highest_values],
        sequence_type="tabs",
        name="Extreme values",
        anchor_id=f"{varid}extreme_values",
    )

    template_variables["bottom"] = Container(
        [statistics, hist, fq, evs],
        sequence_type="tabs",
        anchor_id=f"{varid}bottom",
    )

    return template_variables
Example #3
0
def render_count(summary):
    """Build the report components for a count variable.

    Args:
        summary: Mapping holding the statistics of one variable. It must
            provide "varid", "varname", "warnings", every statistic read
            below, "histogram_data" and "histogram_bins". The key
            "histogram_bins_bayesian_blocks" is optional; when present a
            second, variable-bin histogram is added.

    Returns:
        The object returned by ``render_common(summary)``, extended with a
        "top" entry (overview grid) and a "bottom" entry (detail tabs).
    """
    template_variables = render_common(summary)
    image_format = config["plot"]["image_format"].get(str)

    def plain(label, key, fmt="fmt"):
        # Row that is never highlighted.
        return {
            "name": label,
            "value": summary[key],
            "fmt": fmt,
            "alert": False,
        }

    def numeric(label, key):
        # Row with numeric formatting and no alert flag at all.
        return {"name": label, "value": summary[key], "fmt": "fmt_numeric"}

    # Top
    info = VariableInfo(
        summary["varid"],
        summary["varname"],
        "Real number (&Ropf; / &Ropf;<sub>&ge;0</sub>)",
        summary["warnings"],
    )

    table1 = Table([
        plain("Distinct count", "n_unique"),
        plain("Unique (%)", "p_unique", "fmt_percent"),
        plain("Missing", "n_missing"),
        plain("Missing (%)", "p_missing", "fmt_percent"),
    ])

    table2 = Table([
        plain("Mean", "mean"),
        plain("Minimum", "min"),
        plain("Maximum", "max"),
        plain("Zeros", "n_zeros"),
        plain("Zeros (%)", "p_zeros", "fmt_percent"),
        plain("Memory size", "memory_size", "fmt_bytesize"),
    ])

    # TODO: replace with SmallImage...
    mini_histo = Image(
        mini_histogram(summary["histogram_data"], summary,
                       summary["histogram_bins"]),
        image_format=image_format,
        alt="Mini histogram",
    )

    overview = [info, table1, table2, mini_histo]
    template_variables["top"] = Sequence(overview, sequence_type="grid")

    # TODO: Make sections data structure. The two statistics groups below
    # are assembled but not yet placed in the "bottom" sequence; they are
    # meant to be wrapped in a "Statistics" table section.
    quantile_statistics = {
        "name": "Quantile statistics",
        "items": [
            plain(label, key, "fmt_numeric") for label, key in (
                ("Minimum", "min"),
                ("5-th percentile", "quantile_5"),
                ("Q1", "quantile_25"),
                ("median", "quantile_50"),
                ("Q3", "quantile_75"),
                ("95-th percentile", "quantile_95"),
                ("Maximum", "max"),
                ("Range", "range"),
                ("Interquartile range", "iqr"),
            )
        ],
    }

    descriptive_statistics = {
        "name": "Descriptive statistics",
        "items": [
            numeric(label, key) for label, key in (
                ("Standard deviation", "std"),
                ("Coefficient of variation", "cv"),
                ("Kurtosis", "kurt"),
                ("Mean", "mean"),
                ("MAD", "mad"),
                ("Skewness", "skew"),
                ("Sum", "sum"),
                ("Variance", "var"),
            )
        ],
    }

    # Bottom: histogram tab (the fixed-bin plot is always present)
    fixed_caption = "<strong>Histogram with fixed size bins</strong> (bins={})"
    seqs = [
        Image(
            histogram(summary["histogram_data"], summary,
                      summary["histogram_bins"]),
            image_format=image_format,
            alt="Histogram",
            caption=fixed_caption.format(summary["histogram_bins"]),
            name="Histogram",
            anchor_id="histogram",
        )
    ]

    # Bottom: frequency tabs
    fq = FrequencyTable(
        template_variables["freq_table_rows"],
        name="Common values",
        anchor_id="common_values",
    )

    lowest_values = FrequencyTable(
        template_variables["firstn_expanded"],
        name="Minimum 5 values",
        anchor_id="firstn",
    )
    highest_values = FrequencyTable(
        template_variables["lastn_expanded"],
        name="Maximum 5 values",
        anchor_id="lastn",
    )
    evs = Sequence(
        [lowest_values, highest_values],
        sequence_type="tabs",
        name="Extreme values",
        anchor_id="extreme_values",
    )

    if "histogram_bins_bayesian_blocks" in summary:
        dynamic_caption = '<strong>Histogram with variable size bins</strong> (bins={}, <a href="https://ui.adsabs.harvard.edu/abs/2013ApJ...764..167S/abstract" target="_blank">"bayesian blocks"</a> binning strategy used)'
        seqs.append(
            Image(
                histogram(
                    summary["histogram_data"],
                    summary,
                    summary["histogram_bins_bayesian_blocks"],
                ),
                image_format=image_format,
                alt="Histogram",
                caption=dynamic_caption.format(
                    fmt_array(summary["histogram_bins_bayesian_blocks"],
                              threshold=5)),
                name="Dynamic Histogram",
                anchor_id="dynamic_histogram",
            ))

    histograms = Sequence(seqs,
                          sequence_type="tabs",
                          name="Histogram(s)",
                          anchor_id="histograms")

    template_variables["bottom"] = Sequence(
        [histograms, fq, evs],
        sequence_type="tabs",
        anchor_id=summary["varid"],
    )

    return template_variables
Example #4
0
def _render_section(name, template_name, values):
    """Render ``template_name`` with ``values`` and wrap it as a named section."""
    content = templates.template(template_name).render(values=values)
    return {"name": name, "content": content}


def render_variables_section(stats_object: dict) -> str:
    """Render the HTML for each of the variables in the DataFrame.

    Args:
        stats_object: The statistics for each variable. The "messages",
            "variables" and "table" entries are read.

    Returns:
        The rendered HTML, where each row represents a variable. An empty
        string is returned when there are no variables.

    Note:
        Every entry of ``stats_object["variables"]`` is updated in place
        with the values derived here (varname, varid, row_classes, plots,
        frequency tables, sections), exactly as the templates receive them.
    """
    n_obs_unique = config["n_obs_unique"].get(int)
    n_obs_bool = config["n_obs_bool"].get(int)
    n_extreme_obs = config["n_extreme_obs"].get(int)
    n_freq_table_max = config["n_freq_table_max"].get(int)

    messages = stats_object["messages"]

    # Lookup tables that do not depend on the variable: built once here
    # instead of once per loop iteration.

    # Message type -> row class entry that gets flagged.
    # TODO: rename alert to prevent overlap with bootstrap classes
    alert_field_by_message = {
        MessageType.SKEWED: "skewness",
        MessageType.HIGH_CARDINALITY: "distinct_count",
        MessageType.ZEROS: "zeros",
        MessageType.MISSING: "missing",
    }
    plotted_types = {Variable.TYPE_NUM, Variable.TYPE_DATE}
    # The number of columns to use in the display of the frequency table
    # according to the category
    mini_freq_table_nb_col = {Variable.TYPE_CAT: 6, Variable.TYPE_BOOL: 3}
    # Components that get their own frequency table, in computation order.
    part_keys = {
        Variable.TYPE_URL: ("scheme", "netloc", "path", "query", "fragment"),
        Variable.TYPE_PATH: ("name", "parent", "suffix", "stem"),
    }
    # Order in which those components follow the "full" section.
    part_section_order = {
        Variable.TYPE_URL: ("scheme", "netloc", "path", "query", "fragment"),
        Variable.TYPE_PATH: ("stem", "name", "suffix", "parent"),
    }
    types_without_freq_table = {
        Variable.S_TYPE_UNSUPPORTED,
        Variable.S_TYPE_CORR,
        Variable.S_TYPE_CONST,
        Variable.S_TYPE_RECODED,
    }

    # Collect the rows and join once; repeated `+=` on a string is quadratic.
    rendered_rows = []

    # TODO: move to for loop in template
    for idx, row in stats_object["variables"].items():
        variable_stats = stats_object["variables"][idx]

        # NOTE: this aliases (not copies) the row, see the docstring.
        formatted_values = row
        formatted_values.update({
            "varname": idx,
            "varid": hash(idx),
            "row_classes": {}
        })

        # TODO: obtain from messages (ignore)
        for m in messages:
            if m.column_name != idx:
                continue
            alert_field = alert_field_by_message.get(m.message_type)
            if alert_field is not None:
                formatted_values["row_classes"][alert_field] = "alert"

        var_type = row["type"]

        if var_type in plotted_types:
            formatted_values["histogram"] = histogram(row["histogramdata"],
                                                      row,
                                                      row["histogram_bins"])
            formatted_values["mini_histogram"] = mini_histogram(
                row["histogramdata"], row, row["histogram_bins"])

            if ("histogram_bins_bayesian_blocks" in row
                    and var_type == Variable.TYPE_NUM):
                formatted_values["histogram_bayesian_blocks"] = histogram(
                    row["histogramdata"], row,
                    row["histogram_bins_bayesian_blocks"])

        if var_type in mini_freq_table_nb_col:
            formatted_values["minifreqtable"] = freq_table(
                variable_stats["value_counts_without_nan"],
                stats_object["table"]["n"],
                "mini_freq_table.html",
                max_number_to_print=n_obs_bool,
                idx=idx,
                nb_col=mini_freq_table_nb_col[var_type],
            )

        # URL and path variables get one frequency table per component.
        for part in part_keys.get(var_type, ()):
            formatted_values["freqtable_{}".format(part)] = freq_table(
                freqtable=variable_stats["{}_counts".format(part)],
                # TODO: n - missing
                n=stats_object["table"]["n"],
                table_template="freq_table.html",
                idx=idx,
                max_number_to_print=n_freq_table_max,
            )

        if var_type == Variable.S_TYPE_UNIQUE:
            table = variable_stats["value_counts_without_nan"].sort_index()
            obs = table.index

            formatted_values["firstn"] = pd.DataFrame(
                list(obs[0:n_obs_unique]),
                columns=["First {} values".format(n_obs_unique)],
            ).to_html(classes="example_values", index=False)
            formatted_values["lastn"] = pd.DataFrame(
                list(obs[-n_obs_unique:]),
                columns=["Last {} values".format(n_obs_unique)],
            ).to_html(classes="example_values", index=False)

        if var_type not in types_without_freq_table:
            formatted_values["freqtable"] = freq_table(
                freqtable=variable_stats["value_counts_without_nan"],
                n=stats_object["table"]["n"],
                table_template="freq_table.html",
                idx=idx,
                max_number_to_print=n_freq_table_max,
            )

            formatted_values["n_extreme_obs"] = n_extreme_obs
            formatted_values["firstn_expanded"] = extreme_obs_table(
                freqtable=variable_stats["value_counts_without_nan"],
                number_to_print=n_extreme_obs,
                n=stats_object["table"]["n"],
                ascending=True,
            )
            formatted_values["lastn_expanded"] = extreme_obs_table(
                freqtable=variable_stats["value_counts_without_nan"],
                number_to_print=n_extreme_obs,
                n=stats_object["table"]["n"],
                ascending=False,
            )

        if var_type == Variable.TYPE_NUM:
            # All four sections are rendered before "sections" is assigned,
            # so none of the templates sees a partially built value.
            formatted_values["sections"] = {
                "statistics":
                _render_section("Statistics",
                                "variables/row_num_statistics.html",
                                formatted_values),
                "histogram":
                _render_section("Histogram",
                                "variables/row_num_histogram.html",
                                formatted_values),
                "frequency_table":
                _render_section("Common values",
                                "variables/row_num_frequency_table.html",
                                formatted_values),
                "extreme_values":
                _render_section("Extreme values",
                                "variables/row_num_extreme_values.html",
                                formatted_values),
            }

        if var_type == Variable.TYPE_CAT:
            formatted_values["sections"] = {
                "frequency_table":
                _render_section("Common values",
                                "variables/row_cat_frequency_table.html",
                                formatted_values)
            }

            check_compositions = config["vars"]["cat"][
                "check_composition"].get(bool)
            if check_compositions:
                formatted_values["sections"]["composition"] = _render_section(
                    "Composition", "variables/row_cat_composition.html",
                    formatted_values)

        if var_type in part_section_order:
            # "Full" first, then one section per component; the section
            # name is the capitalized component key.
            sections = {
                "full": {
                    "name": "Full",
                    "value": formatted_values["freqtable"]
                }
            }
            for part in part_section_order[var_type]:
                sections[part] = {
                    "name": part.capitalize(),
                    "value": formatted_values["freqtable_{}".format(part)],
                }
            formatted_values["sections"] = sections

        rendered_rows.append(
            templates.template("variables/row_{}.html".format(
                var_type.value.lower())).render(values=formatted_values))

    return "".join(rendered_rows)
def render_date(summary):
    """Assemble the report sections for a date variable.

    Args:
        summary: Mapping holding the variable's summary values. The keys
            read here are ``varid``, ``varname``, ``warnings``,
            ``description``, ``n_unique``, ``p_unique``, ``n_missing``,
            ``p_missing``, ``memory_size``, ``min``, ``max``,
            ``histogram_data`` and ``histogram_bins``.

    Returns:
        dict: ``"top"`` holds a grid container (variable info, two tables
        and a mini histogram); ``"bottom"`` holds a tabs container with the
        full histogram.
    """
    variable_id = summary["varid"]
    # TODO: render common?
    img_format = config["plot"]["image_format"].get(str)

    def unflagged_rows(specs):
        # Each spec is (label, summary key, formatter name); none of these
        # rows is marked as an alert.
        return [
            {"name": label, "value": summary[key], "fmt": fmt_name, "alert": False}
            for label, key, fmt_name in specs
        ]

    # Top
    header = VariableInfo(
        variable_id,
        summary["varname"],
        "Date",
        summary["warnings"],
        summary["description"],
    )

    overview_table = Table(
        unflagged_rows(
            [
                ("Distinct count", "n_unique", "fmt"),
                ("Unique (%)", "p_unique", "fmt_percent"),
                ("Missing", "n_missing", "fmt"),
                ("Missing (%)", "p_missing", "fmt_percent"),
                ("Memory size", "memory_size", "fmt_bytesize"),
            ]
        )
    )

    range_table = Table(
        unflagged_rows(
            [
                ("Minimum", "min", "fmt"),
                ("Maximum", "max", "fmt"),
            ]
        )
    )

    thumbnail = Image(
        mini_histogram(
            summary["histogram_data"], summary, summary["histogram_bins"]
        ),
        image_format=img_format,
        alt="Mini histogram",
    )

    top = Container(
        [header, overview_table, range_table, thumbnail], sequence_type="grid"
    )

    # Bottom
    full_histogram = Image(
        histogram(summary["histogram_data"], summary, summary["histogram_bins"]),
        image_format=img_format,
        alt="Histogram",
        caption="Histogram",
        name="Histogram",
        anchor_id=f"{variable_id}histogram",
    )
    bottom = Container(
        [full_histogram], sequence_type="tabs", anchor_id=variable_id
    )

    return {"top": top, "bottom": bottom}
def render_count(config: Settings, summary: dict) -> dict:
    """Build the report sections for a count variable.

    Starts from ``render_common(config, summary)`` and adds a ``"top"`` grid
    (variable info, two tables, mini histogram) and a ``"bottom"`` tabs
    container (histogram, common values, extreme values).

    Every nested anchor id is prefixed with the variable id so the ids stay
    unique when more than one count variable is rendered into the same
    document.

    Args:
        config: Report settings; ``plot.image_format`` and
            ``report.precision`` are read here.
        summary: Summary values of the variable. Must provide ``varid``,
            ``varname``, ``warnings``, ``description``, the counts and
            moments shown in the tables, and ``histogram``.

    Returns:
        The template variables returned by ``render_common`` with ``"top"``
        and ``"bottom"`` added.
    """
    template_variables = render_common(config, summary)
    image_format = config.plot.image_format
    varid = summary["varid"]

    # Top
    info = VariableInfo(
        varid,
        summary["varname"],
        "Real number (&Ropf; / &Ropf;<sub>&ge;0</sub>)",
        summary["warnings"],
        summary["description"],
    )

    table1 = Table([
        {
            "name": "Distinct",
            "value": fmt(summary["n_distinct"]),
            "alert": False,
        },
        {
            "name": "Distinct (%)",
            "value": fmt_percent(summary["p_distinct"]),
            "alert": False,
        },
        {
            "name": "Missing",
            "value": fmt(summary["n_missing"]),
            "alert": False,
        },
        {
            "name": "Missing (%)",
            "value": fmt_percent(summary["p_missing"]),
            "alert": False,
        },
    ])

    table2 = Table([
        {
            "name": "Mean",
            "value": fmt_numeric(summary["mean"],
                                 precision=config.report.precision),
            "alert": False,
        },
        {
            "name": "Minimum",
            "value": fmt_numeric(summary["min"],
                                 precision=config.report.precision),
            "alert": False,
        },
        {
            "name": "Maximum",
            "value": fmt_numeric(summary["max"],
                                 precision=config.report.precision),
            "alert": False,
        },
        {
            "name": "Zeros",
            "value": fmt(summary["n_zeros"]),
            "alert": False,
        },
        {
            "name": "Zeros (%)",
            "value": fmt_percent(summary["p_zeros"]),
            "alert": False,
        },
        {
            "name": "Memory size",
            "value": fmt_bytesize(summary["memory_size"]),
            "alert": False,
        },
    ])

    mini_histo = Image(
        mini_histogram(config, *summary["histogram"]),
        image_format=image_format,
        alt="Mini histogram",
    )

    template_variables["top"] = Container([info, table1, table2, mini_histo],
                                          sequence_type="grid")

    # Bottom: all anchor ids below are namespaced with the variable id.
    seqs = [
        Image(
            histogram(config, *summary["histogram"]),
            image_format=image_format,
            alt="Histogram",
            caption=
            f"<strong>Histogram with fixed size bins</strong> (bins={len(summary['histogram'][1]) - 1})",
            name="Histogram",
            anchor_id=f"{varid}histogram",
        )
    ]

    fq = FrequencyTable(
        template_variables["freq_table_rows"],
        name="Common values",
        anchor_id=f"{varid}common_values",
        redact=False,
    )

    evs = Container(
        [
            FrequencyTable(
                template_variables["firstn_expanded"],
                name="Minimum 5 values",
                anchor_id=f"{varid}firstn",
                redact=False,
            ),
            FrequencyTable(
                template_variables["lastn_expanded"],
                name="Maximum 5 values",
                anchor_id=f"{varid}lastn",
                redact=False,
            ),
        ],
        sequence_type="tabs",
        name="Extreme values",
        anchor_id=f"{varid}extreme_values",
    )

    template_variables["bottom"] = Container(
        [
            Container(seqs,
                      sequence_type="tabs",
                      name="Histogram(s)",
                      anchor_id=f"{varid}histograms"),
            fq,
            evs,
        ],
        sequence_type="tabs",
        anchor_id=varid,
    )

    return template_variables
Example #7
0
def render_date(summary):
    """Assemble the report sections for a date variable.

    Args:
        summary: Mapping holding the variable's summary values. The keys
            read here are ``varid``, ``varname``, ``n_unique``,
            ``p_unique``, ``n_missing``, ``p_missing``, ``memory_size``,
            ``min``, ``max``, ``histogram_data`` and ``histogram_bins``.

    Returns:
        dict: ``"top"`` holds a grid sequence (overview, two tables and a
        mini histogram); ``"bottom"`` holds a tabs sequence with the full
        histogram.
    """
    # TODO: render common?

    def table_rows(specs):
        # Each spec is (label, summary key, formatter name).
        return [
            {"name": label, "value": summary[key], "fmt": fmt_name}
            for label, key, fmt_name in specs
        ]

    # Top
    header = Overview(summary["varid"], summary["varname"], "Date", [])

    counts_table = Table(
        table_rows(
            [
                ("Distinct count", "n_unique", "fmt"),
                ("Unique (%)", "p_unique", "fmt_percent"),
                ("Missing", "n_missing", "fmt"),
                ("Missing (%)", "p_missing", "fmt_percent"),
                ("Memory size", "memory_size", "fmt_bytesize"),
            ]
        )
    )

    range_table = Table(
        table_rows(
            [
                ("Minimum", "min", "fmt"),
                ("Maximum", "max", "fmt"),
            ]
        )
    )

    thumbnail = Image(
        mini_histogram(
            summary["histogram_data"], summary, summary["histogram_bins"]
        ),
        "Mini histogram",
    )

    top = Sequence(
        [header, counts_table, range_table, thumbnail], sequence_type="grid"
    )

    # Bottom
    chart = Image(
        histogram(summary["histogram_data"], summary, summary["histogram_bins"]),
        alt="Histogram",
        caption="Histogram",
        name="Histogram",
        anchor_id="{}histogram".format(summary["varid"]),
    )
    bottom = Sequence([chart], sequence_type="tabs", anchor_id=summary["varid"])

    return {"top": top, "bottom": bottom}
Example #8
0
def render_real(summary):
    """Assemble the report sections for a real-valued variable.

    Starts from ``render_common(summary)`` and adds:

    * ``"top"``: a grid with the variable info, two tables and a mini
      histogram. Rows of the first table, and the two zero-count rows of the
      second, are flagged when their key is listed in
      ``summary["warn_fields"]``.
    * ``"bottom"``: a tabs container with a statistics grid (quantile and
      descriptive tables), the histogram, the common-values table and the
      extreme-values tabs.

    Args:
        summary: Mapping holding the variable's summary values.

    Returns:
        The template variables from ``render_common`` with ``"top"`` and
        ``"bottom"`` added.
    """
    variable_id = summary["varid"]
    template_variables = render_common(summary)
    img_format = config["plot"]["image_format"].get(str)

    def top_row(label, key, fmt_name, check_warning):
        # The value is read before "warn_fields", mirroring a literal dict.
        entry = {"name": label, "value": summary[key], "fmt": fmt_name}
        entry["alert"] = (
            (key in summary["warn_fields"]) if check_warning else False
        )
        return entry

    def numeric_row(label, key):
        return {"name": label, "value": summary[key], "fmt": "fmt_numeric"}

    # The displayed type depends on whether negative values occur.
    type_label = (
        "Real number (&Ropf;<sub>&ge;0</sub>)"
        if summary["min"] >= 0
        else "Real number (&Ropf;)"
    )

    # Top
    header = VariableInfo(
        variable_id,
        summary["varname"],
        type_label,
        summary["warnings"],
        summary["description"],
    )

    counts_table = Table(
        [
            top_row(label, key, fmt_name, True)
            for label, key, fmt_name in (
                ("唯一值计数", "n_unique", "fmt"),
                ("唯一值比例 (%)", "p_unique", "fmt_percent"),
                ("缺失值", "n_missing", "fmt"),
                ("缺失值比例(%)", "p_missing", "fmt_percent"),
                ("无穷值", "n_infinite", "fmt"),
                ("无穷值比例 (%)", "p_infinite", "fmt_percent"),
            )
        ]
    )

    moments_table = Table(
        [
            top_row("均数", "mean", "fmt_numeric", False),
            top_row("最小值", "min", "fmt_numeric", False),
            top_row("最大值", "max", "fmt_numeric", False),
            top_row("零值", "n_zeros", "fmt", True),
            top_row("零值比例 (%)", "p_zeros", "fmt_percent", True),
            top_row("内存占用", "memory_size", "fmt_bytesize", False),
        ]
    )

    thumbnail = Image(
        mini_histogram(*summary["histogram"]),
        image_format=img_format,
        alt="Mini histogram",
    )

    template_variables["top"] = Container(
        [header, counts_table, moments_table, thumbnail], sequence_type="grid"
    )

    # Bottom: statistics
    # NOTE(review): the table name below reads as "qualitative statistics"
    # although the rows are quantiles -- confirm the intended label.
    quantile_table = Table(
        [
            numeric_row(label, key)
            for label, key in (
                ("最小值", "min"),
                ("5百分位", "5%"),
                ("25百分位", "25%"),
                ("中位", "50%"),
                ("75百分位", "75%"),
                ("95-百分位", "95%"),
                ("最大值", "max"),
                ("极差", "range"),
                ("四分位距 (IQR)", "iqr"),
            )
        ],
        name="定性统计",
    )

    # The first flag that is set wins; later flags are not read after a match.
    monotonic_labels = (
        ("monotonic_increase_strict", "严格递增"),
        ("monotonic_decrease_strict", "严格递减"),
        ("monotonic_increase", "递增"),
        ("monotonic_decrease", "递减"),
    )
    monotonicity = next(
        (label for flag, label in monotonic_labels if summary[flag]),
        "非单调",
    )

    descriptive_rows = [
        numeric_row(label, key)
        for label, key in (
            ("标准差", "std"),
            ("变异系数 (CV)", "cv"),
            ("峰度", "kurtosis"),
            ("均数", "mean"),
            ("中位绝对偏差 (MAD)", "mad"),
        )
    ]
    # NOTE(review): this row carries a "class" entry whereas the top tables
    # use "alert" -- confirm the table template reads "class".
    skewness_row = numeric_row("偏度", "skewness")
    skewness_row["class"] = (
        "alert" if "skewness" in summary["warn_fields"] else ""
    )
    descriptive_rows.append(skewness_row)
    descriptive_rows.append(numeric_row("总和", "sum"))
    descriptive_rows.append(numeric_row("方差", "variance"))
    descriptive_rows.append(
        {"name": "单调性", "value": monotonicity, "fmt": "fmt"}
    )
    descriptive_table = Table(descriptive_rows, name="描述性统计")

    statistics_section = Container(
        [quantile_table, descriptive_table],
        anchor_id=f"{variable_id}statistics",
        name="统计",
        sequence_type="grid",
    )

    # Bottom: histogram, common values, extreme values
    histogram_image = Image(
        histogram(*summary["histogram"]),
        image_format=img_format,
        alt="Histogram",
        caption=f"<strong>固定大小的直方图</strong> (bins={len(summary['histogram'][1]) - 1})",
        name="直方图",
        anchor_id=f"{variable_id}histogram",
    )

    common_values = FrequencyTable(
        template_variables["freq_table_rows"],
        name="常见值",
        anchor_id=f"{variable_id}common_values",
        redact=False,
    )

    smallest_values = FrequencyTable(
        template_variables["firstn_expanded"],
        name="最小10个",
        anchor_id=f"{variable_id}firstn",
        redact=False,
    )
    largest_values = FrequencyTable(
        template_variables["lastn_expanded"],
        name="最大10个",
        anchor_id=f"{variable_id}lastn",
        redact=False,
    )
    extreme_values = Container(
        [smallest_values, largest_values],
        sequence_type="tabs",
        name="极值",
        anchor_id=f"{variable_id}extreme_values",
    )

    template_variables["bottom"] = Container(
        [statistics_section, histogram_image, common_values, extreme_values],
        sequence_type="tabs",
        anchor_id=f"{variable_id}bottom",
    )

    return template_variables
def render_date(config: Settings, summary: Dict[str, Any]) -> Dict[str, Any]:
    """Assemble the report sections for a date variable.

    Args:
        config: Report settings; only ``plot.image_format`` is read here
            directly, and the object is also forwarded to the plotting
            helpers.
        summary: Mapping holding the variable's summary values. The keys
            read here are ``varid``, ``varname``, ``warnings``,
            ``description``, ``n_distinct``, ``p_distinct``, ``n_missing``,
            ``p_missing``, ``memory_size``, ``min``, ``max`` and
            ``histogram``.

    Returns:
        ``"top"`` holds a grid container (variable info, two tables and a
        mini histogram); ``"bottom"`` holds a tabs container with the full
        histogram.
    """
    variable_id = summary["varid"]
    img_format = config.plot.image_format

    def formatted_rows(specs):
        # Each spec is (label, formatter, summary key); values are formatted
        # right here and no row is marked as an alert.
        return [
            {"name": label, "value": formatter(summary[key]), "alert": False}
            for label, formatter, key in specs
        ]

    # Top
    header = VariableInfo(
        variable_id,
        summary["varname"],
        "Date",
        summary["warnings"],
        summary["description"],
    )

    overview_table = Table(
        formatted_rows(
            [
                ("Distinct", fmt, "n_distinct"),
                ("Distinct (%)", fmt_percent, "p_distinct"),
                ("Missing", fmt, "n_missing"),
                ("Missing (%)", fmt_percent, "p_missing"),
                ("Memory size", fmt_bytesize, "memory_size"),
            ]
        )
    )

    range_table = Table(
        formatted_rows(
            [
                ("Minimum", fmt, "min"),
                ("Maximum", fmt, "max"),
            ]
        )
    )

    # Only the first two entries of summary["histogram"] are forwarded.
    histogram_args = (summary["histogram"][0], summary["histogram"][1])

    thumbnail = Image(
        mini_histogram(config, *histogram_args, date=True),
        image_format=img_format,
        alt="Mini histogram",
    )

    top = Container(
        [header, overview_table, range_table, thumbnail], sequence_type="grid"
    )

    # Bottom
    full_histogram = Image(
        histogram(config, *histogram_args, date=True),
        image_format=img_format,
        alt="Histogram",
        caption=f"<strong>Histogram with fixed size bins</strong> (bins={len(histogram_args[1]) - 1})",
        name="Histogram",
        anchor_id=f"{variable_id}histogram",
    )
    bottom = Container(
        [full_histogram], sequence_type="tabs", anchor_id=variable_id
    )

    return {"top": top, "bottom": bottom}
Example #10
0
def render_date(summary):
    """Assemble the report sections for a date variable.

    Args:
        summary: Mapping holding the variable's summary values. The keys
            read here are ``varid``, ``varname``, ``warnings``,
            ``description``, ``n_unique``, ``p_unique``, ``n_missing``,
            ``p_missing``, ``memory_size``, ``min``, ``max`` and
            ``histogram``.

    Returns:
        dict: ``"top"`` holds a grid container (variable info, two tables
        and a mini histogram); ``"bottom"`` holds a tabs container with the
        full histogram.
    """
    variable_id = summary["varid"]
    # TODO: render common?
    img_format = config["plot"]["image_format"].get(str)

    def unflagged_rows(specs):
        # Each spec is (label, summary key, formatter name); none of these
        # rows is marked as an alert.
        return [
            {"name": label, "value": summary[key], "fmt": fmt_name, "alert": False}
            for label, key, fmt_name in specs
        ]

    # Top
    header = VariableInfo(
        variable_id,
        summary["varname"],
        "Date",
        summary["warnings"],
        summary["description"],
    )

    overview_table = Table(
        unflagged_rows(
            [
                ("唯一值计数", "n_unique", "fmt"),
                ("唯一值比例 (%)", "p_unique", "fmt_percent"),
                ("缺失值", "n_missing", "fmt"),
                ("缺失值比例(%)", "p_missing", "fmt_percent"),
                ("内存占用", "memory_size", "fmt_bytesize"),
            ]
        )
    )

    range_table = Table(
        unflagged_rows(
            [
                ("最小", "min", "fmt"),
                ("最大", "max", "fmt"),
            ]
        )
    )

    thumbnail = Image(
        mini_histogram(*summary["histogram"], date=True),
        image_format=img_format,
        alt="Mini histogram",
    )

    top = Container(
        [header, overview_table, range_table, thumbnail], sequence_type="grid"
    )

    # Bottom
    full_histogram = Image(
        histogram(*summary["histogram"], date=True),
        image_format=img_format,
        alt="Histogram",
        caption=f"<strong>Histogram with fixed size bins</strong> (bins={len(summary['histogram'][1]) - 1})",
        name="Histogram",
        anchor_id=f"{variable_id}histogram",
    )
    bottom = Container(
        [full_histogram], sequence_type="tabs", anchor_id=variable_id
    )

    return {"top": top, "bottom": bottom}
Example #11
0
def render_count(summary):
    """Build the report sections for a count variable.

    Starts from ``render_common(summary)`` and adds a ``"top"`` grid
    (variable info, two tables, mini histogram) and a ``"bottom"`` tabs
    container (histogram, common values, extreme values).

    Every nested anchor id is prefixed with the variable id so the ids stay
    unique when more than one count variable is rendered into the same
    document.

    Args:
        summary: Mapping holding the variable's summary values. The keys
            read here are ``varid``, ``varname``, ``warnings``,
            ``description``, ``n_distinct``, ``p_distinct``, ``n_missing``,
            ``p_missing``, ``mean``, ``min``, ``max``, ``n_zeros``,
            ``p_zeros``, ``memory_size`` and ``histogram``.

    Returns:
        The template variables from ``render_common`` with ``"top"`` and
        ``"bottom"`` added.
    """
    varid = summary["varid"]
    template_variables = render_common(summary)
    image_format = config["plot"]["image_format"].get(str)

    def row(label, key, fmt_name):
        # None of the rows in the top tables is marked as an alert.
        return {
            "name": label,
            "value": summary[key],
            "fmt": fmt_name,
            "alert": False,
        }

    # Top
    info = VariableInfo(
        varid,
        summary["varname"],
        "Real number (&Ropf; / &Ropf;<sub>&ge;0</sub>)",
        summary["warnings"],
        summary["description"],
    )

    table1 = Table(
        [
            row("Distinct", "n_distinct", "fmt"),
            row("Distinct (%)", "p_distinct", "fmt_percent"),
            row("Missing", "n_missing", "fmt"),
            row("Missing (%)", "p_missing", "fmt_percent"),
        ]
    )

    table2 = Table(
        [
            row("Mean", "mean", "fmt_numeric"),
            row("Minimum", "min", "fmt_numeric"),
            row("Maximum", "max", "fmt_numeric"),
            row("Zeros", "n_zeros", "fmt"),
            row("Zeros (%)", "p_zeros", "fmt_percent"),
            row("Memory size", "memory_size", "fmt_bytesize"),
        ]
    )

    mini_histo = Image(
        mini_histogram(*summary["histogram"]),
        image_format=image_format,
        alt="Mini histogram",
    )

    template_variables["top"] = Container(
        [info, table1, table2, mini_histo], sequence_type="grid"
    )

    # TODO: Make sections data structure and add a "Statistics" section
    # (quantile and descriptive statistics). The row definitions that were
    # built here but never rendered have been removed, so a summary lacking
    # those statistics no longer fails on an unused lookup.

    # Bottom: all anchor ids below are namespaced with the variable id.
    seqs = [
        Image(
            histogram(*summary["histogram"]),
            image_format=image_format,
            alt="Histogram",
            caption=f"<strong>Histogram with fixed size bins</strong> (bins={len(summary['histogram'][1]) - 1})",
            name="Histogram",
            anchor_id=f"{varid}histogram",
        )
    ]

    fq = FrequencyTable(
        template_variables["freq_table_rows"],
        name="Common values",
        anchor_id=f"{varid}common_values",
        redact=False,
    )

    evs = Container(
        [
            FrequencyTable(
                template_variables["firstn_expanded"],
                name="Minimum 5 values",
                anchor_id=f"{varid}firstn",
                redact=False,
            ),
            FrequencyTable(
                template_variables["lastn_expanded"],
                name="Maximum 5 values",
                anchor_id=f"{varid}lastn",
                redact=False,
            ),
        ],
        sequence_type="tabs",
        name="Extreme values",
        anchor_id=f"{varid}extreme_values",
    )

    template_variables["bottom"] = Container(
        [
            Container(
                seqs,
                sequence_type="tabs",
                name="Histogram(s)",
                anchor_id=f"{varid}histograms",
            ),
            fq,
            evs,
        ],
        sequence_type="tabs",
        anchor_id=varid,
    )

    return template_variables
def render_count(summary):
    """Build the report template variables for a count variable.

    Starts from the mapping returned by ``render_common(summary)`` and sets
    two more entries on it:

    * ``"top"``: a grid ``Container`` with the variable info, two overview
      tables and a mini histogram.
    * ``"bottom"``: a tabbed ``Container`` with the full histogram, the
      common-values frequency table and the extreme-values tabs.

    Args:
        summary: Mapping holding the statistics of one variable. Every key
            read below (``varid``, ``varname``, ``warnings``,
            ``description``, ``histogram``, ``n_unique``, ``mean``, ...) is
            accessed with ``summary[key]``, so a missing key raises
            ``KeyError``.

    Returns:
        The object returned by ``render_common`` with ``"top"`` and
        ``"bottom"`` assigned.
    """
    template_variables = render_common(summary)
    image_format = config["plot"]["image_format"].get(str)

    def plain_row(label, key, fmt):
        # Every table row built in this function carries alert=False.
        return {"name": label, "value": summary[key], "fmt": fmt, "alert": False}

    # Top
    info = VariableInfo(
        summary["varid"],
        summary["varname"],
        "Real number (&Ropf; / &Ropf;<sub>&ge;0</sub>)",
        summary["warnings"],
        summary["description"],
    )

    table1 = Table(
        [
            plain_row("唯一值计数", "n_unique", "fmt"),
            plain_row("唯一值 (%)", "p_unique", "fmt_percent"),
            plain_row("缺失值", "n_missing", "fmt"),
            plain_row("缺失值比例 (%)", "p_missing", "fmt_percent"),
        ]
    )

    table2 = Table(
        [
            plain_row("均数", "mean", "fmt_numeric"),
            plain_row("最小值", "min", "fmt_numeric"),
            plain_row("最大值", "max", "fmt_numeric"),
            plain_row("零值", "n_zeros", "fmt"),
            plain_row("零值 (%)", "p_zeros", "fmt_percent"),
            plain_row("内存占用", "memory_size", "fmt_bytesize"),
        ]
    )

    mini_histo = Image(
        mini_histogram(*summary["histogram"]),
        image_format=image_format,
        alt="Mini histogram",
    )

    template_variables["top"] = Container(
        [info, table1, table2, mini_histo], sequence_type="grid"
    )

    # NOTE: the two statistics sections below are built but not rendered yet;
    # they are kept for the pending "statistics" tab (see TODO).
    quantile_statistics = {
        # Was "定性分析" ("qualitative analysis"), a mistranslation.
        "name": "分位数统计",
        "items": [
            plain_row("最小值", "min", "fmt_numeric"),
            plain_row("5-th 百分位", "quantile_5", "fmt_numeric"),
            plain_row("Q1", "quantile_25", "fmt_numeric"),
            plain_row("中位数", "quantile_50", "fmt_numeric"),
            plain_row("Q3", "quantile_75", "fmt_numeric"),
            plain_row("95-th 百分位", "quantile_95", "fmt_numeric"),
            plain_row("最大值", "max", "fmt_numeric"),
            plain_row("区间", "range", "fmt_numeric"),
            plain_row("四分位距", "iqr", "fmt_numeric"),
        ],
    }

    descriptive_statistics = {
        "name": "描述性统计",
        # These rows intentionally have no "alert" key, as in the original.
        "items": [
            {"name": label, "value": summary[key], "fmt": "fmt_numeric"}
            for label, key in (
                ("标准差", "std"),
                ("变异系数", "cv"),
                ("峰度", "kurt"),
                ("均数", "mean"),
                ("MAD", "mad"),
                ("偏度", "skew"),
                # Was "积" ("product"); the value is the sum.
                ("总和", "sum"),
                ("方差", "var"),
            )
        ],
    }

    # TODO: Make sections data structure
    # statistics = ItemRenderer(
    #     'statistics',
    #     'Statistics',
    #     'table',
    #     [
    #         quantile_statistics,
    #         descriptive_statistics
    #     ]
    # )

    # summary["histogram"][1] is used as the sequence of bin edges, so the
    # number of bins is one less than its length.
    n_bins = len(summary["histogram"][1]) - 1
    seqs = [
        Image(
            histogram(*summary["histogram"]),
            image_format=image_format,
            alt="Histogram",
            caption=f"<strong>Histogram with fixed size bins</strong> (bins={n_bins})",
            name="Histogram",
            anchor_id="histogram",
        )
    ]

    fq = FrequencyTable(
        template_variables["freq_table_rows"],
        name="Common values",
        anchor_id="common_values",
        redact=False,
    )

    evs = Container(
        [
            FrequencyTable(
                template_variables["firstn_expanded"],
                name="Minimum 5 values",
                anchor_id="firstn",
                redact=False,
            ),
            FrequencyTable(
                template_variables["lastn_expanded"],
                name="Maximum 5 values",
                anchor_id="lastn",
                redact=False,
            ),
        ],
        sequence_type="tabs",
        name="极值",
        anchor_id="extreme_values",
    )

    template_variables["bottom"] = Container(
        [
            # statistics,
            Container(
                seqs, sequence_type="tabs", name="直方图", anchor_id="histograms"
            ),
            fq,
            evs,
        ],
        sequence_type="tabs",
        anchor_id=summary["varid"],
    )

    return template_variables