Ejemplo n.º 1
0
def get_dataset_reproduction(summary: dict) -> Renderable:
    """Build the "Reproduction" section from the report summary.

    Args:
        summary: the report summary; its "package" and "analysis" entries
            are read.

    Returns:
        A grid container holding one table with the analysis start/end
        time, the duration, the software version link and a download link
        for the configuration.
    """
    package = summary["package"]
    version = package["pandas_profiling_version"]
    # The configuration is passed through quote() before being embedded in
    # the data URI below.
    encoded_config = quote(package["pandas_profiling_config"])

    analysis = summary["analysis"]
    started = analysis["date_start"]
    finished = analysis["date_end"]
    elapsed = analysis["duration"]

    labelled_values = [
        ("Analysis started", fmt(started)),
        ("Analysis finished", fmt(finished)),
        ("Duration", fmt_timespan(elapsed)),
        (
            "Software version",
            f'<a href="https://github.com/pandas-profiling/pandas-profiling">pandas-profiling v{version}</a>',
        ),
        (
            "Download configuration",
            f'<a download="config.json" href="data:text/plain;charset=utf-8,{encoded_config}">config.json</a>',
        ),
    ]
    rows = [{"name": label, "value": value} for label, value in labelled_values]

    reproduction_table = Table(
        rows,
        name="Reproduction",
        anchor_id="overview_reproduction",
    )

    return Container(
        [reproduction_table],
        name="Reproduction",
        anchor_id="reproduction",
        sequence_type="grid",
    )
Ejemplo n.º 2
0
def render_generic(config: Settings, summary: dict) -> dict:
    """Render a variable that has no dedicated renderer ("Unsupported").

    Args:
        config: report settings (not read by this function).
        summary: the variable's summary dictionary.

    Returns:
        A dict with a "top" grid container (variable info, a table with
        missing-value and memory statistics, and an empty HTML element)
        and a "bottom" entry set to None.
    """

    def make_row(label, value, alert_field=None):
        # A row is flagged only when its field is listed in "alert_fields".
        flagged = alert_field is not None and alert_field in summary["alert_fields"]
        return {"name": label, "value": value, "alert": flagged}

    header = VariableInfo(
        anchor_id=summary["varid"],
        alerts=summary["alerts"],
        var_type="Unsupported",
        var_name=summary["varname"],
        description=summary["description"],
    )

    rows = [
        make_row("Missing", fmt(summary["n_missing"]), "n_missing"),
        make_row("Missing (%)", fmt_percent(summary["p_missing"]), "p_missing"),
        make_row("Memory size", fmt_bytesize(summary["memory_size"])),
    ]
    stats_table = Table(rows)

    top = Container([header, stats_table, HTML("")], sequence_type="grid")
    return {"top": top, "bottom": None}
Ejemplo n.º 3
0
def get_dataset_column_definitions(definitions: dict) -> Container:
    """Create the overview section listing the variable descriptions.

    Args:
        definitions: mapping of column name to its description.

    Returns:
        A grid container wrapping a single table with one row per entry
        of ``definitions``.
    """
    rows = []
    for column, description in definitions.items():
        rows.append({"name": column, "value": fmt(description)})

    descriptions_table = Table(
        rows,
        name="Variable descriptions",
        anchor_id="variable_definition_table",
    )

    return Container(
        [descriptions_table],
        name="Variables",
        anchor_id="variable_descriptions",
        sequence_type="grid",
    )
Ejemplo n.º 4
0
def get_table(items):
    """Lay out name/value items in a two-column widget grid.

    Args:
        items: sized sequence of dicts with "name" and "value" keys.

    Returns:
        A GridspecLayout with one row per item: the name in column 0 and
        the formatted value in column 1, both as HTML widgets.
    """
    grid = GridspecLayout(len(items), 2)
    for row_id, entry in enumerate(items):
        grid[row_id, 0] = widgets.HTML(entry["name"])
        formatted_value = fmt(entry["value"])
        grid[row_id, 1] = widgets.HTML(formatted_value)

    return grid
Ejemplo n.º 5
0
def get_table(items):
    """Fill a two-column QTableWidget with name/value items.

    Args:
        items: sized sequence of dicts with "name" and "value" keys.

    Returns:
        A QTableWidget with one row per item: the name in column 0 and
        the formatted value in column 1.
    """
    widget = QTableWidget()

    widget.setRowCount(len(items))
    widget.setColumnCount(2)

    for row_id, entry in enumerate(items):
        name_cell = QTableWidgetItem(entry["name"])
        widget.setItem(row_id, 0, name_cell)
        value_cell = QTableWidgetItem(fmt(entry["value"]))
        widget.setItem(row_id, 1, value_cell)

    return widget
Ejemplo n.º 6
0
def get_dataset_schema(metadata: dict) -> Container:
    """Build the "Dataset" section from user-supplied dataset metadata.

    Args:
        metadata: dataset metadata. The optional keys "description",
            "creator", "author", "url", "copyright_holder" and
            "copyright_year" are read.

    Returns:
        A grid container wrapping one table with a row for every metadata
        entry that is present (and, for the text fields, non-empty).
    """
    # Local import keeps this block self-contained.
    import html

    about_dataset = []
    for key in ("description", "creator", "author"):
        if key in metadata and len(metadata[key]) > 0:
            about_dataset.append(
                {"name": key.capitalize(), "value": fmt(metadata[key])}
            )

    if "url" in metadata:
        # The URL is interpolated into both an attribute value and the link
        # text, so it must be escaped: a raw `"`, `<` or `&` would otherwise
        # break (or inject into) the generated markup.
        safe_url = html.escape(str(metadata["url"]), quote=True)
        about_dataset.append(
            {"name": "URL", "value": f'<a href="{safe_url}">{safe_url}</a>'}
        )

    if "copyright_holder" in metadata and len(metadata["copyright_holder"]) > 0:
        copyright_text = f"(c) {metadata['copyright_holder']}"
        # The year is appended only when it was provided.
        if "copyright_year" in metadata:
            copyright_text = f"{copyright_text} {metadata['copyright_year']}"
        about_dataset.append({"name": "Copyright", "value": fmt(copyright_text)})

    return Container(
        [Table(about_dataset, name="Dataset", anchor_id="metadata_dataset")],
        name="Dataset",
        anchor_id="dataset",
        sequence_type="grid",
    )
Ejemplo n.º 7
0
def get_table(items):
    """Fill a header-less, stretched two-column QTableWidget.

    Args:
        items: sized sequence of dicts with "name" and "value" keys.

    Returns:
        A QTableWidget with one row per item (name in column 0, formatted
        value in column 1), both headers hidden and the horizontal
        sections set to stretch.
    """
    from PyQt5.QtWidgets import QHeaderView

    widget = QTableWidget()

    widget.setRowCount(len(items))
    widget.setColumnCount(2)

    column_header = widget.horizontalHeader()
    column_header.setSectionResizeMode(QHeaderView.Stretch)
    column_header.setVisible(False)

    widget.verticalHeader().setVisible(False)

    for row_id, entry in enumerate(items):
        name_cell = QTableWidgetItem(entry["name"])
        widget.setItem(row_id, 0, name_cell)
        value_cell = QTableWidgetItem(fmt(entry["value"]))
        widget.setItem(row_id, 1, value_cell)

    return widget
Ejemplo n.º 8
0
def render_complex(config: Settings, summary: dict) -> dict:
    """Assemble the report elements for a complex-number variable.

    Args:
        config: report settings; ``plot.image_format`` and
            ``report.precision`` are read.
        summary: the variable's summary dictionary.

    Returns:
        A dict with a "top" grid container (variable info, two statistics
        tables and an empty HTML placeholder) and a "bottom" tabs
        container holding the complex-plane scatter plot.
    """
    varid = summary["varid"]
    image_format = config.plot.image_format

    def numeric(key):
        # Format a summary value with the report-wide precision.
        return fmt_numeric(summary[key], precision=config.report.precision)

    # Top
    header = VariableInfo(
        summary["varid"],
        summary["varname"],
        "Complex number (&Copf;)",
        summary["warnings"],
        summary["description"],
    )

    count_rows = [
        {"name": "Distinct", "value": fmt(summary["n_distinct"])},
        {"name": "Distinct (%)", "value": fmt_percent(summary["p_distinct"])},
        {"name": "Missing", "value": fmt(summary["n_missing"])},
        {"name": "Missing (%)", "value": fmt_percent(summary["p_missing"])},
        {"name": "Memory size", "value": fmt_bytesize(summary["memory_size"])},
    ]
    count_table = Table(count_rows)

    value_rows = [
        {"name": "Mean", "value": numeric("mean")},
        {"name": "Minimum", "value": numeric("min")},
        {"name": "Maximum", "value": numeric("max")},
        {"name": "Zeros", "value": numeric("n_zeros")},
        {"name": "Zeros (%)", "value": fmt_percent(summary["p_zeros"])},
    ]
    value_table = Table(value_rows)

    top = Container(
        [header, count_table, value_table, HTML("")], sequence_type="grid"
    )

    # Bottom
    scatter_image = Image(
        scatter_complex(config, summary["scatter_data"]),
        image_format=image_format,
        alt="Scatterplot",
        caption="Scatterplot in the complex plane",
        name="Scatter",
        anchor_id=f"{varid}scatter",
    )
    bottom = Container(
        [scatter_image], sequence_type="tabs", anchor_id=summary["varid"]
    )

    return {"top": top, "bottom": bottom}
Ejemplo n.º 9
0
def render_categorical(config: Settings, summary: dict) -> dict:
    """Assemble the report elements for a categorical variable.

    Args:
        config: report settings; ``vars.cat`` (n_obs, words, characters,
            length, redact), ``plot.image_format`` and
            ``plot.pie.max_unique`` are read.
        summary: the variable's summary dictionary.

    Returns:
        The dictionary produced by ``render_common`` with its "top" entry
        set to a grid container and its "bottom" entry set to a tabs
        container ("Overview", "Categories" and, depending on the
        settings, "Words" and "Characters").
    """
    varid = summary["varid"]
    max_top_rows = config.vars.cat.n_obs
    image_format = config.plot.image_format
    show_words = config.vars.cat.words
    show_characters = config.vars.cat.characters
    show_length = config.vars.cat.length

    template_variables = render_common(config, summary)

    def make_row(label, value, alert_field=None):
        # A row is flagged only when its field is listed in "alert_fields".
        flagged = alert_field is not None and alert_field in summary["alert_fields"]
        return {"name": label, "value": value, "alert": flagged}

    header = VariableInfo(
        summary["varid"],
        summary["varname"],
        "Categorical",
        summary["alerts"],
        summary["description"],
    )

    stats_table = Table(
        [
            make_row("Distinct", fmt(summary["n_distinct"]), "n_distinct"),
            make_row("Distinct (%)", fmt_percent(summary["p_distinct"]), "p_distinct"),
            make_row("Missing", fmt(summary["n_missing"]), "n_missing"),
            make_row("Missing (%)", fmt_percent(summary["p_missing"]), "p_missing"),
            make_row("Memory size", fmt_bytesize(summary["memory_size"])),
        ]
    )

    top_counts = freq_table(
        freqtable=summary["value_counts_without_nan"],
        n=summary["count"],
        max_number_to_print=max_top_rows,
    )
    mini_frequencies = FrequencyTableSmall(
        top_counts, redact=config.vars.cat.redact
    )

    template_variables["top"] = Container(
        [header, stats_table, mini_frequencies], sequence_type="grid"
    )

    # ------------------------------------------------------------------
    # Bottom: "Overview" tab
    common_values = FrequencyTable(
        template_variables["freq_table_rows"],
        name="Common Values",
        anchor_id=f"{varid}common_values",
        redact=config.vars.cat.redact,
    )

    frequency_stats = render_categorical_frequency(config, summary, varid)

    overview_items = []

    if show_length:
        # length_histo is reused in the "Categories" tab further down.
        length_table, length_histo = render_categorical_length(
            config, summary, varid
        )
        overview_items.append(length_table)

    if show_characters:
        # unicode_tab is reused for the "Characters" tab further down.
        char_overview, unicode_tab = render_categorical_unicode(
            config, summary, varid
        )
        overview_items.append(char_overview)

    overview_items.append(frequency_stats)

    if not config.vars.cat.redact:
        row_labels = ("1st row", "2nd row", "3rd row", "4th row", "5th row")
        sample_rows = []
        for label, value in zip(row_labels, summary["first_rows"]):
            sample_rows.append(make_row(label, fmt(value)))
        overview_items.append(Table(sample_rows, name="Sample"))

    # Bottom: "Categories" tab
    category_items: List[Renderable] = [common_values]
    if show_length:
        category_items.append(length_histo)

    pie_limit = config.plot.pie.max_unique
    if pie_limit > 0 and summary["n_distinct"] <= pie_limit:
        pie_image = Image(
            pie_plot(
                config,
                summary["value_counts_without_nan"],
                legend_kws={"loc": "upper right"},
            ),
            image_format=image_format,
            alt="Pie chart",
            name="Pie chart",
            anchor_id=f"{varid}pie_chart",
        )
        category_items.append(pie_image)

    overview_tab = Container(
        overview_items,
        name="Overview",
        anchor_id=f"{varid}overview",
        sequence_type="batch_grid",
        batch_size=len(overview_items),
        titles=False,
    )
    categories_tab = Container(
        category_items,
        name="Categories",
        anchor_id=f"{varid}string",
        sequence_type="batch_grid",
        batch_size=len(category_items),
    )
    bottom_items = [overview_tab, categories_tab]

    if show_words:
        word_rows = freq_table(
            freqtable=summary["word_counts"],
            n=summary["word_counts"].sum(),
            max_number_to_print=10,
        )
        word_frequencies = FrequencyTable(
            word_rows,
            name="Common words",
            anchor_id=f"{varid}cwo",
            redact=config.vars.cat.redact,
        )
        words_tab = Container(
            [word_frequencies],
            name="Words",
            anchor_id=f"{varid}word",
            sequence_type="grid",
        )
        bottom_items.append(words_tab)

    if show_characters:
        characters_tab = Container(
            [unicode_tab],
            name="Characters",
            anchor_id=f"{varid}characters",
            sequence_type="grid",
        )
        bottom_items.append(characters_tab)

    template_variables["bottom"] = Container(
        bottom_items, sequence_type="tabs", anchor_id=f"{varid}bottom"
    )

    return template_variables
Ejemplo n.º 10
0
def render_path(config: Settings, summary: dict) -> dict:
    """Assemble the report elements for a file-path variable.

    Starts from the categorical rendering, relabels the variable type as
    "Path" and appends a "Path" tab with per-component frequency tables.

    Args:
        config: report settings; ``n_freq_table_max``,
            ``vars.cat.redact`` and ``report.precision`` are read.
        summary: the variable's summary dictionary.

    Returns:
        The template variables from ``render_categorical`` extended with
        ``freqtable_<part>`` entries and the additional "Path" tab.
    """
    varid = summary["varid"]
    max_rows = config.n_freq_table_max
    redact = config.vars.cat.redact

    template_variables = render_categorical(config, summary)

    for part in ("name", "parent", "suffix", "stem", "anchor"):
        part_table = freq_table(
            freqtable=summary[f"{part}_counts"],
            n=summary["n"],
            max_number_to_print=max_rows,
        )
        template_variables[f"freqtable_{part}"] = part_table

    # Top: overwrite the variable type on the first item of the top container.
    type_info = template_variables["top"].content["items"][0]
    type_info.content["var_type"] = "Path"

    # Bottom
    def unique_count(key):
        # Format a summary value with the report-wide precision.
        return fmt_numeric(summary[key], precision=config.report.precision)

    overview_rows = [
        {
            "name": "Common prefix",
            "value": fmt(summary["common_prefix"]),
            "alert": False,
        }
    ]
    unique_fields = (
        ("Unique stems", "n_stem_unique"),
        ("Unique names", "n_name_unique"),
        ("Unique extensions", "n_suffix_unique"),
        ("Unique directories", "n_parent_unique"),
        ("Unique anchors", "n_anchor_unique"),
    )
    for label, key in unique_fields:
        overview_rows.append(
            {"name": label, "value": unique_count(key), "alert": False}
        )

    path_overview_tab = Container(
        [Table(overview_rows)],
        anchor_id=f"{varid}tbl",
        name="Overview",
        sequence_type="list",
    )

    path_items = [
        path_overview_tab,
        FrequencyTable(
            template_variables["freq_table_rows"],
            name="Full",
            anchor_id=f"{varid}full_frequency",
            redact=redact,
        ),
    ]
    component_tabs = (
        ("stem", "Stem"),
        ("name", "Name"),
        ("suffix", "Extension"),
        ("parent", "Parent"),
        ("anchor", "Anchor"),
    )
    for part, title in component_tabs:
        path_items.append(
            FrequencyTable(
                template_variables[f"freqtable_{part}"],
                name=title,
                anchor_id=f"{varid}{part}_frequency",
                redact=redact,
            )
        )

    path_tab = Container(
        path_items,
        name="Path",
        sequence_type="tabs",
        anchor_id=f"{varid}path",
    )

    template_variables["bottom"].content["items"].append(path_tab)

    return template_variables
Ejemplo n.º 11
0
def render_boolean(config: Settings, summary: dict) -> dict:
    """Assemble the report elements for a boolean variable.

    Args:
        config: report settings; ``vars.bool.n_obs``,
            ``plot.image_format`` and ``plot.pie.max_unique`` are read.
        summary: the variable's summary dictionary.

    Returns:
        The dictionary produced by ``render_common`` with its "top" entry
        set to a grid container and its "bottom" entry set to a tabs
        container (common values and, when enabled, a pie chart).
    """
    varid = summary["varid"]
    max_top_rows = config.vars.bool.n_obs
    image_format = config.plot.image_format

    # Prepare variables
    template_variables = render_common(config, summary)

    def make_row(label, value, alert_field=None):
        # A row is flagged only when its field is listed in "alert_fields".
        flagged = alert_field is not None and alert_field in summary["alert_fields"]
        return {"name": label, "value": value, "alert": flagged}

    # Element composition
    header = VariableInfo(
        anchor_id=summary["varid"],
        alerts=summary["alerts"],
        var_type="Boolean",
        var_name=summary["varname"],
        description=summary["description"],
    )

    stats_table = Table(
        [
            make_row("Distinct", fmt(summary["n_distinct"]), "n_distinct"),
            make_row("Distinct (%)", fmt_percent(summary["p_distinct"]), "p_distinct"),
            make_row("Missing", fmt(summary["n_missing"]), "n_missing"),
            make_row("Missing (%)", fmt_percent(summary["p_missing"]), "p_missing"),
            make_row("Memory size", fmt_bytesize(summary["memory_size"])),
        ]
    )

    top_counts = freq_table(
        freqtable=summary["value_counts_without_nan"],
        n=summary["n"],
        max_number_to_print=max_top_rows,
    )
    mini_frequencies = FrequencyTableSmall(top_counts, redact=False)

    template_variables["top"] = Container(
        [header, stats_table, mini_frequencies], sequence_type="grid"
    )

    common_values = FrequencyTable(
        template_variables["freq_table_rows"],
        name="Common Values",
        anchor_id=f"{varid}frequency_table",
        redact=False,
    )
    items: List[Renderable] = [common_values]

    # The chart is added whenever the pie-chart limit is positive.
    pie_limit = config.plot.pie.max_unique
    if pie_limit > 0:
        chart = Image(
            pie_plot(
                config,
                summary["value_counts_without_nan"],
                legend_kws={"loc": "upper right"},
            ),
            image_format=image_format,
            alt="Chart",
            name="Chart",
            anchor_id=f"{varid}pie_chart",
        )
        items.append(chart)

    template_variables["bottom"] = Container(
        items, sequence_type="tabs", anchor_id=f"{varid}bottom"
    )

    return template_variables
Ejemplo n.º 12
0
def render_count(config: Settings, summary: dict) -> dict:
    """Assemble the report elements for a count variable.

    The anchor ids of the nested bottom elements are prefixed with the
    variable id, matching the other renderers, so that two count variables
    in one report do not produce identical element ids.

    Args:
        config: report settings; ``plot.image_format`` and
            ``report.precision`` are read.
        summary: the variable's summary dictionary.

    Returns:
        The dictionary produced by ``render_common`` with its "top" entry
        set to a grid container (info, two tables, mini histogram) and its
        "bottom" entry set to a tabs container (histogram, common values,
        extreme values).
    """
    varid = summary["varid"]
    template_variables = render_common(config, summary)
    image_format = config.plot.image_format

    # Top
    info = VariableInfo(
        summary["varid"],
        summary["varname"],
        "Real number (&Ropf; / &Ropf;<sub>&ge;0</sub>)",
        summary["warnings"],
        summary["description"],
    )

    table1 = Table(
        [
            {
                "name": "Distinct",
                "value": fmt(summary["n_distinct"]),
                "alert": False,
            },
            {
                "name": "Distinct (%)",
                "value": fmt_percent(summary["p_distinct"]),
                "alert": False,
            },
            {
                "name": "Missing",
                "value": fmt(summary["n_missing"]),
                "alert": False,
            },
            {
                "name": "Missing (%)",
                "value": fmt_percent(summary["p_missing"]),
                "alert": False,
            },
        ]
    )

    table2 = Table(
        [
            {
                "name": "Mean",
                "value": fmt_numeric(
                    summary["mean"], precision=config.report.precision
                ),
                "alert": False,
            },
            {
                "name": "Minimum",
                "value": fmt_numeric(
                    summary["min"], precision=config.report.precision
                ),
                "alert": False,
            },
            {
                "name": "Maximum",
                "value": fmt_numeric(
                    summary["max"], precision=config.report.precision
                ),
                "alert": False,
            },
            {
                "name": "Zeros",
                "value": fmt(summary["n_zeros"]),
                "alert": False,
            },
            {
                "name": "Zeros (%)",
                "value": fmt_percent(summary["p_zeros"]),
                "alert": False,
            },
            {
                "name": "Memory size",
                "value": fmt_bytesize(summary["memory_size"]),
                "alert": False,
            },
        ]
    )

    mini_histo = Image(
        mini_histogram(config, *summary["histogram"]),
        image_format=image_format,
        alt="Mini histogram",
    )

    template_variables["top"] = Container(
        [info, table1, table2, mini_histo], sequence_type="grid"
    )

    # Bottom: every nested anchor id carries the variable id as a prefix.
    seqs = [
        Image(
            histogram(config, *summary["histogram"]),
            image_format=image_format,
            alt="Histogram",
            caption=f"<strong>Histogram with fixed size bins</strong> (bins={len(summary['histogram'][1]) - 1})",
            name="Histogram",
            anchor_id=f"{varid}histogram",
        )
    ]

    fq = FrequencyTable(
        template_variables["freq_table_rows"],
        name="Common values",
        anchor_id=f"{varid}common_values",
        redact=False,
    )

    evs = Container(
        [
            FrequencyTable(
                template_variables["firstn_expanded"],
                name="Minimum 5 values",
                anchor_id=f"{varid}firstn",
                redact=False,
            ),
            FrequencyTable(
                template_variables["lastn_expanded"],
                name="Maximum 5 values",
                anchor_id=f"{varid}lastn",
                redact=False,
            ),
        ],
        sequence_type="tabs",
        name="Extreme values",
        anchor_id=f"{varid}extreme_values",
    )

    template_variables["bottom"] = Container(
        [
            Container(
                seqs,
                sequence_type="tabs",
                name="Histogram(s)",
                anchor_id=f"{varid}histograms",
            ),
            fq,
            evs,
        ],
        sequence_type="tabs",
        anchor_id=summary["varid"],
    )

    return template_variables
Ejemplo n.º 13
0
def render_date(config: Settings, summary: Dict[str, Any]) -> Dict[str, Any]:
    """Assemble the report elements for a date variable.

    Args:
        config: report settings; ``plot.image_format`` is read.
        summary: the variable's summary dictionary.

    Returns:
        A dict with a "top" grid container (variable info, two tables and
        a mini histogram) and a "bottom" tabs container holding the full
        histogram.
    """
    varid = summary["varid"]
    image_format = config.plot.image_format

    def plain_row(label, value):
        # None of the rows of this renderer is ever flagged as an alert.
        return {"name": label, "value": value, "alert": False}

    # Top
    header = VariableInfo(
        summary["varid"],
        summary["varname"],
        "Date",
        summary["warnings"],
        summary["description"],
    )

    count_table = Table(
        [
            plain_row("Distinct", fmt(summary["n_distinct"])),
            plain_row("Distinct (%)", fmt_percent(summary["p_distinct"])),
            plain_row("Missing", fmt(summary["n_missing"])),
            plain_row("Missing (%)", fmt_percent(summary["p_missing"])),
            plain_row("Memory size", fmt_bytesize(summary["memory_size"])),
        ]
    )

    range_table = Table(
        [
            plain_row("Minimum", fmt(summary["min"])),
            plain_row("Maximum", fmt(summary["max"])),
        ]
    )

    mini_plot = mini_histogram(
        config, summary["histogram"][0], summary["histogram"][1], date=True
    )
    mini_histo = Image(mini_plot, image_format=image_format, alt="Mini histogram")

    top = Container(
        [header, count_table, range_table, mini_histo], sequence_type="grid"
    )

    # Bottom
    full_plot = histogram(
        config, summary["histogram"][0], summary["histogram"][1], date=True
    )
    histogram_image = Image(
        full_plot,
        image_format=image_format,
        alt="Histogram",
        caption=f"<strong>Histogram with fixed size bins</strong> (bins={len(summary['histogram'][1]) - 1})",
        name="Histogram",
        anchor_id=f"{varid}histogram",
    )
    bottom = Container(
        [histogram_image],
        sequence_type="tabs",
        anchor_id=summary["varid"],
    )

    return {"top": top, "bottom": bottom}
Ejemplo n.º 14
0
def render_real(config: Settings, summary: dict) -> dict:
    """Assemble the report elements for a real-valued variable.

    Args:
        config: report settings; ``plot.image_format``,
            ``report.precision`` and ``n_extreme_obs`` are read.
        summary: the variable's summary dictionary.

    Returns:
        The dictionary produced by ``render_common`` with its "top" entry
        set to a grid container (info, two tables, mini histogram) and its
        "bottom" entry set to a tabs container (statistics, histogram,
        common values, extreme values).
    """
    varid = summary["varid"]
    template_variables = render_common(config, summary)
    image_format = config.plot.image_format

    def numeric(key):
        # Format a summary value with the report-wide precision.
        return fmt_numeric(summary[key], precision=config.report.precision)

    def make_row(label, value, alert_field=None):
        # A row is flagged only when its field is listed in "alert_fields".
        flagged = alert_field is not None and alert_field in summary["alert_fields"]
        return {"name": label, "value": value, "alert": flagged}

    # The type label mentions the non-negative reals when no value is negative.
    type_label = (
        "Real number (&Ropf;<sub>&ge;0</sub>)"
        if summary["min"] >= 0
        else "Real number (&Ropf;)"
    )

    # Top
    header = VariableInfo(
        summary["varid"],
        summary["varname"],
        type_label,
        summary["alerts"],
        summary["description"],
    )

    left_table = Table(
        [
            make_row("Distinct", fmt(summary["n_distinct"]), "n_distinct"),
            make_row("Distinct (%)", fmt_percent(summary["p_distinct"]), "p_distinct"),
            make_row("Missing", fmt(summary["n_missing"]), "n_missing"),
            make_row("Missing (%)", fmt_percent(summary["p_missing"]), "p_missing"),
            make_row("Infinite", fmt(summary["n_infinite"]), "n_infinite"),
            make_row("Infinite (%)", fmt_percent(summary["p_infinite"]), "p_infinite"),
            make_row("Mean", numeric("mean")),
        ]
    )

    right_table = Table(
        [
            make_row("Minimum", numeric("min")),
            make_row("Maximum", numeric("max")),
            make_row("Zeros", fmt(summary["n_zeros"]), "n_zeros"),
            make_row("Zeros (%)", fmt_percent(summary["p_zeros"]), "p_zeros"),
            make_row("Negative", fmt(summary["n_negative"])),
            make_row("Negative (%)", fmt_percent(summary["p_negative"])),
            make_row("Memory size", fmt_bytesize(summary["memory_size"])),
        ]
    )

    mini_histo = Image(
        mini_histogram(config, *summary["histogram"]),
        image_format=image_format,
        alt="Mini histogram",
    )

    template_variables["top"] = Container(
        [header, left_table, right_table, mini_histo], sequence_type="grid"
    )

    # Bottom: "Statistics" tab
    quantile_fields = (
        ("Minimum", "min"),
        ("5-th percentile", "5%"),
        ("Q1", "25%"),
        ("median", "50%"),
        ("Q3", "75%"),
        ("95-th percentile", "95%"),
        ("Maximum", "max"),
        ("Range", "range"),
        ("Interquartile range (IQR)", "iqr"),
    )
    quantile_statistics = Table(
        [{"name": label, "value": numeric(key)} for label, key in quantile_fields],
        name="Quantile statistics",
    )

    descriptive_rows = [
        {"name": "Standard deviation", "value": numeric("std")},
        {"name": "Coefficient of variation (CV)", "value": numeric("cv")},
        {"name": "Kurtosis", "value": numeric("kurtosis")},
        {"name": "Mean", "value": numeric("mean")},
        {"name": "Median Absolute Deviation (MAD)", "value": numeric("mad")},
        {
            "name": "Skewness",
            "value": numeric("skewness"),
            # This row carries a "class" entry instead of an "alert" flag.
            "class": "alert" if "skewness" in summary["alert_fields"] else "",
        },
        {"name": "Sum", "value": numeric("sum")},
        {"name": "Variance", "value": numeric("variance")},
        {"name": "Monotonicity", "value": fmt_monotonic(summary["monotonic"])},
    ]
    descriptive_statistics = Table(descriptive_rows, name="Descriptive statistics")

    statistics_tab = Container(
        [quantile_statistics, descriptive_statistics],
        anchor_id=f"{varid}statistics",
        name="Statistics",
        sequence_type="grid",
    )

    # Bottom: "Histogram" tab
    histogram_tab = Image(
        histogram(config, *summary["histogram"]),
        image_format=image_format,
        alt="Histogram",
        caption=f"<strong>Histogram with fixed size bins</strong> (bins={len(summary['histogram'][1]) - 1})",
        name="Histogram",
        anchor_id=f"{varid}histogram",
    )

    # Bottom: "Common values" tab
    common_values_tab = FrequencyTable(
        template_variables["freq_table_rows"],
        name="Common values",
        anchor_id=f"{varid}common_values",
        redact=False,
    )

    # Bottom: "Extreme values" tab
    smallest_values = FrequencyTable(
        template_variables["firstn_expanded"],
        name=f"Minimum {config.n_extreme_obs} values",
        anchor_id=f"{varid}firstn",
        redact=False,
    )
    largest_values = FrequencyTable(
        template_variables["lastn_expanded"],
        name=f"Maximum {config.n_extreme_obs} values",
        anchor_id=f"{varid}lastn",
        redact=False,
    )
    extreme_values_tab = Container(
        [smallest_values, largest_values],
        sequence_type="tabs",
        name="Extreme values",
        anchor_id=f"{varid}extreme_values",
    )

    template_variables["bottom"] = Container(
        [statistics_tab, histogram_tab, common_values_tab, extreme_values_tab],
        sequence_type="tabs",
        anchor_id=f"{varid}bottom",
    )

    return template_variables
Ejemplo n.º 15
0
def render_url(config: Settings, summary: dict) -> dict:
    """Assemble the template variables used to render a URL variable.

    The result starts as whatever ``render_common`` returns and is extended
    with one frequency table per URL component (``freqtable_<part>``), a
    tabbed ``"bottom"`` container holding the full and per-component
    frequency tables, and a grid ``"top"`` container holding the variable
    info, the overview table and a small frequency table.

    Args:
        config: Report settings. Reads ``n_freq_table_max``,
            ``vars.cat.n_obs`` and ``vars.cat.redact``.
        summary: Summary of the variable. Reads ``varid``, ``varname``,
            ``description``, ``warnings``, ``warn_fields``, ``n``,
            ``<part>_counts`` for each URL component,
            ``value_counts_without_nan``, the distinct/missing counts and
            percentages, and ``memory_size``.

    Returns:
        The template variables mapping, including the ``"top"`` and
        ``"bottom"`` entries.
    """
    varid = summary["varid"]
    max_rows = config.n_freq_table_max

    cat_settings = config.vars.cat
    n_obs_cat = cat_settings.n_obs
    redact = cat_settings.redact

    template_variables = render_common(config, summary)

    # (summary/template key fragment, tab label) for every URL component.
    url_parts = (
        ("scheme", "Scheme"),
        ("netloc", "Netloc"),
        ("path", "Path"),
        ("query", "Query"),
        ("fragment", "Fragment"),
    )

    # First compute all per-component frequency rows ...
    for part, _label in url_parts:
        template_variables[f"freqtable_{part}"] = freq_table(
            freqtable=summary[f"{part}_counts"],
            n=summary["n"],
            max_number_to_print=max_rows,
        )

    # ... then wrap them as tabs, with the full-URL table in front.
    tabs = [
        FrequencyTable(
            template_variables["freq_table_rows"],
            name="Full",
            anchor_id=f"{varid}full_frequency",
            redact=redact,
        )
    ]
    for part, label in url_parts:
        tabs.append(
            FrequencyTable(
                template_variables[f"freqtable_{part}"],
                name=label,
                anchor_id=f"{varid}{part}_frequency",
                redact=redact,
            )
        )

    template_variables["bottom"] = Container(
        tabs,
        sequence_type="tabs",
        name="url stats",
        anchor_id=f"{varid}urlstats",
    )

    # Element composition
    info = VariableInfo(
        varid,
        summary["varname"],
        "URL",
        summary["warnings"],
        summary["description"],
    )

    # (row label, formatter, summary key) for rows that can raise an alert.
    alertable_rows = (
        ("Distinct", fmt, "n_distinct"),
        ("Distinct (%)", fmt_percent, "p_distinct"),
        ("Missing", fmt, "n_missing"),
        ("Missing (%)", fmt_percent, "p_missing"),
    )
    overview_rows = [
        {
            "name": label,
            "value": formatter(summary[key]),
            "alert": key in summary["warn_fields"],
        }
        for label, formatter, key in alertable_rows
    ]
    # Memory size never triggers an alert.
    overview_rows.append(
        {
            "name": "Memory size",
            "value": fmt_bytesize(summary["memory_size"]),
            "alert": False,
        }
    )
    table = Table(overview_rows)

    small_freq_rows = freq_table(
        freqtable=summary["value_counts_without_nan"],
        n=summary["n"],
        max_number_to_print=n_obs_cat,
    )
    fqm = FrequencyTableSmall(small_freq_rows, redact=redact)

    template_variables["top"] = Container(
        [info, table, fqm],
        sequence_type="grid",
    )

    return template_variables