Example #1
0
def render_url(summary):
    """Assemble the template variables for a variable reported as type "URL".

    Starts from the mapping returned by ``render_common(summary)`` (which is
    expected to already contain ``freq_table_rows``) and adds:

    * ``freqtable_<part>`` for each of scheme, netloc, path, query and
      fragment, built with ``freq_table`` from ``summary["<part>_counts"]``;
    * ``bottom``: a ``Sequence`` of type "tabs" holding the full frequency
      table followed by one frequency table per URL part;
    * ``top``: a ``Sequence`` of type "grid" holding the overview, the
      statistics table and the small frequency table.

    Args:
        summary: mapping with the variable's statistics. The keys read here
            are ``varid``, ``varname``, ``warnings``, ``n``,
            ``value_counts``, ``n_unique``, ``p_unique``, ``n_missing``,
            ``p_missing``, ``memory_size`` and ``<part>_counts`` for every
            URL part.

    Returns:
        The template variables mapping described above.
    """
    # Row limits come from the global configuration.
    full_table_limit = config["n_freq_table_max"].get(int)
    mini_table_limit = config["vars"]["cat"]["n_obs"].get(int)

    # TODO: merge with boolean/categorical
    mini_rows = freq_table(
        freqtable=summary["value_counts"],
        n=summary["n"],
        max_number_to_print=mini_table_limit,
    )
    template_variables = render_common(summary)

    # One frequency table per component of the URL.
    url_parts = ("scheme", "netloc", "path", "query", "fragment")
    for part in url_parts:
        template_variables["freqtable_{}".format(part)] = freq_table(
            freqtable=summary["{}_counts".format(part)],
            n=summary["n"],
            max_number_to_print=full_table_limit,
        )

    # (template variable holding the rows, tab title, anchor prefix)
    tab_specs = (
        ("freq_table_rows", "Full", "full"),
        ("freqtable_scheme", "Scheme", "scheme"),
        ("freqtable_netloc", "Netloc", "netloc"),
        ("freqtable_path", "Path", "path"),
        ("freqtable_query", "Query", "query"),
        ("freqtable_fragment", "Fragment", "fragment"),
    )
    varid = summary["varid"]
    tabs = [
        FrequencyTable(
            template_variables[rows_key],
            name=title,
            anchor_id="{varid}{prefix}_frequency".format(varid=varid,
                                                         prefix=prefix),
        )
        for rows_key, title, prefix in tab_specs
    ]
    template_variables["bottom"] = Sequence(tabs, sequence_type="tabs")

    # Element composition
    info = Overview(summary["varid"], summary["varname"], "URL",
                    summary["warnings"])

    # (row label, key in summary, formatter name)
    stat_specs = (
        ("Distinct count", "n_unique", "fmt"),
        ("Unique (%)", "p_unique", "fmt_percent"),
        ("Missing", "n_missing", "fmt"),
        ("Missing (%)", "p_missing", "fmt_percent"),
        ("Memory size", "memory_size", "fmt_bytesize"),
    )
    table = Table([
        {"name": label, "value": summary[key], "fmt": formatter}
        for label, key, formatter in stat_specs
    ])

    fqm = FrequencyTableSmall(mini_rows)

    # TODO: settings 3,3,6
    template_variables["top"] = Sequence([info, table, fqm],
                                         sequence_type="grid")

    return template_variables
Example #2
0
def render_url(summary):
    """Assemble the template variables for a variable reported as type "URL".

    Starts from the mapping returned by ``render_common(summary)`` (which is
    expected to already contain ``freq_table_rows``) and adds:

    * ``freqtable_<part>`` for each of scheme, netloc, path, query and
      fragment, built with ``freq_table`` from ``summary["<part>_counts"]``;
    * ``bottom``: a ``Container`` of type "tabs" named "url stats" holding
      the full frequency table followed by one frequency table per URL part;
    * ``top``: a ``Container`` of type "grid" holding the variable info, the
      statistics table and the small frequency table.

    The ``redact`` flag read from the configuration is forwarded to every
    frequency table.

    Args:
        summary: mapping with the variable's statistics. The keys read here
            are ``varid``, ``varname``, ``warnings``, ``description``,
            ``warn_fields``, ``n``, ``value_counts``, ``n_distinct``,
            ``p_distinct``, ``n_missing``, ``p_missing``, ``memory_size``
            and ``<part>_counts`` for every URL part.

    Returns:
        The template variables mapping described above.
    """
    varid = summary["varid"]

    # Row limits and redaction flag come from the global configuration.
    full_table_limit = config["n_freq_table_max"].get(int)
    mini_table_limit = config["vars"]["cat"]["n_obs"].get(int)
    redact = config["vars"]["cat"]["redact"].get(bool)

    template_variables = render_common(summary)

    # One frequency table per component of the URL.
    url_parts = ("scheme", "netloc", "path", "query", "fragment")
    for part in url_parts:
        template_variables[f"freqtable_{part}"] = freq_table(
            freqtable=summary[f"{part}_counts"],
            n=summary["n"],
            max_number_to_print=full_table_limit,
        )

    # (template variable holding the rows, tab title, anchor prefix)
    tab_specs = (
        ("freq_table_rows", "Full", "full"),
        ("freqtable_scheme", "Scheme", "scheme"),
        ("freqtable_netloc", "Netloc", "netloc"),
        ("freqtable_path", "Path", "path"),
        ("freqtable_query", "Query", "query"),
        ("freqtable_fragment", "Fragment", "fragment"),
    )
    tabs = [
        FrequencyTable(
            template_variables[rows_key],
            name=title,
            anchor_id=f"{varid}{prefix}_frequency",
            redact=redact,
        )
        for rows_key, title, prefix in tab_specs
    ]
    template_variables["bottom"] = Container(
        tabs,
        sequence_type="tabs",
        name="url stats",
        anchor_id=f"{varid}urlstats",
    )

    # Element composition
    info = VariableInfo(
        summary["varid"],
        summary["varname"],
        "URL",
        summary["warnings"],
        summary["description"],
    )

    # Statistics whose alert flag depends on membership in warn_fields:
    # (row label, key in summary, formatter name)
    alertable_stats = (
        ("Distinct", "n_distinct", "fmt"),
        ("Distinct (%)", "p_distinct", "fmt_percent"),
        ("Missing", "n_missing", "fmt"),
        ("Missing (%)", "p_missing", "fmt_percent"),
    )
    stat_rows = [
        {
            "name": label,
            "value": summary[key],
            "fmt": formatter,
            "alert": key in summary["warn_fields"],
        }
        for label, key, formatter in alertable_stats
    ]
    # Memory size never raises an alert, regardless of warn_fields.
    stat_rows.append({
        "name": "Memory size",
        "value": summary["memory_size"],
        "fmt": "fmt_bytesize",
        "alert": False,
    })
    table = Table(stat_rows)

    mini_rows = freq_table(
        freqtable=summary["value_counts"],
        n=summary["n"],
        max_number_to_print=mini_table_limit,
    )
    fqm = FrequencyTableSmall(mini_rows, redact=redact)

    template_variables["top"] = Container([info, table, fqm],
                                          sequence_type="grid")

    return template_variables