Esempio n. 1
0
def test_count_summary_category():
    """Check the NaN-free value counts of a mostly-missing categorical.

    The input declares three categories but only two are observed, followed
    by 100 NaN entries; the test expects exactly two entries in
    ``value_counts_without_nan``.
    """
    observed = ["Poor", "Neutral"] + [np.nan] * 100
    declared_categories = ["Poor", "Neutral", "Excellent"]
    s = pd.Categorical(observed, categories=declared_categories)

    # Only the summary dict matters here; the first returned value is ignored.
    _, summary = describe_counts(s, {})

    counts = summary["value_counts_without_nan"]
    assert len(counts.index) == 2
Esempio n. 2
0
def test_summary_supported_empty_df(config, empty_data):
    """Feed column ``"A"`` of ``empty_data`` through three describers in turn.

    The column and summary returned by each step are passed on to the next:
    ``describe_counts`` -> ``describe_generic`` -> ``describe_supported``.
    After every step the newly expected summary keys are verified.
    """
    # Step 1: counts only -- the missing-value ratio is not expected yet.
    _, column, stats = describe_counts(config, empty_data["A"], {})
    assert stats["n_missing"] == 0
    assert "p_missing" not in stats

    # Step 2: generic description adds the ratio and the overall count.
    _, column, stats = describe_generic(config, column, stats)
    assert stats["n_missing"] == 0
    assert stats["p_missing"] == 0
    assert stats["count"] == 0

    # Step 3: supported description adds distinct/unique statistics.
    _, _, stats = describe_supported(config, column, stats)
    assert stats["n_distinct"] == 0
    assert stats["p_distinct"] == 0
    assert stats["n_unique"] == 0
    assert not stats["is_unique"]
Esempio n. 3
0
def test_count_summary_sorted():
    """Check the ordering of ``value_counts_without_nan``.

    The series holds a single ``1`` and a thousand ``2`` values; the test
    expects ``2`` at index position 0 and ``1`` at position 1.
    """
    s = pd.Series([1] + [2] * 1000)

    # Only the summary dict matters here; the first returned value is ignored.
    _, summary = describe_counts(s, {})

    ordered_values = summary["value_counts_without_nan"].index
    assert ordered_values[0] == 2
    assert ordered_values[1] == 1
Esempio n. 4
0
def test_count_summary_nat():
    """Check the NaN-free value counts of a datetime series with gaps.

    The series is built from ``1``, ``2``, ``NaN`` and ``NaT`` and converted
    with ``pd.to_datetime``; the test expects exactly two entries in
    ``value_counts_without_nan``.
    """
    raw = pd.Series([1, 2, np.nan, pd.NaT])
    s = pd.to_datetime(raw)

    # Only the summary dict matters here; the first returned value is ignored.
    _, summary = describe_counts(s, {})

    counts = summary["value_counts_without_nan"]
    assert len(counts.index) == 2