def test_issue502(summarizer, typeset):
    """Check the min/max reported for a nullable ``Int64`` series (issue 502)."""
    # Integers 1 through 11, stored with pandas' nullable integer dtype.
    nullable_ints = pd.Series(list(range(1, 12)), dtype=pd.Int64Dtype())

    description = describe_1d(nullable_ints, summarizer, typeset)

    assert description["min"] == 1
    assert description["max"] == 11
def test_describe_unique(data, is_unique, p_distinct, p_unique):
    """Verify the uniqueness statistics produced by ``describe_1d``.

    Args:
        data: The 1D data to describe.
        is_unique: Expected ``is_unique`` value, or ``None`` to skip the checks.
        p_distinct: Expected ``p_distinct`` value.
        p_unique: Expected ``p_unique`` value.
    """
    summary = describe_1d(data)

    # Nothing to verify when no uniqueness expectation was supplied; the
    # description is still computed above.
    if is_unique is None:
        return

    assert summary["p_unique"] == p_unique, "Describe 1D p_unique incorrect"
    assert summary["p_distinct"] == p_distinct, "Describe 1D p_distinct incorrect"
    assert summary["is_unique"] == is_unique, "Describe 1D should return unique"
def test_describe_unique(data, expected, summarizer, typeset):
    """Verify the uniqueness statistics produced by ``describe_1d``.

    Args:
        data: The 1D data to describe.
        expected: Mapping with the expected ``is_unique``, ``p_unique`` and
            ``p_distinct`` values; an ``is_unique`` of ``None`` skips the checks.
        summarizer: Summarizer forwarded to ``describe_1d``.
        typeset: Typeset forwarded to ``describe_1d``.
    """
    # Set the numeric low-categorical threshold to 0 before describing.
    config["vars"]["num"]["low_categorical_threshold"] = 0
    summary = describe_1d(data, summarizer, typeset)

    # No uniqueness expectation supplied: nothing further to verify.
    if expected["is_unique"] is None:
        return

    # (summary key, failure message) pairs, checked in this order.
    checks = (
        ("p_unique", "Describe 1D p_unique incorrect"),
        ("p_distinct", "Describe 1D p_distinct incorrect"),
        ("is_unique", "Describe 1D should return unique"),
    )
    for key, message in checks:
        assert summary[key] == expected[key], message
def multiprocess_1d(args: tuple) -> Tuple[str, dict]:
    """Wrapper to process a single series, for use when processing in parallel.

    Args:
        args: A ``(column, series)`` pair: the name of the column and the
            series values.

    Returns:
        A tuple with the column name and the series description.
    """
    name, values = args
    # config, summarizer and typeset are not parameters; they are resolved
    # from the surrounding scope.
    description = describe_1d(config, values, summarizer, typeset)
    return name, description
def mock_multiprocess_1d(args, summarizer, typeset) -> Tuple[str, dict]:
    """Wrapper to process a single series, for use when processing in parallel.

    Copy of the multiprocess_1d function in get_series_descriptions, summary.py.

    Args:
        args: A ``(column, series)`` pair: the name of the column and the
            series values.
        summarizer: Summarizer forwarded to ``describe_1d``.
        typeset: Typeset forwarded to ``describe_1d``.

    Returns:
        A tuple with the column name and the series description.
    """
    name, values = args
    description = describe_1d(values, summarizer, typeset)
    return name, description
def test_issue502():
    """Check the min/max reported for a nullable ``Int64`` series (issue 502)."""
    # Integers 1 through 5, stored with pandas' nullable integer dtype.
    nullable_ints = pd.Series(list(range(1, 6)), dtype=pd.Int64Dtype())

    description = describe_1d(nullable_ints)

    assert description["min"] == 1
    assert description["max"] == 5