예제 #1
0
def test_issue502(summarizer, typeset):
    """Describe a nullable-integer series holding 1..11 and check its extremes.

    The series uses pandas' ``Int64Dtype`` extension dtype; the description
    returned by ``describe_1d`` must report ``min`` 1 and ``max`` 11.
    """
    values = list(range(1, 12))
    series = pd.Series(values, dtype=pd.Int64Dtype())

    description = describe_1d(series, summarizer, typeset)
    for key, wanted in (("min", 1), ("max", 11)):
        assert description[key] == wanted
예제 #2
0
def test_describe_unique(data, is_unique, p_distinct, p_unique):
    """Check the uniqueness statistics that ``describe_1d`` reports for ``data``.

    The description is always computed. Its ``p_unique``, ``p_distinct`` and
    ``is_unique`` entries are only compared against the expected values when
    ``is_unique`` is not ``None``.
    """
    summary = describe_1d(data)

    # No expectation supplied for this case: computing the description without
    # raising is all that is checked.
    if is_unique is None:
        return

    assert summary["p_unique"] == p_unique, "Describe 1D p_unique incorrect"
    assert summary["p_distinct"] == p_distinct, "Describe 1D p_distinct incorrect"
    assert summary["is_unique"] == is_unique, "Describe 1D should return unique"
예제 #3
0
def test_describe_unique(data, expected, summarizer, typeset):
    """Check the uniqueness statistics that ``describe_1d`` reports for ``data``.

    ``expected`` is a mapping with the keys ``is_unique``, ``p_unique`` and
    ``p_distinct``. The comparisons are skipped when ``expected["is_unique"]``
    is ``None``.
    """
    # NOTE(review): this writes to the shared ``config`` object and does not
    # restore the previous value inside this function.
    config["vars"]["num"]["low_categorical_threshold"] = 0

    summary = describe_1d(data, summarizer, typeset)
    if expected["is_unique"] is None:
        return

    checks = (
        ("p_unique", "Describe 1D p_unique incorrect"),
        ("p_distinct", "Describe 1D p_distinct incorrect"),
        ("is_unique", "Describe 1D should return unique"),
    )
    for key, message in checks:
        assert summary[key] == expected[key], message
예제 #4
0
    def multiprocess_1d(args: tuple) -> Tuple[str, dict]:
        """Wrapper to process one series in parallel.

        Args:
            args: A ``(column, series)`` pair: the name of the column and
                the series values.

        Returns:
            A tuple with the column name and the series description.
        """
        name, values = args
        # ``config``, ``summarizer`` and ``typeset`` come from the enclosing scope.
        description = describe_1d(config, values, summarizer, typeset)
        return name, description
예제 #5
0
def mock_multiprocess_1d(args, summarizer, typeset) -> Tuple[str, dict]:
    """Wrapper to process one series in parallel.

    Copy of the ``multiprocess_1d`` function in ``get_series_descriptions``
    (summary.py), taking the summarizer and typeset as explicit arguments.

    Args:
        args: A ``(column, series)`` pair: the name of the column and the
            series values.
        summarizer: Passed through to ``describe_1d``.
        typeset: Passed through to ``describe_1d``.

    Returns:
        A tuple with the column name and the series description.
    """
    name, values = args
    description = describe_1d(values, summarizer, typeset)
    return name, description
예제 #6
0
def test_issue502():
    """Describe a nullable-integer series holding 1..5 and check its extremes.

    The series uses pandas' ``Int64Dtype`` extension dtype; the description
    returned by ``describe_1d`` must report ``min`` 1 and ``max`` 5.
    """
    values = list(range(1, 6))
    series = pd.Series(values, dtype=pd.Int64Dtype())

    description = describe_1d(series)
    for key, wanted in (("min", 1), ("max", 5)):
        assert description[key] == wanted