Esempio n. 1
0
def test_groupby_bool_first() -> None:
    """``first()`` on a grouped RLE bool column must keep the RLE bool dtype."""
    column = pd.Series([True, True], dtype=RLEDtype(bool))
    frame = pd.DataFrame({"x": column, "g": 1})

    result = frame.groupby("g")["x"].first()
    assert result.dtype == RLEDtype(bool)

    # Both rows share one group, so a single ``True`` is expected.
    npt.assert_array_equal(result.array, RLEArray._from_sequence([True]))
Esempio n. 2
0
def test_unary_operator(
    rle_series: pd.Series,
    uncompressed_series: pd.Series,
    unary_operator: FUnaryOperator,
) -> None:
    """Check a unary operator on an RLE series against the uncompressed result."""
    result = unary_operator(rle_series)
    assert result.dtype == RLEDtype(float)

    # Reference: the same operator on the plain series, cast to RLE float.
    reference = unary_operator(uncompressed_series)
    reference = reference.astype(RLEDtype(float))
    pd.testing.assert_series_equal(result, reference)
Esempio n. 3
0
def test_binary_bool_operator_scalar(
    rle_bool_series: pd.Series,
    uncompressed_bool_series: pd.Series,
    bool_scalar: bool,
    binary_bool_operator: FBinaryBoolOperator,
) -> None:
    """Check a boolean binary operator with a scalar operand on RLE vs. plain data."""
    result = binary_bool_operator(rle_bool_series, bool_scalar)
    assert result.dtype == RLEDtype(bool)

    # Reference: the same operator on the plain series, cast to RLE bool.
    reference = binary_bool_operator(uncompressed_bool_series, bool_scalar)
    reference = reference.astype(RLEDtype(bool))
    pd.testing.assert_series_equal(result, reference)
Esempio n. 4
0
def test_groupby_bool_sum() -> None:
    """Summing a grouped RLE bool column is expected to produce float64."""
    # Cython routines for integer addition are not available, so we need to accept floats here.
    column = pd.Series([True, True], dtype=RLEDtype(bool))
    frame = pd.DataFrame({"x": column, "g": 1})

    totals = frame.groupby("g")["x"].sum()
    assert totals.dtype == np.float64

    # Two ``True`` values in the single group add up to 2.
    npt.assert_array_equal(totals.to_numpy(), np.array([2], dtype=np.float64))
def test_round(data_orig: pd.Series, data_rle: pd.Series, decimals: int) -> None:
    """Rounding an RLE series must agree with rounding the original series."""
    rounded_plain = data_orig.round(decimals=decimals)
    rounded_rle = data_rle.round(decimals=decimals)

    assert rounded_rle.dtype == RLEDtype(rounded_plain.dtype)

    # Cast back through the dtype's ``_dtype`` attribute (presumably the
    # wrapped plain dtype) so both series can be compared directly.
    plain_dtype = rounded_rle.dtype._dtype
    pdt.assert_series_equal(rounded_plain, rounded_rle.astype(plain_dtype))
def test_shift(
    data_orig: pd.Series, data_rle: pd.Series, periods: int, fill_value: Any
) -> None:
    """Shifting an RLE series must agree with shifting the original series."""
    shift_kwargs = {"periods": periods, "fill_value": fill_value}
    shifted_plain = data_orig.shift(**shift_kwargs)
    shifted_rle = data_rle.shift(**shift_kwargs)

    assert shifted_rle.dtype == RLEDtype(shifted_plain.dtype)

    # Cast back through the dtype's ``_dtype`` attribute (presumably the
    # wrapped plain dtype) so both series can be compared directly.
    plain_dtype = shifted_rle.dtype._dtype
    pdt.assert_series_equal(shifted_plain, shifted_rle.astype(plain_dtype))
Esempio n. 7
0
def test_unary_bool_operator_array(
    rle_bool_series: pd.Series,
    uncompressed_bool_series: pd.Series,
    unary_bool_operator: FUnaryBoolOperator,
) -> None:
    """Apply a boolean unary operator directly to the series' underlying arrays."""
    result = unary_bool_operator(rle_bool_series.array)
    assert result.dtype == RLEDtype(bool)

    # Reference: the same operator applied to the uncompressed array.
    reference = unary_bool_operator(uncompressed_bool_series.array)
    npt.assert_array_equal(result, reference)
Esempio n. 8
0
def test_factorize_int() -> None:
    """Factorize an int32 RLE array and verify both the codes and the uniques."""
    rle = RLEArray._from_sequence([42, -10, -10], dtype=RLEDtype(np.int32))
    codes, uniques = rle.factorize()

    # Codes: first-seen order, so 42 -> 0 and -10 -> 1.
    want_codes = np.array([0, 1, 1], dtype=np.int64)
    assert codes.dtype == want_codes.dtype
    npt.assert_array_equal(codes, want_codes)

    # Uniques: the distinct values in order of first appearance.
    want_uniques = RLEArray._from_sequence([42, -10], dtype=np.int32)
    assert uniques.dtype == want_uniques.dtype
    npt.assert_array_equal(uniques, want_uniques)
Esempio n. 9
0
def test_from_sequence_bool() -> None:
    """Building an RLE bool array accepts 0/1 inputs and rejects everything else."""
    # 0/1 values are accepted whether they arrive as integers or floats.
    for source_dtype in (np.int64, np.float64):
        values = np.array([0, 1], dtype=source_dtype)
        array = RLEArray._from_sequence(values, dtype=RLEDtype(bool))
        npt.assert_array_equal(array, np.array([False, True]))

    # Each entry pairs an invalid input with the error message it must trigger.
    rejected = [
        (np.array([1, 2], dtype=np.int64), "Need to pass bool-like values"),
        (np.array([-1, 1], dtype=np.int64), "Need to pass bool-like values"),
        (
            np.array([np.nan, 1.0], dtype=np.float64),
            "Masked booleans are not supported",
        ),
    ]
    for values, message in rejected:
        with pytest.raises(TypeError, match=message):
            RLEArray._from_sequence(values, dtype=RLEDtype(bool))
Esempio n. 10
0
def test_binary_operator_scalar(
    rle_series: pd.Series,
    uncompressed_series: pd.Series,
    scalar: float,
    binary_operator: FBinaryOperator,
) -> None:
    """Check a binary operator with a scalar operand on RLE vs. plain data."""
    result = binary_operator(rle_series, scalar)
    assert result.dtype == RLEDtype(float)

    # Reference: the same operator on the plain series, cast via the dtype's
    # string name.
    reference = binary_operator(uncompressed_series, scalar)
    reference = reference.astype("RLEDtype[float]")
    pd.testing.assert_series_equal(result, reference)
Esempio n. 11
0
def test_compare_scalar(
    rle_series: pd.Series,
    uncompressed_series: pd.Series,
    scalar: float,
    compare_operator: FCompareOperator,
) -> None:
    """Check a comparison against a scalar on RLE vs. plain data."""
    result = compare_operator(rle_series, scalar)
    assert result.dtype == RLEDtype(bool)

    # Reference: the same comparison on the plain series, cast via the dtype's
    # string name.
    reference = compare_operator(uncompressed_series, scalar)
    reference = reference.astype("RLEDtype[bool]")
    pd.testing.assert_series_equal(result, reference)
Esempio n. 12
0
def test_no_copy(series: pd.Series) -> None:
    """Casting to the identical dtype with ``copy=False`` must reuse the values."""
    unchanged_dtype = series.dtype
    recast = series.astype(unchanged_dtype, copy=False)

    # Identity check, not equality: the very same values object must be reused.
    assert recast.values is series.values
    assert recast.dtype == RLEDtype(int)
Esempio n. 13
0
def series() -> pd.Series:
    """Return the integers ``[1, 1, 2]`` as a series cast to ``RLEDtype(int)``."""
    plain = pd.Series([1, 1, 2])
    return plain.astype(RLEDtype(int))
Esempio n. 14
0
import numpy as np
import pytest

from rle_array import RLEDtype


@pytest.mark.parametrize(
    "a,b,expected",
    [
        (
            # a
            RLEDtype(int),
            # b
            RLEDtype(int),
            # expected
            True,
        ),
        (
            # a
            RLEDtype(int),
            # b
            RLEDtype(float),
            # expected
            False,
        ),
        (
            # a
            RLEDtype(int),
            # b
            RLEDtype(np.int64),
            # expected
Esempio n. 15
0
def test_append_mixed() -> None:
    """Concatenating a plain int8 series with an RLE int8 series gives plain int8."""
    plain = pd.Series([1], dtype=np.int8)
    compressed = pd.Series([1], dtype=RLEDtype(np.int8))

    combined = pd.concat([plain, compressed])
    assert combined.dtype == np.int8
Esempio n. 16
0
def data_rle(data_orig: pd.Series) -> pd.Series:
    """Return ``data_orig`` cast to the ``RLEDtype`` built from its own dtype."""
    rle_dtype = RLEDtype(data_orig.dtype)
    return data_orig.astype(rle_dtype)
Esempio n. 17
0
def data_rle_bool(data_orig_bool):
    """Return ``data_orig_bool`` cast to the ``RLEDtype`` built from its own dtype."""
    rle_dtype = RLEDtype(data_orig_bool.dtype)
    return data_orig_bool.astype(rle_dtype)
Esempio n. 18
0
def test_copy_different_dtype(series: pd.Series) -> None:
    """Casting to a different RLE dtype yields new values even with ``copy=False``."""
    target_dtype = RLEDtype(float)
    converted = series.astype(target_dtype, copy=False)

    # A dtype change cannot reuse the original values object.
    assert converted.values is not series.values
    assert converted.dtype == RLEDtype(float)
Esempio n. 19
0
def test_get_common_dtype(dtype: RLEDtype, dtypes: List[Any], expected: Any) -> None:
    """``dtype._get_common_dtype(dtypes)`` must equal ``expected``."""
    assert dtype._get_common_dtype(dtypes) == expected
Esempio n. 20
0
def dtype():
    """Fixture supplying the ExtensionDtype under validation: ``RLEDtype(np.float32)``."""
    float32_rle = RLEDtype(np.float32)
    return float32_rle
Esempio n. 21
0
def data_rle_bool(data_orig_bool: pd.Series) -> pd.Series:
    """Return ``data_orig_bool`` cast to the ``RLEDtype`` built from its own dtype."""
    source_dtype = data_orig_bool.dtype
    return data_orig_bool.astype(RLEDtype(source_dtype))
Esempio n. 22
0
from typing import Any, List

import numpy as np
import pytest

from rle_array import RLEDtype


@pytest.mark.parametrize(
    "a, b, expected",
    [
        (
            # a
            RLEDtype(int),
            # b
            RLEDtype(int),
            # expected
            True,
        ),
        (
            # a
            RLEDtype(int),
            # b
            RLEDtype(float),
            # expected
            False,
        ),
        (
            # a
            RLEDtype(int),
            # b
Esempio n. 23
0
def data_rle(data_orig):
    """Return ``data_orig`` cast to the ``RLEDtype`` built from its own dtype."""
    source_dtype = data_orig.dtype
    return data_orig.astype(RLEDtype(source_dtype))
Esempio n. 24
0
def series_rle(series_orig: pd.Series) -> pd.Series:
    """Return ``series_orig`` cast to the ``RLEDtype`` built from its own dtype."""
    rle_dtype = RLEDtype(series_orig.dtype)
    return series_orig.astype(rle_dtype)
Esempio n. 25
0
def series_rle(series_orig):
    """Return ``series_orig`` cast to the ``RLEDtype`` built from its own dtype."""
    source_dtype = series_orig.dtype
    return series_orig.astype(RLEDtype(source_dtype))