Example #1
0
def test_factorize_int() -> None:
    """Factorize an int32 RLE array and compare codes and uniques with expectations."""
    rle = RLEArray._from_sequence([42, -10, -10], dtype=RLEDtype(np.int32))
    codes, uniques = rle.factorize()

    # Codes: dtype must match int64 and the values must be [0, 1, 1].
    want_codes = np.array([0, 1, 1], dtype=np.int64)
    assert codes.dtype == want_codes.dtype
    npt.assert_array_equal(codes, want_codes)

    # Uniques: dtype must match that of an RLE array built from the distinct
    # values in first-seen order.
    want_uniques = RLEArray._from_sequence([42, -10], dtype=np.int32)
    assert uniques.dtype == want_uniques.dtype
    npt.assert_array_equal(uniques, want_uniques)
Example #2
0
def test_groupby_bool_first() -> None:
    """``groupby(...).first()`` on an RLE bool column must keep the RLE bool dtype."""
    bool_column = pd.Series([True, True], dtype=RLEDtype(bool))
    frame = pd.DataFrame({"x": bool_column, "g": 1})

    first_per_group = frame.groupby("g")["x"].first()

    assert first_per_group.dtype == RLEDtype(bool)
    # Both rows share group 1, so a single ``True`` is expected.
    npt.assert_array_equal(first_per_group.array, RLEArray._from_sequence([True]))
Example #3
0
def test_bool_ensure_int_or_float() -> None:
    """``ensure_int_or_float`` on a bool RLE array must give an int64 array of 0/1."""
    converted = ensure_int_or_float(
        RLEArray._from_sequence([False, True], dtype=np.bool_)
    )

    want = np.array([0, 1], dtype=np.int64)
    assert converted.dtype == want.dtype
    npt.assert_array_equal(converted, want)
Example #4
0
def test_fail_two_dim_indexing() -> None:
    """Indexing an RLE array with two indices must raise ``NotImplementedError``."""
    expected_message = "__getitem__ does currently only work w/ a single parameter"
    ten_items = RLEArray._from_sequence(range(10))

    with pytest.raises(NotImplementedError, match=expected_message):
        ten_items[1, 2]
Example #5
0
def test_inplace_update() -> None:
    """Assigning through a boolean mask must update the value and keep int64 dtypes."""
    rle = RLEArray._from_sequence([1], dtype=np.int64)

    # Overwrite the single element via a boolean mask.
    rle[[True]] = 2

    npt.assert_array_equal(rle, np.array([2], dtype=np.int64))

    # Neither the wrapped dtype nor the backing data may have changed type.
    assert rle._dtype._dtype == np.int64
    assert rle._data.dtype == np.int64
Example #6
0
def test_square_out(
    array_orig: np.ndarray, array_rle: RLEArray, out_is_rle: bool
) -> None:
    """Compare ``np.square(..., out=...)`` results for the plain and the RLE input.

    ``out_is_rle`` selects whether the output buffer used for the RLE input is
    itself an ``RLEArray`` or a plain copy of the NumPy buffer.
    """
    # Zero-filled output buffer with the same length and dtype as the input.
    zero_fill = [0] * len(array_orig)
    out_orig = np.array(zero_fill, dtype=array_orig.dtype)
    out_rle = RLEArray._from_sequence(out_orig) if out_is_rle else out_orig.copy()

    np.square(array_orig, out=out_orig)
    np.square(array_rle, out=out_rle)

    npt.assert_array_equal(out_orig, out_rle)
Example #7
0
def test_pickle() -> None:
    """Round-trip an RLE array through pickle and check the copies are independent."""
    source = RLEArray._from_sequence([1])

    # roundtrip
    restored = pickle.loads(pickle.dumps(source))
    npt.assert_array_equal(source, restored)

    # views must not be linked (A): writing to the source leaves the restored
    # array untouched
    restored_snapshot = restored.copy()
    source[:] = 2
    npt.assert_array_equal(restored, restored_snapshot)

    # views must not be linked (B): writing to the restored array leaves the
    # source untouched
    source_snapshot = source.copy()
    restored[:] = 3
    npt.assert_array_equal(source, source_snapshot)
Example #8
0
def test_from_sequence_bool() -> None:
    """Check ``_from_sequence`` with a bool ``RLEDtype`` for accepted and rejected inputs."""
    # Integer and float inputs holding only 0 and 1 must compare equal to
    # [False, True].
    accepted = (
        ([0, 1], np.int64),
        ([0.0, 1.0], np.float64),
    )
    for raw_values, source_dtype in accepted:
        converted = RLEArray._from_sequence(
            np.array(raw_values, dtype=source_dtype), dtype=RLEDtype(bool)
        )
        npt.assert_array_equal(converted, np.array([False, True]))

    # Each of these inputs must raise TypeError with the paired message.
    rejected = (
        (np.array([1, 2], dtype=np.int64), "Need to pass bool-like values"),
        (np.array([-1, 1], dtype=np.int64), "Need to pass bool-like values"),
        (
            np.array([np.nan, 1.0], dtype=np.float64),
            "Masked booleans are not supported",
        ),
    )
    for bad_values, message in rejected:
        with pytest.raises(TypeError, match=message):
            RLEArray._from_sequence(bad_values, dtype=RLEDtype(bool))
Example #9
0
def test_mean_divisor_overflow() -> None:
    """The mean of 256 ones stored as uint8 must be 1.

    The element count (256) is one more than the largest uint8 value (255).
    """
    # https://github.com/JDASoftwareGroup/rle-array/issues/22
    ones = [1] * 256
    rle = RLEArray._from_sequence(ones, dtype=np.uint8)
    assert rle.mean() == 1
Example #10
0
def test_object_isna() -> None:
    """``isna`` on an object RLE array must flag only the ``None`` entry."""
    rle = RLEArray._from_sequence(["foo", None], dtype=object)
    npt.assert_equal(rle.isna(), np.asarray([False, True]))
Example #11
0
def rle_bool_series2(bool_values: np.ndarray) -> pd.Series:
    """Return a Series wrapping an RLE array built from ``bool_values`` reversed."""
    # TODO: Use `index=np.arange(len(bool_values)) + 1`.
    #       For some reason, pandas casts us back to dtype=bool in that case.
    reversed_values = bool_values[::-1]
    rle = RLEArray._from_sequence(reversed_values)
    return pd.Series(rle)
Example #12
0
def rle_bool_series(bool_values: np.ndarray) -> pd.Series:
    """Return a Series wrapping an RLE array built from ``bool_values``."""
    rle = RLEArray._from_sequence(bool_values)
    return pd.Series(rle)
Example #13
0
def rle_series2(values: np.ndarray) -> pd.Series:
    """Return a Series of ``values`` reversed, RLE-encoded, with an index starting at 1."""
    reversed_rle = RLEArray._from_sequence(values[::-1])
    # Shift the default 0-based positions by one.
    shifted_index = np.arange(len(values)) + 1
    return pd.Series(reversed_rle, index=shifted_index)
Example #14
0
def test_different_length_raises(values: np.ndarray) -> None:
    """Adding two RLE arrays of different lengths must raise ``ValueError``."""
    full = RLEArray._from_sequence(values)
    # Same data with the last element dropped.
    one_shorter = RLEArray._from_sequence(values[:-1])

    with pytest.raises(ValueError, match="arrays have different lengths"):
        full + one_shorter
Example #15
0
def array_rle(array_orig: np.ndarray) -> RLEArray:
    """Return an ``RLEArray`` built from ``array_orig``."""
    encoded = RLEArray._from_sequence(array_orig)
    return encoded