def test_factorize_int() -> None:
    """Factorize an int32 RLE array and verify both codes and uniques.

    The codes are expected as an ``int64`` numpy array; the uniques are
    expected as an RLE array holding the distinct values in order of first
    appearance.
    """
    rle = RLEArray._from_sequence([42, -10, -10], dtype=RLEDtype(np.int32))
    want_codes = np.array([0, 1, 1], dtype=np.int64)
    want_uniques = RLEArray._from_sequence([42, -10], dtype=np.int32)

    got_codes, got_uniques = rle.factorize()

    # Codes: dtype first, then content.
    assert got_codes.dtype == want_codes.dtype
    npt.assert_array_equal(got_codes, want_codes)

    # Uniques: dtype first, then content.
    assert got_uniques.dtype == want_uniques.dtype
    npt.assert_array_equal(got_uniques, want_uniques)
def test_groupby_bool_first() -> None:
    """Check that ``groupby(...).first()`` on a bool RLE column keeps the RLE dtype."""
    x_column = pd.Series([True, True], dtype=RLEDtype(bool))
    frame = pd.DataFrame({"x": x_column, "g": 1})

    first_per_group = frame.groupby("g")["x"].first()

    assert first_per_group.dtype == RLEDtype(bool)
    # Both rows share group 1, so exactly one value is expected back.
    npt.assert_array_equal(first_per_group.array, RLEArray._from_sequence([True]))
def test_bool_ensure_int_or_float() -> None:
    """Check that ``ensure_int_or_float`` turns a bool RLE array into ``int64`` 0/1."""
    bool_rle = RLEArray._from_sequence([False, True], dtype=np.bool_)
    want = np.array([0, 1], dtype=np.int64)

    got = ensure_int_or_float(bool_rle)

    assert got.dtype == want.dtype
    npt.assert_array_equal(got, want)
def test_fail_two_dim_indexing() -> None:
    """Check that indexing with two parameters raises ``NotImplementedError``."""
    rle = RLEArray._from_sequence(range(10))
    expected_message = "__getitem__ does currently only work w/ a single parameter"

    with pytest.raises(NotImplementedError, match=expected_message):
        # The expression is evaluated only for the exception it triggers.
        rle[1, 2]
def test_inplace_update() -> None:
    """Check that a boolean-mask assignment updates values and keeps ``int64``."""
    rle = RLEArray._from_sequence([1], dtype=np.int64)
    want = np.array([2], dtype=np.int64)

    # Overwrite the single element through a boolean mask.
    rle[[True]] = 2

    npt.assert_array_equal(rle, want)
    # Neither the wrapped dtype nor the internal data may change dtype.
    assert rle._dtype._dtype == np.int64
    assert rle._data.dtype == np.int64
def test_square_out(array_orig: np.ndarray, array_rle: RLEArray, out_is_rle: bool) -> None:
    """Check ``np.square(..., out=...)`` on an RLE array against plain numpy.

    Parameters
    ----------
    array_orig
        Plain numpy input array.
    array_rle
        RLE input that is squared alongside ``array_orig``.
    out_is_rle
        If true, the ``out`` target for the RLE call is an ``RLEArray``;
        otherwise it is a copy of the plain numpy ``out`` buffer.
    """
    # Zero-filled output buffer with the same length and dtype as the input.
    target_plain = np.array([0] * len(array_orig), dtype=array_orig.dtype)
    target_rle = (
        RLEArray._from_sequence(target_plain) if out_is_rle else target_plain.copy()
    )

    np.square(array_orig, out=target_plain)
    np.square(array_rle, out=target_rle)

    npt.assert_array_equal(target_plain, target_rle)
def test_pickle() -> None:
    """Check that an RLE array survives a pickle roundtrip as an independent copy."""
    source = RLEArray._from_sequence([1])

    # roundtrip
    restored = pickle.loads(pickle.dumps(source))
    npt.assert_array_equal(source, restored)

    # views must not be linked (A): writing to the source leaves the copy alone
    restored_snapshot = restored.copy()
    source[:] = 2
    npt.assert_array_equal(restored, restored_snapshot)

    # views must not be linked (B): writing to the copy leaves the source alone
    source_snapshot = source.copy()
    restored[:] = 3
    npt.assert_array_equal(source, source_snapshot)
def test_from_sequence_bool() -> None:
    """Check which inputs ``_from_sequence`` accepts for a bool RLE dtype.

    Integer and float arrays containing only 0 and 1 are converted; other
    integers and NaN are rejected with ``TypeError``.
    """
    want = np.array([False, True])

    # 0/1 values convert cleanly, whether they arrive as ints or floats.
    convertible = (
        np.array([0, 1], dtype=np.int64),
        np.array([0.0, 1.0], dtype=np.float64),
    )
    for raw in convertible:
        got = RLEArray._from_sequence(raw, dtype=RLEDtype(bool))
        npt.assert_array_equal(got, want)

    # Integers other than 0 and 1 are not bool-like.
    not_bool_like = (
        np.array([1, 2], dtype=np.int64),
        np.array([-1, 1], dtype=np.int64),
    )
    for raw in not_bool_like:
        with pytest.raises(TypeError, match="Need to pass bool-like values"):
            RLEArray._from_sequence(raw, dtype=RLEDtype(bool))

    # NaN would require a masked boolean, which is rejected.
    with_nan = np.array([np.nan, 1.0], dtype=np.float64)
    with pytest.raises(TypeError, match="Masked booleans are not supported"):
        RLEArray._from_sequence(with_nan, dtype=RLEDtype(bool))
def test_mean_divisor_overflow() -> None:
    """Check that the mean of 256 ones stored as ``uint8`` is 1.

    The element count (256) is one more than the largest ``uint8`` value.
    """
    # https://github.com/JDASoftwareGroup/rle-array/issues/22
    ones = RLEArray._from_sequence([1] * 256, dtype=np.uint8)
    assert ones.mean() == 1
def test_object_isna() -> None:
    """Check that ``isna`` flags ``None`` in an object-dtype RLE array."""
    mixed = RLEArray._from_sequence(["foo", None], dtype=object)
    want = np.asarray([False, True])

    got = mixed.isna()

    npt.assert_equal(got, want)
def rle_bool_series2(bool_values: np.ndarray) -> pd.Series:
    """Return a series backed by an RLE array of ``bool_values`` in reverse order.

    NOTE(review): presumably registered as a pytest fixture; the decorator is
    not visible in this part of the file.
    """
    # TODO: Use `index=np.arange(len(bool_values)) + 1`.
    #       For some reason, pandas casts us back to dtype=bool in that case.
    reversed_values = bool_values[::-1]
    rle = RLEArray._from_sequence(reversed_values)
    return pd.Series(rle)
def rle_bool_series(bool_values: np.ndarray) -> pd.Series:
    """Return a series backed by an RLE array built from ``bool_values``.

    NOTE(review): presumably registered as a pytest fixture; the decorator is
    not visible in this part of the file.
    """
    rle = RLEArray._from_sequence(bool_values)
    return pd.Series(rle)
def rle_series2(values: np.ndarray) -> pd.Series:
    """Return an RLE-backed series of ``values`` reversed, indexed from 1.

    NOTE(review): presumably registered as a pytest fixture; the decorator is
    not visible in this part of the file.
    """
    rle = RLEArray._from_sequence(values[::-1])
    # Index runs 1..len(values) rather than the default 0-based range.
    shifted_index = np.arange(len(values)) + 1
    return pd.Series(rle, index=shifted_index)
def test_different_length_raises(values: np.ndarray) -> None:
    """Check that adding RLE arrays of different lengths raises ``ValueError``."""
    full = RLEArray._from_sequence(values)
    # Same data with the last element dropped, so the lengths differ by one.
    truncated = RLEArray._from_sequence(values[:-1])

    with pytest.raises(ValueError, match="arrays have different lengths"):
        full + truncated
def array_rle(array_orig: np.ndarray) -> RLEArray:
    """Return an ``RLEArray`` built from ``array_orig``.

    NOTE(review): presumably registered as a pytest fixture; the decorator is
    not visible in this part of the file.
    """
    rle = RLEArray._from_sequence(array_orig)
    return rle