Exemple #1
0
def test_factorize_int() -> None:
    """Check ``factorize`` on an int32 RLE array containing a repeated value.

    The codes must come back as int64 ``[0, 1, 1]``; the uniques must match an
    RLE array built from ``[42, -10]`` in both dtype and content.
    """
    source = RLEArray._from_sequence([42, -10, -10], dtype=RLEDtype(np.int32))
    want_codes = np.array([0, 1, 1], dtype=np.int64)
    want_uniques = RLEArray._from_sequence([42, -10], dtype=np.int32)

    got_codes, got_uniques = source.factorize()

    # Codes: dtype first, then content.
    assert got_codes.dtype == want_codes.dtype
    npt.assert_array_equal(got_codes, want_codes)

    # Uniques: dtype first, then content.
    assert got_uniques.dtype == want_uniques.dtype
    npt.assert_array_equal(got_uniques, want_uniques)
def test_add_unhandled(array_orig: np.ndarray, array_rle: RLEArray,
                       t: type) -> None:
    """Adding an object of type ``t`` via the ufunc hook gives ``NotImplemented``.

    ``t`` is called on ``array_orig`` to build the other operand, and
    ``__array_ufunc__`` is invoked directly on ``array_rle``.
    """
    wrapped = t(array_orig)

    # the pandas docs say we should not handle these
    outcome = array_rle.__array_ufunc__(np.add, "__call__", array_rle, wrapped)
    assert outcome is NotImplemented
Exemple #3
0
def test_fail_two_dim_indexing() -> None:
    """Indexing with two parameters must raise ``NotImplementedError``."""
    ten_items = RLEArray._from_sequence(range(10))
    message = "__getitem__ does currently only work w/ a single parameter"
    with pytest.raises(NotImplementedError, match=message):
        ten_items[1, 2]
Exemple #4
0
def test_bool_ensure_int_or_float() -> None:
    """``ensure_int_or_float`` on a bool RLE array must give int64 ``[0, 1]``."""
    flags = RLEArray._from_sequence([False, True], dtype=np.bool_)
    got = ensure_int_or_float(flags)

    want = np.array([0, 1], dtype=np.int64)
    assert got.dtype == want.dtype
    npt.assert_array_equal(got, want)
Exemple #5
0
def test_groupby_bool_first() -> None:
    """``groupby(...).first()`` on a bool RLE column must keep the RLE dtype.

    The single group's result is also compared against an RLE array
    holding one ``True``.
    """
    column = pd.Series([True, True], dtype=RLEDtype(bool))
    frame = pd.DataFrame({"x": column, "g": 1})

    first_per_group = frame.groupby("g")["x"].first()
    assert first_per_group.dtype == RLEDtype(bool)

    want = RLEArray._from_sequence([True])
    npt.assert_array_equal(first_per_group.array, want)
def test_different_lengths() -> None:
    """Three data values with only two positions must raise ``ValueError``."""
    values = np.asarray([1.0, 2.0, 3.0])
    run_positions = np.asarray([10, 20], dtype=POSITIONS_DTYPE)
    message = "data and positions must have same length but have 3 and 2"
    with pytest.raises(ValueError, match=message):
        RLEArray(data=values, positions=run_positions)
Exemple #7
0
def data_for_grouping():
    """Provide data for factorization, grouping, and unique tests.

    The expected layout is [B, B, NA, NA, A, A, B, C],
    where A < B < C and NA is missing.
    """
    values = np.asarray([2.0, np.nan, 1.0, 2.0, 3.0], dtype=np.float32)
    run_positions = np.asarray([2, 4, 6, 7, 8], dtype=POSITIONS_DTYPE)
    return RLEArray(data=values, positions=run_positions)
Exemple #8
0
def data():
    """Provide a length-100 array for this type.

    Contract:
    * data[0] and data[1] should both be non missing
    * data[0] and data[1] should not be equal
    """
    values = np.asarray([13, -1, -2, 42], dtype=np.float32)
    run_positions = np.asarray([1, 2, 4, 100], dtype=POSITIONS_DTYPE)
    return RLEArray(data=values, positions=run_positions)
Exemple #9
0
def data_missing_for_sorting():
    """Provide a length-3 array with a known sort order.

    The three items are [B, NA, A], with A < B and NA missing.
    """
    values = np.asarray([2.0, np.nan, 1.0], dtype=np.float32)
    run_positions = np.asarray([1, 2, 3], dtype=POSITIONS_DTYPE)
    return RLEArray(data=values, positions=run_positions)
Exemple #10
0
def test_inplace_update() -> None:
    """Assign through a boolean mask and check the value and dtypes afterwards."""
    target = RLEArray._from_sequence([1], dtype=np.int64)
    mask = [True]
    target[mask] = 2

    npt.assert_array_equal(target, np.array([2], dtype=np.int64))

    # Both the wrapped dtype and the underlying data must still be int64.
    wrapped_dtype = target._dtype._dtype
    assert wrapped_dtype == np.int64
    storage_dtype = target._data.dtype
    assert storage_dtype == np.int64
def test_data_invalid_dims() -> None:
    """A two-dimensional ``data`` array must be rejected with ``ValueError``."""
    matrix = np.asarray([[1.0, 2.0], [3.0, 4.0]])
    run_positions = np.asarray([10, 20], dtype=POSITIONS_DTYPE)
    message = "data must be an 1-dimensional ndarray but has 2 dimensions"
    with pytest.raises(ValueError, match=message):
        RLEArray(data=matrix, positions=run_positions)
Exemple #12
0
def data_for_sorting() -> RLEArray:
    """Provide a length-3 array with a known sort order.

    The three items are [B, C, A], with A < B < C.
    """
    values = np.asarray([2.0, 3.0, 1.0], dtype=np.float32)
    run_positions = np.asarray([1, 2, 3], dtype=POSITIONS_DTYPE)
    return RLEArray(data=values, positions=run_positions)
Exemple #13
0
def test_square_out(array_orig, array_rle, out_is_rle):
    """``np.square`` with ``out=`` must agree for plain and RLE inputs.

    The output buffer for the RLE call is either an ``RLEArray`` built from
    the compressed zero buffer (``out_is_rle`` truthy) or a plain copy of it.
    """
    zeros = [0] * len(array_orig)
    out_orig = np.array(zeros, dtype=array_orig.dtype)
    out_rle = RLEArray(*compress(out_orig)) if out_is_rle else out_orig.copy()

    # Same ufunc, once on the plain array and once on the RLE array.
    np.square(array_orig, out=out_orig)
    np.square(array_rle, out=out_rle)

    npt.assert_array_equal(out_orig, out_rle)
def test_square_out(array_orig: np.ndarray, array_rle: RLEArray,
                    out_is_rle: bool) -> None:
    """``np.square`` with ``out=`` must agree for plain and RLE inputs.

    The output buffer for the RLE call is either an ``RLEArray`` built from
    the zero buffer (``out_is_rle`` truthy) or a plain copy of it.
    """
    zeros = [0] * len(array_orig)
    out_orig = np.array(zeros, dtype=array_orig.dtype)
    out_rle = RLEArray._from_sequence(out_orig) if out_is_rle else out_orig.copy()

    # Same ufunc, once on the plain array and once on the RLE array.
    np.square(array_orig, out=out_orig)
    np.square(array_rle, out=out_rle)

    npt.assert_array_equal(out_orig, out_rle)
Exemple #15
0
def test_pickle() -> None:
    """Round-trip an array through pickle and check the copy is independent."""
    original = RLEArray._from_sequence([1])

    # roundtrip
    restored = pickle.loads(pickle.dumps(original))
    npt.assert_array_equal(original, restored)

    # views must not be linked (A): writing to the original
    # must leave the restored array untouched
    restored_snapshot = restored.copy()
    original[:] = 2
    npt.assert_array_equal(restored, restored_snapshot)

    # views must not be linked (B): writing to the restored array
    # must leave the original untouched
    original_snapshot = original.copy()
    restored[:] = 3
    npt.assert_array_equal(original, original_snapshot)
Exemple #16
0
def test_from_sequence_bool() -> None:
    """Check which inputs ``_from_sequence`` accepts for a bool RLE dtype.

    Integer and float arrays holding only 0 and 1 convert to
    ``[False, True]``. Other integer values raise a "bool-like" ``TypeError``
    and a float array containing NaN raises a "masked booleans" ``TypeError``.
    """
    accepted = (
        np.array([0, 1], dtype=np.int64),
        np.array([0.0, 1.0], dtype=np.float64),
    )
    for values in accepted:
        converted = RLEArray._from_sequence(values, dtype=RLEDtype(bool))
        npt.assert_array_equal(converted, np.array([False, True]))

    rejected = (
        (np.array([1, 2], dtype=np.int64), "Need to pass bool-like values"),
        (np.array([-1, 1], dtype=np.int64), "Need to pass bool-like values"),
        (
            np.array([np.nan, 1.0], dtype=np.float64),
            "Masked booleans are not supported",
        ),
    )
    for values, message in rejected:
        with pytest.raises(TypeError, match=message):
            RLEArray._from_sequence(values, dtype=RLEDtype(bool))
Exemple #17
0
def data_for_twos():
    """Provide a length-100 array in which all the elements are two."""
    values = np.asarray([2.0], dtype=np.float32)
    run_positions = np.asarray([100], dtype=POSITIONS_DTYPE)
    return RLEArray(data=values, positions=run_positions)
def test_positions_invalid_type() -> None:
    """Passing a plain int as ``positions`` must raise ``TypeError``."""
    message = "positions must be an ndarray but is int"
    values = np.asarray([1.0, 2.0])
    with pytest.raises(TypeError, match=message):
        RLEArray(data=values, positions=1)
def test_data_invalid_type() -> None:
    """Passing a plain int as ``data`` must raise ``TypeError``."""
    message = "data must be an ndarray but is int"
    run_positions = np.asarray([10, 20], dtype=POSITIONS_DTYPE)
    with pytest.raises(TypeError, match=message):
        RLEArray(data=1, positions=run_positions)
def test_valid() -> None:
    """Constructing from two data values and two positions must not raise."""
    values = np.asarray([1.0, 2.0])
    run_positions = np.asarray([10, 20], dtype=POSITIONS_DTYPE)
    RLEArray(data=values, positions=run_positions)
def test_positions_invalid_dtype() -> None:
    """Positions given as uint64 must be rejected with ``ValueError``."""
    message = "positions must have dtype int64 but has uint64"
    values = np.asarray([1.0, 2.0])
    unsigned_positions = np.asarray([10, 20], dtype=np.uint64)
    with pytest.raises(ValueError, match=message):
        RLEArray(data=values, positions=unsigned_positions)
Exemple #22
0
def test_object_isna() -> None:
    """``isna`` on an object array must flag ``None`` but not the string."""
    mixed = RLEArray._from_sequence(["foo", None], dtype=object)
    got = mixed.isna()
    npt.assert_equal(got, np.asarray([False, True]))
Exemple #23
0
def test_mean_divisor_overflow() -> None:
    """The mean of 256 ones stored as uint8 must equal 1."""
    # https://github.com/JDASoftwareGroup/rle-array/issues/22
    ones = [1] * 256
    rle_ones = RLEArray._from_sequence(ones, dtype=np.uint8)
    assert rle_ones.mean() == 1
Exemple #24
0
def rle_bool_series2(bool_values: np.ndarray) -> pd.Series:
    """Build a series from ``bool_values`` in reverse order via ``RLEArray``."""
    # TODO: Use `index=np.arange(len(bool_values)) + 1`.
    #       For some reason, pandas casts us back to dtype=bool in that case.
    reversed_values = bool_values[::-1]
    rle = RLEArray._from_sequence(reversed_values)
    return pd.Series(rle)
def test_not_sorted_2() -> None:
    """Two equal positions must be rejected as not strictly sorted."""
    values = np.asarray([1.0, 2.0])
    repeated_positions = np.asarray([10, 10], dtype=POSITIONS_DTYPE)
    message = "positions must be strictly sorted"
    with pytest.raises(ValueError, match=message):
        RLEArray(data=values, positions=repeated_positions)
Exemple #26
0
def rle_bool_series(bool_values: np.ndarray) -> pd.Series:
    """Build a series from ``bool_values`` via ``RLEArray._from_sequence``."""
    rle = RLEArray._from_sequence(bool_values)
    return pd.Series(rle)
Exemple #27
0
def rle_series2(values: np.ndarray) -> pd.Series:
    """Build a series from reversed ``values`` with an index starting at 1."""
    rle = RLEArray._from_sequence(values[::-1])
    one_based_index = np.arange(len(values)) + 1
    return pd.Series(rle, index=one_based_index)
Exemple #28
0
def test_different_length_raises(values: np.ndarray) -> None:
    """Adding RLE arrays of different lengths must raise ``ValueError``."""
    full = RLEArray._from_sequence(values)
    # Drop the last element so the two operands differ in length.
    shortened = RLEArray._from_sequence(values[:-1])
    message = "arrays have different lengths"
    with pytest.raises(ValueError, match=message):
        full + shortened
def array_rle(array_orig: np.ndarray) -> RLEArray:
    """Build an ``RLEArray`` from ``array_orig`` via ``_from_sequence``."""
    encoded = RLEArray._from_sequence(array_orig)
    return encoded
Exemple #30
0
def data_missing():
    """Provide a length-2 array with [NA, Valid]."""
    values = np.asarray([np.nan, 42], dtype=np.float32)
    run_positions = np.asarray([1, 2], dtype=POSITIONS_DTYPE)
    return RLEArray(data=values, positions=run_positions)