Ejemplo n.º 1
0
 def test_set_inclusive(self, closed, new_inclusive):
     """Check ``set_inclusive`` against an array built with the new side.

     ``closed`` and ``new_inclusive`` come from the test harness
     (presumably a fixture and a parametrization -- not visible here).
     """
     # GH 21670
     breaks = range(10)
     source = IntervalArray.from_breaks(breaks, inclusive=closed)
     converted = source.set_inclusive(new_inclusive)
     wanted = IntervalArray.from_breaks(breaks, inclusive=new_inclusive)
     tm.assert_extension_array_equal(converted, wanted)
Ejemplo n.º 2
0
def test_arrow_table_roundtrip(breaks):
    """Round-trip an interval column with one missing entry through pyarrow.

    ``breaks`` is supplied by the test harness (presumably a fixture).
    """
    import pyarrow as pa

    from pandas.core.arrays.arrow._arrow_utils import ArrowIntervalType

    intervals = IntervalArray.from_breaks(breaks)
    intervals[1] = None
    frame = pd.DataFrame({"a": intervals})

    # one table: the extension type must be kept in both directions
    arrow_table = pa.table(frame)
    assert isinstance(arrow_table.field("a").type, ArrowIntervalType)
    roundtripped = arrow_table.to_pandas()
    assert isinstance(roundtripped["a"].dtype, pd.IntervalDtype)
    tm.assert_frame_equal(roundtripped, frame)

    # the same table concatenated with itself
    doubled_table = pa.concat_tables([arrow_table, arrow_table])
    doubled_result = doubled_table.to_pandas()
    doubled_frame = pd.concat([frame, frame], ignore_index=True)
    tm.assert_frame_equal(doubled_result, doubled_frame)

    # GH-41040
    # chunked array built from an empty list of chunks
    no_chunks = pa.chunked_array([], type=arrow_table.column(0).type)
    empty_table = pa.table([no_chunks], schema=arrow_table.schema)
    tm.assert_frame_equal(empty_table.to_pandas(), doubled_frame[0:0])
Ejemplo n.º 3
0
 def test_shift(self):
     # https://github.com/pandas-dev/pandas/issues/31495, GH#22428, GH#31502
     a = IntervalArray.from_breaks([1, 2, 3])
     result = a.shift()
     # int -> float
     expected = IntervalArray.from_tuples([(np.nan, np.nan), (1.0, 2.0)])
     tm.assert_interval_array_equal(result, expected)
Ejemplo n.º 4
0
def test_arrow_array_missing():
    """Convert an interval array holding one missing entry to pyarrow."""
    import pyarrow as pa

    from pandas.core.arrays._arrow_utils import ArrowIntervalType

    intervals = IntervalArray.from_breaks([0.0, 1.0, 2.0, 3.0])
    intervals[1] = None

    converted = pa.array(intervals)
    assert isinstance(converted.type, ArrowIntervalType)
    assert converted.type.closed == intervals.closed
    assert converted.type.subtype == pa.float64()

    # fields have missing values (not NaN)
    left_field = pa.array([0.0, None, 2.0], type="float64")
    right_field = pa.array([1.0, None, 3.0], type="float64")
    for field_name, wanted_field in (("left", left_field), ("right", right_field)):
        assert converted.storage.field(field_name).equals(wanted_field)

    # structarray itself also has missing values on the array level
    rows = [
        {"left": lo, "right": hi}
        for lo, hi in ((0.0, 1.0), (None, None), (2.0, 3.0))
    ]
    null_mask = np.array([False, True, False])
    wanted_storage = pa.StructArray.from_pandas(rows, mask=null_mask)
    assert converted.storage.equals(wanted_storage)
Ejemplo n.º 5
0
    def test_index_series_compat(self, op, constructor, expected_type,
                                 assert_func):
        """Compare a boxed IntervalIndex against scalars and list-likes.

        Each comparison result is checked against
        ``self.elementwise_comparison`` wrapped in ``expected_type``.
        """
        # IntervalIndex/Series that rely on IntervalArray for comparisons
        breaks = range(4)
        index = constructor(IntervalIndex.from_breaks(breaks))

        def check(other):
            # one comparison: actual op result vs. elementwise reference
            actual = op(index, other)
            reference = expected_type(
                self.elementwise_comparison(op, index, other)
            )
            assert_func(actual, reference)

        # scalar comparisons
        check(index[0])
        check(breaks[0])

        # list-like comparisons
        check(IntervalArray.from_breaks(breaks))
        check([index[0], breaks[0], "foo"])
Ejemplo n.º 6
0
    def test_shift_datetime(self):
        a = IntervalArray.from_breaks(pd.date_range("2000", periods=4))
        result = a.shift(2)
        expected = a.take([-1, -1, 0], allow_fill=True)
        tm.assert_interval_array_equal(result, expected)

        result = a.shift(-1)
        expected = a.take([1, 2, -1], allow_fill=True)
        tm.assert_interval_array_equal(result, expected)
Ejemplo n.º 7
0
 def test_get_numeric_data_extension_dtype(self):
     """``_get_numeric_data`` should return only columns 'A' and 'C'."""
     # GH 22290
     columns = {
         'A': integer_array([-10, np.nan, 0, 10, 20, 30], dtype='Int64'),
         'B': Categorical(list('abcabc')),
         'C': integer_array([0, 1, 2, 3, np.nan, 5], dtype='UInt8'),
         'D': IntervalArray.from_breaks(range(7)),
     }
     df = DataFrame(columns)
     numeric_only = df._get_numeric_data()
     wanted = df.loc[:, ['A', 'C']]
     assert_frame_equal(numeric_only, wanted)
Ejemplo n.º 8
0
 def test_get_numeric_data_extension_dtype(self):
     """``_get_numeric_data`` should return only columns 'A' and 'C'."""
     # GH 22290
     columns = {
         'A': integer_array([-10, np.nan, 0, 10, 20, 30], dtype='Int64'),
         'B': Categorical(list('abcabc')),
         'C': integer_array([0, 1, 2, 3, np.nan, 5], dtype='UInt8'),
         'D': IntervalArray.from_breaks(range(7)),
     }
     df = DataFrame(columns)
     numeric_only = df._get_numeric_data()
     wanted = df.loc[:, ['A', 'C']]
     assert_frame_equal(numeric_only, wanted)
Ejemplo n.º 9
0
    def test_where_raises(self, other):
        """Array-level ``_where`` raises on mismatch; ``Series.where`` does not.

        The Series result is compared with the same ``where`` applied to an
        object-dtype copy of the Series.
        """
        # GH#45768 The IntervalArray methods raises; the Series method coerces
        left_closed = IntervalArray.from_breaks([1, 2, 3, 4], closed="left")
        ser = pd.Series(left_closed)
        keep = np.array([True, False, True])

        expect_mismatch = pytest.raises(
            ValueError, match="'value.closed' is 'right', expected 'left'."
        )
        with expect_mismatch:
            ser.array._where(keep, other)

        coerced = ser.where(keep, other=other)
        wanted = ser.astype(object).where(keep, other)
        tm.assert_series_equal(coerced, wanted)
Ejemplo n.º 10
0
 def test_get_numeric_data_extension_dtype(self):
     """``_get_numeric_data`` should return only columns "A" and "C"."""
     # GH 22290
     columns = {
         "A": integer_array([-10, np.nan, 0, 10, 20, 30], dtype="Int64"),
         "B": Categorical(list("abcabc")),
         "C": integer_array([0, 1, 2, 3, np.nan, 5], dtype="UInt8"),
         "D": IntervalArray.from_breaks(range(7)),
     }
     df = DataFrame(columns)
     numeric_only = df._get_numeric_data()
     wanted = df.loc[:, ["A", "C"]]
     assert_frame_equal(numeric_only, wanted)
Ejemplo n.º 11
0
class TestMethods:
    """Checks for ``IntervalArray`` methods, using the ``inclusive`` keyword."""

    @pytest.mark.parametrize(
        "new_inclusive", ["left", "right", "both", "neither"]
    )
    def test_set_inclusive(self, closed, new_inclusive):
        """``set_inclusive`` must equal building with the new side directly."""
        # GH 21670
        breaks = range(10)
        source = IntervalArray.from_breaks(breaks, inclusive=closed)
        converted = source.set_inclusive(new_inclusive)
        wanted = IntervalArray.from_breaks(breaks, inclusive=new_inclusive)
        tm.assert_extension_array_equal(converted, wanted)

    @pytest.mark.parametrize(
        "other",
        [
            Interval(0, 1, inclusive="right"),
            IntervalArray.from_breaks([1, 2, 3, 4], inclusive="right"),
        ],
    )
    def test_where_raises(self, other):
        """Array-level ``_where`` raises on mismatch; ``Series.where`` does not."""
        # GH#45768 The IntervalArray methods raises; the Series method coerces
        left_side = IntervalArray.from_breaks([1, 2, 3, 4], inclusive="left")
        ser = pd.Series(left_side)
        keep = np.array([True, False, True])

        expect_mismatch = pytest.raises(
            ValueError, match="'value.inclusive' is 'right', expected 'left'."
        )
        with expect_mismatch:
            ser.array._where(keep, other)

        coerced = ser.where(keep, other=other)
        wanted = ser.astype(object).where(keep, other)
        tm.assert_series_equal(coerced, wanted)

    def test_shift(self):
        """Shift an integer-break interval array; expect a NaN-filled head."""
        # https://github.com/pandas-dev/pandas/issues/31495, GH#22428, GH#31502
        shifted = IntervalArray.from_breaks([1, 2, 3], "right").shift()
        # int -> float
        nan_pair = (np.nan, np.nan)
        wanted = IntervalArray.from_tuples([nan_pair, (1.0, 2.0)], "right")
        tm.assert_interval_array_equal(shifted, wanted)

    def test_shift_datetime(self):
        """Shift datetime-break intervals and compare with a filling ``take``."""
        # GH#31502, GH#31504
        stamps = date_range("2000", periods=4)
        intervals = IntervalArray.from_breaks(stamps, "right")

        # (periods passed to shift, equivalent take indexer; -1 means fill)
        for periods, indexer in ((2, [-1, -1, 0]), (-1, [1, 2, -1])):
            shifted = intervals.shift(periods)
            wanted = intervals.take(indexer, allow_fill=True)
            tm.assert_interval_array_equal(shifted, wanted)
Ejemplo n.º 12
0
def test_arrow_table_roundtrip_without_metadata(breaks):
    """Round-trip an interval column through pyarrow after dropping metadata.

    ``breaks`` is supplied by the test harness (presumably a fixture).
    """
    import pyarrow as pa

    intervals = IntervalArray.from_breaks(breaks)
    intervals[1] = None
    frame = pd.DataFrame({"a": intervals})

    # remove the metadata
    stripped = pa.table(frame).replace_schema_metadata()
    assert stripped.schema.metadata is None

    roundtripped = stripped.to_pandas()
    assert isinstance(roundtripped["a"].dtype, pd.IntervalDtype)
    tm.assert_frame_equal(roundtripped, frame)
Ejemplo n.º 13
0
    def test_setitem_mismatched_inclusive(self):
        """Assigning values with a different inclusive side must raise."""
        target = IntervalArray.from_breaks(range(4), "right")
        snapshot = target.copy()
        mismatched = target.set_inclusive("both")

        msg = "'value.inclusive' is 'both', expected 'right'"
        everything = slice(None)
        # (key to assign at, factory for the value); the factory is called
        # inside the ``raises`` block so the value is built there as well
        attempts = [
            (0, lambda: mismatched[0]),
            (slice(None, 1), lambda: mismatched[:1]),
            (slice(None, 0), lambda: mismatched[:0]),
            (everything, lambda: mismatched[::-1]),
            (everything, lambda: list(mismatched[::-1])),
            (everything, lambda: mismatched[::-1].astype(object)),
            (everything, lambda: mismatched[::-1].astype("category")),
        ]
        for key, make_value in attempts:
            with pytest.raises(ValueError, match=msg):
                target[key] = make_value()

        # empty list should be no-op
        target[:0] = IntervalArray.from_breaks([], "right")
        tm.assert_interval_array_equal(target, snapshot)
Ejemplo n.º 14
0
def test_from_arrow_from_raw_struct_array():
    """Build an IntervalArray from a plain pyarrow struct array."""
    # in case pyarrow lost the Interval extension type (eg on parquet roundtrip
    # with datetime64[ns] subtype, see GH-45881), still allow conversion
    # from arrow to IntervalArray
    import pyarrow as pa

    struct_arr = pa.array([{"left": 0, "right": 1}, {"left": 1, "right": 2}])
    dtype = pd.IntervalDtype(np.dtype("int64"), closed="neither")
    wanted = IntervalArray.from_breaks(
        np.array([0, 1, 2], dtype="int64"), closed="neither"
    )

    # plain array input
    tm.assert_extension_array_equal(dtype.__from_arrow__(struct_arr), wanted)

    # the same data wrapped in a chunked array
    chunked = pa.chunked_array([struct_arr])
    tm.assert_extension_array_equal(dtype.__from_arrow__(chunked), wanted)
Ejemplo n.º 15
0
class TestMethods:
    """Checks for ``IntervalArray.set_closed`` and ``Series.where``."""

    @pytest.mark.parametrize(
        'new_closed', ['left', 'right', 'both', 'neither']
    )
    def test_set_closed(self, closed, new_closed):
        """``set_closed`` must equal building with the new side directly."""
        # GH 21670
        breaks = range(10)
        source = IntervalArray.from_breaks(breaks, closed=closed)
        converted = source.set_closed(new_closed)
        wanted = IntervalArray.from_breaks(breaks, closed=new_closed)
        tm.assert_extension_array_equal(converted, wanted)

    @pytest.mark.parametrize(
        'other',
        [
            Interval(0, 1, closed='right'),
            IntervalArray.from_breaks([1, 2, 3, 4], closed='right'),
        ],
    )
    def test_where_raises(self, other):
        """``Series.where`` rejects a replacement closed on the other side."""
        left_closed = IntervalArray.from_breaks([1, 2, 3, 4], closed='left')
        ser = pd.Series(left_closed)
        expect_mismatch = pytest.raises(
            ValueError, match="'value.closed' is 'right', expected 'left'."
        )
        with expect_mismatch:
            ser.where([True, False, True], other=other)
Ejemplo n.º 16
0
class TestMethods:
    """Checks for ``IntervalArray`` methods, using the ``closed`` keyword."""

    @pytest.mark.parametrize(
        "new_closed", ["left", "right", "both", "neither"]
    )
    def test_set_closed(self, closed, new_closed):
        """``set_closed`` must equal building with the new side directly."""
        # GH 21670
        breaks = range(10)
        source = IntervalArray.from_breaks(breaks, closed=closed)
        converted = source.set_closed(new_closed)
        wanted = IntervalArray.from_breaks(breaks, closed=new_closed)
        tm.assert_extension_array_equal(converted, wanted)

    @pytest.mark.parametrize(
        "other",
        [
            Interval(0, 1, closed="right"),
            IntervalArray.from_breaks([1, 2, 3, 4], closed="right"),
        ],
    )
    def test_where_raises(self, other):
        """``Series.where`` rejects a replacement closed on the other side."""
        left_closed = IntervalArray.from_breaks([1, 2, 3, 4], closed="left")
        ser = pd.Series(left_closed)
        expect_mismatch = pytest.raises(
            ValueError, match="'value.closed' is 'right', expected 'left'."
        )
        with expect_mismatch:
            ser.where([True, False, True], other=other)

    def test_shift(self):
        """Shift an integer-break interval array; expect a NaN-filled head."""
        # https://github.com/pandas-dev/pandas/issues/31495, GH#22428, GH#31502
        shifted = IntervalArray.from_breaks([1, 2, 3]).shift()
        # int -> float
        nan_pair = (np.nan, np.nan)
        wanted = IntervalArray.from_tuples([nan_pair, (1.0, 2.0)])
        tm.assert_interval_array_equal(shifted, wanted)

    def test_shift_datetime(self):
        """Shift datetime-break intervals and compare with a filling ``take``."""
        # GH#31502, GH#31504
        intervals = IntervalArray.from_breaks(date_range("2000", periods=4))

        # (periods passed to shift, equivalent take indexer; -1 means fill)
        for periods, indexer in ((2, [-1, -1, 0]), (-1, [1, 2, -1])):
            shifted = intervals.shift(periods)
            wanted = intervals.take(indexer, allow_fill=True)
            tm.assert_interval_array_equal(shifted, wanted)
Ejemplo n.º 17
0
class TestMethods(object):
    """Checks for ``IntervalArray`` repeat / set_closed and ``Series.where``."""

    @pytest.mark.parametrize('repeats', [0, 1, 5])
    def test_repeat(self, left_right_dtypes, repeats):
        """Repeating the array must equal repeating each side separately."""
        lower, upper = left_right_dtypes
        repeated = IntervalArray.from_arrays(lower, upper).repeat(repeats)
        wanted = IntervalArray.from_arrays(
            lower.repeat(repeats), upper.repeat(repeats)
        )
        tm.assert_extension_array_equal(repeated, wanted)

    @pytest.mark.parametrize(
        'bad_repeats, msg',
        [
            (-1, 'negative dimensions are not allowed'),
            ('foo', r'invalid literal for (int|long)\(\) with base 10'),
        ],
    )
    def test_repeat_errors(self, bad_repeats, msg):
        """An invalid ``repeats`` value must raise ``ValueError``."""
        intervals = IntervalArray.from_breaks(range(4))
        expect_value_error = pytest.raises(ValueError, match=msg)
        with expect_value_error:
            intervals.repeat(bad_repeats)

    @pytest.mark.parametrize(
        'new_closed', ['left', 'right', 'both', 'neither']
    )
    def test_set_closed(self, closed, new_closed):
        """``set_closed`` must equal building with the new side directly."""
        # GH 21670
        breaks = range(10)
        source = IntervalArray.from_breaks(breaks, closed=closed)
        converted = source.set_closed(new_closed)
        wanted = IntervalArray.from_breaks(breaks, closed=new_closed)
        tm.assert_extension_array_equal(converted, wanted)

    @pytest.mark.parametrize(
        'other',
        [
            Interval(0, 1, closed='right'),
            IntervalArray.from_breaks([1, 2, 3, 4], closed='right'),
        ],
    )
    def test_where_raises(self, other):
        """``Series.where`` rejects a replacement closed on the other side."""
        left_closed = IntervalArray.from_breaks([1, 2, 3, 4], closed='left')
        ser = pd.Series(left_closed)
        expect_mismatch = pytest.raises(
            ValueError, match="'value.closed' is 'right', expected 'left'."
        )
        with expect_mismatch:
            ser.where([True, False, True], other=other)
Ejemplo n.º 18
0
    result = ser.array
    if is_datetime64_dtype(any_numpy_dtype):
        assert isinstance(result, DatetimeArray)
    elif is_timedelta64_dtype(any_numpy_dtype):
        assert isinstance(result, TimedeltaArray)
    else:
        assert isinstance(result, PandasArray)


@pytest.mark.parametrize(
    "array, attr",
    [
        (pd.Categorical(["a", "b"]), "_codes"),
        (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_data"),
        (pd.core.arrays.integer_array([0, np.nan]), "_data"),
        (IntervalArray.from_breaks([0, 1]), "_combined"),
        (SparseArray([0, 1]), "_sparse_values"),
        (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_data"),
        # tz-aware Datetime
        (
            DatetimeArray(
                np.array(["2000-01-01T12:00:00", "2000-01-02T12:00:00"],
                         dtype="M8[ns]"),
                dtype=DatetimeTZDtype(tz="US/Central"),
            ),
            "_data",
        ),
    ],
)
def test_array(array, attr, index_or_series):
    box = index_or_series
Ejemplo n.º 19
0
def test_set_closed_deprecated():
    """Calling ``set_closed`` is expected to emit a ``FutureWarning``."""
    # GH#40245
    intervals = IntervalArray.from_breaks(range(10))
    expect_warning = tm.assert_produces_warning(FutureWarning)
    with expect_warning:
        intervals.set_closed(closed="both")
Ejemplo n.º 20
0
 def test_set_closed(self, closed, new_closed):
     # GH 21670
     array = IntervalArray.from_breaks(range(10), closed=closed)
     result = array.set_closed(new_closed)
     expected = IntervalArray.from_breaks(range(10), closed=new_closed)
     tm.assert_extension_array_equal(result, expected)
Ejemplo n.º 21
0
 def test_repeat_errors(self, bad_repeats, msg):
     array = IntervalArray.from_breaks(range(4))
     with tm.assert_raises_regex(ValueError, msg):
         array.repeat(bad_repeats)
Ejemplo n.º 22
0
 def test_repeat_errors(self, bad_repeats, msg):
     """An invalid ``repeats`` value must raise ``ValueError`` matching ``msg``."""
     intervals = IntervalArray.from_breaks(range(4))
     expect_value_error = pytest.raises(ValueError, match=msg)
     with expect_value_error:
         intervals.repeat(bad_repeats)
Ejemplo n.º 23
0
def test_from_breaks_deprecation():
    """Passing ``closed`` to ``from_breaks`` is expected to emit a FutureWarning."""
    # GH#40245
    expect_warning = tm.assert_produces_warning(FutureWarning)
    with expect_warning:
        IntervalArray.from_breaks([0, 1, 2, 3], closed="right")
Ejemplo n.º 24
0
 def test_repeat_errors(self, bad_repeats, msg):
     array = IntervalArray.from_breaks(range(4))
     with tm.assert_raises_regex(ValueError, msg):
         array.repeat(bad_repeats)
Ejemplo n.º 25
0
 def test_set_closed(self, closed, new_closed):
     # GH 21670
     array = IntervalArray.from_breaks(range(10), closed=closed)
     result = array.set_closed(new_closed)
     expected = IntervalArray.from_breaks(range(10), closed=new_closed)
     tm.assert_extension_array_equal(result, expected)
Ejemplo n.º 26
0
    result = ser.array
    if is_datetime64_dtype(any_numpy_dtype):
        assert isinstance(result, DatetimeArray)
    elif is_timedelta64_dtype(any_numpy_dtype):
        assert isinstance(result, TimedeltaArray)
    else:
        assert isinstance(result, PandasArray)


@pytest.mark.parametrize(
    "array, attr",
    [
        (pd.Categorical(["a", "b"]), "_codes"),
        (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_data"),
        (pd.core.arrays.integer_array([0, np.nan]), "_data"),
        (IntervalArray.from_breaks([0, 1]), "_left"),
        (SparseArray([0, 1]), "_sparse_values"),
        (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_data"),
        # tz-aware Datetime
        (
            DatetimeArray(
                np.array(["2000-01-01T12:00:00", "2000-01-02T12:00:00"],
                         dtype="M8[ns]"),
                dtype=DatetimeTZDtype(tz="US/Central"),
            ),
            "_data",
        ),
    ],
)
def test_array(array, attr, index_or_series):
    box = index_or_series
Ejemplo n.º 27
0
 def test_where_raises(self, other):
     """``Series.where`` on left-closed intervals must raise for ``other``.

     The expected error is a ``ValueError`` carrying the mismatched-closed
     message below; ``other`` comes from the test harness.
     """
     left_closed = IntervalArray.from_breaks([1, 2, 3, 4], closed='left')
     ser = pd.Series(left_closed)
     expect_mismatch = pytest.raises(
         ValueError, match="'value.closed' is 'right', expected 'left'."
     )
     with expect_mismatch:
         ser.where([True, False, True], other=other)
Ejemplo n.º 28
0
 def test_where_raises(self, other):
     """``Series.where`` on left-closed intervals must raise for ``other``.

     The expected error is a ``ValueError`` carrying the mismatched-closed
     message below; ``other`` comes from the test harness.
     """
     left_closed = IntervalArray.from_breaks([1, 2, 3, 4], closed="left")
     ser = pd.Series(left_closed)
     expect_mismatch = pytest.raises(
         ValueError, match="'value.closed' is 'right', expected 'left'."
     )
     with expect_mismatch:
         ser.where([True, False, True], other=other)
Ejemplo n.º 29
0
 def test_repeat_errors(self, bad_repeats, msg):
     """An invalid ``repeats`` value must raise ``ValueError`` matching ``msg``."""
     intervals = IntervalArray.from_breaks(range(4))
     expect_value_error = pytest.raises(ValueError, match=msg)
     with expect_value_error:
         intervals.repeat(bad_repeats)