Beispiel #1
0
    def test_arithmetic_explicit_conversions(self):
        # GH 8608
        # add/sub are overridden explicitly for Float/Int Index
        index_cls = self._index_cls
        if index_cls is RangeIndex:
            idx = RangeIndex(5)
        else:
            idx = index_cls(np.arange(5, dtype="int64"))

        # float conversions
        arr = np.arange(5, dtype="int64") * 3.2
        expected = Float64Index(arr)
        fidx = idx * 3.2
        tm.assert_index_equal(fidx, expected)
        fidx = 3.2 * idx
        tm.assert_index_equal(fidx, expected)

        # interops with numpy arrays
        expected = Float64Index(arr)
        a = np.zeros(5, dtype="float64")
        result = fidx - a
        tm.assert_index_equal(result, expected)

        expected = Float64Index(-arr)
        a = np.zeros(5, dtype="float64")
        result = a - fidx
        tm.assert_index_equal(result, expected)
Beispiel #2
0
    def test_ufunc_multiple_return_values(self, holder):
        obj = holder([1, 2, 3], name="x")
        box = Series if holder is Series else Index

        result = np.modf(obj)
        assert isinstance(result, tuple)
        exp1 = Float64Index([0.0, 0.0, 0.0], name="x")
        exp2 = Float64Index([1.0, 2.0, 3.0], name="x")
        tm.assert_equal(result[0], tm.box_expected(exp1, box))
        tm.assert_equal(result[1], tm.box_expected(exp2, box))
    def test_mixed_float_int(self, left_subtype, right_subtype):
        """mixed int/float left/right results in float for both sides"""
        left = np.arange(9, dtype=left_subtype)
        right = np.arange(1, 10, dtype=right_subtype)
        result = IntervalIndex.from_arrays(left, right)

        expected_left = Float64Index(left)
        expected_right = Float64Index(right)
        expected_subtype = np.float64

        tm.assert_index_equal(result.left, expected_left)
        tm.assert_index_equal(result.right, expected_right)
        assert result.dtype.subtype == expected_subtype
    def test_repr_floats(self):
        # GH 32553

        markers = Series(
            ["foo", "bar"],
            index=IntervalIndex([
                Interval(left, right) for left, right in zip(
                    Float64Index([329.973, 345.137], dtype="float64"),
                    Float64Index([345.137, 360.191], dtype="float64"),
                )
            ]),
        )
        result = str(markers)
        expected = "(329.973, 345.137]    foo\n(345.137, 360.191]    bar\ndtype: object"
        assert result == expected
Beispiel #5
0
    def test_pow_float(self, op, numeric_idx, box_with_array):
        # test power calculations both ways, GH#14973
        box = box_with_array
        idx = numeric_idx
        expected = Float64Index(op(idx.values, 2.0))

        idx = tm.box_expected(idx, box)
        expected = tm.box_expected(expected, box)

        result = op(idx, 2.0)
        tm.assert_equal(result, expected)
Beispiel #6
0
    def test_maybe_convert_i8_nat(self, breaks):
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)

        to_convert = breaks._constructor([pd.NaT] * 3)
        expected = Float64Index([np.nan] * 3)
        result = index._maybe_convert_i8(to_convert)
        tm.assert_index_equal(result, expected)

        to_convert = to_convert.insert(0, breaks[0])
        expected = expected.insert(0, float(breaks[0].value))
        result = index._maybe_convert_i8(to_convert)
        tm.assert_index_equal(result, expected)
Beispiel #7
0
    def test_insert_na(self, nulls_fixture, simple_index):
        # GH 18295 (test missing)
        index = simple_index
        na_val = nulls_fixture

        if na_val is pd.NaT:
            expected = Index([index[0], pd.NaT] + list(index[1:]),
                             dtype=object)
        else:
            expected = Float64Index([index[0], np.nan] + list(index[1:]))

        result = index.insert(1, na_val)
        tm.assert_index_equal(result, expected)
Beispiel #8
0
    def test_contains_with_float_index(self):
        # GH#22085
        integer_index = Int64Index([0, 1, 2, 3])
        uinteger_index = UInt64Index([0, 1, 2, 3])
        float_index = Float64Index([0.1, 1.1, 2.2, 3.3])

        for index in (integer_index, uinteger_index):
            assert 1.1 not in index
            assert 1.0 in index
            assert 1 in index

        assert 1.1 in float_index
        assert 1.0 not in float_index
        assert 1 not in float_index
    def test_astype_timedelta64(self):
        # GH 13149, GH 13209
        idx = TimedeltaIndex([1e14, "NaT", NaT, np.NaN])

        result = idx.astype("timedelta64")
        expected = Float64Index([1e14] + [np.NaN] * 3, dtype="float64")
        tm.assert_index_equal(result, expected)

        result = idx.astype("timedelta64[ns]")
        tm.assert_index_equal(result, idx)
        assert result is not idx

        result = idx.astype("timedelta64[ns]", copy=False)
        tm.assert_index_equal(result, idx)
        assert result is idx
Beispiel #10
0
    def test_inf_upcast(self):
        # GH 16957
        # We should be able to use np.inf as a key
        # np.inf should cause an index to convert to float

        # Test with np.inf in rows
        df = DataFrame(columns=[0])
        df.loc[1] = 1
        df.loc[2] = 2
        df.loc[np.inf] = 3

        # make sure we can look up the value
        assert df.loc[np.inf, 0] == 3

        result = df.index
        expected = Float64Index([1, 2, np.inf])
        tm.assert_index_equal(result, expected)
Beispiel #11
0
def test_get_nan_multiple():
    # GH 8569
    # ensure that fixing "test_get_nan" above hasn't broken get
    # with multiple elements
    s = Float64Index(range(10)).to_series()

    idx = [2, 30]
    assert s.get(idx) is None

    idx = [2, np.nan]
    assert s.get(idx) is None

    # GH 17295 - all missing keys
    idx = [20, 30]
    assert s.get(idx) is None

    idx = [np.nan, np.nan]
    assert s.get(idx) is None
Beispiel #12
0
    def test_insert_na(self, nulls_fixture, simple_index):
        # GH 18295 (test missing)
        index = simple_index
        na_val = nulls_fixture

        if na_val is pd.NaT:
            expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object)
        else:
            expected = Float64Index([index[0], np.nan] + list(index[1:]))

            if index._is_backward_compat_public_numeric_index:
                # GH#43921 we preserve NumericIndex
                if index.dtype.kind == "f":
                    expected = NumericIndex(expected, dtype=index.dtype)
                else:
                    expected = NumericIndex(expected)

        result = index.insert(1, na_val)
        tm.assert_index_equal(result, expected, exact=True)
Beispiel #13
0
    def test_asof_locs_mismatched_type(self):
        dti = date_range("2016-01-01", periods=3)
        pi = dti.to_period("D")
        pi2 = dti.to_period("H")

        mask = np.array([0, 1, 0], dtype=bool)

        msg = "must be DatetimeIndex or PeriodIndex"
        with pytest.raises(TypeError, match=msg):
            pi.asof_locs(Int64Index(pi.asi8), mask)

        with pytest.raises(TypeError, match=msg):
            pi.asof_locs(Float64Index(pi.asi8), mask)

        with pytest.raises(TypeError, match=msg):
            # TimedeltaIndex
            pi.asof_locs(dti - dti, mask)

        msg = "Input has different freq=H"
        with pytest.raises(libperiod.IncompatibleFrequency, match=msg):
            pi.asof_locs(pi2, mask)
Beispiel #14
0
    def test_ufunc_coercions(self, holder):
        idx = holder([1, 2, 3, 4, 5], name="x")
        box = Series if holder is Series else Index

        result = np.sqrt(idx)
        assert result.dtype == "f8" and isinstance(result, box)
        exp = Float64Index(np.sqrt(np.array([1, 2, 3, 4, 5])), name="x")
        exp = tm.box_expected(exp, box)
        tm.assert_equal(result, exp)

        result = np.divide(idx, 2.0)
        assert result.dtype == "f8" and isinstance(result, box)
        exp = Float64Index([0.5, 1.0, 1.5, 2.0, 2.5], name="x")
        exp = tm.box_expected(exp, box)
        tm.assert_equal(result, exp)

        # _evaluate_numeric_binop
        result = idx + 2.0
        assert result.dtype == "f8" and isinstance(result, box)
        exp = Float64Index([3.0, 4.0, 5.0, 6.0, 7.0], name="x")
        exp = tm.box_expected(exp, box)
        tm.assert_equal(result, exp)

        result = idx - 2.0
        assert result.dtype == "f8" and isinstance(result, box)
        exp = Float64Index([-1.0, 0.0, 1.0, 2.0, 3.0], name="x")
        exp = tm.box_expected(exp, box)
        tm.assert_equal(result, exp)

        result = idx * 1.0
        assert result.dtype == "f8" and isinstance(result, box)
        exp = Float64Index([1.0, 2.0, 3.0, 4.0, 5.0], name="x")
        exp = tm.box_expected(exp, box)
        tm.assert_equal(result, exp)

        result = idx / 2.0
        assert result.dtype == "f8" and isinstance(result, box)
        exp = Float64Index([0.5, 1.0, 1.5, 2.0, 2.5], name="x")
        exp = tm.box_expected(exp, box)
        tm.assert_equal(result, exp)
Beispiel #15
0
    def setup_method(self):

        self.series_ints = Series(np.random.rand(4), index=np.arange(0, 8, 2))
        self.frame_ints = DataFrame(np.random.randn(4, 4),
                                    index=np.arange(0, 8, 2),
                                    columns=np.arange(0, 12, 3))

        self.series_uints = Series(np.random.rand(4),
                                   index=UInt64Index(np.arange(0, 8, 2)))
        self.frame_uints = DataFrame(
            np.random.randn(4, 4),
            index=UInt64Index(range(0, 8, 2)),
            columns=UInt64Index(range(0, 12, 3)),
        )

        self.series_floats = Series(np.random.rand(4),
                                    index=Float64Index(range(0, 8, 2)))
        self.frame_floats = DataFrame(
            np.random.randn(4, 4),
            index=Float64Index(range(0, 8, 2)),
            columns=Float64Index(range(0, 12, 3)),
        )

        m_idces = [
            MultiIndex.from_product([[1, 2], [3, 4]]),
            MultiIndex.from_product([[5, 6], [7, 8]]),
            MultiIndex.from_product([[9, 10], [11, 12]]),
        ]

        self.series_multi = Series(np.random.rand(4), index=m_idces[0])
        self.frame_multi = DataFrame(np.random.randn(4, 4),
                                     index=m_idces[0],
                                     columns=m_idces[1])

        self.series_labels = Series(np.random.randn(4), index=list("abcd"))
        self.frame_labels = DataFrame(np.random.randn(4, 4),
                                      index=list("abcd"),
                                      columns=list("ABCD"))

        self.series_mixed = Series(np.random.randn(4), index=[2, 4, "null", 8])
        self.frame_mixed = DataFrame(np.random.randn(4, 4),
                                     index=[2, 4, "null", 8])

        self.series_ts = Series(np.random.randn(4),
                                index=date_range("20130101", periods=4))
        self.frame_ts = DataFrame(np.random.randn(4, 4),
                                  index=date_range("20130101", periods=4))

        dates_rev = date_range("20130101",
                               periods=4).sort_values(ascending=False)
        self.series_ts_rev = Series(np.random.randn(4), index=dates_rev)
        self.frame_ts_rev = DataFrame(np.random.randn(4, 4), index=dates_rev)

        self.frame_empty = DataFrame()
        self.series_empty = Series(dtype=object)

        # form agglomerates
        for kind in self._kinds:
            d = {}
            for typ in self._typs:
                d[typ] = getattr(self, f"{kind}_{typ}")

            setattr(self, kind, d)
Beispiel #16
0
class TestGrouping:
    def test_grouper_index_types(self):
        # related GH5375
        # groupby misbehaving when using a Floatlike index
        df = DataFrame(np.arange(10).reshape(5, 2), columns=list("AB"))
        for index in [
            tm.makeFloatIndex,
            tm.makeStringIndex,
            tm.makeUnicodeIndex,
            tm.makeIntIndex,
            tm.makeDateIndex,
            tm.makePeriodIndex,
        ]:

            df.index = index(len(df))
            df.groupby(list("abcde")).apply(lambda x: x)

            df.index = list(reversed(df.index.tolist()))
            df.groupby(list("abcde")).apply(lambda x: x)

    def test_grouper_multilevel_freq(self):

        # GH 7885
        # with level and freq specified in a pd.Grouper
        from datetime import (
            date,
            timedelta,
        )

        d0 = date.today() - timedelta(days=14)
        dates = date_range(d0, date.today())
        date_index = MultiIndex.from_product([dates, dates], names=["foo", "bar"])
        df = DataFrame(np.random.randint(0, 100, 225), index=date_index)

        # Check string level
        expected = (
            df.reset_index()
            .groupby([pd.Grouper(key="foo", freq="W"), pd.Grouper(key="bar", freq="W")])
            .sum()
        )
        # reset index changes columns dtype to object
        expected.columns = Index([0], dtype="int64")

        result = df.groupby(
            [pd.Grouper(level="foo", freq="W"), pd.Grouper(level="bar", freq="W")]
        ).sum()
        tm.assert_frame_equal(result, expected)

        # Check integer level
        result = df.groupby(
            [pd.Grouper(level=0, freq="W"), pd.Grouper(level=1, freq="W")]
        ).sum()
        tm.assert_frame_equal(result, expected)

    def test_grouper_creation_bug(self):

        # GH 8795
        df = DataFrame({"A": [0, 0, 1, 1, 2, 2], "B": [1, 2, 3, 4, 5, 6]})
        g = df.groupby("A")
        expected = g.sum()

        g = df.groupby(pd.Grouper(key="A"))
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        g = df.groupby(pd.Grouper(key="A", axis=0))
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        result = g.apply(lambda x: x.sum())
        expected["A"] = [0, 2, 4]
        expected = expected.loc[:, ["A", "B"]]
        tm.assert_frame_equal(result, expected)

        # GH14334
        # pd.Grouper(key=...) may be passed in a list
        df = DataFrame(
            {"A": [0, 0, 0, 1, 1, 1], "B": [1, 1, 2, 2, 3, 3], "C": [1, 2, 3, 4, 5, 6]}
        )
        # Group by single column
        expected = df.groupby("A").sum()
        g = df.groupby([pd.Grouper(key="A")])
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # Group by two columns
        # using a combination of strings and Grouper objects
        expected = df.groupby(["A", "B"]).sum()

        # Group with two Grouper objects
        g = df.groupby([pd.Grouper(key="A"), pd.Grouper(key="B")])
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # Group with a string and a Grouper object
        g = df.groupby(["A", pd.Grouper(key="B")])
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # Group with a Grouper object and a string
        g = df.groupby([pd.Grouper(key="A"), "B"])
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # GH8866
        s = Series(
            np.arange(8, dtype="int64"),
            index=MultiIndex.from_product(
                [list("ab"), range(2), date_range("20130101", periods=2)],
                names=["one", "two", "three"],
            ),
        )
        result = s.groupby(pd.Grouper(level="three", freq="M")).sum()
        expected = Series(
            [28],
            index=pd.DatetimeIndex([Timestamp("2013-01-31")], freq="M", name="three"),
        )
        tm.assert_series_equal(result, expected)

        # just specifying a level breaks
        result = s.groupby(pd.Grouper(level="one")).sum()
        expected = s.groupby(level="one").sum()
        tm.assert_series_equal(result, expected)

    def test_grouper_column_and_index(self):
        # GH 14327

        # Grouping a multi-index frame by a column and an index level should
        # be equivalent to resetting the index and grouping by two columns
        idx = MultiIndex.from_tuples(
            [("a", 1), ("a", 2), ("a", 3), ("b", 1), ("b", 2), ("b", 3)]
        )
        idx.names = ["outer", "inner"]
        df_multi = DataFrame(
            {"A": np.arange(6), "B": ["one", "one", "two", "two", "one", "one"]},
            index=idx,
        )
        result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean()
        expected = df_multi.reset_index().groupby(["B", "inner"]).mean()
        tm.assert_frame_equal(result, expected)

        # Test the reverse grouping order
        result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean()
        expected = df_multi.reset_index().groupby(["inner", "B"]).mean()
        tm.assert_frame_equal(result, expected)

        # Grouping a single-index frame by a column and the index should
        # be equivalent to resetting the index and grouping by two columns
        df_single = df_multi.reset_index("outer")
        result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean()
        expected = df_single.reset_index().groupby(["B", "inner"]).mean()
        tm.assert_frame_equal(result, expected)

        # Test the reverse grouping order
        result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean()
        expected = df_single.reset_index().groupby(["inner", "B"]).mean()
        tm.assert_frame_equal(result, expected)

    def test_groupby_levels_and_columns(self):
        # GH9344, GH9049
        idx_names = ["x", "y"]
        idx = MultiIndex.from_tuples([(1, 1), (1, 2), (3, 4), (5, 6)], names=idx_names)
        df = DataFrame(np.arange(12).reshape(-1, 3), index=idx)

        by_levels = df.groupby(level=idx_names).mean()
        # reset_index changes columns dtype to object
        by_columns = df.reset_index().groupby(idx_names).mean()

        # without casting, by_columns.columns is object-dtype
        by_columns.columns = by_columns.columns.astype(np.int64)
        tm.assert_frame_equal(by_levels, by_columns)

    def test_groupby_categorical_index_and_columns(self, observed):
        # GH18432, adapted for GH25871
        columns = ["A", "B", "A", "B"]
        categories = ["B", "A"]
        data = np.array(
            [[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2]], int
        )
        cat_columns = CategoricalIndex(columns, categories=categories, ordered=True)
        df = DataFrame(data=data, columns=cat_columns)
        result = df.groupby(axis=1, level=0, observed=observed).sum()
        expected_data = np.array([[4, 2], [4, 2], [4, 2], [4, 2], [4, 2]], int)
        expected_columns = CategoricalIndex(
            categories, categories=categories, ordered=True
        )
        expected = DataFrame(data=expected_data, columns=expected_columns)
        tm.assert_frame_equal(result, expected)

        # test transposed version
        df = DataFrame(data.T, index=cat_columns)
        result = df.groupby(axis=0, level=0, observed=observed).sum()
        expected = DataFrame(data=expected_data.T, index=expected_columns)
        tm.assert_frame_equal(result, expected)

    def test_grouper_getting_correct_binner(self):

        # GH 10063
        # using a non-time-based grouper and a time-based grouper
        # and specifying levels
        df = DataFrame(
            {"A": 1},
            index=MultiIndex.from_product(
                [list("ab"), date_range("20130101", periods=80)], names=["one", "two"]
            ),
        )
        result = df.groupby(
            [pd.Grouper(level="one"), pd.Grouper(level="two", freq="M")]
        ).sum()
        expected = DataFrame(
            {"A": [31, 28, 21, 31, 28, 21]},
            index=MultiIndex.from_product(
                [list("ab"), date_range("20130101", freq="M", periods=3)],
                names=["one", "two"],
            ),
        )
        tm.assert_frame_equal(result, expected)

    def test_grouper_iter(self, df):
        assert sorted(df.groupby("A").grouper) == ["bar", "foo"]

    def test_empty_groups(self, df):
        # see gh-1048
        with pytest.raises(ValueError, match="No group keys passed!"):
            df.groupby([])

    def test_groupby_grouper(self, df):
        grouped = df.groupby("A")

        result = df.groupby(grouped.grouper).mean()
        expected = grouped.mean()
        tm.assert_frame_equal(result, expected)

    def test_groupby_dict_mapping(self):
        # GH #679
        from pandas import Series

        s = Series({"T1": 5})
        result = s.groupby({"T1": "T2"}).agg(sum)
        expected = s.groupby(["T2"]).agg(sum)
        tm.assert_series_equal(result, expected)

        s = Series([1.0, 2.0, 3.0, 4.0], index=list("abcd"))
        mapping = {"a": 0, "b": 0, "c": 1, "d": 1}

        result = s.groupby(mapping).mean()
        result2 = s.groupby(mapping).agg(np.mean)
        expected = s.groupby([0, 0, 1, 1]).mean()
        expected2 = s.groupby([0, 0, 1, 1]).mean()
        tm.assert_series_equal(result, expected)
        tm.assert_series_equal(result, result2)
        tm.assert_series_equal(result, expected2)

    @pytest.mark.parametrize(
        "index",
        [
            [0, 1, 2, 3],
            ["a", "b", "c", "d"],
            [Timestamp(2021, 7, 28 + i) for i in range(4)],
        ],
    )
    def test_groupby_series_named_with_tuple(self, frame_or_series, index):
        # GH 42731
        obj = frame_or_series([1, 2, 3, 4], index=index)
        groups = Series([1, 0, 1, 0], index=index, name=("a", "a"))
        result = obj.groupby(groups).last()
        expected = frame_or_series([4, 3])
        expected.index.name = ("a", "a")
        tm.assert_equal(result, expected)

    def test_groupby_grouper_f_sanity_checked(self):
        dates = date_range("01-Jan-2013", periods=12, freq="MS")
        ts = Series(np.random.randn(12), index=dates)

        # GH3035
        # index.map is used to apply grouper to the index
        # if it fails on the elements, map tries it on the entire index as
        # a sequence. That can yield invalid results that cause trouble
        # down the line.
        # the surprise comes from using key[0:6] rather than str(key)[0:6]
        # when the elements are Timestamp.
        # the result is Index[0:6], very confusing.

        msg = r"Grouper result violates len\(labels\) == len\(data\)"
        with pytest.raises(AssertionError, match=msg):
            ts.groupby(lambda key: key[0:6])

    def test_grouping_error_on_multidim_input(self, df):
        msg = "Grouper for '<class 'pandas.core.frame.DataFrame'>' not 1-dimensional"
        with pytest.raises(ValueError, match=msg):
            Grouping(df.index, df[["A", "A"]])

    def test_multiindex_passthru(self):

        # GH 7997
        # regression from 0.14.1
        df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        df.columns = MultiIndex.from_tuples([(0, 1), (1, 1), (2, 1)])

        result = df.groupby(axis=1, level=[0, 1]).first()
        tm.assert_frame_equal(result, df)

    def test_multiindex_negative_level(self, mframe):
        # GH 13901
        result = mframe.groupby(level=-1).sum()
        expected = mframe.groupby(level="second").sum()
        tm.assert_frame_equal(result, expected)

        result = mframe.groupby(level=-2).sum()
        expected = mframe.groupby(level="first").sum()
        tm.assert_frame_equal(result, expected)

        result = mframe.groupby(level=[-2, -1]).sum()
        expected = mframe
        tm.assert_frame_equal(result, expected)

        result = mframe.groupby(level=[-1, "first"]).sum()
        expected = mframe.groupby(level=["second", "first"]).sum()
        tm.assert_frame_equal(result, expected)

    def test_multifunc_select_col_integer_cols(self, df):
        df.columns = np.arange(len(df.columns))

        # it works!
        df.groupby(1, as_index=False)[2].agg({"Q": np.mean})

    def test_multiindex_columns_empty_level(self):
        lst = [["count", "values"], ["to filter", ""]]
        midx = MultiIndex.from_tuples(lst)

        df = DataFrame([[1, "A"]], columns=midx)

        grouped = df.groupby("to filter").groups
        assert grouped["A"] == [0]

        grouped = df.groupby([("to filter", "")]).groups
        assert grouped["A"] == [0]

        df = DataFrame([[1, "A"], [2, "B"]], columns=midx)

        expected = df.groupby("to filter").groups
        result = df.groupby([("to filter", "")]).groups
        assert result == expected

        df = DataFrame([[1, "A"], [2, "A"]], columns=midx)

        expected = df.groupby("to filter").groups
        result = df.groupby([("to filter", "")]).groups
        tm.assert_dict_equal(result, expected)

    def test_groupby_multiindex_tuple(self):
        # GH 17979
        df = DataFrame(
            [[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
            columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]),
        )
        expected = df.groupby([("b", 1)]).groups
        result = df.groupby(("b", 1)).groups
        tm.assert_dict_equal(expected, result)

        df2 = DataFrame(
            df.values,
            columns=MultiIndex.from_arrays(
                [["a", "b", "b", "c"], ["d", "d", "e", "e"]]
            ),
        )
        expected = df2.groupby([("b", "d")]).groups
        result = df.groupby(("b", 1)).groups
        tm.assert_dict_equal(expected, result)

        df3 = DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"])
        expected = df3.groupby([("b", "d")]).groups
        result = df.groupby(("b", 1)).groups
        tm.assert_dict_equal(expected, result)

    @pytest.mark.parametrize("sort", [True, False])
    def test_groupby_level(self, sort, mframe, df):
        # GH 17537
        frame = mframe
        deleveled = frame.reset_index()

        result0 = frame.groupby(level=0, sort=sort).sum()
        result1 = frame.groupby(level=1, sort=sort).sum()

        expected0 = frame.groupby(deleveled["first"].values, sort=sort).sum()
        expected1 = frame.groupby(deleveled["second"].values, sort=sort).sum()

        expected0.index.name = "first"
        expected1.index.name = "second"

        assert result0.index.name == "first"
        assert result1.index.name == "second"

        tm.assert_frame_equal(result0, expected0)
        tm.assert_frame_equal(result1, expected1)
        assert result0.index.name == frame.index.names[0]
        assert result1.index.name == frame.index.names[1]

        # groupby level name
        result0 = frame.groupby(level="first", sort=sort).sum()
        result1 = frame.groupby(level="second", sort=sort).sum()
        tm.assert_frame_equal(result0, expected0)
        tm.assert_frame_equal(result1, expected1)

        # axis=1

        result0 = frame.T.groupby(level=0, axis=1, sort=sort).sum()
        result1 = frame.T.groupby(level=1, axis=1, sort=sort).sum()
        tm.assert_frame_equal(result0, expected0.T)
        tm.assert_frame_equal(result1, expected1.T)

        # raise exception for non-MultiIndex
        msg = "level > 0 or level < -1 only valid with MultiIndex"
        with pytest.raises(ValueError, match=msg):
            df.groupby(level=1)

    def test_groupby_level_index_names(self, axis):
        # GH4014 this used to raise ValueError since 'exp'>1 (in py2)
        df = DataFrame({"exp": ["A"] * 3 + ["B"] * 3, "var1": range(6)}).set_index(
            "exp"
        )
        if axis in (1, "columns"):
            df = df.T
        df.groupby(level="exp", axis=axis)
        msg = f"level name foo is not the name of the {df._get_axis_name(axis)}"
        with pytest.raises(ValueError, match=msg):
            df.groupby(level="foo", axis=axis)

    @pytest.mark.parametrize("sort", [True, False])
    def test_groupby_level_with_nas(self, sort):
        # GH 17537
        index = MultiIndex(
            levels=[[1, 0], [0, 1, 2, 3]],
            codes=[[1, 1, 1, 1, 0, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]],
        )

        # factorizing doesn't confuse things
        s = Series(np.arange(8.0), index=index)
        result = s.groupby(level=0, sort=sort).sum()
        expected = Series([6.0, 22.0], index=[0, 1])
        tm.assert_series_equal(result, expected)

        index = MultiIndex(
            levels=[[1, 0], [0, 1, 2, 3]],
            codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]],
        )

        # factorizing doesn't confuse things
        s = Series(np.arange(8.0), index=index)
        result = s.groupby(level=0, sort=sort).sum()
        expected = Series([6.0, 18.0], index=[0.0, 1.0])
        tm.assert_series_equal(result, expected)

    def test_groupby_args(self, mframe):
        # PR8618 and issue 8015
        frame = mframe

        msg = "You have to supply one of 'by' and 'level'"
        with pytest.raises(TypeError, match=msg):
            frame.groupby()

        msg = "You have to supply one of 'by' and 'level'"
        with pytest.raises(TypeError, match=msg):
            frame.groupby(by=None, level=None)

    @pytest.mark.parametrize(
        "sort,labels",
        [
            [True, [2, 2, 2, 0, 0, 1, 1, 3, 3, 3]],
            [False, [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]],
        ],
    )
    def test_level_preserve_order(self, sort, labels, mframe):
        # GH 17537
        grouped = mframe.groupby(level=0, sort=sort)
        exp_labels = np.array(labels, np.intp)
        tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels)

    def test_grouping_labels(self, mframe):
        grouped = mframe.groupby(mframe.index.get_level_values(0))
        exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp)
        tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels)

    def test_list_grouper_with_nat(self):
        # GH 14715
        df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")})
        df.iloc[-1] = pd.NaT
        grouper = pd.Grouper(key="date", freq="AS")

        # Grouper in a list grouping
        result = df.groupby([grouper])
        expected = {Timestamp("2011-01-01"): Index(list(range(364)))}
        tm.assert_dict_equal(result.groups, expected)

        # Test case without a list
        result = df.groupby(grouper)
        expected = {Timestamp("2011-01-01"): 365}
        tm.assert_dict_equal(result.groups, expected)

    @pytest.mark.parametrize(
        "func,expected",
        [
            (
                "transform",
                Series(name=2, dtype=np.float64, index=Index([])),
            ),
            (
                "agg",
                Series(name=2, dtype=np.float64, index=Float64Index([], name=1)),
            ),
            (
                "apply",
                Series(name=2, dtype=np.float64, index=Float64Index([], name=1)),
            ),
        ],
    )
    def test_evaluate_with_empty_groups(self, func, expected):
        # 26208
        # test transform'ing empty groups
        # (not testing other agg fns, because they return
        # different index objects.
        df = DataFrame({1: [], 2: []})
        g = df.groupby(1)
        result = getattr(g[2], func)(lambda x: x)
        tm.assert_series_equal(result, expected)

    def test_groupby_empty(self):
        # https://github.com/pandas-dev/pandas/issues/27190
        s = Series([], name="name", dtype="float64")
        gr = s.groupby([])

        result = gr.mean()
        tm.assert_series_equal(result, s)

        # check group properties
        assert len(gr.grouper.groupings) == 1
        tm.assert_numpy_array_equal(
            gr.grouper.group_info[0], np.array([], dtype=np.dtype(np.intp))
        )

        tm.assert_numpy_array_equal(
            gr.grouper.group_info[1], np.array([], dtype=np.dtype("int"))
        )

        assert gr.grouper.group_info[2] == 0

        # check name
        assert s.groupby(s).grouper.names == ["name"]

    def test_groupby_level_index_value_all_na(self):
        # issue 20519
        df = DataFrame(
            [["x", np.nan, 10], [None, np.nan, 20]], columns=["A", "B", "C"]
        ).set_index(["A", "B"])
        result = df.groupby(level=["A", "B"]).sum()
        expected = DataFrame(
            data=[],
            index=MultiIndex(
                levels=[Index(["x"], dtype="object"), Index([], dtype="float64")],
                codes=[[], []],
                names=["A", "B"],
            ),
            columns=["C"],
            dtype="int64",
        )
        tm.assert_frame_equal(result, expected)

    def test_groupby_multiindex_level_empty(self):
        # https://github.com/pandas-dev/pandas/issues/31670
        df = DataFrame(
            [[123, "a", 1.0], [123, "b", 2.0]], columns=["id", "category", "value"]
        )
        df = df.set_index(["id", "category"])
        empty = df[df.value < 0]
        result = empty.groupby("id").sum()
        expected = DataFrame(
            dtype="float64", columns=["value"], index=Int64Index([], name="id")
        )
        tm.assert_frame_equal(result, expected)
Beispiel #17
0
class TestABCClasses:
    tuples = [[1, 2, 2], ["red", "blue", "red"]]
    multi_index = pd.MultiIndex.from_arrays(tuples, names=("number", "color"))
    datetime_index = pd.to_datetime(["2000/1/1", "2010/1/1"])
    timedelta_index = pd.to_timedelta(np.arange(5), unit="s")
    period_index = pd.period_range("2000/1/1", "2010/1/1/", freq="M")
    categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1])
    categorical_df = pd.DataFrame({"values": [1, 2, 3]}, index=categorical)
    df = pd.DataFrame({"names": ["a", "b", "c"]}, index=multi_index)
    sparse_array = pd.arrays.SparseArray(np.random.randn(10))
    datetime_array = pd.core.arrays.DatetimeArray(datetime_index)
    timedelta_array = pd.core.arrays.TimedeltaArray(timedelta_index)

    abc_pairs = [
        ("ABCInt64Index", Int64Index([1, 2, 3])),
        ("ABCUInt64Index", UInt64Index([1, 2, 3])),
        ("ABCFloat64Index", Float64Index([1, 2, 3])),
        ("ABCMultiIndex", multi_index),
        ("ABCDatetimeIndex", datetime_index),
        ("ABCRangeIndex", pd.RangeIndex(3)),
        ("ABCTimedeltaIndex", timedelta_index),
        ("ABCIntervalIndex", pd.interval_range(start=0, end=3)),
        ("ABCPeriodArray", pd.arrays.PeriodArray([2000, 2001, 2002], freq="D")),
        ("ABCPandasArray", pd.arrays.PandasArray(np.array([0, 1, 2]))),
        ("ABCPeriodIndex", period_index),
        ("ABCCategoricalIndex", categorical_df.index),
        ("ABCSeries", pd.Series([1, 2, 3])),
        ("ABCDataFrame", df),
        ("ABCCategorical", categorical),
        ("ABCDatetimeArray", datetime_array),
        ("ABCTimedeltaArray", timedelta_array),
    ]

    @pytest.mark.parametrize("abctype1, inst", abc_pairs)
    @pytest.mark.parametrize("abctype2, _", abc_pairs)
    def test_abc_pairs_instance_check(self, abctype1, abctype2, inst, _):
        # GH 38588, 46719
        if abctype1 == abctype2:
            assert isinstance(inst, getattr(gt, abctype2))
            assert not isinstance(type(inst), getattr(gt, abctype2))
        else:
            assert not isinstance(inst, getattr(gt, abctype2))

    @pytest.mark.parametrize("abctype1, inst", abc_pairs)
    @pytest.mark.parametrize("abctype2, _", abc_pairs)
    def test_abc_pairs_subclass_check(self, abctype1, abctype2, inst, _):
        # GH 38588, 46719
        if abctype1 == abctype2:
            assert issubclass(type(inst), getattr(gt, abctype2))

            with pytest.raises(
                TypeError, match=re.escape("issubclass() arg 1 must be a class")
            ):
                issubclass(inst, getattr(gt, abctype2))
        else:
            assert not issubclass(type(inst), getattr(gt, abctype2))

    abc_subclasses = {
        "ABCIndex": [
            abctype
            for abctype, _ in abc_pairs
            if "Index" in abctype and abctype != "ABCIndex"
        ],
        "ABCNDFrame": ["ABCSeries", "ABCDataFrame"],
        "ABCExtensionArray": [
            "ABCCategorical",
            "ABCDatetimeArray",
            "ABCPeriodArray",
            "ABCTimedeltaArray",
        ],
    }

    @pytest.mark.parametrize("parent, subs", abc_subclasses.items())
    @pytest.mark.parametrize("abctype, inst", abc_pairs)
    def test_abc_hierarchy(self, parent, subs, abctype, inst):
        # GH 38588
        if abctype in subs:
            assert isinstance(inst, getattr(gt, parent))
        else:
            assert not isinstance(inst, getattr(gt, parent))

    @pytest.mark.parametrize("abctype", [e for e in gt.__dict__ if e.startswith("ABC")])
    def test_abc_coverage(self, abctype):
        # GH 38588
        assert (
            abctype in (e for e, _ in self.abc_pairs) or abctype in self.abc_subclasses
        )
Beispiel #18
0
    def test_floating_misc(self, indexer_sl):

        # related 236
        # scalar/slicing of a float index
        s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)

        # label based slicing
        result = indexer_sl(s)[1.0:3.0]
        expected = Series(1, index=[2.5])
        tm.assert_series_equal(result, expected)

        # exact indexing when found

        result = indexer_sl(s)[5.0]
        assert result == 2

        result = indexer_sl(s)[5]
        assert result == 2

        # value not found (and no fallbacking at all)

        # scalar integers
        with pytest.raises(KeyError, match=r"^4$"):
            indexer_sl(s)[4]

        # fancy floats/integers create the correct entry (as nan)
        # fancy tests
        expected = Series([2, 0], index=Float64Index([5.0, 0.0]))
        for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]:  # float
            tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)

        expected = Series([2, 0], index=Index([5, 0], dtype="float64"))
        for fancy_idx in [[5, 0], np.array([5, 0])]:
            tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)

        # all should return the same as we are slicing 'the same'
        result1 = indexer_sl(s)[2:5]
        result2 = indexer_sl(s)[2.0:5.0]
        result3 = indexer_sl(s)[2.0:5]
        result4 = indexer_sl(s)[2.1:5]
        tm.assert_series_equal(result1, result2)
        tm.assert_series_equal(result1, result3)
        tm.assert_series_equal(result1, result4)

        expected = Series([1, 2], index=[2.5, 5.0])
        result = indexer_sl(s)[2:5]

        tm.assert_series_equal(result, expected)

        # list selection
        result1 = indexer_sl(s)[[0.0, 5, 10]]
        result2 = s.iloc[[0, 2, 4]]
        tm.assert_series_equal(result1, result2)

        with pytest.raises(KeyError, match="not in index"):
            indexer_sl(s)[[1.6, 5, 10]]

        with pytest.raises(KeyError, match="not in index"):
            indexer_sl(s)[[0, 1, 2]]

        result = indexer_sl(s)[[2.5, 5]]
        tm.assert_series_equal(result, Series([1, 2], index=[2.5, 5.0]))

        result = indexer_sl(s)[[2.5]]
        tm.assert_series_equal(result, Series([1], index=[2.5]))
Beispiel #19
0
def test_get():
    # GH 6383
    s = Series(
        np.array([
            43,
            48,
            60,
            48,
            50,
            51,
            50,
            45,
            57,
            48,
            56,
            45,
            51,
            39,
            55,
            43,
            54,
            52,
            51,
            54,
        ]))

    result = s.get(25, 0)
    expected = 0
    assert result == expected

    s = Series(
        np.array([
            43,
            48,
            60,
            48,
            50,
            51,
            50,
            45,
            57,
            48,
            56,
            45,
            51,
            39,
            55,
            43,
            54,
            52,
            51,
            54,
        ]),
        index=Float64Index([
            25.0,
            36.0,
            49.0,
            64.0,
            81.0,
            100.0,
            121.0,
            144.0,
            169.0,
            196.0,
            1225.0,
            1296.0,
            1369.0,
            1444.0,
            1521.0,
            1600.0,
            1681.0,
            1764.0,
            1849.0,
            1936.0,
        ]),
    )

    result = s.get(25, 0)
    expected = 43
    assert result == expected

    # GH 7407
    # with a boolean accessor
    df = pd.DataFrame({"i": [0] * 3, "b": [False] * 3})
    vc = df.i.value_counts()
    result = vc.get(99, default="Missing")
    assert result == "Missing"

    vc = df.b.value_counts()
    result = vc.get(False, default="Missing")
    assert result == 3

    result = vc.get(True, default="Missing")
    assert result == "Missing"
Beispiel #20
0
def test_get_nan():
    # GH 8569
    s = Float64Index(range(10)).to_series()
    assert s.get(np.nan) is None
    assert s.get(np.nan, default="Missing") == "Missing"
Beispiel #21
0
    def test_insert_index_float64(self, insert, coerced_val, coerced_dtype):
        obj = Float64Index([1.0, 2.0, 3.0, 4.0])
        assert obj.dtype == np.float64

        exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0])
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
Beispiel #22
0
def test_array_equivalent(dtype_equal):
    assert array_equivalent(np.array([np.nan, np.nan]),
                            np.array([np.nan, np.nan]),
                            dtype_equal=dtype_equal)
    assert array_equivalent(
        np.array([np.nan, 1, np.nan]),
        np.array([np.nan, 1, np.nan]),
        dtype_equal=dtype_equal,
    )
    assert array_equivalent(
        np.array([np.nan, None], dtype="object"),
        np.array([np.nan, None], dtype="object"),
        dtype_equal=dtype_equal,
    )
    # Check the handling of nested arrays in array_equivalent_object
    assert array_equivalent(
        np.array([np.array([np.nan, None], dtype="object"), None],
                 dtype="object"),
        np.array([np.array([np.nan, None], dtype="object"), None],
                 dtype="object"),
        dtype_equal=dtype_equal,
    )
    assert array_equivalent(
        np.array([np.nan, 1 + 1j], dtype="complex"),
        np.array([np.nan, 1 + 1j], dtype="complex"),
        dtype_equal=dtype_equal,
    )
    assert not array_equivalent(
        np.array([np.nan, 1 + 1j], dtype="complex"),
        np.array([np.nan, 1 + 2j], dtype="complex"),
        dtype_equal=dtype_equal,
    )
    assert not array_equivalent(
        np.array([np.nan, 1, np.nan]),
        np.array([np.nan, 2, np.nan]),
        dtype_equal=dtype_equal,
    )
    assert not array_equivalent(np.array(["a", "b", "c", "d"]),
                                np.array(["e", "e"]),
                                dtype_equal=dtype_equal)
    assert array_equivalent(Float64Index([0, np.nan]),
                            Float64Index([0, np.nan]),
                            dtype_equal=dtype_equal)
    assert not array_equivalent(Float64Index([0, np.nan]),
                                Float64Index([1, np.nan]),
                                dtype_equal=dtype_equal)
    assert array_equivalent(DatetimeIndex([0, np.nan]),
                            DatetimeIndex([0, np.nan]),
                            dtype_equal=dtype_equal)
    assert not array_equivalent(DatetimeIndex([0, np.nan]),
                                DatetimeIndex([1, np.nan]),
                                dtype_equal=dtype_equal)
    assert array_equivalent(
        TimedeltaIndex([0, np.nan]),
        TimedeltaIndex([0, np.nan]),
        dtype_equal=dtype_equal,
    )
    assert not array_equivalent(
        TimedeltaIndex([0, np.nan]),
        TimedeltaIndex([1, np.nan]),
        dtype_equal=dtype_equal,
    )
    assert array_equivalent(
        DatetimeIndex([0, np.nan], tz="US/Eastern"),
        DatetimeIndex([0, np.nan], tz="US/Eastern"),
        dtype_equal=dtype_equal,
    )
    assert not array_equivalent(
        DatetimeIndex([0, np.nan], tz="US/Eastern"),
        DatetimeIndex([1, np.nan], tz="US/Eastern"),
        dtype_equal=dtype_equal,
    )
    # The rest are not dtype_equal
    assert not array_equivalent(DatetimeIndex([0, np.nan]),
                                DatetimeIndex([0, np.nan], tz="US/Eastern"))
    assert not array_equivalent(
        DatetimeIndex([0, np.nan], tz="CET"),
        DatetimeIndex([0, np.nan], tz="US/Eastern"),
    )

    assert not array_equivalent(DatetimeIndex([0, np.nan]),
                                TimedeltaIndex([0, np.nan]))
Beispiel #23
0
def test_array_equivalent(dtype_equal):
    assert array_equivalent(np.array([np.nan, np.nan]),
                            np.array([np.nan, np.nan]),
                            dtype_equal=dtype_equal)
    assert array_equivalent(
        np.array([np.nan, 1, np.nan]),
        np.array([np.nan, 1, np.nan]),
        dtype_equal=dtype_equal,
    )
    assert array_equivalent(
        np.array([np.nan, None], dtype="object"),
        np.array([np.nan, None], dtype="object"),
        dtype_equal=dtype_equal,
    )
    # Check the handling of nested arrays in array_equivalent_object
    assert array_equivalent(
        np.array([np.array([np.nan, None], dtype="object"), None],
                 dtype="object"),
        np.array([np.array([np.nan, None], dtype="object"), None],
                 dtype="object"),
        dtype_equal=dtype_equal,
    )
    assert array_equivalent(
        np.array([np.nan, 1 + 1j], dtype="complex"),
        np.array([np.nan, 1 + 1j], dtype="complex"),
        dtype_equal=dtype_equal,
    )
    assert not array_equivalent(
        np.array([np.nan, 1 + 1j], dtype="complex"),
        np.array([np.nan, 1 + 2j], dtype="complex"),
        dtype_equal=dtype_equal,
    )
    assert not array_equivalent(
        np.array([np.nan, 1, np.nan]),
        np.array([np.nan, 2, np.nan]),
        dtype_equal=dtype_equal,
    )
    assert not array_equivalent(np.array(["a", "b", "c", "d"]),
                                np.array(["e", "e"]),
                                dtype_equal=dtype_equal)
    assert array_equivalent(Float64Index([0, np.nan]),
                            Float64Index([0, np.nan]),
                            dtype_equal=dtype_equal)
    assert not array_equivalent(Float64Index([0, np.nan]),
                                Float64Index([1, np.nan]),
                                dtype_equal=dtype_equal)
    assert array_equivalent(DatetimeIndex([0, np.nan]),
                            DatetimeIndex([0, np.nan]),
                            dtype_equal=dtype_equal)
    assert not array_equivalent(DatetimeIndex([0, np.nan]),
                                DatetimeIndex([1, np.nan]),
                                dtype_equal=dtype_equal)
    assert array_equivalent(
        TimedeltaIndex([0, np.nan]),
        TimedeltaIndex([0, np.nan]),
        dtype_equal=dtype_equal,
    )
    assert not array_equivalent(
        TimedeltaIndex([0, np.nan]),
        TimedeltaIndex([1, np.nan]),
        dtype_equal=dtype_equal,
    )

    msg = "will be interpreted as nanosecond UTC timestamps instead of wall-times"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern")
        dti2 = DatetimeIndex([0, np.nan], tz="CET")
        dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern")

    assert array_equivalent(
        dti1,
        dti1,
        dtype_equal=dtype_equal,
    )
    assert not array_equivalent(
        dti1,
        dti3,
        dtype_equal=dtype_equal,
    )
    # The rest are not dtype_equal
    assert not array_equivalent(DatetimeIndex([0, np.nan]), dti1)
    assert not array_equivalent(
        dti2,
        dti1,
    )

    assert not array_equivalent(DatetimeIndex([0, np.nan]),
                                TimedeltaIndex([0, np.nan]))
Beispiel #24
0
    Examples
    --------
    >>> arr = RangeIndex(5)
    >>> arr / zeros
    Float64Index([nan, inf, inf, inf, inf], dtype='float64')
    """
    return request.param


# ------------------------------------------------------------------
# Vector Fixtures


@pytest.fixture(
    params=[
        Float64Index(np.arange(5, dtype="float64")),
        Int64Index(np.arange(5, dtype="int64")),
        UInt64Index(np.arange(5, dtype="uint64")),
        RangeIndex(5),
    ],
    ids=lambda x: type(x).__name__,
)
def numeric_idx(request):
    """
    Several types of numeric-dtypes Index objects
    """
    return request.param


# ------------------------------------------------------------------
# Scalar Fixtures
Beispiel #25
0
def makeFloatIndex(k=10, name=None):
    base_idx = makeNumericIndex(k, name=name, dtype="float64")
    return Float64Index(base_idx)
class ConstructorTests:
    """
    Common tests for all variations of IntervalIndex construction. Input data
    to be supplied in breaks format, then converted by the subclass method
    get_kwargs_from_breaks to the expected format.
    """

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize(
        "breaks",
        [
            [3, 14, 15, 92, 653],
            np.arange(10, dtype="int64"),
            Int64Index(range(-10, 11)),
            Float64Index(np.arange(20, 30, 0.5)),
            date_range("20180101", periods=10),
            date_range("20180101", periods=10, tz="US/Eastern"),
            timedelta_range("1 day", periods=10),
        ],
    )
    def test_constructor(self, constructor, breaks, closed, name):
        result_kwargs = self.get_kwargs_from_breaks(breaks, closed)
        result = constructor(closed=closed, name=name, **result_kwargs)

        assert result.closed == closed
        assert result.name == name
        assert result.dtype.subtype == getattr(breaks, "dtype", "int64")
        tm.assert_index_equal(result.left, Index(breaks[:-1]))
        tm.assert_index_equal(result.right, Index(breaks[1:]))

    @pytest.mark.parametrize(
        "breaks, subtype",
        [
            (Int64Index([0, 1, 2, 3, 4]), "float64"),
            (Int64Index([0, 1, 2, 3, 4]), "datetime64[ns]"),
            (Int64Index([0, 1, 2, 3, 4]), "timedelta64[ns]"),
            (Float64Index([0, 1, 2, 3, 4]), "int64"),
            (date_range("2017-01-01", periods=5), "int64"),
            (timedelta_range("1 day", periods=5), "int64"),
        ],
    )
    def test_constructor_dtype(self, constructor, breaks, subtype):
        # GH 19262: conversion via dtype parameter
        expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
        expected = constructor(**expected_kwargs)

        result_kwargs = self.get_kwargs_from_breaks(breaks)
        iv_dtype = IntervalDtype(subtype, "right")
        for dtype in (iv_dtype, str(iv_dtype)):
            result = constructor(dtype=dtype, **result_kwargs)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            Int64Index([0, 1, 2, 3, 4]),
            Int64Index([0, 1, 2, 3, 4]),
            Int64Index([0, 1, 2, 3, 4]),
            Float64Index([0, 1, 2, 3, 4]),
            date_range("2017-01-01", periods=5),
            timedelta_range("1 day", periods=5),
        ],
    )
    def test_constructor_pass_closed(self, constructor, breaks):
        # not passing closed to IntervalDtype, but to IntervalArray constructor
        warn = None
        if isinstance(constructor, partial) and constructor.func is Index:
            # passing kwargs to Index is deprecated
            warn = FutureWarning

        iv_dtype = IntervalDtype(breaks.dtype)

        result_kwargs = self.get_kwargs_from_breaks(breaks)

        for dtype in (iv_dtype, str(iv_dtype)):
            with tm.assert_produces_warning(warn):

                result = constructor(dtype=dtype, closed="left", **result_kwargs)
            assert result.dtype.closed == "left"

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
    def test_constructor_nan(self, constructor, breaks, closed):
        # GH 18421
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_subtype = np.float64
        expected_values = np.array(breaks[:-1], dtype=object)

        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(result), expected_values)

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize(
        "breaks",
        [
            [],
            np.array([], dtype="int64"),
            np.array([], dtype="float64"),
            np.array([], dtype="datetime64[ns]"),
            np.array([], dtype="timedelta64[ns]"),
        ],
    )
    def test_constructor_empty(self, constructor, breaks, closed):
        # GH 18421
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_values = np.array([], dtype=object)
        expected_subtype = getattr(breaks, "dtype", np.int64)

        assert result.empty
        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(result), expected_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            tuple("0123456789"),
            list("abcdefghij"),
            np.array(list("abcdefghij"), dtype=object),
            np.array(list("abcdefghij"), dtype="<U1"),
        ],
    )
    def test_constructor_string(self, constructor, breaks):
        # GH 19016
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            constructor(**self.get_kwargs_from_breaks(breaks))

    @pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex])
    def test_constructor_categorical_valid(self, constructor, cat_constructor):
        # GH 21243/21253

        breaks = np.arange(10, dtype="int64")
        expected = IntervalIndex.from_breaks(breaks)

        cat_breaks = cat_constructor(breaks)
        result_kwargs = self.get_kwargs_from_breaks(cat_breaks)
        result = constructor(**result_kwargs)
        tm.assert_index_equal(result, expected)

    def test_generic_errors(self, constructor):
        # filler input data to be used when supplying invalid kwargs
        filler = self.get_kwargs_from_breaks(range(10))

        # invalid closed
        msg = "closed must be one of 'right', 'left', 'both', 'neither'"
        with pytest.raises(ValueError, match=msg):
            constructor(closed="invalid", **filler)

        # unsupported dtype
        msg = "dtype must be an IntervalDtype, got int64"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype="int64", **filler)

        # invalid dtype
        msg = "data type [\"']invalid[\"'] not understood"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype="invalid", **filler)

        # no point in nesting periods in an IntervalIndex
        periods = period_range("2000-01-01", periods=10)
        periods_kwargs = self.get_kwargs_from_breaks(periods)
        msg = "Period dtypes are not supported, use a PeriodIndex instead"
        with pytest.raises(ValueError, match=msg):
            constructor(**periods_kwargs)

        # decreasing values
        decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
        msg = "left side of interval must be <= right side"
        with pytest.raises(ValueError, match=msg):
            constructor(**decreasing_kwargs)